From 665f1388bc9713c81989dda6eed5cde52d57c255 Mon Sep 17 00:00:00 2001 From: Andrey Zhizhikin Date: Mon, 30 Nov 2020 12:42:33 +0000 Subject: [PATCH 0001/2012] ARM: omap2plus_defconfig: drop unused POWER_AVS option Commit 785b5bb41b0a ("PM: AVS: Drop the avs directory and the corresponding Kconfig") moved AVS code to SOC-specific folders, and removed corresponding Kconfig from drivers/power, leaving original POWER_AVS config option enabled in omap2plus_defconfig file. Remove the option, which has no references in the tree anymore. Fixes: 785b5bb41b0a ("PM: AVS: Drop the avs directory and the corresponding Kconfig") Signed-off-by: Andrey Zhizhikin Reviewed-by: Krzysztof Kozlowski Reviewed-by: Nishanth Menon Cc: Nishanth Menon Cc: Ulf Hansson Signed-off-by: Tony Lindgren --- arch/arm/configs/omap2plus_defconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/arm/configs/omap2plus_defconfig b/arch/arm/configs/omap2plus_defconfig index 77716f500813f..23b53526c4eb3 100644 --- a/arch/arm/configs/omap2plus_defconfig +++ b/arch/arm/configs/omap2plus_defconfig @@ -297,7 +297,6 @@ CONFIG_GPIO_TWL4030=y CONFIG_W1=m CONFIG_HDQ_MASTER_OMAP=m CONFIG_W1_SLAVE_DS250X=m -CONFIG_POWER_AVS=y CONFIG_POWER_RESET=y CONFIG_POWER_RESET_GPIO=y CONFIG_BATTERY_BQ27XXX=m -- GitLab From f1dc15cd7fc146107cad2a926d9c1d005f69002a Mon Sep 17 00:00:00 2001 From: Aaro Koskinen Date: Sun, 29 Nov 2020 16:47:10 +0200 Subject: [PATCH 0002/2012] ARM: dts: OMAP3: disable AES on N950/N9 AES needs to be disabled on Nokia N950/N9 as well (HS devices), otherwise kernel fails to boot. Fixes: c312f066314e ("ARM: dts: omap3: Migrate AES from hwmods to sysc-omap2") Signed-off-by: Aaro Koskinen Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/omap3-n950-n9.dtsi | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/arm/boot/dts/omap3-n950-n9.dtsi b/arch/arm/boot/dts/omap3-n950-n9.dtsi index 11d41e86f814d..7dde9fbb06d33 100644 --- a/arch/arm/boot/dts/omap3-n950-n9.dtsi +++ b/arch/arm/boot/dts/omap3-n950-n9.dtsi @@ -494,3 +494,11 @@ clock-names = "sysclk"; }; }; + +&aes1_target { + status = "disabled"; +}; + +&aes2_target { + status = "disabled"; +}; -- GitLab From ec76c2eea903947202098090bbe07a739b5246e9 Mon Sep 17 00:00:00 2001 From: Andreas Kemnade Date: Fri, 4 Dec 2020 10:55:39 +0100 Subject: [PATCH 0003/2012] ARM: OMAP2+: omap_device: fix idling of devices during probe On the GTA04A5 od->_driver_status was not set to BUS_NOTIFY_BIND_DRIVER during probe of the second mmc used for wifi. Therefore omap_device_late_idle idled the device during probing causing oopses when accessing the registers. It was not set because od->_state was set to OMAP_DEVICE_STATE_IDLE in the notifier callback. Therefore set od->_driver_status also in that case. This came apparent after commit 21b2cec61c04 ("mmc: Set PROBE_PREFER_ASYNCHRONOUS for drivers that existed in v4.4") causing this oops: omap_hsmmc 480b4000.mmc: omap_device_late_idle: enabled but no driver. Idling 8<--- cut here --- Unhandled fault: external abort on non-linefetch (0x1028) at 0xfa0b402c ... (omap_hsmmc_set_bus_width) from [] (omap_hsmmc_set_ios+0x11c/0x258) (omap_hsmmc_set_ios) from [] (mmc_power_up.part.8+0x3c/0xd0) (mmc_power_up.part.8) from [] (mmc_start_host+0x88/0x9c) (mmc_start_host) from [] (mmc_add_host+0x58/0x84) (mmc_add_host) from [] (omap_hsmmc_probe+0x5fc/0x8c0) (omap_hsmmc_probe) from [] (platform_drv_probe+0x48/0x98) (platform_drv_probe) from [] (really_probe+0x1dc/0x3b4) Fixes: 04abaf07f6d5 ("ARM: OMAP2+: omap_device: Sync omap_device and pm_runtime after probe defer") Fixes: 21b2cec61c04 ("mmc: Set PROBE_PREFER_ASYNCHRONOUS for drivers that existed in v4.4") Acked-by: Ulf Hansson Signed-off-by: Andreas Kemnade [tony@atomide.com: left out extra parens, trimmed description stack trace] Signed-off-by: Tony Lindgren --- arch/arm/mach-omap2/omap_device.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/arch/arm/mach-omap2/omap_device.c b/arch/arm/mach-omap2/omap_device.c index fc7bb2ca16727..64b23b0cd23c7 100644 --- a/arch/arm/mach-omap2/omap_device.c +++ b/arch/arm/mach-omap2/omap_device.c @@ -230,10 +230,12 @@ static int _omap_device_notifier_call(struct notifier_block *nb, break; case BUS_NOTIFY_BIND_DRIVER: od = to_omap_device(pdev); - if (od && (od->_state == OMAP_DEVICE_STATE_ENABLED) && - pm_runtime_status_suspended(dev)) { + if (od) { od->_driver_status = BUS_NOTIFY_BIND_DRIVER; - pm_runtime_set_active(dev); + if (od->_state == OMAP_DEVICE_STATE_ENABLED && + pm_runtime_status_suspended(dev)) { + pm_runtime_set_active(dev); + } } break; case BUS_NOTIFY_ADD_DEVICE: -- GitLab From 2f6fc9e08bf79f11516edef855283c6212bbe78f Mon Sep 17 00:00:00 2001 From: Andreas Kemnade Date: Tue, 1 Dec 2020 20:12:37 +0100 Subject: [PATCH 0004/2012] ARM: omap2plus_defconfig: enable SPI GPIO GTA04 uses that for controlling the td028ttec1 panel. So for easier testing/bisecting it is useful to have it enabled in the defconfig. Signed-off-by: Andreas Kemnade Signed-off-by: Tony Lindgren --- arch/arm/configs/omap2plus_defconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/configs/omap2plus_defconfig b/arch/arm/configs/omap2plus_defconfig index 23b53526c4eb3..28add0b64d424 100644 --- a/arch/arm/configs/omap2plus_defconfig +++ b/arch/arm/configs/omap2plus_defconfig @@ -280,6 +280,7 @@ CONFIG_SERIAL_OMAP_CONSOLE=y CONFIG_SERIAL_DEV_BUS=y CONFIG_I2C_CHARDEV=y CONFIG_SPI=y +CONFIG_SPI_GPIO=m CONFIG_SPI_OMAP24XX=y CONFIG_SPI_TI_QSPI=m CONFIG_HSI=m -- GitLab From c0bc969c176b10598b31d5d1a5edf9a5261f0a9f Mon Sep 17 00:00:00 2001 From: Carl Philipp Klemm Date: Mon, 7 Dec 2020 20:58:01 +0100 Subject: [PATCH 0005/2012] ARM: omap2: pmic-cpcap: fix maximum voltage to be consistent with defaults on xt875 xt875 comes up with a iva voltage of 1375000 and android runs at this too. fix maximum voltage to be consistent with this. Signed-off-by: Carl Philipp Klemm Signed-off-by: Tony Lindgren --- arch/arm/mach-omap2/pmic-cpcap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mach-omap2/pmic-cpcap.c b/arch/arm/mach-omap2/pmic-cpcap.c index eab281a5fc9f7..09076ad0576d9 100644 --- a/arch/arm/mach-omap2/pmic-cpcap.c +++ b/arch/arm/mach-omap2/pmic-cpcap.c @@ -71,7 +71,7 @@ static struct omap_voltdm_pmic omap_cpcap_iva = { .vp_vstepmin = OMAP4_VP_VSTEPMIN_VSTEPMIN, .vp_vstepmax = OMAP4_VP_VSTEPMAX_VSTEPMAX, .vddmin = 900000, - .vddmax = 1350000, + .vddmax = 1375000, .vp_timeout_us = OMAP4_VP_VLIMITTO_TIMEOUT_US, .i2c_slave_addr = 0x44, .volt_reg_addr = 0x0, -- GitLab From 43ffe817bfe3871ffbaa1e98952a2a01b140e71e Mon Sep 17 00:00:00 2001 From: Jisheng Zhang Date: Thu, 10 Dec 2020 15:10:56 +0530 Subject: [PATCH 0006/2012] arm64: dts: bitmain: Use generic "ngpios" rather than "snps,nr-gpios" This is to remove similar errors as below: OF: /.../gpio-port@0: could not find phandle Commit 7569486d79ae ("gpio: dwapb: Add ngpios DT-property support") explained the reason of above errors well and added the generic "ngpios" property, let's use it. Signed-off-by: Jisheng Zhang Signed-off-by: Manivannan Sadhasivam Reviewed-by: Linus Walleij Acked-by: Manivannan Sadhasivam Link: https://lore.kernel.org/r/20201210094056.54553-1-manivannan.sadhasivam@linaro.org' Signed-off-by: Arnd Bergmann --- arch/arm64/boot/dts/bitmain/bm1880.dtsi | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm64/boot/dts/bitmain/bm1880.dtsi b/arch/arm64/boot/dts/bitmain/bm1880.dtsi index fa6e6905f5888..53a9b76057aa1 100644 --- a/arch/arm64/boot/dts/bitmain/bm1880.dtsi +++ b/arch/arm64/boot/dts/bitmain/bm1880.dtsi @@ -127,7 +127,7 @@ compatible = "snps,dw-apb-gpio-port"; gpio-controller; #gpio-cells = <2>; - snps,nr-gpios = <32>; + ngpios = <32>; reg = <0>; interrupt-controller; #interrupt-cells = <2>; @@ -145,7 +145,7 @@ compatible = "snps,dw-apb-gpio-port"; gpio-controller; #gpio-cells = <2>; - snps,nr-gpios = <32>; + ngpios = <32>; reg = <0>; interrupt-controller; #interrupt-cells = <2>; @@ -163,7 +163,7 @@ compatible = "snps,dw-apb-gpio-port"; gpio-controller; #gpio-cells = <2>; - snps,nr-gpios = <8>; + ngpios = <8>; reg = <0>; interrupt-controller; #interrupt-cells = <2>; -- GitLab From f373a811fd9a69fc8bafb9bcb41d2cfa36c62665 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 11 Dec 2020 13:06:52 +0300 Subject: [PATCH 0007/2012] ASoC: Intel: fix error code cnl_set_dsp_D0() Return -ETIMEDOUT if the dsp boot times out instead of returning success. Fixes: cb6a55284629 ("ASoC: Intel: cnl: Add sst library functions for cnl platform") Signed-off-by: Dan Carpenter Reviewed-by: Cezary Rojewski Link: https://lore.kernel.org/r/X9NEvCzuN+IObnTN@mwanda Signed-off-by: Mark Brown --- sound/soc/intel/skylake/cnl-sst.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/intel/skylake/cnl-sst.c b/sound/soc/intel/skylake/cnl-sst.c index fcd8dff27ae8e..1275c149acc02 100644 --- a/sound/soc/intel/skylake/cnl-sst.c +++ b/sound/soc/intel/skylake/cnl-sst.c @@ -224,6 +224,7 @@ static int cnl_set_dsp_D0(struct sst_dsp *ctx, unsigned int core_id) "dsp boot timeout, status=%#x error=%#x\n", sst_dsp_shim_read(ctx, CNL_ADSP_FW_STATUS), sst_dsp_shim_read(ctx, CNL_ADSP_ERROR_CODE)); + ret = -ETIMEDOUT; goto err; } } else { -- GitLab From fe6ce6c394fb1ef1d8a6384c5180e70893157f22 Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Tue, 15 Dec 2020 15:05:11 +0200 Subject: [PATCH 0008/2012] MAINTAINERS: Update email address for TI ASoC and twl4030 codec drivers My employment with TI is coming to an end, it is my intention to look after the drivers I have worked with over the years. Signed-off-by: Peter Ujfalusi Signed-off-by: Peter Ujfalusi Link: https://lore.kernel.org/r/20201215130512.8753-2-peter.ujfalusi@ti.com Signed-off-by: Mark Brown --- MAINTAINERS | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 3da6d8c154e48..666b76a634a8d 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -12644,7 +12644,7 @@ F: include/misc/ocxl* F: include/uapi/misc/ocxl.h OMAP AUDIO SUPPORT -M: Peter Ujfalusi +M: Peter Ujfalusi M: Jarkko Nikula L: alsa-devel@alsa-project.org (moderated for non-subscribers) L: linux-omap@vger.kernel.org @@ -17280,7 +17280,7 @@ F: arch/xtensa/ F: drivers/irqchip/irq-xtensa-* TEXAS INSTRUMENTS ASoC DRIVERS -M: Peter Ujfalusi +M: Peter Ujfalusi L: alsa-devel@alsa-project.org (moderated for non-subscribers) S: Maintained F: sound/soc/ti/ @@ -17571,7 +17571,7 @@ F: Documentation/devicetree/bindings/net/nfc/trf7970a.txt F: drivers/nfc/trf7970a.c TI TWL4030 SERIES SOC CODEC DRIVER -M: Peter Ujfalusi +M: Peter Ujfalusi L: alsa-devel@alsa-project.org (moderated for non-subscribers) S: Maintained F: sound/soc/codecs/twl4030* -- GitLab From 61fc03b6512b18f27a25002426d595f5a36645ed Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Tue, 15 Dec 2020 15:05:12 +0200 Subject: [PATCH 0009/2012] ASoC: dt-bindings: ti, j721e: Update maintainer and author information My employment with TI is coming to an end, add the copyright and author comments as they due and change the maintainer mail address. Signed-off-by: Peter Ujfalusi Signed-off-by: Peter Ujfalusi Link: https://lore.kernel.org/r/20201215130512.8753-3-peter.ujfalusi@ti.com Signed-off-by: Mark Brown --- .../devicetree/bindings/sound/ti,j721e-cpb-audio.yaml | 4 +++- .../devicetree/bindings/sound/ti,j721e-cpb-ivi-audio.yaml | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/Documentation/devicetree/bindings/sound/ti,j721e-cpb-audio.yaml b/Documentation/devicetree/bindings/sound/ti,j721e-cpb-audio.yaml index 805da4d6a88ed..ec06789b21dfc 100644 --- a/Documentation/devicetree/bindings/sound/ti,j721e-cpb-audio.yaml +++ b/Documentation/devicetree/bindings/sound/ti,j721e-cpb-audio.yaml @@ -1,4 +1,6 @@ # SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +# Copyright (C) 2020 Texas Instruments Incorporated +# Author: Peter Ujfalusi %YAML 1.2 --- $id: http://devicetree.org/schemas/sound/ti,j721e-cpb-audio.yaml# @@ -7,7 +9,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Texas Instruments J721e Common Processor Board Audio Support maintainers: - - Peter Ujfalusi + - Peter Ujfalusi description: | The audio support on the board is using pcm3168a codec connected to McASP10 diff --git a/Documentation/devicetree/bindings/sound/ti,j721e-cpb-ivi-audio.yaml b/Documentation/devicetree/bindings/sound/ti,j721e-cpb-ivi-audio.yaml index bb780f6216287..ee9f960de36b7 100644 --- a/Documentation/devicetree/bindings/sound/ti,j721e-cpb-ivi-audio.yaml +++ b/Documentation/devicetree/bindings/sound/ti,j721e-cpb-ivi-audio.yaml @@ -1,4 +1,6 @@ # SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +# Copyright (C) 2020 Texas Instruments Incorporated +# Author: Peter Ujfalusi %YAML 1.2 --- $id: http://devicetree.org/schemas/sound/ti,j721e-cpb-ivi-audio.yaml# @@ -7,7 +9,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Texas Instruments J721e Common Processor Board Audio Support maintainers: - - Peter Ujfalusi + - Peter Ujfalusi description: | The Infotainment board plugs into the Common Processor Board, the support of the -- GitLab From 5c6679b5cb120f07652418524ab186ac47680b49 Mon Sep 17 00:00:00 2001 From: Thomas Hebb Date: Sat, 12 Dec 2020 17:20:12 -0800 Subject: [PATCH 0010/2012] ASoC: dapm: remove widget from dirty list on free A widget's "dirty" list_head, much like its "list" list_head, eventually chains back to a list_head on the snd_soc_card itself. This means that the list can stick around even after the widget (or all widgets) have been freed. Currently, however, widgets that are in the dirty list when freed remain there, corrupting the entire list and leading to memory errors and undefined behavior when the list is next accessed or modified. I encountered this issue when a component failed to probe relatively late in snd_soc_bind_card(), causing it to bail out and call soc_cleanup_card_resources(), which eventually called snd_soc_dapm_free() with widgets that were still dirty from when they'd been added. Fixes: db432b414e20 ("ASoC: Do DAPM power checks only for widgets changed since last run") Cc: stable@vger.kernel.org Signed-off-by: Thomas Hebb Reviewed-by: Charles Keepax Link: https://lore.kernel.org/r/f8b5f031d50122bf1a9bfc9cae046badf4a7a31a.1607822410.git.tommyhebb@gmail.com Signed-off-by: Mark Brown --- sound/soc/soc-dapm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c index 9f0c86cbdcca2..2b75d0139e478 100644 --- a/sound/soc/soc-dapm.c +++ b/sound/soc/soc-dapm.c @@ -2486,6 +2486,7 @@ void snd_soc_dapm_free_widget(struct snd_soc_dapm_widget *w) enum snd_soc_dapm_direction dir; list_del(&w->list); + list_del(&w->dirty); /* * remove source and sink paths associated to this widget. * While removing the path, remove reference to it from both -- GitLab From 57f04815fd95bb8c46f6ec5c9d25430bb52d419f Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Thu, 10 Dec 2020 09:40:28 -0800 Subject: [PATCH 0011/2012] drm/msm: Fix WARN_ON() splat in _free_object() [ 192.062000] ------------[ cut here ]------------ [ 192.062498] WARNING: CPU: 3 PID: 2039 at drivers/gpu/drm/msm/msm_gem.c:381 put_iova_vmas+0x94/0xa0 [msm] [ 192.062870] Modules linked in: snd_hrtimer snd_seq snd_seq_device rfcomm algif_hash algif_skcipher af_alg bnep xt_CHECKSUM nft_chain_nat xt_MASQUERADE nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 nft_counter xt_tcpudp nft_compat cpufreq_powersave cpufreq_conservative q6asm_dai q6routing q6afe_dai q6adm bridge q6afe q6asm q6dsp_common q6core stp llc nf_tables libcrc32c nfnetlink snd_soc_wsa881x regmap_sdw soundwire_qcom gpio_wcd934x snd_soc_wcd934x wcd934x regmap_slimbus venus_enc venus_dec apr videobuf2_dma_sg qrtr_smd uvcvideo videobuf2_vmalloc videobuf2_memops ath10k_snoc ath10k_core hci_uart btqca btbcm mac80211 bluetooth snd_soc_sdm845 ath snd_soc_rt5663 snd_soc_qcom_common snd_soc_rl6231 soundwire_bus ecdh_generic ecc qcom_spmi_adc5 venus_core qcom_pon qcom_spmi_temp_alarm qcom_vadc_common v4l2_mem2mem videobuf2_v4l2 cfg80211 videobuf2_common hid_multitouch reset_qcom_pdc qcrypto qcom_rng rfkill qcom_q6v5_mss libarc4 libdes qrtr ns qcom_wdt socinfo slim_qcom_ngd_ctrl [ 192.065739] pdr_interface qcom_q6v5_pas slimbus qcom_pil_info qcom_q6v5 qcom_sysmon qcom_common qcom_glink_smem qmi_helpers rmtfs_mem tcp_bbr sch_fq fuse ip_tables x_tables ipv6 crc_ccitt ti_sn65dsi86 i2c_hid msm mdt_loader llcc_qcom rtc_pm8xxx ocmem drm_kms_helper crct10dif_ce phy_qcom_qusb2 i2c_qcom_geni panel_simple drm pwm_bl [ 192.066066] CPU: 3 PID: 2039 Comm: gnome-shell Tainted: G W 5.10.0-rc7-next-20201208 #1 [ 192.066068] Hardware name: LENOVO 81JL/LNVNB161216, BIOS 9UCN33WW(V2.06) 06/ 4/2019 [ 192.066072] pstate: 40400005 (nZcv daif +PAN -UAO -TCO BTYPE=--) [ 192.066099] pc : put_iova_vmas+0x94/0xa0 [msm] [ 192.066262] lr : put_iova_vmas+0x1c/0xa0 [msm] [ 192.066403] sp : ffff800019efbbb0 [ 192.066405] x29: ffff800019efbbb0 x28: ffff800019efbd88 [ 192.066411] x27: 0000000000000000 x26: ffff109582efa400 [ 192.066417] x25: 0000000000000009 x24: 000000000000012b [ 192.066422] x23: ffff109582efa438 x22: ffff109582efa450 [ 192.066427] x21: ffff109582efa528 x20: ffff1095cbd4f200 [ 192.066432] x19: ffff1095cbd4f200 x18: 0000000000000000 [ 192.066438] x17: 0000000000000000 x16: ffffc26c200ca750 [ 192.066727] x15: 0000000000000000 x14: 0000000000000000 [ 192.066741] x13: ffff1096fb8c9100 x12: 0000000000000002 [ 192.066754] x11: ffffffffffffffff x10: 0000000000000002 [ 192.067046] x9 : 0000000000000001 x8 : 0000000000000a36 [ 192.067060] x7 : ffff4e2ad9f11000 x6 : ffffc26c216d4000 [ 192.067212] x5 : ffffc26c2022661c x4 : ffff1095c2b98000 [ 192.067367] x3 : ffff1095cbd4f300 x2 : 0000000000000000 [ 192.067380] x1 : ffff1095c2b98000 x0 : 0000000000000000 [ 192.067667] Call trace: [ 192.067734] put_iova_vmas+0x94/0xa0 [msm] [ 192.068078] msm_gem_free_object+0xb4/0x110 [msm] [ 192.068399] drm_gem_object_free+0x1c/0x30 [drm] [ 192.068717] drm_gem_object_handle_put_unlocked+0xf0/0xf8 [drm] [ 192.069032] drm_gem_object_release_handle+0x6c/0x88 [drm] [ 192.069349] drm_gem_handle_delete+0x68/0xc0 [drm] [ 192.069666] drm_gem_close_ioctl+0x30/0x48 [drm] [ 192.069984] drm_ioctl_kernel+0xc0/0x110 [drm] [ 192.070303] drm_ioctl+0x210/0x440 [drm] [ 192.070588] __arm64_sys_ioctl+0xa8/0xf0 [ 192.070599] el0_svc_common.constprop.0+0x74/0x190 [ 192.070608] do_el0_svc+0x24/0x90 [ 192.070618] el0_svc+0x14/0x20 [ 192.070903] el0_sync_handler+0xb0/0xb8 [ 192.070911] el0_sync+0x174/0x180 [ 192.070918] ---[ end trace bee6b12a899001a3 ]--- [ 192.072140] ------------[ cut here ]------------ Fixes: 9b73bde39cf2 ("drm/msm: Fix use-after-free in msm_gem with carveout") Signed-off-by: Rob Clark Acked-by: Iskren Chernev --- drivers/gpu/drm/msm/msm_gem.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c index 68a6c7eacc0a7..a21be5b910ff6 100644 --- a/drivers/gpu/drm/msm/msm_gem.c +++ b/drivers/gpu/drm/msm/msm_gem.c @@ -990,6 +990,8 @@ void msm_gem_free_object(struct drm_gem_object *obj) if (msm_obj->pages) kvfree(msm_obj->pages); + put_iova_vmas(obj); + /* dma_buf_detach() grabs resv lock, so we need to unlock * prior to drm_prime_gem_destroy */ @@ -999,11 +1001,10 @@ void msm_gem_free_object(struct drm_gem_object *obj) } else { msm_gem_vunmap(obj); put_pages(obj); + put_iova_vmas(obj); msm_gem_unlock(obj); } - put_iova_vmas(obj); - drm_gem_object_release(obj); kfree(msm_obj); -- GitLab From 4ad2d3cf2a299645bdc6d72e5b8ee11b2ed147ac Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 16 Dec 2020 11:28:59 +0000 Subject: [PATCH 0012/2012] ASoC: codecs: fix spelling mistake in Kconfig "comunicate" -> "communicate" There is a spelling mistake in the Kconfig help text. Fix it. Signed-off-by: Colin Ian King Link: https://lore.kernel.org/r/20201216112859.11564-1-colin.king@canonical.com Signed-off-by: Mark Brown --- sound/soc/codecs/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/Kconfig b/sound/soc/codecs/Kconfig index 5e4e681127912..ac63e7c176c1a 100644 --- a/sound/soc/codecs/Kconfig +++ b/sound/soc/codecs/Kconfig @@ -458,7 +458,7 @@ config SND_SOC_ADAU7118_HW help Enable support for the Analog Devices ADAU7118 8 Channel PDM-to-I2S/TDM Converter. In this mode, the device works in standalone mode which - means that there is no bus to comunicate with it. Stereo mode is not + means that there is no bus to communicate with it. Stereo mode is not supported in this mode. To compile this driver as a module, choose M here: the module -- GitLab From e49037ad12e47cd34239b99b010c5438844923af Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 16 Dec 2020 12:59:13 +0000 Subject: [PATCH 0013/2012] ASoC: SOF: Fix spelling mistake in Kconfig "ond" -> "and" There is a spelling mistake in the Kconfig help text. Fix it. Signed-off-by: Colin Ian King Acked-by: Kai Vehmanen Link: https://lore.kernel.org/r/20201216125913.16041-1-colin.king@canonical.com Signed-off-by: Mark Brown --- sound/soc/sof/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/sof/Kconfig b/sound/soc/sof/Kconfig index 031dad5fc4c70..3e8b6c035ce3f 100644 --- a/sound/soc/sof/Kconfig +++ b/sound/soc/sof/Kconfig @@ -122,7 +122,7 @@ config SND_SOC_SOF_DEBUG_XRUN_STOP bool "SOF stop on XRUN" help This option forces PCMs to stop on any XRUN event. This is useful to - preserve any trace data ond pipeline status prior to the XRUN. + preserve any trace data and pipeline status prior to the XRUN. Say Y if you are debugging SOF FW pipeline XRUNs. If unsure select "N". -- GitLab From acd894aee3149c15847bc4f0690fccba59ced5e7 Mon Sep 17 00:00:00 2001 From: shengjiu wang Date: Wed, 16 Dec 2020 18:44:24 +0800 Subject: [PATCH 0014/2012] ASoC: imx-hdmi: Fix warning of the uninitialized variable ret When condition ((hdmi_out && hdmi_in) || (!hdmi_out && !hdmi_in)) is true, then goto fail, the uninitialized variable ret will be returned. Signed-off-by: shengjiu wang Reported-by: kernel test robot Acked-by: Nicolin Chen Fixes: 6a5f850aa83a ("ASoC: fsl: Add imx-hdmi machine driver") Reviewed-by: Fabio Estevam Link: https://lore.kernel.org/r/1608115464-18710-1-git-send-email-shengjiu.wang@nxp.com Signed-off-by: Mark Brown --- sound/soc/fsl/imx-hdmi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/fsl/imx-hdmi.c b/sound/soc/fsl/imx-hdmi.c index 2c2a76a719401..ede4a9ad1054c 100644 --- a/sound/soc/fsl/imx-hdmi.c +++ b/sound/soc/fsl/imx-hdmi.c @@ -164,6 +164,7 @@ static int imx_hdmi_probe(struct platform_device *pdev) if ((hdmi_out && hdmi_in) || (!hdmi_out && !hdmi_in)) { dev_err(&pdev->dev, "Invalid HDMI DAI link\n"); + ret = -EINVAL; goto fail; } -- GitLab From 13733775326ea9eb81c6148ad60c43b8d231a343 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 16 Dec 2020 11:26:08 +0000 Subject: [PATCH 0015/2012] ASoC: atmel: fix spelling mistake in Kconfig "programable" -> "programmable" There are a couple of spelling mistakes in the Kconfig help text. Fix them. Signed-off-by: Colin Ian King Reviewed-by: Codrin Ciubotariu Link: https://lore.kernel.org/r/20201216112608.11385-1-colin.king@canonical.com Signed-off-by: Mark Brown --- sound/soc/atmel/Kconfig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/atmel/Kconfig b/sound/soc/atmel/Kconfig index 142373ec411ad..9fe9471f4514d 100644 --- a/sound/soc/atmel/Kconfig +++ b/sound/soc/atmel/Kconfig @@ -143,7 +143,7 @@ config SND_MCHP_SOC_SPDIFTX - sama7g5 This S/PDIF TX driver is compliant with IEC-60958 standard and - includes programable User Data and Channel Status fields. + includes programmable User Data and Channel Status fields. config SND_MCHP_SOC_SPDIFRX tristate "Microchip ASoC driver for boards using S/PDIF RX" @@ -157,5 +157,5 @@ config SND_MCHP_SOC_SPDIFRX - sama7g5 This S/PDIF RX driver is compliant with IEC-60958 standard and - includes programable User Data and Channel Status fields. + includes programmable User Data and Channel Status fields. endif -- GitLab From 81e7eb5bf08f36d34495a5898f6ef3fec05d9776 Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Wed, 16 Dec 2020 22:43:44 -0500 Subject: [PATCH 0016/2012] Revert "Revert "scsi: megaraid_sas: Added support for shared host tagset for cpuhotplug"" This reverts commit 1a0e1943d8798cb3241fb5edb9a836af1611b60a. Commit b3c6a5997541 ("block: Fix a lockdep complaint triggered by request queue flushing") has been reverted and commit fb01a2932e81 has been introduced in its place. Consequently, it is now safe to reinstate the megaraid_sas tagset changes that led to boot problems in 5.10. Signed-off-by: Martin K. Petersen --- drivers/scsi/megaraid/megaraid_sas_base.c | 39 +++++++++++++++++++++ drivers/scsi/megaraid/megaraid_sas_fusion.c | 29 ++++++++------- 2 files changed, 55 insertions(+), 13 deletions(-) diff --git a/drivers/scsi/megaraid/megaraid_sas_base.c b/drivers/scsi/megaraid/megaraid_sas_base.c index e158d3d62056b..41cd66fc7d812 100644 --- a/drivers/scsi/megaraid/megaraid_sas_base.c +++ b/drivers/scsi/megaraid/megaraid_sas_base.c @@ -37,6 +37,7 @@ #include #include #include +#include #include #include @@ -113,6 +114,10 @@ unsigned int enable_sdev_max_qd; module_param(enable_sdev_max_qd, int, 0444); MODULE_PARM_DESC(enable_sdev_max_qd, "Enable sdev max qd as can_queue. Default: 0"); +int host_tagset_enable = 1; +module_param(host_tagset_enable, int, 0444); +MODULE_PARM_DESC(host_tagset_enable, "Shared host tagset enable/disable Default: enable(1)"); + MODULE_LICENSE("GPL"); MODULE_VERSION(MEGASAS_VERSION); MODULE_AUTHOR("megaraidlinux.pdl@broadcom.com"); @@ -3119,6 +3124,19 @@ megasas_bios_param(struct scsi_device *sdev, struct block_device *bdev, return 0; } +static int megasas_map_queues(struct Scsi_Host *shost) +{ + struct megasas_instance *instance; + + instance = (struct megasas_instance *)shost->hostdata; + + if (shost->nr_hw_queues == 1) + return 0; + + return blk_mq_pci_map_queues(&shost->tag_set.map[HCTX_TYPE_DEFAULT], + instance->pdev, instance->low_latency_index_start); +} + static void megasas_aen_polling(struct work_struct *work); /** @@ -3427,6 +3445,7 @@ static struct scsi_host_template megasas_template = { .eh_timed_out = megasas_reset_timer, .shost_attrs = megaraid_host_attrs, .bios_param = megasas_bios_param, + .map_queues = megasas_map_queues, .change_queue_depth = scsi_change_queue_depth, .max_segment_size = 0xffffffff, }; @@ -6808,6 +6827,26 @@ static int megasas_io_attach(struct megasas_instance *instance) host->max_lun = MEGASAS_MAX_LUN; host->max_cmd_len = 16; + /* Use shared host tagset only for fusion adaptors + * if there are managed interrupts (smp affinity enabled case). + * Single msix_vectors in kdump, so shared host tag is also disabled. + */ + + host->host_tagset = 0; + host->nr_hw_queues = 1; + + if ((instance->adapter_type != MFI_SERIES) && + (instance->msix_vectors > instance->low_latency_index_start) && + host_tagset_enable && + instance->smp_affinity_enable) { + host->host_tagset = 1; + host->nr_hw_queues = instance->msix_vectors - + instance->low_latency_index_start; + } + + dev_info(&instance->pdev->dev, + "Max firmware commands: %d shared with nr_hw_queues = %d\n", + instance->max_fw_cmds, host->nr_hw_queues); /* * Notify the mid-layer about the new controller */ diff --git a/drivers/scsi/megaraid/megaraid_sas_fusion.c b/drivers/scsi/megaraid/megaraid_sas_fusion.c index b0c01cf0428f2..fd607287608e1 100644 --- a/drivers/scsi/megaraid/megaraid_sas_fusion.c +++ b/drivers/scsi/megaraid/megaraid_sas_fusion.c @@ -359,24 +359,29 @@ megasas_get_msix_index(struct megasas_instance *instance, { int sdev_busy; - /* nr_hw_queue = 1 for MegaRAID */ - struct blk_mq_hw_ctx *hctx = - scmd->device->request_queue->queue_hw_ctx[0]; - - sdev_busy = atomic_read(&hctx->nr_active); + /* TBD - if sml remove device_busy in future, driver + * should track counter in internal structure. + */ + sdev_busy = atomic_read(&scmd->device->device_busy); if (instance->perf_mode == MR_BALANCED_PERF_MODE && - sdev_busy > (data_arms * MR_DEVICE_HIGH_IOPS_DEPTH)) + sdev_busy > (data_arms * MR_DEVICE_HIGH_IOPS_DEPTH)) { cmd->request_desc->SCSIIO.MSIxIndex = mega_mod64((atomic64_add_return(1, &instance->high_iops_outstanding) / MR_HIGH_IOPS_BATCH_COUNT), instance->low_latency_index_start); - else if (instance->msix_load_balance) + } else if (instance->msix_load_balance) { cmd->request_desc->SCSIIO.MSIxIndex = (mega_mod64(atomic64_add_return(1, &instance->total_io_count), instance->msix_vectors)); - else + } else if (instance->host->nr_hw_queues > 1) { + u32 tag = blk_mq_unique_tag(scmd->request); + + cmd->request_desc->SCSIIO.MSIxIndex = blk_mq_unique_tag_to_hwq(tag) + + instance->low_latency_index_start; + } else { cmd->request_desc->SCSIIO.MSIxIndex = instance->reply_map[raw_smp_processor_id()]; + } } /** @@ -956,9 +961,6 @@ megasas_alloc_cmds_fusion(struct megasas_instance *instance) if (megasas_alloc_cmdlist_fusion(instance)) goto fail_exit; - dev_info(&instance->pdev->dev, "Configured max firmware commands: %d\n", - instance->max_fw_cmds); - /* The first 256 bytes (SMID 0) is not used. Don't add to the cmd list */ io_req_base = fusion->io_request_frames + MEGA_MPI2_RAID_DEFAULT_IO_FRAME_SIZE; io_req_base_phys = fusion->io_request_frames_phys + MEGA_MPI2_RAID_DEFAULT_IO_FRAME_SIZE; @@ -1102,8 +1104,9 @@ megasas_ioc_init_fusion(struct megasas_instance *instance) MR_HIGH_IOPS_QUEUE_COUNT) && cur_intr_coalescing) instance->perf_mode = MR_BALANCED_PERF_MODE; - dev_info(&instance->pdev->dev, "Performance mode :%s\n", - MEGASAS_PERF_MODE_2STR(instance->perf_mode)); + dev_info(&instance->pdev->dev, "Performance mode :%s (latency index = %d)\n", + MEGASAS_PERF_MODE_2STR(instance->perf_mode), + instance->low_latency_index_start); instance->fw_sync_cache_support = (scratch_pad_1 & MR_CAN_HANDLE_SYNC_CACHE_OFFSET) ? 1 : 0; -- GitLab From 315fbe4cef98ee5fb6085bc54c7f25eb06466c70 Mon Sep 17 00:00:00 2001 From: Srinivasa Rao Mandadapu Date: Thu, 17 Dec 2020 13:38:33 +0530 Subject: [PATCH 0017/2012] ASoC: qcom: Fix incorrect volatile registers MI2S and DMA control registers are not volatile, so remove these from volatile registers list. Registers reset state check by reading non volatile registers makes no use, so remove error check from cpu and platform trigger callbacks. Initialized map variable two times in lpass platform trigger API, so remove redundant initialization. Fixes commit b1824968221cc ("ASoC: qcom: Fix enabling BCLK and LRCLK in LPAIF invalid state") Signed-off-by: V Sujith Kumar Reddy Signed-off-by: Srinivasa Rao Mandadapu Link: https://lore.kernel.org/r/1608192514-29695-2-git-send-email-srivasam@codeaurora.org Signed-off-by: Mark Brown --- sound/soc/qcom/lpass-cpu.c | 20 ++------------------ sound/soc/qcom/lpass-platform.c | 15 --------------- 2 files changed, 2 insertions(+), 33 deletions(-) diff --git a/sound/soc/qcom/lpass-cpu.c b/sound/soc/qcom/lpass-cpu.c index af684fd19ab9e..c5e99c2d89c7e 100644 --- a/sound/soc/qcom/lpass-cpu.c +++ b/sound/soc/qcom/lpass-cpu.c @@ -270,18 +270,6 @@ static int lpass_cpu_daiops_trigger(struct snd_pcm_substream *substream, struct lpaif_i2sctl *i2sctl = drvdata->i2sctl; unsigned int id = dai->driver->id; int ret = -EINVAL; - unsigned int val = 0; - - ret = regmap_read(drvdata->lpaif_map, - LPAIF_I2SCTL_REG(drvdata->variant, dai->driver->id), &val); - if (ret) { - dev_err(dai->dev, "error reading from i2sctl reg: %d\n", ret); - return ret; - } - if (val == LPAIF_I2SCTL_RESET_STATE) { - dev_err(dai->dev, "error in i2sctl register state\n"); - return -ENOTRECOVERABLE; - } switch (cmd) { case SNDRV_PCM_TRIGGER_START: @@ -454,20 +442,16 @@ static bool lpass_cpu_regmap_volatile(struct device *dev, unsigned int reg) struct lpass_variant *v = drvdata->variant; int i; - for (i = 0; i < v->i2s_ports; ++i) - if (reg == LPAIF_I2SCTL_REG(v, i)) - return true; for (i = 0; i < v->irq_ports; ++i) if (reg == LPAIF_IRQSTAT_REG(v, i)) return true; for (i = 0; i < v->rdma_channels; ++i) - if (reg == LPAIF_RDMACURR_REG(v, i) || reg == LPAIF_RDMACTL_REG(v, i)) + if (reg == LPAIF_RDMACURR_REG(v, i)) return true; for (i = 0; i < v->wrdma_channels; ++i) - if (reg == LPAIF_WRDMACURR_REG(v, i + v->wrdma_channel_start) || - reg == LPAIF_WRDMACTL_REG(v, i + v->wrdma_channel_start)) + if (reg == LPAIF_WRDMACURR_REG(v, i + v->wrdma_channel_start)) return true; return false; diff --git a/sound/soc/qcom/lpass-platform.c b/sound/soc/qcom/lpass-platform.c index 80b09dede5f9c..232deee6fde56 100644 --- a/sound/soc/qcom/lpass-platform.c +++ b/sound/soc/qcom/lpass-platform.c @@ -452,7 +452,6 @@ static int lpass_platform_pcmops_trigger(struct snd_soc_component *component, unsigned int reg_irqclr = 0, val_irqclr = 0; unsigned int reg_irqen = 0, val_irqen = 0, val_mask = 0; unsigned int dai_id = cpu_dai->driver->id; - unsigned int dma_ctrl_reg = 0; ch = pcm_data->dma_ch; if (dir == SNDRV_PCM_STREAM_PLAYBACK) { @@ -469,17 +468,7 @@ static int lpass_platform_pcmops_trigger(struct snd_soc_component *component, id = pcm_data->dma_ch - v->wrdma_channel_start; map = drvdata->lpaif_map; } - ret = regmap_read(map, LPAIF_DMACTL_REG(v, ch, dir, dai_id), &dma_ctrl_reg); - if (ret) { - dev_err(soc_runtime->dev, "error reading from rdmactl reg: %d\n", ret); - return ret; - } - if (dma_ctrl_reg == LPAIF_DMACTL_RESET_STATE || - dma_ctrl_reg == LPAIF_DMACTL_RESET_STATE + 1) { - dev_err(soc_runtime->dev, "error in rdmactl register state\n"); - return -ENOTRECOVERABLE; - } switch (cmd) { case SNDRV_PCM_TRIGGER_START: case SNDRV_PCM_TRIGGER_RESUME: @@ -500,7 +489,6 @@ static int lpass_platform_pcmops_trigger(struct snd_soc_component *component, "error writing to rdmactl reg: %d\n", ret); return ret; } - map = drvdata->hdmiif_map; reg_irqclr = LPASS_HDMITX_APP_IRQCLEAR_REG(v); val_irqclr = (LPAIF_IRQ_ALL(ch) | LPAIF_IRQ_HDMI_REQ_ON_PRELOAD(ch) | @@ -519,7 +507,6 @@ static int lpass_platform_pcmops_trigger(struct snd_soc_component *component, break; case MI2S_PRIMARY: case MI2S_SECONDARY: - map = drvdata->lpaif_map; reg_irqclr = LPAIF_IRQCLEAR_REG(v, LPAIF_IRQ_PORT_HOST); val_irqclr = LPAIF_IRQ_ALL(ch); @@ -563,7 +550,6 @@ static int lpass_platform_pcmops_trigger(struct snd_soc_component *component, "error writing to rdmactl reg: %d\n", ret); return ret; } - map = drvdata->hdmiif_map; reg_irqen = LPASS_HDMITX_APP_IRQEN_REG(v); val_mask = (LPAIF_IRQ_ALL(ch) | LPAIF_IRQ_HDMI_REQ_ON_PRELOAD(ch) | @@ -573,7 +559,6 @@ static int lpass_platform_pcmops_trigger(struct snd_soc_component *component, break; case MI2S_PRIMARY: case MI2S_SECONDARY: - map = drvdata->lpaif_map; reg_irqen = LPAIF_IRQEN_REG(v, LPAIF_IRQ_PORT_HOST); val_mask = LPAIF_IRQ_ALL(ch); val_irqen = 0; -- GitLab From 8d1bfc04c97407767559f6389a0f0fb060cbe25e Mon Sep 17 00:00:00 2001 From: Srinivasa Rao Mandadapu Date: Thu, 17 Dec 2020 13:38:34 +0530 Subject: [PATCH 0018/2012] ASoC: qcom: Add support for playback recover after resume To support playback continuation after hard suspend(bypass powerd) and resume do regcache sync with component driver pm ops. Signed-off-by: V Sujith Kumar Reddy Signed-off-by: Srinivasa Rao Mandadapu Reviewed-by: Srinivas Kandagatla Tested-by: Steev Klimaszewski Link: https://lore.kernel.org/r/1608192514-29695-3-git-send-email-srivasam@codeaurora.org Signed-off-by: Mark Brown --- sound/soc/qcom/lpass-platform.c | 35 +++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/sound/soc/qcom/lpass-platform.c b/sound/soc/qcom/lpass-platform.c index 232deee6fde56..d1c248590f3ab 100644 --- a/sound/soc/qcom/lpass-platform.c +++ b/sound/soc/qcom/lpass-platform.c @@ -823,6 +823,39 @@ static void lpass_platform_pcm_free(struct snd_soc_component *component, } } +static int lpass_platform_pcmops_suspend(struct snd_soc_component *component) +{ + struct lpass_data *drvdata = snd_soc_component_get_drvdata(component); + struct regmap *map; + unsigned int dai_id = component->id; + + if (dai_id == LPASS_DP_RX) + map = drvdata->hdmiif_map; + else + map = drvdata->lpaif_map; + + regcache_cache_only(map, true); + regcache_mark_dirty(map); + + return 0; +} + +static int lpass_platform_pcmops_resume(struct snd_soc_component *component) +{ + struct lpass_data *drvdata = snd_soc_component_get_drvdata(component); + struct regmap *map; + unsigned int dai_id = component->id; + + if (dai_id == LPASS_DP_RX) + map = drvdata->hdmiif_map; + else + map = drvdata->lpaif_map; + + regcache_cache_only(map, false); + return regcache_sync(map); +} + + static const struct snd_soc_component_driver lpass_component_driver = { .name = DRV_NAME, .open = lpass_platform_pcmops_open, @@ -835,6 +868,8 @@ static const struct snd_soc_component_driver lpass_component_driver = { .mmap = lpass_platform_pcmops_mmap, .pcm_construct = lpass_platform_pcm_new, .pcm_destruct = lpass_platform_pcm_free, + .suspend = lpass_platform_pcmops_suspend, + .resume = lpass_platform_pcmops_resume, }; -- GitLab From 61c7dbec33777ade95d3db58beec8d7f177868c8 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Thu, 17 Dec 2020 11:28:49 +0900 Subject: [PATCH 0019/2012] ASoC: rsnd: don't call clk_disable_unprepare() if can't use We need to care clock accessibility, because we might can't use clock for some reasons. It sets clk_rate for each clocks when enabled. This means it doesn't have clk_rate if we can't use. We can avoid to call clk_disable_unprepare() in such case. Link: https://lore.kernel.org/r/CAMuHMdWvB+p=2JqTsO7bR8uJqKqO5A2XgXFXsVAjHk3hcxgcTw@mail.gmail.com Reported-by: Geert Uytterhoeven Signed-off-by: Kuninori Morimoto Reviewed-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/87eejpgoi9.wl-kuninori.morimoto.gx@renesas.com Signed-off-by: Mark Brown --- sound/soc/sh/rcar/adg.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/sound/soc/sh/rcar/adg.c b/sound/soc/sh/rcar/adg.c index b9aacf3d3b29c..abdfd9cf91e2a 100644 --- a/sound/soc/sh/rcar/adg.c +++ b/sound/soc/sh/rcar/adg.c @@ -366,25 +366,27 @@ void rsnd_adg_clk_control(struct rsnd_priv *priv, int enable) struct rsnd_adg *adg = rsnd_priv_to_adg(priv); struct device *dev = rsnd_priv_to_dev(priv); struct clk *clk; - int i, ret; + int i; for_each_rsnd_clk(clk, adg, i) { - ret = 0; if (enable) { - ret = clk_prepare_enable(clk); + int ret = clk_prepare_enable(clk); /* * We shouldn't use clk_get_rate() under * atomic context. Let's keep it when * rsnd_adg_clk_enable() was called */ - adg->clk_rate[i] = clk_get_rate(adg->clk[i]); + adg->clk_rate[i] = 0; + if (ret < 0) + dev_warn(dev, "can't use clk %d\n", i); + else + adg->clk_rate[i] = clk_get_rate(clk); } else { - clk_disable_unprepare(clk); + if (adg->clk_rate[i]) + clk_disable_unprepare(clk); + adg->clk_rate[i] = 0; } - - if (ret < 0) - dev_warn(dev, "can't use clk %d\n", i); } } -- GitLab From bb224c3e3e41d940612d4cc9573289cdbd5cb8f5 Mon Sep 17 00:00:00 2001 From: Cezary Rojewski Date: Thu, 17 Dec 2020 11:54:01 +0100 Subject: [PATCH 0020/2012] ASoC: Intel: haswell: Add missing pm_ops haswell machine board is missing pm_ops what prevents it from undergoing suspend-resume procedure successfully. Assign default snd_soc_pm_ops so this is no longer the case. Signed-off-by: Cezary Rojewski Link: https://lore.kernel.org/r/20201217105401.27865-1-cezary.rojewski@intel.com Signed-off-by: Mark Brown --- sound/soc/intel/boards/haswell.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/intel/boards/haswell.c b/sound/soc/intel/boards/haswell.c index c55d1239e705b..c763bfeb1f38f 100644 --- a/sound/soc/intel/boards/haswell.c +++ b/sound/soc/intel/boards/haswell.c @@ -189,6 +189,7 @@ static struct platform_driver haswell_audio = { .probe = haswell_audio_probe, .driver = { .name = "haswell-audio", + .pm = &snd_soc_pm_ops, }, }; -- GitLab From 6108f990c0887d3e8f1db2d13c7012e40a061f28 Mon Sep 17 00:00:00 2001 From: Shuming Fan Date: Thu, 17 Dec 2020 16:56:51 +0800 Subject: [PATCH 0021/2012] ASoC: rt711: mutex between calibration and power state changes To avoid calibration time-out, this patch adds the mutex between calibration and power state changes Signed-off-by: Shuming Fan Link: https://lore.kernel.org/r/20201217085651.24580-1-shumingf@realtek.com Signed-off-by: Mark Brown --- sound/soc/codecs/rt711.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/sound/soc/codecs/rt711.c b/sound/soc/codecs/rt711.c index 5771c02c34596..85f744184a60f 100644 --- a/sound/soc/codecs/rt711.c +++ b/sound/soc/codecs/rt711.c @@ -462,6 +462,8 @@ static int rt711_set_amp_gain_put(struct snd_kcontrol *kcontrol, unsigned int read_ll, read_rl; int i; + mutex_lock(&rt711->calibrate_mutex); + /* Can't use update bit function, so read the original value first */ addr_h = mc->reg; addr_l = mc->rreg; @@ -547,6 +549,8 @@ static int rt711_set_amp_gain_put(struct snd_kcontrol *kcontrol, if (dapm->bias_level <= SND_SOC_BIAS_STANDBY) regmap_write(rt711->regmap, RT711_SET_AUDIO_POWER_STATE, AC_PWRST_D3); + + mutex_unlock(&rt711->calibrate_mutex); return 0; } @@ -859,9 +863,11 @@ static int rt711_set_bias_level(struct snd_soc_component *component, break; case SND_SOC_BIAS_STANDBY: + mutex_lock(&rt711->calibrate_mutex); regmap_write(rt711->regmap, RT711_SET_AUDIO_POWER_STATE, AC_PWRST_D3); + mutex_unlock(&rt711->calibrate_mutex); break; default: -- GitLab From 349dd23931d1943b1083182e35715eba8b150fe1 Mon Sep 17 00:00:00 2001 From: Bard Liao Date: Thu, 17 Dec 2020 15:45:56 +0800 Subject: [PATCH 0022/2012] ASoC: max98373: don't access volatile registers in bias level off We will set regcache_cache_only true in suspend. As a result, regmap_read will return error when we try to read volatile registers in suspend. Besides, it doesn't make sense to read feedback data when codec is not active. To make userspace happy, this patch returns a cached value shich should be a valid value. Signed-off-by: Bard Liao Link: https://lore.kernel.org/r/20201217074556.32370-1-yung-chuan.liao@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/codecs/max98373-i2c.c | 20 +++++++++++++++++++ sound/soc/codecs/max98373-sdw.c | 20 +++++++++++++++++++ sound/soc/codecs/max98373.c | 34 ++++++++++++++++++++++++++++++--- sound/soc/codecs/max98373.h | 8 ++++++++ 4 files changed, 79 insertions(+), 3 deletions(-) diff --git a/sound/soc/codecs/max98373-i2c.c b/sound/soc/codecs/max98373-i2c.c index 92921e34f9486..85f6865019d4a 100644 --- a/sound/soc/codecs/max98373-i2c.c +++ b/sound/soc/codecs/max98373-i2c.c @@ -19,6 +19,12 @@ #include #include "max98373.h" +static const u32 max98373_i2c_cache_reg[] = { + MAX98373_R2054_MEAS_ADC_PVDD_CH_READBACK, + MAX98373_R2055_MEAS_ADC_THERM_CH_READBACK, + MAX98373_R20B6_BDE_CUR_STATE_READBACK, +}; + static struct reg_default max98373_reg[] = { {MAX98373_R2000_SW_RESET, 0x00}, {MAX98373_R2001_INT_RAW1, 0x00}, @@ -472,6 +478,11 @@ static struct snd_soc_dai_driver max98373_dai[] = { static int max98373_suspend(struct device *dev) { struct max98373_priv *max98373 = dev_get_drvdata(dev); + int i; + + /* cache feedback register values before suspend */ + for (i = 0; i < max98373->cache_num; i++) + regmap_read(max98373->regmap, max98373->cache[i].reg, &max98373->cache[i].val); regcache_cache_only(max98373->regmap, true); regcache_mark_dirty(max98373->regmap); @@ -509,6 +520,7 @@ static int max98373_i2c_probe(struct i2c_client *i2c, { int ret = 0; int reg = 0; + int i; struct max98373_priv *max98373 = NULL; max98373 = devm_kzalloc(&i2c->dev, sizeof(*max98373), GFP_KERNEL); @@ -534,6 +546,14 @@ static int max98373_i2c_probe(struct i2c_client *i2c, return ret; } + max98373->cache_num = ARRAY_SIZE(max98373_i2c_cache_reg); + max98373->cache = devm_kcalloc(&i2c->dev, max98373->cache_num, + sizeof(*max98373->cache), + GFP_KERNEL); + + for (i = 0; i < max98373->cache_num; i++) + max98373->cache[i].reg = max98373_i2c_cache_reg[i]; + /* voltage/current slot & gpio configuration */ max98373_slot_config(&i2c->dev, max98373); diff --git a/sound/soc/codecs/max98373-sdw.c b/sound/soc/codecs/max98373-sdw.c index ec2e79c573577..b8d471d79e939 100644 --- a/sound/soc/codecs/max98373-sdw.c +++ b/sound/soc/codecs/max98373-sdw.c @@ -23,6 +23,12 @@ struct sdw_stream_data { struct sdw_stream_runtime *sdw_stream; }; +static const u32 max98373_sdw_cache_reg[] = { + MAX98373_R2054_MEAS_ADC_PVDD_CH_READBACK, + MAX98373_R2055_MEAS_ADC_THERM_CH_READBACK, + MAX98373_R20B6_BDE_CUR_STATE_READBACK, +}; + static struct reg_default max98373_reg[] = { {MAX98373_R0040_SCP_INIT_STAT_1, 0x00}, {MAX98373_R0041_SCP_INIT_MASK_1, 0x00}, @@ -245,6 +251,11 @@ static const struct regmap_config max98373_sdw_regmap = { static __maybe_unused int max98373_suspend(struct device *dev) { struct max98373_priv *max98373 = dev_get_drvdata(dev); + int i; + + /* cache feedback register values before suspend */ + for (i = 0; i < max98373->cache_num; i++) + regmap_read(max98373->regmap, max98373->cache[i].reg, &max98373->cache[i].val); regcache_cache_only(max98373->regmap, true); @@ -757,6 +768,7 @@ static int max98373_init(struct sdw_slave *slave, struct regmap *regmap) { struct max98373_priv *max98373; int ret; + int i; struct device *dev = &slave->dev; /* Allocate and assign private driver data structure */ @@ -768,6 +780,14 @@ static int max98373_init(struct sdw_slave *slave, struct regmap *regmap) max98373->regmap = regmap; max98373->slave = slave; + max98373->cache_num = ARRAY_SIZE(max98373_sdw_cache_reg); + max98373->cache = devm_kcalloc(dev, max98373->cache_num, + sizeof(*max98373->cache), + GFP_KERNEL); + + for (i = 0; i < max98373->cache_num; i++) + max98373->cache[i].reg = max98373_sdw_cache_reg[i]; + /* Read voltage and slot configuration */ max98373_slot_config(dev, max98373); diff --git a/sound/soc/codecs/max98373.c b/sound/soc/codecs/max98373.c index 929bb1798c43f..31d571d4fac1c 100644 --- a/sound/soc/codecs/max98373.c +++ b/sound/soc/codecs/max98373.c @@ -168,6 +168,31 @@ static SOC_ENUM_SINGLE_DECL(max98373_adc_samplerate_enum, MAX98373_R2051_MEAS_ADC_SAMPLING_RATE, 0, max98373_ADC_samplerate_text); +static int max98373_feedback_get(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol) +{ + struct snd_soc_component *component = snd_kcontrol_chip(kcontrol); + struct soc_mixer_control *mc = + (struct soc_mixer_control *)kcontrol->private_value; + struct max98373_priv *max98373 = snd_soc_component_get_drvdata(component); + int i; + + if (snd_soc_component_get_bias_level(component) == SND_SOC_BIAS_OFF) { + /* + * Register values will be cached before suspend. The cached value + * will be a valid value and userspace will happy with that. + */ + for (i = 0; i < max98373->cache_num; i++) { + if (mc->reg == max98373->cache[i].reg) { + ucontrol->value.integer.value[0] = max98373->cache[i].val; + return 0; + } + } + } + + return snd_soc_put_volsw(kcontrol, ucontrol); +} + static const struct snd_kcontrol_new max98373_snd_controls[] = { SOC_SINGLE("Digital Vol Sel Switch", MAX98373_R203F_AMP_DSP_CFG, MAX98373_AMP_VOL_SEL_SHIFT, 1, 0), @@ -209,8 +234,10 @@ SOC_SINGLE("ADC PVDD FLT Switch", MAX98373_R2052_MEAS_ADC_PVDD_FLT_CFG, MAX98373_FLT_EN_SHIFT, 1, 0), SOC_SINGLE("ADC TEMP FLT Switch", MAX98373_R2053_MEAS_ADC_THERM_FLT_CFG, MAX98373_FLT_EN_SHIFT, 1, 0), -SOC_SINGLE("ADC PVDD", MAX98373_R2054_MEAS_ADC_PVDD_CH_READBACK, 0, 0xFF, 0), -SOC_SINGLE("ADC TEMP", MAX98373_R2055_MEAS_ADC_THERM_CH_READBACK, 0, 0xFF, 0), +SOC_SINGLE_EXT("ADC PVDD", MAX98373_R2054_MEAS_ADC_PVDD_CH_READBACK, 0, 0xFF, 0, + max98373_feedback_get, NULL), +SOC_SINGLE_EXT("ADC TEMP", MAX98373_R2055_MEAS_ADC_THERM_CH_READBACK, 0, 0xFF, 0, + max98373_feedback_get, NULL), SOC_SINGLE("ADC PVDD FLT Coeff", MAX98373_R2052_MEAS_ADC_PVDD_FLT_CFG, 0, 0x3, 0), SOC_SINGLE("ADC TEMP FLT Coeff", MAX98373_R2053_MEAS_ADC_THERM_FLT_CFG, @@ -226,7 +253,8 @@ SOC_SINGLE("BDE LVL1 Thresh", MAX98373_R2097_BDE_L1_THRESH, 0, 0xFF, 0), SOC_SINGLE("BDE LVL2 Thresh", MAX98373_R2098_BDE_L2_THRESH, 0, 0xFF, 0), SOC_SINGLE("BDE LVL3 Thresh", MAX98373_R2099_BDE_L3_THRESH, 0, 0xFF, 0), SOC_SINGLE("BDE LVL4 Thresh", MAX98373_R209A_BDE_L4_THRESH, 0, 0xFF, 0), -SOC_SINGLE("BDE Active Level", MAX98373_R20B6_BDE_CUR_STATE_READBACK, 0, 8, 0), +SOC_SINGLE_EXT("BDE Active Level", MAX98373_R20B6_BDE_CUR_STATE_READBACK, 0, 8, 0, + max98373_feedback_get, NULL), SOC_SINGLE("BDE Clip Mode Switch", MAX98373_R2092_BDE_CLIPPER_MODE, 0, 1, 0), SOC_SINGLE("BDE Thresh Hysteresis", MAX98373_R209B_BDE_THRESH_HYST, 0, 0xFF, 0), SOC_SINGLE("BDE Hold Time", MAX98373_R2090_BDE_LVL_HOLD, 0, 0xFF, 0), diff --git a/sound/soc/codecs/max98373.h b/sound/soc/codecs/max98373.h index 4ab29b9d51c74..71f5a5228f34b 100644 --- a/sound/soc/codecs/max98373.h +++ b/sound/soc/codecs/max98373.h @@ -203,6 +203,11 @@ /* MAX98373_R2000_SW_RESET */ #define MAX98373_SOFT_RESET (0x1 << 0) +struct max98373_cache { + u32 reg; + u32 val; +}; + struct max98373_priv { struct regmap *regmap; int reset_gpio; @@ -212,6 +217,9 @@ struct max98373_priv { bool interleave_mode; unsigned int ch_size; bool tdm_mode; + /* cache for reading a valid fake feedback value */ + struct max98373_cache *cache; + int cache_num; /* variables to support soundwire */ struct sdw_slave *slave; bool hw_init; -- GitLab From a84dfb3d55934253de6aed38ad75990278a2d21e Mon Sep 17 00:00:00 2001 From: Jerome Brunet Date: Thu, 17 Dec 2020 16:08:34 +0100 Subject: [PATCH 0023/2012] ASoC: meson: axg-tdmin: fix axg skew offset The signal captured on from tdm decoder of the AXG SoC is incorrect. It appears amplified. The skew offset of the decoder is wrong. Setting the skew offset to 3, like the g12 and sm1 SoCs, solves and gives correct data. Fixes: 13a22e6a98f8 ("ASoC: meson: add tdm input driver") Signed-off-by: Jerome Brunet Link: https://lore.kernel.org/r/20201217150834.3247526-1-jbrunet@baylibre.com Signed-off-by: Mark Brown --- sound/soc/meson/axg-tdmin.c | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/sound/soc/meson/axg-tdmin.c b/sound/soc/meson/axg-tdmin.c index 88ed95ae886bb..b4faf9d5c1aad 100644 --- a/sound/soc/meson/axg-tdmin.c +++ b/sound/soc/meson/axg-tdmin.c @@ -224,15 +224,6 @@ static const struct axg_tdm_formatter_ops axg_tdmin_ops = { }; static const struct axg_tdm_formatter_driver axg_tdmin_drv = { - .component_drv = &axg_tdmin_component_drv, - .regmap_cfg = &axg_tdmin_regmap_cfg, - .ops = &axg_tdmin_ops, - .quirks = &(const struct axg_tdm_formatter_hw) { - .skew_offset = 2, - }, -}; - -static const struct axg_tdm_formatter_driver g12a_tdmin_drv = { .component_drv = &axg_tdmin_component_drv, .regmap_cfg = &axg_tdmin_regmap_cfg, .ops = &axg_tdmin_ops, @@ -247,10 +238,10 @@ static const struct of_device_id axg_tdmin_of_match[] = { .data = &axg_tdmin_drv, }, { .compatible = "amlogic,g12a-tdmin", - .data = &g12a_tdmin_drv, + .data = &axg_tdmin_drv, }, { .compatible = "amlogic,sm1-tdmin", - .data = &g12a_tdmin_drv, + .data = &axg_tdmin_drv, }, {} }; MODULE_DEVICE_TABLE(of, axg_tdmin_of_match); -- GitLab From 671ee4db952449acde126965bf76817a3159040d Mon Sep 17 00:00:00 2001 From: Jerome Brunet Date: Thu, 17 Dec 2020 16:08:12 +0100 Subject: [PATCH 0024/2012] ASoC: meson: axg-tdm-interface: fix loopback When the axg-tdm-interface was introduced, the backend DAI was marked as an endpoint when DPCM was walking the DAPM graph to find a its BE. It is no longer the case since this commit 8dd26dff00c0 ("ASoC: dapm: Fix handling of custom_stop_condition on DAPM graph walks") Because of this, when DPCM finds a BE it does everything it needs on the DAIs but it won't power up the widgets between the FE and the BE if there is no actual endpoint after the BE. On meson-axg HWs, the loopback is a special DAI of the tdm-interface BE. It is only linked to the dummy codec since there no actual HW after it. >From the DAPM perspective, the DAI has no endpoint. Because of this, the TDM decoder, which is a widget between the FE and BE is not powered up. >From the user perspective, everything seems fine but no data is produced. Connecting the Loopback DAI to a dummy DAPM endpoint solves the problem. Fixes: 8dd26dff00c0 ("ASoC: dapm: Fix handling of custom_stop_condition on DAPM graph walks") Cc: Charles Keepax Signed-off-by: Jerome Brunet Link: https://lore.kernel.org/r/20201217150812.3247405-1-jbrunet@baylibre.com Signed-off-by: Mark Brown --- sound/soc/meson/axg-tdm-interface.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/sound/soc/meson/axg-tdm-interface.c b/sound/soc/meson/axg-tdm-interface.c index c8664ab80d45a..87cac440b3693 100644 --- a/sound/soc/meson/axg-tdm-interface.c +++ b/sound/soc/meson/axg-tdm-interface.c @@ -467,8 +467,20 @@ static int axg_tdm_iface_set_bias_level(struct snd_soc_component *component, return ret; } +static const struct snd_soc_dapm_widget axg_tdm_iface_dapm_widgets[] = { + SND_SOC_DAPM_SIGGEN("Playback Signal"), +}; + +static const struct snd_soc_dapm_route axg_tdm_iface_dapm_routes[] = { + { "Loopback", NULL, "Playback Signal" }, +}; + static const struct snd_soc_component_driver axg_tdm_iface_component_drv = { - .set_bias_level = axg_tdm_iface_set_bias_level, + .dapm_widgets = axg_tdm_iface_dapm_widgets, + .num_dapm_widgets = ARRAY_SIZE(axg_tdm_iface_dapm_widgets), + .dapm_routes = axg_tdm_iface_dapm_routes, + .num_dapm_routes = ARRAY_SIZE(axg_tdm_iface_dapm_routes), + .set_bias_level = axg_tdm_iface_set_bias_level, }; static const struct of_device_id axg_tdm_iface_of_match[] = { -- GitLab From 72d78717c6d06adf65d2e3dccc96d9e9dc978593 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 11 Dec 2020 12:26:14 -0500 Subject: [PATCH 0025/2012] nfsd: Fixes for nfsd4_encode_read_plus_data() Ensure that we encode the data payload + padding, and that we truncate the preallocated buffer to the actual read size. Fixes: 528b84934eb9 ("NFSD: Add READ_PLUS data support") Signed-off-by: Trond Myklebust Signed-off-by: Chuck Lever --- fs/nfsd/nfs4xdr.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 45ee6b12ce5b7..5dacc673ef173 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -4756,6 +4756,7 @@ nfsd4_encode_read_plus_data(struct nfsd4_compoundres *resp, resp->rqstp->rq_vec, read->rd_vlen, maxcount, eof); if (nfserr) return nfserr; + xdr_truncate_encode(xdr, starting_len + 16 + xdr_align_size(*maxcount)); tmp = htonl(NFS4_CONTENT_DATA); write_bytes_to_xdr_buf(xdr->buf, starting_len, &tmp, 4); @@ -4763,6 +4764,10 @@ nfsd4_encode_read_plus_data(struct nfsd4_compoundres *resp, write_bytes_to_xdr_buf(xdr->buf, starting_len + 4, &tmp64, 8); tmp = htonl(*maxcount); write_bytes_to_xdr_buf(xdr->buf, starting_len + 12, &tmp, 4); + + tmp = xdr_zero; + write_bytes_to_xdr_buf(xdr->buf, starting_len + 16 + *maxcount, &tmp, + xdr_pad_size(*maxcount)); return nfs_ok; } -- GitLab From b68f0cbd3f95f2df81e525c310a41fc73c2ed0d3 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 11 Dec 2020 12:26:15 -0500 Subject: [PATCH 0026/2012] nfsd: Don't set eof on a truncated READ_PLUS If the READ_PLUS operation was truncated due to an error, then ensure we clear the 'eof' flag. Fixes: 9f0b5792f07d ("NFSD: Encode a full READ_PLUS reply") Signed-off-by: Trond Myklebust Signed-off-by: Chuck Lever --- fs/nfsd/nfs4xdr.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 5dacc673ef173..20178f60f6121 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -4860,14 +4860,15 @@ out: if (nfserr && segments == 0) xdr_truncate_encode(xdr, starting_len); else { - tmp = htonl(eof); - write_bytes_to_xdr_buf(xdr->buf, starting_len, &tmp, 4); - tmp = htonl(segments); - write_bytes_to_xdr_buf(xdr->buf, starting_len + 4, &tmp, 4); if (nfserr) { xdr_truncate_encode(xdr, last_segment); nfserr = nfs_ok; + eof = 0; } + tmp = htonl(eof); + write_bytes_to_xdr_buf(xdr->buf, starting_len, &tmp, 4); + tmp = htonl(segments); + write_bytes_to_xdr_buf(xdr->buf, starting_len + 4, &tmp, 4); } return nfserr; -- GitLab From d6c9e4368cc6a61bf25c9c72437ced509c854563 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 18 Dec 2020 12:28:23 -0500 Subject: [PATCH 0027/2012] NFSD: Fix sparse warning in nfssvc.c fs/nfsd/nfssvc.c:36:6: warning: symbol 'inter_copy_offload_enable' was not declared. Should it be static? The parameter was added by commit ce0887ac96d3 ("NFSD add nfs4 inter ssc to nfsd4_copy"). Relocate it into the source file that uses it, and make it static. This approach is similar to the nfs4_disable_idmapping, cltrack_prog, and cltrack_legacy_disable module parameters. Signed-off-by: Chuck Lever --- fs/nfsd/nfs4proc.c | 5 +++++ fs/nfsd/nfssvc.c | 6 ------ fs/nfsd/xdr4.h | 1 - 3 files changed, 5 insertions(+), 7 deletions(-) diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 4727b7f03c5bb..8d6d2678abade 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -50,6 +50,11 @@ #include "pnfs.h" #include "trace.h" +static bool inter_copy_offload_enable; +module_param(inter_copy_offload_enable, bool, 0644); +MODULE_PARM_DESC(inter_copy_offload_enable, + "Enable inter server to server copy offload. Default: false"); + #ifdef CONFIG_NFSD_V4_SECURITY_LABEL #include diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 00384c332f9bb..f9c9f4c63cc77 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -33,12 +33,6 @@ #define NFSDDBG_FACILITY NFSDDBG_SVC -bool inter_copy_offload_enable; -EXPORT_SYMBOL_GPL(inter_copy_offload_enable); -module_param(inter_copy_offload_enable, bool, 0644); -MODULE_PARM_DESC(inter_copy_offload_enable, - "Enable inter server to server copy offload. Default: false"); - extern struct svc_program nfsd_program; static int nfsd(void *vrqstp); #if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h index a60ff5ce1a375..c300885ae75dd 100644 --- a/fs/nfsd/xdr4.h +++ b/fs/nfsd/xdr4.h @@ -568,7 +568,6 @@ struct nfsd4_copy { struct nfs_fh c_fh; nfs4_stateid stateid; }; -extern bool inter_copy_offload_enable; struct nfsd4_seek { /* request */ -- GitLab From 4a85a6a3320b4a622315d2e0ea91a1d2b013bce4 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 18 Dec 2020 12:28:41 -0500 Subject: [PATCH 0028/2012] SUNRPC: Handle TCP socket sends with kernel_sendpage() again Daire Byrne reports a ~50% aggregrate throughput regression on his Linux NFS server after commit da1661b93bf4 ("SUNRPC: Teach server to use xprt_sock_sendmsg for socket sends"), which replaced kernel_send_page() calls in NFSD's socket send path with calls to sock_sendmsg() using iov_iter. Investigation showed that tcp_sendmsg() was not using zero-copy to send the xdr_buf's bvec pages, but instead was relying on memcpy. This means copying every byte of a large NFS READ payload. It looks like TLS sockets do indeed support a ->sendpage method, so it's really not necessary to use xprt_sock_sendmsg() to support TLS fully on the server. A mechanical reversion of da1661b93bf4 is not possible at this point, but we can re-implement the server's TCP socket sendmsg path using kernel_sendpage(). Reported-by: Daire Byrne BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=209439 Signed-off-by: Chuck Lever --- net/sunrpc/svcsock.c | 86 +++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 85 insertions(+), 1 deletion(-) diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index b248f2349437d..c9766d07eb81a 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -1062,6 +1062,90 @@ err_noclose: return 0; /* record not complete */ } +static int svc_tcp_send_kvec(struct socket *sock, const struct kvec *vec, + int flags) +{ + return kernel_sendpage(sock, virt_to_page(vec->iov_base), + offset_in_page(vec->iov_base), + vec->iov_len, flags); +} + +/* + * kernel_sendpage() is used exclusively to reduce the number of + * copy operations in this path. Therefore the caller must ensure + * that the pages backing @xdr are unchanging. + * + * In addition, the logic assumes that * .bv_len is never larger + * than PAGE_SIZE. + */ +static int svc_tcp_sendmsg(struct socket *sock, struct msghdr *msg, + struct xdr_buf *xdr, rpc_fraghdr marker, + unsigned int *sentp) +{ + const struct kvec *head = xdr->head; + const struct kvec *tail = xdr->tail; + struct kvec rm = { + .iov_base = &marker, + .iov_len = sizeof(marker), + }; + int flags, ret; + + *sentp = 0; + xdr_alloc_bvec(xdr, GFP_KERNEL); + + msg->msg_flags = MSG_MORE; + ret = kernel_sendmsg(sock, msg, &rm, 1, rm.iov_len); + if (ret < 0) + return ret; + *sentp += ret; + if (ret != rm.iov_len) + return -EAGAIN; + + flags = head->iov_len < xdr->len ? MSG_MORE | MSG_SENDPAGE_NOTLAST : 0; + ret = svc_tcp_send_kvec(sock, head, flags); + if (ret < 0) + return ret; + *sentp += ret; + if (ret != head->iov_len) + goto out; + + if (xdr->page_len) { + unsigned int offset, len, remaining; + struct bio_vec *bvec; + + bvec = xdr->bvec; + offset = xdr->page_base; + remaining = xdr->page_len; + flags = MSG_MORE | MSG_SENDPAGE_NOTLAST; + while (remaining > 0) { + if (remaining <= PAGE_SIZE && tail->iov_len == 0) + flags = 0; + len = min(remaining, bvec->bv_len); + ret = kernel_sendpage(sock, bvec->bv_page, + bvec->bv_offset + offset, + len, flags); + if (ret < 0) + return ret; + *sentp += ret; + if (ret != len) + goto out; + remaining -= len; + offset = 0; + bvec++; + } + } + + if (tail->iov_len) { + ret = svc_tcp_send_kvec(sock, tail, 0); + if (ret < 0) + return ret; + *sentp += ret; + } + +out: + return 0; +} + /** * svc_tcp_sendto - Send out a reply on a TCP socket * @rqstp: completed svc_rqst @@ -1089,7 +1173,7 @@ static int svc_tcp_sendto(struct svc_rqst *rqstp) mutex_lock(&xprt->xpt_mutex); if (svc_xprt_is_dead(xprt)) goto out_notconn; - err = xprt_sock_sendmsg(svsk->sk_sock, &msg, xdr, 0, marker, &sent); + err = svc_tcp_sendmsg(svsk->sk_sock, &msg, xdr, marker, &sent); xdr_free_bvec(xdr); trace_svcsock_tcp_send(xprt, err < 0 ? err : sent); if (err < 0 || sent != (xdr->len + sizeof(marker))) -- GitLab From 7b723008f9c95624c848fad661c01b06e47b20da Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 18 Dec 2020 12:28:58 -0500 Subject: [PATCH 0029/2012] NFSD: Restore NFSv4 decoding's SAVEMEM functionality While converting the NFSv4 decoder to use xdr_stream-based XDR processing, I removed the old SAVEMEM() macro. This macro wrapped a bit of logic that avoided a memory allocation by recognizing when the decoded item resides in a linear section of the Receive buffer. In that case, it returned a pointer into that buffer instead of allocating a bounce buffer. The bounce buffer is necessary only when xdr_inline_decode() has placed the decoded item in the xdr_stream's scratch buffer, which disappears the next time xdr_inline_decode() is called with that xdr_stream. That happens only if the data item crosses a page boundary in the receive buffer, an exceedingly rare occurrence. Allocating a bounce buffer every time results in a minor performance regression that was introduced by the recent NFSv4 decoder overhaul. Let's restore the previous behavior. On average, it saves about 1.5 kmalloc() calls per COMPOUND. Signed-off-by: Chuck Lever --- fs/nfsd/nfs4xdr.c | 42 ++++++++++++++++++++++++++---------------- 1 file changed, 26 insertions(+), 16 deletions(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 20178f60f6121..eaaa1605b5b5f 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -147,6 +147,25 @@ svcxdr_dupstr(struct nfsd4_compoundargs *argp, void *buf, u32 len) return p; } +static void * +svcxdr_savemem(struct nfsd4_compoundargs *argp, __be32 *p, u32 len) +{ + __be32 *tmp; + + /* + * The location of the decoded data item is stable, + * so @p is OK to use. This is the common case. + */ + if (p != argp->xdr->scratch.iov_base) + return p; + + tmp = svcxdr_tmpalloc(argp, len); + if (!tmp) + return NULL; + memcpy(tmp, p, len); + return tmp; +} + /* * NFSv4 basic data type decoders */ @@ -183,11 +202,10 @@ nfsd4_decode_opaque(struct nfsd4_compoundargs *argp, struct xdr_netobj *o) p = xdr_inline_decode(argp->xdr, len); if (!p) return nfserr_bad_xdr; - o->data = svcxdr_tmpalloc(argp, len); + o->data = svcxdr_savemem(argp, p, len); if (!o->data) return nfserr_jukebox; o->len = len; - memcpy(o->data, p, len); return nfs_ok; } @@ -205,10 +223,9 @@ nfsd4_decode_component4(struct nfsd4_compoundargs *argp, char **namp, u32 *lenp) status = check_filename((char *)p, *lenp); if (status) return status; - *namp = svcxdr_tmpalloc(argp, *lenp); + *namp = svcxdr_savemem(argp, p, *lenp); if (!*namp) return nfserr_jukebox; - memcpy(*namp, p, *lenp); return nfs_ok; } @@ -1200,10 +1217,9 @@ nfsd4_decode_putfh(struct nfsd4_compoundargs *argp, struct nfsd4_putfh *putfh) p = xdr_inline_decode(argp->xdr, putfh->pf_fhlen); if (!p) return nfserr_bad_xdr; - putfh->pf_fhval = svcxdr_tmpalloc(argp, putfh->pf_fhlen); + putfh->pf_fhval = svcxdr_savemem(argp, p, putfh->pf_fhlen); if (!putfh->pf_fhval) return nfserr_jukebox; - memcpy(putfh->pf_fhval, p, putfh->pf_fhlen); return nfs_ok; } @@ -1318,24 +1334,20 @@ nfsd4_decode_setclientid(struct nfsd4_compoundargs *argp, struct nfsd4_setclient p = xdr_inline_decode(argp->xdr, setclientid->se_callback_netid_len); if (!p) return nfserr_bad_xdr; - setclientid->se_callback_netid_val = svcxdr_tmpalloc(argp, + setclientid->se_callback_netid_val = svcxdr_savemem(argp, p, setclientid->se_callback_netid_len); if (!setclientid->se_callback_netid_val) return nfserr_jukebox; - memcpy(setclientid->se_callback_netid_val, p, - setclientid->se_callback_netid_len); if (xdr_stream_decode_u32(argp->xdr, &setclientid->se_callback_addr_len) < 0) return nfserr_bad_xdr; p = xdr_inline_decode(argp->xdr, setclientid->se_callback_addr_len); if (!p) return nfserr_bad_xdr; - setclientid->se_callback_addr_val = svcxdr_tmpalloc(argp, + setclientid->se_callback_addr_val = svcxdr_savemem(argp, p, setclientid->se_callback_addr_len); if (!setclientid->se_callback_addr_val) return nfserr_jukebox; - memcpy(setclientid->se_callback_addr_val, p, - setclientid->se_callback_addr_len); if (xdr_stream_decode_u32(argp->xdr, &setclientid->se_callback_ident) < 0) return nfserr_bad_xdr; @@ -1375,10 +1387,9 @@ nfsd4_decode_verify(struct nfsd4_compoundargs *argp, struct nfsd4_verify *verify p = xdr_inline_decode(argp->xdr, verify->ve_attrlen); if (!p) return nfserr_bad_xdr; - verify->ve_attrval = svcxdr_tmpalloc(argp, verify->ve_attrlen); + verify->ve_attrval = svcxdr_savemem(argp, p, verify->ve_attrlen); if (!verify->ve_attrval) return nfserr_jukebox; - memcpy(verify->ve_attrval, p, verify->ve_attrlen); return nfs_ok; } @@ -2333,10 +2344,9 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp) p = xdr_inline_decode(argp->xdr, argp->taglen); if (!p) return 0; - argp->tag = svcxdr_tmpalloc(argp, argp->taglen); + argp->tag = svcxdr_savemem(argp, p, argp->taglen); if (!argp->tag) return 0; - memcpy(argp->tag, p, argp->taglen); max_reply += xdr_align_size(argp->taglen); } -- GitLab From 06fde695ee76429634c1e8c8c1154035aa61191e Mon Sep 17 00:00:00 2001 From: Zenghui Yu Date: Fri, 18 Dec 2020 14:00:39 +0800 Subject: [PATCH 0030/2012] genirq/msi: Initialize msi_alloc_info before calling msi_domain_prepare_irqs() Since commit 5fe71d271df8 ("irqchip/gic-v3-its: Tag ITS device as shared if allocating for a proxy device"), some of the devices are wrongly marked as "shared" by the ITS driver on systems equipped with the ITS(es). The problem is that the @info->flags may not be initialized anywhere and we end up looking at random bits on the stack. That's obviously not good. We can perform the initialization in the IRQ core layer before calling msi_domain_prepare_irqs(), which is neat enough. Fixes: 5fe71d271df8 ("irqchip/gic-v3-its: Tag ITS device as shared if allocating for a proxy device") Signed-off-by: Zenghui Yu Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20201218060039.1770-1-yuzenghui@huawei.com --- kernel/irq/msi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c index 2c0c4d6d0f83a..dc0e2d7fbdfd9 100644 --- a/kernel/irq/msi.c +++ b/kernel/irq/msi.c @@ -402,7 +402,7 @@ int __msi_domain_alloc_irqs(struct irq_domain *domain, struct device *dev, struct msi_domain_ops *ops = info->ops; struct irq_data *irq_data; struct msi_desc *desc; - msi_alloc_info_t arg; + msi_alloc_info_t arg = { }; int i, ret, virq; bool can_reserve; -- GitLab From e90f55e0196a66f8e9e445f7f33f876dd889be9a Mon Sep 17 00:00:00 2001 From: Zheng Yongjun Date: Mon, 14 Dec 2020 21:35:30 +0800 Subject: [PATCH 0031/2012] irqchip/irq-sl28cpld: Convert comma to semicolon Replace a comma between expression statements by a semicolon. Signed-off-by: Zheng Yongjun Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20201214133530.3783-1-zhengyongjun3@huawei.com --- drivers/irqchip/irq-sl28cpld.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/irqchip/irq-sl28cpld.c b/drivers/irqchip/irq-sl28cpld.c index 0aa50d025ef6c..fbb354413ffa1 100644 --- a/drivers/irqchip/irq-sl28cpld.c +++ b/drivers/irqchip/irq-sl28cpld.c @@ -66,7 +66,7 @@ static int sl28cpld_intc_probe(struct platform_device *pdev) irqchip->chip.num_regs = 1; irqchip->chip.status_base = base + INTC_IP; irqchip->chip.mask_base = base + INTC_IE; - irqchip->chip.mask_invert = true, + irqchip->chip.mask_invert = true; irqchip->chip.ack_base = base + INTC_IP; return devm_regmap_add_irq_chip_fwnode(dev, dev_fwnode(dev), -- GitLab From d7f39c40ebb6986e7371510d1c20a4efee4a7f0d Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 18 Dec 2020 18:03:46 +0000 Subject: [PATCH 0032/2012] irqchip/bcm2836: Fix IPI acknowledgement after conversion to handle_percpu_devid_irq It appears that despite its name, the bcm2836_arm_irqchip_ipi_eoi() callback is an acknowledgement, and not an EOI. This means that we lose IPIs that are made pending between the handling of the IPI and the write to LOCAL_MAILBOX0_CLR0. With the right timing, things fail nicely. This used to work with handle_percpu_devid_fasteoi_ipi(), which started by eoi-ing the interrupt. With the standard fasteoi flow, this doesn't work anymore. So let's use this callback for what it is, an ack. Your favourite RPi-2/3 is back up and running. Fixes: ffdad793d579 ("irqchip/bcm2836: Make IPIs use handle_percpu_devid_irq()") Cc: Valentin Schneider Reported-by: Guenter Roeck Tested-by: Guenter Roeck Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/c9fb4ab3-a5cb-648c-6de3-c6a871e60870@roeck-us.net --- drivers/irqchip/irq-bcm2836.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/irqchip/irq-bcm2836.c b/drivers/irqchip/irq-bcm2836.c index 5f5eb8877c413..25c9a9c06e410 100644 --- a/drivers/irqchip/irq-bcm2836.c +++ b/drivers/irqchip/irq-bcm2836.c @@ -167,7 +167,7 @@ static void bcm2836_arm_irqchip_handle_ipi(struct irq_desc *desc) chained_irq_exit(chip, desc); } -static void bcm2836_arm_irqchip_ipi_eoi(struct irq_data *d) +static void bcm2836_arm_irqchip_ipi_ack(struct irq_data *d) { int cpu = smp_processor_id(); @@ -195,7 +195,7 @@ static struct irq_chip bcm2836_arm_irqchip_ipi = { .name = "IPI", .irq_mask = bcm2836_arm_irqchip_dummy_op, .irq_unmask = bcm2836_arm_irqchip_dummy_op, - .irq_eoi = bcm2836_arm_irqchip_ipi_eoi, + .irq_ack = bcm2836_arm_irqchip_ipi_ack, .ipi_send_mask = bcm2836_arm_irqchip_ipi_send_mask, }; -- GitLab From 56ce7c25ae1525d83cf80a880cf506ead1914250 Mon Sep 17 00:00:00 2001 From: Shmulik Ladkani Date: Mon, 14 Dec 2020 15:38:32 +0200 Subject: [PATCH 0033/2012] xfrm: Fix oops in xfrm_replay_advance_bmp When setting xfrm replay_window to values higher than 32, a rare page-fault occurs in xfrm_replay_advance_bmp: BUG: unable to handle page fault for address: ffff8af350ad7920 #PF: supervisor write access in kernel mode #PF: error_code(0x0002) - not-present page PGD ad001067 P4D ad001067 PUD 0 Oops: 0002 [#1] SMP PTI CPU: 3 PID: 30 Comm: ksoftirqd/3 Kdump: loaded Not tainted 5.4.52-050452-generic #202007160732 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.11.0-2.el7 04/01/2014 RIP: 0010:xfrm_replay_advance_bmp+0xbb/0x130 RSP: 0018:ffffa1304013ba40 EFLAGS: 00010206 RAX: 000000000000010d RBX: 0000000000000002 RCX: 00000000ffffff4b RDX: 0000000000000018 RSI: 00000000004c234c RDI: 00000000ffb3dbff RBP: ffffa1304013ba50 R08: ffff8af330ad7920 R09: 0000000007fffffa R10: 0000000000000800 R11: 0000000000000010 R12: ffff8af29d6258c0 R13: ffff8af28b95c700 R14: 0000000000000000 R15: ffff8af29d6258fc FS: 0000000000000000(0000) GS:ffff8af339ac0000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: ffff8af350ad7920 CR3: 0000000015ee4000 CR4: 00000000001406e0 Call Trace: xfrm_input+0x4e5/0xa10 xfrm4_rcv_encap+0xb5/0xe0 xfrm4_udp_encap_rcv+0x140/0x1c0 Analysis revealed offending code is when accessing: replay_esn->bmp[nr] |= (1U << bitnr); with 'nr' being 0x07fffffa. This happened in an SMP system when reordering of packets was present; A packet arrived with a "too old" sequence number (outside the window, i.e 'diff > replay_window'), and therefore the following calculation: bitnr = replay_esn->replay_window - (diff - pos); yields a negative result, but since bitnr is u32 we get a large unsigned quantity (in crash dump above: 0xffffff4b seen in ecx). This was supposed to be protected by xfrm_input()'s former call to: if (x->repl->check(x, skb, seq)) { However, the state's spinlock x->lock is *released* after '->check()' is performed, and gets re-acquired before '->advance()' - which gives a chance for a different core to update the xfrm state, e.g. by advancing 'replay_esn->seq' when it encounters more packets - leading to a 'diff > replay_window' situation when original core continues to xfrm_replay_advance_bmp(). An attempt to fix this issue was suggested in commit bcf66bf54aab ("xfrm: Perform a replay check after return from async codepaths"), by calling 'x->repl->recheck()' after lock is re-acquired, but fix applied only to asyncronous crypto algorithms. Augment the fix, by *always* calling 'recheck()' - irrespective if we're using async crypto. Fixes: 0ebea8ef3559 ("[IPSEC]: Move state lock into x->type->input") Signed-off-by: Shmulik Ladkani Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_input.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index 37456d022cfa3..61e6220ddd5ae 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -660,7 +660,7 @@ resume: /* only the first xfrm gets the encap type */ encap_type = 0; - if (async && x->repl->recheck(x, skb, seq)) { + if (x->repl->recheck(x, skb, seq)) { XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATESEQERROR); goto drop_unlock; } -- GitLab From 74a2921948ed8c0e7f079a98442ec3493168cc85 Mon Sep 17 00:00:00 2001 From: John Garry Date: Wed, 2 Dec 2020 18:36:57 +0800 Subject: [PATCH 0034/2012] scsi: hisi_sas: Expose HW queues for v2 hw As a performance enhancement, make the completion queue interrupts managed. In addition, in commit bf0beec0607d ("blk-mq: drain I/O when all CPUs in a hctx are offline"), CPU hotplug for MQ devices using managed interrupts is made safe. So expose HW queues to blk-mq to take advantage of this. Flag Scsi_host.host_tagset is also set to ensure that the HBA is not sent more commands than it can handle. However the driver still does not use request tag for IPTT as there are many HW bugs means that special rules apply for IPTT allocation. Link: https://lore.kernel.org/r/1606905417-183214-6-git-send-email-john.garry@huawei.com Signed-off-by: John Garry Signed-off-by: Martin K. Petersen --- drivers/scsi/hisi_sas/hisi_sas.h | 4 ++ drivers/scsi/hisi_sas/hisi_sas_main.c | 11 +++++ drivers/scsi/hisi_sas/hisi_sas_v2_hw.c | 66 +++++++++++++++++++++----- 3 files changed, 68 insertions(+), 13 deletions(-) diff --git a/drivers/scsi/hisi_sas/hisi_sas.h b/drivers/scsi/hisi_sas/hisi_sas.h index a25cfc11c96de..aa67807c56938 100644 --- a/drivers/scsi/hisi_sas/hisi_sas.h +++ b/drivers/scsi/hisi_sas/hisi_sas.h @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -312,6 +313,7 @@ enum { struct hisi_sas_hw { int (*hw_init)(struct hisi_hba *hisi_hba); + int (*interrupt_preinit)(struct hisi_hba *hisi_hba); void (*setup_itct)(struct hisi_hba *hisi_hba, struct hisi_sas_device *device); int (*slot_index_alloc)(struct hisi_hba *hisi_hba, @@ -418,6 +420,8 @@ struct hisi_hba { u32 refclk_frequency_mhz; u8 sas_addr[SAS_ADDR_SIZE]; + int *irq_map; /* v2 hw */ + int n_phy; spinlock_t lock; struct semaphore sem; diff --git a/drivers/scsi/hisi_sas/hisi_sas_main.c b/drivers/scsi/hisi_sas/hisi_sas_main.c index 274ccf18ce2db..061d65b8a2878 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_main.c +++ b/drivers/scsi/hisi_sas/hisi_sas_main.c @@ -2620,6 +2620,13 @@ err_out: return NULL; } +static int hisi_sas_interrupt_preinit(struct hisi_hba *hisi_hba) +{ + if (hisi_hba->hw->interrupt_preinit) + return hisi_hba->hw->interrupt_preinit(hisi_hba); + return 0; +} + int hisi_sas_probe(struct platform_device *pdev, const struct hisi_sas_hw *hw) { @@ -2677,6 +2684,10 @@ int hisi_sas_probe(struct platform_device *pdev, sha->sas_port[i] = &hisi_hba->port[i].sas_port; } + rc = hisi_sas_interrupt_preinit(hisi_hba); + if (rc) + goto err_out_ha; + rc = scsi_add_host(shost, &pdev->dev); if (rc) goto err_out_ha; diff --git a/drivers/scsi/hisi_sas/hisi_sas_v2_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v2_hw.c index b57177b52facc..9adfdefef9cad 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_v2_hw.c +++ b/drivers/scsi/hisi_sas/hisi_sas_v2_hw.c @@ -3302,6 +3302,28 @@ static irq_handler_t fatal_interrupts[HISI_SAS_FATAL_INT_NR] = { fatal_axi_int_v2_hw }; +#define CQ0_IRQ_INDEX (96) + +static int hisi_sas_v2_interrupt_preinit(struct hisi_hba *hisi_hba) +{ + struct platform_device *pdev = hisi_hba->platform_dev; + struct Scsi_Host *shost = hisi_hba->shost; + struct irq_affinity desc = { + .pre_vectors = CQ0_IRQ_INDEX, + .post_vectors = 16, + }; + int resv = desc.pre_vectors + desc.post_vectors, minvec = resv + 1, nvec; + + nvec = devm_platform_get_irqs_affinity(pdev, &desc, minvec, 128, + &hisi_hba->irq_map); + if (nvec < 0) + return nvec; + + shost->nr_hw_queues = hisi_hba->cq_nvecs = nvec - resv; + + return 0; +} + /* * There is a limitation in the hip06 chipset that we need * to map in all mbigen interrupts, even if they are not used. @@ -3310,14 +3332,11 @@ static int interrupt_init_v2_hw(struct hisi_hba *hisi_hba) { struct platform_device *pdev = hisi_hba->platform_dev; struct device *dev = &pdev->dev; - int irq, rc = 0, irq_map[128]; + int irq, rc = 0; int i, phy_no, fatal_no, queue_no; - for (i = 0; i < 128; i++) - irq_map[i] = platform_get_irq(pdev, i); - for (i = 0; i < HISI_SAS_PHY_INT_NR; i++) { - irq = irq_map[i + 1]; /* Phy up/down is irq1 */ + irq = hisi_hba->irq_map[i + 1]; /* Phy up/down is irq1 */ rc = devm_request_irq(dev, irq, phy_interrupts[i], 0, DRV_NAME " phy", hisi_hba); if (rc) { @@ -3331,7 +3350,7 @@ static int interrupt_init_v2_hw(struct hisi_hba *hisi_hba) for (phy_no = 0; phy_no < hisi_hba->n_phy; phy_no++) { struct hisi_sas_phy *phy = &hisi_hba->phy[phy_no]; - irq = irq_map[phy_no + 72]; + irq = hisi_hba->irq_map[phy_no + 72]; rc = devm_request_irq(dev, irq, sata_int_v2_hw, 0, DRV_NAME " sata", phy); if (rc) { @@ -3343,7 +3362,7 @@ static int interrupt_init_v2_hw(struct hisi_hba *hisi_hba) } for (fatal_no = 0; fatal_no < HISI_SAS_FATAL_INT_NR; fatal_no++) { - irq = irq_map[fatal_no + 81]; + irq = hisi_hba->irq_map[fatal_no + 81]; rc = devm_request_irq(dev, irq, fatal_interrupts[fatal_no], 0, DRV_NAME " fatal", hisi_hba); if (rc) { @@ -3354,24 +3373,22 @@ static int interrupt_init_v2_hw(struct hisi_hba *hisi_hba) } } - for (queue_no = 0; queue_no < hisi_hba->queue_count; queue_no++) { + for (queue_no = 0; queue_no < hisi_hba->cq_nvecs; queue_no++) { struct hisi_sas_cq *cq = &hisi_hba->cq[queue_no]; - cq->irq_no = irq_map[queue_no + 96]; + cq->irq_no = hisi_hba->irq_map[queue_no + 96]; rc = devm_request_threaded_irq(dev, cq->irq_no, cq_interrupt_v2_hw, cq_thread_v2_hw, IRQF_ONESHOT, DRV_NAME " cq", cq); if (rc) { dev_err(dev, "irq init: could not request cq interrupt %d, rc=%d\n", - irq, rc); + cq->irq_no, rc); rc = -ENOENT; goto err_out; } + cq->irq_mask = irq_get_affinity_mask(cq->irq_no); } - - hisi_hba->cq_nvecs = hisi_hba->queue_count; - err_out: return rc; } @@ -3529,6 +3546,26 @@ static struct device_attribute *host_attrs_v2_hw[] = { NULL }; +static int map_queues_v2_hw(struct Scsi_Host *shost) +{ + struct hisi_hba *hisi_hba = shost_priv(shost); + struct blk_mq_queue_map *qmap = &shost->tag_set.map[HCTX_TYPE_DEFAULT]; + const struct cpumask *mask; + unsigned int queue, cpu; + + for (queue = 0; queue < qmap->nr_queues; queue++) { + mask = irq_get_affinity_mask(hisi_hba->irq_map[96 + queue]); + if (!mask) + continue; + + for_each_cpu(cpu, mask) + qmap->mq_map[cpu] = qmap->queue_offset + queue; + } + + return 0; + +} + static struct scsi_host_template sht_v2_hw = { .name = DRV_NAME, .proc_name = DRV_NAME, @@ -3553,10 +3590,13 @@ static struct scsi_host_template sht_v2_hw = { #endif .shost_attrs = host_attrs_v2_hw, .host_reset = hisi_sas_host_reset, + .map_queues = map_queues_v2_hw, + .host_tagset = 1, }; static const struct hisi_sas_hw hisi_sas_v2_hw = { .hw_init = hisi_sas_v2_init, + .interrupt_preinit = hisi_sas_v2_interrupt_preinit, .setup_itct = setup_itct_v2_hw, .slot_index_alloc = slot_index_alloc_quirk_v2_hw, .alloc_dev = alloc_dev_quirk_v2_hw, -- GitLab From e1dc20995cb9fa04b46e8f37113a7203c906d2bf Mon Sep 17 00:00:00 2001 From: John Garry Date: Mon, 21 Dec 2020 22:30:55 +0800 Subject: [PATCH 0035/2012] driver core: platform: Add extra error check in devm_platform_get_irqs_affinity() The current check of nvec < minvec for nvec returned from platform_irq_count() will not detect a negative error code in nvec. This is because minvec is unsigned, and, as such, nvec is promoted to unsigned in that check, which will make it a huge number (if it contained -EPROBE_DEFER). In practice, an error should not occur in nvec for the only in-tree user, but add a check anyway. Fixes: e15f2fa959f2 ("driver core: platform: Add devm_platform_get_irqs_affinity()") Reported-by: Dan Carpenter Signed-off-by: John Garry Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/1608561055-231244-1-git-send-email-john.garry@huawei.com --- drivers/base/platform.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/base/platform.c b/drivers/base/platform.c index ea8add164b896..74c97b65048c9 100644 --- a/drivers/base/platform.c +++ b/drivers/base/platform.c @@ -351,6 +351,8 @@ int devm_platform_get_irqs_affinity(struct platform_device *dev, return -ERANGE; nvec = platform_irq_count(dev); + if (nvec < 0) + return nvec; if (nvec < minvec) return -ENOSPC; -- GitLab From 37309f47e2f5674f3e86cb765312ace42cfcedf5 Mon Sep 17 00:00:00 2001 From: Ping Cheng Date: Wed, 9 Dec 2020 20:52:30 -0800 Subject: [PATCH 0036/2012] HID: wacom: Fix memory leakage caused by kfifo_alloc As reported by syzbot below, kfifo_alloc'd memory would not be freed if a non-zero return value is triggered in wacom_probe. This patch creates and uses devm_kfifo_alloc to allocate and free itself. BUG: memory leak unreferenced object 0xffff88810dc44a00 (size 512): comm "kworker/1:2", pid 3674, jiffies 4294943617 (age 14.100s) hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace: [<0000000023e1afac>] kmalloc_array include/linux/slab.h:592 [inline] [<0000000023e1afac>] __kfifo_alloc+0xad/0x100 lib/kfifo.c:43 [<00000000c477f737>] wacom_probe+0x1a1/0x3b0 drivers/hid/wacom_sys.c:2727 [<00000000b3109aca>] hid_device_probe+0x16b/0x210 drivers/hid/hid-core.c:2281 [<00000000aff7c640>] really_probe+0x159/0x480 drivers/base/dd.c:554 [<00000000778d0bc3>] driver_probe_device+0x84/0x100 drivers/base/dd.c:738 [<000000005108dbb5>] __device_attach_driver+0xee/0x110 drivers/base/dd.c:844 [<00000000efb7c59e>] bus_for_each_drv+0xb7/0x100 drivers/base/bus.c:431 [<0000000024ab1590>] __device_attach+0x122/0x250 drivers/base/dd.c:912 [<000000004c7ac048>] bus_probe_device+0xc6/0xe0 drivers/base/bus.c:491 [<00000000b93050a3>] device_add+0x5ac/0xc30 drivers/base/core.c:2936 [<00000000e5b46ea5>] hid_add_device+0x151/0x390 drivers/hid/hid-core.c:2437 [<00000000c6add147>] usbhid_probe+0x412/0x560 drivers/hid/usbhid/hid-core.c:1407 [<00000000c33acdb4>] usb_probe_interface+0x177/0x370 drivers/usb/core/driver.c:396 [<00000000aff7c640>] really_probe+0x159/0x480 drivers/base/dd.c:554 [<00000000778d0bc3>] driver_probe_device+0x84/0x100 drivers/base/dd.c:738 [<000000005108dbb5>] __device_attach_driver+0xee/0x110 drivers/base/dd.c:844 https://syzkaller.appspot.com/bug?extid=5b49c9695968d7250a26 Reported-by: syzbot+5b49c9695968d7250a26@syzkaller.appspotmail.com Signed-off-by: Ping Cheng Reviewed-by: Benjamin Tissoires Signed-off-by: Jiri Kosina --- drivers/hid/wacom_sys.c | 35 ++++++++++++++++++++++++++++++++--- 1 file changed, 32 insertions(+), 3 deletions(-) diff --git a/drivers/hid/wacom_sys.c b/drivers/hid/wacom_sys.c index 045c464228d91..e8acd235db2a9 100644 --- a/drivers/hid/wacom_sys.c +++ b/drivers/hid/wacom_sys.c @@ -1270,6 +1270,37 @@ static int wacom_devm_sysfs_create_group(struct wacom *wacom, group); } +static void wacom_devm_kfifo_release(struct device *dev, void *res) +{ + struct kfifo_rec_ptr_2 *devres = res; + + kfifo_free(devres); +} + +static int wacom_devm_kfifo_alloc(struct wacom *wacom) +{ + struct wacom_wac *wacom_wac = &wacom->wacom_wac; + struct kfifo_rec_ptr_2 *pen_fifo = &wacom_wac->pen_fifo; + int error; + + pen_fifo = devres_alloc(wacom_devm_kfifo_release, + sizeof(struct kfifo_rec_ptr_2), + GFP_KERNEL); + + if (!pen_fifo) + return -ENOMEM; + + error = kfifo_alloc(pen_fifo, WACOM_PKGLEN_MAX, GFP_KERNEL); + if (error) { + devres_free(pen_fifo); + return error; + } + + devres_add(&wacom->hdev->dev, pen_fifo); + + return 0; +} + enum led_brightness wacom_leds_brightness_get(struct wacom_led *led) { struct wacom *wacom = led->wacom; @@ -2724,7 +2755,7 @@ static int wacom_probe(struct hid_device *hdev, if (features->check_for_hid_type && features->hid_type != hdev->type) return -ENODEV; - error = kfifo_alloc(&wacom_wac->pen_fifo, WACOM_PKGLEN_MAX, GFP_KERNEL); + error = wacom_devm_kfifo_alloc(wacom); if (error) return error; @@ -2786,8 +2817,6 @@ static void wacom_remove(struct hid_device *hdev) if (wacom->wacom_wac.features.type != REMOTE) wacom_release_resources(wacom); - - kfifo_free(&wacom_wac->pen_fifo); } #ifdef CONFIG_PM -- GitLab From 2a5f1b67ec577fb1544b563086e0377f095f88e2 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 10 Dec 2020 08:30:59 +0000 Subject: [PATCH 0037/2012] KVM: arm64: Don't access PMCR_EL0 when no PMU is available We reset the guest's view of PMCR_EL0 unconditionally, based on the host's view of this register. It is however legal for an implementation not to provide any PMU, resulting in an UNDEF. The obvious fix is to skip the reset of this shadow register when no PMU is available, sidestepping the issue entirely. If no PMU is available, the guest is not able to request a virtual PMU anyway, so not doing nothing is the right thing to do! It is unlikely that this bug can hit any HW implementation though, as they all provide a PMU. It has been found using nested virt with the host KVM not implementing the PMU itself. Fixes: ab9468340d2bc ("arm64: KVM: Add access handler for PMCR register") Reviewed-by: Alexandru Elisei Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20201210083059.1277162-1-maz@kernel.org --- arch/arm64/kvm/sys_regs.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 3313dedfa5053..d46e7f706cb06 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -594,6 +594,10 @@ static void reset_pmcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) { u64 pmcr, val; + /* No PMU available, PMCR_EL0 may UNDEF... */ + if (!kvm_arm_support_pmu_v3()) + return; + pmcr = read_sysreg(pmcr_el0); /* * Writable bits of PMCR_EL0 (ARMV8_PMU_PMCR_MASK) are reset to UNKNOWN -- GitLab From ff367fe473a9857160c17827931375a899076394 Mon Sep 17 00:00:00 2001 From: David Brazdil Date: Tue, 8 Dec 2020 14:24:47 +0000 Subject: [PATCH 0038/2012] KVM: arm64: Prevent use of invalid PSCI v0.1 function IDs PSCI driver exposes a struct containing the PSCI v0.1 function IDs configured in the DT. However, the struct does not convey the information whether these were set from DT or contain the default value zero. This could be a problem for PSCI proxy in KVM protected mode. Extend config passed to KVM with a bit mask with individual bits set depending on whether the corresponding function pointer in psci_ops is set, eg. set bit for PSCI_CPU_SUSPEND if psci_ops.cpu_suspend != NULL. Previously config was split into multiple global variables. Put everything into a single struct for convenience. Reported-by: Mark Rutland Signed-off-by: David Brazdil Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20201208142452.87237-2-dbrazdil@google.com --- arch/arm64/include/asm/kvm_host.h | 20 +++++++++++ arch/arm64/kvm/arm.c | 14 +++++--- arch/arm64/kvm/hyp/nvhe/psci-relay.c | 53 +++++++++++++++++++++------- 3 files changed, 70 insertions(+), 17 deletions(-) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 11beda85ee7e5..828d50d40dc2f 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -240,6 +241,25 @@ struct kvm_host_data { struct kvm_pmu_events pmu_events; }; +#define KVM_HOST_PSCI_0_1_CPU_SUSPEND BIT(0) +#define KVM_HOST_PSCI_0_1_CPU_ON BIT(1) +#define KVM_HOST_PSCI_0_1_CPU_OFF BIT(2) +#define KVM_HOST_PSCI_0_1_MIGRATE BIT(3) + +struct kvm_host_psci_config { + /* PSCI version used by host. */ + u32 version; + + /* Function IDs used by host if version is v0.1. */ + struct psci_0_1_function_ids function_ids_0_1; + + /* Bitmask of functions enabled for v0.1, bits KVM_HOST_PSCI_0_1_*. */ + unsigned int enabled_functions_0_1; +}; + +extern struct kvm_host_psci_config kvm_nvhe_sym(kvm_host_psci_config); +#define kvm_host_psci_config CHOOSE_NVHE_SYM(kvm_host_psci_config) + struct vcpu_reset_state { unsigned long pc; unsigned long r0; diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 6e637d2b4cfb7..6a2f4e01b04f3 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -66,8 +66,6 @@ static DEFINE_PER_CPU(unsigned char, kvm_arm_hardware_enabled); DEFINE_STATIC_KEY_FALSE(userspace_irqchip_in_use); extern u64 kvm_nvhe_sym(__cpu_logical_map)[NR_CPUS]; -extern u32 kvm_nvhe_sym(kvm_host_psci_version); -extern struct psci_0_1_function_ids kvm_nvhe_sym(kvm_host_psci_0_1_function_ids); int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu) { @@ -1618,8 +1616,16 @@ static bool init_psci_relay(void) return false; } - kvm_nvhe_sym(kvm_host_psci_version) = psci_ops.get_version(); - kvm_nvhe_sym(kvm_host_psci_0_1_function_ids) = get_psci_0_1_function_ids(); + kvm_host_psci_config.version = psci_ops.get_version(); + + if (kvm_host_psci_config.version == PSCI_VERSION(0, 1)) { + kvm_host_psci_config.function_ids_0_1 = get_psci_0_1_function_ids(); + kvm_host_psci_config.enabled_functions_0_1 = + (psci_ops.cpu_suspend ? KVM_HOST_PSCI_0_1_CPU_SUSPEND : 0) | + (psci_ops.cpu_off ? KVM_HOST_PSCI_0_1_CPU_OFF : 0) | + (psci_ops.cpu_on ? KVM_HOST_PSCI_0_1_CPU_ON : 0) | + (psci_ops.migrate ? KVM_HOST_PSCI_0_1_MIGRATE : 0); + } return true; } diff --git a/arch/arm64/kvm/hyp/nvhe/psci-relay.c b/arch/arm64/kvm/hyp/nvhe/psci-relay.c index 08dc9de693147..0d6f4aa396217 100644 --- a/arch/arm64/kvm/hyp/nvhe/psci-relay.c +++ b/arch/arm64/kvm/hyp/nvhe/psci-relay.c @@ -22,9 +22,8 @@ void kvm_hyp_cpu_resume(unsigned long r0); void __noreturn __host_enter(struct kvm_cpu_context *host_ctxt); /* Config options set by the host. */ -__ro_after_init u32 kvm_host_psci_version; -__ro_after_init struct psci_0_1_function_ids kvm_host_psci_0_1_function_ids; -__ro_after_init s64 hyp_physvirt_offset; +struct kvm_host_psci_config __ro_after_init kvm_host_psci_config; +s64 __ro_after_init hyp_physvirt_offset; #define __hyp_pa(x) ((phys_addr_t)((x)) + hyp_physvirt_offset) @@ -54,12 +53,41 @@ static u64 get_psci_func_id(struct kvm_cpu_context *host_ctxt) return func_id; } +static inline bool is_psci_0_1_function_enabled(unsigned int fn_bit) +{ + return kvm_host_psci_config.enabled_functions_0_1 & fn_bit; +} + +static inline bool is_psci_0_1_cpu_suspend(u64 func_id) +{ + return is_psci_0_1_function_enabled(KVM_HOST_PSCI_0_1_CPU_SUSPEND) && + (func_id == kvm_host_psci_config.function_ids_0_1.cpu_suspend); +} + +static inline bool is_psci_0_1_cpu_on(u64 func_id) +{ + return is_psci_0_1_function_enabled(KVM_HOST_PSCI_0_1_CPU_ON) && + (func_id == kvm_host_psci_config.function_ids_0_1.cpu_on); +} + +static inline bool is_psci_0_1_cpu_off(u64 func_id) +{ + return is_psci_0_1_function_enabled(KVM_HOST_PSCI_0_1_CPU_OFF) && + (func_id == kvm_host_psci_config.function_ids_0_1.cpu_off); +} + +static inline bool is_psci_0_1_migrate(u64 func_id) +{ + return is_psci_0_1_function_enabled(KVM_HOST_PSCI_0_1_MIGRATE) && + (func_id == kvm_host_psci_config.function_ids_0_1.migrate); +} + static bool is_psci_0_1_call(u64 func_id) { - return (func_id == kvm_host_psci_0_1_function_ids.cpu_suspend) || - (func_id == kvm_host_psci_0_1_function_ids.cpu_on) || - (func_id == kvm_host_psci_0_1_function_ids.cpu_off) || - (func_id == kvm_host_psci_0_1_function_ids.migrate); + return is_psci_0_1_cpu_suspend(func_id) || + is_psci_0_1_cpu_on(func_id) || + is_psci_0_1_cpu_off(func_id) || + is_psci_0_1_migrate(func_id); } static bool is_psci_0_2_call(u64 func_id) @@ -71,7 +99,7 @@ static bool is_psci_0_2_call(u64 func_id) static bool is_psci_call(u64 func_id) { - switch (kvm_host_psci_version) { + switch (kvm_host_psci_config.version) { case PSCI_VERSION(0, 1): return is_psci_0_1_call(func_id); default: @@ -248,12 +276,11 @@ asmlinkage void __noreturn kvm_host_psci_cpu_entry(bool is_cpu_on) static unsigned long psci_0_1_handler(u64 func_id, struct kvm_cpu_context *host_ctxt) { - if ((func_id == kvm_host_psci_0_1_function_ids.cpu_off) || - (func_id == kvm_host_psci_0_1_function_ids.migrate)) + if (is_psci_0_1_cpu_off(func_id) || is_psci_0_1_migrate(func_id)) return psci_forward(host_ctxt); - else if (func_id == kvm_host_psci_0_1_function_ids.cpu_on) + else if (is_psci_0_1_cpu_on(func_id)) return psci_cpu_on(func_id, host_ctxt); - else if (func_id == kvm_host_psci_0_1_function_ids.cpu_suspend) + else if (is_psci_0_1_cpu_suspend(func_id)) return psci_cpu_suspend(func_id, host_ctxt); else return PSCI_RET_NOT_SUPPORTED; @@ -304,7 +331,7 @@ bool kvm_host_psci_handler(struct kvm_cpu_context *host_ctxt) if (!is_psci_call(func_id)) return false; - switch (kvm_host_psci_version) { + switch (kvm_host_psci_config.version) { case PSCI_VERSION(0, 1): ret = psci_0_1_handler(func_id, host_ctxt); break; -- GitLab From 7a96a0687b80a1870c689418d7b72012c8bdd53d Mon Sep 17 00:00:00 2001 From: David Brazdil Date: Tue, 8 Dec 2020 14:24:48 +0000 Subject: [PATCH 0039/2012] KVM: arm64: Use lm_alias in nVHE-only VA conversion init_hyp_physvirt_offset() computes PA from a kernel VA. Conversion to kernel linear-map is required first but the code used kvm_ksym_ref() for this purpose. Under VHE that is a NOP and resulted in a runtime warning. Replace kvm_ksym_ref with lm_alias. Reported-by: Qian Cai Signed-off-by: David Brazdil Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20201208142452.87237-3-dbrazdil@google.com --- arch/arm64/kvm/va_layout.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kvm/va_layout.c b/arch/arm64/kvm/va_layout.c index d8cc51bd60bf2..914732b88c69f 100644 --- a/arch/arm64/kvm/va_layout.c +++ b/arch/arm64/kvm/va_layout.c @@ -42,7 +42,7 @@ static void init_hyp_physvirt_offset(void) u64 kern_va, hyp_va; /* Compute the offset from the hyp VA and PA of a random symbol. */ - kern_va = (u64)kvm_ksym_ref(__hyp_text_start); + kern_va = (u64)lm_alias(__hyp_text_start); hyp_va = __early_kern_hyp_va(kern_va); CHOOSE_NVHE_SYM(hyp_physvirt_offset) = (s64)__pa(kern_va) - (s64)hyp_va; } -- GitLab From c3e181aec96f6ada84df1cb72a72be8970f8b284 Mon Sep 17 00:00:00 2001 From: David Brazdil Date: Tue, 8 Dec 2020 14:24:49 +0000 Subject: [PATCH 0040/2012] KVM: arm64: Skip computing hyp VA layout for VHE Computing the hyp VA layout is redundant when the kernel runs in EL2 and hyp shares its VA mappings. Make calling kvm_compute_layout() conditional on not just CONFIG_KVM but also !is_kernel_in_hyp_mode(). Signed-off-by: David Brazdil Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20201208142452.87237-4-dbrazdil@google.com --- arch/arm64/kernel/smp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 18e9727d3f645..4e585cc892e82 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -434,7 +434,7 @@ static void __init hyp_mode_check(void) "CPU: CPUs started in inconsistent modes"); else pr_info("CPU: All CPU(s) started at EL1\n"); - if (IS_ENABLED(CONFIG_KVM)) + if (IS_ENABLED(CONFIG_KVM) && !is_kernel_in_hyp_mode()) kvm_compute_layout(); } -- GitLab From 61fe0c37af57ac35472a870581a7d0bb5ac2f63a Mon Sep 17 00:00:00 2001 From: David Brazdil Date: Tue, 8 Dec 2020 14:24:50 +0000 Subject: [PATCH 0041/2012] KVM: arm64: Minor cleanup of hyp variables used in host Small cleanup moving declarations of hyp-exported variables to kvm_host.h and using macros to avoid having to refer to them with kvm_nvhe_sym() in host. No functional change intended. Signed-off-by: David Brazdil Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20201208142452.87237-5-dbrazdil@google.com --- arch/arm64/include/asm/kvm_host.h | 6 ++++++ arch/arm64/kvm/arm.c | 4 +--- arch/arm64/kvm/hyp/nvhe/hyp-smp.c | 6 +++--- arch/arm64/kvm/va_layout.c | 5 ++--- 4 files changed, 12 insertions(+), 9 deletions(-) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 828d50d40dc2f..bce2452b305c0 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -260,6 +260,12 @@ struct kvm_host_psci_config { extern struct kvm_host_psci_config kvm_nvhe_sym(kvm_host_psci_config); #define kvm_host_psci_config CHOOSE_NVHE_SYM(kvm_host_psci_config) +extern s64 kvm_nvhe_sym(hyp_physvirt_offset); +#define hyp_physvirt_offset CHOOSE_NVHE_SYM(hyp_physvirt_offset) + +extern u64 kvm_nvhe_sym(hyp_cpu_logical_map)[NR_CPUS]; +#define hyp_cpu_logical_map CHOOSE_NVHE_SYM(hyp_cpu_logical_map) + struct vcpu_reset_state { unsigned long pc; unsigned long r0; diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 6a2f4e01b04f3..836ca763b91d7 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -65,8 +65,6 @@ static bool vgic_present; static DEFINE_PER_CPU(unsigned char, kvm_arm_hardware_enabled); DEFINE_STATIC_KEY_FALSE(userspace_irqchip_in_use); -extern u64 kvm_nvhe_sym(__cpu_logical_map)[NR_CPUS]; - int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu) { return kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE; @@ -1602,7 +1600,7 @@ static void init_cpu_logical_map(void) * allow any other CPUs from the `possible` set to boot. */ for_each_online_cpu(cpu) - kvm_nvhe_sym(__cpu_logical_map)[cpu] = cpu_logical_map(cpu); + hyp_cpu_logical_map[cpu] = cpu_logical_map(cpu); } static bool init_psci_relay(void) diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-smp.c b/arch/arm64/kvm/hyp/nvhe/hyp-smp.c index cbab0c6246e20..2997aa156d8e5 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp-smp.c +++ b/arch/arm64/kvm/hyp/nvhe/hyp-smp.c @@ -14,14 +14,14 @@ * Other CPUs should not be allowed to boot because their features were * not checked against the finalized system capabilities. */ -u64 __ro_after_init __cpu_logical_map[NR_CPUS] = { [0 ... NR_CPUS-1] = INVALID_HWID }; +u64 __ro_after_init hyp_cpu_logical_map[NR_CPUS] = { [0 ... NR_CPUS-1] = INVALID_HWID }; u64 cpu_logical_map(unsigned int cpu) { - if (cpu >= ARRAY_SIZE(__cpu_logical_map)) + if (cpu >= ARRAY_SIZE(hyp_cpu_logical_map)) hyp_panic(); - return __cpu_logical_map[cpu]; + return hyp_cpu_logical_map[cpu]; } unsigned long __hyp_per_cpu_offset(unsigned int cpu) diff --git a/arch/arm64/kvm/va_layout.c b/arch/arm64/kvm/va_layout.c index 914732b88c69f..70fcd6a12fe1f 100644 --- a/arch/arm64/kvm/va_layout.c +++ b/arch/arm64/kvm/va_layout.c @@ -34,17 +34,16 @@ static u64 __early_kern_hyp_va(u64 addr) } /* - * Store a hyp VA <-> PA offset into a hyp-owned variable. + * Store a hyp VA <-> PA offset into a EL2-owned variable. */ static void init_hyp_physvirt_offset(void) { - extern s64 kvm_nvhe_sym(hyp_physvirt_offset); u64 kern_va, hyp_va; /* Compute the offset from the hyp VA and PA of a random symbol. */ kern_va = (u64)lm_alias(__hyp_text_start); hyp_va = __early_kern_hyp_va(kern_va); - CHOOSE_NVHE_SYM(hyp_physvirt_offset) = (s64)__pa(kern_va) - (s64)hyp_va; + hyp_physvirt_offset = (s64)__pa(kern_va) - (s64)hyp_va; } /* -- GitLab From e6829e0384a49efe68537298132230bebd8bd1b3 Mon Sep 17 00:00:00 2001 From: David Brazdil Date: Tue, 8 Dec 2020 14:24:51 +0000 Subject: [PATCH 0042/2012] KVM: arm64: Remove unused includes in psci-relay.c Minor cleanup removing unused includes. Signed-off-by: David Brazdil Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20201208142452.87237-6-dbrazdil@google.com --- arch/arm64/kvm/hyp/nvhe/psci-relay.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/arch/arm64/kvm/hyp/nvhe/psci-relay.c b/arch/arm64/kvm/hyp/nvhe/psci-relay.c index 0d6f4aa396217..1f7237e451482 100644 --- a/arch/arm64/kvm/hyp/nvhe/psci-relay.c +++ b/arch/arm64/kvm/hyp/nvhe/psci-relay.c @@ -7,11 +7,8 @@ #include #include #include -#include #include #include -#include -#include #include #include -- GitLab From 860a4c3d1e04a3c3e62bacbbba64417bf49768e2 Mon Sep 17 00:00:00 2001 From: David Brazdil Date: Tue, 8 Dec 2020 14:24:52 +0000 Subject: [PATCH 0043/2012] KVM: arm64: Move skip_host_instruction to adjust_pc.h Move function for skipping host instruction in the host trap handler to a header file containing analogical helpers for guests. Signed-off-by: David Brazdil Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20201208142452.87237-7-dbrazdil@google.com --- arch/arm64/kvm/hyp/include/hyp/adjust_pc.h | 9 +++++++++ arch/arm64/kvm/hyp/nvhe/hyp-main.c | 12 ++---------- 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/arch/arm64/kvm/hyp/include/hyp/adjust_pc.h b/arch/arm64/kvm/hyp/include/hyp/adjust_pc.h index b1f60923a8feb..61716359035d6 100644 --- a/arch/arm64/kvm/hyp/include/hyp/adjust_pc.h +++ b/arch/arm64/kvm/hyp/include/hyp/adjust_pc.h @@ -59,4 +59,13 @@ static inline void __adjust_pc(struct kvm_vcpu *vcpu) } } +/* + * Skip an instruction while host sysregs are live. + * Assumes host is always 64-bit. + */ +static inline void kvm_skip_host_instr(void) +{ + write_sysreg_el2(read_sysreg_el2(SYS_ELR) + 4, SYS_ELR); +} + #endif diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c index bde658d51404b..a906f9e2ff34f 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c +++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c @@ -157,11 +157,6 @@ static void default_host_smc_handler(struct kvm_cpu_context *host_ctxt) __kvm_hyp_host_forward_smc(host_ctxt); } -static void skip_host_instruction(void) -{ - write_sysreg_el2(read_sysreg_el2(SYS_ELR) + 4, SYS_ELR); -} - static void handle_host_smc(struct kvm_cpu_context *host_ctxt) { bool handled; @@ -170,11 +165,8 @@ static void handle_host_smc(struct kvm_cpu_context *host_ctxt) if (!handled) default_host_smc_handler(host_ctxt); - /* - * Unlike HVC, the return address of an SMC is the instruction's PC. - * Move the return address past the instruction. - */ - skip_host_instruction(); + /* SMC was trapped, move ELR past the current PC. */ + kvm_skip_host_instr(); } void handle_trap(struct kvm_cpu_context *host_ctxt) -- GitLab From 767c973f2e4a9264a4f159c9fad5ca8acdb9915e Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 22 Dec 2020 12:46:41 +0000 Subject: [PATCH 0044/2012] KVM: arm64: Declutter host PSCI 0.1 handling Although there is nothing wrong with the current host PSCI relay implementation, we can clean it up and remove some of the helpers that do not improve the overall readability of the legacy PSCI 0.1 handling. Opportunity is taken to turn the bitmap into a set of booleans, and creative use of preprocessor macros make init and check more concise/readable. Suggested-by: Mark Rutland Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/kvm_host.h | 11 ++-- arch/arm64/kvm/arm.c | 12 +++-- arch/arm64/kvm/hyp/nvhe/psci-relay.c | 77 +++++++--------------------- 3 files changed, 30 insertions(+), 70 deletions(-) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index bce2452b305c0..8fcfab0c25672 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -241,11 +241,6 @@ struct kvm_host_data { struct kvm_pmu_events pmu_events; }; -#define KVM_HOST_PSCI_0_1_CPU_SUSPEND BIT(0) -#define KVM_HOST_PSCI_0_1_CPU_ON BIT(1) -#define KVM_HOST_PSCI_0_1_CPU_OFF BIT(2) -#define KVM_HOST_PSCI_0_1_MIGRATE BIT(3) - struct kvm_host_psci_config { /* PSCI version used by host. */ u32 version; @@ -253,8 +248,10 @@ struct kvm_host_psci_config { /* Function IDs used by host if version is v0.1. */ struct psci_0_1_function_ids function_ids_0_1; - /* Bitmask of functions enabled for v0.1, bits KVM_HOST_PSCI_0_1_*. */ - unsigned int enabled_functions_0_1; + bool psci_0_1_cpu_suspend_implemented; + bool psci_0_1_cpu_on_implemented; + bool psci_0_1_cpu_off_implemented; + bool psci_0_1_migrate_implemented; }; extern struct kvm_host_psci_config kvm_nvhe_sym(kvm_host_psci_config); diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 836ca763b91d7..e207e4541f552 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -1603,6 +1603,9 @@ static void init_cpu_logical_map(void) hyp_cpu_logical_map[cpu] = cpu_logical_map(cpu); } +#define init_psci_0_1_impl_state(config, what) \ + config.psci_0_1_ ## what ## _implemented = psci_ops.what + static bool init_psci_relay(void) { /* @@ -1618,11 +1621,10 @@ static bool init_psci_relay(void) if (kvm_host_psci_config.version == PSCI_VERSION(0, 1)) { kvm_host_psci_config.function_ids_0_1 = get_psci_0_1_function_ids(); - kvm_host_psci_config.enabled_functions_0_1 = - (psci_ops.cpu_suspend ? KVM_HOST_PSCI_0_1_CPU_SUSPEND : 0) | - (psci_ops.cpu_off ? KVM_HOST_PSCI_0_1_CPU_OFF : 0) | - (psci_ops.cpu_on ? KVM_HOST_PSCI_0_1_CPU_ON : 0) | - (psci_ops.migrate ? KVM_HOST_PSCI_0_1_MIGRATE : 0); + init_psci_0_1_impl_state(kvm_host_psci_config, cpu_suspend); + init_psci_0_1_impl_state(kvm_host_psci_config, cpu_on); + init_psci_0_1_impl_state(kvm_host_psci_config, cpu_off); + init_psci_0_1_impl_state(kvm_host_psci_config, migrate); } return true; } diff --git a/arch/arm64/kvm/hyp/nvhe/psci-relay.c b/arch/arm64/kvm/hyp/nvhe/psci-relay.c index 1f7237e451482..e3947846ffcb9 100644 --- a/arch/arm64/kvm/hyp/nvhe/psci-relay.c +++ b/arch/arm64/kvm/hyp/nvhe/psci-relay.c @@ -43,48 +43,16 @@ struct psci_boot_args { static DEFINE_PER_CPU(struct psci_boot_args, cpu_on_args) = PSCI_BOOT_ARGS_INIT; static DEFINE_PER_CPU(struct psci_boot_args, suspend_args) = PSCI_BOOT_ARGS_INIT; -static u64 get_psci_func_id(struct kvm_cpu_context *host_ctxt) -{ - DECLARE_REG(u64, func_id, host_ctxt, 0); - - return func_id; -} - -static inline bool is_psci_0_1_function_enabled(unsigned int fn_bit) -{ - return kvm_host_psci_config.enabled_functions_0_1 & fn_bit; -} - -static inline bool is_psci_0_1_cpu_suspend(u64 func_id) -{ - return is_psci_0_1_function_enabled(KVM_HOST_PSCI_0_1_CPU_SUSPEND) && - (func_id == kvm_host_psci_config.function_ids_0_1.cpu_suspend); -} - -static inline bool is_psci_0_1_cpu_on(u64 func_id) -{ - return is_psci_0_1_function_enabled(KVM_HOST_PSCI_0_1_CPU_ON) && - (func_id == kvm_host_psci_config.function_ids_0_1.cpu_on); -} - -static inline bool is_psci_0_1_cpu_off(u64 func_id) -{ - return is_psci_0_1_function_enabled(KVM_HOST_PSCI_0_1_CPU_OFF) && - (func_id == kvm_host_psci_config.function_ids_0_1.cpu_off); -} - -static inline bool is_psci_0_1_migrate(u64 func_id) -{ - return is_psci_0_1_function_enabled(KVM_HOST_PSCI_0_1_MIGRATE) && - (func_id == kvm_host_psci_config.function_ids_0_1.migrate); -} +#define is_psci_0_1(what, func_id) \ + (kvm_host_psci_config.psci_0_1_ ## what ## _implemented && \ + (func_id) == kvm_host_psci_config.function_ids_0_1.what) static bool is_psci_0_1_call(u64 func_id) { - return is_psci_0_1_cpu_suspend(func_id) || - is_psci_0_1_cpu_on(func_id) || - is_psci_0_1_cpu_off(func_id) || - is_psci_0_1_migrate(func_id); + return (is_psci_0_1(cpu_suspend, func_id) || + is_psci_0_1(cpu_on, func_id) || + is_psci_0_1(cpu_off, func_id) || + is_psci_0_1(migrate, func_id)); } static bool is_psci_0_2_call(u64 func_id) @@ -94,16 +62,6 @@ static bool is_psci_0_2_call(u64 func_id) (PSCI_0_2_FN64(0) <= func_id && func_id <= PSCI_0_2_FN64(31)); } -static bool is_psci_call(u64 func_id) -{ - switch (kvm_host_psci_config.version) { - case PSCI_VERSION(0, 1): - return is_psci_0_1_call(func_id); - default: - return is_psci_0_2_call(func_id); - } -} - static unsigned long psci_call(unsigned long fn, unsigned long arg0, unsigned long arg1, unsigned long arg2) { @@ -273,14 +231,14 @@ asmlinkage void __noreturn kvm_host_psci_cpu_entry(bool is_cpu_on) static unsigned long psci_0_1_handler(u64 func_id, struct kvm_cpu_context *host_ctxt) { - if (is_psci_0_1_cpu_off(func_id) || is_psci_0_1_migrate(func_id)) + if (is_psci_0_1(cpu_off, func_id) || is_psci_0_1(migrate, func_id)) return psci_forward(host_ctxt); - else if (is_psci_0_1_cpu_on(func_id)) + if (is_psci_0_1(cpu_on, func_id)) return psci_cpu_on(func_id, host_ctxt); - else if (is_psci_0_1_cpu_suspend(func_id)) + if (is_psci_0_1(cpu_suspend, func_id)) return psci_cpu_suspend(func_id, host_ctxt); - else - return PSCI_RET_NOT_SUPPORTED; + + return PSCI_RET_NOT_SUPPORTED; } static unsigned long psci_0_2_handler(u64 func_id, struct kvm_cpu_context *host_ctxt) @@ -322,20 +280,23 @@ static unsigned long psci_1_0_handler(u64 func_id, struct kvm_cpu_context *host_ bool kvm_host_psci_handler(struct kvm_cpu_context *host_ctxt) { - u64 func_id = get_psci_func_id(host_ctxt); + DECLARE_REG(u64, func_id, host_ctxt, 0); unsigned long ret; - if (!is_psci_call(func_id)) - return false; - switch (kvm_host_psci_config.version) { case PSCI_VERSION(0, 1): + if (!is_psci_0_1_call(func_id)) + return false; ret = psci_0_1_handler(func_id, host_ctxt); break; case PSCI_VERSION(0, 2): + if (!is_psci_0_2_call(func_id)) + return false; ret = psci_0_2_handler(func_id, host_ctxt); break; default: + if (!is_psci_0_2_call(func_id)) + return false; ret = psci_1_0_handler(func_id, host_ctxt); break; } -- GitLab From e789ca0cc1d51296832b8424fa4008ce6e9d1703 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 16 Dec 2020 11:18:37 +0100 Subject: [PATCH 0045/2012] ext4: combine ext4_handle_error() and save_error_info() save_error_info() is always called together with ext4_handle_error(). Combine them into a single call and move unconditional bits out of save_error_info() into ext4_handle_error(). Signed-off-by: Jan Kara Link: https://lore.kernel.org/r/20201216101844.22917-2-jack@suse.cz Signed-off-by: Theodore Ts'o --- fs/ext4/super.c | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 4bbfb05aae581..cdf2a377d884f 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -592,9 +592,6 @@ static void __save_error_info(struct super_block *sb, int error, { struct ext4_sb_info *sbi = EXT4_SB(sb); - EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; - if (bdev_read_only(sb->s_bdev)) - return; /* We default to EFSCORRUPTED error... */ if (error == 0) error = EFSCORRUPTED; @@ -647,13 +644,19 @@ static void save_error_info(struct super_block *sb, int error, * used to deal with unrecoverable failures such as journal IO errors or ENOMEM * at a critical moment in log management. */ -static void ext4_handle_error(struct super_block *sb, bool force_ro) +static void ext4_handle_error(struct super_block *sb, bool force_ro, int error, + __u32 ino, __u64 block, + const char *func, unsigned int line) { journal_t *journal = EXT4_SB(sb)->s_journal; + EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; if (test_opt(sb, WARN_ON_ERROR)) WARN_ON_ONCE(1); + if (!bdev_read_only(sb->s_bdev)) + save_error_info(sb, error, ino, block, func, line); + if (sb_rdonly(sb) || (!force_ro && test_opt(sb, ERRORS_CONT))) return; @@ -710,8 +713,7 @@ void __ext4_error(struct super_block *sb, const char *function, sb->s_id, function, line, current->comm, &vaf); va_end(args); } - save_error_info(sb, error, 0, block, function, line); - ext4_handle_error(sb, force_ro); + ext4_handle_error(sb, force_ro, error, 0, block, function, line); } void __ext4_error_inode(struct inode *inode, const char *function, @@ -741,9 +743,8 @@ void __ext4_error_inode(struct inode *inode, const char *function, current->comm, &vaf); va_end(args); } - save_error_info(inode->i_sb, error, inode->i_ino, block, - function, line); - ext4_handle_error(inode->i_sb, false); + ext4_handle_error(inode->i_sb, false, error, inode->i_ino, block, + function, line); } void __ext4_error_file(struct file *file, const char *function, @@ -780,9 +781,8 @@ void __ext4_error_file(struct file *file, const char *function, current->comm, path, &vaf); va_end(args); } - save_error_info(inode->i_sb, EFSCORRUPTED, inode->i_ino, block, - function, line); - ext4_handle_error(inode->i_sb, false); + ext4_handle_error(inode->i_sb, false, EFSCORRUPTED, inode->i_ino, block, + function, line); } const char *ext4_decode_error(struct super_block *sb, int errno, @@ -849,8 +849,7 @@ void __ext4_std_error(struct super_block *sb, const char *function, sb->s_id, function, line, errstr); } - save_error_info(sb, -errno, 0, 0, function, line); - ext4_handle_error(sb, false); + ext4_handle_error(sb, false, -errno, 0, 0, function, line); } void __ext4_msg(struct super_block *sb, @@ -944,13 +943,14 @@ __acquires(bitlock) if (test_opt(sb, ERRORS_CONT)) { if (test_opt(sb, WARN_ON_ERROR)) WARN_ON_ONCE(1); + EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; __save_error_info(sb, EFSCORRUPTED, ino, block, function, line); - schedule_work(&EXT4_SB(sb)->s_error_work); + if (!bdev_read_only(sb->s_bdev)) + schedule_work(&EXT4_SB(sb)->s_error_work); return; } ext4_unlock_group(sb, grp); - save_error_info(sb, EFSCORRUPTED, ino, block, function, line); - ext4_handle_error(sb, false); + ext4_handle_error(sb, false, EFSCORRUPTED, ino, block, function, line); /* * We only get here in the ERRORS_RO case; relocking the group * may be dangerous, but nothing bad will happen since the -- GitLab From 4392fbc4bab57db3760f0fb61258cb7089b37665 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 16 Dec 2020 11:18:38 +0100 Subject: [PATCH 0046/2012] ext4: drop sync argument of ext4_commit_super() Everybody passes 1 as sync argument of ext4_commit_super(). Just drop it. Reviewed-by: Harshad Shirwadkar Signed-off-by: Jan Kara Link: https://lore.kernel.org/r/20201216101844.22917-3-jack@suse.cz Signed-off-by: Theodore Ts'o --- fs/ext4/super.c | 47 ++++++++++++++++++++++------------------------- 1 file changed, 22 insertions(+), 25 deletions(-) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index cdf2a377d884f..8bf31003416a9 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -65,7 +65,7 @@ static struct ratelimit_state ext4_mount_msg_ratelimit; static int ext4_load_journal(struct super_block *, struct ext4_super_block *, unsigned long journal_devnum); static int ext4_show_options(struct seq_file *seq, struct dentry *root); -static int ext4_commit_super(struct super_block *sb, int sync); +static int ext4_commit_super(struct super_block *sb); static int ext4_mark_recovery_complete(struct super_block *sb, struct ext4_super_block *es); static int ext4_clear_journal_err(struct super_block *sb, @@ -621,7 +621,7 @@ static void save_error_info(struct super_block *sb, int error, { __save_error_info(sb, error, ino, block, func, line); if (!bdev_read_only(sb->s_bdev)) - ext4_commit_super(sb, 1); + ext4_commit_super(sb); } /* Deal with the reporting of failure conditions on a filesystem such as @@ -686,7 +686,7 @@ static void flush_stashed_error_work(struct work_struct *work) struct ext4_sb_info *sbi = container_of(work, struct ext4_sb_info, s_error_work); - ext4_commit_super(sbi->s_sb, 1); + ext4_commit_super(sbi->s_sb); } #define ext4_error_ratelimit(sb) \ @@ -1152,7 +1152,7 @@ static void ext4_put_super(struct super_block *sb) es->s_state = cpu_to_le16(sbi->s_mount_state); } if (!sb_rdonly(sb)) - ext4_commit_super(sb, 1); + ext4_commit_super(sb); rcu_read_lock(); group_desc = rcu_dereference(sbi->s_group_desc); @@ -2642,7 +2642,7 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es, if (sbi->s_journal) ext4_set_feature_journal_needs_recovery(sb); - err = ext4_commit_super(sb, 1); + err = ext4_commit_super(sb); done: if (test_opt(sb, DEBUG)) printk(KERN_INFO "[EXT4 FS bs=%lu, gc=%u, " @@ -4869,7 +4869,7 @@ no_journal: if (DUMMY_ENCRYPTION_ENABLED(sbi) && !sb_rdonly(sb) && !ext4_has_feature_encrypt(sb)) { ext4_set_feature_encrypt(sb); - ext4_commit_super(sb, 1); + ext4_commit_super(sb); } /* @@ -5424,7 +5424,7 @@ static int ext4_load_journal(struct super_block *sb, es->s_journal_dev = cpu_to_le32(journal_devnum); /* Make sure we flush the recovery flag to disk. */ - ext4_commit_super(sb, 1); + ext4_commit_super(sb); } return 0; @@ -5434,7 +5434,7 @@ err_out: return err; } -static int ext4_commit_super(struct super_block *sb, int sync) +static int ext4_commit_super(struct super_block *sb) { struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_super_block *es = EXT4_SB(sb)->s_es; @@ -5515,8 +5515,7 @@ static int ext4_commit_super(struct super_block *sb, int sync) BUFFER_TRACE(sbh, "marking dirty"); ext4_superblock_csum_set(sb); - if (sync) - lock_buffer(sbh); + lock_buffer(sbh); if (buffer_write_io_error(sbh) || !buffer_uptodate(sbh)) { /* * Oh, dear. A previous attempt to write the @@ -5532,16 +5531,14 @@ static int ext4_commit_super(struct super_block *sb, int sync) set_buffer_uptodate(sbh); } mark_buffer_dirty(sbh); - if (sync) { - unlock_buffer(sbh); - error = __sync_dirty_buffer(sbh, - REQ_SYNC | (test_opt(sb, BARRIER) ? REQ_FUA : 0)); - if (buffer_write_io_error(sbh)) { - ext4_msg(sb, KERN_ERR, "I/O error while writing " - "superblock"); - clear_buffer_write_io_error(sbh); - set_buffer_uptodate(sbh); - } + unlock_buffer(sbh); + error = __sync_dirty_buffer(sbh, + REQ_SYNC | (test_opt(sb, BARRIER) ? REQ_FUA : 0)); + if (buffer_write_io_error(sbh)) { + ext4_msg(sb, KERN_ERR, "I/O error while writing " + "superblock"); + clear_buffer_write_io_error(sbh); + set_buffer_uptodate(sbh); } return error; } @@ -5572,7 +5569,7 @@ static int ext4_mark_recovery_complete(struct super_block *sb, if (ext4_has_feature_journal_needs_recovery(sb) && sb_rdonly(sb)) { ext4_clear_feature_journal_needs_recovery(sb); - ext4_commit_super(sb, 1); + ext4_commit_super(sb); } out: jbd2_journal_unlock_updates(journal); @@ -5614,7 +5611,7 @@ static int ext4_clear_journal_err(struct super_block *sb, EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; es->s_state |= cpu_to_le16(EXT4_ERROR_FS); - ext4_commit_super(sb, 1); + ext4_commit_super(sb); jbd2_journal_clear_err(journal); jbd2_journal_update_sb_errno(journal); @@ -5716,7 +5713,7 @@ static int ext4_freeze(struct super_block *sb) ext4_clear_feature_journal_needs_recovery(sb); } - error = ext4_commit_super(sb, 1); + error = ext4_commit_super(sb); out: if (journal) /* we rely on upper layer to stop further updates */ @@ -5738,7 +5735,7 @@ static int ext4_unfreeze(struct super_block *sb) ext4_set_feature_journal_needs_recovery(sb); } - ext4_commit_super(sb, 1); + ext4_commit_super(sb); return 0; } @@ -5998,7 +5995,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) } if (sbi->s_journal == NULL && !(old_sb_flags & SB_RDONLY)) { - err = ext4_commit_super(sb, 1); + err = ext4_commit_super(sb); if (err) goto restore_opts; } -- GitLab From 05c2c00f3769abb9e323fcaca70d2de0b48af7ba Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 16 Dec 2020 11:18:39 +0100 Subject: [PATCH 0047/2012] ext4: protect superblock modifications with a buffer lock Protect all superblock modifications (including checksum computation) with a superblock buffer lock. That way we are sure computed checksum matches current superblock contents (a mismatch could cause checksum failures in nojournal mode or if an unjournalled superblock update races with a journalled one). Also we avoid modifying superblock contents while it is being written out (which can cause DIF/DIX failures if we are running in nojournal mode). Signed-off-by: Jan Kara Link: https://lore.kernel.org/r/20201216101844.22917-4-jack@suse.cz Signed-off-by: Theodore Ts'o --- fs/ext4/ext4_jbd2.c | 1 - fs/ext4/file.c | 3 +++ fs/ext4/inode.c | 3 +++ fs/ext4/namei.c | 6 ++++++ fs/ext4/resize.c | 12 ++++++++++++ fs/ext4/super.c | 2 +- fs/ext4/xattr.c | 3 +++ 7 files changed, 28 insertions(+), 2 deletions(-) diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c index 1a0a827a7f345..c7e410c4ab7d7 100644 --- a/fs/ext4/ext4_jbd2.c +++ b/fs/ext4/ext4_jbd2.c @@ -379,7 +379,6 @@ int __ext4_handle_dirty_super(const char *where, unsigned int line, struct buffer_head *bh = EXT4_SB(sb)->s_sbh; int err = 0; - ext4_superblock_csum_set(sb); if (ext4_handle_valid(handle)) { err = jbd2_journal_dirty_metadata(handle, bh); if (err) diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 3ed8c048fb12c..26907d5835d00 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -809,8 +809,11 @@ static int ext4_sample_last_mounted(struct super_block *sb, err = ext4_journal_get_write_access(handle, sbi->s_sbh); if (err) goto out_journal; + lock_buffer(sbi->s_sbh); strlcpy(sbi->s_es->s_last_mounted, cp, sizeof(sbi->s_es->s_last_mounted)); + ext4_superblock_csum_set(sb); + unlock_buffer(sbi->s_sbh); ext4_handle_dirty_super(handle, sb); out_journal: ext4_journal_stop(handle); diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 27946882d4ce4..27de6f0a33c8a 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -5150,7 +5150,10 @@ static int ext4_do_update_inode(handle_t *handle, err = ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh); if (err) goto out_brelse; + lock_buffer(EXT4_SB(sb)->s_sbh); ext4_set_feature_large_file(sb); + ext4_superblock_csum_set(sb); + unlock_buffer(EXT4_SB(sb)->s_sbh); ext4_handle_sync(handle); err = ext4_handle_dirty_super(handle, sb); } diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 7a890ff214f1a..40970a509dccc 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -2978,7 +2978,10 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode) (le32_to_cpu(sbi->s_es->s_inodes_count))) { /* Insert this inode at the head of the on-disk orphan list */ NEXT_ORPHAN(inode) = le32_to_cpu(sbi->s_es->s_last_orphan); + lock_buffer(sbi->s_sbh); sbi->s_es->s_last_orphan = cpu_to_le32(inode->i_ino); + ext4_superblock_csum_set(sb); + unlock_buffer(sbi->s_sbh); dirty = true; } list_add(&EXT4_I(inode)->i_orphan, &sbi->s_orphan); @@ -3061,7 +3064,10 @@ int ext4_orphan_del(handle_t *handle, struct inode *inode) mutex_unlock(&sbi->s_orphan_lock); goto out_brelse; } + lock_buffer(sbi->s_sbh); sbi->s_es->s_last_orphan = cpu_to_le32(ino_next); + ext4_superblock_csum_set(inode->i_sb); + unlock_buffer(sbi->s_sbh); mutex_unlock(&sbi->s_orphan_lock); err = ext4_handle_dirty_super(handle, inode->i_sb); } else { diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 928700d57eb67..6155f2b9538ca 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -899,7 +899,10 @@ static int add_new_gdb(handle_t *handle, struct inode *inode, EXT4_SB(sb)->s_gdb_count++; ext4_kvfree_array_rcu(o_group_desc); + lock_buffer(EXT4_SB(sb)->s_sbh); le16_add_cpu(&es->s_reserved_gdt_blocks, -1); + ext4_superblock_csum_set(sb); + unlock_buffer(EXT4_SB(sb)->s_sbh); err = ext4_handle_dirty_super(handle, sb); if (err) ext4_std_error(sb, err); @@ -1384,6 +1387,7 @@ static void ext4_update_super(struct super_block *sb, reserved_blocks *= blocks_count; do_div(reserved_blocks, 100); + lock_buffer(sbi->s_sbh); ext4_blocks_count_set(es, ext4_blocks_count(es) + blocks_count); ext4_free_blocks_count_set(es, ext4_free_blocks_count(es) + free_blocks); le32_add_cpu(&es->s_inodes_count, EXT4_INODES_PER_GROUP(sb) * @@ -1421,6 +1425,8 @@ static void ext4_update_super(struct super_block *sb, * active. */ ext4_r_blocks_count_set(es, ext4_r_blocks_count(es) + reserved_blocks); + ext4_superblock_csum_set(sb); + unlock_buffer(sbi->s_sbh); /* Update the free space counts */ percpu_counter_add(&sbi->s_freeclusters_counter, @@ -1717,8 +1723,11 @@ static int ext4_group_extend_no_check(struct super_block *sb, goto errout; } + lock_buffer(EXT4_SB(sb)->s_sbh); ext4_blocks_count_set(es, o_blocks_count + add); ext4_free_blocks_count_set(es, ext4_free_blocks_count(es) + add); + ext4_superblock_csum_set(sb); + unlock_buffer(EXT4_SB(sb)->s_sbh); ext4_debug("freeing blocks %llu through %llu\n", o_blocks_count, o_blocks_count + add); /* We add the blocks to the bitmap and set the group need init bit */ @@ -1874,10 +1883,13 @@ static int ext4_convert_meta_bg(struct super_block *sb, struct inode *inode) if (err) goto errout; + lock_buffer(sbi->s_sbh); ext4_clear_feature_resize_inode(sb); ext4_set_feature_meta_bg(sb); sbi->s_es->s_first_meta_bg = cpu_to_le32(num_desc_blocks(sb, sbi->s_groups_count)); + ext4_superblock_csum_set(sb); + unlock_buffer(sbi->s_sbh); err = ext4_handle_dirty_super(handle, sb); if (err) { diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 8bf31003416a9..2d653ad142005 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -5444,6 +5444,7 @@ static int ext4_commit_super(struct super_block *sb) if (!sbh || block_device_ejected(sb)) return error; + lock_buffer(sbh); /* * If the file system is mounted read-only, don't update the * superblock write time. This avoids updating the superblock @@ -5515,7 +5516,6 @@ static int ext4_commit_super(struct super_block *sb) BUFFER_TRACE(sbh, "marking dirty"); ext4_superblock_csum_set(sb); - lock_buffer(sbh); if (buffer_write_io_error(sbh) || !buffer_uptodate(sbh)) { /* * Oh, dear. A previous attempt to write the diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index 4e3b1f8c2e81e..1db7ca778d69e 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -792,7 +792,10 @@ static void ext4_xattr_update_super_block(handle_t *handle, BUFFER_TRACE(EXT4_SB(sb)->s_sbh, "get_write_access"); if (ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh) == 0) { + lock_buffer(EXT4_SB(sb)->s_sbh); ext4_set_feature_xattr(sb); + ext4_superblock_csum_set(sb); + unlock_buffer(EXT4_SB(sb)->s_sbh); ext4_handle_dirty_super(handle, sb); } } -- GitLab From 2d01ddc86606564fb08c56e3bc93a0693895f710 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 16 Dec 2020 11:18:40 +0100 Subject: [PATCH 0048/2012] ext4: save error info to sb through journal if available If journalling is still working at the moment we get to writing error information to the superblock we cannot write directly to the superblock as such write could race with journalled update of the superblock and cause journal checksum failures, writing inconsistent information to the journal or other problems. We cannot journal the superblock directly from the error handling functions as we are running in uncertain context and could deadlock so just punt journalled superblock update to a workqueue. Signed-off-by: Jan Kara Link: https://lore.kernel.org/r/20201216101844.22917-5-jack@suse.cz Signed-off-by: Theodore Ts'o --- fs/ext4/super.c | 101 +++++++++++++++++++++++++++++++++++------------- 1 file changed, 75 insertions(+), 26 deletions(-) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 2d653ad142005..9a256c558e044 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -65,6 +65,7 @@ static struct ratelimit_state ext4_mount_msg_ratelimit; static int ext4_load_journal(struct super_block *, struct ext4_super_block *, unsigned long journal_devnum); static int ext4_show_options(struct seq_file *seq, struct dentry *root); +static void ext4_update_super(struct super_block *sb); static int ext4_commit_super(struct super_block *sb); static int ext4_mark_recovery_complete(struct super_block *sb, struct ext4_super_block *es); @@ -586,9 +587,9 @@ static int ext4_errno_to_code(int errno) return EXT4_ERR_UNKNOWN; } -static void __save_error_info(struct super_block *sb, int error, - __u32 ino, __u64 block, - const char *func, unsigned int line) +static void save_error_info(struct super_block *sb, int error, + __u32 ino, __u64 block, + const char *func, unsigned int line) { struct ext4_sb_info *sbi = EXT4_SB(sb); @@ -615,15 +616,6 @@ static void __save_error_info(struct super_block *sb, int error, spin_unlock(&sbi->s_error_lock); } -static void save_error_info(struct super_block *sb, int error, - __u32 ino, __u64 block, - const char *func, unsigned int line) -{ - __save_error_info(sb, error, ino, block, func, line); - if (!bdev_read_only(sb->s_bdev)) - ext4_commit_super(sb); -} - /* Deal with the reporting of failure conditions on a filesystem such as * inconsistencies detected or read IO failures. * @@ -649,20 +641,35 @@ static void ext4_handle_error(struct super_block *sb, bool force_ro, int error, const char *func, unsigned int line) { journal_t *journal = EXT4_SB(sb)->s_journal; + bool continue_fs = !force_ro && test_opt(sb, ERRORS_CONT); EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; if (test_opt(sb, WARN_ON_ERROR)) WARN_ON_ONCE(1); - if (!bdev_read_only(sb->s_bdev)) + if (!continue_fs && !sb_rdonly(sb)) { + ext4_set_mount_flag(sb, EXT4_MF_FS_ABORTED); + if (journal) + jbd2_journal_abort(journal, -EIO); + } + + if (!bdev_read_only(sb->s_bdev)) { save_error_info(sb, error, ino, block, func, line); + /* + * In case the fs should keep running, we need to writeout + * superblock through the journal. Due to lock ordering + * constraints, it may not be safe to do it right here so we + * defer superblock flushing to a workqueue. + */ + if (continue_fs) + schedule_work(&EXT4_SB(sb)->s_error_work); + else + ext4_commit_super(sb); + } - if (sb_rdonly(sb) || (!force_ro && test_opt(sb, ERRORS_CONT))) + if (sb_rdonly(sb) || continue_fs) return; - ext4_set_mount_flag(sb, EXT4_MF_FS_ABORTED); - if (journal) - jbd2_journal_abort(journal, -EIO); /* * We force ERRORS_RO behavior when system is rebooting. Otherwise we * could panic during 'reboot -f' as the underlying device got already @@ -685,7 +692,38 @@ static void flush_stashed_error_work(struct work_struct *work) { struct ext4_sb_info *sbi = container_of(work, struct ext4_sb_info, s_error_work); + journal_t *journal = sbi->s_journal; + handle_t *handle; + /* + * If the journal is still running, we have to write out superblock + * through the journal to avoid collisions of other journalled sb + * updates. + * + * We use directly jbd2 functions here to avoid recursing back into + * ext4 error handling code during handling of previous errors. + */ + if (!sb_rdonly(sbi->s_sb) && journal) { + handle = jbd2_journal_start(journal, 1); + if (IS_ERR(handle)) + goto write_directly; + if (jbd2_journal_get_write_access(handle, sbi->s_sbh)) { + jbd2_journal_stop(handle); + goto write_directly; + } + ext4_update_super(sbi->s_sb); + if (jbd2_journal_dirty_metadata(handle, sbi->s_sbh)) { + jbd2_journal_stop(handle); + goto write_directly; + } + jbd2_journal_stop(handle); + return; + } +write_directly: + /* + * Write through journal failed. Write sb directly to get error info + * out and hope for the best. + */ ext4_commit_super(sbi->s_sb); } @@ -944,9 +982,11 @@ __acquires(bitlock) if (test_opt(sb, WARN_ON_ERROR)) WARN_ON_ONCE(1); EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; - __save_error_info(sb, EFSCORRUPTED, ino, block, function, line); - if (!bdev_read_only(sb->s_bdev)) + if (!bdev_read_only(sb->s_bdev)) { + save_error_info(sb, EFSCORRUPTED, ino, block, function, + line); schedule_work(&EXT4_SB(sb)->s_error_work); + } return; } ext4_unlock_group(sb, grp); @@ -5434,15 +5474,12 @@ err_out: return err; } -static int ext4_commit_super(struct super_block *sb) +/* Copy state of EXT4_SB(sb) into buffer for on-disk superblock */ +static void ext4_update_super(struct super_block *sb) { struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_super_block *es = EXT4_SB(sb)->s_es; struct buffer_head *sbh = EXT4_SB(sb)->s_sbh; - int error = 0; - - if (!sbh || block_device_ejected(sb)) - return error; lock_buffer(sbh); /* @@ -5514,8 +5551,20 @@ static int ext4_commit_super(struct super_block *sb) } spin_unlock(&sbi->s_error_lock); - BUFFER_TRACE(sbh, "marking dirty"); ext4_superblock_csum_set(sb); + unlock_buffer(sbh); +} + +static int ext4_commit_super(struct super_block *sb) +{ + struct buffer_head *sbh = EXT4_SB(sb)->s_sbh; + int error = 0; + + if (!sbh || block_device_ejected(sb)) + return error; + + ext4_update_super(sb); + if (buffer_write_io_error(sbh) || !buffer_uptodate(sbh)) { /* * Oh, dear. A previous attempt to write the @@ -5530,8 +5579,8 @@ static int ext4_commit_super(struct super_block *sb) clear_buffer_write_io_error(sbh); set_buffer_uptodate(sbh); } + BUFFER_TRACE(sbh, "marking dirty"); mark_buffer_dirty(sbh); - unlock_buffer(sbh); error = __sync_dirty_buffer(sbh, REQ_SYNC | (test_opt(sb, BARRIER) ? REQ_FUA : 0)); if (buffer_write_io_error(sbh)) { -- GitLab From e92ad03fa53498f12b3f5ecb8822adc3bf815b28 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 16 Dec 2020 11:18:41 +0100 Subject: [PATCH 0049/2012] ext4: use sbi instead of EXT4_SB(sb) in ext4_update_super() No behavioral change. Signed-off-by: Jan Kara Link: https://lore.kernel.org/r/20201216101844.22917-6-jack@suse.cz Signed-off-by: Theodore Ts'o --- fs/ext4/super.c | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 9a256c558e044..0f0db49031dc9 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -5478,8 +5478,8 @@ err_out: static void ext4_update_super(struct super_block *sb) { struct ext4_sb_info *sbi = EXT4_SB(sb); - struct ext4_super_block *es = EXT4_SB(sb)->s_es; - struct buffer_head *sbh = EXT4_SB(sb)->s_sbh; + struct ext4_super_block *es = sbi->s_es; + struct buffer_head *sbh = sbi->s_sbh; lock_buffer(sbh); /* @@ -5496,21 +5496,20 @@ static void ext4_update_super(struct super_block *sb) ext4_update_tstamp(es, s_wtime); if (sb->s_bdev->bd_part) es->s_kbytes_written = - cpu_to_le64(EXT4_SB(sb)->s_kbytes_written + + cpu_to_le64(sbi->s_kbytes_written + ((part_stat_read(sb->s_bdev->bd_part, sectors[STAT_WRITE]) - - EXT4_SB(sb)->s_sectors_written_start) >> 1)); + sbi->s_sectors_written_start) >> 1)); else - es->s_kbytes_written = - cpu_to_le64(EXT4_SB(sb)->s_kbytes_written); - if (percpu_counter_initialized(&EXT4_SB(sb)->s_freeclusters_counter)) + es->s_kbytes_written = cpu_to_le64(sbi->s_kbytes_written); + if (percpu_counter_initialized(&sbi->s_freeclusters_counter)) ext4_free_blocks_count_set(es, - EXT4_C2B(EXT4_SB(sb), percpu_counter_sum_positive( - &EXT4_SB(sb)->s_freeclusters_counter))); - if (percpu_counter_initialized(&EXT4_SB(sb)->s_freeinodes_counter)) + EXT4_C2B(sbi, percpu_counter_sum_positive( + &sbi->s_freeclusters_counter))); + if (percpu_counter_initialized(&sbi->s_freeinodes_counter)) es->s_free_inodes_count = cpu_to_le32(percpu_counter_sum_positive( - &EXT4_SB(sb)->s_freeinodes_counter)); + &sbi->s_freeinodes_counter)); /* Copy error information to the on-disk superblock */ spin_lock(&sbi->s_error_lock); if (sbi->s_add_error_count > 0) { -- GitLab From dfd56c2c0c0dbb11be939b804ddc8d5395ab3432 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 16 Dec 2020 11:18:43 +0100 Subject: [PATCH 0050/2012] ext4: fix superblock checksum failure when setting password salt When setting password salt in the superblock, we forget to recompute the superblock checksum so it will not match until the next superblock modification which recomputes the checksum. Fix it. CC: Michael Halcrow Reported-by: Andreas Dilger Fixes: 9bd8212f981e ("ext4 crypto: add encryption policy and password salt support") Signed-off-by: Jan Kara Link: https://lore.kernel.org/r/20201216101844.22917-8-jack@suse.cz Signed-off-by: Theodore Ts'o --- fs/ext4/ioctl.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index f0381876a7e5b..106bf149e8ca8 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -1157,7 +1157,10 @@ resizefs_out: err = ext4_journal_get_write_access(handle, sbi->s_sbh); if (err) goto pwsalt_err_journal; + lock_buffer(sbi->s_sbh); generate_random_uuid(sbi->s_es->s_encrypt_pw_salt); + ext4_superblock_csum_set(sb); + unlock_buffer(sbi->s_sbh); err = ext4_handle_dirty_metadata(handle, NULL, sbi->s_sbh); pwsalt_err_journal: -- GitLab From a3f5cf14ff917d46a4d491cf86210fd639d1ff38 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 16 Dec 2020 11:18:44 +0100 Subject: [PATCH 0051/2012] ext4: drop ext4_handle_dirty_super() The wrapper is now useless since it does what ext4_handle_dirty_metadata() does. Just remove it. Signed-off-by: Jan Kara Link: https://lore.kernel.org/r/20201216101844.22917-9-jack@suse.cz Signed-off-by: Theodore Ts'o --- fs/ext4/ext4_jbd2.c | 16 ---------------- fs/ext4/ext4_jbd2.h | 5 ----- fs/ext4/file.c | 2 +- fs/ext4/inode.c | 3 ++- fs/ext4/namei.c | 4 ++-- fs/ext4/resize.c | 8 ++++---- fs/ext4/xattr.c | 2 +- 7 files changed, 10 insertions(+), 30 deletions(-) diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c index c7e410c4ab7d7..be799040a4154 100644 --- a/fs/ext4/ext4_jbd2.c +++ b/fs/ext4/ext4_jbd2.c @@ -372,19 +372,3 @@ int __ext4_handle_dirty_metadata(const char *where, unsigned int line, } return err; } - -int __ext4_handle_dirty_super(const char *where, unsigned int line, - handle_t *handle, struct super_block *sb) -{ - struct buffer_head *bh = EXT4_SB(sb)->s_sbh; - int err = 0; - - if (ext4_handle_valid(handle)) { - err = jbd2_journal_dirty_metadata(handle, bh); - if (err) - ext4_journal_abort_handle(where, line, __func__, - bh, handle, err); - } else - mark_buffer_dirty(bh); - return err; -} diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h index a124c68b0c75e..0d2fa423b7adb 100644 --- a/fs/ext4/ext4_jbd2.h +++ b/fs/ext4/ext4_jbd2.h @@ -244,9 +244,6 @@ int __ext4_handle_dirty_metadata(const char *where, unsigned int line, handle_t *handle, struct inode *inode, struct buffer_head *bh); -int __ext4_handle_dirty_super(const char *where, unsigned int line, - handle_t *handle, struct super_block *sb); - #define ext4_journal_get_write_access(handle, bh) \ __ext4_journal_get_write_access(__func__, __LINE__, (handle), (bh)) #define ext4_forget(handle, is_metadata, inode, bh, block_nr) \ @@ -257,8 +254,6 @@ int __ext4_handle_dirty_super(const char *where, unsigned int line, #define ext4_handle_dirty_metadata(handle, inode, bh) \ __ext4_handle_dirty_metadata(__func__, __LINE__, (handle), (inode), \ (bh)) -#define ext4_handle_dirty_super(handle, sb) \ - __ext4_handle_dirty_super(__func__, __LINE__, (handle), (sb)) handle_t *__ext4_journal_start_sb(struct super_block *sb, unsigned int line, int type, int blocks, int rsv_blocks, diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 26907d5835d00..1cd3d26e3217e 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -814,7 +814,7 @@ static int ext4_sample_last_mounted(struct super_block *sb, sizeof(sbi->s_es->s_last_mounted)); ext4_superblock_csum_set(sb); unlock_buffer(sbi->s_sbh); - ext4_handle_dirty_super(handle, sb); + ext4_handle_dirty_metadata(handle, NULL, sbi->s_sbh); out_journal: ext4_journal_stop(handle); out: diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 27de6f0a33c8a..c173c84058561 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -5155,7 +5155,8 @@ static int ext4_do_update_inode(handle_t *handle, ext4_superblock_csum_set(sb); unlock_buffer(EXT4_SB(sb)->s_sbh); ext4_handle_sync(handle); - err = ext4_handle_dirty_super(handle, sb); + err = ext4_handle_dirty_metadata(handle, NULL, + EXT4_SB(sb)->s_sbh); } ext4_update_inode_fsync_trans(handle, inode, need_datasync); out_brelse: diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 40970a509dccc..a3b28ef2455a8 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -2988,7 +2988,7 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode) mutex_unlock(&sbi->s_orphan_lock); if (dirty) { - err = ext4_handle_dirty_super(handle, sb); + err = ext4_handle_dirty_metadata(handle, NULL, sbi->s_sbh); rc = ext4_mark_iloc_dirty(handle, inode, &iloc); if (!err) err = rc; @@ -3069,7 +3069,7 @@ int ext4_orphan_del(handle_t *handle, struct inode *inode) ext4_superblock_csum_set(inode->i_sb); unlock_buffer(sbi->s_sbh); mutex_unlock(&sbi->s_orphan_lock); - err = ext4_handle_dirty_super(handle, inode->i_sb); + err = ext4_handle_dirty_metadata(handle, NULL, sbi->s_sbh); } else { struct ext4_iloc iloc2; struct inode *i_prev = diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 6155f2b9538ca..bd0d185654f33 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -903,7 +903,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode, le16_add_cpu(&es->s_reserved_gdt_blocks, -1); ext4_superblock_csum_set(sb); unlock_buffer(EXT4_SB(sb)->s_sbh); - err = ext4_handle_dirty_super(handle, sb); + err = ext4_handle_dirty_metadata(handle, NULL, EXT4_SB(sb)->s_sbh); if (err) ext4_std_error(sb, err); return err; @@ -1521,7 +1521,7 @@ static int ext4_flex_group_add(struct super_block *sb, ext4_update_super(sb, flex_gd); - err = ext4_handle_dirty_super(handle, sb); + err = ext4_handle_dirty_metadata(handle, NULL, sbi->s_sbh); exit_journal: err2 = ext4_journal_stop(handle); @@ -1734,7 +1734,7 @@ static int ext4_group_extend_no_check(struct super_block *sb, err = ext4_group_add_blocks(handle, sb, o_blocks_count, add); if (err) goto errout; - ext4_handle_dirty_super(handle, sb); + ext4_handle_dirty_metadata(handle, NULL, EXT4_SB(sb)->s_sbh); ext4_debug("freed blocks %llu through %llu\n", o_blocks_count, o_blocks_count + add); errout: @@ -1891,7 +1891,7 @@ static int ext4_convert_meta_bg(struct super_block *sb, struct inode *inode) ext4_superblock_csum_set(sb); unlock_buffer(sbi->s_sbh); - err = ext4_handle_dirty_super(handle, sb); + err = ext4_handle_dirty_metadata(handle, NULL, sbi->s_sbh); if (err) { ext4_std_error(sb, err); goto errout; diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index 1db7ca778d69e..372208500f4e7 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -796,7 +796,7 @@ static void ext4_xattr_update_super_block(handle_t *handle, ext4_set_feature_xattr(sb); ext4_superblock_csum_set(sb); unlock_buffer(EXT4_SB(sb)->s_sbh); - ext4_handle_dirty_super(handle, sb); + ext4_handle_dirty_metadata(handle, NULL, EXT4_SB(sb)->s_sbh); } } -- GitLab From 5a3b590d4b2db187faa6f06adc9a53d6199fb1f9 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Thu, 17 Dec 2020 13:24:15 -0500 Subject: [PATCH 0052/2012] ext4: don't leak old mountpoint samples When the first file is opened, ext4 samples the mountpoint of the filesystem in 64 bytes of the super block. It does so using strlcpy(), this means that the remaining bytes in the super block string buffer are untouched. If the mount point before had a longer path than the current one, it can be reconstructed. Consider the case where the fs was mounted to "/media/johnjdeveloper" and later to "/". The super block buffer then contains "/\x00edia/johnjdeveloper". This case was seen in the wild and caused confusion how the name of a developer ands up on the super block of a filesystem used in production... Fix this by using strncpy() instead of strlcpy(). The superblock field is defined to be a fixed-size char array, and it is already marked using __nonstring in fs/ext4/ext4.h. The consumer of the field in e2fsprogs already assumes that in the case of a 64+ byte mount path, that s_last_mounted will not be NUL terminated. Link: https://lore.kernel.org/r/X9ujIOJG/HqMr88R@mit.edu Reported-by: Richard Weinberger Signed-off-by: Theodore Ts'o Cc: stable@kernel.org --- fs/ext4/file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 1cd3d26e3217e..349b27f0dda0c 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -810,7 +810,7 @@ static int ext4_sample_last_mounted(struct super_block *sb, if (err) goto out_journal; lock_buffer(sbi->s_sbh); - strlcpy(sbi->s_es->s_last_mounted, cp, + strncpy(sbi->s_es->s_last_mounted, cp, sizeof(sbi->s_es->s_last_mounted)); ext4_superblock_csum_set(sb); unlock_buffer(sbi->s_sbh); -- GitLab From 7887cc89d5851cbdec49219e9614beec776af150 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Mon, 14 Dec 2020 23:34:13 +0100 Subject: [PATCH 0053/2012] ARM: dts: ux500/golden: Set display max brightness A too high brightness by default (default is max) makes the screen go blank. Set this to 15 as in the Vendor tree. Signed-off-by: Linus Walleij Cc: Stephan Gerhold Link: https://lore.kernel.org/r/20201214223413.253893-1-linus.walleij@linaro.org' Signed-off-by: Arnd Bergmann --- arch/arm/boot/dts/ste-ux500-samsung-golden.dts | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/boot/dts/ste-ux500-samsung-golden.dts b/arch/arm/boot/dts/ste-ux500-samsung-golden.dts index a1093cb37dc7a..aed1f2d5f2467 100644 --- a/arch/arm/boot/dts/ste-ux500-samsung-golden.dts +++ b/arch/arm/boot/dts/ste-ux500-samsung-golden.dts @@ -326,6 +326,7 @@ panel@0 { compatible = "samsung,s6e63m0"; reg = <0>; + max-brightness = <15>; vdd3-supply = <&panel_reg_3v0>; vci-supply = <&panel_reg_1v8>; reset-gpios = <&gpio4 11 GPIO_ACTIVE_LOW>; -- GitLab From 3557ae187c32203d1bb8b48ee1e2e7bdb23d98d5 Mon Sep 17 00:00:00 2001 From: Alexandru Elisei Date: Tue, 1 Dec 2020 15:01:53 +0000 Subject: [PATCH 0054/2012] KVM: Documentation: Add arm64 KVM_RUN error codes The API documentation states that general error codes are not detailed, but errors with specific meanings are. On arm64, KVM_RUN can return error numbers with a different meaning than what is described by POSIX or the C99 standard (as taken from man 3 errno). Absent from the newly documented error codes is ERANGE which can be returned when making a change to the EL2 stage 1 tables if the address is larger than the largest supported input address. Assuming no bugs in the implementation, that is not possible because the input addresses which are mapped are the result of applying the macro kern_hyp_va() on kernel virtual addresses. CC: Paolo Bonzini Signed-off-by: Alexandru Elisei Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20201201150157.223625-2-alexandru.elisei@arm.com --- Documentation/virt/kvm/api.rst | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index e00a66d723728..4e5316ed10e90 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -380,9 +380,14 @@ This ioctl is obsolete and has been removed. Errors: - ===== ============================= + ======= ============================================================== EINTR an unmasked signal is pending - ===== ============================= + ENOEXEC the vcpu hasn't been initialized or the guest tried to execute + instructions from device memory (arm64) + ENOSYS data abort outside memslots with no syndrome info and + KVM_CAP_ARM_NISV_TO_USER not enabled (arm64) + EPERM SVE feature set but not finalized (arm64) + ======= ============================================================== This ioctl is used to run a guest virtual cpu. While there are no explicit parameters, there is an implicit parameter block that can be -- GitLab From f16570ba47ff2b3766ebeaba6f4b80ad48cfd6a1 Mon Sep 17 00:00:00 2001 From: Alexandru Elisei Date: Tue, 1 Dec 2020 15:01:54 +0000 Subject: [PATCH 0055/2012] KVM: arm64: arch_timer: Remove VGIC initialization check kvm_timer_enable() is called in kvm_vcpu_first_run_init() after kvm_vgic_map_resources() if the VGIC wasn't ready. kvm_vgic_map_resources() is the only place where kvm->arch.vgic.ready is set to true. For a v2 VGIC, kvm_vgic_map_resources() will attempt to initialize the VGIC and set the initialized flag. For a v3 VGIC, kvm_vgic_map_resources() will return an error code if the VGIC isn't already initialized. The end result is that if we've reached kvm_timer_enable(), the VGIC is initialzed and ready and vgic_initialized() will always be true, so remove this check. Signed-off-by: Alexandru Elisei Reviewed-by: Eric Auger [maz: added comment about vgic initialisation, as suggested by Eric] Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20201201150157.223625-3-alexandru.elisei@arm.com --- arch/arm64/kvm/arch_timer.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c index 32ba6fbc38141..74e0699661e90 100644 --- a/arch/arm64/kvm/arch_timer.c +++ b/arch/arm64/kvm/arch_timer.c @@ -1129,9 +1129,10 @@ int kvm_timer_enable(struct kvm_vcpu *vcpu) if (!irqchip_in_kernel(vcpu->kvm)) goto no_vgic; - if (!vgic_initialized(vcpu->kvm)) - return -ENODEV; - + /* + * At this stage, we have the guarantee that the vgic is both + * available and initialized. + */ if (!timer_irqs_are_valid(vcpu)) { kvm_debug("incorrectly configured timer irqs\n"); return -EINVAL; -- GitLab From 1c91f06d296de4f0c27022f5ec464e047d471215 Mon Sep 17 00:00:00 2001 From: Alexandru Elisei Date: Tue, 1 Dec 2020 15:01:55 +0000 Subject: [PATCH 0056/2012] KVM: arm64: Move double-checked lock to kvm_vgic_map_resources() kvm_vgic_map_resources() is called when a VCPU if first run and it maps all the VGIC MMIO regions. To prevent double-initialization, the VGIC uses the ready variable to keep track of the state of resources and the global KVM mutex to protect against concurrent accesses. After the lock is taken, the variable is checked again in case another VCPU took the lock between the current VCPU reading ready equals false and taking the lock. The double-checked lock pattern is spread across four different functions: in kvm_vcpu_first_run_init(), in kvm_vgic_map_resource() and in vgic_{v2,v3}_map_resources(), which makes it hard to reason about and introduces minor code duplication. Consolidate the checks in kvm_vgic_map_resources(), where the lock is taken. No functional change intended. Signed-off-by: Alexandru Elisei Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20201201150157.223625-4-alexandru.elisei@arm.com --- arch/arm64/kvm/arm.c | 8 +++----- arch/arm64/kvm/vgic/vgic-init.c | 6 ++++++ arch/arm64/kvm/vgic/vgic-v2.c | 3 --- arch/arm64/kvm/vgic/vgic-v3.c | 3 --- 4 files changed, 9 insertions(+), 11 deletions(-) diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index e207e4541f552..ab782c480e9a5 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -580,11 +580,9 @@ static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu) * Map the VGIC hardware resources before running a vcpu the * first time on this VM. */ - if (unlikely(!vgic_ready(kvm))) { - ret = kvm_vgic_map_resources(kvm); - if (ret) - return ret; - } + ret = kvm_vgic_map_resources(kvm); + if (ret) + return ret; } else { /* * Tell the rest of the code that there are userspace irqchip diff --git a/arch/arm64/kvm/vgic/vgic-init.c b/arch/arm64/kvm/vgic/vgic-init.c index 32e32d67a127f..a2f4d1c85f001 100644 --- a/arch/arm64/kvm/vgic/vgic-init.c +++ b/arch/arm64/kvm/vgic/vgic-init.c @@ -428,7 +428,13 @@ int kvm_vgic_map_resources(struct kvm *kvm) struct vgic_dist *dist = &kvm->arch.vgic; int ret = 0; + if (likely(vgic_ready(kvm))) + return 0; + mutex_lock(&kvm->lock); + if (vgic_ready(kvm)) + goto out; + if (!irqchip_in_kernel(kvm)) goto out; diff --git a/arch/arm64/kvm/vgic/vgic-v2.c b/arch/arm64/kvm/vgic/vgic-v2.c index ebf53a4e12963..7f38c1a936395 100644 --- a/arch/arm64/kvm/vgic/vgic-v2.c +++ b/arch/arm64/kvm/vgic/vgic-v2.c @@ -306,9 +306,6 @@ int vgic_v2_map_resources(struct kvm *kvm) struct vgic_dist *dist = &kvm->arch.vgic; int ret = 0; - if (vgic_ready(kvm)) - goto out; - if (IS_VGIC_ADDR_UNDEF(dist->vgic_dist_base) || IS_VGIC_ADDR_UNDEF(dist->vgic_cpu_base)) { kvm_err("Need to set vgic cpu and dist addresses first\n"); diff --git a/arch/arm64/kvm/vgic/vgic-v3.c b/arch/arm64/kvm/vgic/vgic-v3.c index 9cdf39a94a635..35029c5cb0f12 100644 --- a/arch/arm64/kvm/vgic/vgic-v3.c +++ b/arch/arm64/kvm/vgic/vgic-v3.c @@ -500,9 +500,6 @@ int vgic_v3_map_resources(struct kvm *kvm) int ret = 0; int c; - if (vgic_ready(kvm)) - goto out; - kvm_for_each_vcpu(c, vcpu, kvm) { struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; -- GitLab From 9e5c23b9bd71d00b07720b2a8037b019d356e9df Mon Sep 17 00:00:00 2001 From: Alexandru Elisei Date: Tue, 1 Dec 2020 15:01:56 +0000 Subject: [PATCH 0057/2012] KVM: arm64: Update comment in kvm_vgic_map_resources() vgic_v3_map_resources() returns -EBUSY if the VGIC isn't initialized, update the comment to kvm_vgic_map_resources() to match what the function does. Signed-off-by: Alexandru Elisei Reviewed-by: Eric Auger Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20201201150157.223625-5-alexandru.elisei@arm.com --- arch/arm64/kvm/vgic/vgic-init.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kvm/vgic/vgic-init.c b/arch/arm64/kvm/vgic/vgic-init.c index a2f4d1c85f001..5b54787a9ad57 100644 --- a/arch/arm64/kvm/vgic/vgic-init.c +++ b/arch/arm64/kvm/vgic/vgic-init.c @@ -419,7 +419,8 @@ int vgic_lazy_init(struct kvm *kvm) * Map the MMIO regions depending on the VGIC model exposed to the guest * called on the first VCPU run. * Also map the virtual CPU interface into the VM. - * v2/v3 derivatives call vgic_init if not already done. + * v2 calls vgic_init() if not already done. + * v3 and derivatives return an error if the VGIC is not initialized. * vgic_ready() returns true if this function has succeeded. * @kvm: kvm struct pointer */ -- GitLab From 282ff80135717cc43f1e33ddd4b0cd9e760d060b Mon Sep 17 00:00:00 2001 From: Alexandru Elisei Date: Tue, 1 Dec 2020 15:01:57 +0000 Subject: [PATCH 0058/2012] KVM: arm64: Remove redundant call to kvm_pmu_vcpu_reset() KVM_ARM_VCPU_INIT ioctl calls kvm_reset_vcpu(), which in turn resets the PMU with a call to kvm_pmu_vcpu_reset(). The function zeroes the PMU chained counters bitmap and stops all the counters with a perf event attached. Because it is called before the VCPU has had the chance to run, no perf events are in use and none are released. kvm_arm_pmu_v3_enable(), called by kvm_vcpu_first_run_init() only if the VCPU has been initialized, also resets the PMU. kvm_pmu_vcpu_reset() in this case does the exact same thing as the previous call, so remove it. Signed-off-by: Alexandru Elisei Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20201201150157.223625-6-alexandru.elisei@arm.com --- arch/arm64/kvm/pmu-emul.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c index 398f6df1bbe40..4ad66a532e38b 100644 --- a/arch/arm64/kvm/pmu-emul.c +++ b/arch/arm64/kvm/pmu-emul.c @@ -850,8 +850,6 @@ int kvm_arm_pmu_v3_enable(struct kvm_vcpu *vcpu) return -EINVAL; } - kvm_pmu_vcpu_reset(vcpu); - return 0; } -- GitLab From 101068b566ef227b605d807aad9e72efd8b6bc5b Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sun, 27 Dec 2020 14:28:34 +0000 Subject: [PATCH 0059/2012] KVM: arm64: Consolidate dist->ready setting into kvm_vgic_map_resources() dist->ready setting is pointlessly spread across the two vgic backends, while it could be consolidated in kvm_vgic_map_resources(). Move it there, and slightly simplify the flows in both backends. Suggested-by: Eric Auger Signed-off-by: Marc Zyngier --- arch/arm64/kvm/vgic/vgic-init.c | 2 ++ arch/arm64/kvm/vgic/vgic-v2.c | 17 ++++++----------- arch/arm64/kvm/vgic/vgic-v3.c | 18 ++++++------------ 3 files changed, 14 insertions(+), 23 deletions(-) diff --git a/arch/arm64/kvm/vgic/vgic-init.c b/arch/arm64/kvm/vgic/vgic-init.c index 5b54787a9ad57..052917deb1495 100644 --- a/arch/arm64/kvm/vgic/vgic-init.c +++ b/arch/arm64/kvm/vgic/vgic-init.c @@ -446,6 +446,8 @@ int kvm_vgic_map_resources(struct kvm *kvm) if (ret) __kvm_vgic_destroy(kvm); + else + dist->ready = true; out: mutex_unlock(&kvm->lock); diff --git a/arch/arm64/kvm/vgic/vgic-v2.c b/arch/arm64/kvm/vgic/vgic-v2.c index 7f38c1a936395..11934c2af2f42 100644 --- a/arch/arm64/kvm/vgic/vgic-v2.c +++ b/arch/arm64/kvm/vgic/vgic-v2.c @@ -309,14 +309,12 @@ int vgic_v2_map_resources(struct kvm *kvm) if (IS_VGIC_ADDR_UNDEF(dist->vgic_dist_base) || IS_VGIC_ADDR_UNDEF(dist->vgic_cpu_base)) { kvm_err("Need to set vgic cpu and dist addresses first\n"); - ret = -ENXIO; - goto out; + return -ENXIO; } if (!vgic_v2_check_base(dist->vgic_dist_base, dist->vgic_cpu_base)) { kvm_err("VGIC CPU and dist frames overlap\n"); - ret = -EINVAL; - goto out; + return -EINVAL; } /* @@ -326,13 +324,13 @@ int vgic_v2_map_resources(struct kvm *kvm) ret = vgic_init(kvm); if (ret) { kvm_err("Unable to initialize VGIC dynamic data structures\n"); - goto out; + return ret; } ret = vgic_register_dist_iodev(kvm, dist->vgic_dist_base, VGIC_V2); if (ret) { kvm_err("Unable to register VGIC MMIO regions\n"); - goto out; + return ret; } if (!static_branch_unlikely(&vgic_v2_cpuif_trap)) { @@ -341,14 +339,11 @@ int vgic_v2_map_resources(struct kvm *kvm) KVM_VGIC_V2_CPU_SIZE, true); if (ret) { kvm_err("Unable to remap VGIC CPU to VCPU\n"); - goto out; + return ret; } } - dist->ready = true; - -out: - return ret; + return 0; } DEFINE_STATIC_KEY_FALSE(vgic_v2_cpuif_trap); diff --git a/arch/arm64/kvm/vgic/vgic-v3.c b/arch/arm64/kvm/vgic/vgic-v3.c index 35029c5cb0f12..52915b3423514 100644 --- a/arch/arm64/kvm/vgic/vgic-v3.c +++ b/arch/arm64/kvm/vgic/vgic-v3.c @@ -505,21 +505,18 @@ int vgic_v3_map_resources(struct kvm *kvm) if (IS_VGIC_ADDR_UNDEF(vgic_cpu->rd_iodev.base_addr)) { kvm_debug("vcpu %d redistributor base not set\n", c); - ret = -ENXIO; - goto out; + return -ENXIO; } } if (IS_VGIC_ADDR_UNDEF(dist->vgic_dist_base)) { kvm_err("Need to set vgic distributor addresses first\n"); - ret = -ENXIO; - goto out; + return -ENXIO; } if (!vgic_v3_check_base(kvm)) { kvm_err("VGIC redist and dist frames overlap\n"); - ret = -EINVAL; - goto out; + return -EINVAL; } /* @@ -527,22 +524,19 @@ int vgic_v3_map_resources(struct kvm *kvm) * the VGIC before we need to use it. */ if (!vgic_initialized(kvm)) { - ret = -EBUSY; - goto out; + return -EBUSY; } ret = vgic_register_dist_iodev(kvm, dist->vgic_dist_base, VGIC_V3); if (ret) { kvm_err("Unable to register VGICv3 dist MMIO regions\n"); - goto out; + return ret; } if (kvm_vgic_global_state.has_gicv4_1) vgic_v4_configure_vsgis(kvm); - dist->ready = true; -out: - return ret; + return 0; } DEFINE_STATIC_KEY_FALSE(vgic_v3_cpuif_trap); -- GitLab From 105b5ca9b1e38a8db8446a493ca062eea98171eb Mon Sep 17 00:00:00 2001 From: Tomer Tayar Date: Mon, 30 Nov 2020 16:36:30 +0200 Subject: [PATCH 0060/2012] habanalabs: Fix a missing-braces warning Fix a compilation "missing braces around initializer" warning. Signed-off-by: Tomer Tayar Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/habanalabs_ioctl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/misc/habanalabs/common/habanalabs_ioctl.c b/drivers/misc/habanalabs/common/habanalabs_ioctl.c index 32e6af1db4e35..a9927a06c5ea1 100644 --- a/drivers/misc/habanalabs/common/habanalabs_ioctl.c +++ b/drivers/misc/habanalabs/common/habanalabs_ioctl.c @@ -406,7 +406,7 @@ static int total_energy_consumption_info(struct hl_fpriv *hpriv, static int pll_frequency_info(struct hl_fpriv *hpriv, struct hl_info_args *args) { struct hl_device *hdev = hpriv->hdev; - struct hl_pll_frequency_info freq_info = {0}; + struct hl_pll_frequency_info freq_info = {}; u32 max_size = args->return_size; void __user *out = (void __user *) (uintptr_t) args->return_pointer; int rc; -- GitLab From 429f1571e8f0b14ec42b8fb14efcfc0576b2788f Mon Sep 17 00:00:00 2001 From: Alon Mizrahi Date: Tue, 1 Dec 2020 18:44:11 +0200 Subject: [PATCH 0061/2012] habanalabs: add comment for pll frequency ioctl opcode Forgot to add the comment for the opcode when it was added. Signed-off-by: Alon Mizrahi Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- include/uapi/misc/habanalabs.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h index 8c15a7d336a0d..dc8bcec195ccf 100644 --- a/include/uapi/misc/habanalabs.h +++ b/include/uapi/misc/habanalabs.h @@ -279,6 +279,7 @@ enum hl_device_status { * HL_INFO_CLK_THROTTLE_REASON - Retrieve clock throttling reason * HL_INFO_SYNC_MANAGER - Retrieve sync manager info per dcore * HL_INFO_TOTAL_ENERGY - Retrieve total energy consumption + * HL_INFO_PLL_FREQUENCY - Retrieve PLL frequency */ #define HL_INFO_HW_IP_INFO 0 #define HL_INFO_HW_EVENTS 1 -- GitLab From 4783489951b78525a6e61b43936cbbd88b7938af Mon Sep 17 00:00:00 2001 From: Alon Mizrahi Date: Thu, 26 Nov 2020 13:05:20 +0200 Subject: [PATCH 0062/2012] habanalabs: fetch PSOC PLL frequency from F/W in goya When the F/W security is enabled, goya needs to fetch the PSOC pll frequency through a dedicated interface Signed-off-by: Alon Mizrahi Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/goya/goya.c | 61 ++++++++++++++++++----------- 1 file changed, 38 insertions(+), 23 deletions(-) diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c index 3e5eb9e3d7bd8..b66fd55accb50 100644 --- a/drivers/misc/habanalabs/goya/goya.c +++ b/drivers/misc/habanalabs/goya/goya.c @@ -694,32 +694,47 @@ static void goya_qman0_set_security(struct hl_device *hdev, bool secure) static void goya_fetch_psoc_frequency(struct hl_device *hdev) { struct asic_fixed_properties *prop = &hdev->asic_prop; - u32 trace_freq = 0; - u32 pll_clk = 0; - u32 div_fctr = RREG32(mmPSOC_PCI_PLL_DIV_FACTOR_1); - u32 div_sel = RREG32(mmPSOC_PCI_PLL_DIV_SEL_1); - u32 nr = RREG32(mmPSOC_PCI_PLL_NR); - u32 nf = RREG32(mmPSOC_PCI_PLL_NF); - u32 od = RREG32(mmPSOC_PCI_PLL_OD); - - if (div_sel == DIV_SEL_REF_CLK || div_sel == DIV_SEL_DIVIDED_REF) { - if (div_sel == DIV_SEL_REF_CLK) - trace_freq = PLL_REF_CLK; - else - trace_freq = PLL_REF_CLK / (div_fctr + 1); - } else if (div_sel == DIV_SEL_PLL_CLK || - div_sel == DIV_SEL_DIVIDED_PLL) { - pll_clk = PLL_REF_CLK * (nf + 1) / ((nr + 1) * (od + 1)); - if (div_sel == DIV_SEL_PLL_CLK) - trace_freq = pll_clk; - else - trace_freq = pll_clk / (div_fctr + 1); + u32 nr = 0, nf = 0, od = 0, div_fctr = 0, pll_clk, div_sel; + u16 pll_freq_arr[HL_PLL_NUM_OUTPUTS], freq; + int rc; + + if (hdev->asic_prop.fw_security_disabled) { + div_fctr = RREG32(mmPSOC_PCI_PLL_DIV_FACTOR_1); + div_sel = RREG32(mmPSOC_PCI_PLL_DIV_SEL_1); + nr = RREG32(mmPSOC_PCI_PLL_NR); + nf = RREG32(mmPSOC_PCI_PLL_NF); + od = RREG32(mmPSOC_PCI_PLL_OD); + + if (div_sel == DIV_SEL_REF_CLK || + div_sel == DIV_SEL_DIVIDED_REF) { + if (div_sel == DIV_SEL_REF_CLK) + freq = PLL_REF_CLK; + else + freq = PLL_REF_CLK / (div_fctr + 1); + } else if (div_sel == DIV_SEL_PLL_CLK || + div_sel == DIV_SEL_DIVIDED_PLL) { + pll_clk = PLL_REF_CLK * (nf + 1) / + ((nr + 1) * (od + 1)); + if (div_sel == DIV_SEL_PLL_CLK) + freq = pll_clk; + else + freq = pll_clk / (div_fctr + 1); + } else { + dev_warn(hdev->dev, + "Received invalid div select value: %d", + div_sel); + freq = 0; + } } else { - dev_warn(hdev->dev, - "Received invalid div select value: %d", div_sel); + rc = hl_fw_cpucp_pll_info_get(hdev, PCI_PLL, pll_freq_arr); + + if (rc) + return; + + freq = pll_freq_arr[1]; } - prop->psoc_timestamp_frequency = trace_freq; + prop->psoc_timestamp_frequency = freq; prop->psoc_pci_pll_nr = nr; prop->psoc_pci_pll_nf = nf; prop->psoc_pci_pll_od = od; -- GitLab From 6585489e808d9964dbde9dad89ac8e792e1185fc Mon Sep 17 00:00:00 2001 From: Alon Mizrahi Date: Thu, 19 Nov 2020 16:34:19 +0200 Subject: [PATCH 0063/2012] habanalabs: remove generic gaudi get_pll_freq function As we only fetch the CPU_PLL frequency in gaudi, we don't need a generic get_pll_frequency function which takes a pll index as input Signed-off-by: Alon Mizrahi Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- .../misc/habanalabs/common/habanalabs_ioctl.c | 2 +- drivers/misc/habanalabs/gaudi/gaudi.c | 126 ++++++------------ drivers/misc/habanalabs/gaudi/gaudiP.h | 7 - 3 files changed, 41 insertions(+), 94 deletions(-) diff --git a/drivers/misc/habanalabs/common/habanalabs_ioctl.c b/drivers/misc/habanalabs/common/habanalabs_ioctl.c index a9927a06c5ea1..a0c0d20f6f8ff 100644 --- a/drivers/misc/habanalabs/common/habanalabs_ioctl.c +++ b/drivers/misc/habanalabs/common/habanalabs_ioctl.c @@ -406,7 +406,7 @@ static int total_energy_consumption_info(struct hl_fpriv *hpriv, static int pll_frequency_info(struct hl_fpriv *hpriv, struct hl_info_args *args) { struct hl_device *hdev = hpriv->hdev; - struct hl_pll_frequency_info freq_info = {}; + struct hl_pll_frequency_info freq_info = { {0} }; u32 max_size = args->return_size; void __user *out = (void __user *) (uintptr_t) args->return_pointer; int rc; diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c index 1f1926607c5e7..278c4de98e22e 100644 --- a/drivers/misc/habanalabs/gaudi/gaudi.c +++ b/drivers/misc/habanalabs/gaudi/gaudi.c @@ -151,19 +151,6 @@ static const u16 gaudi_packet_sizes[MAX_PACKET_ID] = { [PACKET_LOAD_AND_EXE] = sizeof(struct packet_load_and_exe) }; -static const u32 gaudi_pll_base_addresses[GAUDI_PLL_MAX] = { - [CPU_PLL] = mmPSOC_CPU_PLL_NR, - [PCI_PLL] = mmPSOC_PCI_PLL_NR, - [SRAM_PLL] = mmSRAM_W_PLL_NR, - [HBM_PLL] = mmPSOC_HBM_PLL_NR, - [NIC_PLL] = mmNIC0_PLL_NR, - [DMA_PLL] = mmDMA_W_PLL_NR, - [MESH_PLL] = mmMESH_W_PLL_NR, - [MME_PLL] = mmPSOC_MME_PLL_NR, - [TPC_PLL] = mmPSOC_TPC_PLL_NR, - [IF_PLL] = mmIF_W_PLL_NR -}; - static inline bool validate_packet_id(enum packet_id id) { switch (id) { @@ -703,93 +690,60 @@ static int gaudi_early_fini(struct hl_device *hdev) } /** - * gaudi_fetch_pll_frequency - Fetch PLL frequency values + * gaudi_fetch_psoc_frequency - Fetch PSOC frequency values * * @hdev: pointer to hl_device structure - * @pll_index: index of the pll to fetch frequency from - * @pll_freq: pointer to store the pll frequency in MHz in each of the available - * outputs. if a certain output is not available a 0 will be set * */ -static int gaudi_fetch_pll_frequency(struct hl_device *hdev, - enum gaudi_pll_index pll_index, - u16 *pll_freq_arr) +static int gaudi_fetch_psoc_frequency(struct hl_device *hdev) { - u32 nr = 0, nf = 0, od = 0, pll_clk = 0, div_fctr, div_sel, - pll_base_addr = gaudi_pll_base_addresses[pll_index]; - u16 freq = 0; - int i, rc; - - if (hdev->asic_prop.fw_security_status_valid && - (hdev->asic_prop.fw_app_security_map & - CPU_BOOT_DEV_STS0_PLL_INFO_EN)) { - rc = hl_fw_cpucp_pll_info_get(hdev, pll_index, pll_freq_arr); + struct asic_fixed_properties *prop = &hdev->asic_prop; + u32 nr = 0, nf = 0, od = 0, div_fctr = 0, pll_clk, div_sel; + u16 pll_freq_arr[HL_PLL_NUM_OUTPUTS], freq; + int rc; - if (rc) - return rc; - } else if (hdev->asic_prop.fw_security_disabled) { + if (hdev->asic_prop.fw_security_disabled) { /* Backward compatibility */ - nr = RREG32(pll_base_addr + PLL_NR_OFFSET); - nf = RREG32(pll_base_addr + PLL_NF_OFFSET); - od = RREG32(pll_base_addr + PLL_OD_OFFSET); - - for (i = 0; i < HL_PLL_NUM_OUTPUTS; i++) { - div_fctr = RREG32(pll_base_addr + - PLL_DIV_FACTOR_0_OFFSET + i * 4); - div_sel = RREG32(pll_base_addr + - PLL_DIV_SEL_0_OFFSET + i * 4); + div_fctr = RREG32(mmPSOC_CPU_PLL_DIV_FACTOR_2); + div_sel = RREG32(mmPSOC_CPU_PLL_DIV_SEL_2); + nr = RREG32(mmPSOC_CPU_PLL_NR); + nf = RREG32(mmPSOC_CPU_PLL_NF); + od = RREG32(mmPSOC_CPU_PLL_OD); - if (div_sel == DIV_SEL_REF_CLK || + if (div_sel == DIV_SEL_REF_CLK || div_sel == DIV_SEL_DIVIDED_REF) { - if (div_sel == DIV_SEL_REF_CLK) - freq = PLL_REF_CLK; - else - freq = PLL_REF_CLK / (div_fctr + 1); - } else if (div_sel == DIV_SEL_PLL_CLK || - div_sel == DIV_SEL_DIVIDED_PLL) { - pll_clk = PLL_REF_CLK * (nf + 1) / - ((nr + 1) * (od + 1)); - if (div_sel == DIV_SEL_PLL_CLK) - freq = pll_clk; - else - freq = pll_clk / (div_fctr + 1); - } else { - dev_warn(hdev->dev, - "Received invalid div select value: %d", - div_sel); - } - - pll_freq_arr[i] = freq; + if (div_sel == DIV_SEL_REF_CLK) + freq = PLL_REF_CLK; + else + freq = PLL_REF_CLK / (div_fctr + 1); + } else if (div_sel == DIV_SEL_PLL_CLK || + div_sel == DIV_SEL_DIVIDED_PLL) { + pll_clk = PLL_REF_CLK * (nf + 1) / + ((nr + 1) * (od + 1)); + if (div_sel == DIV_SEL_PLL_CLK) + freq = pll_clk; + else + freq = pll_clk / (div_fctr + 1); + } else { + dev_warn(hdev->dev, + "Received invalid div select value: %d", + div_sel); + freq = 0; } } else { - dev_err(hdev->dev, "Failed to fetch PLL frequency values\n"); - return -EIO; - } - - return 0; -} + rc = hl_fw_cpucp_pll_info_get(hdev, CPU_PLL, pll_freq_arr); -/** - * gaudi_fetch_psoc_frequency - Fetch PSOC frequency values - * - * @hdev: pointer to hl_device structure - * - */ -static int gaudi_fetch_psoc_frequency(struct hl_device *hdev) -{ - struct asic_fixed_properties *prop = &hdev->asic_prop; - u16 pll_freq[HL_PLL_NUM_OUTPUTS]; - int rc; + if (rc) + return rc; - rc = gaudi_fetch_pll_frequency(hdev, CPU_PLL, pll_freq); - if (rc) - return rc; + freq = pll_freq_arr[2]; + } - prop->psoc_timestamp_frequency = pll_freq[2]; - prop->psoc_pci_pll_nr = 0; - prop->psoc_pci_pll_nf = 0; - prop->psoc_pci_pll_od = 0; - prop->psoc_pci_pll_div_factor = 0; + prop->psoc_timestamp_frequency = freq; + prop->psoc_pci_pll_nr = nr; + prop->psoc_pci_pll_nf = nf; + prop->psoc_pci_pll_od = od; + prop->psoc_pci_pll_div_factor = div_fctr; return 0; } diff --git a/drivers/misc/habanalabs/gaudi/gaudiP.h b/drivers/misc/habanalabs/gaudi/gaudiP.h index f2d91f4fcffea..a7ab2d7e57d44 100644 --- a/drivers/misc/habanalabs/gaudi/gaudiP.h +++ b/drivers/misc/habanalabs/gaudi/gaudiP.h @@ -105,13 +105,6 @@ #define MME_ACC_OFFSET (mmMME1_ACC_BASE - mmMME0_ACC_BASE) #define SRAM_BANK_OFFSET (mmSRAM_Y0_X1_RTR_BASE - mmSRAM_Y0_X0_RTR_BASE) -#define PLL_NR_OFFSET 0 -#define PLL_NF_OFFSET (mmPSOC_CPU_PLL_NF - mmPSOC_CPU_PLL_NR) -#define PLL_OD_OFFSET (mmPSOC_CPU_PLL_OD - mmPSOC_CPU_PLL_NR) -#define PLL_DIV_FACTOR_0_OFFSET (mmPSOC_CPU_PLL_DIV_FACTOR_0 - \ - mmPSOC_CPU_PLL_NR) -#define PLL_DIV_SEL_0_OFFSET (mmPSOC_CPU_PLL_DIV_SEL_0 - mmPSOC_CPU_PLL_NR) - #define NUM_OF_SOB_IN_BLOCK \ (((mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_2047 - \ mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0) + 4) >> 2) -- GitLab From 9c9013cbd8338ff8eac732d115c9005bc512cbc5 Mon Sep 17 00:00:00 2001 From: Ofir Bitton Date: Tue, 1 Dec 2020 10:39:54 +0200 Subject: [PATCH 0064/2012] habanalabs: preboot hard reset support FW hard reset capability indication is now moved to preboot stage. Driver will check if HW is dirty only after it validated preboot is up. If HW is dirty, driver will perform a hard reset according to the FW capability. In addition, FW defines a new message which driver need to send in order to initiate a hard reset. Signed-off-by: Ofir Bitton Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/firmware_if.c | 25 ++++++++++--------- drivers/misc/habanalabs/gaudi/gaudi.c | 17 +++++++------ drivers/misc/habanalabs/goya/goya.c | 12 ++++----- .../habanalabs/include/common/hl_boot_if.h | 2 ++ 4 files changed, 31 insertions(+), 25 deletions(-) diff --git a/drivers/misc/habanalabs/common/firmware_if.c b/drivers/misc/habanalabs/common/firmware_if.c index 0e1c629e9800a..c970bfc6db66c 100644 --- a/drivers/misc/habanalabs/common/firmware_if.c +++ b/drivers/misc/habanalabs/common/firmware_if.c @@ -627,7 +627,9 @@ int hl_fw_read_preboot_status(struct hl_device *hdev, u32 cpu_boot_status_reg, security_status = RREG32(cpu_security_boot_status_reg); /* We read security status multiple times during boot: - * 1. preboot - we check if fw security feature is supported + * 1. preboot - a. Check whether the security status bits are valid + * b. Check whether fw security is enabled + * c. Check whether hard reset is done by fw * 2. boot cpu - we get boot cpu security status * 3. FW application - we get FW application security status * @@ -637,13 +639,20 @@ int hl_fw_read_preboot_status(struct hl_device *hdev, u32 cpu_boot_status_reg, */ if (security_status & CPU_BOOT_DEV_STS0_ENABLED) { hdev->asic_prop.fw_security_status_valid = 1; - prop->fw_security_disabled = - !(security_status & CPU_BOOT_DEV_STS0_SECURITY_EN); + + if (!(security_status & CPU_BOOT_DEV_STS0_SECURITY_EN)) + prop->fw_security_disabled = true; + + if (security_status & CPU_BOOT_DEV_STS0_FW_HARD_RST_EN) + hdev->asic_prop.hard_reset_done_by_fw = true; } else { hdev->asic_prop.fw_security_status_valid = 0; prop->fw_security_disabled = true; } + dev_dbg(hdev->dev, "Firmware hard-reset is %s\n", + hdev->asic_prop.hard_reset_done_by_fw ? "enabled" : "disabled"); + dev_info(hdev->dev, "firmware-level security is %s\n", prop->fw_security_disabled ? "disabled" : "enabled"); @@ -797,18 +806,10 @@ int hl_fw_init_cpu(struct hl_device *hdev, u32 cpu_boot_status_reg, } /* Read FW application security bits */ - if (hdev->asic_prop.fw_security_status_valid) { + if (hdev->asic_prop.fw_security_status_valid) hdev->asic_prop.fw_app_security_map = RREG32(cpu_security_boot_status_reg); - if (hdev->asic_prop.fw_app_security_map & - CPU_BOOT_DEV_STS0_FW_HARD_RST_EN) - hdev->asic_prop.hard_reset_done_by_fw = true; - } - - dev_dbg(hdev->dev, "Firmware hard-reset is %s\n", - hdev->asic_prop.hard_reset_done_by_fw ? "enabled" : "disabled"); - dev_info(hdev->dev, "Successfully loaded firmware to device\n"); out: diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c index 278c4de98e22e..e465c158eaeb8 100644 --- a/drivers/misc/habanalabs/gaudi/gaudi.c +++ b/drivers/misc/habanalabs/gaudi/gaudi.c @@ -654,12 +654,6 @@ static int gaudi_early_init(struct hl_device *hdev) if (rc) goto free_queue_props; - if (gaudi_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) { - dev_info(hdev->dev, - "H/W state is dirty, must reset before initializing\n"); - hdev->asic_funcs->hw_fini(hdev, true); - } - /* Before continuing in the initialization, we need to read the preboot * version to determine whether we run with a security-enabled firmware */ @@ -672,6 +666,12 @@ static int gaudi_early_init(struct hl_device *hdev) goto pci_fini; } + if (gaudi_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) { + dev_info(hdev->dev, + "H/W state is dirty, must reset before initializing\n"); + hdev->asic_funcs->hw_fini(hdev, true); + } + return 0; pci_fini: @@ -3881,7 +3881,10 @@ static void gaudi_hw_fini(struct hl_device *hdev, bool hard_reset) /* I don't know what is the state of the CPU so make sure it is * stopped in any means necessary */ - WREG32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU, KMD_MSG_GOTO_WFE); + if (hdev->asic_prop.hard_reset_done_by_fw) + WREG32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU, KMD_MSG_RST_DEV); + else + WREG32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU, KMD_MSG_GOTO_WFE); WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR, GAUDI_EVENT_HALT_MACHINE); diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c index b66fd55accb50..d61177bf36a5a 100644 --- a/drivers/misc/habanalabs/goya/goya.c +++ b/drivers/misc/habanalabs/goya/goya.c @@ -613,12 +613,6 @@ static int goya_early_init(struct hl_device *hdev) if (rc) goto free_queue_props; - if (goya_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) { - dev_info(hdev->dev, - "H/W state is dirty, must reset before initializing\n"); - hdev->asic_funcs->hw_fini(hdev, true); - } - /* Before continuing in the initialization, we need to read the preboot * version to determine whether we run with a security-enabled firmware */ @@ -631,6 +625,12 @@ static int goya_early_init(struct hl_device *hdev) goto pci_fini; } + if (goya_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) { + dev_info(hdev->dev, + "H/W state is dirty, must reset before initializing\n"); + hdev->asic_funcs->hw_fini(hdev, true); + } + if (!hdev->pldm) { val = RREG32(mmPSOC_GLOBAL_CONF_BOOT_STRAP_PINS); if (val & PSOC_GLOBAL_CONF_BOOT_STRAP_PINS_SRIOV_EN_MASK) diff --git a/drivers/misc/habanalabs/include/common/hl_boot_if.h b/drivers/misc/habanalabs/include/common/hl_boot_if.h index e5801ecf0cb23..755c4800f0028 100644 --- a/drivers/misc/habanalabs/include/common/hl_boot_if.h +++ b/drivers/misc/habanalabs/include/common/hl_boot_if.h @@ -204,6 +204,8 @@ enum kmd_msg { KMD_MSG_GOTO_WFE, KMD_MSG_FIT_RDY, KMD_MSG_SKIP_BMC, + RESERVED, + KMD_MSG_RST_DEV, }; enum cpu_msg_status { -- GitLab From 72ab9ca52de6856380c26b2045aa826ae4308b76 Mon Sep 17 00:00:00 2001 From: Alon Mizrahi Date: Wed, 2 Dec 2020 19:55:30 +0200 Subject: [PATCH 0065/2012] habanalabs/gaudi: do not set EB in collective slave queues We don't need to set EB on signal packets from collective slave queues as it degrades performance. Because the slaves are the network queues, the engine barrier doesn't actually guarantee that the packet has been sent. Signed-off-by: Alon Mizrahi Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/habanalabs.h | 2 +- drivers/misc/habanalabs/common/hw_queue.c | 5 ++++- drivers/misc/habanalabs/gaudi/gaudi.c | 8 ++++---- drivers/misc/habanalabs/goya/goya.c | 2 +- 4 files changed, 10 insertions(+), 7 deletions(-) diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h index 571eda6ef5ab0..70b778a0d60e2 100644 --- a/drivers/misc/habanalabs/common/habanalabs.h +++ b/drivers/misc/habanalabs/common/habanalabs.h @@ -944,7 +944,7 @@ struct hl_asic_funcs { u32 (*get_signal_cb_size)(struct hl_device *hdev); u32 (*get_wait_cb_size)(struct hl_device *hdev); u32 (*gen_signal_cb)(struct hl_device *hdev, void *data, u16 sob_id, - u32 size); + u32 size, bool eb); u32 (*gen_wait_cb)(struct hl_device *hdev, struct hl_gen_wait_properties *prop); void (*reset_sob)(struct hl_device *hdev, void *data); diff --git a/drivers/misc/habanalabs/common/hw_queue.c b/drivers/misc/habanalabs/common/hw_queue.c index 7caf868d1585c..76217258780a4 100644 --- a/drivers/misc/habanalabs/common/hw_queue.c +++ b/drivers/misc/habanalabs/common/hw_queue.c @@ -418,8 +418,11 @@ static void init_signal_cs(struct hl_device *hdev, "generate signal CB, sob_id: %d, sob val: 0x%x, q_idx: %d\n", cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val, q_idx); + /* we set an EB since we must make sure all oeprations are done + * when sending the signal + */ hdev->asic_funcs->gen_signal_cb(hdev, job->patched_cb, - cs_cmpl->hw_sob->sob_id, 0); + cs_cmpl->hw_sob->sob_id, 0, true); kref_get(&hw_sob->kref); diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c index e465c158eaeb8..65895ba075fe2 100644 --- a/drivers/misc/habanalabs/gaudi/gaudi.c +++ b/drivers/misc/habanalabs/gaudi/gaudi.c @@ -361,7 +361,7 @@ static int gaudi_cpucp_info_get(struct hl_device *hdev); static void gaudi_disable_clock_gating(struct hl_device *hdev); static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid); static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id, - u32 size); + u32 size, bool eb); static u32 gaudi_gen_wait_cb(struct hl_device *hdev, struct hl_gen_wait_properties *prop); @@ -1064,7 +1064,7 @@ static void gaudi_collective_slave_init_job(struct hl_device *hdev, prop->collective_sob_id, queue_id); cb_size += gaudi_gen_signal_cb(hdev, job->user_cb, - prop->collective_sob_id, cb_size); + prop->collective_sob_id, cb_size, false); } static void gaudi_collective_wait_init_cs(struct hl_cs *cs) @@ -7893,7 +7893,7 @@ static u32 gaudi_get_wait_cb_size(struct hl_device *hdev) } static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id, - u32 size) + u32 size, bool eb) { struct hl_cb *cb = (struct hl_cb *) data; struct packet_msg_short *pkt; @@ -7910,7 +7910,7 @@ static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id, ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */ ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 3); /* W_S SOB base */ ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OPCODE_MASK, PACKET_MSG_SHORT); - ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_EB_MASK, 1); + ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_EB_MASK, eb); ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_RB_MASK, 1); ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_MB_MASK, 1); diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c index d61177bf36a5a..b8b4aa636b7cb 100644 --- a/drivers/misc/habanalabs/goya/goya.c +++ b/drivers/misc/habanalabs/goya/goya.c @@ -5339,7 +5339,7 @@ static u32 goya_get_wait_cb_size(struct hl_device *hdev) } static u32 goya_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id, - u32 size) + u32 size, bool eb) { return 0; } -- GitLab From 7a585dfc32110a106f70474c6fa822d912a92c7e Mon Sep 17 00:00:00 2001 From: Tomer Tayar Date: Mon, 30 Nov 2020 14:56:06 +0200 Subject: [PATCH 0066/2012] habanalabs: Revise comment to align with mirror list name hw_queues_mirror was renamed to cs_mirror, so revise accordingly a comment that refers to this list. Signed-off-by: Tomer Tayar Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/command_submission.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/misc/habanalabs/common/command_submission.c b/drivers/misc/habanalabs/common/command_submission.c index beb482310a586..92c1c516b65f3 100644 --- a/drivers/misc/habanalabs/common/command_submission.c +++ b/drivers/misc/habanalabs/common/command_submission.c @@ -562,7 +562,7 @@ void hl_cs_rollback_all(struct hl_device *hdev) for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++) flush_workqueue(hdev->cq_wq[i]); - /* Make sure we don't have leftovers in the H/W queues mirror list */ + /* Make sure we don't have leftovers in the CS mirror list */ list_for_each_entry_safe(cs, tmp, &hdev->cs_mirror_list, mirror_node) { cs_get(cs); cs->aborted = true; -- GitLab From 0024c094851f718ccb0b797255292bdce850a01f Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Sat, 5 Dec 2020 22:55:09 +0200 Subject: [PATCH 0067/2012] habanalabs/gaudi: disable CGM at HW initialization In case the clock gating was enabled in preboot we need to disable it at the H/W initialization stage before touching the MME/TPC registers. Otherwise, the ASIC can get stuck. If the security is enabled in the firmware level, the CGM is always disabled and the driver can't enable it. Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/gaudi/gaudi.c | 14 +++++++++++--- .../misc/habanalabs/include/common/hl_boot_if.h | 5 +++++ 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c index 65895ba075fe2..f316b898e8e05 100644 --- a/drivers/misc/habanalabs/gaudi/gaudi.c +++ b/drivers/misc/habanalabs/gaudi/gaudi.c @@ -2403,8 +2403,6 @@ static void gaudi_init_golden_registers(struct hl_device *hdev) gaudi_init_e2e(hdev); gaudi_init_hbm_cred(hdev); - hdev->asic_funcs->disable_clock_gating(hdev); - for (tpc_id = 0, tpc_offset = 0; tpc_id < TPC_NUMBER_OF_ENGINES; tpc_id++, tpc_offset += TPC_CFG_OFFSET) { @@ -3416,6 +3414,9 @@ static void gaudi_set_clock_gating(struct hl_device *hdev) if (hdev->in_debug) return; + if (!hdev->asic_prop.fw_security_disabled) + return; + for (i = GAUDI_PCI_DMA_1, qman_offset = 0 ; i < GAUDI_HBM_DMA_1 ; i++) { enable = !!(hdev->clock_gating_mask & (BIT_ULL(gaudi_dma_assignment[i]))); @@ -3467,7 +3468,7 @@ static void gaudi_disable_clock_gating(struct hl_device *hdev) u32 qman_offset; int i; - if (!(gaudi->hw_cap_initialized & HW_CAP_CLK_GATE)) + if (!hdev->asic_prop.fw_security_disabled) return; for (i = 0, qman_offset = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) { @@ -3801,6 +3802,13 @@ static int gaudi_hw_init(struct hl_device *hdev) return rc; } + /* In case the clock gating was enabled in preboot we need to disable + * it here before touching the MME/TPC registers. + * There is no need to take clk gating mutex because when this function + * runs, no other relevant code can run + */ + hdev->asic_funcs->disable_clock_gating(hdev); + /* SRAM scrambler must be initialized after CPU is running from HBM */ gaudi_init_scrambler_sram(hdev); diff --git a/drivers/misc/habanalabs/include/common/hl_boot_if.h b/drivers/misc/habanalabs/include/common/hl_boot_if.h index 755c4800f0028..7cb5f2d3e5654 100644 --- a/drivers/misc/habanalabs/include/common/hl_boot_if.h +++ b/drivers/misc/habanalabs/include/common/hl_boot_if.h @@ -150,6 +150,10 @@ * CPU_BOOT_DEV_STS0_PLL_INFO_EN FW retrieval of PLL info is enabled. * Initialized in: linux * + * CPU_BOOT_DEV_STS0_CLK_GATE_EN Clock Gating enabled. + * FW initialized Clock Gating. + * Initialized in: preboot + * * CPU_BOOT_DEV_STS0_ENABLED Device status register enabled. * This is a main indication that the * running FW populates the device status @@ -171,6 +175,7 @@ #define CPU_BOOT_DEV_STS0_DRAM_SCR_EN (1 << 9) #define CPU_BOOT_DEV_STS0_FW_HARD_RST_EN (1 << 10) #define CPU_BOOT_DEV_STS0_PLL_INFO_EN (1 << 11) +#define CPU_BOOT_DEV_STS0_CLK_GATE_EN (1 << 13) #define CPU_BOOT_DEV_STS0_ENABLED (1 << 31) enum cpu_boot_status { -- GitLab From 6bbb77b9e6f0bd5595724b7c0cb1189afdd133d3 Mon Sep 17 00:00:00 2001 From: Ofir Bitton Date: Sun, 6 Dec 2020 14:00:35 +0200 Subject: [PATCH 0068/2012] habanalabs: full FW hard reset support Driver must fetch FW hard reset capability at every FW boot stage: preboot, CPU boot, CPU application. If hard reset is triggered, driver will take into consideration only the last capability received. Signed-off-by: Ofir Bitton Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/firmware_if.c | 55 +++++++++++++++----- 1 file changed, 41 insertions(+), 14 deletions(-) diff --git a/drivers/misc/habanalabs/common/firmware_if.c b/drivers/misc/habanalabs/common/firmware_if.c index c970bfc6db66c..20f77f58edef5 100644 --- a/drivers/misc/habanalabs/common/firmware_if.c +++ b/drivers/misc/habanalabs/common/firmware_if.c @@ -629,32 +629,36 @@ int hl_fw_read_preboot_status(struct hl_device *hdev, u32 cpu_boot_status_reg, /* We read security status multiple times during boot: * 1. preboot - a. Check whether the security status bits are valid * b. Check whether fw security is enabled - * c. Check whether hard reset is done by fw - * 2. boot cpu - we get boot cpu security status - * 3. FW application - we get FW application security status + * c. Check whether hard reset is done by preboot + * 2. boot cpu - a. Fetch boot cpu security status + * b. Check whether hard reset is done by boot cpu + * 3. FW application - a. Fetch fw application security status + * b. Check whether hard reset is done by fw app * * Preboot: * Check security status bit (CPU_BOOT_DEV_STS0_ENABLED), if it is set * check security enabled bit (CPU_BOOT_DEV_STS0_SECURITY_EN) */ if (security_status & CPU_BOOT_DEV_STS0_ENABLED) { - hdev->asic_prop.fw_security_status_valid = 1; + prop->fw_security_status_valid = 1; - if (!(security_status & CPU_BOOT_DEV_STS0_SECURITY_EN)) + if (security_status & CPU_BOOT_DEV_STS0_SECURITY_EN) + prop->fw_security_disabled = false; + else prop->fw_security_disabled = true; if (security_status & CPU_BOOT_DEV_STS0_FW_HARD_RST_EN) - hdev->asic_prop.hard_reset_done_by_fw = true; + prop->hard_reset_done_by_fw = true; } else { - hdev->asic_prop.fw_security_status_valid = 0; + prop->fw_security_status_valid = 0; prop->fw_security_disabled = true; } - dev_dbg(hdev->dev, "Firmware hard-reset is %s\n", - hdev->asic_prop.hard_reset_done_by_fw ? "enabled" : "disabled"); + dev_dbg(hdev->dev, "Firmware preboot hard-reset is %s\n", + prop->hard_reset_done_by_fw ? "enabled" : "disabled"); dev_info(hdev->dev, "firmware-level security is %s\n", - prop->fw_security_disabled ? "disabled" : "enabled"); + prop->fw_security_disabled ? "disabled" : "enabled"); return 0; } @@ -664,6 +668,7 @@ int hl_fw_init_cpu(struct hl_device *hdev, u32 cpu_boot_status_reg, u32 cpu_security_boot_status_reg, u32 boot_err0_reg, bool skip_bmc, u32 cpu_timeout, u32 boot_fit_timeout) { + struct asic_fixed_properties *prop = &hdev->asic_prop; u32 status; int rc; @@ -732,11 +737,22 @@ int hl_fw_init_cpu(struct hl_device *hdev, u32 cpu_boot_status_reg, /* Read U-Boot version now in case we will later fail */ hdev->asic_funcs->read_device_fw_version(hdev, FW_COMP_UBOOT); + /* Clear reset status since we need to read it again from boot CPU */ + prop->hard_reset_done_by_fw = false; + /* Read boot_cpu security bits */ - if (hdev->asic_prop.fw_security_status_valid) - hdev->asic_prop.fw_boot_cpu_security_map = + if (prop->fw_security_status_valid) { + prop->fw_boot_cpu_security_map = RREG32(cpu_security_boot_status_reg); + if (prop->fw_boot_cpu_security_map & + CPU_BOOT_DEV_STS0_FW_HARD_RST_EN) + prop->hard_reset_done_by_fw = true; + } + + dev_dbg(hdev->dev, "Firmware boot CPU hard-reset is %s\n", + prop->hard_reset_done_by_fw ? "enabled" : "disabled"); + if (rc) { detect_cpu_boot_status(hdev, status); rc = -EIO; @@ -805,11 +821,22 @@ int hl_fw_init_cpu(struct hl_device *hdev, u32 cpu_boot_status_reg, goto out; } + /* Clear reset status since we need to read again from app */ + prop->hard_reset_done_by_fw = false; + /* Read FW application security bits */ - if (hdev->asic_prop.fw_security_status_valid) - hdev->asic_prop.fw_app_security_map = + if (prop->fw_security_status_valid) { + prop->fw_app_security_map = RREG32(cpu_security_boot_status_reg); + if (prop->fw_app_security_map & + CPU_BOOT_DEV_STS0_FW_HARD_RST_EN) + prop->hard_reset_done_by_fw = true; + } + + dev_dbg(hdev->dev, "Firmware application CPU hard-reset is %s\n", + prop->hard_reset_done_by_fw ? "enabled" : "disabled"); + dev_info(hdev->dev, "Successfully loaded firmware to device\n"); out: -- GitLab From 13d0ee10b55ecec01fd3c91e086e4f3ba75a7911 Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Sun, 6 Dec 2020 23:48:45 +0200 Subject: [PATCH 0069/2012] habanalabs/gaudi: enhance reset message Print the initiator who performs the hard-reset for easier debugging. Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/gaudi/gaudi.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c index f316b898e8e05..b0528883a9959 100644 --- a/drivers/misc/habanalabs/gaudi/gaudi.c +++ b/drivers/misc/habanalabs/gaudi/gaudi.c @@ -3936,11 +3936,15 @@ static void gaudi_hw_fini(struct hl_device *hdev, bool hard_reset) WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST, 1 << PSOC_GLOBAL_CONF_SW_ALL_RST_IND_SHIFT); - } - dev_info(hdev->dev, - "Issued HARD reset command, going to wait %dms\n", - reset_timeout_ms); + dev_info(hdev->dev, + "Issued HARD reset command, going to wait %dms\n", + reset_timeout_ms); + } else { + dev_info(hdev->dev, + "Firmware performs HARD reset, going to wait %dms\n", + reset_timeout_ms); + } /* * After hard reset, we can't poll the BTM_FSM register because the PSOC -- GitLab From 90ffe170a390d5a620f8fe66758514e369e85d24 Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Tue, 8 Dec 2020 16:52:48 +0200 Subject: [PATCH 0070/2012] habanalabs: update comment in hl_boot_if.h Hard-reset flag is updated in many stages of the boot sequence of the firmware. Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/include/common/hl_boot_if.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/misc/habanalabs/include/common/hl_boot_if.h b/drivers/misc/habanalabs/include/common/hl_boot_if.h index 7cb5f2d3e5654..b637dfd69f6e0 100644 --- a/drivers/misc/habanalabs/include/common/hl_boot_if.h +++ b/drivers/misc/habanalabs/include/common/hl_boot_if.h @@ -145,7 +145,7 @@ * implemented. This means that FW will * perform hard reset procedure on * receiving the halt-machine event. - * Initialized in: linux + * Initialized in: preboot, u-boot, linux * * CPU_BOOT_DEV_STS0_PLL_INFO_EN FW retrieval of PLL info is enabled. * Initialized in: linux -- GitLab From 377182a3cc5ae6cc17fb04d06864c975f9f71c18 Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Wed, 9 Dec 2020 19:50:46 +0200 Subject: [PATCH 0071/2012] habanalabs: adjust pci controller init to new firmware When the firmware security is enabled, the pcie_aux_dbi_reg_addr register in the PCI controller is blocked. Therefore, ignore the result of writing to this register and assume it worked. Also remove the prints on errors in the internal ELBI write function. If the security is enabled, the firmware is responsible for setting this register correctly so we won't have any problem. If the security is disabled, the write will work (unless something is totally broken at the PCI level and then the whole sequence will fail). In addition, remove a write to register pcie_aux_dbi_reg_addr+4, which was never actually needed. Moreover, PCIE_DBI registers are blocked to access from host when firmware security is enabled. Use a different register to flush the writes. Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/pci.c | 28 +++++++++++-------- drivers/misc/habanalabs/gaudi/gaudi.c | 4 +-- .../misc/habanalabs/gaudi/gaudi_coresight.c | 3 +- 3 files changed, 21 insertions(+), 14 deletions(-) diff --git a/drivers/misc/habanalabs/common/pci.c b/drivers/misc/habanalabs/common/pci.c index 923b2606e29fe..b4725e6101f6c 100644 --- a/drivers/misc/habanalabs/common/pci.c +++ b/drivers/misc/habanalabs/common/pci.c @@ -130,10 +130,8 @@ static int hl_pci_elbi_write(struct hl_device *hdev, u64 addr, u32 data) if ((val & PCI_CONFIG_ELBI_STS_MASK) == PCI_CONFIG_ELBI_STS_DONE) return 0; - if (val & PCI_CONFIG_ELBI_STS_ERR) { - dev_err(hdev->dev, "Error writing to ELBI\n"); + if (val & PCI_CONFIG_ELBI_STS_ERR) return -EIO; - } if (!(val & PCI_CONFIG_ELBI_STS_MASK)) { dev_err(hdev->dev, "ELBI write didn't finish in time\n"); @@ -160,8 +158,12 @@ int hl_pci_iatu_write(struct hl_device *hdev, u32 addr, u32 data) dbi_offset = addr & 0xFFF; - rc = hl_pci_elbi_write(hdev, prop->pcie_aux_dbi_reg_addr, 0x00300000); - rc |= hl_pci_elbi_write(hdev, prop->pcie_dbi_base_address + dbi_offset, + /* Ignore result of writing to pcie_aux_dbi_reg_addr as it could fail + * in case the firmware security is enabled + */ + hl_pci_elbi_write(hdev, prop->pcie_aux_dbi_reg_addr, 0x00300000); + + rc = hl_pci_elbi_write(hdev, prop->pcie_dbi_base_address + dbi_offset, data); if (rc) @@ -244,9 +246,11 @@ int hl_pci_set_inbound_region(struct hl_device *hdev, u8 region, rc |= hl_pci_iatu_write(hdev, offset + 0x4, ctrl_reg_val); - /* Return the DBI window to the default location */ - rc |= hl_pci_elbi_write(hdev, prop->pcie_aux_dbi_reg_addr, 0); - rc |= hl_pci_elbi_write(hdev, prop->pcie_aux_dbi_reg_addr + 4, 0); + /* Return the DBI window to the default location + * Ignore result of writing to pcie_aux_dbi_reg_addr as it could fail + * in case the firmware security is enabled + */ + hl_pci_elbi_write(hdev, prop->pcie_aux_dbi_reg_addr, 0); if (rc) dev_err(hdev->dev, "failed to map bar %u to 0x%08llx\n", @@ -294,9 +298,11 @@ int hl_pci_set_outbound_region(struct hl_device *hdev, /* Enable */ rc |= hl_pci_iatu_write(hdev, 0x004, 0x80000000); - /* Return the DBI window to the default location */ - rc |= hl_pci_elbi_write(hdev, prop->pcie_aux_dbi_reg_addr, 0); - rc |= hl_pci_elbi_write(hdev, prop->pcie_aux_dbi_reg_addr + 4, 0); + /* Return the DBI window to the default location + * Ignore result of writing to pcie_aux_dbi_reg_addr as it could fail + * in case the firmware security is enabled + */ + hl_pci_elbi_write(hdev, prop->pcie_aux_dbi_reg_addr, 0); return rc; } diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c index b0528883a9959..88d0e4356d594 100644 --- a/drivers/misc/habanalabs/gaudi/gaudi.c +++ b/drivers/misc/habanalabs/gaudi/gaudi.c @@ -3761,7 +3761,7 @@ static int gaudi_init_cpu_queues(struct hl_device *hdev, u32 cpu_timeout) static void gaudi_pre_hw_init(struct hl_device *hdev) { /* Perform read from the device to make sure device is up */ - RREG32(mmPCIE_DBI_DEVICE_ID_VENDOR_ID_REG); + RREG32(mmHW_STATE); if (hdev->asic_prop.fw_security_disabled) { /* Set the access through PCI bars (Linux driver only) as @@ -3847,7 +3847,7 @@ static int gaudi_hw_init(struct hl_device *hdev) } /* Perform read from the device to flush all configuration */ - RREG32(mmPCIE_DBI_DEVICE_ID_VENDOR_ID_REG); + RREG32(mmHW_STATE); return 0; diff --git a/drivers/misc/habanalabs/gaudi/gaudi_coresight.c b/drivers/misc/habanalabs/gaudi/gaudi_coresight.c index 2e3612e1ee28d..88a09d42e111c 100644 --- a/drivers/misc/habanalabs/gaudi/gaudi_coresight.c +++ b/drivers/misc/habanalabs/gaudi/gaudi_coresight.c @@ -9,6 +9,7 @@ #include "../include/gaudi/gaudi_coresight.h" #include "../include/gaudi/asic_reg/gaudi_regs.h" #include "../include/gaudi/gaudi_masks.h" +#include "../include/gaudi/gaudi_reg_map.h" #include #define SPMU_SECTION_SIZE MME0_ACC_SPMU_MAX_OFFSET @@ -874,7 +875,7 @@ int gaudi_debug_coresight(struct hl_device *hdev, void *data) } /* Perform read from the device to flush all configuration */ - RREG32(mmPCIE_DBI_DEVICE_ID_VENDOR_ID_REG); + RREG32(mmHW_STATE); return rc; } -- GitLab From 98e8781f008372057bd5cb059ca6b507371e473d Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Wed, 9 Dec 2020 23:07:58 +0200 Subject: [PATCH 0072/2012] habanalabs/gaudi: retry loading TPC f/w on -EINTR If loading the firmware file for the TPC f/w was interrupted, try to do it again, up to 5 times. Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/gaudi/gaudi.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c index 88d0e4356d594..8c09e4466af8c 100644 --- a/drivers/misc/habanalabs/gaudi/gaudi.c +++ b/drivers/misc/habanalabs/gaudi/gaudi.c @@ -838,11 +838,17 @@ static int gaudi_init_tpc_mem(struct hl_device *hdev) size_t fw_size; void *cpu_addr; dma_addr_t dma_handle; - int rc; + int rc, count = 5; +again: rc = request_firmware(&fw, GAUDI_TPC_FW_FILE, hdev->dev); + if (rc == -EINTR && count-- > 0) { + msleep(50); + goto again; + } + if (rc) { - dev_err(hdev->dev, "Firmware file %s is not found!\n", + dev_err(hdev->dev, "Failed to load firmware file %s\n", GAUDI_TPC_FW_FILE); goto out; } -- GitLab From a3fd28306329e8e82efab973aafe81e9001dcf6f Mon Sep 17 00:00:00 2001 From: Alon Mizrahi Date: Tue, 8 Dec 2020 16:14:01 +0200 Subject: [PATCH 0073/2012] habanalabs: add validation cs counter, fix misplaced counters Up until now validation errors were counted in the parsing field of the cs_counters struct, so we added a new counter and increased it when needed. In addition, there were some locations where only one of the counters was updated (ctx or aggregate) so add the second one to be updated as well. Signed-off-by: Alon Mizrahi Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- .../habanalabs/common/command_submission.c | 75 ++++++++++++++----- drivers/misc/habanalabs/common/habanalabs.h | 2 + .../misc/habanalabs/common/habanalabs_ioctl.c | 5 ++ include/uapi/misc/habanalabs.h | 4 + 4 files changed, 68 insertions(+), 18 deletions(-) diff --git a/drivers/misc/habanalabs/common/command_submission.c b/drivers/misc/habanalabs/common/command_submission.c index 92c1c516b65f3..b2b3d2b0f808a 100644 --- a/drivers/misc/habanalabs/common/command_submission.c +++ b/drivers/misc/habanalabs/common/command_submission.c @@ -472,8 +472,11 @@ static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx, cntr = &hdev->aggregated_cs_counters; cs = kzalloc(sizeof(*cs), GFP_ATOMIC); - if (!cs) + if (!cs) { + atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt); + atomic64_inc(&cntr->out_of_mem_drop_cnt); return -ENOMEM; + } cs->ctx = ctx; cs->submitted = false; @@ -486,6 +489,8 @@ static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx, cs_cmpl = kmalloc(sizeof(*cs_cmpl), GFP_ATOMIC); if (!cs_cmpl) { + atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt); + atomic64_inc(&cntr->out_of_mem_drop_cnt); rc = -ENOMEM; goto free_cs; } @@ -513,6 +518,8 @@ static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx, cs->jobs_in_queue_cnt = kcalloc(hdev->asic_prop.max_queues, sizeof(*cs->jobs_in_queue_cnt), GFP_ATOMIC); if (!cs->jobs_in_queue_cnt) { + atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt); + atomic64_inc(&cntr->out_of_mem_drop_cnt); rc = -ENOMEM; goto free_fence; } @@ -764,11 +771,14 @@ static int hl_cs_sanity_checks(struct hl_fpriv *hpriv, union hl_cs_args *args) static int hl_cs_copy_chunk_array(struct hl_device *hdev, struct hl_cs_chunk **cs_chunk_array, - void __user *chunks, u32 num_chunks) + void __user *chunks, u32 num_chunks, + struct hl_ctx *ctx) { u32 size_to_copy; if (num_chunks > HL_MAX_JOBS_PER_CS) { + atomic64_inc(&ctx->cs_counters.validation_drop_cnt); + atomic64_inc(&hdev->aggregated_cs_counters.validation_drop_cnt); dev_err(hdev->dev, "Number of chunks can NOT be larger than %d\n", HL_MAX_JOBS_PER_CS); @@ -777,11 +787,16 @@ static int hl_cs_copy_chunk_array(struct hl_device *hdev, *cs_chunk_array = kmalloc_array(num_chunks, sizeof(**cs_chunk_array), GFP_ATOMIC); - if (!*cs_chunk_array) + if (!*cs_chunk_array) { + atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt); + atomic64_inc(&hdev->aggregated_cs_counters.out_of_mem_drop_cnt); return -ENOMEM; + } size_to_copy = num_chunks * sizeof(struct hl_cs_chunk); if (copy_from_user(*cs_chunk_array, chunks, size_to_copy)) { + atomic64_inc(&ctx->cs_counters.validation_drop_cnt); + atomic64_inc(&hdev->aggregated_cs_counters.validation_drop_cnt); dev_err(hdev->dev, "Failed to copy cs chunk array from user\n"); kfree(*cs_chunk_array); return -EFAULT; @@ -797,6 +812,7 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks, struct hl_device *hdev = hpriv->hdev; struct hl_cs_chunk *cs_chunk_array; struct hl_cs_counters_atomic *cntr; + struct hl_ctx *ctx = hpriv->ctx; struct hl_cs_job *job; struct hl_cs *cs; struct hl_cb *cb; @@ -805,7 +821,8 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks, cntr = &hdev->aggregated_cs_counters; *cs_seq = ULLONG_MAX; - rc = hl_cs_copy_chunk_array(hdev, &cs_chunk_array, chunks, num_chunks); + rc = hl_cs_copy_chunk_array(hdev, &cs_chunk_array, chunks, num_chunks, + hpriv->ctx); if (rc) goto out; @@ -832,8 +849,8 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks, rc = validate_queue_index(hdev, chunk, &queue_type, &is_kernel_allocated_cb); if (rc) { - atomic64_inc(&hpriv->ctx->cs_counters.parsing_drop_cnt); - atomic64_inc(&cntr->parsing_drop_cnt); + atomic64_inc(&ctx->cs_counters.validation_drop_cnt); + atomic64_inc(&cntr->validation_drop_cnt); goto free_cs_object; } @@ -841,8 +858,8 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks, cb = get_cb_from_cs_chunk(hdev, &hpriv->cb_mgr, chunk); if (!cb) { atomic64_inc( - &hpriv->ctx->cs_counters.parsing_drop_cnt); - atomic64_inc(&cntr->parsing_drop_cnt); + &ctx->cs_counters.validation_drop_cnt); + atomic64_inc(&cntr->validation_drop_cnt); rc = -EINVAL; goto free_cs_object; } @@ -856,8 +873,7 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks, job = hl_cs_allocate_job(hdev, queue_type, is_kernel_allocated_cb); if (!job) { - atomic64_inc( - &hpriv->ctx->cs_counters.out_of_mem_drop_cnt); + atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt); atomic64_inc(&cntr->out_of_mem_drop_cnt); dev_err(hdev->dev, "Failed to allocate a new job\n"); rc = -ENOMEM; @@ -891,7 +907,7 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks, rc = cs_parser(hpriv, job); if (rc) { - atomic64_inc(&hpriv->ctx->cs_counters.parsing_drop_cnt); + atomic64_inc(&ctx->cs_counters.parsing_drop_cnt); atomic64_inc(&cntr->parsing_drop_cnt); dev_err(hdev->dev, "Failed to parse JOB %d.%llu.%d, err %d, rejecting the CS\n", @@ -901,8 +917,8 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks, } if (int_queues_only) { - atomic64_inc(&hpriv->ctx->cs_counters.parsing_drop_cnt); - atomic64_inc(&cntr->parsing_drop_cnt); + atomic64_inc(&ctx->cs_counters.validation_drop_cnt); + atomic64_inc(&cntr->validation_drop_cnt); dev_err(hdev->dev, "Reject CS %d.%llu because only internal queues jobs are present\n", cs->ctx->asid, cs->sequence); @@ -1042,7 +1058,7 @@ out: } static int cs_ioctl_extract_signal_seq(struct hl_device *hdev, - struct hl_cs_chunk *chunk, u64 *signal_seq) + struct hl_cs_chunk *chunk, u64 *signal_seq, struct hl_ctx *ctx) { u64 *signal_seq_arr = NULL; u32 size_to_copy, signal_seq_arr_len; @@ -1052,6 +1068,8 @@ static int cs_ioctl_extract_signal_seq(struct hl_device *hdev, /* currently only one signal seq is supported */ if (signal_seq_arr_len != 1) { + atomic64_inc(&ctx->cs_counters.validation_drop_cnt); + atomic64_inc(&hdev->aggregated_cs_counters.validation_drop_cnt); dev_err(hdev->dev, "Wait for signal CS supports only one signal CS seq\n"); return -EINVAL; @@ -1060,13 +1078,18 @@ static int cs_ioctl_extract_signal_seq(struct hl_device *hdev, signal_seq_arr = kmalloc_array(signal_seq_arr_len, sizeof(*signal_seq_arr), GFP_ATOMIC); - if (!signal_seq_arr) + if (!signal_seq_arr) { + atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt); + atomic64_inc(&hdev->aggregated_cs_counters.out_of_mem_drop_cnt); return -ENOMEM; + } size_to_copy = chunk->num_signal_seq_arr * sizeof(*signal_seq_arr); if (copy_from_user(signal_seq_arr, u64_to_user_ptr(chunk->signal_seq_arr), size_to_copy)) { + atomic64_inc(&ctx->cs_counters.validation_drop_cnt); + atomic64_inc(&hdev->aggregated_cs_counters.validation_drop_cnt); dev_err(hdev->dev, "Failed to copy signal seq array from user\n"); rc = -EFAULT; @@ -1153,6 +1176,7 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type, struct hl_device *hdev = hpriv->hdev; struct hl_cs_compl *sig_waitcs_cmpl; u32 q_idx, collective_engine_id = 0; + struct hl_cs_counters_atomic *cntr; struct hl_fence *sig_fence = NULL; struct hl_ctx *ctx = hpriv->ctx; enum hl_queue_type q_type; @@ -1160,9 +1184,11 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type, u64 signal_seq; int rc; + cntr = &hdev->aggregated_cs_counters; *cs_seq = ULLONG_MAX; - rc = hl_cs_copy_chunk_array(hdev, &cs_chunk_array, chunks, num_chunks); + rc = hl_cs_copy_chunk_array(hdev, &cs_chunk_array, chunks, num_chunks, + ctx); if (rc) goto out; @@ -1170,6 +1196,8 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type, chunk = &cs_chunk_array[0]; if (chunk->queue_index >= hdev->asic_prop.max_queues) { + atomic64_inc(&ctx->cs_counters.validation_drop_cnt); + atomic64_inc(&cntr->validation_drop_cnt); dev_err(hdev->dev, "Queue index %d is invalid\n", chunk->queue_index); rc = -EINVAL; @@ -1181,6 +1209,8 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type, q_type = hw_queue_prop->type; if (!hw_queue_prop->supports_sync_stream) { + atomic64_inc(&ctx->cs_counters.validation_drop_cnt); + atomic64_inc(&cntr->validation_drop_cnt); dev_err(hdev->dev, "Queue index %d does not support sync stream operations\n", q_idx); @@ -1190,6 +1220,8 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type, if (cs_type == CS_TYPE_COLLECTIVE_WAIT) { if (!(hw_queue_prop->collective_mode == HL_COLLECTIVE_MASTER)) { + atomic64_inc(&ctx->cs_counters.validation_drop_cnt); + atomic64_inc(&cntr->validation_drop_cnt); dev_err(hdev->dev, "Queue index %d is invalid\n", q_idx); rc = -EINVAL; @@ -1200,12 +1232,14 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type, } if (cs_type == CS_TYPE_WAIT || cs_type == CS_TYPE_COLLECTIVE_WAIT) { - rc = cs_ioctl_extract_signal_seq(hdev, chunk, &signal_seq); + rc = cs_ioctl_extract_signal_seq(hdev, chunk, &signal_seq, ctx); if (rc) goto free_cs_chunk_array; sig_fence = hl_ctx_get_fence(ctx, signal_seq); if (IS_ERR(sig_fence)) { + atomic64_inc(&ctx->cs_counters.validation_drop_cnt); + atomic64_inc(&cntr->validation_drop_cnt); dev_err(hdev->dev, "Failed to get signal CS with seq 0x%llx\n", signal_seq); @@ -1223,6 +1257,8 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type, container_of(sig_fence, struct hl_cs_compl, base_fence); if (sig_waitcs_cmpl->type != CS_TYPE_SIGNAL) { + atomic64_inc(&ctx->cs_counters.validation_drop_cnt); + atomic64_inc(&cntr->validation_drop_cnt); dev_err(hdev->dev, "CS seq 0x%llx is not of a signal CS\n", signal_seq); @@ -1270,8 +1306,11 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type, else if (cs_type == CS_TYPE_COLLECTIVE_WAIT) rc = hdev->asic_funcs->collective_wait_create_jobs(hdev, ctx, cs, q_idx, collective_engine_id); - else + else { + atomic64_inc(&ctx->cs_counters.validation_drop_cnt); + atomic64_inc(&cntr->validation_drop_cnt); rc = -EINVAL; + } if (rc) goto free_cs_object; diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h index 70b778a0d60e2..e0d7f5fbaa5c3 100644 --- a/drivers/misc/habanalabs/common/habanalabs.h +++ b/drivers/misc/habanalabs/common/habanalabs.h @@ -1000,6 +1000,7 @@ struct hl_va_range { * @queue_full_drop_cnt: dropped due to queue full * @device_in_reset_drop_cnt: dropped due to device in reset * @max_cs_in_flight_drop_cnt: dropped due to maximum CS in-flight + * @validation_drop_cnt: dropped due to error in validation */ struct hl_cs_counters_atomic { atomic64_t out_of_mem_drop_cnt; @@ -1007,6 +1008,7 @@ struct hl_cs_counters_atomic { atomic64_t queue_full_drop_cnt; atomic64_t device_in_reset_drop_cnt; atomic64_t max_cs_in_flight_drop_cnt; + atomic64_t validation_drop_cnt; }; /** diff --git a/drivers/misc/habanalabs/common/habanalabs_ioctl.c b/drivers/misc/habanalabs/common/habanalabs_ioctl.c index a0c0d20f6f8ff..12efbd9d2e3a0 100644 --- a/drivers/misc/habanalabs/common/habanalabs_ioctl.c +++ b/drivers/misc/habanalabs/common/habanalabs_ioctl.c @@ -335,6 +335,8 @@ static int cs_counters_info(struct hl_fpriv *hpriv, struct hl_info_args *args) atomic64_read(&cntr->device_in_reset_drop_cnt); cs_counters.total_max_cs_in_flight_drop_cnt = atomic64_read(&cntr->max_cs_in_flight_drop_cnt); + cs_counters.total_validation_drop_cnt = + atomic64_read(&cntr->validation_drop_cnt); if (hpriv->ctx) { cs_counters.ctx_out_of_mem_drop_cnt = @@ -352,6 +354,9 @@ static int cs_counters_info(struct hl_fpriv *hpriv, struct hl_info_args *args) cs_counters.ctx_max_cs_in_flight_drop_cnt = atomic64_read( &hpriv->ctx->cs_counters.max_cs_in_flight_drop_cnt); + cs_counters.ctx_validation_drop_cnt = + atomic64_read( + &hpriv->ctx->cs_counters.validation_drop_cnt); } return copy_to_user(out, &cs_counters, diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h index dc8bcec195ccf..dba3827c43ca4 100644 --- a/include/uapi/misc/habanalabs.h +++ b/include/uapi/misc/habanalabs.h @@ -426,6 +426,8 @@ struct hl_info_sync_manager { * @ctx_device_in_reset_drop_cnt: context dropped due to device in reset * @total_max_cs_in_flight_drop_cnt: total dropped due to maximum CS in-flight * @ctx_max_cs_in_flight_drop_cnt: context dropped due to maximum CS in-flight + * @total_validation_drop_cnt: total dropped due to validation error + * @ctx_validation_drop_cnt: context dropped due to validation error */ struct hl_info_cs_counters { __u64 total_out_of_mem_drop_cnt; @@ -438,6 +440,8 @@ struct hl_info_cs_counters { __u64 ctx_device_in_reset_drop_cnt; __u64 total_max_cs_in_flight_drop_cnt; __u64 ctx_max_cs_in_flight_drop_cnt; + __u64 total_validation_drop_cnt; + __u64 ctx_validation_drop_cnt; }; enum gaudi_dcores { -- GitLab From fcaebc7354188b0d708c79df4390fbabd4d9799d Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Mon, 14 Dec 2020 12:52:06 +0200 Subject: [PATCH 0074/2012] habanalabs: register to pci shutdown callback We need to make sure our device is idle when rebooting a virtual machine. This is done in the driver level. The firmware will later handle FLR but we want to be extra safe and stop the devices until the FLR is handled. Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/habanalabs_drv.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/misc/habanalabs/common/habanalabs_drv.c b/drivers/misc/habanalabs/common/habanalabs_drv.c index 6bbb6bca68600..032d114f01ea5 100644 --- a/drivers/misc/habanalabs/common/habanalabs_drv.c +++ b/drivers/misc/habanalabs/common/habanalabs_drv.c @@ -544,6 +544,7 @@ static struct pci_driver hl_pci_driver = { .id_table = ids, .probe = hl_pci_probe, .remove = hl_pci_remove, + .shutdown = hl_pci_remove, .driver.pm = &hl_pm_ops, .err_handler = &hl_pci_err_handler, }; -- GitLab From 097c62b6f0ec2bdadf86afbe80df03856338724d Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Tue, 22 Dec 2020 15:21:07 +0200 Subject: [PATCH 0075/2012] habanalabs: fix order of status check When the device is in reset or needs to be reset, the disabled property is don't-care. Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/device.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c index 5871162a84425..0749c92cbcf63 100644 --- a/drivers/misc/habanalabs/common/device.c +++ b/drivers/misc/habanalabs/common/device.c @@ -17,12 +17,12 @@ enum hl_device_status hl_device_status(struct hl_device *hdev) { enum hl_device_status status; - if (hdev->disabled) - status = HL_DEVICE_STATUS_MALFUNCTION; - else if (atomic_read(&hdev->in_reset)) + if (atomic_read(&hdev->in_reset)) status = HL_DEVICE_STATUS_IN_RESET; else if (hdev->needs_reset) status = HL_DEVICE_STATUS_NEEDS_RESET; + else if (hdev->disabled) + status = HL_DEVICE_STATUS_MALFUNCTION; else status = HL_DEVICE_STATUS_OPERATIONAL; -- GitLab From 2ca408d9c749c32288bc28725f9f12ba30299e8f Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Mon, 30 Nov 2020 17:30:59 -0500 Subject: [PATCH 0076/2012] fanotify: Fix sys_fanotify_mark() on native x86-32 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 121b32a58a3a ("x86/entry/32: Use IA32-specific wrappers for syscalls taking 64-bit arguments") converted native x86-32 which take 64-bit arguments to use the compat handlers to allow conversion to passing args via pt_regs. sys_fanotify_mark() was however missed, as it has a general compat handler. Add a config option that will use the syscall wrapper that takes the split args for native 32-bit. [ bp: Fix typo in Kconfig help text. ] Fixes: 121b32a58a3a ("x86/entry/32: Use IA32-specific wrappers for syscalls taking 64-bit arguments") Reported-by: Paweł Jasiak Signed-off-by: Brian Gerst Signed-off-by: Borislav Petkov Acked-by: Jan Kara Acked-by: Andy Lutomirski Link: https://lkml.kernel.org/r/20201130223059.101286-1-brgerst@gmail.com --- arch/Kconfig | 6 ++++++ arch/x86/Kconfig | 1 + fs/notify/fanotify/fanotify_user.c | 17 +++++++---------- include/linux/syscalls.h | 24 ++++++++++++++++++++++++ 4 files changed, 38 insertions(+), 10 deletions(-) diff --git a/arch/Kconfig b/arch/Kconfig index 78c6f05b10f91..24862d15f3a36 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -1105,6 +1105,12 @@ config HAVE_ARCH_PFN_VALID config ARCH_SUPPORTS_DEBUG_PAGEALLOC bool +config ARCH_SPLIT_ARG64 + bool + help + If a 32-bit architecture requires 64-bit arguments to be split into + pairs of 32-bit arguments, select this option. + source "kernel/gcov/Kconfig" source "scripts/gcc-plugins/Kconfig" diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 7b6dd10b162ac..21f851179ff08 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -19,6 +19,7 @@ config X86_32 select KMAP_LOCAL select MODULES_USE_ELF_REL select OLD_SIGACTION + select ARCH_SPLIT_ARG64 config X86_64 def_bool y diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index 3e01d8f2ab906..dcab112e1f001 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -1285,26 +1285,23 @@ fput_and_out: return ret; } +#ifndef CONFIG_ARCH_SPLIT_ARG64 SYSCALL_DEFINE5(fanotify_mark, int, fanotify_fd, unsigned int, flags, __u64, mask, int, dfd, const char __user *, pathname) { return do_fanotify_mark(fanotify_fd, flags, mask, dfd, pathname); } +#endif -#ifdef CONFIG_COMPAT -COMPAT_SYSCALL_DEFINE6(fanotify_mark, +#if defined(CONFIG_ARCH_SPLIT_ARG64) || defined(CONFIG_COMPAT) +SYSCALL32_DEFINE6(fanotify_mark, int, fanotify_fd, unsigned int, flags, - __u32, mask0, __u32, mask1, int, dfd, + SC_ARG64(mask), int, dfd, const char __user *, pathname) { - return do_fanotify_mark(fanotify_fd, flags, -#ifdef __BIG_ENDIAN - ((__u64)mask0 << 32) | mask1, -#else - ((__u64)mask1 << 32) | mask0, -#endif - dfd, pathname); + return do_fanotify_mark(fanotify_fd, flags, SC_VAL64(__u64, mask), + dfd, pathname); } #endif diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index f3929aff39cf2..7688bc983de54 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -251,6 +251,30 @@ static inline int is_syscall_trace_event(struct trace_event_call *tp_event) static inline long __do_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)) #endif /* __SYSCALL_DEFINEx */ +/* For split 64-bit arguments on 32-bit architectures */ +#ifdef __LITTLE_ENDIAN +#define SC_ARG64(name) u32, name##_lo, u32, name##_hi +#else +#define SC_ARG64(name) u32, name##_hi, u32, name##_lo +#endif +#define SC_VAL64(type, name) ((type) name##_hi << 32 | name##_lo) + +#ifdef CONFIG_COMPAT +#define SYSCALL32_DEFINE1 COMPAT_SYSCALL_DEFINE1 +#define SYSCALL32_DEFINE2 COMPAT_SYSCALL_DEFINE2 +#define SYSCALL32_DEFINE3 COMPAT_SYSCALL_DEFINE3 +#define SYSCALL32_DEFINE4 COMPAT_SYSCALL_DEFINE4 +#define SYSCALL32_DEFINE5 COMPAT_SYSCALL_DEFINE5 +#define SYSCALL32_DEFINE6 COMPAT_SYSCALL_DEFINE6 +#else +#define SYSCALL32_DEFINE1 SYSCALL_DEFINE1 +#define SYSCALL32_DEFINE2 SYSCALL_DEFINE2 +#define SYSCALL32_DEFINE3 SYSCALL_DEFINE3 +#define SYSCALL32_DEFINE4 SYSCALL_DEFINE4 +#define SYSCALL32_DEFINE5 SYSCALL_DEFINE5 +#define SYSCALL32_DEFINE6 SYSCALL_DEFINE6 +#endif + /* * Called before coming back to user-mode. Returning to user-mode with an * address limit different than USER_DS can allow to overwrite kernel memory. -- GitLab From 512d4a26abdbd11c6ffa03032740e5ab3c62c55b Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 28 Dec 2020 14:03:02 +0200 Subject: [PATCH 0077/2012] interconnect: qcom: fix rpmh link failures When CONFIG_COMPILE_TEST is set, it is possible to build some of the interconnect drivers into the kernel while their dependencies are loadable modules, which is bad: arm-linux-gnueabi-ld: drivers/interconnect/qcom/bcm-voter.o: in function `qcom_icc_bcm_voter_commit': (.text+0x1f8): undefined reference to `rpmh_invalidate' arm-linux-gnueabi-ld: (.text+0x20c): undefined reference to `rpmh_write_batch' arm-linux-gnueabi-ld: (.text+0x2b0): undefined reference to `rpmh_write_batch' arm-linux-gnueabi-ld: (.text+0x2e8): undefined reference to `rpmh_write_batch' arm-linux-gnueabi-ld: drivers/interconnect/qcom/icc-rpmh.o: in function `qcom_icc_bcm_init': (.text+0x2ac): undefined reference to `cmd_db_read_addr' arm-linux-gnueabi-ld: (.text+0x2c8): undefined reference to `cmd_db_read_aux_data' The exact dependencies are a bit complicated, so split them out into a hidden Kconfig symbol that all drivers can in turn depend on to get it right. Fixes: 976daac4a1c5 ("interconnect: qcom: Consolidate interconnect RPMh support") Signed-off-by: Arnd Bergmann Reviewed-by: Bjorn Andersson Link: https://lore.kernel.org/r/20201204165030.3747484-1-arnd@kernel.org Signed-off-by: Georgi Djakov --- drivers/interconnect/qcom/Kconfig | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/drivers/interconnect/qcom/Kconfig b/drivers/interconnect/qcom/Kconfig index a8f93ba265f81..b3fb5b02bcf1e 100644 --- a/drivers/interconnect/qcom/Kconfig +++ b/drivers/interconnect/qcom/Kconfig @@ -42,13 +42,23 @@ config INTERCONNECT_QCOM_QCS404 This is a driver for the Qualcomm Network-on-Chip on qcs404-based platforms. +config INTERCONNECT_QCOM_RPMH_POSSIBLE + tristate + default INTERCONNECT_QCOM + depends on QCOM_RPMH || (COMPILE_TEST && !QCOM_RPMH) + depends on QCOM_COMMAND_DB || (COMPILE_TEST && !QCOM_COMMAND_DB) + depends on OF || COMPILE_TEST + help + Compile-testing RPMH drivers is possible on other platforms, + but in order to avoid link failures, drivers must not be built-in + when QCOM_RPMH or QCOM_COMMAND_DB are loadable modules + config INTERCONNECT_QCOM_RPMH tristate config INTERCONNECT_QCOM_SC7180 tristate "Qualcomm SC7180 interconnect driver" - depends on INTERCONNECT_QCOM - depends on (QCOM_RPMH && QCOM_COMMAND_DB && OF) || COMPILE_TEST + depends on INTERCONNECT_QCOM_RPMH_POSSIBLE select INTERCONNECT_QCOM_RPMH select INTERCONNECT_QCOM_BCM_VOTER help @@ -57,8 +67,7 @@ config INTERCONNECT_QCOM_SC7180 config INTERCONNECT_QCOM_SDM845 tristate "Qualcomm SDM845 interconnect driver" - depends on INTERCONNECT_QCOM - depends on (QCOM_RPMH && QCOM_COMMAND_DB && OF) || COMPILE_TEST + depends on INTERCONNECT_QCOM_RPMH_POSSIBLE select INTERCONNECT_QCOM_RPMH select INTERCONNECT_QCOM_BCM_VOTER help @@ -67,8 +76,7 @@ config INTERCONNECT_QCOM_SDM845 config INTERCONNECT_QCOM_SM8150 tristate "Qualcomm SM8150 interconnect driver" - depends on INTERCONNECT_QCOM - depends on (QCOM_RPMH && QCOM_COMMAND_DB && OF) || COMPILE_TEST + depends on INTERCONNECT_QCOM_RPMH_POSSIBLE select INTERCONNECT_QCOM_RPMH select INTERCONNECT_QCOM_BCM_VOTER help @@ -77,8 +85,7 @@ config INTERCONNECT_QCOM_SM8150 config INTERCONNECT_QCOM_SM8250 tristate "Qualcomm SM8250 interconnect driver" - depends on INTERCONNECT_QCOM - depends on (QCOM_RPMH && QCOM_COMMAND_DB && OF) || COMPILE_TEST + depends on INTERCONNECT_QCOM_RPMH_POSSIBLE select INTERCONNECT_QCOM_RPMH select INTERCONNECT_QCOM_BCM_VOTER help -- GitLab From c6174c0e058fc0a54e0b9787c44cb24b0a8d0217 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Mon, 28 Dec 2020 14:03:02 +0200 Subject: [PATCH 0078/2012] interconnect: imx: Add a missing of_node_put after of_device_is_available Add an 'of_node_put()' call when a tested device node is not available. Fixes: f0d8048525d7 ("interconnect: Add imx core driver") Signed-off-by: Christophe JAILLET Link: https://lore.kernel.org/r/20201206121304.29381-1-christophe.jaillet@wanadoo.fr Signed-off-by: Georgi Djakov --- drivers/interconnect/imx/imx.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/interconnect/imx/imx.c b/drivers/interconnect/imx/imx.c index 41dba7090c2ae..e398ebf1dbbab 100644 --- a/drivers/interconnect/imx/imx.c +++ b/drivers/interconnect/imx/imx.c @@ -99,6 +99,7 @@ static int imx_icc_node_init_qos(struct icc_provider *provider, if (!dn || !of_device_is_available(dn)) { dev_warn(dev, "Missing property %s, skip scaling %s\n", adj->phandle_name, node->name); + of_node_put(dn); return 0; } -- GitLab From 6414b79d02c426b7dd7d942fc19fb38220ea44ec Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Mon, 28 Dec 2020 14:03:02 +0200 Subject: [PATCH 0079/2012] interconnect: imx: Remove a useless test 'dn' can't be NULL here, it is tested just the line above. Remove this useless test. Signed-off-by: Christophe JAILLET Link: https://lore.kernel.org/r/20201206121322.29434-1-christophe.jaillet@wanadoo.fr Signed-off-by: Georgi Djakov --- drivers/interconnect/imx/imx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/interconnect/imx/imx.c b/drivers/interconnect/imx/imx.c index e398ebf1dbbab..c770951a909c9 100644 --- a/drivers/interconnect/imx/imx.c +++ b/drivers/interconnect/imx/imx.c @@ -96,7 +96,7 @@ static int imx_icc_node_init_qos(struct icc_provider *provider, return -ENODEV; } /* Allow scaling to be disabled on a per-node basis */ - if (!dn || !of_device_is_available(dn)) { + if (!of_device_is_available(dn)) { dev_warn(dev, "Missing property %s, skip scaling %s\n", adj->phandle_name, node->name); of_node_put(dn); -- GitLab From 67288f74d4837b82ef937170da3389b0779c17be Mon Sep 17 00:00:00 2001 From: Martin Kepplinger Date: Mon, 28 Dec 2020 14:03:02 +0200 Subject: [PATCH 0080/2012] interconnect: imx8mq: Use icc_sync_state Add the icc_sync_state callback to notify the framework when consumers are probed and the bandwidth doesn't have to be kept at maximum anymore. Signed-off-by: Martin Kepplinger Suggested-by: Georgi Djakov Fixes: 7d3b0b0d8184 ("interconnect: qcom: Use icc_sync_state") Link: https://lore.kernel.org/r/20201210100906.18205-6-martin.kepplinger@puri.sm Signed-off-by: Georgi Djakov --- drivers/interconnect/imx/imx8mq.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/interconnect/imx/imx8mq.c b/drivers/interconnect/imx/imx8mq.c index ba43a15aefec0..d7768d3c6d8aa 100644 --- a/drivers/interconnect/imx/imx8mq.c +++ b/drivers/interconnect/imx/imx8mq.c @@ -7,6 +7,7 @@ #include #include +#include #include #include "imx.h" @@ -94,6 +95,7 @@ static struct platform_driver imx8mq_icc_driver = { .remove = imx8mq_icc_remove, .driver = { .name = "imx8mq-interconnect", + .sync_state = icc_sync_state, }, }; -- GitLab From 12b38ea040b3bb2a30eb9cd488376df5be7ea81f Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 13 Dec 2020 16:11:05 +0100 Subject: [PATCH 0081/2012] staging: spmi: hisi-spmi-controller: Fix some error handling paths IN the probe function, if an error occurs after calling 'spmi_controller_alloc()', it must be undone by a corresponding 'spmi_controller_put() call. In the remove function, use 'spmi_controller_put(ctrl)' instead of 'kfree(ctrl)'. While a it fix an error message (s/spmi_add_controller/spmi_controller_add/) Signed-off-by: Christophe JAILLET Link: https://lore.kernel.org/r/20201213151105.137731-1-christophe.jaillet@wanadoo.fr Signed-off-by: Greg Kroah-Hartman --- .../staging/hikey9xx/hisi-spmi-controller.c | 21 +++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/drivers/staging/hikey9xx/hisi-spmi-controller.c b/drivers/staging/hikey9xx/hisi-spmi-controller.c index 861aedd5de484..0d42bc65f39bc 100644 --- a/drivers/staging/hikey9xx/hisi-spmi-controller.c +++ b/drivers/staging/hikey9xx/hisi-spmi-controller.c @@ -278,21 +278,24 @@ static int spmi_controller_probe(struct platform_device *pdev) iores = platform_get_resource(pdev, IORESOURCE_MEM, 0); if (!iores) { dev_err(&pdev->dev, "can not get resource!\n"); - return -EINVAL; + ret = -EINVAL; + goto err_put_controller; } spmi_controller->base = devm_ioremap(&pdev->dev, iores->start, resource_size(iores)); if (!spmi_controller->base) { dev_err(&pdev->dev, "can not remap base addr!\n"); - return -EADDRNOTAVAIL; + ret = -EADDRNOTAVAIL; + goto err_put_controller; } ret = of_property_read_u32(pdev->dev.of_node, "spmi-channel", &spmi_controller->channel); if (ret) { dev_err(&pdev->dev, "can not get channel\n"); - return -ENODEV; + ret = -ENODEV; + goto err_put_controller; } platform_set_drvdata(pdev, spmi_controller); @@ -309,9 +312,15 @@ static int spmi_controller_probe(struct platform_device *pdev) ctrl->write_cmd = spmi_write_cmd; ret = spmi_controller_add(ctrl); - if (ret) - dev_err(&pdev->dev, "spmi_add_controller failed with error %d!\n", ret); + if (ret) { + dev_err(&pdev->dev, "spmi_controller_add failed with error %d!\n", ret); + goto err_put_controller; + } + + return 0; +err_put_controller: + spmi_controller_put(ctrl); return ret; } @@ -320,7 +329,7 @@ static int spmi_del_controller(struct platform_device *pdev) struct spmi_controller *ctrl = platform_get_drvdata(pdev); spmi_controller_remove(ctrl); - kfree(ctrl); + spmi_controller_put(ctrl); return 0; } -- GitLab From cab36da4bf1a35739b091b73714a39a1bbd02b05 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 2 Dec 2020 09:43:49 +0300 Subject: [PATCH 0082/2012] Staging: comedi: Return -EFAULT if copy_to_user() fails Return -EFAULT on error instead of the number of bytes remaining to be copied. Fixes: bac42fb21259 ("comedi: get rid of compat_alloc_user_space() mess in COMEDI_CMD{,TEST} compat") Signed-off-by: Dan Carpenter Cc: stable Link: https://lore.kernel.org/r/X8c3pfwFy2jpy4BP@mwanda Signed-off-by: Greg Kroah-Hartman --- drivers/staging/comedi/comedi_fops.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/staging/comedi/comedi_fops.c b/drivers/staging/comedi/comedi_fops.c index d99231c737fbf..80d74cce2a010 100644 --- a/drivers/staging/comedi/comedi_fops.c +++ b/drivers/staging/comedi/comedi_fops.c @@ -2987,7 +2987,9 @@ static int put_compat_cmd(struct comedi32_cmd_struct __user *cmd32, v32.chanlist_len = cmd->chanlist_len; v32.data = ptr_to_compat(cmd->data); v32.data_len = cmd->data_len; - return copy_to_user(cmd32, &v32, sizeof(v32)); + if (copy_to_user(cmd32, &v32, sizeof(v32))) + return -EFAULT; + return 0; } /* Handle 32-bit COMEDI_CMD ioctl. */ -- GitLab From d887d6104adeb94d1b926936ea21f07367f0ff9f Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 13 Dec 2020 16:35:13 +0100 Subject: [PATCH 0083/2012] staging: mt7621-dma: Fix a resource leak in an error handling path If an error occurs after calling 'mtk_hsdma_init()', it must be undone by a corresponding call to 'mtk_hsdma_uninit()' as already done in the remove function. Fixes: 0853c7a53eb3 ("staging: mt7621-dma: ralink: add rt2880 dma engine") Signed-off-by: Christophe JAILLET Cc: stable Link: https://lore.kernel.org/r/20201213153513.138723-1-christophe.jaillet@wanadoo.fr Signed-off-by: Greg Kroah-Hartman --- drivers/staging/mt7621-dma/mtk-hsdma.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/staging/mt7621-dma/mtk-hsdma.c b/drivers/staging/mt7621-dma/mtk-hsdma.c index d241349214e71..bc4bb43743131 100644 --- a/drivers/staging/mt7621-dma/mtk-hsdma.c +++ b/drivers/staging/mt7621-dma/mtk-hsdma.c @@ -712,7 +712,7 @@ static int mtk_hsdma_probe(struct platform_device *pdev) ret = dma_async_device_register(dd); if (ret) { dev_err(&pdev->dev, "failed to register dma device\n"); - return ret; + goto err_uninit_hsdma; } ret = of_dma_controller_register(pdev->dev.of_node, @@ -728,6 +728,8 @@ static int mtk_hsdma_probe(struct platform_device *pdev) err_unregister: dma_async_device_unregister(dd); +err_uninit_hsdma: + mtk_hsdma_uninit(hsdma); return ret; } -- GitLab From 275565997ade6fc32be9cd49a910ba996bcb4797 Mon Sep 17 00:00:00 2001 From: Jaroslav Kysela Date: Sun, 27 Dec 2020 17:40:37 +0100 Subject: [PATCH 0084/2012] ASoC: AMD Renoir - add DMI entry for Lenovo ThinkPad E14 Gen 2 The ThinkPad E14 Gen 2 latop does not have the internal digital microphone connected to the AMD's ACP bridge, but it's advertised via BIOS. The internal microphone is connected to the HDA codec. Use DMI to block the microphone PCM device for this platform. Reported-by: Eliot Blennerhassett Signed-off-by: Jaroslav Kysela Link: https://lore.kernel.org/r/20201227164037.269893-1-perex@perex.cz Signed-off-by: Mark Brown --- sound/soc/amd/renoir/rn-pci-acp3x.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/sound/soc/amd/renoir/rn-pci-acp3x.c b/sound/soc/amd/renoir/rn-pci-acp3x.c index fa169bf09886f..17ec35be73ac4 100644 --- a/sound/soc/amd/renoir/rn-pci-acp3x.c +++ b/sound/soc/amd/renoir/rn-pci-acp3x.c @@ -171,6 +171,13 @@ static const struct dmi_system_id rn_acp_quirk_table[] = { DMI_EXACT_MATCH(DMI_BOARD_NAME, "LNVNB161216"), } }, + { + /* Lenovo ThinkPad E14 Gen 2 */ + .matches = { + DMI_EXACT_MATCH(DMI_BOARD_VENDOR, "LENOVO"), + DMI_EXACT_MATCH(DMI_BOARD_NAME, "20T6CTO1WW"), + } + }, {} }; -- GitLab From a523e1538fdd5f00ea3289cc0b3c6c1785b89814 Mon Sep 17 00:00:00 2001 From: Ravulapati Vishnu vardhan rao Date: Tue, 22 Dec 2020 17:29:18 +0530 Subject: [PATCH 0085/2012] ASoC: amd: Replacing MSI with Legacy IRQ model When we try to play and capture simultaneously we see that interrupts are genrated but our handler is not being acknowledged, After investigating further more in detail on this issue we found that IRQ delivery via MSI from the ACP IP is unreliable and so sometimes interrupt generated will not be acknowledged so MSI model shouldn't be used and using legacy IRQs will resolve interrupt handling issue. This patch replaces MSI interrupt handling with legacy IRQ model. Issue can be reproduced easily by running below python script: import subprocess import time import threading def do2(): cmd = 'aplay -f dat -D hw:2,1 /dev/zero -d 1' subprocess.call(cmd, stdin=subprocess.PIPE, stderr=subprocess.PIPE, shell=True) print('Play Done') def run(): for i in range(1000): do2() def do(i): cmd = 'arecord -f dat -D hw:2,2 /dev/null -d 1' subprocess.call(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True) print(datetime.datetime.now(), i) t = threading.Thread(target=run) t.start() for i in range(1000): do(i) t.join() After applying this patch issue is resolved. Signed-off-by: Ravulapati Vishnu vardhan rao Link: https://lore.kernel.org/r/20201222115929.11222-1-Vishnuvardhanrao.Ravulapati@amd.com Signed-off-by: Mark Brown --- sound/soc/amd/raven/pci-acp3x.c | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) diff --git a/sound/soc/amd/raven/pci-acp3x.c b/sound/soc/amd/raven/pci-acp3x.c index 8c138e490f0c5..d3536fd6a1240 100644 --- a/sound/soc/amd/raven/pci-acp3x.c +++ b/sound/soc/amd/raven/pci-acp3x.c @@ -140,21 +140,14 @@ static int snd_acp3x_probe(struct pci_dev *pci, goto release_regions; } - /* check for msi interrupt support */ - ret = pci_enable_msi(pci); - if (ret) - /* msi is not enabled */ - irqflags = IRQF_SHARED; - else - /* msi is enabled */ - irqflags = 0; + irqflags = IRQF_SHARED; addr = pci_resource_start(pci, 0); adata->acp3x_base = devm_ioremap(&pci->dev, addr, pci_resource_len(pci, 0)); if (!adata->acp3x_base) { ret = -ENOMEM; - goto disable_msi; + goto release_regions; } pci_set_master(pci); pci_set_drvdata(pci, adata); @@ -162,7 +155,7 @@ static int snd_acp3x_probe(struct pci_dev *pci, adata->pme_en = rv_readl(adata->acp3x_base + mmACP_PME_EN); ret = acp3x_init(adata); if (ret) - goto disable_msi; + goto release_regions; val = rv_readl(adata->acp3x_base + mmACP_I2S_PIN_CONFIG); switch (val) { @@ -251,8 +244,6 @@ unregister_devs: de_init: if (acp3x_deinit(adata->acp3x_base)) dev_err(&pci->dev, "ACP de-init failed\n"); -disable_msi: - pci_disable_msi(pci); release_regions: pci_release_regions(pci); disable_pci: @@ -311,7 +302,6 @@ static void snd_acp3x_remove(struct pci_dev *pci) dev_err(&pci->dev, "ACP de-init failed\n"); pm_runtime_forbid(&pci->dev); pm_runtime_get_noresume(&pci->dev); - pci_disable_msi(pci); pci_release_regions(pci); pci_disable_device(pci); } -- GitLab From 1f092d1c8819679d78a7d9c62a46d4939d217a9d Mon Sep 17 00:00:00 2001 From: Jaroslav Kysela Date: Sun, 27 Dec 2020 17:41:09 +0100 Subject: [PATCH 0086/2012] ASoC: AMD Renoir - add DMI entry for Lenovo ThinkPad X395 The ThinkPad X395 latop does not have the internal digital microphone connected to the AMD's ACP bridge, but it's advertised via BIOS. The internal microphone is connected to the HDA codec. Use DMI to block the microphone PCM device for this platform. BugLink: https://bugzilla.redhat.com/show_bug.cgi?id=1892115 Cc: Signed-off-by: Jaroslav Kysela Link: https://lore.kernel.org/r/20201227164109.269973-1-perex@perex.cz Signed-off-by: Mark Brown --- sound/soc/amd/renoir/rn-pci-acp3x.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/sound/soc/amd/renoir/rn-pci-acp3x.c b/sound/soc/amd/renoir/rn-pci-acp3x.c index 17ec35be73ac4..deca8c7a0e878 100644 --- a/sound/soc/amd/renoir/rn-pci-acp3x.c +++ b/sound/soc/amd/renoir/rn-pci-acp3x.c @@ -178,6 +178,13 @@ static const struct dmi_system_id rn_acp_quirk_table[] = { DMI_EXACT_MATCH(DMI_BOARD_NAME, "20T6CTO1WW"), } }, + { + /* Lenovo ThinkPad X395 */ + .matches = { + DMI_EXACT_MATCH(DMI_BOARD_VENDOR, "LENOVO"), + DMI_EXACT_MATCH(DMI_BOARD_NAME, "20NLCTO1WW"), + } + }, {} }; -- GitLab From 0ffc76539e6e8d28114f95ac25c167c37b5191b3 Mon Sep 17 00:00:00 2001 From: Sean Young Date: Sun, 27 Dec 2020 13:45:02 +0000 Subject: [PATCH 0087/2012] USB: cdc-acm: blacklist another IR Droid device This device is supported by the IR Toy driver. Reported-by: Georgi Bakalski Signed-off-by: Sean Young Acked-by: Oliver Neukum Cc: stable Link: https://lore.kernel.org/r/20201227134502.4548-2-sean@mess.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/class/cdc-acm.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c index f52f1bc0559f9..781905745812e 100644 --- a/drivers/usb/class/cdc-acm.c +++ b/drivers/usb/class/cdc-acm.c @@ -1895,6 +1895,10 @@ static const struct usb_device_id acm_ids[] = { { USB_DEVICE(0x04d8, 0xfd08), .driver_info = IGNORE_DEVICE, }, + + { USB_DEVICE(0x04d8, 0xf58b), + .driver_info = IGNORE_DEVICE, + }, #endif /*Samsung phone in firmware update mode */ -- GitLab From 421da9413a6a5ec4334cade5092370cf2c8c8add Mon Sep 17 00:00:00 2001 From: Roger Quadros Date: Fri, 18 Dec 2020 12:57:36 +0200 Subject: [PATCH 0088/2012] MAINTAINERS: Update address for Cadence USB3 driver Updates my email address for Cadence USB3 driver. Signed-off-by: Roger Quadros Link: https://lore.kernel.org/r/20201218105736.17667-1-rogerq@ti.com Signed-off-by: Greg Kroah-Hartman --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 546aa66428c9f..afe3a5c66bc90 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3883,7 +3883,7 @@ F: drivers/mtd/nand/raw/cadence-nand-controller.c CADENCE USB3 DRD IP DRIVER M: Peter Chen M: Pawel Laszczak -M: Roger Quadros +R: Roger Quadros R: Aswath Govindraju L: linux-usb@vger.kernel.org S: Maintained -- GitLab From 88ebce92806e5dff3549e1a8cacb53978104d3b4 Mon Sep 17 00:00:00 2001 From: Aswath Govindraju Date: Tue, 15 Dec 2020 09:55:49 +0530 Subject: [PATCH 0089/2012] dt-bindings: usb: Add new compatible string for AM64 SoC Add compatible string in j721e-usb binding file as the same USB subsystem is present in AM64. Reviewed-by: Rob Herring Signed-off-by: Aswath Govindraju Link: https://lore.kernel.org/r/20201215042549.7956-1-a-govindraju@ti.com Signed-off-by: Greg Kroah-Hartman --- Documentation/devicetree/bindings/usb/ti,j721e-usb.yaml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/usb/ti,j721e-usb.yaml b/Documentation/devicetree/bindings/usb/ti,j721e-usb.yaml index 388245b91a55b..148b3fb4ceaf8 100644 --- a/Documentation/devicetree/bindings/usb/ti,j721e-usb.yaml +++ b/Documentation/devicetree/bindings/usb/ti,j721e-usb.yaml @@ -11,8 +11,12 @@ maintainers: properties: compatible: - items: + oneOf: - const: ti,j721e-usb + - const: ti,am64-usb + - items: + - const: ti,j721e-usb + - const: ti,am64-usb reg: description: module registers -- GitLab From a390bef7db1f192cc5b588dbcf8ed113406ec130 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Thu, 10 Dec 2020 18:04:13 -0300 Subject: [PATCH 0090/2012] usb: gadget: fsl_mxc_udc: Remove the driver Since 5.10-rc1 i.MX is a devicetree-only platform, and this driver was only used by the old non-DT i.MX devices. Remove the driver as it has no users left. Signed-off-by: Fabio Estevam Acked-by: Peter Chen Link: https://lore.kernel.org/r/20201210210413.15262-1-festevam@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/udc/Kconfig | 2 +- drivers/usb/gadget/udc/Makefile | 1 - drivers/usb/gadget/udc/fsl_mxc_udc.c | 122 --------------------------- 3 files changed, 1 insertion(+), 124 deletions(-) delete mode 100644 drivers/usb/gadget/udc/fsl_mxc_udc.c diff --git a/drivers/usb/gadget/udc/Kconfig b/drivers/usb/gadget/udc/Kconfig index 1a12aab208b46..8c614bb86c665 100644 --- a/drivers/usb/gadget/udc/Kconfig +++ b/drivers/usb/gadget/udc/Kconfig @@ -90,7 +90,7 @@ config USB_BCM63XX_UDC config USB_FSL_USB2 tristate "Freescale Highspeed USB DR Peripheral Controller" - depends on FSL_SOC || ARCH_MXC + depends on FSL_SOC help Some of Freescale PowerPC and i.MX processors have a High Speed Dual-Role(DR) USB controller, which supports device mode. diff --git a/drivers/usb/gadget/udc/Makefile b/drivers/usb/gadget/udc/Makefile index f5a7ce28aecdf..a21f2224e7eb7 100644 --- a/drivers/usb/gadget/udc/Makefile +++ b/drivers/usb/gadget/udc/Makefile @@ -23,7 +23,6 @@ obj-$(CONFIG_USB_ATMEL_USBA) += atmel_usba_udc.o obj-$(CONFIG_USB_BCM63XX_UDC) += bcm63xx_udc.o obj-$(CONFIG_USB_FSL_USB2) += fsl_usb2_udc.o fsl_usb2_udc-y := fsl_udc_core.o -fsl_usb2_udc-$(CONFIG_ARCH_MXC) += fsl_mxc_udc.o obj-$(CONFIG_USB_TEGRA_XUDC) += tegra-xudc.o obj-$(CONFIG_USB_M66592) += m66592-udc.o obj-$(CONFIG_USB_R8A66597) += r8a66597-udc.o diff --git a/drivers/usb/gadget/udc/fsl_mxc_udc.c b/drivers/usb/gadget/udc/fsl_mxc_udc.c deleted file mode 100644 index 5a321992decc8..0000000000000 --- a/drivers/usb/gadget/udc/fsl_mxc_udc.c +++ /dev/null @@ -1,122 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0+ -/* - * Copyright (C) 2009 - * Guennadi Liakhovetski, DENX Software Engineering, - * - * Description: - * Helper routines for i.MX3x SoCs from Freescale, needed by the fsl_usb2_udc.c - * driver to function correctly on these systems. - */ -#include -#include -#include -#include -#include -#include -#include - -#include "fsl_usb2_udc.h" - -static struct clk *mxc_ahb_clk; -static struct clk *mxc_per_clk; -static struct clk *mxc_ipg_clk; - -/* workaround ENGcm09152 for i.MX35 */ -#define MX35_USBPHYCTRL_OFFSET 0x600 -#define USBPHYCTRL_OTGBASE_OFFSET 0x8 -#define USBPHYCTRL_EVDO (1 << 23) - -int fsl_udc_clk_init(struct platform_device *pdev) -{ - struct fsl_usb2_platform_data *pdata; - unsigned long freq; - int ret; - - pdata = dev_get_platdata(&pdev->dev); - - mxc_ipg_clk = devm_clk_get(&pdev->dev, "ipg"); - if (IS_ERR(mxc_ipg_clk)) { - dev_err(&pdev->dev, "clk_get(\"ipg\") failed\n"); - return PTR_ERR(mxc_ipg_clk); - } - - mxc_ahb_clk = devm_clk_get(&pdev->dev, "ahb"); - if (IS_ERR(mxc_ahb_clk)) { - dev_err(&pdev->dev, "clk_get(\"ahb\") failed\n"); - return PTR_ERR(mxc_ahb_clk); - } - - mxc_per_clk = devm_clk_get(&pdev->dev, "per"); - if (IS_ERR(mxc_per_clk)) { - dev_err(&pdev->dev, "clk_get(\"per\") failed\n"); - return PTR_ERR(mxc_per_clk); - } - - clk_prepare_enable(mxc_ipg_clk); - clk_prepare_enable(mxc_ahb_clk); - clk_prepare_enable(mxc_per_clk); - - /* make sure USB_CLK is running at 60 MHz +/- 1000 Hz */ - if (!strcmp(pdev->id_entry->name, "imx-udc-mx27")) { - freq = clk_get_rate(mxc_per_clk); - if (pdata->phy_mode != FSL_USB2_PHY_ULPI && - (freq < 59999000 || freq > 60001000)) { - dev_err(&pdev->dev, "USB_CLK=%lu, should be 60MHz\n", freq); - ret = -EINVAL; - goto eclkrate; - } - } - - return 0; - -eclkrate: - clk_disable_unprepare(mxc_ipg_clk); - clk_disable_unprepare(mxc_ahb_clk); - clk_disable_unprepare(mxc_per_clk); - mxc_per_clk = NULL; - return ret; -} - -int fsl_udc_clk_finalize(struct platform_device *pdev) -{ - struct fsl_usb2_platform_data *pdata = dev_get_platdata(&pdev->dev); - int ret = 0; - - /* workaround ENGcm09152 for i.MX35 */ - if (pdata->workaround & FLS_USB2_WORKAROUND_ENGCM09152) { - unsigned int v; - struct resource *res = platform_get_resource - (pdev, IORESOURCE_MEM, 0); - void __iomem *phy_regs = ioremap(res->start + - MX35_USBPHYCTRL_OFFSET, 512); - if (!phy_regs) { - dev_err(&pdev->dev, "ioremap for phy address fails\n"); - ret = -EINVAL; - goto ioremap_err; - } - - v = readl(phy_regs + USBPHYCTRL_OTGBASE_OFFSET); - writel(v | USBPHYCTRL_EVDO, - phy_regs + USBPHYCTRL_OTGBASE_OFFSET); - - iounmap(phy_regs); - } - - -ioremap_err: - /* ULPI transceivers don't need usbpll */ - if (pdata->phy_mode == FSL_USB2_PHY_ULPI) { - clk_disable_unprepare(mxc_per_clk); - mxc_per_clk = NULL; - } - - return ret; -} - -void fsl_udc_clk_release(void) -{ - if (mxc_per_clk) - clk_disable_unprepare(mxc_per_clk); - clk_disable_unprepare(mxc_ahb_clk); - clk_disable_unprepare(mxc_ipg_clk); -} -- GitLab From 5e5ff0b4b6bcb4d17b7a26ec8bcfc7dd4651684f Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Sun, 20 Dec 2020 00:25:53 +0900 Subject: [PATCH 0091/2012] USB: cdc-wdm: Fix use after free in service_outstanding_interrupt(). syzbot is reporting UAF at usb_submit_urb() [1], for service_outstanding_interrupt() is not checking WDM_DISCONNECTING before calling usb_submit_urb(). Close the race by doing same checks wdm_read() does upon retry. Also, while wdm_read() checks WDM_DISCONNECTING with desc->rlock held, service_interrupt_work() does not hold desc->rlock. Thus, it is possible that usb_submit_urb() is called from service_outstanding_interrupt() from service_interrupt_work() after WDM_DISCONNECTING was set and kill_urbs() from wdm_disconnect() completed. Thus, move kill_urbs() in wdm_disconnect() to after cancel_work_sync() (which makes sure that service_interrupt_work() is no longer running) completed. Although it seems to be safe to dereference desc->intf->dev in service_outstanding_interrupt() even if WDM_DISCONNECTING was already set because desc->rlock or cancel_work_sync() prevents wdm_disconnect() from reaching list_del() before service_outstanding_interrupt() completes, let's not emit error message if WDM_DISCONNECTING is set by wdm_disconnect() while usb_submit_urb() is in progress. [1] https://syzkaller.appspot.com/bug?extid=9e04e2df4a32fb661daf Reported-by: syzbot Signed-off-by: Tetsuo Handa Cc: stable Link: https://lore.kernel.org/r/620e2ee0-b9a3-dbda-a25b-a93e0ed03ec5@i-love.sakura.ne.jp Signed-off-by: Greg Kroah-Hartman --- drivers/usb/class/cdc-wdm.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/drivers/usb/class/cdc-wdm.c b/drivers/usb/class/cdc-wdm.c index 02d0cfd23bb29..508b1c3f8b731 100644 --- a/drivers/usb/class/cdc-wdm.c +++ b/drivers/usb/class/cdc-wdm.c @@ -465,13 +465,23 @@ static int service_outstanding_interrupt(struct wdm_device *desc) if (!desc->resp_count || !--desc->resp_count) goto out; + if (test_bit(WDM_DISCONNECTING, &desc->flags)) { + rv = -ENODEV; + goto out; + } + if (test_bit(WDM_RESETTING, &desc->flags)) { + rv = -EIO; + goto out; + } + set_bit(WDM_RESPONDING, &desc->flags); spin_unlock_irq(&desc->iuspin); rv = usb_submit_urb(desc->response, GFP_KERNEL); spin_lock_irq(&desc->iuspin); if (rv) { - dev_err(&desc->intf->dev, - "usb_submit_urb failed with result %d\n", rv); + if (!test_bit(WDM_DISCONNECTING, &desc->flags)) + dev_err(&desc->intf->dev, + "usb_submit_urb failed with result %d\n", rv); /* make sure the next notification trigger a submit */ clear_bit(WDM_RESPONDING, &desc->flags); @@ -1027,9 +1037,9 @@ static void wdm_disconnect(struct usb_interface *intf) wake_up_all(&desc->wait); mutex_lock(&desc->rlock); mutex_lock(&desc->wlock); - kill_urbs(desc); cancel_work_sync(&desc->rxwork); cancel_work_sync(&desc->service_outs_intr); + kill_urbs(desc); mutex_unlock(&desc->wlock); mutex_unlock(&desc->rlock); -- GitLab From 0f041b8592daaaea46e91a8ebb3b47e6e0171fd8 Mon Sep 17 00:00:00 2001 From: Madhusudanarao Amara Date: Wed, 16 Dec 2020 19:39:18 +0530 Subject: [PATCH 0092/2012] usb: typec: intel_pmc_mux: Configure HPD first for HPD+IRQ request Warm reboot scenarios some times type C Mux driver gets Mux configuration request as HPD=1,IRQ=1. In that scenario typeC Mux driver need to configure Mux as follows as per IOM requirement: (1). Confgiure Mux HPD = 1, IRQ = 0 (2). Configure Mux with HPD = 1, IRQ = 1 IOM expects TypeC Mux configuration as follows: (1). HPD=1, IRQ=0 (2). HPD=1, IRQ=1 if IOM gets mux config request (2) without configuring (1), it will ignore the request. The impact of this is there is no DP_alt mode display. Fixes: 43d596e32276 ("usb: typec: intel_pmc_mux: Check the port status before connect") Cc: stable@vger.kernel.org Reviewed-by: Heikki Krogerus Signed-off-by: Madhusudanarao Amara Link: https://lore.kernel.org/r/20201216140918.49197-1-madhusudanarao.amara@intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/mux/intel_pmc_mux.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/usb/typec/mux/intel_pmc_mux.c b/drivers/usb/typec/mux/intel_pmc_mux.c index cf37a59ce1304..46a25b8db72e5 100644 --- a/drivers/usb/typec/mux/intel_pmc_mux.c +++ b/drivers/usb/typec/mux/intel_pmc_mux.c @@ -207,10 +207,21 @@ static int pmc_usb_mux_dp_hpd(struct pmc_usb_port *port, struct typec_displayport_data *dp) { u8 msg[2] = { }; + int ret; msg[0] = PMC_USB_DP_HPD; msg[0] |= port->usb3_port << PMC_USB_MSG_USB3_PORT_SHIFT; + /* Configure HPD first if HPD,IRQ comes together */ + if (!IOM_PORT_HPD_ASSERTED(port->iom_status) && + dp->status & DP_STATUS_IRQ_HPD && + dp->status & DP_STATUS_HPD_STATE) { + msg[1] = PMC_USB_DP_HPD_LVL; + ret = pmc_usb_command(port, msg, sizeof(msg)); + if (ret) + return ret; + } + if (dp->status & DP_STATUS_IRQ_HPD) msg[1] = PMC_USB_DP_HPD_IRQ; -- GitLab From 5d5323a6f3625f101dbfa94ba3ef7706cce38760 Mon Sep 17 00:00:00 2001 From: Michael Grzeschik Date: Tue, 15 Dec 2020 20:31:47 +0100 Subject: [PATCH 0093/2012] USB: xhci: fix U1/U2 handling for hardware with XHCI_INTEL_HOST quirk set The commit 0472bf06c6fd ("xhci: Prevent U1/U2 link pm states if exit latency is too long") was constraining the xhci code not to allow U1/U2 sleep states if the latency to wake up from the U-states reached the service interval of an periodic endpoint. This fix was not taking into account that in case the quirk XHCI_INTEL_HOST is set, the wakeup time will be calculated and configured differently. It checks for u1_params.mel/u2_params.mel as a limit. But the code could decide to write another MEL into the hardware. This leads to broken cases where not enough bandwidth is available for other devices: usb 1-2: can't set config #1, error -28 This patch is fixing that case by checking for timeout_ns after the wakeup time was calculated depending on the quirks. Fixes: 0472bf06c6fd ("xhci: Prevent U1/U2 link pm states if exit latency is too long") Signed-off-by: Michael Grzeschik Cc: stable Link: https://lore.kernel.org/r/20201215193147.11738-1-m.grzeschik@pengutronix.de Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index 91ab81c3fc79a..e86940571b4cf 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -4770,19 +4770,19 @@ static u16 xhci_calculate_u1_timeout(struct xhci_hcd *xhci, { unsigned long long timeout_ns; + if (xhci->quirks & XHCI_INTEL_HOST) + timeout_ns = xhci_calculate_intel_u1_timeout(udev, desc); + else + timeout_ns = udev->u1_params.sel; + /* Prevent U1 if service interval is shorter than U1 exit latency */ if (usb_endpoint_xfer_int(desc) || usb_endpoint_xfer_isoc(desc)) { - if (xhci_service_interval_to_ns(desc) <= udev->u1_params.mel) { + if (xhci_service_interval_to_ns(desc) <= timeout_ns) { dev_dbg(&udev->dev, "Disable U1, ESIT shorter than exit latency\n"); return USB3_LPM_DISABLED; } } - if (xhci->quirks & XHCI_INTEL_HOST) - timeout_ns = xhci_calculate_intel_u1_timeout(udev, desc); - else - timeout_ns = udev->u1_params.sel; - /* The U1 timeout is encoded in 1us intervals. * Don't return a timeout of zero, because that's USB3_LPM_DISABLED. */ @@ -4834,19 +4834,19 @@ static u16 xhci_calculate_u2_timeout(struct xhci_hcd *xhci, { unsigned long long timeout_ns; + if (xhci->quirks & XHCI_INTEL_HOST) + timeout_ns = xhci_calculate_intel_u2_timeout(udev, desc); + else + timeout_ns = udev->u2_params.sel; + /* Prevent U2 if service interval is shorter than U2 exit latency */ if (usb_endpoint_xfer_int(desc) || usb_endpoint_xfer_isoc(desc)) { - if (xhci_service_interval_to_ns(desc) <= udev->u2_params.mel) { + if (xhci_service_interval_to_ns(desc) <= timeout_ns) { dev_dbg(&udev->dev, "Disable U2, ESIT shorter than exit latency\n"); return USB3_LPM_DISABLED; } } - if (xhci->quirks & XHCI_INTEL_HOST) - timeout_ns = xhci_calculate_intel_u2_timeout(udev, desc); - else - timeout_ns = udev->u2_params.sel; - /* The U2 timeout is encoded in 256us intervals */ timeout_ns = DIV_ROUND_UP_ULL(timeout_ns, 256 * 1000); /* If the necessary timeout value is bigger than what we can set in the -- GitLab From a5ada3dfe6a20f41f91448b9034a1ef8da3dc87d Mon Sep 17 00:00:00 2001 From: Zheng Zengkai Date: Tue, 15 Dec 2020 10:54:59 +0800 Subject: [PATCH 0094/2012] usb: dwc3: meson-g12a: disable clk on error handling path in probe dwc3_meson_g12a_probe() does not invoke clk_bulk_disable_unprepare() on one error handling path. This patch fixes that. Fixes: 347052e3bf1b ("usb: dwc3: meson-g12a: fix USB2 PHY initialization on G12A and A1 SoCs") Reported-by: Hulk Robot Signed-off-by: Zheng Zengkai Cc: stable Reviewed-by: Martin Blumenstingl Link: https://lore.kernel.org/r/20201215025459.91794-1-zhengzengkai@huawei.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/dwc3-meson-g12a.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/dwc3/dwc3-meson-g12a.c b/drivers/usb/dwc3/dwc3-meson-g12a.c index 417e05381b5d0..bdf1f98dfad8c 100644 --- a/drivers/usb/dwc3/dwc3-meson-g12a.c +++ b/drivers/usb/dwc3/dwc3-meson-g12a.c @@ -754,7 +754,7 @@ static int dwc3_meson_g12a_probe(struct platform_device *pdev) ret = priv->drvdata->setup_regmaps(priv, base); if (ret) - return ret; + goto err_disable_clks; if (priv->vbus) { ret = regulator_enable(priv->vbus); -- GitLab From 2cc332e4ee4febcbb685e2962ad323fe4b3b750a Mon Sep 17 00:00:00 2001 From: Zqiang Date: Thu, 10 Dec 2020 10:01:48 +0800 Subject: [PATCH 0095/2012] usb: gadget: function: printer: Fix a memory leak for interface descriptor When printer driver is loaded, the printer_func_bind function is called, in this function, the interface descriptor be allocated memory, if after that, the error occurred, the interface descriptor memory need to be free. Reviewed-by: Peter Chen Cc: Signed-off-by: Zqiang Link: https://lore.kernel.org/r/20201210020148.6691-1-qiang.zhang@windriver.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/function/f_printer.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/gadget/function/f_printer.c b/drivers/usb/gadget/function/f_printer.c index 64a4112068fc8..2f1eb2e81d306 100644 --- a/drivers/usb/gadget/function/f_printer.c +++ b/drivers/usb/gadget/function/f_printer.c @@ -1162,6 +1162,7 @@ fail_tx_reqs: printer_req_free(dev->in_ep, req); } + usb_free_all_descriptors(f); return ret; } -- GitLab From 5cc35c224a80aa5a5a539510ef049faf0d6ed181 Mon Sep 17 00:00:00 2001 From: Sriharsha Allenki Date: Wed, 2 Dec 2020 18:32:20 +0530 Subject: [PATCH 0096/2012] usb: gadget: Fix spinlock lockup on usb_function_deactivate There is a spinlock lockup as part of composite_disconnect when it tries to acquire cdev->lock as part of usb_gadget_deactivate. This is because the usb_gadget_deactivate is called from usb_function_deactivate with the same spinlock held. This would result in the below call stack and leads to stall. rcu: INFO: rcu_preempt detected stalls on CPUs/tasks: rcu: 3-...0: (1 GPs behind) idle=162/1/0x4000000000000000 softirq=10819/10819 fqs=2356 (detected by 2, t=5252 jiffies, g=20129, q=3770) Task dump for CPU 3: task:uvc-gadget_wlhe state:R running task stack: 0 pid: 674 ppid: 636 flags:0x00000202 Call trace: __switch_to+0xc0/0x170 _raw_spin_lock_irqsave+0x84/0xb0 composite_disconnect+0x28/0x78 configfs_composite_disconnect+0x68/0x70 usb_gadget_disconnect+0x10c/0x128 usb_gadget_deactivate+0xd4/0x108 usb_function_deactivate+0x6c/0x80 uvc_function_disconnect+0x20/0x58 uvc_v4l2_release+0x30/0x88 v4l2_release+0xbc/0xf0 __fput+0x7c/0x230 ____fput+0x14/0x20 task_work_run+0x88/0x140 do_notify_resume+0x240/0x6f0 work_pending+0x8/0x200 Fix this by doing an unlock on cdev->lock before the usb_gadget_deactivate call from usb_function_deactivate. The same lockup can happen in the usb_gadget_activate path. Fix that path as well. Reported-by: Peter Chen Link: https://lore.kernel.org/linux-usb/20201102094936.GA29581@b29397-desktop/ Tested-by: Peter Chen Signed-off-by: Sriharsha Allenki Cc: stable Link: https://lore.kernel.org/r/20201202130220.24926-1-sallenki@codeaurora.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/composite.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/usb/gadget/composite.c b/drivers/usb/gadget/composite.c index c6d455f2bb928..1a556a628971f 100644 --- a/drivers/usb/gadget/composite.c +++ b/drivers/usb/gadget/composite.c @@ -392,8 +392,11 @@ int usb_function_deactivate(struct usb_function *function) spin_lock_irqsave(&cdev->lock, flags); - if (cdev->deactivations == 0) + if (cdev->deactivations == 0) { + spin_unlock_irqrestore(&cdev->lock, flags); status = usb_gadget_deactivate(cdev->gadget); + spin_lock_irqsave(&cdev->lock, flags); + } if (status == 0) cdev->deactivations++; @@ -424,8 +427,11 @@ int usb_function_activate(struct usb_function *function) status = -EINVAL; else { cdev->deactivations--; - if (cdev->deactivations == 0) + if (cdev->deactivations == 0) { + spin_unlock_irqrestore(&cdev->lock, flags); status = usb_gadget_activate(cdev->gadget); + spin_lock_irqsave(&cdev->lock, flags); + } } spin_unlock_irqrestore(&cdev->lock, flags); -- GitLab From c91d3a6bcaa031f551ba29a496a8027b31289464 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Tue, 17 Nov 2020 17:29:55 +0800 Subject: [PATCH 0097/2012] USB: gadget: legacy: fix return error code in acm_ms_bind() If usb_otg_descriptor_alloc() failed, it need return ENOMEM. Fixes: 578aa8a2b12c ("usb: gadget: acm_ms: allocate and init otg descriptor by otg capabilities") Reported-by: Hulk Robot Signed-off-by: Yang Yingliang Cc: stable Link: https://lore.kernel.org/r/20201117092955.4102785-1-yangyingliang@huawei.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/legacy/acm_ms.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/usb/gadget/legacy/acm_ms.c b/drivers/usb/gadget/legacy/acm_ms.c index 59be2d8417c9c..e8033e5f0c18e 100644 --- a/drivers/usb/gadget/legacy/acm_ms.c +++ b/drivers/usb/gadget/legacy/acm_ms.c @@ -200,8 +200,10 @@ static int acm_ms_bind(struct usb_composite_dev *cdev) struct usb_descriptor_header *usb_desc; usb_desc = usb_otg_descriptor_alloc(gadget); - if (!usb_desc) + if (!usb_desc) { + status = -ENOMEM; goto fail_string_ids; + } usb_otg_descriptor_init(gadget, usb_desc); otg_desc[0] = usb_desc; otg_desc[1] = NULL; -- GitLab From 0a88fa221ce911c331bf700d2214c5b2f77414d3 Mon Sep 17 00:00:00 2001 From: Manish Narani Date: Tue, 17 Nov 2020 12:43:35 +0530 Subject: [PATCH 0098/2012] usb: gadget: u_ether: Fix MTU size mismatch with RX packet size Fix the MTU size issue with RX packet size as the host sends the packet with extra bytes containing ethernet header. This causes failure when user sets the MTU size to the maximum i.e. 15412. In this case the ethernet packet received will be of length 15412 plus the ethernet header length. This patch fixes the issue where there is a check that RX packet length must not be more than max packet length. Fixes: bba787a860fa ("usb: gadget: ether: Allow jumbo frames") Signed-off-by: Manish Narani Cc: stable Link: https://lore.kernel.org/r/1605597215-122027-1-git-send-email-manish.narani@xilinx.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/function/u_ether.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/usb/gadget/function/u_ether.c b/drivers/usb/gadget/function/u_ether.c index 31ea76adcc0db..c019f2b0c0af3 100644 --- a/drivers/usb/gadget/function/u_ether.c +++ b/drivers/usb/gadget/function/u_ether.c @@ -45,9 +45,10 @@ #define UETH__VERSION "29-May-2008" /* Experiments show that both Linux and Windows hosts allow up to 16k - * frame sizes. Set the max size to 15k+52 to prevent allocating 32k + * frame sizes. Set the max MTU size to 15k+52 to prevent allocating 32k * blocks and still have efficient handling. */ -#define GETHER_MAX_ETH_FRAME_LEN 15412 +#define GETHER_MAX_MTU_SIZE 15412 +#define GETHER_MAX_ETH_FRAME_LEN (GETHER_MAX_MTU_SIZE + ETH_HLEN) struct eth_dev { /* lock is held while accessing port_usb @@ -786,7 +787,7 @@ struct eth_dev *gether_setup_name(struct usb_gadget *g, /* MTU range: 14 - 15412 */ net->min_mtu = ETH_HLEN; - net->max_mtu = GETHER_MAX_ETH_FRAME_LEN; + net->max_mtu = GETHER_MAX_MTU_SIZE; dev->gadget = g; SET_NETDEV_DEV(net, &g->dev); @@ -848,7 +849,7 @@ struct net_device *gether_setup_name_default(const char *netname) /* MTU range: 14 - 15412 */ net->min_mtu = ETH_HLEN; - net->max_mtu = GETHER_MAX_ETH_FRAME_LEN; + net->max_mtu = GETHER_MAX_MTU_SIZE; return net; } -- GitLab From 83a43ff80a566de8718dfc6565545a0080ec1fb5 Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Tue, 17 Nov 2020 09:14:30 +0800 Subject: [PATCH 0099/2012] usb: chipidea: ci_hdrc_imx: add missing put_device() call in usbmisc_get_init_data() if of_find_device_by_node() succeed, usbmisc_get_init_data() doesn't have a corresponding put_device(). Thus add put_device() to fix the exception handling for this function implementation. Fixes: ef12da914ed6 ("usb: chipidea: imx: properly check for usbmisc") Signed-off-by: Yu Kuai Cc: stable Link: https://lore.kernel.org/r/20201117011430.642589-1-yukuai3@huawei.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/chipidea/ci_hdrc_imx.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/usb/chipidea/ci_hdrc_imx.c b/drivers/usb/chipidea/ci_hdrc_imx.c index 9e12152ea46bc..8b7bc10b6e8b4 100644 --- a/drivers/usb/chipidea/ci_hdrc_imx.c +++ b/drivers/usb/chipidea/ci_hdrc_imx.c @@ -139,9 +139,13 @@ static struct imx_usbmisc_data *usbmisc_get_init_data(struct device *dev) misc_pdev = of_find_device_by_node(args.np); of_node_put(args.np); - if (!misc_pdev || !platform_get_drvdata(misc_pdev)) + if (!misc_pdev) return ERR_PTR(-EPROBE_DEFER); + if (!platform_get_drvdata(misc_pdev)) { + put_device(&misc_pdev->dev); + return ERR_PTR(-EPROBE_DEFER); + } data->dev = &misc_pdev->dev; /* -- GitLab From 372c93131998c0622304bed118322d2a04489e63 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 14 Dec 2020 11:30:53 +0100 Subject: [PATCH 0100/2012] USB: yurex: fix control-URB timeout handling Make sure to always cancel the control URB in write() so that it can be reused after a timeout or spurious CMD_ACK. Currently any further write requests after a timeout would fail after triggering a WARN() in usb_submit_urb() when attempting to submit the already active URB. Reported-by: syzbot+e87ebe0f7913f71f2ea5@syzkaller.appspotmail.com Fixes: 6bc235a2e24a ("USB: add driver for Meywa-Denki & Kayac YUREX") Cc: stable # 2.6.37 Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/misc/yurex.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/misc/yurex.c b/drivers/usb/misc/yurex.c index 73ebfa6e9715e..c640f98d20c54 100644 --- a/drivers/usb/misc/yurex.c +++ b/drivers/usb/misc/yurex.c @@ -496,6 +496,9 @@ static ssize_t yurex_write(struct file *file, const char __user *user_buffer, timeout = schedule_timeout(YUREX_WRITE_TIMEOUT); finish_wait(&dev->waitq, &wait); + /* make sure URB is idle after timeout or (spurious) CMD_ACK */ + usb_kill_urb(dev->cntl_urb); + mutex_unlock(&dev->io_mutex); if (retval < 0) { -- GitLab From ce722da66d3e9384aa2de9d33d584ee154e5e157 Mon Sep 17 00:00:00 2001 From: Serge Semin Date: Thu, 10 Dec 2020 11:50:06 +0300 Subject: [PATCH 0101/2012] usb: dwc3: ulpi: Use VStsDone to detect PHY regs access completion In accordance with [1] the DWC_usb3 core sets the GUSB2PHYACCn.VStsDone bit when the PHY vendor control access is done and clears it when the application initiates a new transaction. The doc doesn't say anything about the GUSB2PHYACCn.VStsBsy flag serving for the same purpose. Moreover we've discovered that the VStsBsy flag can be cleared before the VStsDone bit. So using the former as a signal of the PHY control registers completion might be dangerous. Let's have the VStsDone flag utilized instead then. [1] Synopsys DesignWare Cores SuperSpeed USB 3.0 xHCI Host Controller Databook, 2.70a, December 2013, p.388 Fixes: 88bc9d194ff6 ("usb: dwc3: add ULPI interface support") Acked-by: Heikki Krogerus Signed-off-by: Serge Semin Link: https://lore.kernel.org/r/20201210085008.13264-2-Sergey.Semin@baikalelectronics.ru Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/core.h | 1 + drivers/usb/dwc3/ulpi.c | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/usb/dwc3/core.h b/drivers/usb/dwc3/core.h index 2f95f08ca5119..1b241f937d8f4 100644 --- a/drivers/usb/dwc3/core.h +++ b/drivers/usb/dwc3/core.h @@ -285,6 +285,7 @@ /* Global USB2 PHY Vendor Control Register */ #define DWC3_GUSB2PHYACC_NEWREGREQ BIT(25) +#define DWC3_GUSB2PHYACC_DONE BIT(24) #define DWC3_GUSB2PHYACC_BUSY BIT(23) #define DWC3_GUSB2PHYACC_WRITE BIT(22) #define DWC3_GUSB2PHYACC_ADDR(n) (n << 16) diff --git a/drivers/usb/dwc3/ulpi.c b/drivers/usb/dwc3/ulpi.c index aa213c9815f67..3cc4f4970c050 100644 --- a/drivers/usb/dwc3/ulpi.c +++ b/drivers/usb/dwc3/ulpi.c @@ -24,7 +24,7 @@ static int dwc3_ulpi_busyloop(struct dwc3 *dwc) while (count--) { reg = dwc3_readl(dwc->regs, DWC3_GUSB2PHYACC(0)); - if (!(reg & DWC3_GUSB2PHYACC_BUSY)) + if (reg & DWC3_GUSB2PHYACC_DONE) return 0; cpu_relax(); } -- GitLab From fca3f138105727c3a22edda32d02f91ce1bf11c9 Mon Sep 17 00:00:00 2001 From: Serge Semin Date: Thu, 10 Dec 2020 11:50:07 +0300 Subject: [PATCH 0102/2012] usb: dwc3: ulpi: Replace CPU-based busyloop with Protocol-based one Originally the procedure of the ULPI transaction finish detection has been developed as a simple busy-loop with just decrementing counter and no delays. It's wrong since on different systems the loop will take a different time to complete. So if the system bus and CPU are fast enough to overtake the ULPI bus and the companion PHY reaction, then we'll get to take a false timeout error. Fix this by converting the busy-loop procedure to take the standard bus speed, address value and the registers access mode into account for the busy-loop delay calculation. Here is the way the fix works. It's known that the ULPI bus is clocked with 60MHz signal. In accordance with [1] the ULPI bus protocol is created so to spend 5 and 6 clock periods for immediate register write and read operations respectively, and 6 and 7 clock periods - for the extended register writes and reads. Based on that we can easily pre-calculate the time which will be needed for the controller to perform a requested IO operation. Note we'll still preserve the attempts counter in case if the DWC USB3 controller has got some internals delays. [1] UTMI+ Low Pin Interface (ULPI) Specification, Revision 1.1, October 20, 2004, pp. 30 - 36. Fixes: 88bc9d194ff6 ("usb: dwc3: add ULPI interface support") Acked-by: Heikki Krogerus Signed-off-by: Serge Semin Link: https://lore.kernel.org/r/20201210085008.13264-3-Sergey.Semin@baikalelectronics.ru Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/ulpi.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/drivers/usb/dwc3/ulpi.c b/drivers/usb/dwc3/ulpi.c index 3cc4f4970c050..54c877f7b51db 100644 --- a/drivers/usb/dwc3/ulpi.c +++ b/drivers/usb/dwc3/ulpi.c @@ -7,6 +7,8 @@ * Author: Heikki Krogerus */ +#include +#include #include #include "core.h" @@ -17,12 +19,22 @@ DWC3_GUSB2PHYACC_ADDR(ULPI_ACCESS_EXTENDED) | \ DWC3_GUSB2PHYACC_EXTEND_ADDR(a) : DWC3_GUSB2PHYACC_ADDR(a)) -static int dwc3_ulpi_busyloop(struct dwc3 *dwc) +#define DWC3_ULPI_BASE_DELAY DIV_ROUND_UP(NSEC_PER_SEC, 60000000L) + +static int dwc3_ulpi_busyloop(struct dwc3 *dwc, u8 addr, bool read) { + unsigned long ns = 5L * DWC3_ULPI_BASE_DELAY; unsigned int count = 1000; u32 reg; + if (addr >= ULPI_EXT_VENDOR_SPECIFIC) + ns += DWC3_ULPI_BASE_DELAY; + + if (read) + ns += DWC3_ULPI_BASE_DELAY; + while (count--) { + ndelay(ns); reg = dwc3_readl(dwc->regs, DWC3_GUSB2PHYACC(0)); if (reg & DWC3_GUSB2PHYACC_DONE) return 0; @@ -47,7 +59,7 @@ static int dwc3_ulpi_read(struct device *dev, u8 addr) reg = DWC3_GUSB2PHYACC_NEWREGREQ | DWC3_ULPI_ADDR(addr); dwc3_writel(dwc->regs, DWC3_GUSB2PHYACC(0), reg); - ret = dwc3_ulpi_busyloop(dwc); + ret = dwc3_ulpi_busyloop(dwc, addr, true); if (ret) return ret; @@ -71,7 +83,7 @@ static int dwc3_ulpi_write(struct device *dev, u8 addr, u8 val) reg |= DWC3_GUSB2PHYACC_WRITE | val; dwc3_writel(dwc->regs, DWC3_GUSB2PHYACC(0), reg); - return dwc3_ulpi_busyloop(dwc); + return dwc3_ulpi_busyloop(dwc, addr, false); } static const struct ulpi_ops dwc3_ulpi_ops = { -- GitLab From e5f4ca3fce90a37b23a77bfcc86800d484a80514 Mon Sep 17 00:00:00 2001 From: Serge Semin Date: Thu, 10 Dec 2020 11:50:08 +0300 Subject: [PATCH 0103/2012] usb: dwc3: ulpi: Fix USB2.0 HS/FS/LS PHY suspend regression First of all the commit e0082698b689 ("usb: dwc3: ulpi: conditionally resume ULPI PHY") introduced the Suspend USB2.0 HS/FS/LS PHY regression, as by design of the fix any attempt to read/write from/to the PHY control registers will completely disable the PHY suspension, which consequently will increase the USB bus power consumption. Secondly the fix won't work well for the very first attempt of the ULPI PHY control registers IO, because after disabling the USB2.0 PHY suspension functionality it will still take some time for the bus to resume from the sleep state if one has been reached before it. So the very first PHY register read/write operation will take more time than the busy-loop provides and the IO timeout error might be returned anyway. Here we suggest to fix the denoted problems in the following way. First of all let's not disable the Suspend USB2.0 HS/FS/LS PHY functionality so to make the controller and the USB2.0 bus more power efficient. Secondly instead of that we'll extend the PHY IO op wait procedure with 1 - 1.2 ms sleep if the PHY suspension is enabled (1ms should be enough as by LPM specification it is at most how long it takes for the USB2.0 bus to resume from L1 (Sleep) state). Finally in case if the USB2.0 PHY suspension functionality has been disabled on the DWC USB3 controller setup procedure we'll compensate the USB bus resume process latency by extending the busy-loop attempts counter. Fixes: e0082698b689 ("usb: dwc3: ulpi: conditionally resume ULPI PHY") Acked-by: Heikki Krogerus Signed-off-by: Serge Semin Link: https://lore.kernel.org/r/20201210085008.13264-4-Sergey.Semin@baikalelectronics.ru Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/ulpi.c | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) diff --git a/drivers/usb/dwc3/ulpi.c b/drivers/usb/dwc3/ulpi.c index 54c877f7b51db..f23f4c9a557e9 100644 --- a/drivers/usb/dwc3/ulpi.c +++ b/drivers/usb/dwc3/ulpi.c @@ -24,7 +24,7 @@ static int dwc3_ulpi_busyloop(struct dwc3 *dwc, u8 addr, bool read) { unsigned long ns = 5L * DWC3_ULPI_BASE_DELAY; - unsigned int count = 1000; + unsigned int count = 10000; u32 reg; if (addr >= ULPI_EXT_VENDOR_SPECIFIC) @@ -33,6 +33,10 @@ static int dwc3_ulpi_busyloop(struct dwc3 *dwc, u8 addr, bool read) if (read) ns += DWC3_ULPI_BASE_DELAY; + reg = dwc3_readl(dwc->regs, DWC3_GUSB2PHYCFG(0)); + if (reg & DWC3_GUSB2PHYCFG_SUSPHY) + usleep_range(1000, 1200); + while (count--) { ndelay(ns); reg = dwc3_readl(dwc->regs, DWC3_GUSB2PHYACC(0)); @@ -50,12 +54,6 @@ static int dwc3_ulpi_read(struct device *dev, u8 addr) u32 reg; int ret; - reg = dwc3_readl(dwc->regs, DWC3_GUSB2PHYCFG(0)); - if (reg & DWC3_GUSB2PHYCFG_SUSPHY) { - reg &= ~DWC3_GUSB2PHYCFG_SUSPHY; - dwc3_writel(dwc->regs, DWC3_GUSB2PHYCFG(0), reg); - } - reg = DWC3_GUSB2PHYACC_NEWREGREQ | DWC3_ULPI_ADDR(addr); dwc3_writel(dwc->regs, DWC3_GUSB2PHYACC(0), reg); @@ -73,12 +71,6 @@ static int dwc3_ulpi_write(struct device *dev, u8 addr, u8 val) struct dwc3 *dwc = dev_get_drvdata(dev); u32 reg; - reg = dwc3_readl(dwc->regs, DWC3_GUSB2PHYCFG(0)); - if (reg & DWC3_GUSB2PHYCFG_SUSPHY) { - reg &= ~DWC3_GUSB2PHYCFG_SUSPHY; - dwc3_writel(dwc->regs, DWC3_GUSB2PHYCFG(0), reg); - } - reg = DWC3_GUSB2PHYACC_NEWREGREQ | DWC3_ULPI_ADDR(addr); reg |= DWC3_GUSB2PHYACC_WRITE | val; dwc3_writel(dwc->regs, DWC3_GUSB2PHYACC(0), reg); -- GitLab From 9389044f27081d6ec77730c36d5bf9a1288bcda2 Mon Sep 17 00:00:00 2001 From: Jerome Brunet Date: Mon, 21 Dec 2020 18:35:28 +0100 Subject: [PATCH 0104/2012] usb: gadget: f_uac2: reset wMaxPacketSize With commit 913e4a90b6f9 ("usb: gadget: f_uac2: finalize wMaxPacketSize according to bandwidth") wMaxPacketSize is computed dynamically but the value is never reset. Because of this, the actual maximum packet size can only decrease each time the audio gadget is instantiated. Reset the endpoint maximum packet size and mark wMaxPacketSize as dynamic to solve the problem. Fixes: 913e4a90b6f9 ("usb: gadget: f_uac2: finalize wMaxPacketSize according to bandwidth") Signed-off-by: Jerome Brunet Cc: stable Link: https://lore.kernel.org/r/20201221173531.215169-2-jbrunet@baylibre.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/function/f_uac2.c | 69 ++++++++++++++++++++++------ 1 file changed, 55 insertions(+), 14 deletions(-) diff --git a/drivers/usb/gadget/function/f_uac2.c b/drivers/usb/gadget/function/f_uac2.c index 3633df6d7610f..5d960b6603b6f 100644 --- a/drivers/usb/gadget/function/f_uac2.c +++ b/drivers/usb/gadget/function/f_uac2.c @@ -271,7 +271,7 @@ static struct usb_endpoint_descriptor fs_epout_desc = { .bEndpointAddress = USB_DIR_OUT, .bmAttributes = USB_ENDPOINT_XFER_ISOC | USB_ENDPOINT_SYNC_ASYNC, - .wMaxPacketSize = cpu_to_le16(1023), + /* .wMaxPacketSize = DYNAMIC */ .bInterval = 1, }; @@ -280,7 +280,7 @@ static struct usb_endpoint_descriptor hs_epout_desc = { .bDescriptorType = USB_DT_ENDPOINT, .bmAttributes = USB_ENDPOINT_XFER_ISOC | USB_ENDPOINT_SYNC_ASYNC, - .wMaxPacketSize = cpu_to_le16(1024), + /* .wMaxPacketSize = DYNAMIC */ .bInterval = 4, }; @@ -348,7 +348,7 @@ static struct usb_endpoint_descriptor fs_epin_desc = { .bEndpointAddress = USB_DIR_IN, .bmAttributes = USB_ENDPOINT_XFER_ISOC | USB_ENDPOINT_SYNC_ASYNC, - .wMaxPacketSize = cpu_to_le16(1023), + /* .wMaxPacketSize = DYNAMIC */ .bInterval = 1, }; @@ -357,7 +357,7 @@ static struct usb_endpoint_descriptor hs_epin_desc = { .bDescriptorType = USB_DT_ENDPOINT, .bmAttributes = USB_ENDPOINT_XFER_ISOC | USB_ENDPOINT_SYNC_ASYNC, - .wMaxPacketSize = cpu_to_le16(1024), + /* .wMaxPacketSize = DYNAMIC */ .bInterval = 4, }; @@ -444,12 +444,28 @@ struct cntrl_range_lay3 { __le32 dRES; } __packed; -static void set_ep_max_packet_size(const struct f_uac2_opts *uac2_opts, +static int set_ep_max_packet_size(const struct f_uac2_opts *uac2_opts, struct usb_endpoint_descriptor *ep_desc, - unsigned int factor, bool is_playback) + enum usb_device_speed speed, bool is_playback) { int chmask, srate, ssize; - u16 max_packet_size; + u16 max_size_bw, max_size_ep; + unsigned int factor; + + switch (speed) { + case USB_SPEED_FULL: + max_size_ep = 1023; + factor = 1000; + break; + + case USB_SPEED_HIGH: + max_size_ep = 1024; + factor = 8000; + break; + + default: + return -EINVAL; + } if (is_playback) { chmask = uac2_opts->p_chmask; @@ -461,10 +477,12 @@ static void set_ep_max_packet_size(const struct f_uac2_opts *uac2_opts, ssize = uac2_opts->c_ssize; } - max_packet_size = num_channels(chmask) * ssize * + max_size_bw = num_channels(chmask) * ssize * DIV_ROUND_UP(srate, factor / (1 << (ep_desc->bInterval - 1))); - ep_desc->wMaxPacketSize = cpu_to_le16(min_t(u16, max_packet_size, - le16_to_cpu(ep_desc->wMaxPacketSize))); + ep_desc->wMaxPacketSize = cpu_to_le16(min_t(u16, max_size_bw, + max_size_ep)); + + return 0; } /* Use macro to overcome line length limitation */ @@ -670,10 +688,33 @@ afunc_bind(struct usb_configuration *cfg, struct usb_function *fn) } /* Calculate wMaxPacketSize according to audio bandwidth */ - set_ep_max_packet_size(uac2_opts, &fs_epin_desc, 1000, true); - set_ep_max_packet_size(uac2_opts, &fs_epout_desc, 1000, false); - set_ep_max_packet_size(uac2_opts, &hs_epin_desc, 8000, true); - set_ep_max_packet_size(uac2_opts, &hs_epout_desc, 8000, false); + ret = set_ep_max_packet_size(uac2_opts, &fs_epin_desc, USB_SPEED_FULL, + true); + if (ret < 0) { + dev_err(dev, "%s:%d Error!\n", __func__, __LINE__); + return ret; + } + + ret = set_ep_max_packet_size(uac2_opts, &fs_epout_desc, USB_SPEED_FULL, + false); + if (ret < 0) { + dev_err(dev, "%s:%d Error!\n", __func__, __LINE__); + return ret; + } + + ret = set_ep_max_packet_size(uac2_opts, &hs_epin_desc, USB_SPEED_HIGH, + true); + if (ret < 0) { + dev_err(dev, "%s:%d Error!\n", __func__, __LINE__); + return ret; + } + + ret = set_ep_max_packet_size(uac2_opts, &hs_epout_desc, USB_SPEED_HIGH, + false); + if (ret < 0) { + dev_err(dev, "%s:%d Error!\n", __func__, __LINE__); + return ret; + } if (EPOUT_EN(uac2_opts)) { agdev->out_ep = usb_ep_autoconfig(gadget, &fs_epout_desc); -- GitLab From 54ca955b5a4024e2ce0f206b03adb7109bc4da26 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pali=20Roh=C3=A1r?= Date: Wed, 23 Dec 2020 20:19:31 +0100 Subject: [PATCH 0105/2012] serial: mvebu-uart: fix tx lost characters at power off MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit c685af1108d7 ("serial: mvebu-uart: fix tx lost characters") fixed tx lost characters at low baud rates but started causing tx lost characters when kernel is going to power off or reboot. TX_EMP tells us when transmit queue is empty therefore all characters were transmitted. TX_RDY tells us when CPU can send a new character. Therefore we need to use different check prior transmitting new character and different check after all characters were sent. This patch splits polling code into two functions: wait_for_xmitr() which waits for TX_RDY and wait_for_xmite() which waits for TX_EMP. When rebooting A3720 platform without this patch on UART is print only: [ 42.699� And with this patch on UART is full output: [ 39.530216] reboot: Restarting system Fixes: c685af1108d7 ("serial: mvebu-uart: fix tx lost characters") Signed-off-by: Pali Rohár Cc: stable Link: https://lore.kernel.org/r/20201223191931.18343-1-pali@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/mvebu-uart.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/tty/serial/mvebu-uart.c b/drivers/tty/serial/mvebu-uart.c index 118b299122898..e0c00a1b07639 100644 --- a/drivers/tty/serial/mvebu-uart.c +++ b/drivers/tty/serial/mvebu-uart.c @@ -648,6 +648,14 @@ static void wait_for_xmitr(struct uart_port *port) (val & STAT_TX_RDY(port)), 1, 10000); } +static void wait_for_xmite(struct uart_port *port) +{ + u32 val; + + readl_poll_timeout_atomic(port->membase + UART_STAT, val, + (val & STAT_TX_EMP), 1, 10000); +} + static void mvebu_uart_console_putchar(struct uart_port *port, int ch) { wait_for_xmitr(port); @@ -675,7 +683,7 @@ static void mvebu_uart_console_write(struct console *co, const char *s, uart_console_write(port, s, count, mvebu_uart_console_putchar); - wait_for_xmitr(port); + wait_for_xmite(port); if (ier) writel(ier, port->membase + UART_CTRL(port)); -- GitLab From 4d4f9c1a17a3480f8fe523673f7232b254d724b7 Mon Sep 17 00:00:00 2001 From: Paul Cercueil Date: Wed, 16 Dec 2020 23:39:56 +0000 Subject: [PATCH 0106/2012] MIPS: boot: Fix unaligned access with CONFIG_MIPS_RAW_APPENDED_DTB MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The compressed payload is not necesarily 4-byte aligned, at least when compiling with Clang. In that case, the 4-byte value appended to the compressed payload that corresponds to the uncompressed kernel image size must be read using get_unaligned_le32(). This fixes Clang-built kernels not booting on MIPS (tested on a Ingenic JZ4770 board). Fixes: b8f54f2cde78 ("MIPS: ZBOOT: copy appended dtb to the end of the kernel") Cc: # v4.7 Signed-off-by: Paul Cercueil Reviewed-by: Nick Desaulniers Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Thomas Bogendoerfer --- arch/mips/boot/compressed/decompress.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/mips/boot/compressed/decompress.c b/arch/mips/boot/compressed/decompress.c index c61c641674e6b..e3946b06e840a 100644 --- a/arch/mips/boot/compressed/decompress.c +++ b/arch/mips/boot/compressed/decompress.c @@ -13,6 +13,7 @@ #include #include +#include /* * These two variables specify the free mem region @@ -117,7 +118,7 @@ void decompress_kernel(unsigned long boot_heap_start) dtb_size = fdt_totalsize((void *)&__appended_dtb); /* last four bytes is always image size in little endian */ - image_size = le32_to_cpup((void *)&__image_end - 4); + image_size = get_unaligned_le32((void *)&__image_end - 4); /* copy dtb to where the booted kernel will expect it */ memcpy((void *)VMLINUX_LOAD_ADDRESS_ULL + image_size, -- GitLab From 698222457465ce343443be81c5512edda86e5914 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 24 Dec 2020 19:44:38 +0000 Subject: [PATCH 0107/2012] MIPS: Fix malformed NT_FILE and NT_SIGINFO in 32bit coredumps Patches that introduced NT_FILE and NT_SIGINFO notes back in 2012 had taken care of native (fs/binfmt_elf.c) and compat (fs/compat_binfmt_elf.c) coredumps; unfortunately, compat on mips (which does not go through the usual compat_binfmt_elf.c) had not been noticed. As the result, both N32 and O32 coredumps on 64bit mips kernels have those sections malformed enough to confuse the living hell out of all gdb and readelf versions (up to and including the tip of binutils-gdb.git). Longer term solution is to make both O32 and N32 compat use the regular compat_binfmt_elf.c, but that's too much for backports. The minimal solution is to do in arch/mips/kernel/binfmt_elf[on]32.c the same thing those patches have done in fs/compat_binfmt_elf.c Cc: stable@kernel.org # v3.7+ Signed-off-by: Al Viro Signed-off-by: Thomas Bogendoerfer --- arch/mips/kernel/binfmt_elfn32.c | 7 +++++++ arch/mips/kernel/binfmt_elfo32.c | 7 +++++++ 2 files changed, 14 insertions(+) diff --git a/arch/mips/kernel/binfmt_elfn32.c b/arch/mips/kernel/binfmt_elfn32.c index 6ee3f7218c675..c4441416e96b6 100644 --- a/arch/mips/kernel/binfmt_elfn32.c +++ b/arch/mips/kernel/binfmt_elfn32.c @@ -103,4 +103,11 @@ jiffies_to_old_timeval32(unsigned long jiffies, struct old_timeval32 *value) #undef ns_to_kernel_old_timeval #define ns_to_kernel_old_timeval ns_to_old_timeval32 +/* + * Some data types as stored in coredump. + */ +#define user_long_t compat_long_t +#define user_siginfo_t compat_siginfo_t +#define copy_siginfo_to_external copy_siginfo_to_external32 + #include "../../../fs/binfmt_elf.c" diff --git a/arch/mips/kernel/binfmt_elfo32.c b/arch/mips/kernel/binfmt_elfo32.c index 6dd103d3cebba..7b2a23f48c1ac 100644 --- a/arch/mips/kernel/binfmt_elfo32.c +++ b/arch/mips/kernel/binfmt_elfo32.c @@ -106,4 +106,11 @@ jiffies_to_old_timeval32(unsigned long jiffies, struct old_timeval32 *value) #undef ns_to_kernel_old_timeval #define ns_to_kernel_old_timeval ns_to_old_timeval32 +/* + * Some data types as stored in coredump. + */ +#define user_long_t compat_long_t +#define user_siginfo_t compat_siginfo_t +#define copy_siginfo_to_external copy_siginfo_to_external32 + #include "../../../fs/binfmt_elf.c" -- GitLab From 26b614fa441048a9f8e4a814c3b01756816ce7a7 Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Wed, 16 Dec 2020 17:48:33 +0200 Subject: [PATCH 0108/2012] dmaengine: ti: k3-udma: Fix pktdma rchan TPL level setup Instead of initializing the rchan_tpl the initial commit re-initialized the tchan_tpl. Fixes: d2abc982333c0 ("dmaengine: ti: k3-udma: Initial support for K3 PKTDMA") Signed-off-by: Peter Ujfalusi Link: https://lore.kernel.org/r/20201216154833.20821-1-peter.ujfalusi@ti.com Signed-off-by: Vinod Koul --- drivers/dma/ti/k3-udma.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/dma/ti/k3-udma.c b/drivers/dma/ti/k3-udma.c index 87157cbae1b8e..298460438bb4d 100644 --- a/drivers/dma/ti/k3-udma.c +++ b/drivers/dma/ti/k3-udma.c @@ -4698,9 +4698,9 @@ static int pktdma_setup_resources(struct udma_dev *ud) ud->tchan_tpl.levels = 1; } - ud->tchan_tpl.levels = ud->tchan_tpl.levels; - ud->tchan_tpl.start_idx[0] = ud->tchan_tpl.start_idx[0]; - ud->tchan_tpl.start_idx[1] = ud->tchan_tpl.start_idx[1]; + ud->rchan_tpl.levels = ud->tchan_tpl.levels; + ud->rchan_tpl.start_idx[0] = ud->tchan_tpl.start_idx[0]; + ud->rchan_tpl.start_idx[1] = ud->tchan_tpl.start_idx[1]; ud->tchan_map = devm_kmalloc_array(dev, BITS_TO_LONGS(ud->tchan_cnt), sizeof(unsigned long), GFP_KERNEL); -- GitLab From ff58f7dd0c1352a01de3a40327895bd51e03de3a Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 16 Dec 2020 11:29:46 +0300 Subject: [PATCH 0109/2012] dmaengine: idxd: off by one in cleanup code The clean up is off by one so this will start at "i" and it should start with "i - 1" and then it doesn't unregister the zeroeth elements in the array. Fixes: c52ca478233c ("dmaengine: idxd: add configuration component of driver") Signed-off-by: Dan Carpenter Acked-by: Dave Jiang Link: https://lore.kernel.org/r/X9nFeojulsNqUSnG@mwanda Signed-off-by: Vinod Koul --- drivers/dma/idxd/sysfs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/dma/idxd/sysfs.c b/drivers/dma/idxd/sysfs.c index 266423a2cabc7..4dbb03c545e48 100644 --- a/drivers/dma/idxd/sysfs.c +++ b/drivers/dma/idxd/sysfs.c @@ -434,7 +434,7 @@ int idxd_register_driver(void) return 0; drv_fail: - for (; i > 0; i--) + while (--i >= 0) driver_unregister(&idxd_drvs[i]->drv); return rc; } @@ -1840,7 +1840,7 @@ int idxd_register_bus_type(void) return 0; bus_err: - for (; i > 0; i--) + while (--i >= 0) bus_unregister(idxd_bus_types[i]); return rc; } -- GitLab From 8fb28795fb64e1151c0e713686d8b026a5a2aece Mon Sep 17 00:00:00 2001 From: Xiaoming Ni Date: Fri, 18 Dec 2020 18:41:37 +0800 Subject: [PATCH 0110/2012] dmaengine: qcom: gpi: Fixes a format mismatch drivers/dma/qcom/gpi.c:1419:3: warning: format '%lu' expects argument of type 'long unsigned int', but argument 8 has type 'size_t {aka unsigned int}' [-Wformat=] drivers/dma/qcom/gpi.c:1427:31: warning: format '%lu' expects argument of type 'long unsigned int', but argument 3 has type 'size_t {aka unsigned int}' [-Wformat=] drivers/dma/qcom/gpi.c:1447:3: warning: format '%llx' expects argument of type 'long long unsigned int', but argument 4 has type 'dma_addr_t {aka unsigned int}' [-Wformat=] drivers/dma/qcom/gpi.c:1447:3: warning: format '%llx' expects argument of type 'long long unsigned int', but argument 5 has type 'phys_addr_t {aka unsigned int}' [-Wformat=] Signed-off-by: Xiaoming Ni Reviewed-by: Bjorn Andersson Link: https://lore.kernel.org/r/20201218104137.59200-1-nixiaoming@huawei.com Signed-off-by: Vinod Koul --- drivers/dma/qcom/gpi.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/dma/qcom/gpi.c b/drivers/dma/qcom/gpi.c index d2334f535de2a..556c070a514ca 100644 --- a/drivers/dma/qcom/gpi.c +++ b/drivers/dma/qcom/gpi.c @@ -1416,7 +1416,7 @@ static int gpi_alloc_ring(struct gpi_ring *ring, u32 elements, len = 1 << bit; ring->alloc_size = (len + (len - 1)); dev_dbg(gpii->gpi_dev->dev, - "#el:%u el_size:%u len:%u actual_len:%llu alloc_size:%lu\n", + "#el:%u el_size:%u len:%u actual_len:%llu alloc_size:%zu\n", elements, el_size, (elements * el_size), len, ring->alloc_size); @@ -1424,7 +1424,7 @@ static int gpi_alloc_ring(struct gpi_ring *ring, u32 elements, ring->alloc_size, &ring->dma_handle, GFP_KERNEL); if (!ring->pre_aligned) { - dev_err(gpii->gpi_dev->dev, "could not alloc size:%lu mem for ring\n", + dev_err(gpii->gpi_dev->dev, "could not alloc size:%zu mem for ring\n", ring->alloc_size); return -ENOMEM; } @@ -1444,8 +1444,8 @@ static int gpi_alloc_ring(struct gpi_ring *ring, u32 elements, smp_wmb(); dev_dbg(gpii->gpi_dev->dev, - "phy_pre:0x%0llx phy_alig:0x%0llx len:%u el_size:%u elements:%u\n", - ring->dma_handle, ring->phys_addr, ring->len, + "phy_pre:%pad phy_alig:%pa len:%u el_size:%u elements:%u\n", + &ring->dma_handle, &ring->phys_addr, ring->len, ring->el_size, ring->elements); return 0; -- GitLab From 33cbd54dc515cc04b5a603603414222b4bb1448d Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sat, 19 Dec 2020 13:47:18 +0100 Subject: [PATCH 0111/2012] dmaengine: mediatek: mtk-hsdma: Fix a resource leak in the error handling path of the probe function 'mtk_hsdma_hw_deinit()' should be called in the error handling path of the probe function to undo a previous 'mtk_hsdma_hw_init()' call, as already done in the remove function. Fixes: 548c4597e984 ("dmaengine: mediatek: Add MediaTek High-Speed DMA controller for MT7622 and MT7623 SoC") Signed-off-by: Christophe JAILLET Link: https://lore.kernel.org/r/20201219124718.182664-1-christophe.jaillet@wanadoo.fr Signed-off-by: Vinod Koul --- drivers/dma/mediatek/mtk-hsdma.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/dma/mediatek/mtk-hsdma.c b/drivers/dma/mediatek/mtk-hsdma.c index f133ae8dece16..6ad8afbb95f2b 100644 --- a/drivers/dma/mediatek/mtk-hsdma.c +++ b/drivers/dma/mediatek/mtk-hsdma.c @@ -1007,6 +1007,7 @@ static int mtk_hsdma_probe(struct platform_device *pdev) return 0; err_free: + mtk_hsdma_hw_deinit(hsdma); of_dma_controller_free(pdev->dev.of_node); err_unregister: dma_async_device_unregister(dd); -- GitLab From d645148cc82ca7fbacaa601414a552184e9c6dd3 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sat, 19 Dec 2020 14:28:00 +0100 Subject: [PATCH 0112/2012] dmaengine: milbeaut-xdmac: Fix a resource leak in the error handling path of the probe function 'disable_xdmac()' should be called in the error handling path of the probe function to undo a previous 'enable_xdmac()' call, as already done in the remove function. Fixes: a6e9be055d47 ("dmaengine: milbeaut-xdmac: Add XDMAC driver for Milbeaut platforms") Signed-off-by: Christophe JAILLET Link: https://lore.kernel.org/r/20201219132800.183254-1-christophe.jaillet@wanadoo.fr Signed-off-by: Vinod Koul --- drivers/dma/milbeaut-xdmac.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/dma/milbeaut-xdmac.c b/drivers/dma/milbeaut-xdmac.c index 584c931e807af..d29d01e730aa0 100644 --- a/drivers/dma/milbeaut-xdmac.c +++ b/drivers/dma/milbeaut-xdmac.c @@ -350,7 +350,7 @@ static int milbeaut_xdmac_probe(struct platform_device *pdev) ret = dma_async_device_register(ddev); if (ret) - return ret; + goto disable_xdmac; ret = of_dma_controller_register(dev->of_node, of_dma_simple_xlate, mdev); @@ -363,6 +363,8 @@ static int milbeaut_xdmac_probe(struct platform_device *pdev) unregister_dmac: dma_async_device_unregister(ddev); +disable_xdmac: + disable_xdmac(mdev); return ret; } -- GitLab From 595a334148449bd1d27cf5d6fcb3b0d718cb1b9f Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 14 Dec 2020 14:56:52 +0300 Subject: [PATCH 0113/2012] dmaengine: dw-edma: Fix use after free in dw_edma_alloc_chunk() If the dw_edma_alloc_burst() function fails then we free "chunk" but it's still on the "desc->chunk->list" list so it will lead to a use after free. Also the "->chunks_alloc" count is incremented when it shouldn't be. In current kernels small allocations are guaranteed to succeed and dw_edma_alloc_burst() can't fail so this will not actually affect runtime. Fixes: e63d79d1ffcd ("dmaengine: Add Synopsys eDMA IP core driver") Signed-off-by: Dan Carpenter Acked-by: Gustavo Pimentel Link: https://lore.kernel.org/r/X9dTBFrUPEvvW7qc@mwanda Signed-off-by: Vinod Koul --- drivers/dma/dw-edma/dw-edma-core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/dma/dw-edma/dw-edma-core.c b/drivers/dma/dw-edma/dw-edma-core.c index b971505b87152..08d71dafa0015 100644 --- a/drivers/dma/dw-edma/dw-edma-core.c +++ b/drivers/dma/dw-edma/dw-edma-core.c @@ -86,12 +86,12 @@ static struct dw_edma_chunk *dw_edma_alloc_chunk(struct dw_edma_desc *desc) if (desc->chunk) { /* Create and add new element into the linked list */ - desc->chunks_alloc++; - list_add_tail(&chunk->list, &desc->chunk->list); if (!dw_edma_alloc_burst(chunk)) { kfree(chunk); return NULL; } + desc->chunks_alloc++; + list_add_tail(&chunk->list, &desc->chunk->list); } else { /* List head */ chunk->burst = NULL; -- GitLab From ba42f61b36121730d7f51cc261dfd744ee19f50b Mon Sep 17 00:00:00 2001 From: Zheng Yongjun Date: Wed, 16 Dec 2020 21:06:49 +0800 Subject: [PATCH 0114/2012] qcom: bam_dma: Delete useless kfree code The parameter of kfree function is NULL, so kfree code is useless, delete it. Therefore, goto expression is no longer needed, so simplify it. Signed-off-by: Zheng Yongjun Reviewed-by: Bjorn Andersson Link: https://lore.kernel.org/r/20201216130649.13979-1-zhengyongjun3@huawei.com Signed-off-by: Vinod Koul --- drivers/dma/qcom/bam_dma.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/dma/qcom/bam_dma.c b/drivers/dma/qcom/bam_dma.c index d5773d474d8f5..88579857ca1d6 100644 --- a/drivers/dma/qcom/bam_dma.c +++ b/drivers/dma/qcom/bam_dma.c @@ -630,7 +630,7 @@ static struct dma_async_tx_descriptor *bam_prep_slave_sg(struct dma_chan *chan, GFP_NOWAIT); if (!async_desc) - goto err_out; + return NULL; if (flags & DMA_PREP_FENCE) async_desc->flags |= DESC_FLAG_NWD; @@ -670,10 +670,6 @@ static struct dma_async_tx_descriptor *bam_prep_slave_sg(struct dma_chan *chan, } return vchan_tx_prep(&bchan->vc, &async_desc->vd, flags); - -err_out: - kfree(async_desc); - return NULL; } /** -- GitLab From 28d8e07fc9478f8f14dd5dd4b2c382982fa12461 Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Tue, 15 Dec 2020 15:13:47 +0200 Subject: [PATCH 0115/2012] MAINTAINERS: Add entry for Texas Instruments DMA drivers My employment with TI is coming to an end, it is my intention to look after the DMA drivers I have worked with over the years. Signed-off-by: Peter Ujfalusi Signed-off-by: Peter Ujfalusi Link: https://lore.kernel.org/r/20201215131348.11282-2-peter.ujfalusi@ti.com Signed-off-by: Vinod Koul --- MAINTAINERS | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 546aa66428c9f..217866b668b9b 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -17553,6 +17553,19 @@ S: Supported F: Documentation/devicetree/bindings/iio/dac/ti,dac7612.txt F: drivers/iio/dac/ti-dac7612.c +TEXAS INSTRUMENTS DMA DRIVERS +M: Peter Ujfalusi +L: dmaengine@vger.kernel.org +S: Maintained +F: Documentation/devicetree/bindings/dma/ti-dma-crossbar.txt +F: Documentation/devicetree/bindings/dma/ti-edma.txt +F: Documentation/devicetree/bindings/dma/ti/ +F: drivers/dma/ti/ +X: drivers/dma/ti/cppi41.c +F: include/linux/dma/k3-udma-glue.h +F: include/linux/dma/ti-cppi5.h +F: include/linux/dma/k3-psil.h + TEXAS INSTRUMENTS' SYSTEM CONTROL INTERFACE (TISCI) PROTOCOL DRIVER M: Nishanth Menon M: Tero Kristo -- GitLab From cc465fa269bc0dc63a1ab7384110e4079fb40421 Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Tue, 15 Dec 2020 15:13:48 +0200 Subject: [PATCH 0116/2012] dt-bindings: dma: ti: Update maintainer and author information My employment with TI is coming to an end, add the copyright and author comments as they due and change the maintainer mail address. Signed-off-by: Peter Ujfalusi Signed-off-by: Peter Ujfalusi Link: https://lore.kernel.org/r/20201215131348.11282-3-peter.ujfalusi@ti.com Signed-off-by: Vinod Koul --- Documentation/devicetree/bindings/dma/ti/k3-bcdma.yaml | 4 +++- Documentation/devicetree/bindings/dma/ti/k3-pktdma.yaml | 4 +++- Documentation/devicetree/bindings/dma/ti/k3-udma.yaml | 4 +++- 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/Documentation/devicetree/bindings/dma/ti/k3-bcdma.yaml b/Documentation/devicetree/bindings/dma/ti/k3-bcdma.yaml index b15f68c499cb2..df29d59d13a8d 100644 --- a/Documentation/devicetree/bindings/dma/ti/k3-bcdma.yaml +++ b/Documentation/devicetree/bindings/dma/ti/k3-bcdma.yaml @@ -1,4 +1,6 @@ # SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +# Copyright (C) 2020 Texas Instruments Incorporated +# Author: Peter Ujfalusi %YAML 1.2 --- $id: http://devicetree.org/schemas/dma/ti/k3-bcdma.yaml# @@ -7,7 +9,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Texas Instruments K3 DMSS BCDMA Device Tree Bindings maintainers: - - Peter Ujfalusi + - Peter Ujfalusi description: | The Block Copy DMA (BCDMA) is intended to perform similar functions as the TR diff --git a/Documentation/devicetree/bindings/dma/ti/k3-pktdma.yaml b/Documentation/devicetree/bindings/dma/ti/k3-pktdma.yaml index b13ab60cd740f..ea19d12a9337e 100644 --- a/Documentation/devicetree/bindings/dma/ti/k3-pktdma.yaml +++ b/Documentation/devicetree/bindings/dma/ti/k3-pktdma.yaml @@ -1,4 +1,6 @@ # SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +# Copyright (C) 2020 Texas Instruments Incorporated +# Author: Peter Ujfalusi %YAML 1.2 --- $id: http://devicetree.org/schemas/dma/ti/k3-pktdma.yaml# @@ -7,7 +9,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Texas Instruments K3 DMSS PKTDMA Device Tree Bindings maintainers: - - Peter Ujfalusi + - Peter Ujfalusi description: | The Packet DMA (PKTDMA) is intended to perform similar functions as the packet diff --git a/Documentation/devicetree/bindings/dma/ti/k3-udma.yaml b/Documentation/devicetree/bindings/dma/ti/k3-udma.yaml index 9a87fd9041eba..6a09bbf83d462 100644 --- a/Documentation/devicetree/bindings/dma/ti/k3-udma.yaml +++ b/Documentation/devicetree/bindings/dma/ti/k3-udma.yaml @@ -1,4 +1,6 @@ # SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +# Copyright (C) 2019 Texas Instruments Incorporated +# Author: Peter Ujfalusi %YAML 1.2 --- $id: http://devicetree.org/schemas/dma/ti/k3-udma.yaml# @@ -7,7 +9,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Texas Instruments K3 NAVSS Unified DMA Device Tree Bindings maintainers: - - Peter Ujfalusi + - Peter Ujfalusi description: | The UDMA-P is intended to perform similar (but significantly upgraded) -- GitLab From ef019c5daf032dce0b95ed4d45bfec93c4fbcb9f Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Tue, 22 Dec 2020 13:10:46 +0000 Subject: [PATCH 0117/2012] PHY: Ingenic: fix unconditional build of phy-ingenic-usb Currently drivers/phy/ingenic/Makefile adds phy-ingenic-usb to targets not depending on actual Kconfig symbol CONFIG_PHY_INGENIC_USB, so this driver always gets built[-in] on every system. Add missing dependency. Fixes: 31de313dfdcf ("PHY: Ingenic: Add USB PHY driver using generic PHY framework.") Signed-off-by: Alexander Lobakin Link: https://lore.kernel.org/r/20201222131021.4751-1-alobakin@pm.me Signed-off-by: Vinod Koul --- drivers/phy/ingenic/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/phy/ingenic/Makefile b/drivers/phy/ingenic/Makefile index 65d5ea00fc9d4..1cb158d7233f4 100644 --- a/drivers/phy/ingenic/Makefile +++ b/drivers/phy/ingenic/Makefile @@ -1,2 +1,2 @@ # SPDX-License-Identifier: GPL-2.0 -obj-y += phy-ingenic-usb.o +obj-$(CONFIG_PHY_INGENIC_USB) += phy-ingenic-usb.o -- GitLab From 92cbdb923c17544684c2dd3be9f8636617898a44 Mon Sep 17 00:00:00 2001 From: Peter Chen Date: Thu, 10 Dec 2020 21:31:36 +0800 Subject: [PATCH 0118/2012] usb: cdns3: imx: fix writing read-only memory issue The memory for struct clk_bulk_data should not be static which will be written during the clk_bulk_get. It fixed below oops when loading cdns3-imx as module. [ 17.272605] Unable to handle kernel write to read-only memory at virtual address ffff8000092a5398 [ 17.299730] Mem abort info: [ 17.313542] unregister ISI channel: mxc_isi.4 [ 17.324076] ESR = 0x9600004f [ 17.344658] EC = 0x25: DABT (current EL), IL = 32 bits [ 17.402055] SET = 0, FnV = 0 [ 17.404321] mxs_phy 5b100000.usbphy: supply phy-3p0 not found, using dummy regulator [ 17.405121] EA = 0, S1PTW = 0 [ 17.405133] Data abort info: [ 17.496231] ISV = 0, ISS = 0x0000004f [ 17.510871] CM = 0, WnR = 1 [ 17.533542] swapper pgtable: 4k pages, 48-bit VAs, pgdp=0000000081ea5000 [ 17.545709] [ffff8000092a5398] pgd=00000008bffff003, p4d=00000008bffff003, pud=00000008bfffe003, pmd=0000000885041003, pte=006000088513b783 [ 17.573521] Internal error: Oops: 9600004f [#1] PREEMPT SMP [ 17.579113] Modules linked in: usbmisc_imx phy_mxs_usb phy_cadence_salvo cdns3_imx(+) tcpci imx8_media_dev(C) caam error [ 17.590044] CPU: 2 PID: 253 Comm: systemd-udevd Tainted: G C 5.10.0-rc4-04445-g11f3c3a29d0-dirty #19 [ 17.600488] Hardware name: Freescale i.MX8QXP MEK (DT) [ 17.605633] pstate: 20000005 (nzCv daif -PAN -UAO -TCO BTYPE=--) [ 17.611662] pc : __clk_bulk_get+0x48/0x130 [ 17.615786] lr : clk_bulk_get+0x18/0x20 [ 17.619634] sp : ffff80001369b880 [ 17.622953] x29: ffff80001369b880 x28: 0000000000000013 [ 17.628277] x27: 0000000000000100 x26: ffff00080553b100 [ 17.633602] x25: ffff80001229b4d8 x24: 0000000000000000 [ 17.638928] x23: ffff000800665410 x22: 0000000000000005 [ 17.644275] x21: ffff8000092a5390 x20: ffff000800665400 [ 17.649605] x19: ffff000804e6f980 x18: 000000005b110000 [ 17.654946] x17: 0000000000000000 x16: 0000000000000000 [ 17.660274] x15: ffff800011989100 x14: 0000000000000000 [ 17.665599] x13: ffff800013ce1000 x12: ffff800013ca1000 [ 17.670924] x11: 000000005b110000 x10: 0000000000000000 [ 17.676249] x9 : ffff8000106c5a30 x8 : ffff000804e6fa00 [ 17.681575] x7 : 0000000000000000 x6 : 000000000000003f [ 17.686901] x5 : 0000000000000040 x4 : ffff80001369b8b0 [ 17.692228] x3 : ffff8000092a5398 x2 : ffff8000092a5390 [ 17.697574] x1 : ffff8000092a53e8 x0 : 0000000000000004 [ 17.702905] Call trace: [ 17.705366] __clk_bulk_get+0x48/0x130 [ 17.709125] clk_bulk_get+0x18/0x20 [ 17.712620] devm_clk_bulk_get+0x58/0xb8 [ 17.716563] cdns_imx_probe+0x84/0x1f0 [cdns3_imx] [ 17.721363] platform_drv_probe+0x58/0xa8 [ 17.725381] really_probe+0xec/0x4c8 [ 17.728967] driver_probe_device+0xf4/0x160 [ 17.733160] device_driver_attach+0x74/0x80 [ 17.737355] __driver_attach+0xa4/0x170 [ 17.741202] bus_for_each_dev+0x74/0xc8 [ 17.745043] driver_attach+0x28/0x30 [ 17.748620] bus_add_driver+0x144/0x228 [ 17.752462] driver_register+0x68/0x118 [ 17.756308] __platform_driver_register+0x4c/0x58 [ 17.761022] cdns_imx_driver_init+0x24/0x1000 [cdns3_imx] [ 17.766434] do_one_initcall+0x48/0x2c0 [ 17.770280] do_init_module+0x5c/0x220 [ 17.774029] load_module+0x210c/0x2858 [ 17.777784] __do_sys_finit_module+0xb8/0x120 [ 17.782148] __arm64_sys_finit_module+0x24/0x30 [ 17.786691] el0_svc_common.constprop.0+0x70/0x168 [ 17.791497] do_el0_svc+0x28/0x88 [ 17.794822] el0_sync_handler+0x158/0x160 [ 17.798833] el0_sync+0x140/0x180 [ 17.802158] Code: aa0203f5 91002043 8b205021 a90153f3 (f801047f) Cc: Fixes: 1e056efab993 ("usb: cdns3: add NXP imx8qm glue layer") Signed-off-by: Peter Chen --- drivers/usb/cdns3/cdns3-imx.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/usb/cdns3/cdns3-imx.c b/drivers/usb/cdns3/cdns3-imx.c index 22a56c4dce678..4d3fedc867530 100644 --- a/drivers/usb/cdns3/cdns3-imx.c +++ b/drivers/usb/cdns3/cdns3-imx.c @@ -185,7 +185,11 @@ static int cdns_imx_probe(struct platform_device *pdev) } data->num_clks = ARRAY_SIZE(imx_cdns3_core_clks); - data->clks = (struct clk_bulk_data *)imx_cdns3_core_clks; + data->clks = devm_kmemdup(dev, imx_cdns3_core_clks, + sizeof(imx_cdns3_core_clks), GFP_KERNEL); + if (!data->clks) + return -ENOMEM; + ret = devm_clk_bulk_get(dev, data->num_clks, data->clks); if (ret) return ret; -- GitLab From 2ef02b846ee2526249a562a66d6dcb25fcbca9d8 Mon Sep 17 00:00:00 2001 From: Peter Chen Date: Thu, 10 Dec 2020 21:31:37 +0800 Subject: [PATCH 0119/2012] usb: cdns3: imx: fix can't create core device the second time issue The cdns3 core device is populated by calling of_platform_populate, the flag OF_POPULATED is set for core device node, if this flag is not cleared, when calling of_platform_populate the second time after loading parent module again, the OF code will not try to create platform device for core device. To fix it, it uses of_platform_depopulate to depopulate the core device which the parent created, and the flag OF_POPULATED for core device node will be cleared accordingly. Cc: Fixes: 1e056efab993 ("usb: cdns3: add NXP imx8qm glue layer") Signed-off-by: Peter Chen --- drivers/usb/cdns3/cdns3-imx.c | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/drivers/usb/cdns3/cdns3-imx.c b/drivers/usb/cdns3/cdns3-imx.c index 4d3fedc867530..6b358e8be5799 100644 --- a/drivers/usb/cdns3/cdns3-imx.c +++ b/drivers/usb/cdns3/cdns3-imx.c @@ -218,20 +218,11 @@ err: return ret; } -static int cdns_imx_remove_core(struct device *dev, void *data) -{ - struct platform_device *pdev = to_platform_device(dev); - - platform_device_unregister(pdev); - - return 0; -} - static int cdns_imx_remove(struct platform_device *pdev) { struct device *dev = &pdev->dev; - device_for_each_child(dev, NULL, cdns_imx_remove_core); + of_platform_depopulate(dev); platform_set_drvdata(pdev, NULL); return 0; -- GitLab From d1357119157c4662d43143885f3691f9a766369a Mon Sep 17 00:00:00 2001 From: Peter Chen Date: Thu, 10 Dec 2020 21:33:21 +0800 Subject: [PATCH 0120/2012] usb: cdns3: imx: improve driver .remove API Keep the runtime active during the remove operation, and disable related clocks. Signed-off-by: Peter Chen --- drivers/usb/cdns3/cdns3-imx.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/usb/cdns3/cdns3-imx.c b/drivers/usb/cdns3/cdns3-imx.c index 6b358e8be5799..7990fee03fe4b 100644 --- a/drivers/usb/cdns3/cdns3-imx.c +++ b/drivers/usb/cdns3/cdns3-imx.c @@ -221,8 +221,13 @@ err: static int cdns_imx_remove(struct platform_device *pdev) { struct device *dev = &pdev->dev; + struct cdns_imx *data = dev_get_drvdata(dev); + pm_runtime_get_sync(dev); of_platform_depopulate(dev); + clk_bulk_disable_unprepare(data->num_clks, data->clks); + pm_runtime_disable(dev); + pm_runtime_put_noidle(dev); platform_set_drvdata(pdev, NULL); return 0; -- GitLab From 65403ff98ebb86caf498e020d572819bb61860ad Mon Sep 17 00:00:00 2001 From: Roger Quadros Date: Fri, 18 Dec 2020 12:57:36 +0200 Subject: [PATCH 0121/2012] MAINTAINERS: Update address for Cadence USB3 driver Updates my email address for Cadence USB3 driver. Signed-off-by: Roger Quadros Signed-off-by: Peter Chen --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 546aa66428c9f..afe3a5c66bc90 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3883,7 +3883,7 @@ F: drivers/mtd/nand/raw/cadence-nand-controller.c CADENCE USB3 DRD IP DRIVER M: Peter Chen M: Pawel Laszczak -M: Roger Quadros +R: Roger Quadros R: Aswath Govindraju L: linux-usb@vger.kernel.org S: Maintained -- GitLab From a694ffed876575d1df1a47067444047182de4354 Mon Sep 17 00:00:00 2001 From: Iskren Chernev Date: Mon, 28 Dec 2020 23:31:30 +0200 Subject: [PATCH 0122/2012] drm/msm: Fix null dereference in _msm_gem_new The crash was caused by locking an uninitialized lock during init of drm_gem_object. The lock changed in the breaking commit, but the init was not moved accordingly. 8<--- cut here --- Unable to handle kernel NULL pointer dereference at virtual address 00000000 pgd = (ptrval) [00000000] *pgd=00000000 Internal error: Oops: 5 [#1] PREEMPT SMP ARM Modules linked in: msm(+) qcom_spmi_vadc qcom_vadc_common dm_mod usb_f_rndis rmi_i2c rmi_core qnoc_msm8974 icc_smd_rpm pm8941_pwrkey CPU: 2 PID: 1020 Comm: udevd Not tainted 5.10.0-postmarketos-qcom-msm8974 #8 Hardware name: Generic DT based system PC is at ww_mutex_lock+0x20/0xb0 LR is at _msm_gem_new+0x13c/0x298 [msm] pc : [] lr : [] psr: 20000013 sp : c36e7ad0 ip : c3b3d800 fp : 00000000 r10: 00000001 r9 : c3b22800 r8 : 00000000 r7 : c3b23000 r6 : c3b3d600 r5 : c3b3d600 r4 : 00000000 r3 : c34b4780 r2 : c3b3d6f4 r1 : 00000000 r0 : 00000000 Flags: nzCv IRQs on FIQs on Mode SVC_32 ISA ARM Segment none Control: 10c5787d Table: 03ae406a DAC: 00000051 Process udevd (pid: 1020, stack limit = 0x(ptrval)) Stack: (0xc36e7ad0 to 0xc36e8000) [...] [] (ww_mutex_lock) from [] (_msm_gem_new+0x13c/0x298 [msm]) [] (_msm_gem_new [msm]) from [] (_msm_gem_kernel_new+0x20/0x190 [msm]) [] (_msm_gem_kernel_new [msm]) from [] (msm_gem_kernel_new+0x24/0x2c [msm]) [] (msm_gem_kernel_new [msm]) from [] (msm_gpu_init+0x308/0x548 [msm]) [] (msm_gpu_init [msm]) from [] (adreno_gpu_init+0x13c/0x240 [msm]) [] (adreno_gpu_init [msm]) from [] (a3xx_gpu_init+0x78/0x1dc [msm]) [] (a3xx_gpu_init [msm]) from [] (adreno_bind+0x1cc/0x274 [msm]) [] (adreno_bind [msm]) from [] (component_bind_all+0x11c/0x278) [] (component_bind_all) from [] (msm_drm_bind+0x18c/0x5b4 [msm]) [] (msm_drm_bind [msm]) from [] (try_to_bring_up_master+0x200/0x2c8) [] (try_to_bring_up_master) from [] (component_master_add_with_match+0xc8/0xfc) [] (component_master_add_with_match) from [] (msm_pdev_probe+0x288/0x2c4 [msm]) [] (msm_pdev_probe [msm]) from [] (platform_drv_probe+0x48/0x98) [] (platform_drv_probe) from [] (really_probe+0x108/0x528) [] (really_probe) from [] (driver_probe_device+0x78/0x1d4) [] (driver_probe_device) from [] (device_driver_attach+0xa8/0xb0) [] (device_driver_attach) from [] (__driver_attach+0xb4/0x154) [] (__driver_attach) from [] (bus_for_each_dev+0x78/0xb8) [] (bus_for_each_dev) from [] (bus_add_driver+0x10c/0x208) [] (bus_add_driver) from [] (driver_register+0x88/0x118) [] (driver_register) from [] (do_one_initcall+0x50/0x2b0) [] (do_one_initcall) from [] (do_init_module+0x60/0x288) [] (do_init_module) from [] (sys_finit_module+0xd4/0x120) [] (sys_finit_module) from [] (ret_fast_syscall+0x0/0x54) Exception stack(0xc36e7fa8 to 0xc36e7ff0) 7fa0: 00020000 00000000 00000007 b6edd5b0 00000000 b6f2ff20 7fc0: 00020000 00000000 0000017b 0000017b b6eef980 bedc3a54 00473c99 00000000 7fe0: b6edd5b0 bedc3918 b6ed8a5f b6f6a8b0 Code: e3c3303f e593300c e1a04000 f590f000 (e1940f9f) ---[ end trace 277e2a3da40bbb76 ]--- Fixes: 6c0e3ea250476 ("drm/msm/gem: Switch over to obj->resv for locking") Signed-off-by: Iskren Chernev Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/msm_gem.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c index a21be5b910ff6..d9a5a1895f3dc 100644 --- a/drivers/gpu/drm/msm/msm_gem.c +++ b/drivers/gpu/drm/msm/msm_gem.c @@ -1101,6 +1101,8 @@ static struct drm_gem_object *_msm_gem_new(struct drm_device *dev, struct msm_gem_vma *vma; struct page **pages; + drm_gem_private_object_init(dev, obj, size); + msm_gem_lock(obj); vma = add_vma(obj, NULL); @@ -1112,7 +1114,6 @@ static struct drm_gem_object *_msm_gem_new(struct drm_device *dev, to_msm_bo(obj)->vram_node = &vma->node; - drm_gem_private_object_init(dev, obj, size); pages = get_pages(obj); if (IS_ERR(pages)) { -- GitLab From 07fcad0d726d5da7c43f1c8e8fdb66c93a140ca5 Mon Sep 17 00:00:00 2001 From: Iskren Chernev Date: Mon, 28 Dec 2020 23:31:31 +0200 Subject: [PATCH 0123/2012] drm/msm: Ensure get_pages is called when locked get_pages is only called in a locked context. Add a WARN_ON to make sure it stays that way. Signed-off-by: Iskren Chernev Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/msm_gem.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c index d9a5a1895f3dc..114c0711a302d 100644 --- a/drivers/gpu/drm/msm/msm_gem.c +++ b/drivers/gpu/drm/msm/msm_gem.c @@ -96,6 +96,8 @@ static struct page **get_pages(struct drm_gem_object *obj) { struct msm_gem_object *msm_obj = to_msm_bo(obj); + WARN_ON(!msm_gem_is_locked(obj)); + if (!msm_obj->pages) { struct drm_device *dev = obj->dev; struct page **p; @@ -1114,8 +1116,9 @@ static struct drm_gem_object *_msm_gem_new(struct drm_device *dev, to_msm_bo(obj)->vram_node = &vma->node; - + msm_gem_lock(obj); pages = get_pages(obj); + msm_gem_unlock(obj); if (IS_ERR(pages)) { ret = PTR_ERR(pages); goto fail; -- GitLab From b000700d6db50c933ce8b661154e26cf4ad06dba Mon Sep 17 00:00:00 2001 From: Dinghao Liu Date: Sat, 26 Dec 2020 15:27:14 +0800 Subject: [PATCH 0124/2012] habanalabs: Fix memleak in hl_device_reset When kzalloc() fails, we should execute hl_mmu_fini() to release the MMU module. It's the same when hl_ctx_init() fails. Signed-off-by: Dinghao Liu Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/device.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c index 0749c92cbcf63..1456eabf96010 100644 --- a/drivers/misc/habanalabs/common/device.c +++ b/drivers/misc/habanalabs/common/device.c @@ -1092,6 +1092,7 @@ kill_processes: GFP_KERNEL); if (!hdev->kernel_ctx) { rc = -ENOMEM; + hl_mmu_fini(hdev); goto out_err; } @@ -1103,6 +1104,7 @@ kill_processes: "failed to init kernel ctx in hard reset\n"); kfree(hdev->kernel_ctx); hdev->kernel_ctx = NULL; + hl_mmu_fini(hdev); goto out_err; } } -- GitLab From 44362a3c353aeec5904c2ae6d1737f20fe7e9c79 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 23 Dec 2020 12:08:54 +0000 Subject: [PATCH 0125/2012] KVM: arm64: Fix hyp_cpu_pm_{init,exit} __init annotation The __init annotations on hyp_cpu_pm_{init,exit} are obviously incorrect, and the build system shouts at you if you enable DEBUG_SECTION_MISMATCH. Nothing really bad happens as we never execute that code outside of the init context, but we can't label the callers as __int either, as kvm_init isn't __init itself. Oh well. Signed-off-by: Marc Zyngier Reviewed-by: Nathan Chancellor Link: https://lore.kernel.org/r/20201223120854.255347-1-maz@kernel.org --- arch/arm64/kvm/arm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index ab782c480e9a5..04c44853b103b 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -1568,12 +1568,12 @@ static struct notifier_block hyp_init_cpu_pm_nb = { .notifier_call = hyp_init_cpu_pm_notifier, }; -static void __init hyp_cpu_pm_init(void) +static void hyp_cpu_pm_init(void) { if (!is_protected_kvm_enabled()) cpu_pm_register_notifier(&hyp_init_cpu_pm_nb); } -static void __init hyp_cpu_pm_exit(void) +static void hyp_cpu_pm_exit(void) { if (!is_protected_kvm_enabled()) cpu_pm_unregister_notifier(&hyp_init_cpu_pm_nb); -- GitLab From e042f151ec7474b88b8c1edaaddd1ff7415d7117 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 28 Dec 2020 19:54:28 -0800 Subject: [PATCH 0126/2012] hwmon: (sbtsi_temp) Fix Documenation kernel-doc warning Fix Documentation/hwmon/ kernel-doc warning in 5.11-rc1: lnx-511-rc1/Documentation/hwmon/sbtsi_temp.rst:4: WARNING: Title underline too short. Kernel driver sbtsi_temp ================== Fixes: 6ec3fcf556fe ("hwmon: (sbtsi) Add documentation") Signed-off-by: Randy Dunlap Cc: Kun Yi Cc: Guenter Roeck Cc: Jean Delvare Cc: linux-hwmon@vger.kernel.org Link: https://lore.kernel.org/r/20201229035428.31270-1-rdunlap@infradead.org Signed-off-by: Guenter Roeck --- Documentation/hwmon/sbtsi_temp.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/hwmon/sbtsi_temp.rst b/Documentation/hwmon/sbtsi_temp.rst index 922b3c8db666e..749f518389c38 100644 --- a/Documentation/hwmon/sbtsi_temp.rst +++ b/Documentation/hwmon/sbtsi_temp.rst @@ -1,7 +1,7 @@ .. SPDX-License-Identifier: GPL-2.0-or-later Kernel driver sbtsi_temp -================== +======================== Supported hardware: -- GitLab From 1eda52334e6d13eb1a85f713ce06dd39342b5020 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Tue, 15 Dec 2020 10:20:30 +0100 Subject: [PATCH 0127/2012] hwmon: (pwm-fan) Ensure that calculation doesn't discard big period values MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With MAX_PWM being defined to 255 the code unsigned long period; ... period = ctx->pwm->args.period; state.duty_cycle = DIV_ROUND_UP(pwm * (period - 1), MAX_PWM); calculates a too small value for duty_cycle if the configured period is big (either by discarding the 64 bit value ctx->pwm->args.period or by overflowing the multiplication). As this results in a too slow fan and so maybe an overheating machine better be safe than sorry and error out in .probe. Signed-off-by: Uwe Kleine-König Link: https://lore.kernel.org/r/20201215092031.152243-1-u.kleine-koenig@pengutronix.de Signed-off-by: Guenter Roeck --- drivers/hwmon/pwm-fan.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/hwmon/pwm-fan.c b/drivers/hwmon/pwm-fan.c index 777439f48c147..111a91dc6b798 100644 --- a/drivers/hwmon/pwm-fan.c +++ b/drivers/hwmon/pwm-fan.c @@ -334,8 +334,18 @@ static int pwm_fan_probe(struct platform_device *pdev) ctx->pwm_value = MAX_PWM; - /* Set duty cycle to maximum allowed and enable PWM output */ pwm_init_state(ctx->pwm, &state); + /* + * __set_pwm assumes that MAX_PWM * (period - 1) fits into an unsigned + * long. Check this here to prevent the fan running at a too low + * frequency. + */ + if (state.period > ULONG_MAX / MAX_PWM + 1) { + dev_err(dev, "Configured period too big\n"); + return -EINVAL; + } + + /* Set duty cycle to maximum allowed and enable PWM output */ state.duty_cycle = ctx->pwm->args.period - 1; state.enabled = true; -- GitLab From c318840fb2a42ce25febc95c4c19357acf1ae5ca Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Wed, 30 Dec 2020 11:20:44 -0500 Subject: [PATCH 0128/2012] USB: Gadget: dummy-hcd: Fix shift-out-of-bounds bug The dummy-hcd driver was written under the assumption that all the parameters in URBs sent to its root hub would be valid. With URBs sent from userspace via usbfs, that assumption can be violated. In particular, the driver doesn't fully check the port-feature values stored in the wValue entry of Clear-Port-Feature and Set-Port-Feature requests. Values that are too large can cause the driver to perform an invalid left shift of more than 32 bits. Ironically, two of those left shifts are unnecessary, because they implement Set-Port-Feature requests that hubs are not required to support, according to section 11.24.2.13 of the USB-2.0 spec. This patch adds the appropriate checks for the port feature selector values and removes the unnecessary feature settings. It also rejects requests to set the TEST feature or to set or clear the INDICATOR and C_OVERCURRENT features, as none of these are relevant to dummy-hcd's root-hub emulation. CC: Reported-and-tested-by: syzbot+5925509f78293baa7331@syzkaller.appspotmail.com Signed-off-by: Alan Stern Link: https://lore.kernel.org/r/20201230162044.GA727759@rowland.harvard.edu Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/udc/dummy_hcd.c | 35 ++++++++++++++++++++---------- 1 file changed, 23 insertions(+), 12 deletions(-) diff --git a/drivers/usb/gadget/udc/dummy_hcd.c b/drivers/usb/gadget/udc/dummy_hcd.c index ab5e978b5052c..1a953f44183aa 100644 --- a/drivers/usb/gadget/udc/dummy_hcd.c +++ b/drivers/usb/gadget/udc/dummy_hcd.c @@ -2118,9 +2118,21 @@ static int dummy_hub_control( dum_hcd->port_status &= ~USB_PORT_STAT_POWER; set_link_state(dum_hcd); break; - default: + case USB_PORT_FEAT_ENABLE: + case USB_PORT_FEAT_C_ENABLE: + case USB_PORT_FEAT_C_SUSPEND: + /* Not allowed for USB-3 */ + if (hcd->speed == HCD_USB3) + goto error; + fallthrough; + case USB_PORT_FEAT_C_CONNECTION: + case USB_PORT_FEAT_C_RESET: dum_hcd->port_status &= ~(1 << wValue); set_link_state(dum_hcd); + break; + default: + /* Disallow INDICATOR and C_OVER_CURRENT */ + goto error; } break; case GetHubDescriptor: @@ -2281,18 +2293,17 @@ static int dummy_hub_control( */ dum_hcd->re_timeout = jiffies + msecs_to_jiffies(50); fallthrough; + case USB_PORT_FEAT_C_CONNECTION: + case USB_PORT_FEAT_C_RESET: + case USB_PORT_FEAT_C_ENABLE: + case USB_PORT_FEAT_C_SUSPEND: + /* Not allowed for USB-3, and ignored for USB-2 */ + if (hcd->speed == HCD_USB3) + goto error; + break; default: - if (hcd->speed == HCD_USB3) { - if ((dum_hcd->port_status & - USB_SS_PORT_STAT_POWER) != 0) { - dum_hcd->port_status |= (1 << wValue); - } - } else - if ((dum_hcd->port_status & - USB_PORT_STAT_POWER) != 0) { - dum_hcd->port_status |= (1 << wValue); - } - set_link_state(dum_hcd); + /* Disallow TEST, INDICATOR, and C_OVER_CURRENT */ + goto error; } break; case GetPortErrorCount: -- GitLab From 355845b738e76445c8522802552146d96cb4afa7 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Thu, 31 Dec 2020 06:10:32 +0100 Subject: [PATCH 0129/2012] efi/apple-properties: Reinstate support for boolean properties Since commit 4466bf82821b ("efi/apple-properties: use PROPERTY_ENTRY_U8_ARRAY_LEN"), my MacBook Pro issues a -ENODATA error when trying to assign EFI properties to the discrete GPU: pci 0000:01:00.0: assigning 56 device properties pci 0000:01:00.0: error -61 assigning properties That's because some of the properties have no value. They're booleans whose presence can be checked by drivers, e.g. "use-backlight-blanking". Commit 6e98503dba64 ("efi/apple-properties: Remove redundant attribute initialization from unmarshal_key_value_pairs()") employed a trick to store such booleans as u8 arrays (which is the data type used for all other EFI properties on Macs): It cleared the property_entry's "is_array" flag, thereby denoting that the value is stored inline in the property_entry. Commit 4466bf82821b erroneously removed that trick. It was probably a little fragile to begin with. Reinstate support for boolean properties by explicitly invoking the PROPERTY_ENTRY_BOOL() initializer for properties with zero-length value. Fixes: 4466bf82821b ("efi/apple-properties: use PROPERTY_ENTRY_U8_ARRAY_LEN") Cc: Reviewed-by: Andy Shevchenko Signed-off-by: Lukas Wunner Link: https://lore.kernel.org/r/be958bda75331a011d53c696d1deec8dccd06fd2.1609388549.git.lukas@wunner.de Signed-off-by: Ard Biesheuvel --- drivers/firmware/efi/apple-properties.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/firmware/efi/apple-properties.c b/drivers/firmware/efi/apple-properties.c index 34f53d898acb0..e1926483ae2fd 100644 --- a/drivers/firmware/efi/apple-properties.c +++ b/drivers/firmware/efi/apple-properties.c @@ -3,8 +3,9 @@ * apple-properties.c - EFI device properties on Macs * Copyright (C) 2016 Lukas Wunner * - * Note, all properties are considered as u8 arrays. - * To get a value of any of them the caller must use device_property_read_u8_array(). + * Properties are stored either as: + * u8 arrays which can be retrieved with device_property_read_u8_array() or + * booleans which can be queried with device_property_present(). */ #define pr_fmt(fmt) "apple-properties: " fmt @@ -88,8 +89,12 @@ static void __init unmarshal_key_value_pairs(struct dev_header *dev_header, entry_data = ptr + key_len + sizeof(val_len); entry_len = val_len - sizeof(val_len); - entry[i] = PROPERTY_ENTRY_U8_ARRAY_LEN(key, entry_data, - entry_len); + if (entry_len) + entry[i] = PROPERTY_ENTRY_U8_ARRAY_LEN(key, entry_data, + entry_len); + else + entry[i] = PROPERTY_ENTRY_BOOL(key); + if (dump_properties) { dev_info(dev, "property: %s\n", key); print_hex_dump(KERN_INFO, pr_fmt(), DUMP_PREFIX_OFFSET, -- GitLab From f93274ef0fe972c120c96b3207f8fce376231a60 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 4 Dec 2020 09:01:36 +0100 Subject: [PATCH 0130/2012] crypto: asym_tpm: correct zero out potential secrets The function derive_pub_key() should be calling memzero_explicit() instead of memset() in case the complier decides to optimize away the call to memset() because it "knows" no one is going to touch the memory anymore. Cc: stable Reported-by: Ilil Blum Shem-Tov Tested-by: Ilil Blum Shem-Tov Signed-off-by: Greg Kroah-Hartman Link: https://lore.kernel.org/r/X8ns4AfwjKudpyfe@kroah.com Signed-off-by: Greg Kroah-Hartman --- crypto/asymmetric_keys/asym_tpm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crypto/asymmetric_keys/asym_tpm.c b/crypto/asymmetric_keys/asym_tpm.c index 511932aa94a6f..0959613560b9e 100644 --- a/crypto/asymmetric_keys/asym_tpm.c +++ b/crypto/asymmetric_keys/asym_tpm.c @@ -354,7 +354,7 @@ static uint32_t derive_pub_key(const void *pub_key, uint32_t len, uint8_t *buf) memcpy(cur, e, sizeof(e)); cur += sizeof(e); /* Zero parameters to satisfy set_pub_key ABI. */ - memset(cur, 0, SETKEY_PARAMS_SIZE); + memzero_explicit(cur, SETKEY_PARAMS_SIZE); return cur - buf; } -- GitLab From 957cbca7317f7413e1bac555a6b567af06598b10 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 31 Dec 2020 15:05:46 +0000 Subject: [PATCH 0131/2012] KVM: arm64: Remove spurious semicolon in reg_to_encoding() Although not a problem right now, it flared up while working on some other aspects of the code-base. Remove the useless semicolon. Signed-off-by: Marc Zyngier --- arch/arm64/kvm/sys_regs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index d46e7f706cb06..42ccc27fb684c 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -923,7 +923,7 @@ static bool access_pmuserenr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, #define reg_to_encoding(x) \ sys_reg((u32)(x)->Op0, (u32)(x)->Op1, \ - (u32)(x)->CRn, (u32)(x)->CRm, (u32)(x)->Op2); + (u32)(x)->CRn, (u32)(x)->CRm, (u32)(x)->Op2) /* Silly macro to expand the DBG{BCR,BVR,WVR,WCR}n_EL1 registers in one go */ #define DBG_BCR_BVR_WCR_WVR_EL1(n) \ -- GitLab From 4f8af077a02eed4831885048a10e04daa4e61a72 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?N=C3=ADcolas=20F=2E=20R=2E=20A=2E=20Prado?= Date: Mon, 28 Dec 2020 14:46:07 +0000 Subject: [PATCH 0132/2012] docs: Fix reST markup when linking to sections MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit During the process of converting the documentation to reST, some links were converted using the following wrong syntax (and sometimes using %20 instead of spaces): `Display text <#section-name-in-html>`__ This syntax isn't valid according to the docutils' spec [1], but more importantly, it is specific to HTML, since it uses '#' to link to an HTML anchor. The right syntax would instead use a docutils hyperlink reference as the embedded URI to point to the section [2], that is: `Display text
`__ This syntax works in both HTML and PDF. The LaTeX toolchain doesn't mind the HTML anchor syntax when generating the pdf documentation (make pdfdocs), that is, the build succeeds but the links don't work, but that syntax causes errors when trying to build using the not-yet-merged rst2pdf: ValueError: format not resolved, probably missing URL scheme or undefined destination target for 'Forcing%20Quiescent%20States' So, use the correct syntax in order to have it work in all different output formats. [1]: https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#reference-names [2]: https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#embedded-uris-and-aliases Fixes: ccc9971e2147 ("docs: rcu: convert some articles from html to ReST") Fixes: c8cce10a62aa ("docs: Fix the reference labels in Locking.rst") Fixes: e548cdeffcd8 ("docs-rst: convert kernel-locking to ReST") Fixes: 7ddedebb03b7 ("ALSA: doc: ReSTize writing-an-alsa-driver document") Signed-off-by: Nícolas F. R. A. Prado Reviewed-by: Takashi Iwai Reviewed-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/20201228144537.135353-1-nfraprado@protonmail.com Signed-off-by: Jonathan Corbet --- .../Tree-RCU-Memory-Ordering.rst | 8 ++++---- .../RCU/Design/Requirements/Requirements.rst | 20 +++++++++---------- Documentation/kernel-hacking/locking.rst | 8 ++++---- .../kernel-api/writing-an-alsa-driver.rst | 16 +++++++-------- 4 files changed, 26 insertions(+), 26 deletions(-) diff --git a/Documentation/RCU/Design/Memory-Ordering/Tree-RCU-Memory-Ordering.rst b/Documentation/RCU/Design/Memory-Ordering/Tree-RCU-Memory-Ordering.rst index 83ae3b79a6439..a648b423ba0eb 100644 --- a/Documentation/RCU/Design/Memory-Ordering/Tree-RCU-Memory-Ordering.rst +++ b/Documentation/RCU/Design/Memory-Ordering/Tree-RCU-Memory-Ordering.rst @@ -473,7 +473,7 @@ read-side critical sections that follow the idle period (the oval near the bottom of the diagram above). Plumbing this into the full grace-period execution is described -`below <#Forcing%20Quiescent%20States>`__. +`below `__. CPU-Hotplug Interface ^^^^^^^^^^^^^^^^^^^^^ @@ -494,7 +494,7 @@ mask to detect CPUs having gone offline since the beginning of this grace period. Plumbing this into the full grace-period execution is described -`below <#Forcing%20Quiescent%20States>`__. +`below `__. Forcing Quiescent States ^^^^^^^^^^^^^^^^^^^^^^^^ @@ -532,7 +532,7 @@ from other CPUs. | RCU. But this diagram is complex enough as it is, so simplicity | | overrode accuracy. You can think of it as poetic license, or you can | | think of it as misdirection that is resolved in the | -| `stitched-together diagram <#Putting%20It%20All%20Together>`__. | +| `stitched-together diagram `__. | +-----------------------------------------------------------------------+ Grace-Period Cleanup @@ -596,7 +596,7 @@ maintain ordering. For example, if the callback function wakes up a task that runs on some other CPU, proper ordering must in place in both the callback function and the task being awakened. To see why this is important, consider the top half of the `grace-period -cleanup <#Grace-Period%20Cleanup>`__ diagram. The callback might be +cleanup`_ diagram. The callback might be running on a CPU corresponding to the leftmost leaf ``rcu_node`` structure, and awaken a task that is to run on a CPU corresponding to the rightmost leaf ``rcu_node`` structure, and the grace-period kernel diff --git a/Documentation/RCU/Design/Requirements/Requirements.rst b/Documentation/RCU/Design/Requirements/Requirements.rst index e8c84fcc05071..d4c9a016074b3 100644 --- a/Documentation/RCU/Design/Requirements/Requirements.rst +++ b/Documentation/RCU/Design/Requirements/Requirements.rst @@ -45,7 +45,7 @@ requirements: #. `Other RCU Flavors`_ #. `Possible Future Changes`_ -This is followed by a `summary <#Summary>`__, however, the answers to +This is followed by a summary_, however, the answers to each quick quiz immediately follows the quiz. Select the big white space with your mouse to see the answer. @@ -1096,7 +1096,7 @@ memory barriers. | case, voluntary context switch) within an RCU read-side critical | | section. However, sleeping locks may be used within userspace RCU | | read-side critical sections, and also within Linux-kernel sleepable | -| RCU `(SRCU) <#Sleepable%20RCU>`__ read-side critical sections. In | +| RCU `(SRCU) `__ read-side critical sections. In | | addition, the -rt patchset turns spinlocks into a sleeping locks so | | that the corresponding critical sections can be preempted, which also | | means that these sleeplockified spinlocks (but not other sleeping | @@ -1186,7 +1186,7 @@ non-preemptible (``CONFIG_PREEMPT=n``) kernels, and thus `tiny RCU `__ was born. Josh Triplett has since taken over the small-memory banner with his `Linux kernel tinification `__ -project, which resulted in `SRCU <#Sleepable%20RCU>`__ becoming optional +project, which resulted in `SRCU `__ becoming optional for those kernels not needing it. The remaining performance requirements are, for the most part, @@ -1457,8 +1457,8 @@ will vary as the value of ``HZ`` varies, and can also be changed using the relevant Kconfig options and kernel boot parameters. RCU currently does not do much sanity checking of these parameters, so please use caution when changing them. Note that these forward-progress measures -are provided only for RCU, not for `SRCU <#Sleepable%20RCU>`__ or `Tasks -RCU <#Tasks%20RCU>`__. +are provided only for RCU, not for `SRCU `__ or `Tasks +RCU`_. RCU takes the following steps in ``call_rcu()`` to encourage timely invocation of callbacks when any given non-\ ``rcu_nocbs`` CPU has @@ -1477,8 +1477,8 @@ encouragement was provided: Again, these are default values when running at ``HZ=1000``, and can be overridden. Again, these forward-progress measures are provided only for -RCU, not for `SRCU <#Sleepable%20RCU>`__ or `Tasks -RCU <#Tasks%20RCU>`__. Even for RCU, callback-invocation forward +RCU, not for `SRCU `__ or `Tasks +RCU`_. Even for RCU, callback-invocation forward progress for ``rcu_nocbs`` CPUs is much less well-developed, in part because workloads benefiting from ``rcu_nocbs`` CPUs tend to invoke ``call_rcu()`` relatively infrequently. If workloads emerge that need @@ -1920,7 +1920,7 @@ Hotplug CPU The Linux kernel supports CPU hotplug, which means that CPUs can come and go. It is of course illegal to use any RCU API member from an -offline CPU, with the exception of `SRCU <#Sleepable%20RCU>`__ read-side +offline CPU, with the exception of `SRCU `__ read-side critical sections. This requirement was present from day one in DYNIX/ptx, but on the other hand, the Linux kernel's CPU-hotplug implementation is “interesting.” @@ -2177,7 +2177,7 @@ handles these states differently: However, RCU must be reliably informed as to whether any given CPU is currently in the idle loop, and, for ``NO_HZ_FULL``, also whether that CPU is executing in usermode, as discussed -`earlier <#Energy%20Efficiency>`__. It also requires that the +`earlier `__. It also requires that the scheduling-clock interrupt be enabled when RCU needs it to be: #. If a CPU is either idle or executing in usermode, and RCU believes it @@ -2294,7 +2294,7 @@ Performance, Scalability, Response Time, and Reliability ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Expanding on the `earlier -discussion <#Performance%20and%20Scalability>`__, RCU is used heavily by +discussion `__, RCU is used heavily by hot code paths in performance-critical portions of the Linux kernel's networking, security, virtualization, and scheduling code paths. RCU must therefore use efficient implementations, especially in its diff --git a/Documentation/kernel-hacking/locking.rst b/Documentation/kernel-hacking/locking.rst index 6ed806e6061bb..c3448929a824a 100644 --- a/Documentation/kernel-hacking/locking.rst +++ b/Documentation/kernel-hacking/locking.rst @@ -118,11 +118,11 @@ spinlock, but you may block holding a mutex. If you can't lock a mutex, your task will suspend itself, and be woken up when the mutex is released. This means the CPU can do something else while you are waiting. There are many cases when you simply can't sleep (see -`What Functions Are Safe To Call From Interrupts? <#sleeping-things>`__), +`What Functions Are Safe To Call From Interrupts?`_), and so have to use a spinlock instead. Neither type of lock is recursive: see -`Deadlock: Simple and Advanced <#deadlock>`__. +`Deadlock: Simple and Advanced`_. Locks and Uniprocessor Kernels ------------------------------ @@ -179,7 +179,7 @@ perfect world). Note that you can also use spin_lock_irq() or spin_lock_irqsave() here, which stop hardware interrupts -as well: see `Hard IRQ Context <#hard-irq-context>`__. +as well: see `Hard IRQ Context`_. This works perfectly for UP as well: the spin lock vanishes, and this macro simply becomes local_bh_disable() @@ -230,7 +230,7 @@ The Same Softirq ~~~~~~~~~~~~~~~~ The same softirq can run on the other CPUs: you can use a per-CPU array -(see `Per-CPU Data <#per-cpu-data>`__) for better performance. If you're +(see `Per-CPU Data`_) for better performance. If you're going so far as to use a softirq, you probably care about scalable performance enough to justify the extra complexity. diff --git a/Documentation/sound/kernel-api/writing-an-alsa-driver.rst b/Documentation/sound/kernel-api/writing-an-alsa-driver.rst index 73bbd59afc33a..e6365836fa8bd 100644 --- a/Documentation/sound/kernel-api/writing-an-alsa-driver.rst +++ b/Documentation/sound/kernel-api/writing-an-alsa-driver.rst @@ -71,7 +71,7 @@ core/oss The codes for PCM and mixer OSS emulation modules are stored in this directory. The rawmidi OSS emulation is included in the ALSA rawmidi code since it's quite small. The sequencer code is stored in -``core/seq/oss`` directory (see `below <#core-seq-oss>`__). +``core/seq/oss`` directory (see `below `__). core/seq ~~~~~~~~ @@ -382,7 +382,7 @@ where ``enable[dev]`` is the module option. Each time the ``probe`` callback is called, check the availability of the device. If not available, simply increment the device index and returns. dev will be incremented also later (`step 7 -<#set-the-pci-driver-data-and-return-zero>`__). +<7) Set the PCI driver data and return zero._>`__). 2) Create a card instance ~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -450,10 +450,10 @@ field contains the information shown in ``/proc/asound/cards``. 5) Create other components, such as mixer, MIDI, etc. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Here you define the basic components such as `PCM <#PCM-Interface>`__, -mixer (e.g. `AC97 <#API-for-AC97-Codec>`__), MIDI (e.g. -`MPU-401 <#MIDI-MPU401-UART-Interface>`__), and other interfaces. -Also, if you want a `proc file <#Proc-Interface>`__, define it here, +Here you define the basic components such as `PCM `__, +mixer (e.g. `AC97 `__), MIDI (e.g. +`MPU-401 `__), and other interfaces. +Also, if you want a `proc file `__, define it here, too. 6) Register the card instance. @@ -941,7 +941,7 @@ The allocation of an interrupt source is done like this: chip->irq = pci->irq; where :c:func:`snd_mychip_interrupt()` is the interrupt handler -defined `later <#pcm-interface-interrupt-handler>`__. Note that +defined `later `__. Note that ``chip->irq`` should be defined only when :c:func:`request_irq()` succeeded. @@ -3104,7 +3104,7 @@ processing the output stream in the irq handler. If the MPU-401 interface shares its interrupt with the other logical devices on the card, set ``MPU401_INFO_IRQ_HOOK`` (see -`below <#MIDI-Interrupt-Handler>`__). +`below `__). Usually, the port address corresponds to the command port and port + 1 corresponds to the data port. If not, you may change the ``cport`` -- GitLab From 81e79063004f32aae5196f0c929192e69aca1694 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sat, 26 Dec 2020 09:44:33 -0800 Subject: [PATCH 0133/2012] Documentation: admin: early_param()s are also listed in kernel-parameters Add info that "early_param()" kernel boot parameters are also listed in kernel-parameters.txt. Signed-off-by: Randy Dunlap Cc: Jonathan Corbet Cc: linux-doc@vger.kernel.org Link: https://lore.kernel.org/r/20201226174433.7885-1-rdunlap@infradead.org Signed-off-by: Jonathan Corbet --- Documentation/admin-guide/kernel-parameters.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.rst b/Documentation/admin-guide/kernel-parameters.rst index 06fb1b4aa849c..682ab28b5c94b 100644 --- a/Documentation/admin-guide/kernel-parameters.rst +++ b/Documentation/admin-guide/kernel-parameters.rst @@ -3,8 +3,8 @@ The kernel's command-line parameters ==================================== -The following is a consolidated list of the kernel parameters as -implemented by the __setup(), core_param() and module_param() macros +The following is a consolidated list of the kernel parameters as implemented +by the __setup(), early_param(), core_param() and module_param() macros and sorted into English Dictionary order (defined as ignoring all punctuation and sorting digits before letters in a case insensitive manner), and with descriptions where known. -- GitLab From c7e74b3c7b1cf4c04164ff16e6c047232fd3bcef Mon Sep 17 00:00:00 2001 From: Liao Pingfang Date: Sun, 27 Dec 2020 15:15:19 +0800 Subject: [PATCH 0134/2012] docs/mm: concepts.rst: Correct the threshold to low watermark It should be "low watermark" where we wake up kswapd daemon. Signed-off-by: Liao Pingfang Link: https://lore.kernel.org/r/1609053319-3112-1-git-send-email-winndows@163.com Signed-off-by: Jonathan Corbet --- Documentation/admin-guide/mm/concepts.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/admin-guide/mm/concepts.rst b/Documentation/admin-guide/mm/concepts.rst index fa0974fbeae7c..b966fcff993b2 100644 --- a/Documentation/admin-guide/mm/concepts.rst +++ b/Documentation/admin-guide/mm/concepts.rst @@ -184,7 +184,7 @@ pages either asynchronously or synchronously, depending on the state of the system. When the system is not loaded, most of the memory is free and allocation requests will be satisfied immediately from the free pages supply. As the load increases, the amount of the free pages goes -down and when it reaches a certain threshold (high watermark), an +down and when it reaches a certain threshold (low watermark), an allocation request will awaken the ``kswapd`` daemon. It will asynchronously scan memory pages and either just free them if the data they contain is available elsewhere, or evict to the backing storage -- GitLab From 0be1511f516e2b9766597336cedc6dc6d19e5af1 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 28 Dec 2020 15:12:12 -0800 Subject: [PATCH 0135/2012] Documentation: doc-guide: fixes to sphinx.rst Various fixes to sphinx.rst: - eliminate a double-space between 2 words - grammar/wording - punctuation - call rows in a table 'rows' instead of 'columns' (or does Sphinx call everything a column?) - It seems that "amdfonts" should be "amsfonts". I can't find any amdfonts. Signed-off-by: Randy Dunlap Reviewed-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/20201228231212.22448-1-rdunlap@infradead.org Signed-off-by: Jonathan Corbet --- Documentation/doc-guide/sphinx.rst | 32 +++++++++++++++--------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/Documentation/doc-guide/sphinx.rst b/Documentation/doc-guide/sphinx.rst index 2fb2ff297d69c..36ac2166ad675 100644 --- a/Documentation/doc-guide/sphinx.rst +++ b/Documentation/doc-guide/sphinx.rst @@ -48,12 +48,12 @@ or ``virtualenv``, depending on how your distribution packaged Python 3. those versions, you should run ``pip install 'docutils==0.12'``. #) It is recommended to use the RTD theme for html output. Depending - on the Sphinx version, it should be installed in separate, + on the Sphinx version, it should be installed separately, with ``pip install sphinx_rtd_theme``. - #) Some ReST pages contain math expressions. Due to the way Sphinx work, + #) Some ReST pages contain math expressions. Due to the way Sphinx works, those expressions are written using LaTeX notation. It needs texlive - installed with amdfonts and amsmath in order to evaluate them. + installed with amsfonts and amsmath in order to evaluate them. In summary, if you want to install Sphinx version 1.7.9, you should do:: @@ -128,7 +128,7 @@ Sphinx Build ============ The usual way to generate the documentation is to run ``make htmldocs`` or -``make pdfdocs``. There are also other formats available, see the documentation +``make pdfdocs``. There are also other formats available: see the documentation section of ``make help``. The generated documentation is placed in format-specific subdirectories under ``Documentation/output``. @@ -303,17 +303,17 @@ and *targets* (e.g. a ref to ``:ref:`last row ``` / :ref:`last row - head col 3 - head col 4 - * - column 1 + * - row 1 - field 1.1 - field 1.2 with autospan - * - column 2 + * - row 2 - field 2.1 - :rspan:`1` :cspan:`1` field 2.2 - 3.3 * .. _`last row`: - - column 3 + - row 3 Rendered as: @@ -325,17 +325,17 @@ Rendered as: - head col 3 - head col 4 - * - column 1 + * - row 1 - field 1.1 - field 1.2 with autospan - * - column 2 + * - row 2 - field 2.1 - :rspan:`1` :cspan:`1` field 2.2 - 3.3 * .. _`last row`: - - column 3 + - row 3 Cross-referencing ----------------- @@ -361,7 +361,7 @@ Figures & Images If you want to add an image, you should use the ``kernel-figure`` and ``kernel-image`` directives. E.g. to insert a figure with a scalable -image format use SVG (:ref:`svg_image_example`):: +image format, use SVG (:ref:`svg_image_example`):: .. kernel-figure:: svg_image.svg :alt: simple SVG image @@ -375,7 +375,7 @@ image format use SVG (:ref:`svg_image_example`):: SVG image example -The kernel figure (and image) directive support **DOT** formatted files, see +The kernel figure (and image) directive supports **DOT** formatted files, see * DOT: http://graphviz.org/pdf/dotguide.pdf * Graphviz: http://www.graphviz.org/content/dot-language @@ -394,7 +394,7 @@ A simple example (:ref:`hello_dot_file`):: DOT's hello world example -Embed *render* markups (or languages) like Graphviz's **DOT** is provided by the +Embedded *render* markups (or languages) like Graphviz's **DOT** are provided by the ``kernel-render`` directives.:: .. kernel-render:: DOT @@ -406,7 +406,7 @@ Embed *render* markups (or languages) like Graphviz's **DOT** is provided by the } How this will be rendered depends on the installed tools. If Graphviz is -installed, you will see an vector image. If not the raw markup is inserted as +installed, you will see a vector image. If not, the raw markup is inserted as *literal-block* (:ref:`hello_dot_render`). .. _hello_dot_render: @@ -421,8 +421,8 @@ installed, you will see an vector image. If not the raw markup is inserted as The *render* directive has all the options known from the *figure* directive, plus option ``caption``. If ``caption`` has a value, a *figure* node is -inserted. If not, a *image* node is inserted. A ``caption`` is also needed, if -you want to refer it (:ref:`hello_svg_render`). +inserted. If not, an *image* node is inserted. A ``caption`` is also needed, if +you want to refer to it (:ref:`hello_svg_render`). Embedded **SVG**:: -- GitLab From 798ed7800e20dfc3304de1b99df5ac71ad48966b Mon Sep 17 00:00:00 2001 From: Lukas Bulwahn Date: Sun, 20 Dec 2020 07:09:27 +0100 Subject: [PATCH 0136/2012] atomic: remove further references to atomic_ops Commit f0400a77ebdc ("atomic: Delete obsolete documentation") removed ./Documentation/core-api/atomic_ops.rst, but missed to remove further references to that file. Hence, make htmldocs warns: Documentation/core-api/index.rst:53: WARNING: toctree contains reference to nonexisting document 'core-api/atomic_ops' Also, ./scripts/get_maintainer.pl --self-test=patterns warns: warning: no file matches F: Documentation/core-api/atomic_ops.rst Remove further references to ./Documentation/core-api/atomic_ops.rst. Fixes: f0400a77ebdc ("atomic: Delete obsolete documentation") Signed-off-by: Lukas Bulwahn Link: https://lore.kernel.org/r/20201220060927.21582-1-lukas.bulwahn@gmail.com Signed-off-by: Jonathan Corbet --- Documentation/core-api/index.rst | 1 - MAINTAINERS | 1 - 2 files changed, 2 deletions(-) diff --git a/Documentation/core-api/index.rst b/Documentation/core-api/index.rst index 69171b1799f21..f1c9d20bd42dd 100644 --- a/Documentation/core-api/index.rst +++ b/Documentation/core-api/index.rst @@ -53,7 +53,6 @@ How Linux keeps everything from happening at the same time. See .. toctree:: :maxdepth: 1 - atomic_ops refcount-vs-atomic irq/index local_ops diff --git a/MAINTAINERS b/MAINTAINERS index 546aa66428c9f..ddc8e90c12247 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -10260,7 +10260,6 @@ S: Supported T: git git://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu.git dev F: Documentation/atomic_bitops.txt F: Documentation/atomic_t.txt -F: Documentation/core-api/atomic_ops.rst F: Documentation/core-api/refcount-vs-atomic.rst F: Documentation/litmus-tests/ F: Documentation/memory-barriers.txt -- GitLab From 2e202ad873365513c6ad72e29a531071dffa498a Mon Sep 17 00:00:00 2001 From: Kent Gibson Date: Mon, 28 Dec 2020 00:10:40 +0800 Subject: [PATCH 0137/2012] gpiolib: cdev: fix frame size warning in gpio_ioctl() The kernel test robot reports the following warning in [1]: drivers/gpio/gpiolib-cdev.c: In function 'gpio_ioctl': >>drivers/gpio/gpiolib-cdev.c:1437:1: warning: the frame size of 1040 bytes is larger than 1024 bytes [-Wframe-larger-than=] Refactor gpio_ioctl() to handle each ioctl in its own helper function and so reduce the variables stored on the stack to those explicitly required to service the ioctl at hand. The lineinfo_get_v1() helper handles both the GPIO_GET_LINEINFO_IOCTL and GPIO_GET_LINEINFO_WATCH_IOCTL, as per the corresponding v2 implementation - lineinfo_get(). [1] https://lore.kernel.org/lkml/202012270910.VW3qc1ER-lkp@intel.com/ Fixes: aad955842d1c ("gpiolib: cdev: support GPIO_V2_GET_LINEINFO_IOCTL and GPIO_V2_GET_LINEINFO_WATCH_IOCTL") Reported-by: kernel test robot Signed-off-by: Kent Gibson Reviewed-by: Linus Walleij Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpiolib-cdev.c | 145 ++++++++++++++++++------------------ 1 file changed, 73 insertions(+), 72 deletions(-) diff --git a/drivers/gpio/gpiolib-cdev.c b/drivers/gpio/gpiolib-cdev.c index 12b679ca552cc..1a7b51163528b 100644 --- a/drivers/gpio/gpiolib-cdev.c +++ b/drivers/gpio/gpiolib-cdev.c @@ -1979,6 +1979,21 @@ struct gpio_chardev_data { #endif }; +static int chipinfo_get(struct gpio_chardev_data *cdev, void __user *ip) +{ + struct gpio_device *gdev = cdev->gdev; + struct gpiochip_info chipinfo; + + memset(&chipinfo, 0, sizeof(chipinfo)); + + strscpy(chipinfo.name, dev_name(&gdev->dev), sizeof(chipinfo.name)); + strscpy(chipinfo.label, gdev->label, sizeof(chipinfo.label)); + chipinfo.lines = gdev->ngpio; + if (copy_to_user(ip, &chipinfo, sizeof(chipinfo))) + return -EFAULT; + return 0; +} + #ifdef CONFIG_GPIO_CDEV_V1 /* * returns 0 if the versions match, else the previously selected ABI version @@ -1993,6 +2008,41 @@ static int lineinfo_ensure_abi_version(struct gpio_chardev_data *cdata, return abiv; } + +static int lineinfo_get_v1(struct gpio_chardev_data *cdev, void __user *ip, + bool watch) +{ + struct gpio_desc *desc; + struct gpioline_info lineinfo; + struct gpio_v2_line_info lineinfo_v2; + + if (copy_from_user(&lineinfo, ip, sizeof(lineinfo))) + return -EFAULT; + + /* this doubles as a range check on line_offset */ + desc = gpiochip_get_desc(cdev->gdev->chip, lineinfo.line_offset); + if (IS_ERR(desc)) + return PTR_ERR(desc); + + if (watch) { + if (lineinfo_ensure_abi_version(cdev, 1)) + return -EPERM; + + if (test_and_set_bit(lineinfo.line_offset, cdev->watched_lines)) + return -EBUSY; + } + + gpio_desc_to_lineinfo(desc, &lineinfo_v2); + gpio_v2_line_info_to_v1(&lineinfo_v2, &lineinfo); + + if (copy_to_user(ip, &lineinfo, sizeof(lineinfo))) { + if (watch) + clear_bit(lineinfo.line_offset, cdev->watched_lines); + return -EFAULT; + } + + return 0; +} #endif static int lineinfo_get(struct gpio_chardev_data *cdev, void __user *ip, @@ -2030,6 +2080,22 @@ static int lineinfo_get(struct gpio_chardev_data *cdev, void __user *ip, return 0; } +static int lineinfo_unwatch(struct gpio_chardev_data *cdev, void __user *ip) +{ + __u32 offset; + + if (copy_from_user(&offset, ip, sizeof(offset))) + return -EFAULT; + + if (offset >= cdev->gdev->ngpio) + return -EINVAL; + + if (!test_and_clear_bit(offset, cdev->watched_lines)) + return -EBUSY; + + return 0; +} + /* * gpio_ioctl() - ioctl handler for the GPIO chardev */ @@ -2037,80 +2103,24 @@ static long gpio_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { struct gpio_chardev_data *cdev = file->private_data; struct gpio_device *gdev = cdev->gdev; - struct gpio_chip *gc = gdev->chip; void __user *ip = (void __user *)arg; - __u32 offset; /* We fail any subsequent ioctl():s when the chip is gone */ - if (!gc) + if (!gdev->chip) return -ENODEV; /* Fill in the struct and pass to userspace */ if (cmd == GPIO_GET_CHIPINFO_IOCTL) { - struct gpiochip_info chipinfo; - - memset(&chipinfo, 0, sizeof(chipinfo)); - - strscpy(chipinfo.name, dev_name(&gdev->dev), - sizeof(chipinfo.name)); - strscpy(chipinfo.label, gdev->label, - sizeof(chipinfo.label)); - chipinfo.lines = gdev->ngpio; - if (copy_to_user(ip, &chipinfo, sizeof(chipinfo))) - return -EFAULT; - return 0; + return chipinfo_get(cdev, ip); #ifdef CONFIG_GPIO_CDEV_V1 - } else if (cmd == GPIO_GET_LINEINFO_IOCTL) { - struct gpio_desc *desc; - struct gpioline_info lineinfo; - struct gpio_v2_line_info lineinfo_v2; - - if (copy_from_user(&lineinfo, ip, sizeof(lineinfo))) - return -EFAULT; - - /* this doubles as a range check on line_offset */ - desc = gpiochip_get_desc(gc, lineinfo.line_offset); - if (IS_ERR(desc)) - return PTR_ERR(desc); - - gpio_desc_to_lineinfo(desc, &lineinfo_v2); - gpio_v2_line_info_to_v1(&lineinfo_v2, &lineinfo); - - if (copy_to_user(ip, &lineinfo, sizeof(lineinfo))) - return -EFAULT; - return 0; } else if (cmd == GPIO_GET_LINEHANDLE_IOCTL) { return linehandle_create(gdev, ip); } else if (cmd == GPIO_GET_LINEEVENT_IOCTL) { return lineevent_create(gdev, ip); - } else if (cmd == GPIO_GET_LINEINFO_WATCH_IOCTL) { - struct gpio_desc *desc; - struct gpioline_info lineinfo; - struct gpio_v2_line_info lineinfo_v2; - - if (copy_from_user(&lineinfo, ip, sizeof(lineinfo))) - return -EFAULT; - - /* this doubles as a range check on line_offset */ - desc = gpiochip_get_desc(gc, lineinfo.line_offset); - if (IS_ERR(desc)) - return PTR_ERR(desc); - - if (lineinfo_ensure_abi_version(cdev, 1)) - return -EPERM; - - if (test_and_set_bit(lineinfo.line_offset, cdev->watched_lines)) - return -EBUSY; - - gpio_desc_to_lineinfo(desc, &lineinfo_v2); - gpio_v2_line_info_to_v1(&lineinfo_v2, &lineinfo); - - if (copy_to_user(ip, &lineinfo, sizeof(lineinfo))) { - clear_bit(lineinfo.line_offset, cdev->watched_lines); - return -EFAULT; - } - - return 0; + } else if (cmd == GPIO_GET_LINEINFO_IOCTL || + cmd == GPIO_GET_LINEINFO_WATCH_IOCTL) { + return lineinfo_get_v1(cdev, ip, + cmd == GPIO_GET_LINEINFO_WATCH_IOCTL); #endif /* CONFIG_GPIO_CDEV_V1 */ } else if (cmd == GPIO_V2_GET_LINEINFO_IOCTL || cmd == GPIO_V2_GET_LINEINFO_WATCH_IOCTL) { @@ -2119,16 +2129,7 @@ static long gpio_ioctl(struct file *file, unsigned int cmd, unsigned long arg) } else if (cmd == GPIO_V2_GET_LINE_IOCTL) { return linereq_create(gdev, ip); } else if (cmd == GPIO_GET_LINEINFO_UNWATCH_IOCTL) { - if (copy_from_user(&offset, ip, sizeof(offset))) - return -EFAULT; - - if (offset >= cdev->gdev->ngpio) - return -EINVAL; - - if (!test_and_clear_bit(offset, cdev->watched_lines)) - return -EBUSY; - - return 0; + return lineinfo_unwatch(cdev, ip); } return -EINVAL; } -- GitLab From 36a106a4c1c100d55ba3d32a21ef748cfcd4fa99 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Sun, 3 Jan 2021 22:42:39 +0100 Subject: [PATCH 0138/2012] block: rsxx: select CONFIG_CRC32 Without crc32, the driver fails to link: arm-linux-gnueabi-ld: drivers/block/rsxx/config.o: in function `rsxx_load_config': config.c:(.text+0x124): undefined reference to `crc32_le' Fixes: 8722ff8cdbfa ("block: IBM RamSan 70/80 device driver") Signed-off-by: Arnd Bergmann Signed-off-by: Jens Axboe --- drivers/block/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig index 262326973ee01..583b671b1d2d2 100644 --- a/drivers/block/Kconfig +++ b/drivers/block/Kconfig @@ -445,6 +445,7 @@ config BLK_DEV_RBD config BLK_DEV_RSXX tristate "IBM Flash Adapter 900GB Full Height PCIe Device Driver" depends on PCI + select CRC32 help Device driver for IBM's high speed PCIe SSD storage device: Flash Adapter 900GB Full Height. -- GitLab From 19cd3403cb0d522dd5e10188eef85817de29e26e Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Sun, 3 Jan 2021 22:43:09 +0100 Subject: [PATCH 0139/2012] lightnvm: select CONFIG_CRC32 Without CRC32 support, this fails to link: arm-linux-gnueabi-ld: drivers/lightnvm/pblk-init.o: in function `pblk_init': pblk-init.c:(.text+0x2654): undefined reference to `crc32_le' arm-linux-gnueabi-ld: drivers/lightnvm/pblk-init.o: in function `pblk_exit': pblk-init.c:(.text+0x2a7c): undefined reference to `crc32_le' Fixes: a4bd217b4326 ("lightnvm: physical block device (pblk) target") Signed-off-by: Arnd Bergmann Signed-off-by: Jens Axboe --- drivers/lightnvm/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/lightnvm/Kconfig b/drivers/lightnvm/Kconfig index 8f39f9ba5c80e..4c2ce210c1237 100644 --- a/drivers/lightnvm/Kconfig +++ b/drivers/lightnvm/Kconfig @@ -19,6 +19,7 @@ if NVM config NVM_PBLK tristate "Physical Block Device Open-Channel SSD target" + select CRC32 help Allows an open-channel SSD to be exposed as a block device to the host. The target assumes the device exposes raw flash and must be -- GitLab From 4f8b848788f77c7f5c3bd98febce66b7aa14785f Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Sun, 3 Jan 2021 22:43:52 +0100 Subject: [PATCH 0140/2012] zonefs: select CONFIG_CRC32 When CRC32 is disabled, zonefs cannot be linked: ld: fs/zonefs/super.o: in function `zonefs_fill_super': Add a Kconfig 'select' statement for it. Fixes: 8dcc1a9d90c1 ("fs: New zonefs file system") Signed-off-by: Arnd Bergmann Signed-off-by: Damien Le Moal --- fs/zonefs/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/zonefs/Kconfig b/fs/zonefs/Kconfig index ef2697b78820d..827278f937fe7 100644 --- a/fs/zonefs/Kconfig +++ b/fs/zonefs/Kconfig @@ -3,6 +3,7 @@ config ZONEFS_FS depends on BLOCK depends on BLK_DEV_ZONED select FS_IOMAP + select CRC32 help zonefs is a simple file system which exposes zones of a zoned block device (e.g. host-managed or host-aware SMR disk drives) as files. -- GitLab From 5136bb8c8b5872676f397b27f93a30568baf3a25 Mon Sep 17 00:00:00 2001 From: Lukas Bulwahn Date: Sat, 19 Dec 2020 17:24:56 +0100 Subject: [PATCH 0141/2012] MAINTAINERS: adjust GCC PLUGINS after gcc-plugin.sh removal Commit 1e860048c53e ("gcc-plugins: simplify GCC plugin-dev capability test") removed ./scripts/gcc-plugin.sh, but missed to adjust MAINTAINERS. Hence, ./scripts/get_maintainers.pl --self-test=patterns warns: warning: no file matches F: scripts/gcc-plugin.sh Adjust entries in GGC PLUGINS section after this file removal. Signed-off-by: Lukas Bulwahn Signed-off-by: Masahiro Yamada --- MAINTAINERS | 1 - 1 file changed, 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 6eff4f720c721..181c32c18aaf1 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7363,7 +7363,6 @@ L: linux-hardening@vger.kernel.org S: Maintained F: Documentation/kbuild/gcc-plugins.rst F: scripts/Makefile.gcc-plugins -F: scripts/gcc-plugin.sh F: scripts/gcc-plugins/ GCOV BASED KERNEL PROFILING -- GitLab From d39648eb67ac851c7918c794424c266a5d2635b9 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sat, 19 Dec 2020 09:08:05 -0800 Subject: [PATCH 0142/2012] kconfig: config script: add a little user help Give the user a clue about the problem along with the 35 lines of usage/help text. Signed-off-by: Randy Dunlap Cc: Andi Kleen Cc: Masahiro Yamada Cc: linux-kbuild@vger.kernel.org Signed-off-by: Masahiro Yamada --- scripts/config | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/config b/scripts/config index 8c8d7c3d7accc..ff88e2faefd35 100755 --- a/scripts/config +++ b/scripts/config @@ -223,6 +223,7 @@ while [ "$1" != "" ] ; do ;; *) + echo "bad command: $CMD" >&2 usage ;; esac -- GitLab From c0f975af1745391749e4306aa8081b9a4d2cced8 Mon Sep 17 00:00:00 2001 From: John Millikin Date: Wed, 23 Dec 2020 14:04:23 +0900 Subject: [PATCH 0143/2012] kconfig: Support building mconf with vendor sysroot ncurses Changes the final fallback path in the ncurses locator for mconf to support host compilers with a non-default sysroot. This is similar to the hardcoded search for ncurses under '/usr/include', but can support compilers that keep their default header and library directories elsewhere. For nconfig, do nothing because the only vendor compiler I'm aware of with this layout (Apple Clang) ships an ncurses version that's too old for nconfig anyway. Signed-off-by: John Millikin Signed-off-by: Masahiro Yamada --- scripts/kconfig/mconf-cfg.sh | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/scripts/kconfig/mconf-cfg.sh b/scripts/kconfig/mconf-cfg.sh index aa68ec95620d6..fcd4acd4e9cbc 100755 --- a/scripts/kconfig/mconf-cfg.sh +++ b/scripts/kconfig/mconf-cfg.sh @@ -33,7 +33,9 @@ if [ -f /usr/include/ncurses/ncurses.h ]; then exit 0 fi -if [ -f /usr/include/ncurses.h ]; then +# As a final fallback before giving up, check if $HOSTCC knows of a default +# ncurses installation (e.g. from a vendor-specific sysroot). +if echo '#include ' | "${HOSTCC}" -E - >/dev/null 2>&1; then echo cflags=\"-D_GNU_SOURCE\" echo libs=\"-lncurses\" exit 0 -- GitLab From 0c36d88cff4d72149f94809303c5180b6f716d39 Mon Sep 17 00:00:00 2001 From: John Millikin Date: Wed, 23 Dec 2020 15:23:25 +0900 Subject: [PATCH 0144/2012] lib/raid6: Let $(UNROLL) rules work with macOS userland Older versions of BSD awk are fussy about the order of '-v' and '-f' flags, and require a space after the flag name. This causes build failures on platforms with an old awk, such as macOS and NetBSD. Since GNU awk and modern versions of BSD awk (distributed with FreeBSD/OpenBSD) are fine with either form, the definition of 'cmd_unroll' can be trivially tweaked to let the lib/raid6 Makefile work with both old and new awk flag dialects. Signed-off-by: John Millikin Signed-off-by: Masahiro Yamada --- lib/raid6/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/raid6/Makefile b/lib/raid6/Makefile index b4c0df6d706dc..c770570bfe4f2 100644 --- a/lib/raid6/Makefile +++ b/lib/raid6/Makefile @@ -48,7 +48,7 @@ endif endif quiet_cmd_unroll = UNROLL $@ - cmd_unroll = $(AWK) -f$(srctree)/$(src)/unroll.awk -vN=$* < $< > $@ + cmd_unroll = $(AWK) -v N=$* -f $(srctree)/$(src)/unroll.awk < $< > $@ targets += int1.c int2.c int4.c int8.c int16.c int32.c $(obj)/int%.c: $(src)/int.uc $(src)/unroll.awk FORCE -- GitLab From 9bba03d4473df0b707224d4d2067b62d1e1e2a77 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 23 Dec 2020 15:35:42 +0900 Subject: [PATCH 0145/2012] kconfig: remove 'kvmconfig' and 'xenconfig' shorthands Linux 5.10 is out. Remove the 'kvmconfig' and 'xenconfig' shorthands as previously announced. Signed-off-by: Masahiro Yamada --- scripts/kconfig/Makefile | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/scripts/kconfig/Makefile b/scripts/kconfig/Makefile index e46df0a2d4f9d..2c40e68853dde 100644 --- a/scripts/kconfig/Makefile +++ b/scripts/kconfig/Makefile @@ -94,16 +94,6 @@ configfiles=$(wildcard $(srctree)/kernel/configs/$@ $(srctree)/arch/$(SRCARCH)/c $(Q)$(CONFIG_SHELL) $(srctree)/scripts/kconfig/merge_config.sh -m .config $(configfiles) $(Q)$(MAKE) -f $(srctree)/Makefile olddefconfig -PHONY += kvmconfig -kvmconfig: kvm_guest.config - @echo >&2 "WARNING: 'make $@' will be removed after Linux 5.10" - @echo >&2 " Please use 'make $<' instead." - -PHONY += xenconfig -xenconfig: xen.config - @echo >&2 "WARNING: 'make $@' will be removed after Linux 5.10" - @echo >&2 " Please use 'make $<' instead." - PHONY += tinyconfig tinyconfig: $(Q)$(MAKE) -f $(srctree)/Makefile allnoconfig tiny.config -- GitLab From 764257d9069a9c19758b626cc1ba4ae079335d9e Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Wed, 30 Dec 2020 12:21:05 +0200 Subject: [PATCH 0146/2012] phy: cpcap-usb: Fix warning for missing regulator_disable On deferred probe, we will get the following splat: cpcap-usb-phy cpcap-usb-phy.0: could not initialize VBUS or ID IIO: -517 WARNING: CPU: 0 PID: 21 at drivers/regulator/core.c:2123 regulator_put+0x68/0x78 ... (regulator_put) from [] (release_nodes+0x1b4/0x1fc) (release_nodes) from [] (really_probe+0x104/0x4a0) (really_probe) from [] (driver_probe_device+0x58/0xb4) Signed-off-by: Tony Lindgren Link: https://lore.kernel.org/r/20201230102105.11826-1-tony@atomide.com Signed-off-by: Vinod Koul --- drivers/phy/motorola/phy-cpcap-usb.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/drivers/phy/motorola/phy-cpcap-usb.c b/drivers/phy/motorola/phy-cpcap-usb.c index 442522ba487f0..4728e2bff6620 100644 --- a/drivers/phy/motorola/phy-cpcap-usb.c +++ b/drivers/phy/motorola/phy-cpcap-usb.c @@ -662,35 +662,42 @@ static int cpcap_usb_phy_probe(struct platform_device *pdev) generic_phy = devm_phy_create(ddata->dev, NULL, &ops); if (IS_ERR(generic_phy)) { error = PTR_ERR(generic_phy); - return PTR_ERR(generic_phy); + goto out_reg_disable; } phy_set_drvdata(generic_phy, ddata); phy_provider = devm_of_phy_provider_register(ddata->dev, of_phy_simple_xlate); - if (IS_ERR(phy_provider)) - return PTR_ERR(phy_provider); + if (IS_ERR(phy_provider)) { + error = PTR_ERR(phy_provider); + goto out_reg_disable; + } error = cpcap_usb_init_optional_pins(ddata); if (error) - return error; + goto out_reg_disable; cpcap_usb_init_optional_gpios(ddata); error = cpcap_usb_init_iio(ddata); if (error) - return error; + goto out_reg_disable; error = cpcap_usb_init_interrupts(pdev, ddata); if (error) - return error; + goto out_reg_disable; usb_add_phy_dev(&ddata->phy); atomic_set(&ddata->active, 1); schedule_delayed_work(&ddata->detect_work, msecs_to_jiffies(1)); return 0; + +out_reg_disable: + regulator_disable(ddata->vusb); + + return error; } static int cpcap_usb_phy_remove(struct platform_device *pdev) -- GitLab From d092bd9110494de3372722b317510b3692f1b2fe Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Sun, 3 Jan 2021 14:55:17 +0100 Subject: [PATCH 0147/2012] phy: mediatek: allow compile-testing the dsi phy Randconfig builds show another broken dependency: WARNING: unmet direct dependencies detected for PHY_MTK_MIPI_DSI Depends on [n]: ARCH_MEDIATEK [=n] && OF [=y] Selected by [m]: - DRM_MEDIATEK [=m] && HAS_IOMEM [=y] && DRM [=m] && (ARCH_MEDIATEK [=n] || ARM [=y] && COMPILE_TEST [=y]) && COMMON_CLK [=y] && HAVE_ARM_SMCCC [=y] && OF [=y] && MTK_MMSYS [=y] This is similar to the hdmi driver I fixed earlier, and I guess the common-clk bug would sooner or later also manifest here, so just use the exact same solution I chose for the other driver, and hope that any future drivers just copy it from here. Fixes: 90f80d95992f ("phy: mediatek: Move mtk_mipi_dsi_phy driver into drivers/phy/mediatek folder") Fixes: f5f6e01f9164 ("phy: mediatek: allow compile-testing the hdmi phy") Signed-off-by: Arnd Bergmann Link: https://lore.kernel.org/r/20210103135524.3678664-1-arnd@kernel.org Signed-off-by: Vinod Koul --- drivers/phy/mediatek/Kconfig | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/phy/mediatek/Kconfig b/drivers/phy/mediatek/Kconfig index d38def43b1bf6..55f8e6c048ab3 100644 --- a/drivers/phy/mediatek/Kconfig +++ b/drivers/phy/mediatek/Kconfig @@ -49,7 +49,9 @@ config PHY_MTK_HDMI config PHY_MTK_MIPI_DSI tristate "MediaTek MIPI-DSI Driver" - depends on ARCH_MEDIATEK && OF + depends on ARCH_MEDIATEK || COMPILE_TEST + depends on COMMON_CLK + depends on OF select GENERIC_PHY help Support MIPI DSI for Mediatek SoCs. -- GitLab From d6c1ddd938d84a1adef7e19e8efc10e1b4df5034 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Mork?= Date: Wed, 30 Dec 2020 16:25:34 +0100 Subject: [PATCH 0148/2012] USB: serial: option: add Quectel EM160R-GL MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit New modem using ff/ff/30 for QCDM, ff/00/00 for AT and NMEA, and ff/ff/ff for RMNET/QMI. T: Bus=02 Lev=01 Prnt=01 Port=00 Cnt=01 Dev#= 2 Spd=5000 MxCh= 0 D: Ver= 3.20 Cls=ef(misc ) Sub=02 Prot=01 MxPS= 9 #Cfgs= 1 P: Vendor=2c7c ProdID=0620 Rev= 4.09 S: Manufacturer=Quectel S: Product=EM160R-GL S: SerialNumber=e31cedc1 C:* #Ifs= 5 Cfg#= 1 Atr=a0 MxPwr=896mA I:* If#= 0 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=30 Driver=(none) E: Ad=81(I) Atr=02(Bulk) MxPS=1024 Ivl=0ms E: Ad=01(O) Atr=02(Bulk) MxPS=1024 Ivl=0ms I:* If#= 1 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=(none) E: Ad=83(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=82(I) Atr=02(Bulk) MxPS=1024 Ivl=0ms E: Ad=02(O) Atr=02(Bulk) MxPS=1024 Ivl=0ms I:* If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=(none) E: Ad=85(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=84(I) Atr=02(Bulk) MxPS=1024 Ivl=0ms E: Ad=03(O) Atr=02(Bulk) MxPS=1024 Ivl=0ms I:* If#= 3 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=(none) E: Ad=87(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=86(I) Atr=02(Bulk) MxPS=1024 Ivl=0ms E: Ad=04(O) Atr=02(Bulk) MxPS=1024 Ivl=0ms I:* If#= 4 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=(none) E: Ad=88(I) Atr=03(Int.) MxPS= 8 Ivl=32ms E: Ad=8e(I) Atr=02(Bulk) MxPS=1024 Ivl=0ms E: Ad=0f(O) Atr=02(Bulk) MxPS=1024 Ivl=0ms Cc: stable@vger.kernel.org Signed-off-by: Bjørn Mork [ johan: add model comment ] Signed-off-by: Johan Hovold --- drivers/usb/serial/option.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 2c21e34235bbb..ee726a1067066 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -1117,6 +1117,8 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM12, 0xff, 0xff, 0xff), .driver_info = RSVD(1) | RSVD(2) | RSVD(3) | RSVD(4) | NUMEP2 }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM12, 0xff, 0, 0) }, + { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, 0x0620, 0xff, 0xff, 0x30) }, /* EM160R-GL */ + { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, 0x0620, 0xff, 0, 0) }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_RM500Q, 0xff, 0xff, 0x30) }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_RM500Q, 0xff, 0, 0) }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_RM500Q, 0xff, 0xff, 0x10), -- GitLab From 9f8550e4bd9d78a8436c2061ad2530215f875376 Mon Sep 17 00:00:00 2001 From: Eyal Birger Date: Wed, 23 Dec 2020 17:00:46 +0200 Subject: [PATCH 0149/2012] xfrm: fix disable_xfrm sysctl when used on xfrm interfaces The disable_xfrm flag signals that xfrm should not be performed during routing towards a device before reaching device xmit. For xfrm interfaces this is usually desired as they perform the outbound policy lookup as part of their xmit using their if_id. Before this change enabling this flag on xfrm interfaces prevented them from xmitting as xfrm_lookup_with_ifid() would not perform a policy lookup in case the original dst had the DST_NOXFRM flag. This optimization is incorrect when the lookup is done by the xfrm interface xmit logic. Fix by performing policy lookup when invoked by xfrmi as if_id != 0. Similarly it's unlikely for the 'no policy exists on net' check to yield any performance benefits when invoked from xfrmi. Fixes: f203b76d7809 ("xfrm: Add virtual xfrm interfaces") Signed-off-by: Eyal Birger Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_policy.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index d622c2548d229..2f84136af48ab 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -3078,8 +3078,8 @@ struct dst_entry *xfrm_lookup_with_ifid(struct net *net, xflo.flags = flags; /* To accelerate a bit... */ - if ((dst_orig->flags & DST_NOXFRM) || - !net->xfrm.policy_count[XFRM_POLICY_OUT]) + if (!if_id && ((dst_orig->flags & DST_NOXFRM) || + !net->xfrm.policy_count[XFRM_POLICY_OUT])) goto nopol; xdst = xfrm_bundle_lookup(net, fl, family, dir, &xflo, if_id); -- GitLab From afbc293add6466f8f3f0c3d944d85f53709c170f Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Sat, 26 Dec 2020 16:50:20 -0800 Subject: [PATCH 0150/2012] af_key: relax availability checks for skb size calculation xfrm_probe_algs() probes kernel crypto modules and changes the availability of struct xfrm_algo_desc. But there is a small window where ealg->available and aalg->available get changed between count_ah_combs()/count_esp_combs() and dump_ah_combs()/dump_esp_combs(), in this case we may allocate a smaller skb but later put a larger amount of data and trigger the panic in skb_put(). Fix this by relaxing the checks when counting the size, that is, skipping the test of ->available. We may waste some memory for a few of sizeof(struct sadb_comb), but it is still much better than a panic. Reported-by: syzbot+b2bf2652983d23734c5c@syzkaller.appspotmail.com Cc: Steffen Klassert Cc: Herbert Xu Signed-off-by: Cong Wang Signed-off-by: Steffen Klassert --- net/key/af_key.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/key/af_key.c b/net/key/af_key.c index c12dbc51ef5fe..ef9b4ac03e7b7 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -2902,7 +2902,7 @@ static int count_ah_combs(const struct xfrm_tmpl *t) break; if (!aalg->pfkey_supported) continue; - if (aalg_tmpl_set(t, aalg) && aalg->available) + if (aalg_tmpl_set(t, aalg)) sz += sizeof(struct sadb_comb); } return sz + sizeof(struct sadb_prop); @@ -2920,7 +2920,7 @@ static int count_esp_combs(const struct xfrm_tmpl *t) if (!ealg->pfkey_supported) continue; - if (!(ealg_tmpl_set(t, ealg) && ealg->available)) + if (!(ealg_tmpl_set(t, ealg))) continue; for (k = 1; ; k++) { @@ -2931,7 +2931,7 @@ static int count_esp_combs(const struct xfrm_tmpl *t) if (!aalg->pfkey_supported) continue; - if (aalg_tmpl_set(t, aalg) && aalg->available) + if (aalg_tmpl_set(t, aalg)) sz += sizeof(struct sadb_comb); } } -- GitLab From f6e9ceb7a7fc321a31a9dde93a99b7b4b016a3b3 Mon Sep 17 00:00:00 2001 From: Po-Hsu Lin Date: Wed, 30 Dec 2020 17:52:04 +0800 Subject: [PATCH 0151/2012] selftests: xfrm: fix test return value override issue in xfrm_policy.sh When running this xfrm_policy.sh test script, even with some cases marked as FAIL, the overall test result will still be PASS: $ sudo ./xfrm_policy.sh PASS: policy before exception matches FAIL: expected ping to .254 to fail (exceptions) PASS: direct policy matches (exceptions) PASS: policy matches (exceptions) FAIL: expected ping to .254 to fail (exceptions and block policies) PASS: direct policy matches (exceptions and block policies) PASS: policy matches (exceptions and block policies) FAIL: expected ping to .254 to fail (exceptions and block policies after hresh changes) PASS: direct policy matches (exceptions and block policies after hresh changes) PASS: policy matches (exceptions and block policies after hresh changes) FAIL: expected ping to .254 to fail (exceptions and block policies after hthresh change in ns3) PASS: direct policy matches (exceptions and block policies after hthresh change in ns3) PASS: policy matches (exceptions and block policies after hthresh change in ns3) FAIL: expected ping to .254 to fail (exceptions and block policies after htresh change to normal) PASS: direct policy matches (exceptions and block policies after htresh change to normal) PASS: policy matches (exceptions and block policies after htresh change to normal) PASS: policies with repeated htresh change $ echo $? 0 This is because the $lret in check_xfrm() is not a local variable. Therefore when a test failed in check_exceptions(), the non-zero $lret will later get reset to 0 when the next test calls check_xfrm(). With this fix, the final return value will be 1. Make it easier for testers to spot this failure. Fixes: 39aa6928d462d0 ("xfrm: policy: fix netlink/pf_key policy lookups") Signed-off-by: Po-Hsu Lin Acked-by: Florian Westphal Signed-off-by: Steffen Klassert --- tools/testing/selftests/net/xfrm_policy.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/xfrm_policy.sh b/tools/testing/selftests/net/xfrm_policy.sh index 7a1bf94c5bd38..5922941e70c6c 100755 --- a/tools/testing/selftests/net/xfrm_policy.sh +++ b/tools/testing/selftests/net/xfrm_policy.sh @@ -202,7 +202,7 @@ check_xfrm() { # 1: iptables -m policy rule count != 0 rval=$1 ip=$2 - lret=0 + local lret=0 ip netns exec ns1 ping -q -c 1 10.0.2.$ip > /dev/null -- GitLab From da64ae2d35d3673233f0403b035d4c6acbf71965 Mon Sep 17 00:00:00 2001 From: Visa Hankala Date: Wed, 30 Dec 2020 16:15:53 +0000 Subject: [PATCH 0152/2012] xfrm: Fix wraparound in xfrm_policy_addr_delta() Use three-way comparison for address components to avoid integer wraparound in the result of xfrm_policy_addr_delta(). This ensures that the search trees are built and traversed correctly. Treat IPv4 and IPv6 similarly by returning 0 when prefixlen == 0. Prefix /0 has only one equivalence class. Fixes: 9cf545ebd591d ("xfrm: policy: store inexact policies in a tree ordered by destination address") Signed-off-by: Visa Hankala Acked-by: Florian Westphal Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_policy.c | 26 +++++++++---- tools/testing/selftests/net/xfrm_policy.sh | 43 ++++++++++++++++++++++ 2 files changed, 61 insertions(+), 8 deletions(-) diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 2f84136af48ab..b74f28cabe24f 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -793,15 +793,22 @@ static int xfrm_policy_addr_delta(const xfrm_address_t *a, const xfrm_address_t *b, u8 prefixlen, u16 family) { + u32 ma, mb, mask; unsigned int pdw, pbi; int delta = 0; switch (family) { case AF_INET: - if (sizeof(long) == 4 && prefixlen == 0) - return ntohl(a->a4) - ntohl(b->a4); - return (ntohl(a->a4) & ((~0UL << (32 - prefixlen)))) - - (ntohl(b->a4) & ((~0UL << (32 - prefixlen)))); + if (prefixlen == 0) + return 0; + mask = ~0U << (32 - prefixlen); + ma = ntohl(a->a4) & mask; + mb = ntohl(b->a4) & mask; + if (ma < mb) + delta = -1; + else if (ma > mb) + delta = 1; + break; case AF_INET6: pdw = prefixlen >> 5; pbi = prefixlen & 0x1f; @@ -812,10 +819,13 @@ static int xfrm_policy_addr_delta(const xfrm_address_t *a, return delta; } if (pbi) { - u32 mask = ~0u << (32 - pbi); - - delta = (ntohl(a->a6[pdw]) & mask) - - (ntohl(b->a6[pdw]) & mask); + mask = ~0U << (32 - pbi); + ma = ntohl(a->a6[pdw]) & mask; + mb = ntohl(b->a6[pdw]) & mask; + if (ma < mb) + delta = -1; + else if (ma > mb) + delta = 1; } break; default: diff --git a/tools/testing/selftests/net/xfrm_policy.sh b/tools/testing/selftests/net/xfrm_policy.sh index 5922941e70c6c..bdf450eaf60cf 100755 --- a/tools/testing/selftests/net/xfrm_policy.sh +++ b/tools/testing/selftests/net/xfrm_policy.sh @@ -287,6 +287,47 @@ check_hthresh_repeat() return 0 } +# insert non-overlapping policies in a random order and check that +# all of them can be fetched using the traffic selectors. +check_random_order() +{ + local ns=$1 + local log=$2 + + for i in $(seq 100); do + ip -net $ns xfrm policy flush + for j in $(seq 0 16 255 | sort -R); do + ip -net $ns xfrm policy add dst $j.0.0.0/24 dir out priority 10 action allow + done + for j in $(seq 0 16 255); do + if ! ip -net $ns xfrm policy get dst $j.0.0.0/24 dir out > /dev/null; then + echo "FAIL: $log" 1>&2 + return 1 + fi + done + done + + for i in $(seq 100); do + ip -net $ns xfrm policy flush + for j in $(seq 0 16 255 | sort -R); do + local addr=$(printf "e000:0000:%02x00::/56" $j) + ip -net $ns xfrm policy add dst $addr dir out priority 10 action allow + done + for j in $(seq 0 16 255); do + local addr=$(printf "e000:0000:%02x00::/56" $j) + if ! ip -net $ns xfrm policy get dst $addr dir out > /dev/null; then + echo "FAIL: $log" 1>&2 + return 1 + fi + done + done + + ip -net $ns xfrm policy flush + + echo "PASS: $log" + return 0 +} + #check for needed privileges if [ "$(id -u)" -ne 0 ];then echo "SKIP: Need root privileges" @@ -438,6 +479,8 @@ check_exceptions "exceptions and block policies after htresh change to normal" check_hthresh_repeat "policies with repeated htresh change" +check_random_order ns3 "policies inserted in random order" + for i in 1 2 3 4;do ip netns del ns$i;done exit $ret -- GitLab From 896111dc4bcf887b835b3ef54f48b450d4692a1d Mon Sep 17 00:00:00 2001 From: Sean Young Date: Sun, 20 Dec 2020 13:29:54 +0100 Subject: [PATCH 0153/2012] media: rc: ensure that uevent can be read directly after rc device register There is a race condition where if the /sys/class/rc0/uevent file is read before rc_dev->registered is set to true, -ENODEV will be returned. Link: https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1901089 Cc: stable@vger.kernel.org Fixes: a2e2d73fa281 ("media: rc: do not access device via sysfs after rc_unregister_device()") Signed-off-by: Sean Young Signed-off-by: Mauro Carvalho Chehab --- drivers/media/rc/rc-main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/media/rc/rc-main.c b/drivers/media/rc/rc-main.c index 1d811e5ffb557..29d4d01896ff6 100644 --- a/drivers/media/rc/rc-main.c +++ b/drivers/media/rc/rc-main.c @@ -1928,6 +1928,8 @@ int rc_register_device(struct rc_dev *dev) goto out_raw; } + dev->registered = true; + rc = device_add(&dev->dev); if (rc) goto out_rx_free; @@ -1937,8 +1939,6 @@ int rc_register_device(struct rc_dev *dev) dev->device_name ?: "Unspecified device", path ?: "N/A"); kfree(path); - dev->registered = true; - /* * once the the input device is registered in rc_setup_rx_device, * userspace can open the input device and rc_open() will be called -- GitLab From 42e85f90171a4ba59a1e1cedbbc30ce3f68f2317 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 15 Dec 2020 11:30:26 +0100 Subject: [PATCH 0154/2012] arm64/smp: Remove unused irq variable in arch_show_interrupts() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit arch/arm64/kernel/smp.c: In function ‘arch_show_interrupts’: arch/arm64/kernel/smp.c:808:16: warning: unused variable ‘irq’ [-Wunused-variable] 808 | unsigned int irq = irq_desc_get_irq(ipi_desc[i]); | ^~~ The removal of the last user forgot to remove the variable. Fixes: 5089bc51f81f ("arm64/smp: Use irq_desc_kstat_cpu() in arch_show_interrupts()") Signed-off-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/20201215103026.2872532-1-geert+renesas@glider.be Signed-off-by: Catalin Marinas --- arch/arm64/kernel/smp.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 6bc3a3698c3d1..376343d6f13ae 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -807,7 +807,6 @@ int arch_show_interrupts(struct seq_file *p, int prec) unsigned int cpu, i; for (i = 0; i < NR_IPI; i++) { - unsigned int irq = irq_desc_get_irq(ipi_desc[i]); seq_printf(p, "%*s%u:%s", prec - 1, "IPI", i, prec >= 4 ? " " : ""); for_each_online_cpu(cpu) -- GitLab From b614231dec7864a338ce85032aa3d2d7ea2bc46d Mon Sep 17 00:00:00 2001 From: Peter Collingbourne Date: Wed, 2 Dec 2020 23:34:58 -0800 Subject: [PATCH 0155/2012] arm64: mte: remove an ISB on kernel exit This ISB is unnecessary because we will soon do an ERET. Signed-off-by: Peter Collingbourne Link: https://linux-review.googlesource.com/id/I69f1ee6bb09b1372dd744a0e01cedaf090c8d448 Link: https://lore.kernel.org/r/20201203073458.2675400-1-pcc@google.com Signed-off-by: Catalin Marinas --- arch/arm64/kernel/entry.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 2a93fa5f4e49d..a8c3e7aaca749 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -182,7 +182,6 @@ alternative_else_nop_endif mrs_s \tmp2, SYS_GCR_EL1 bfi \tmp2, \tmp, #0, #16 msr_s SYS_GCR_EL1, \tmp2 - isb #endif .endm @@ -194,6 +193,7 @@ alternative_else_nop_endif ldr_l \tmp, gcr_kernel_excl mte_set_gcr \tmp, \tmp2 + isb 1: #endif .endm -- GitLab From 095507dc1350b3a2b8b39fdc05edba0c10859eca Mon Sep 17 00:00:00 2001 From: Nicolas Saenz Julienne Date: Fri, 18 Dec 2020 17:33:07 +0100 Subject: [PATCH 0156/2012] arm64: mm: Fix ARCH_LOW_ADDRESS_LIMIT when !CONFIG_ZONE_DMA Systems configured with CONFIG_ZONE_DMA32, CONFIG_ZONE_NORMAL and !CONFIG_ZONE_DMA will fail to properly setup ARCH_LOW_ADDRESS_LIMIT. The limit will default to ~0ULL, effectively spanning the whole memory, which is too high for a configuration that expects low memory to be capped at 4GB. Fix ARCH_LOW_ADDRESS_LIMIT by falling back to arm64_dma32_phys_limit when arm64_dma_phys_limit isn't set. arm64_dma32_phys_limit will honour CONFIG_ZONE_DMA32, or span the entire memory when not enabled. Fixes: 1a8e1cef7603 ("arm64: use both ZONE_DMA and ZONE_DMA32") Signed-off-by: Nicolas Saenz Julienne Link: https://lore.kernel.org/r/20201218163307.10150-1-nsaenzjulienne@suse.de Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/processor.h | 3 ++- arch/arm64/mm/init.c | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index ca2cd75d32863..69ad25fbeae4a 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -94,7 +94,8 @@ #endif /* CONFIG_ARM64_FORCE_52BIT */ extern phys_addr_t arm64_dma_phys_limit; -#define ARCH_LOW_ADDRESS_LIMIT (arm64_dma_phys_limit - 1) +extern phys_addr_t arm64_dma32_phys_limit; +#define ARCH_LOW_ADDRESS_LIMIT ((arm64_dma_phys_limit ? : arm64_dma32_phys_limit) - 1) struct debug_info { #ifdef CONFIG_HAVE_HW_BREAKPOINT diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 75addb36354aa..7deddf56f7c3e 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -59,7 +59,7 @@ EXPORT_SYMBOL(memstart_addr); * bit addressable memory area. */ phys_addr_t arm64_dma_phys_limit __ro_after_init; -static phys_addr_t arm64_dma32_phys_limit __ro_after_init; +phys_addr_t arm64_dma32_phys_limit __ro_after_init; #ifdef CONFIG_KEXEC_CORE /* -- GitLab From 4883a60c17eda6bf52d1c817ee7ead65b4a02da2 Mon Sep 17 00:00:00 2001 From: Sean Nyekjaer Date: Mon, 21 Dec 2020 11:00:13 +0100 Subject: [PATCH 0157/2012] mtd: rawnand: gpmi: fix dst bit offset when extracting raw payload MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Re-add the multiply by 8 to "step * eccsize" to correct the destination bit offset when extracting the data payload in gpmi_ecc_read_page_raw(). Fixes: e5e5631cc889 ("mtd: rawnand: gpmi: Use nand_extract_bits()") Cc: stable@vger.kernel.org Reported-by: Martin Hundebøll Signed-off-by: Sean Nyekjaer Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20201221100013.2715675-1-sean@geanix.com --- drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c b/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c index 5cdf05bcbf8fa..3fa8c22d3f36a 100644 --- a/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c +++ b/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c @@ -1615,7 +1615,7 @@ static int gpmi_ecc_read_page_raw(struct nand_chip *chip, uint8_t *buf, /* Extract interleaved payload data and ECC bits */ for (step = 0; step < nfc_geo->ecc_chunk_count; step++) { if (buf) - nand_extract_bits(buf, step * eccsize, tmp_buf, + nand_extract_bits(buf, step * eccsize * 8, tmp_buf, src_bit_off, eccsize * 8); src_bit_off += eccsize * 8; -- GitLab From 0bd1bf86ab79555425b9f0b63005e181defe4da6 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Sun, 3 Jan 2021 14:57:29 +0100 Subject: [PATCH 0158/2012] dmaengine: qcom: fix gpi undefined behavior gcc points out an incorrect error handling loop: drivers/dma/qcom/gpi.c: In function 'gpi_ch_init': drivers/dma/qcom/gpi.c:1254:15: error: iteration 2 invokes undefined behavior [-Werror=aggressive-loop-optimizations] 1254 | struct gpii *gpii = gchan->gpii; | ^~~~ drivers/dma/qcom/gpi.c:1951:2: note: within this loop 1951 | for (i = i - 1; i >= 0; i++) { | ^~~ Change the loop to correctly walk backwards through the initialized fields rather than off into the woods. Fixes: 5d0c3533a19f ("dmaengine: qcom: Add GPI dma driver") Signed-off-by: Arnd Bergmann Link: https://lore.kernel.org/r/20210103135738.3741123-1-arnd@kernel.org Signed-off-by: Vinod Koul --- drivers/dma/qcom/gpi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dma/qcom/gpi.c b/drivers/dma/qcom/gpi.c index 556c070a514ca..1a0bf6b0567a5 100644 --- a/drivers/dma/qcom/gpi.c +++ b/drivers/dma/qcom/gpi.c @@ -1948,7 +1948,7 @@ static int gpi_ch_init(struct gchan *gchan) return ret; error_start_chan: - for (i = i - 1; i >= 0; i++) { + for (i = i - 1; i >= 0; i--) { gpi_stop_chan(&gpii->gchan[i]); gpi_send_cmd(gpii, gchan, GPI_CH_CMD_RESET); } -- GitLab From 99974aedbd73523969afb09f33c6e3047cd0ddae Mon Sep 17 00:00:00 2001 From: Shravya Kumbham Date: Wed, 23 Dec 2020 16:51:00 +0530 Subject: [PATCH 0159/2012] dmaengine: xilinx_dma: check dma_async_device_register return value dma_async_device_register() can return non-zero error code. Add condition to check the return value of dma_async_device_register function and handle the error path. Addresses-Coverity: Event check_return. Fixes: 9cd4360de609 ("dma: Add Xilinx AXI Video Direct Memory Access Engine driver support") Signed-off-by: Shravya Kumbham Signed-off-by: Radhey Shyam Pandey Link: https://lore.kernel.org/r/1608722462-29519-2-git-send-email-radhey.shyam.pandey@xilinx.com Signed-off-by: Vinod Koul --- drivers/dma/xilinx/xilinx_dma.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/dma/xilinx/xilinx_dma.c b/drivers/dma/xilinx/xilinx_dma.c index 22faea653ea82..091f1f80dcff6 100644 --- a/drivers/dma/xilinx/xilinx_dma.c +++ b/drivers/dma/xilinx/xilinx_dma.c @@ -3112,7 +3112,11 @@ static int xilinx_dma_probe(struct platform_device *pdev) } /* Register the DMA engine with the core */ - dma_async_device_register(&xdev->common); + err = dma_async_device_register(&xdev->common); + if (err) { + dev_err(xdev->dev, "failed to register the dma device\n"); + goto error; + } err = of_dma_controller_register(node, of_dma_xilinx_xlate, xdev); -- GitLab From faeb0731be0a31e2246b21a85fa7dabbd750101d Mon Sep 17 00:00:00 2001 From: Shravya Kumbham Date: Wed, 23 Dec 2020 16:51:01 +0530 Subject: [PATCH 0160/2012] dmaengine: xilinx_dma: fix incompatible param warning in _child_probe() In xilinx_dma_child_probe function, the nr_channels variable is passed to of_property_read_u32() which expects an u32 return value pointer. Modify the nr_channels variable type from int to u32 to fix the incompatible parameter coverity warning. Addresses-Coverity: Event incompatible_param. Fixes: 1a9e7a03c761 ("dmaengine: vdma: Add support for mulit-channel dma mode") Signed-off-by: Shravya Kumbham Signed-off-by: Radhey Shyam Pandey Link: https://lore.kernel.org/r/1608722462-29519-3-git-send-email-radhey.shyam.pandey@xilinx.com Signed-off-by: Vinod Koul --- drivers/dma/xilinx/xilinx_dma.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/dma/xilinx/xilinx_dma.c b/drivers/dma/xilinx/xilinx_dma.c index 091f1f80dcff6..3e374e8d22444 100644 --- a/drivers/dma/xilinx/xilinx_dma.c +++ b/drivers/dma/xilinx/xilinx_dma.c @@ -2900,7 +2900,8 @@ static int xilinx_dma_chan_probe(struct xilinx_dma_device *xdev, static int xilinx_dma_child_probe(struct xilinx_dma_device *xdev, struct device_node *node) { - int ret, i, nr_channels = 1; + int ret, i; + u32 nr_channels = 1; ret = of_property_read_u32(node, "dma-channels", &nr_channels); if (xdev->dma_config->dmatype == XDMA_TYPE_AXIMCDMA && ret < 0) -- GitLab From 2d5efea64472469117dc1a9a39530069e95b21e9 Mon Sep 17 00:00:00 2001 From: Shravya Kumbham Date: Wed, 23 Dec 2020 16:51:02 +0530 Subject: [PATCH 0161/2012] dmaengine: xilinx_dma: fix mixed_enum_type coverity warning Typecast the fls(width -1) with (enum dmaengine_alignment) in xilinx_dma_chan_probe function to fix the coverity warning. Addresses-Coverity: Event mixed_enum_type. Fixes: 9cd4360de609 ("dma: Add Xilinx AXI Video Direct Memory Access Engine driver support") Signed-off-by: Shravya Kumbham Signed-off-by: Radhey Shyam Pandey Link: https://lore.kernel.org/r/1608722462-29519-4-git-send-email-radhey.shyam.pandey@xilinx.com Signed-off-by: Vinod Koul --- drivers/dma/xilinx/xilinx_dma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dma/xilinx/xilinx_dma.c b/drivers/dma/xilinx/xilinx_dma.c index 3e374e8d22444..79777550a6ffc 100644 --- a/drivers/dma/xilinx/xilinx_dma.c +++ b/drivers/dma/xilinx/xilinx_dma.c @@ -2781,7 +2781,7 @@ static int xilinx_dma_chan_probe(struct xilinx_dma_device *xdev, has_dre = false; if (!has_dre) - xdev->common.copy_align = fls(width - 1); + xdev->common.copy_align = (enum dmaengine_alignment)fls(width - 1); if (of_device_is_compatible(node, "xlnx,axi-vdma-mm2s-channel") || of_device_is_compatible(node, "xlnx,axi-dma-mm2s-channel") || -- GitLab From 98bf2d3f4970179c702ef64db658e0553bc6ef3a Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 22 Dec 2020 07:11:18 +0000 Subject: [PATCH 0162/2012] powerpc/32s: Fix RTAS machine check with VMAP stack When we have VMAP stack, exception prolog 1 sets r1, not r11. When it is not an RTAS machine check, don't trash r1 because it is needed by prolog 1. Fixes: da7bb43ab9da ("powerpc/32: Fix vmap stack - Properly set r1 before activating MMU") Fixes: d2e006036082 ("powerpc/32: Use SPRN_SPRG_SCRATCH2 in exception prologs") Cc: stable@vger.kernel.org # v5.10+ Signed-off-by: Christophe Leroy [mpe: Squash in fixup for RTAS machine check from Christophe] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/bc77d61d1c18940e456a2dee464f1e2eda65a3f0.1608621048.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/head_book3s_32.S | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/arch/powerpc/kernel/head_book3s_32.S b/arch/powerpc/kernel/head_book3s_32.S index 349bf3f0c3afa..858fbc8b19f32 100644 --- a/arch/powerpc/kernel/head_book3s_32.S +++ b/arch/powerpc/kernel/head_book3s_32.S @@ -260,10 +260,19 @@ __secondary_hold_acknowledge: MachineCheck: EXCEPTION_PROLOG_0 #ifdef CONFIG_PPC_CHRP +#ifdef CONFIG_VMAP_STACK + mtspr SPRN_SPRG_SCRATCH2,r1 + mfspr r1, SPRN_SPRG_THREAD + lwz r1, RTAS_SP(r1) + cmpwi cr1, r1, 0 + bne cr1, 7f + mfspr r1, SPRN_SPRG_SCRATCH2 +#else mfspr r11, SPRN_SPRG_THREAD lwz r11, RTAS_SP(r11) cmpwi cr1, r11, 0 bne cr1, 7f +#endif #endif /* CONFIG_PPC_CHRP */ EXCEPTION_PROLOG_1 for_rtas=1 7: EXCEPTION_PROLOG_2 -- GitLab From c3d6eb6e54373f297313b65c1f2319d36914d579 Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Wed, 30 Dec 2020 20:44:07 +0800 Subject: [PATCH 0163/2012] HID: multitouch: Enable multi-input for Synaptics pointstick/touchpad device Pointstick and its left/right buttons on HP EliteBook 850 G7 need multi-input quirk to work correctly. Signed-off-by: Kai-Heng Feng Signed-off-by: Jiri Kosina --- drivers/hid/hid-multitouch.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/hid/hid-multitouch.c b/drivers/hid/hid-multitouch.c index d670bcd57bdef..0743ef51d3b24 100644 --- a/drivers/hid/hid-multitouch.c +++ b/drivers/hid/hid-multitouch.c @@ -2054,6 +2054,10 @@ static const struct hid_device_id mt_devices[] = { HID_DEVICE(BUS_I2C, HID_GROUP_MULTITOUCH_WIN_8, USB_VENDOR_ID_SYNAPTICS, 0xce08) }, + { .driver_data = MT_CLS_WIN_8_FORCE_MULTI_INPUT, + HID_DEVICE(BUS_I2C, HID_GROUP_MULTITOUCH_WIN_8, + USB_VENDOR_ID_SYNAPTICS, 0xce09) }, + /* TopSeed panels */ { .driver_data = MT_CLS_TOPSEED, MT_USB_DEVICE(USB_VENDOR_ID_TOPSEED2, -- GitLab From 4b1a60a1bb8f03d82c3f6da424adc96667b59f2a Mon Sep 17 00:00:00 2001 From: Georgi Djakov Date: Mon, 4 Jan 2021 15:50:43 +0200 Subject: [PATCH 0164/2012] MAINTAINERS: Update Georgi's email address Use my kernel.org email as main address to make things a bit easier for me to handle. Link: https://lore.kernel.org/r/20210104135043.31262-1-georgi.djakov@linaro.org Signed-off-by: Georgi Djakov --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 546aa66428c9f..a15e306123ef8 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -9240,7 +9240,7 @@ F: tools/testing/selftests/sgx/* K: \bSGX_ INTERCONNECT API -M: Georgi Djakov +M: Georgi Djakov L: linux-pm@vger.kernel.org S: Maintained F: Documentation/devicetree/bindings/interconnect/ -- GitLab From 9a85c09a3f507b925d75cb0c7c8f364467038052 Mon Sep 17 00:00:00 2001 From: Paul Cercueil Date: Fri, 11 Dec 2020 23:28:09 +0000 Subject: [PATCH 0165/2012] pinctrl: ingenic: Fix JZ4760 support - JZ4760 and JZ4760B have a similar register layout as the JZ4740, and don't use the new register layout, which was introduced with the JZ4770 SoC and not the JZ4760 or JZ4760B SoCs. - The JZ4740 code path only expected two function modes to be configurable for each pin, and wouldn't work with more than two. Fix it for the JZ4760, which has four configurable function modes. Fixes: 0257595a5cf4 ("pinctrl: Ingenic: Add pinctrl driver for JZ4760 and JZ4760B.") Cc: # 5.3 Signed-off-by: Paul Cercueil Link: https://lore.kernel.org/r/20201211232810.261565-1-paul@crapouillou.net Signed-off-by: Linus Walleij --- drivers/pinctrl/pinctrl-ingenic.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/drivers/pinctrl/pinctrl-ingenic.c b/drivers/pinctrl/pinctrl-ingenic.c index 53a6a24bd0527..8ac3091c44691 100644 --- a/drivers/pinctrl/pinctrl-ingenic.c +++ b/drivers/pinctrl/pinctrl-ingenic.c @@ -1688,7 +1688,7 @@ static inline bool ingenic_gpio_get_value(struct ingenic_gpio_chip *jzgc, static void ingenic_gpio_set_value(struct ingenic_gpio_chip *jzgc, u8 offset, int value) { - if (jzgc->jzpc->info->version >= ID_JZ4760) + if (jzgc->jzpc->info->version >= ID_JZ4770) ingenic_gpio_set_bit(jzgc, JZ4760_GPIO_PAT0, offset, !!value); else ingenic_gpio_set_bit(jzgc, JZ4740_GPIO_DATA, offset, !!value); @@ -1718,7 +1718,7 @@ static void irq_set_type(struct ingenic_gpio_chip *jzgc, break; } - if (jzgc->jzpc->info->version >= ID_JZ4760) { + if (jzgc->jzpc->info->version >= ID_JZ4770) { reg1 = JZ4760_GPIO_PAT1; reg2 = JZ4760_GPIO_PAT0; } else { @@ -1758,7 +1758,7 @@ static void ingenic_gpio_irq_enable(struct irq_data *irqd) struct ingenic_gpio_chip *jzgc = gpiochip_get_data(gc); int irq = irqd->hwirq; - if (jzgc->jzpc->info->version >= ID_JZ4760) + if (jzgc->jzpc->info->version >= ID_JZ4770) ingenic_gpio_set_bit(jzgc, JZ4760_GPIO_INT, irq, true); else ingenic_gpio_set_bit(jzgc, JZ4740_GPIO_SELECT, irq, true); @@ -1774,7 +1774,7 @@ static void ingenic_gpio_irq_disable(struct irq_data *irqd) ingenic_gpio_irq_mask(irqd); - if (jzgc->jzpc->info->version >= ID_JZ4760) + if (jzgc->jzpc->info->version >= ID_JZ4770) ingenic_gpio_set_bit(jzgc, JZ4760_GPIO_INT, irq, false); else ingenic_gpio_set_bit(jzgc, JZ4740_GPIO_SELECT, irq, false); @@ -1799,7 +1799,7 @@ static void ingenic_gpio_irq_ack(struct irq_data *irqd) irq_set_type(jzgc, irq, IRQ_TYPE_LEVEL_HIGH); } - if (jzgc->jzpc->info->version >= ID_JZ4760) + if (jzgc->jzpc->info->version >= ID_JZ4770) ingenic_gpio_set_bit(jzgc, JZ4760_GPIO_FLAG, irq, false); else ingenic_gpio_set_bit(jzgc, JZ4740_GPIO_DATA, irq, true); @@ -1856,7 +1856,7 @@ static void ingenic_gpio_irq_handler(struct irq_desc *desc) chained_irq_enter(irq_chip, desc); - if (jzgc->jzpc->info->version >= ID_JZ4760) + if (jzgc->jzpc->info->version >= ID_JZ4770) flag = ingenic_gpio_read_reg(jzgc, JZ4760_GPIO_FLAG); else flag = ingenic_gpio_read_reg(jzgc, JZ4740_GPIO_FLAG); @@ -1938,7 +1938,7 @@ static int ingenic_gpio_get_direction(struct gpio_chip *gc, unsigned int offset) struct ingenic_pinctrl *jzpc = jzgc->jzpc; unsigned int pin = gc->base + offset; - if (jzpc->info->version >= ID_JZ4760) { + if (jzpc->info->version >= ID_JZ4770) { if (ingenic_get_pin_config(jzpc, pin, JZ4760_GPIO_INT) || ingenic_get_pin_config(jzpc, pin, JZ4760_GPIO_PAT1)) return GPIO_LINE_DIRECTION_IN; @@ -1996,7 +1996,7 @@ static int ingenic_pinmux_set_pin_fn(struct ingenic_pinctrl *jzpc, ingenic_shadow_config_pin(jzpc, pin, JZ4760_GPIO_PAT1, func & 0x2); ingenic_shadow_config_pin(jzpc, pin, JZ4760_GPIO_PAT0, func & 0x1); ingenic_shadow_config_pin_load(jzpc, pin); - } else if (jzpc->info->version >= ID_JZ4760) { + } else if (jzpc->info->version >= ID_JZ4770) { ingenic_config_pin(jzpc, pin, JZ4760_GPIO_INT, false); ingenic_config_pin(jzpc, pin, GPIO_MSK, false); ingenic_config_pin(jzpc, pin, JZ4760_GPIO_PAT1, func & 0x2); @@ -2004,7 +2004,7 @@ static int ingenic_pinmux_set_pin_fn(struct ingenic_pinctrl *jzpc, } else { ingenic_config_pin(jzpc, pin, JZ4740_GPIO_FUNC, true); ingenic_config_pin(jzpc, pin, JZ4740_GPIO_TRIG, func & 0x2); - ingenic_config_pin(jzpc, pin, JZ4740_GPIO_SELECT, func > 0); + ingenic_config_pin(jzpc, pin, JZ4740_GPIO_SELECT, func & 0x1); } return 0; @@ -2061,7 +2061,7 @@ static int ingenic_pinmux_gpio_set_direction(struct pinctrl_dev *pctldev, ingenic_shadow_config_pin(jzpc, pin, GPIO_MSK, true); ingenic_shadow_config_pin(jzpc, pin, JZ4760_GPIO_PAT1, input); ingenic_shadow_config_pin_load(jzpc, pin); - } else if (jzpc->info->version >= ID_JZ4760) { + } else if (jzpc->info->version >= ID_JZ4770) { ingenic_config_pin(jzpc, pin, JZ4760_GPIO_INT, false); ingenic_config_pin(jzpc, pin, GPIO_MSK, true); ingenic_config_pin(jzpc, pin, JZ4760_GPIO_PAT1, input); @@ -2091,7 +2091,7 @@ static int ingenic_pinconf_get(struct pinctrl_dev *pctldev, unsigned int offt = pin / PINS_PER_GPIO_CHIP; bool pull; - if (jzpc->info->version >= ID_JZ4760) + if (jzpc->info->version >= ID_JZ4770) pull = !ingenic_get_pin_config(jzpc, pin, JZ4760_GPIO_PEN); else pull = !ingenic_get_pin_config(jzpc, pin, JZ4740_GPIO_PULL_DIS); @@ -2141,7 +2141,7 @@ static void ingenic_set_bias(struct ingenic_pinctrl *jzpc, REG_SET(X1830_GPIO_PEH), bias << idxh); } - } else if (jzpc->info->version >= ID_JZ4760) { + } else if (jzpc->info->version >= ID_JZ4770) { ingenic_config_pin(jzpc, pin, JZ4760_GPIO_PEN, !bias); } else { ingenic_config_pin(jzpc, pin, JZ4740_GPIO_PULL_DIS, !bias); @@ -2151,7 +2151,7 @@ static void ingenic_set_bias(struct ingenic_pinctrl *jzpc, static void ingenic_set_output_level(struct ingenic_pinctrl *jzpc, unsigned int pin, bool high) { - if (jzpc->info->version >= ID_JZ4760) + if (jzpc->info->version >= ID_JZ4770) ingenic_config_pin(jzpc, pin, JZ4760_GPIO_PAT0, high); else ingenic_config_pin(jzpc, pin, JZ4740_GPIO_DATA, high); -- GitLab From b4aa4876e58d12fb3ace425969dcbf4df37aa254 Mon Sep 17 00:00:00 2001 From: Paul Cercueil Date: Fri, 11 Dec 2020 23:28:10 +0000 Subject: [PATCH 0166/2012] pinctrl: ingenic: Rename registers from JZ4760_GPIO_* to JZ4770_GPIO_* Now that JZ4760 support has been fixed, it looks wrong to have JZ4760_GPIO_* registers being written if the SoC is a JZ4770 or later. Signed-off-by: Paul Cercueil Link: https://lore.kernel.org/r/20201211232810.261565-2-paul@crapouillou.net Signed-off-by: Linus Walleij --- drivers/pinctrl/pinctrl-ingenic.c | 54 +++++++++++++++---------------- 1 file changed, 27 insertions(+), 27 deletions(-) diff --git a/drivers/pinctrl/pinctrl-ingenic.c b/drivers/pinctrl/pinctrl-ingenic.c index 8ac3091c44691..3ea163498647f 100644 --- a/drivers/pinctrl/pinctrl-ingenic.c +++ b/drivers/pinctrl/pinctrl-ingenic.c @@ -37,11 +37,11 @@ #define JZ4740_GPIO_TRIG 0x70 #define JZ4740_GPIO_FLAG 0x80 -#define JZ4760_GPIO_INT 0x10 -#define JZ4760_GPIO_PAT1 0x30 -#define JZ4760_GPIO_PAT0 0x40 -#define JZ4760_GPIO_FLAG 0x50 -#define JZ4760_GPIO_PEN 0x70 +#define JZ4770_GPIO_INT 0x10 +#define JZ4770_GPIO_PAT1 0x30 +#define JZ4770_GPIO_PAT0 0x40 +#define JZ4770_GPIO_FLAG 0x50 +#define JZ4770_GPIO_PEN 0x70 #define X1830_GPIO_PEL 0x110 #define X1830_GPIO_PEH 0x120 @@ -1689,7 +1689,7 @@ static void ingenic_gpio_set_value(struct ingenic_gpio_chip *jzgc, u8 offset, int value) { if (jzgc->jzpc->info->version >= ID_JZ4770) - ingenic_gpio_set_bit(jzgc, JZ4760_GPIO_PAT0, offset, !!value); + ingenic_gpio_set_bit(jzgc, JZ4770_GPIO_PAT0, offset, !!value); else ingenic_gpio_set_bit(jzgc, JZ4740_GPIO_DATA, offset, !!value); } @@ -1719,8 +1719,8 @@ static void irq_set_type(struct ingenic_gpio_chip *jzgc, } if (jzgc->jzpc->info->version >= ID_JZ4770) { - reg1 = JZ4760_GPIO_PAT1; - reg2 = JZ4760_GPIO_PAT0; + reg1 = JZ4770_GPIO_PAT1; + reg2 = JZ4770_GPIO_PAT0; } else { reg1 = JZ4740_GPIO_TRIG; reg2 = JZ4740_GPIO_DIR; @@ -1759,7 +1759,7 @@ static void ingenic_gpio_irq_enable(struct irq_data *irqd) int irq = irqd->hwirq; if (jzgc->jzpc->info->version >= ID_JZ4770) - ingenic_gpio_set_bit(jzgc, JZ4760_GPIO_INT, irq, true); + ingenic_gpio_set_bit(jzgc, JZ4770_GPIO_INT, irq, true); else ingenic_gpio_set_bit(jzgc, JZ4740_GPIO_SELECT, irq, true); @@ -1775,7 +1775,7 @@ static void ingenic_gpio_irq_disable(struct irq_data *irqd) ingenic_gpio_irq_mask(irqd); if (jzgc->jzpc->info->version >= ID_JZ4770) - ingenic_gpio_set_bit(jzgc, JZ4760_GPIO_INT, irq, false); + ingenic_gpio_set_bit(jzgc, JZ4770_GPIO_INT, irq, false); else ingenic_gpio_set_bit(jzgc, JZ4740_GPIO_SELECT, irq, false); } @@ -1800,7 +1800,7 @@ static void ingenic_gpio_irq_ack(struct irq_data *irqd) } if (jzgc->jzpc->info->version >= ID_JZ4770) - ingenic_gpio_set_bit(jzgc, JZ4760_GPIO_FLAG, irq, false); + ingenic_gpio_set_bit(jzgc, JZ4770_GPIO_FLAG, irq, false); else ingenic_gpio_set_bit(jzgc, JZ4740_GPIO_DATA, irq, true); } @@ -1857,7 +1857,7 @@ static void ingenic_gpio_irq_handler(struct irq_desc *desc) chained_irq_enter(irq_chip, desc); if (jzgc->jzpc->info->version >= ID_JZ4770) - flag = ingenic_gpio_read_reg(jzgc, JZ4760_GPIO_FLAG); + flag = ingenic_gpio_read_reg(jzgc, JZ4770_GPIO_FLAG); else flag = ingenic_gpio_read_reg(jzgc, JZ4740_GPIO_FLAG); @@ -1939,8 +1939,8 @@ static int ingenic_gpio_get_direction(struct gpio_chip *gc, unsigned int offset) unsigned int pin = gc->base + offset; if (jzpc->info->version >= ID_JZ4770) { - if (ingenic_get_pin_config(jzpc, pin, JZ4760_GPIO_INT) || - ingenic_get_pin_config(jzpc, pin, JZ4760_GPIO_PAT1)) + if (ingenic_get_pin_config(jzpc, pin, JZ4770_GPIO_INT) || + ingenic_get_pin_config(jzpc, pin, JZ4770_GPIO_PAT1)) return GPIO_LINE_DIRECTION_IN; return GPIO_LINE_DIRECTION_OUT; } @@ -1991,16 +1991,16 @@ static int ingenic_pinmux_set_pin_fn(struct ingenic_pinctrl *jzpc, 'A' + offt, idx, func); if (jzpc->info->version >= ID_X1000) { - ingenic_shadow_config_pin(jzpc, pin, JZ4760_GPIO_INT, false); + ingenic_shadow_config_pin(jzpc, pin, JZ4770_GPIO_INT, false); ingenic_shadow_config_pin(jzpc, pin, GPIO_MSK, false); - ingenic_shadow_config_pin(jzpc, pin, JZ4760_GPIO_PAT1, func & 0x2); - ingenic_shadow_config_pin(jzpc, pin, JZ4760_GPIO_PAT0, func & 0x1); + ingenic_shadow_config_pin(jzpc, pin, JZ4770_GPIO_PAT1, func & 0x2); + ingenic_shadow_config_pin(jzpc, pin, JZ4770_GPIO_PAT0, func & 0x1); ingenic_shadow_config_pin_load(jzpc, pin); } else if (jzpc->info->version >= ID_JZ4770) { - ingenic_config_pin(jzpc, pin, JZ4760_GPIO_INT, false); + ingenic_config_pin(jzpc, pin, JZ4770_GPIO_INT, false); ingenic_config_pin(jzpc, pin, GPIO_MSK, false); - ingenic_config_pin(jzpc, pin, JZ4760_GPIO_PAT1, func & 0x2); - ingenic_config_pin(jzpc, pin, JZ4760_GPIO_PAT0, func & 0x1); + ingenic_config_pin(jzpc, pin, JZ4770_GPIO_PAT1, func & 0x2); + ingenic_config_pin(jzpc, pin, JZ4770_GPIO_PAT0, func & 0x1); } else { ingenic_config_pin(jzpc, pin, JZ4740_GPIO_FUNC, true); ingenic_config_pin(jzpc, pin, JZ4740_GPIO_TRIG, func & 0x2); @@ -2057,14 +2057,14 @@ static int ingenic_pinmux_gpio_set_direction(struct pinctrl_dev *pctldev, 'A' + offt, idx, input ? "in" : "out"); if (jzpc->info->version >= ID_X1000) { - ingenic_shadow_config_pin(jzpc, pin, JZ4760_GPIO_INT, false); + ingenic_shadow_config_pin(jzpc, pin, JZ4770_GPIO_INT, false); ingenic_shadow_config_pin(jzpc, pin, GPIO_MSK, true); - ingenic_shadow_config_pin(jzpc, pin, JZ4760_GPIO_PAT1, input); + ingenic_shadow_config_pin(jzpc, pin, JZ4770_GPIO_PAT1, input); ingenic_shadow_config_pin_load(jzpc, pin); } else if (jzpc->info->version >= ID_JZ4770) { - ingenic_config_pin(jzpc, pin, JZ4760_GPIO_INT, false); + ingenic_config_pin(jzpc, pin, JZ4770_GPIO_INT, false); ingenic_config_pin(jzpc, pin, GPIO_MSK, true); - ingenic_config_pin(jzpc, pin, JZ4760_GPIO_PAT1, input); + ingenic_config_pin(jzpc, pin, JZ4770_GPIO_PAT1, input); } else { ingenic_config_pin(jzpc, pin, JZ4740_GPIO_SELECT, false); ingenic_config_pin(jzpc, pin, JZ4740_GPIO_DIR, !input); @@ -2092,7 +2092,7 @@ static int ingenic_pinconf_get(struct pinctrl_dev *pctldev, bool pull; if (jzpc->info->version >= ID_JZ4770) - pull = !ingenic_get_pin_config(jzpc, pin, JZ4760_GPIO_PEN); + pull = !ingenic_get_pin_config(jzpc, pin, JZ4770_GPIO_PEN); else pull = !ingenic_get_pin_config(jzpc, pin, JZ4740_GPIO_PULL_DIS); @@ -2142,7 +2142,7 @@ static void ingenic_set_bias(struct ingenic_pinctrl *jzpc, } } else if (jzpc->info->version >= ID_JZ4770) { - ingenic_config_pin(jzpc, pin, JZ4760_GPIO_PEN, !bias); + ingenic_config_pin(jzpc, pin, JZ4770_GPIO_PEN, !bias); } else { ingenic_config_pin(jzpc, pin, JZ4740_GPIO_PULL_DIS, !bias); } @@ -2152,7 +2152,7 @@ static void ingenic_set_output_level(struct ingenic_pinctrl *jzpc, unsigned int pin, bool high) { if (jzpc->info->version >= ID_JZ4770) - ingenic_config_pin(jzpc, pin, JZ4760_GPIO_PAT0, high); + ingenic_config_pin(jzpc, pin, JZ4770_GPIO_PAT0, high); else ingenic_config_pin(jzpc, pin, JZ4740_GPIO_DATA, high); } -- GitLab From f981dc171c04c6cf5a35c712543b231ebf805832 Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Sun, 20 Dec 2020 23:18:58 -0800 Subject: [PATCH 0167/2012] tools/power/x86/intel-speed-select: Set scaling_max_freq to base_frequency When BIOS disables turbo, The scaling_max_freq in cpufreq sysfs will be limited to config level 0 base frequency. But when user selects a higher config levels, this will result in higher base frequency. But since scaling_max_freq is still old base frequency, the performance will still be limited. So when the turbo is disabled and cpufreq base_frequency is higher than scaling_max_freq, update the scaling_max_freq to the base_frequency. Signed-off-by: Srinivas Pandruvada Link: https://lore.kernel.org/r/20201221071859.2783957-2-srinivas.pandruvada@linux.intel.com Signed-off-by: Hans de Goede --- .../x86/intel-speed-select/isst-config.c | 21 +++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/tools/power/x86/intel-speed-select/isst-config.c b/tools/power/x86/intel-speed-select/isst-config.c index 5390158cdb401..9b6f0e6f150db 100644 --- a/tools/power/x86/intel-speed-select/isst-config.c +++ b/tools/power/x86/intel-speed-select/isst-config.c @@ -1249,6 +1249,8 @@ static void dump_isst_config(int arg) isst_ctdp_display_information_end(outf); } +static void adjust_scaling_max_from_base_freq(int cpu); + static void set_tdp_level_for_cpu(int cpu, void *arg1, void *arg2, void *arg3, void *arg4) { @@ -1267,6 +1269,9 @@ static void set_tdp_level_for_cpu(int cpu, void *arg1, void *arg2, void *arg3, int pkg_id = get_physical_package_id(cpu); int die_id = get_physical_die_id(cpu); + /* Wait for updated base frequencies */ + usleep(2000); + fprintf(stderr, "Option is set to online/offline\n"); ctdp_level.core_cpumask_size = alloc_cpu_set(&ctdp_level.core_cpumask); @@ -1283,6 +1288,7 @@ static void set_tdp_level_for_cpu(int cpu, void *arg1, void *arg2, void *arg3, if (CPU_ISSET_S(i, ctdp_level.core_cpumask_size, ctdp_level.core_cpumask)) { fprintf(stderr, "online cpu %d\n", i); set_cpu_online_offline(i, 1); + adjust_scaling_max_from_base_freq(i); } else { fprintf(stderr, "offline cpu %d\n", i); set_cpu_online_offline(i, 0); @@ -1440,6 +1446,21 @@ static int set_cpufreq_scaling_min_max(int cpu, int max, int freq) return 0; } +static int no_turbo(void) +{ + return parse_int_file(0, "/sys/devices/system/cpu/intel_pstate/no_turbo"); +} + +static void adjust_scaling_max_from_base_freq(int cpu) +{ + int base_freq, scaling_max_freq; + + scaling_max_freq = parse_int_file(0, "/sys/devices/system/cpu/cpu%d/cpufreq/scaling_max_freq", cpu); + base_freq = get_cpufreq_base_freq(cpu); + if (scaling_max_freq < base_freq || no_turbo()) + set_cpufreq_scaling_min_max(cpu, 1, base_freq); +} + static int set_clx_pbf_cpufreq_scaling_min_max(int cpu) { struct isst_pkg_ctdp_level_info *ctdp_level; -- GitLab From bbaa2e95e23e74791dd75b90d5ad9aad535acc6e Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Sun, 20 Dec 2020 23:18:59 -0800 Subject: [PATCH 0168/2012] tools/power/x86/intel-speed-select: Set higher of cpuinfo_max_freq or base_frequency In some case when BIOS disabled turbo, cpufreq cpuinfo_max_freq can be lower than base_frequency at higher config level. So, in that case set scaling_min_freq to base_frequency. Signed-off-by: Srinivas Pandruvada Link: https://lore.kernel.org/r/20201221071859.2783957-3-srinivas.pandruvada@linux.intel.com Signed-off-by: Hans de Goede --- tools/power/x86/intel-speed-select/isst-config.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/tools/power/x86/intel-speed-select/isst-config.c b/tools/power/x86/intel-speed-select/isst-config.c index 9b6f0e6f150db..09cb3a6672f3e 100644 --- a/tools/power/x86/intel-speed-select/isst-config.c +++ b/tools/power/x86/intel-speed-select/isst-config.c @@ -1461,6 +1461,16 @@ static void adjust_scaling_max_from_base_freq(int cpu) set_cpufreq_scaling_min_max(cpu, 1, base_freq); } +static void adjust_scaling_min_from_base_freq(int cpu) +{ + int base_freq, scaling_min_freq; + + scaling_min_freq = parse_int_file(0, "/sys/devices/system/cpu/cpu%d/cpufreq/scaling_min_freq", cpu); + base_freq = get_cpufreq_base_freq(cpu); + if (scaling_min_freq < base_freq) + set_cpufreq_scaling_min_max(cpu, 0, base_freq); +} + static int set_clx_pbf_cpufreq_scaling_min_max(int cpu) { struct isst_pkg_ctdp_level_info *ctdp_level; @@ -1558,6 +1568,7 @@ static void set_scaling_min_to_cpuinfo_max(int cpu) continue; set_cpufreq_scaling_min_max_from_cpuinfo(i, 1, 0); + adjust_scaling_min_from_base_freq(i); } } -- GitLab From 6a4b1f2dff558d75ee4f2ce88595a933b6918183 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 14 Dec 2020 15:33:36 -0800 Subject: [PATCH 0169/2012] platform/surface: surface_gpe: Fix non-PM_SLEEP build warnings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix build warnings when CONFIG_PM_SLEEP is not enabled and these functions are not used: ../drivers/platform/surface/surface_gpe.c:189:12: warning: ‘surface_gpe_resume’ defined but not used [-Wunused-function] static int surface_gpe_resume(struct device *dev) ^~~~~~~~~~~~~~~~~~ ../drivers/platform/surface/surface_gpe.c:184:12: warning: ‘surface_gpe_suspend’ defined but not used [-Wunused-function] static int surface_gpe_suspend(struct device *dev) ^~~~~~~~~~~~~~~~~~~ Fixes: 274335f1c557 ("platform/surface: Add Driver to set up lid GPEs on MS Surface device") Signed-off-by: Randy Dunlap Cc: Maximilian Luz Cc: Hans de Goede Cc: platform-driver-x86@vger.kernel.org Reviewed-by: Maximilian Luz Link: https://lore.kernel.org/r/20201214233336.19782-1-rdunlap@infradead.org Signed-off-by: Hans de Goede --- drivers/platform/surface/surface_gpe.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/platform/surface/surface_gpe.c b/drivers/platform/surface/surface_gpe.c index e49e5d6d5d4e1..86f6991b12157 100644 --- a/drivers/platform/surface/surface_gpe.c +++ b/drivers/platform/surface/surface_gpe.c @@ -181,12 +181,12 @@ static int surface_lid_enable_wakeup(struct device *dev, bool enable) return 0; } -static int surface_gpe_suspend(struct device *dev) +static int __maybe_unused surface_gpe_suspend(struct device *dev) { return surface_lid_enable_wakeup(dev, true); } -static int surface_gpe_resume(struct device *dev) +static int __maybe_unused surface_gpe_resume(struct device *dev) { return surface_lid_enable_wakeup(dev, false); } -- GitLab From e4a02c7a0e3aeb966e4fdc4a43dce978c467fe83 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 16 Dec 2020 14:37:52 +0100 Subject: [PATCH 0170/2012] platform/surface: SURFACE_PLATFORMS should depend on ACPI All Microsoft Surface platform-specific device drivers depend on ACPI, but the gatekeeper symbol SURFACE_PLATFORMS does not. Hence when the user is configuring a kernel without ACPI support, he is still asked about Microsoft Surface drivers, even though this question is irrelevant. Fix this by moving the dependency on ACPI from the individual driver symbols to SURFACE_PLATFORMS. Signed-off-by: Geert Uytterhoeven Reviewed-by: Maximilian Luz Link: https://lore.kernel.org/r/20201216133752.1321978-1-geert@linux-m68k.org Signed-off-by: Hans de Goede --- drivers/platform/surface/Kconfig | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/platform/surface/Kconfig b/drivers/platform/surface/Kconfig index 33040b0b3b799..2c941cdac9eed 100644 --- a/drivers/platform/surface/Kconfig +++ b/drivers/platform/surface/Kconfig @@ -5,6 +5,7 @@ menuconfig SURFACE_PLATFORMS bool "Microsoft Surface Platform-Specific Device Drivers" + depends on ACPI default y help Say Y here to get to see options for platform-specific device drivers @@ -29,20 +30,19 @@ config SURFACE3_WMI config SURFACE_3_BUTTON tristate "Power/home/volume buttons driver for Microsoft Surface 3 tablet" - depends on ACPI && KEYBOARD_GPIO && I2C + depends on KEYBOARD_GPIO && I2C help This driver handles the power/home/volume buttons on the Microsoft Surface 3 tablet. config SURFACE_3_POWER_OPREGION tristate "Surface 3 battery platform operation region support" - depends on ACPI && I2C + depends on I2C help This driver provides support for ACPI operation region of the Surface 3 battery platform driver. config SURFACE_GPE tristate "Surface GPE/Lid Support Driver" - depends on ACPI depends on DMI help This driver marks the GPEs related to the ACPI lid device found on @@ -52,7 +52,7 @@ config SURFACE_GPE config SURFACE_PRO3_BUTTON tristate "Power/home/volume buttons driver for Microsoft Surface Pro 3/4 tablet" - depends on ACPI && INPUT + depends on INPUT help This driver handles the power/home/volume buttons on the Microsoft Surface Pro 3/4 tablet. -- GitLab From 9bba96275576da0cf78ede62aeb2fc975ed8a32d Mon Sep 17 00:00:00 2001 From: Heikki Krogerus Date: Wed, 23 Dec 2020 17:36:44 +0300 Subject: [PATCH 0171/2012] platform/x86: i2c-multi-instantiate: Don't create platform device for INT3515 ACPI nodes There are several reports about the tps6598x causing interrupt flood on boards with the INT3515 ACPI node, which then causes instability. There appears to be several problems with the interrupt. One problem is that the I2CSerialBus resources do not always map to the Interrupt resource with the same index, but that is not the only problem. We have not been able to come up with a solution for all the issues, and because of that disabling the device for now. The PD controller on these platforms is autonomous, and the purpose for the driver is primarily to supply status to the userspace, so this will not affect any functionality. Reported-by: Moody Salem Fixes: a3dd034a1707 ("ACPI / scan: Create platform device for INT3515 ACPI nodes") Cc: stable@vger.kernel.org BugLink: https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1883511 Signed-off-by: Heikki Krogerus Link: https://lore.kernel.org/r/20201223143644.33341-1-heikki.krogerus@linux.intel.com Signed-off-by: Hans de Goede --- drivers/platform/x86/i2c-multi-instantiate.c | 31 +++++++++++++++----- 1 file changed, 23 insertions(+), 8 deletions(-) diff --git a/drivers/platform/x86/i2c-multi-instantiate.c b/drivers/platform/x86/i2c-multi-instantiate.c index b457b0babde39..2cce82579d091 100644 --- a/drivers/platform/x86/i2c-multi-instantiate.c +++ b/drivers/platform/x86/i2c-multi-instantiate.c @@ -164,13 +164,29 @@ static const struct i2c_inst_data bsg2150_data[] = { {} }; -static const struct i2c_inst_data int3515_data[] = { - { "tps6598x", IRQ_RESOURCE_APIC, 0 }, - { "tps6598x", IRQ_RESOURCE_APIC, 1 }, - { "tps6598x", IRQ_RESOURCE_APIC, 2 }, - { "tps6598x", IRQ_RESOURCE_APIC, 3 }, - {} -}; +/* + * Device with _HID INT3515 (TI PD controllers) has some unresolved interrupt + * issues. The most common problem seen is interrupt flood. + * + * There are at least two known causes. Firstly, on some boards, the + * I2CSerialBus resource index does not match the Interrupt resource, i.e. they + * are not one-to-one mapped like in the array below. Secondly, on some boards + * the IRQ line from the PD controller is not actually connected at all. But the + * interrupt flood is also seen on some boards where those are not a problem, so + * there are some other problems as well. + * + * Because of the issues with the interrupt, the device is disabled for now. If + * you wish to debug the issues, uncomment the below, and add an entry for the + * INT3515 device to the i2c_multi_instance_ids table. + * + * static const struct i2c_inst_data int3515_data[] = { + * { "tps6598x", IRQ_RESOURCE_APIC, 0 }, + * { "tps6598x", IRQ_RESOURCE_APIC, 1 }, + * { "tps6598x", IRQ_RESOURCE_APIC, 2 }, + * { "tps6598x", IRQ_RESOURCE_APIC, 3 }, + * { } + * }; + */ /* * Note new device-ids must also be added to i2c_multi_instantiate_ids in @@ -179,7 +195,6 @@ static const struct i2c_inst_data int3515_data[] = { static const struct acpi_device_id i2c_multi_inst_acpi_ids[] = { { "BSG1160", (unsigned long)bsg1160_data }, { "BSG2150", (unsigned long)bsg2150_data }, - { "INT3515", (unsigned long)int3515_data }, { } }; MODULE_DEVICE_TABLE(acpi, i2c_multi_inst_acpi_ids); -- GitLab From 46c54cf2706122c37497896d56d67b0c0aca2ede Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Thu, 24 Dec 2020 14:51:58 +0100 Subject: [PATCH 0172/2012] platform/x86: touchscreen_dmi: Add swap-x-y quirk for Goodix touchscreen on Estar Beauty HD tablet The Estar Beauty HD (MID 7316R) tablet uses a Goodix touchscreen, with the X and Y coordinates swapped compared to the LCD panel. Add a touchscreen_dmi entry for this adding a "touchscreen-swapped-x-y" device-property to the i2c-client instantiated for this device before the driver binds. This is the first entry of a Goodix touchscreen to touchscreen_dmi.c, so far DMI quirks for Goodix touchscreen's have been added directly to drivers/input/touchscreen/goodix.c. Currently there are 3 DMI tables in goodix.c: 1. rotated_screen[] for devices where the touchscreen is rotated 180 degrees vs the LCD panel 2. inverted_x_screen[] for devices where the X axis is inverted 3. nine_bytes_report[] for devices which use a non standard touch report size Arguably only 3. really needs to be inside the driver and the other 2 cases are better handled through the generic touchscreen DMI quirk mechanism from touchscreen_dmi.c, which allows adding device-props to any i2c-client. Esp. now that goodix.c is using the generic touchscreen_properties code. Alternative to the approach from this patch we could add a 4th dmi_system_id table for devices with swapped-x-y axis to goodix.c, but that seems undesirable. Cc: Bastien Nocera Cc: Dmitry Torokhov Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20201224135158.10976-1-hdegoede@redhat.com --- drivers/platform/x86/touchscreen_dmi.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/drivers/platform/x86/touchscreen_dmi.c b/drivers/platform/x86/touchscreen_dmi.c index 5783139d0a119..c4de932302d6b 100644 --- a/drivers/platform/x86/touchscreen_dmi.c +++ b/drivers/platform/x86/touchscreen_dmi.c @@ -263,6 +263,16 @@ static const struct ts_dmi_data digma_citi_e200_data = { .properties = digma_citi_e200_props, }; +static const struct property_entry estar_beauty_hd_props[] = { + PROPERTY_ENTRY_BOOL("touchscreen-swapped-x-y"), + { } +}; + +static const struct ts_dmi_data estar_beauty_hd_data = { + .acpi_name = "GDIX1001:00", + .properties = estar_beauty_hd_props, +}; + static const struct property_entry gp_electronic_t701_props[] = { PROPERTY_ENTRY_U32("touchscreen-size-x", 960), PROPERTY_ENTRY_U32("touchscreen-size-y", 640), @@ -942,6 +952,14 @@ const struct dmi_system_id touchscreen_dmi_table[] = { DMI_MATCH(DMI_BOARD_NAME, "Cherry Trail CR"), }, }, + { + /* Estar Beauty HD (MID 7316R) */ + .driver_data = (void *)&estar_beauty_hd_data, + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Estar"), + DMI_MATCH(DMI_PRODUCT_NAME, "eSTAR BEAUTY HD Intel Quad core"), + }, + }, { /* GP-electronic T701 */ .driver_data = (void *)&gp_electronic_t701_data, -- GitLab From fcd38f178b785623c0325958225744f0d8a075c0 Mon Sep 17 00:00:00 2001 From: Arnold Gozum Date: Sat, 26 Dec 2020 15:53:06 -0500 Subject: [PATCH 0173/2012] platform/x86: intel-vbtn: Support for tablet mode on Dell Inspiron 7352 The Dell Inspiron 7352 is a 2-in-1 model that has chassis-type "Notebook". Add this model to the dmi_switches_allow_list. Signed-off-by: Arnold Gozum Link: https://lore.kernel.org/r/20201226205307.249659-1-arngozum@gmail.com Signed-off-by: Hans de Goede --- drivers/platform/x86/intel-vbtn.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/platform/x86/intel-vbtn.c b/drivers/platform/x86/intel-vbtn.c index 3b49a1f4061bc..9bbdb26d43054 100644 --- a/drivers/platform/x86/intel-vbtn.c +++ b/drivers/platform/x86/intel-vbtn.c @@ -222,6 +222,12 @@ static const struct dmi_system_id dmi_switches_allow_list[] = { DMI_MATCH(DMI_PRODUCT_NAME, "Switch SA5-271"), }, }, + { + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "Inspiron 7352"), + }, + }, {} /* Array terminator */ }; -- GitLab From aa44afab87af079e0cf2ead9621d0447798a305e Mon Sep 17 00:00:00 2001 From: Mark Pearson Date: Tue, 29 Dec 2020 21:47:26 -0500 Subject: [PATCH 0174/2012] platform/x86: thinkpad_acpi: correct palmsensor error checking The previous commit adding functionality for the palm sensor had a mistake which meant the error conditions on initialisation was not checked correctly. On some older platforms this meant that if the sensor wasn't available an error would be returned and the driver would fail to load. This commit corrects the error condition. Many thanks to Mario Oenning for reporting and determining the issue Signed-off-by: Mark Pearson Link: https://lore.kernel.org/r/20201230024726.7861-1-markpearson@lenovo.com Signed-off-by: Hans de Goede --- drivers/platform/x86/thinkpad_acpi.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c index e03df2881dc6d..c102657b3eb32 100644 --- a/drivers/platform/x86/thinkpad_acpi.c +++ b/drivers/platform/x86/thinkpad_acpi.c @@ -9951,9 +9951,9 @@ static int tpacpi_proxsensor_init(struct ibm_init_struct *iibm) if ((palm_err == -ENODEV) && (lap_err == -ENODEV)) return 0; /* Otherwise, if there was an error return it */ - if (palm_err && (palm_err != ENODEV)) + if (palm_err && (palm_err != -ENODEV)) return palm_err; - if (lap_err && (lap_err != ENODEV)) + if (lap_err && (lap_err != -ENODEV)) return lap_err; if (has_palmsensor) { -- GitLab From 5b569302520ac8cef03e7a841e45cb37234f8b5f Mon Sep 17 00:00:00 2001 From: Shyam Sundar S K Date: Wed, 30 Dec 2020 13:40:28 +0530 Subject: [PATCH 0175/2012] platform/x86: amd-pmc: Fix CONFIG_DEBUG_FS check lkp reported that CONFIG_DEBUG_FS was not defined because of wrong usage if macro, correcting it now. Fixes: 156ec4731cb2 ("platform/x86: amd-pmc: Add AMD platform support for S2Idle") Reported-by: kernel test robot Signed-off-by: Shyam Sundar S K Link: https://lore.kernel.org/r/20201230081028.2615217-1-Shyam-sundar.S-k@amd.com Signed-off-by: Hans de Goede --- drivers/platform/x86/amd-pmc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/platform/x86/amd-pmc.c b/drivers/platform/x86/amd-pmc.c index 0102bf1c79160..ef83425724634 100644 --- a/drivers/platform/x86/amd-pmc.c +++ b/drivers/platform/x86/amd-pmc.c @@ -85,7 +85,7 @@ static inline void amd_pmc_reg_write(struct amd_pmc_dev *dev, int reg_offset, u3 iowrite32(val, dev->regbase + reg_offset); } -#if CONFIG_DEBUG_FS +#ifdef CONFIG_DEBUG_FS static int smu_fw_info_show(struct seq_file *s, void *unused) { struct amd_pmc_dev *dev = s->private; -- GitLab From 92ff62a7bcc17d47c0ce8dddfb7a6e1a2e55ebf4 Mon Sep 17 00:00:00 2001 From: Billy Tsai Date: Thu, 17 Dec 2020 10:49:12 +0800 Subject: [PATCH 0176/2012] pinctrl: aspeed: g6: Fix PWMG0 pinctrl setting The SCU offset for signal PWM8 in group PWM8G0 is wrong, fix it from SCU414 to SCU4B4. Signed-off-by: Billy Tsai Fixes: 2eda1cdec49f ("pinctrl: aspeed: Add AST2600 pinmux support") Reviewed-by: Joel Stanley Reviewed-by: Andrew Jeffery Link: https://lore.kernel.org/r/20201217024912.3198-1-billy_tsai@aspeedtech.com Signed-off-by: Linus Walleij --- drivers/pinctrl/aspeed/pinctrl-aspeed-g6.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pinctrl/aspeed/pinctrl-aspeed-g6.c b/drivers/pinctrl/aspeed/pinctrl-aspeed-g6.c index 34803a6c76643..5c1a109842a76 100644 --- a/drivers/pinctrl/aspeed/pinctrl-aspeed-g6.c +++ b/drivers/pinctrl/aspeed/pinctrl-aspeed-g6.c @@ -347,7 +347,7 @@ FUNC_GROUP_DECL(RMII4, F24, E23, E24, E25, C25, C24, B26, B25, B24); #define D22 40 SIG_EXPR_LIST_DECL_SESG(D22, SD1CLK, SD1, SIG_DESC_SET(SCU414, 8)); -SIG_EXPR_LIST_DECL_SEMG(D22, PWM8, PWM8G0, PWM8, SIG_DESC_SET(SCU414, 8)); +SIG_EXPR_LIST_DECL_SEMG(D22, PWM8, PWM8G0, PWM8, SIG_DESC_SET(SCU4B4, 8)); PIN_DECL_2(D22, GPIOF0, SD1CLK, PWM8); GROUP_DECL(PWM8G0, D22); -- GitLab From de30491e8bfeeba1500bba293333eb51ece529d5 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Sun, 3 Jan 2021 14:53:55 +0100 Subject: [PATCH 0177/2012] HID: sfh: fix address space confusion The new driver uses a phys_addr_t to store a DMA address, which does not work when the two are different size: drivers/hid/amd-sfh-hid/amd_sfh_client.c:157:11: error: incompatible pointer types passing 'phys_addr_t *' (aka 'unsigned int *') to parameter of type 'dma_addr_t *' (aka 'unsigned long long *') [-Werror,-Wincompatible-pointer-types] &cl_data->sensor_phys_addr[i], ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~ include/linux/dma-mapping.h:393:15: note: passing argument to parameter 'dma_handle' here dma_addr_t *dma_handle, gfp_t gfp) ^ Change both the type and the variable name to dma_addr for consistency. Fixes: 4b2c53d93a4b ("SFH:Transport Driver to add support of AMD Sensor Fusion Hub (SFH)") Signed-off-by: Arnd Bergmann Signed-off-by: Jiri Kosina --- drivers/hid/amd-sfh-hid/amd_sfh_client.c | 8 ++++---- drivers/hid/amd-sfh-hid/amd_sfh_hid.h | 2 +- drivers/hid/amd-sfh-hid/amd_sfh_pcie.c | 2 +- drivers/hid/amd-sfh-hid/amd_sfh_pcie.h | 2 +- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/hid/amd-sfh-hid/amd_sfh_client.c b/drivers/hid/amd-sfh-hid/amd_sfh_client.c index 3d1ccac5d99a3..2ab38b7153477 100644 --- a/drivers/hid/amd-sfh-hid/amd_sfh_client.c +++ b/drivers/hid/amd-sfh-hid/amd_sfh_client.c @@ -154,7 +154,7 @@ int amd_sfh_hid_client_init(struct amd_mp2_dev *privdata) for (i = 0; i < cl_data->num_hid_devices; i++) { cl_data->sensor_virt_addr[i] = dma_alloc_coherent(dev, sizeof(int) * 8, - &cl_data->sensor_phys_addr[i], + &cl_data->sensor_dma_addr[i], GFP_KERNEL); cl_data->sensor_sts[i] = 0; cl_data->sensor_requested_cnt[i] = 0; @@ -187,7 +187,7 @@ int amd_sfh_hid_client_init(struct amd_mp2_dev *privdata) } info.period = msecs_to_jiffies(AMD_SFH_IDLE_LOOP); info.sensor_idx = cl_idx; - info.phys_address = cl_data->sensor_phys_addr[i]; + info.dma_address = cl_data->sensor_dma_addr[i]; cl_data->report_descr[i] = kzalloc(cl_data->report_descr_sz[i], GFP_KERNEL); if (!cl_data->report_descr[i]) { @@ -212,7 +212,7 @@ cleanup: if (cl_data->sensor_virt_addr[i]) { dma_free_coherent(&privdata->pdev->dev, 8 * sizeof(int), cl_data->sensor_virt_addr[i], - cl_data->sensor_phys_addr[i]); + cl_data->sensor_dma_addr[i]); } kfree(cl_data->feature_report[i]); kfree(cl_data->input_report[i]); @@ -238,7 +238,7 @@ int amd_sfh_hid_client_deinit(struct amd_mp2_dev *privdata) if (cl_data->sensor_virt_addr[i]) { dma_free_coherent(&privdata->pdev->dev, 8 * sizeof(int), cl_data->sensor_virt_addr[i], - cl_data->sensor_phys_addr[i]); + cl_data->sensor_dma_addr[i]); } } kfree(cl_data); diff --git a/drivers/hid/amd-sfh-hid/amd_sfh_hid.h b/drivers/hid/amd-sfh-hid/amd_sfh_hid.h index 6be0783d885ce..d7eac1728e314 100644 --- a/drivers/hid/amd-sfh-hid/amd_sfh_hid.h +++ b/drivers/hid/amd-sfh-hid/amd_sfh_hid.h @@ -27,7 +27,7 @@ struct amdtp_cl_data { int hid_descr_size[MAX_HID_DEVICES]; phys_addr_t phys_addr_base; u32 *sensor_virt_addr[MAX_HID_DEVICES]; - phys_addr_t sensor_phys_addr[MAX_HID_DEVICES]; + dma_addr_t sensor_dma_addr[MAX_HID_DEVICES]; u32 sensor_sts[MAX_HID_DEVICES]; u32 sensor_requested_cnt[MAX_HID_DEVICES]; u8 report_type[MAX_HID_DEVICES]; diff --git a/drivers/hid/amd-sfh-hid/amd_sfh_pcie.c b/drivers/hid/amd-sfh-hid/amd_sfh_pcie.c index a51c7b76283bb..dbac166416627 100644 --- a/drivers/hid/amd-sfh-hid/amd_sfh_pcie.c +++ b/drivers/hid/amd-sfh-hid/amd_sfh_pcie.c @@ -41,7 +41,7 @@ void amd_start_sensor(struct amd_mp2_dev *privdata, struct amd_mp2_sensor_info i cmd_param.s.buf_layout = 1; cmd_param.s.buf_length = 16; - writeq(info.phys_address, privdata->mmio + AMD_C2P_MSG2); + writeq(info.dma_address, privdata->mmio + AMD_C2P_MSG2); writel(cmd_param.ul, privdata->mmio + AMD_C2P_MSG1); writel(cmd_base.ul, privdata->mmio + AMD_C2P_MSG0); } diff --git a/drivers/hid/amd-sfh-hid/amd_sfh_pcie.h b/drivers/hid/amd-sfh-hid/amd_sfh_pcie.h index e8be94f935b78..8f8d19b2cfe5b 100644 --- a/drivers/hid/amd-sfh-hid/amd_sfh_pcie.h +++ b/drivers/hid/amd-sfh-hid/amd_sfh_pcie.h @@ -67,7 +67,7 @@ struct amd_mp2_dev { struct amd_mp2_sensor_info { u8 sensor_idx; u32 period; - phys_addr_t phys_address; + dma_addr_t dma_address; }; void amd_start_sensor(struct amd_mp2_dev *privdata, struct amd_mp2_sensor_info info); -- GitLab From 273435a1d4e5826f039625c23ba4fe9a09f24d75 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Sun, 3 Jan 2021 22:41:44 +0100 Subject: [PATCH 0178/2012] HID: sony: select CONFIG_CRC32 Without crc32 support, this driver fails to link: arm-linux-gnueabi-ld: drivers/hid/hid-sony.o: in function `sony_raw_event': hid-sony.c:(.text+0x8f4): undefined reference to `crc32_le' arm-linux-gnueabi-ld: hid-sony.c:(.text+0x900): undefined reference to `crc32_le' arm-linux-gnueabi-ld: drivers/hid/hid-sony.o:hid-sony.c:(.text+0x4408): more undefined references to `crc32_le' follow Signed-off-by: Arnd Bergmann Signed-off-by: Jiri Kosina --- drivers/hid/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/hid/Kconfig b/drivers/hid/Kconfig index 7bdda1b5b2217..09fa75a2b289e 100644 --- a/drivers/hid/Kconfig +++ b/drivers/hid/Kconfig @@ -899,6 +899,7 @@ config HID_SONY depends on NEW_LEDS depends on LEDS_CLASS select POWER_SUPPLY + select CRC32 help Support for -- GitLab From 0e2d6795e8dbe91c2f5473564c6b25d11df3778b Mon Sep 17 00:00:00 2001 From: Daniel Palmer Date: Sun, 27 Dec 2020 12:17:16 +0900 Subject: [PATCH 0179/2012] USB: serial: option: add LongSung M5710 module support Add a device-id entry for the LongSung M5710 module. T: Bus=01 Lev=01 Prnt=01 Port=00 Cnt=01 Dev#= 2 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=ef(misc ) Sub=02 Prot=01 MxPS=64 #Cfgs= 1 P: Vendor=2df3 ProdID=9d03 Rev= 1.00 S: Manufacturer=Marvell S: Product=Mobile Composite Device Bus S: SerialNumber= C:* #Ifs= 5 Cfg#= 1 Atr=c0 MxPwr=500mA A: FirstIf#= 0 IfCount= 2 Cls=e0(wlcon) Sub=01 Prot=03 I:* If#= 0 Alt= 0 #EPs= 1 Cls=e0(wlcon) Sub=01 Prot=03 Driver=rndis_host E: Ad=87(I) Atr=03(Int.) MxPS= 64 Ivl=4096ms I:* If#= 1 Alt= 0 #EPs= 2 Cls=0a(data ) Sub=00 Prot=00 Driver=rndis_host E: Ad=83(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=0c(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 2 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=82(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=0b(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 4 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=89(I) Atr=03(Int.) MxPS= 64 Ivl=4096ms E: Ad=86(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=0f(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 5 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=88(I) Atr=03(Int.) MxPS= 64 Ivl=4096ms E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=0a(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms Signed-off-by: Daniel Palmer https://lore.kernel.org/r/20201227031716.1343300-1-daniel@0x0f.com [ johan: drop id defines, only bind to vendor class ] Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold --- drivers/usb/serial/option.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index ee726a1067066..3fe959104311b 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -2059,6 +2059,7 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x0105, 0xff), /* Fibocom NL678 series */ .driver_info = RSVD(6) }, { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x01a0, 0xff) }, /* Fibocom NL668-AM/NL652-EU (laptop MBIM) */ + { USB_DEVICE_INTERFACE_CLASS(0x2df3, 0x9d03, 0xff) }, /* LongSung M5710 */ { USB_DEVICE_INTERFACE_CLASS(0x305a, 0x1404, 0xff) }, /* GosunCn GM500 RNDIS */ { USB_DEVICE_INTERFACE_CLASS(0x305a, 0x1405, 0xff) }, /* GosunCn GM500 MBIM */ { USB_DEVICE_INTERFACE_CLASS(0x305a, 0x1406, 0xff) }, /* GosunCn GM500 ECM/NCM */ -- GitLab From 54d0a3ab80f49f19ee916def62fe067596833403 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 4 Jan 2021 15:50:07 +0100 Subject: [PATCH 0180/2012] USB: serial: iuu_phoenix: fix DMA from stack Stack-allocated buffers cannot be used for DMA (on all architectures) so allocate the flush command buffer using kmalloc(). Fixes: 60a8fc017103 ("USB: add iuu_phoenix driver") Cc: stable # 2.6.25 Reviewed-by: Greg Kroah-Hartman Signed-off-by: Johan Hovold --- drivers/usb/serial/iuu_phoenix.c | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/drivers/usb/serial/iuu_phoenix.c b/drivers/usb/serial/iuu_phoenix.c index f1201d4de2970..e8f06b41a5039 100644 --- a/drivers/usb/serial/iuu_phoenix.c +++ b/drivers/usb/serial/iuu_phoenix.c @@ -532,23 +532,29 @@ static int iuu_uart_flush(struct usb_serial_port *port) struct device *dev = &port->dev; int i; int status; - u8 rxcmd = IUU_UART_RX; + u8 *rxcmd; struct iuu_private *priv = usb_get_serial_port_data(port); if (iuu_led(port, 0xF000, 0, 0, 0xFF) < 0) return -EIO; + rxcmd = kmalloc(1, GFP_KERNEL); + if (!rxcmd) + return -ENOMEM; + + rxcmd[0] = IUU_UART_RX; + for (i = 0; i < 2; i++) { - status = bulk_immediate(port, &rxcmd, 1); + status = bulk_immediate(port, rxcmd, 1); if (status != IUU_OPERATION_OK) { dev_dbg(dev, "%s - uart_flush_write error\n", __func__); - return status; + goto out_free; } status = read_immediate(port, &priv->len, 1); if (status != IUU_OPERATION_OK) { dev_dbg(dev, "%s - uart_flush_read error\n", __func__); - return status; + goto out_free; } if (priv->len > 0) { @@ -556,12 +562,16 @@ static int iuu_uart_flush(struct usb_serial_port *port) status = read_immediate(port, priv->buf, priv->len); if (status != IUU_OPERATION_OK) { dev_dbg(dev, "%s - uart_flush_read error\n", __func__); - return status; + goto out_free; } } } dev_dbg(dev, "%s - uart_flush_read OK!\n", __func__); iuu_led(port, 0, 0xF000, 0, 0xFF); + +out_free: + kfree(rxcmd); + return status; } -- GitLab From 020a1f453449294926ca548d8d5ca970926e8dfd Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 4 Jan 2021 15:53:02 +0100 Subject: [PATCH 0181/2012] USB: usblp: fix DMA to stack Stack-allocated buffers cannot be used for DMA (on all architectures). Replace the HP-channel macro with a helper function that allocates a dedicated transfer buffer so that it can continue to be used with arguments from the stack. Note that the buffer is cleared on allocation as usblp_ctrl_msg() returns success also on short transfers (the buffer is only used for debugging). Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20210104145302.2087-1-johan@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/class/usblp.c | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/drivers/usb/class/usblp.c b/drivers/usb/class/usblp.c index 67cbd42421bee..134dc2005ce97 100644 --- a/drivers/usb/class/usblp.c +++ b/drivers/usb/class/usblp.c @@ -274,8 +274,25 @@ static int usblp_ctrl_msg(struct usblp *usblp, int request, int type, int dir, i #define usblp_reset(usblp)\ usblp_ctrl_msg(usblp, USBLP_REQ_RESET, USB_TYPE_CLASS, USB_DIR_OUT, USB_RECIP_OTHER, 0, NULL, 0) -#define usblp_hp_channel_change_request(usblp, channel, buffer) \ - usblp_ctrl_msg(usblp, USBLP_REQ_HP_CHANNEL_CHANGE_REQUEST, USB_TYPE_VENDOR, USB_DIR_IN, USB_RECIP_INTERFACE, channel, buffer, 1) +static int usblp_hp_channel_change_request(struct usblp *usblp, int channel, u8 *new_channel) +{ + u8 *buf; + int ret; + + buf = kzalloc(1, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + ret = usblp_ctrl_msg(usblp, USBLP_REQ_HP_CHANNEL_CHANGE_REQUEST, + USB_TYPE_VENDOR, USB_DIR_IN, USB_RECIP_INTERFACE, + channel, buf, 1); + if (ret == 0) + *new_channel = buf[0]; + + kfree(buf); + + return ret; +} /* * See the description for usblp_select_alts() below for the usage -- GitLab From 718bf42b119de652ebcc93655a1f33a9c0d04b3c Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 28 Dec 2020 23:13:09 -0800 Subject: [PATCH 0182/2012] usb: usbip: vhci_hcd: protect shift size Fix shift out-of-bounds in vhci_hcd.c: UBSAN: shift-out-of-bounds in ../drivers/usb/usbip/vhci_hcd.c:399:41 shift exponent 768 is too large for 32-bit type 'int' Fixes: 03cd00d538a6 ("usbip: vhci-hcd: Set the vhci structure up to work") Signed-off-by: Randy Dunlap Reported-by: syzbot+297d20e437b79283bf6d@syzkaller.appspotmail.com Cc: Yuyang Du Cc: Shuah Khan Cc: Greg Kroah-Hartman Cc: linux-usb@vger.kernel.org Cc: stable Link: https://lore.kernel.org/r/20201229071309.18418-1-rdunlap@infradead.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/usbip/vhci_hcd.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/usbip/vhci_hcd.c b/drivers/usb/usbip/vhci_hcd.c index 66cde5e5f7964..3209b5ddd30c9 100644 --- a/drivers/usb/usbip/vhci_hcd.c +++ b/drivers/usb/usbip/vhci_hcd.c @@ -396,6 +396,8 @@ static int vhci_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue, default: usbip_dbg_vhci_rh(" ClearPortFeature: default %x\n", wValue); + if (wValue >= 32) + goto error; vhci_hcd->port_status[rhport] &= ~(1 << wValue); break; } -- GitLab From a1383b3537a7bea1c213baa7878ccc4ecf4413b5 Mon Sep 17 00:00:00 2001 From: Wesley Cheng Date: Tue, 29 Dec 2020 15:00:37 -0800 Subject: [PATCH 0183/2012] usb: dwc3: gadget: Restart DWC3 gadget when enabling pullup usb_gadget_deactivate/usb_gadget_activate does not execute the UDC start operation, which may leave EP0 disabled and event IRQs disabled when re-activating the function. Move the enabling/disabling of USB EP0 and device event IRQs to be performed in the pullup routine. Fixes: ae7e86108b12 ("usb: dwc3: Stop active transfers before halting the controller") Tested-by: Michael Tretter Cc: stable Reported-by: Michael Tretter Signed-off-by: Wesley Cheng Link: https://lore.kernel.org/r/1609282837-21666-1-git-send-email-wcheng@codeaurora.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/gadget.c | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 78cb4db8a6e45..25f654b79e480 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -2083,6 +2083,7 @@ static int dwc3_gadget_run_stop(struct dwc3 *dwc, int is_on, int suspend) static void dwc3_gadget_disable_irq(struct dwc3 *dwc); static void __dwc3_gadget_stop(struct dwc3 *dwc); +static int __dwc3_gadget_start(struct dwc3 *dwc); static int dwc3_gadget_pullup(struct usb_gadget *g, int is_on) { @@ -2145,6 +2146,8 @@ static int dwc3_gadget_pullup(struct usb_gadget *g, int is_on) dwc->ev_buf->lpos = (dwc->ev_buf->lpos + count) % dwc->ev_buf->length; } + } else { + __dwc3_gadget_start(dwc); } ret = dwc3_gadget_run_stop(dwc, is_on, false); @@ -2319,10 +2322,6 @@ static int dwc3_gadget_start(struct usb_gadget *g, } dwc->gadget_driver = driver; - - if (pm_runtime_active(dwc->dev)) - __dwc3_gadget_start(dwc); - spin_unlock_irqrestore(&dwc->lock, flags); return 0; @@ -2348,13 +2347,6 @@ static int dwc3_gadget_stop(struct usb_gadget *g) unsigned long flags; spin_lock_irqsave(&dwc->lock, flags); - - if (pm_runtime_suspended(dwc->dev)) - goto out; - - __dwc3_gadget_stop(dwc); - -out: dwc->gadget_driver = NULL; spin_unlock_irqrestore(&dwc->lock, flags); -- GitLab From 64e6bbfff52db4bf6785fab9cffab850b2de6870 Mon Sep 17 00:00:00 2001 From: Eddie Hung Date: Tue, 29 Dec 2020 18:53:35 +0800 Subject: [PATCH 0184/2012] usb: gadget: configfs: Fix use-after-free issue with udc_name There is a use-after-free issue, if access udc_name in function gadget_dev_desc_UDC_store after another context free udc_name in function unregister_gadget. Context 1: gadget_dev_desc_UDC_store()->unregister_gadget()-> free udc_name->set udc_name to NULL Context 2: gadget_dev_desc_UDC_show()-> access udc_name Call trace: dump_backtrace+0x0/0x340 show_stack+0x14/0x1c dump_stack+0xe4/0x134 print_address_description+0x78/0x478 __kasan_report+0x270/0x2ec kasan_report+0x10/0x18 __asan_report_load1_noabort+0x18/0x20 string+0xf4/0x138 vsnprintf+0x428/0x14d0 sprintf+0xe4/0x12c gadget_dev_desc_UDC_show+0x54/0x64 configfs_read_file+0x210/0x3a0 __vfs_read+0xf0/0x49c vfs_read+0x130/0x2b4 SyS_read+0x114/0x208 el0_svc_naked+0x34/0x38 Add mutex_lock to protect this kind of scenario. Signed-off-by: Eddie Hung Signed-off-by: Macpaul Lin Reviewed-by: Peter Chen Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/1609239215-21819-1-git-send-email-macpaul.lin@mediatek.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/configfs.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/usb/gadget/configfs.c b/drivers/usb/gadget/configfs.c index 56051bb973498..d9743f4b56c34 100644 --- a/drivers/usb/gadget/configfs.c +++ b/drivers/usb/gadget/configfs.c @@ -221,9 +221,16 @@ static ssize_t gadget_dev_desc_bcdUSB_store(struct config_item *item, static ssize_t gadget_dev_desc_UDC_show(struct config_item *item, char *page) { - char *udc_name = to_gadget_info(item)->composite.gadget_driver.udc_name; + struct gadget_info *gi = to_gadget_info(item); + char *udc_name; + int ret; + + mutex_lock(&gi->lock); + udc_name = gi->composite.gadget_driver.udc_name; + ret = sprintf(page, "%s\n", udc_name ?: ""); + mutex_unlock(&gi->lock); - return sprintf(page, "%s\n", udc_name ?: ""); + return ret; } static int unregister_gadget(struct gadget_info *gi) -- GitLab From 7043e311a57625467b6fdb032dec8a6dea878208 Mon Sep 17 00:00:00 2001 From: Peter Chen Date: Wed, 30 Dec 2020 13:11:14 +0800 Subject: [PATCH 0185/2012] usb: gadget: core: change the comment for usb_gadget_connect The pullup does not need to be enabled at below situations: - For platforms which the hardware pullup starts after setting register even they do not see the VBUS. If the pullup is always enabled for these platforms, it will consume more power and break the USB IF compliance tests [1]. - For platforms which need to do BC 1.2 charger detection after seeing the VBUS. Pullup D+ will break the charger detection flow. - For platforms which the system suspend is allowed when the connection with host is there but the bus is not at suspend. For these platforms, it is better to disable pullup when entering the system suspend otherwise the host may confuse the device behavior after controller is in low power mode. Disable pullup is considered as a disconnection event from host side. [1] USB-IF Full and Low Speed Compliance Test Procedure F Back-voltage Testing Section 7.2.1 of the USB specification requires that no device shall supply (source) current on VBUS at its upstream facing port at any time. From VBUS on its upstream facing port, a device may only draw (sink) current. They may not provide power to the pull-up resistor on D+/D- unless VBUS is present (see Section 7.1.5). Signed-off-by: Peter Chen Link: https://lore.kernel.org/r/20201230051114.21370-1-peter.chen@nxp.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/udc/core.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/usb/gadget/udc/core.c b/drivers/usb/gadget/udc/core.c index 5b5cfeb6c14a6..6a62bbd01324f 100644 --- a/drivers/usb/gadget/udc/core.c +++ b/drivers/usb/gadget/udc/core.c @@ -659,8 +659,7 @@ EXPORT_SYMBOL_GPL(usb_gadget_vbus_disconnect); * * Enables the D+ (or potentially D-) pullup. The host will start * enumerating this gadget when the pullup is active and a VBUS session - * is active (the link is powered). This pullup is always enabled unless - * usb_gadget_disconnect() has been used to disable it. + * is active (the link is powered). * * Returns zero on success, else negative errno. */ -- GitLab From d7889c2020e08caab0d7e36e947f642d91015bd0 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Sun, 3 Jan 2021 22:42:17 +0100 Subject: [PATCH 0186/2012] usb: gadget: select CONFIG_CRC32 Without crc32 support, this driver fails to link: arm-linux-gnueabi-ld: drivers/usb/gadget/function/f_eem.o: in function `eem_unwrap': f_eem.c:(.text+0x11cc): undefined reference to `crc32_le' arm-linux-gnueabi-ld: drivers/usb/gadget/function/f_ncm.o:f_ncm.c:(.text+0x1e40): more undefined references to `crc32_le' follow Fixes: 6d3865f9d41f ("usb: gadget: NCM: Add transmit multi-frame.") Signed-off-by: Arnd Bergmann Cc: stable Link: https://lore.kernel.org/r/20210103214224.1996535-1-arnd@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/Kconfig | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/gadget/Kconfig b/drivers/usb/gadget/Kconfig index 7e47e6223089c..2d152571a7de8 100644 --- a/drivers/usb/gadget/Kconfig +++ b/drivers/usb/gadget/Kconfig @@ -265,6 +265,7 @@ config USB_CONFIGFS_NCM depends on NET select USB_U_ETHER select USB_F_NCM + select CRC32 help NCM is an advanced protocol for Ethernet encapsulation, allows grouping of several ethernet frames into one USB transfer and @@ -314,6 +315,7 @@ config USB_CONFIGFS_EEM depends on NET select USB_U_ETHER select USB_F_EEM + select CRC32 help CDC EEM is a newer USB standard that is somewhat simpler than CDC ECM and therefore can be supported by more hardware. Technically ECM and -- GitLab From 6cd0fe91387917be48e91385a572a69dfac2f3f7 Mon Sep 17 00:00:00 2001 From: Chandana Kishori Chiluveru Date: Tue, 29 Dec 2020 14:44:43 -0800 Subject: [PATCH 0187/2012] usb: gadget: configfs: Preserve function ordering after bind failure When binding the ConfigFS gadget to a UDC, the functions in each configuration are added in list order. However, if usb_add_function() fails, the failed function is put back on its configuration's func_list and purge_configs_funcs() is called to further clean up. purge_configs_funcs() iterates over the configurations and functions in forward order, calling unbind() on each of the previously added functions. But after doing so, each function gets moved to the tail of the configuration's func_list. This results in reshuffling the original order of the functions within a configuration such that the failed function now appears first even though it may have originally appeared in the middle or even end of the list. At this point if the ConfigFS gadget is attempted to re-bind to the UDC, the functions will be added in a different order than intended, with the only recourse being to remove and relink the functions all over again. An example of this as follows: ln -s functions/mass_storage.0 configs/c.1 ln -s functions/ncm.0 configs/c.1 ln -s functions/ffs.adb configs/c.1 # oops, forgot to start adbd echo "" > UDC # fails start adbd echo "" > UDC # now succeeds, but... # bind order is # "ADB", mass_storage, ncm [30133.118289] configfs-gadget gadget: adding 'Mass Storage Function'/ffffff810af87200 to config 'c'/ffffff817d6a2520 [30133.119875] configfs-gadget gadget: adding 'cdc_network'/ffffff80f48d1a00 to config 'c'/ffffff817d6a2520 [30133.119974] using random self ethernet address [30133.120002] using random host ethernet address [30133.139604] usb0: HOST MAC 3e:27:46:ba:3e:26 [30133.140015] usb0: MAC 6e:28:7e:42:66:6a [30133.140062] configfs-gadget gadget: adding 'Function FS Gadget'/ffffff80f3868438 to config 'c'/ffffff817d6a2520 [30133.140081] configfs-gadget gadget: adding 'Function FS Gadget'/ffffff80f3868438 --> -19 [30133.140098] configfs-gadget gadget: unbind function 'Mass Storage Function'/ffffff810af87200 [30133.140119] configfs-gadget gadget: unbind function 'cdc_network'/ffffff80f48d1a00 [30133.173201] configfs-gadget a600000.dwc3: failed to start g1: -19 [30136.661933] init: starting service 'adbd'... [30136.700126] read descriptors [30136.700413] read strings [30138.574484] configfs-gadget gadget: adding 'Function FS Gadget'/ffffff80f3868438 to config 'c'/ffffff817d6a2520 [30138.575497] configfs-gadget gadget: adding 'Mass Storage Function'/ffffff810af87200 to config 'c'/ffffff817d6a2520 [30138.575554] configfs-gadget gadget: adding 'cdc_network'/ffffff80f48d1a00 to config 'c'/ffffff817d6a2520 [30138.575631] using random self ethernet address [30138.575660] using random host ethernet address [30138.595338] usb0: HOST MAC 2e:cf:43:cd:ca:c8 [30138.597160] usb0: MAC 6a:f0:9f:ee:82:a0 [30138.791490] configfs-gadget gadget: super-speed config #1: c Fix this by reversing the iteration order of the functions in purge_config_funcs() when unbinding them, and adding them back to the config's func_list at the head instead of the tail. This ensures that we unbind and unwind back to the original list order. Fixes: 88af8bbe4ef7 ("usb: gadget: the start of the configfs interface") Signed-off-by: Chandana Kishori Chiluveru Signed-off-by: Jack Pham Reviewed-by: Peter Chen Link: https://lore.kernel.org/r/20201229224443.31623-1-jackp@codeaurora.org Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/configfs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/usb/gadget/configfs.c b/drivers/usb/gadget/configfs.c index d9743f4b56c34..408a5332a975b 100644 --- a/drivers/usb/gadget/configfs.c +++ b/drivers/usb/gadget/configfs.c @@ -1255,9 +1255,9 @@ static void purge_configs_funcs(struct gadget_info *gi) cfg = container_of(c, struct config_usb_cfg, c); - list_for_each_entry_safe(f, tmp, &c->functions, list) { + list_for_each_entry_safe_reverse(f, tmp, &c->functions, list) { - list_move_tail(&f->list, &cfg->func_list); + list_move(&f->list, &cfg->func_list); if (f->unbind) { dev_dbg(&gi->cdev.gadget->dev, "unbind function '%s'/%p\n", -- GitLab From e1263f9277bad198c2acc8092a41aea1edbea0e4 Mon Sep 17 00:00:00 2001 From: Amelie Delaunay Date: Mon, 4 Jan 2021 15:20:45 +0100 Subject: [PATCH 0188/2012] dmaengine: stm32-mdma: fix STM32_MDMA_VERY_HIGH_PRIORITY value STM32_MDMA_VERY_HIGH_PRIORITY is b11 not 0x11, so fix it with 0x3. Signed-off-by: Amelie Delaunay Link: https://lore.kernel.org/r/20210104142045.25583-1-amelie.delaunay@foss.st.com Signed-off-by: Vinod Koul --- drivers/dma/stm32-mdma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dma/stm32-mdma.c b/drivers/dma/stm32-mdma.c index e4637ec786d39..36ba8b43e78de 100644 --- a/drivers/dma/stm32-mdma.c +++ b/drivers/dma/stm32-mdma.c @@ -199,7 +199,7 @@ #define STM32_MDMA_MAX_CHANNELS 63 #define STM32_MDMA_MAX_REQUESTS 256 #define STM32_MDMA_MAX_BURST 128 -#define STM32_MDMA_VERY_HIGH_PRIORITY 0x11 +#define STM32_MDMA_VERY_HIGH_PRIORITY 0x3 enum stm32_mdma_trigger_mode { STM32_MDMA_BUFFER, -- GitLab From 65a4e5299739abe0888cda0938d21f8ea3b5c606 Mon Sep 17 00:00:00 2001 From: David Gow Date: Mon, 21 Dec 2020 23:39:00 -0800 Subject: [PATCH 0189/2012] kunit: tool: Force the use of the 'tty' console for UML kunit_tool relies on the UML console outputting printk() output to the tty in order to get results. Since the default console driver could change, pass 'console=tty' to the kernel. This is triggered by a change[1] to use ttynull as a fallback console driver which -- by chance or by design -- seems to have changed the default console output on UML, breaking kunit_tool. While this may be fixed, we should be less fragile to such changes in the default. [1]: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=757055ae8dedf5333af17b3b5b4b70ba9bc9da4e Signed-off-by: David Gow Fixes: 757055ae8ded ("init/console: Use ttynull as a fallback when there is no console") Reported-by: Andy Shevchenko Tested-by: Andy Shevchenko Acked-by: Brendan Higgins Signed-off-by: Shuah Khan --- tools/testing/kunit/kunit_kernel.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/kunit/kunit_kernel.py b/tools/testing/kunit/kunit_kernel.py index 57c1724b7e5da..698358c9c0d60 100644 --- a/tools/testing/kunit/kunit_kernel.py +++ b/tools/testing/kunit/kunit_kernel.py @@ -198,7 +198,7 @@ class LinuxSourceTree(object): return self.validate_config(build_dir) def run_kernel(self, args=[], build_dir='', timeout=None): - args.extend(['mem=1G']) + args.extend(['mem=1G', 'console=tty']) self._ops.linux_bin(args, timeout, build_dir) outfile = get_outfile_path(build_dir) subprocess.call(['stty', 'sane']) -- GitLab From 3b4cf848dad5dad4bf239ba664c809c8cf29f1ed Mon Sep 17 00:00:00 2001 From: Tobias Klauser Date: Thu, 17 Dec 2020 17:31:40 +0100 Subject: [PATCH 0190/2012] selftests/vDSO: add additional binaries to .gitignore Add the test binaries introduced by commit 693f5ca08ca0 ("kselftest: Extend vDSO selftest"), commit 03f55c7952c9 ("kselftest: Extend vDSO selftest to clock_getres") and commit c7e5789b24d3 ("kselftest: Move test_vdso to the vDSO test suite") to .gitignore. Signed-off-by: Tobias Klauser Signed-off-by: Shuah Khan --- tools/testing/selftests/vDSO/.gitignore | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/testing/selftests/vDSO/.gitignore b/tools/testing/selftests/vDSO/.gitignore index 5eb64d41e5419..a8dc51af5a9c0 100644 --- a/tools/testing/selftests/vDSO/.gitignore +++ b/tools/testing/selftests/vDSO/.gitignore @@ -1,5 +1,8 @@ # SPDX-License-Identifier: GPL-2.0-only vdso_test +vdso_test_abi +vdso_test_clock_getres +vdso_test_correctness vdso_test_gettimeofday vdso_test_getcpu vdso_standalone_test_x86 -- GitLab From df00d02989024d193a6efd1a85513a5658c6a10f Mon Sep 17 00:00:00 2001 From: Tobias Klauser Date: Thu, 17 Dec 2020 17:32:35 +0100 Subject: [PATCH 0191/2012] selftests/vDSO: fix -Wformat warning in vdso_test_correctness MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix the following -Wformat warnings in vdso_test_correctness.c: vdso_test_correctness.c: In function ‘test_one_clock_gettime64’: vdso_test_correctness.c:352:21: warning: format ‘%ld’ expects argument of type ‘long int’, but argument 3 has type ‘long long int’ [-Wformat=] 352 | printf("\t%llu.%09ld %llu.%09ld %llu.%09ld\n", | ~~~~^ | | | long int | %09lld 353 | (unsigned long long)start.tv_sec, start.tv_nsec, | ~~~~~~~~~~~~~ | | | long long int vdso_test_correctness.c:352:32: warning: format ‘%ld’ expects argument of type ‘long int’, but argument 5 has type ‘long long int’ [-Wformat=] 352 | printf("\t%llu.%09ld %llu.%09ld %llu.%09ld\n", | ~~~~^ | | | long int | %09lld 353 | (unsigned long long)start.tv_sec, start.tv_nsec, 354 | (unsigned long long)vdso.tv_sec, vdso.tv_nsec, | ~~~~~~~~~~~~ | | | long long int vdso_test_correctness.c:352:43: warning: format ‘%ld’ expects argument of type ‘long int’, but argument 7 has type ‘long long int’ [-Wformat=] The tv_sec member of __kernel_timespec is long long, both in uapi/linux/time_types.h and locally in vdso_test_correctness.c. Signed-off-by: Tobias Klauser Signed-off-by: Shuah Khan --- tools/testing/selftests/vDSO/vdso_test_correctness.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/vDSO/vdso_test_correctness.c b/tools/testing/selftests/vDSO/vdso_test_correctness.c index 5029ef9b228c3..c4aea794725a7 100644 --- a/tools/testing/selftests/vDSO/vdso_test_correctness.c +++ b/tools/testing/selftests/vDSO/vdso_test_correctness.c @@ -349,7 +349,7 @@ static void test_one_clock_gettime64(int clock, const char *name) return; } - printf("\t%llu.%09ld %llu.%09ld %llu.%09ld\n", + printf("\t%llu.%09lld %llu.%09lld %llu.%09lld\n", (unsigned long long)start.tv_sec, start.tv_nsec, (unsigned long long)vdso.tv_sec, vdso.tv_nsec, (unsigned long long)end.tv_sec, end.tv_nsec); -- GitLab From 7a6eb7c34a78498742b5f82543b7a68c1c443329 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Thu, 10 Dec 2020 18:52:33 +0000 Subject: [PATCH 0192/2012] selftests: Skip BPF seftests by default The BPF selftests have build time dependencies on cutting edge versions of tools in the BPF ecosystem including LLVM which are more involved to satisfy than more typical requirements like installing a package from your distribution. This causes issues for users looking at kselftest in as a whole who find that a default build of kselftest fails and that resolving this is time consuming and adds administrative overhead. The fast pace of BPF development and the need for a full BPF stack to do substantial development or validation work on the code mean that people working directly on it don't see a reasonable way to keep supporting older environments without causing problems with the usability of the BPF tests in BPF development so these requirements are unlikely to be relaxed in the immediate future. There is already support for skipping targets so in order to reduce the barrier to entry for people interested in kselftest as a whole let's use that to skip the BPF tests by default when people work with the top level kselftest build system. Users can still build the BPF selftests as part of the wider kselftest build by specifying SKIP_TARGETS, including setting an empty SKIP_TARGETS to build everything. They can also continue to build the BPF selftests individually in cases where they are specifically focused on BPF. This isn't ideal since it means people will need to take special steps to build the BPF tests but the dependencies mean that realistically this is already the case to some extent and it makes it easier for people to pick up and work with the other selftests which is hopefully a net win. Signed-off-by: Mark Brown Signed-off-by: Shuah Khan --- tools/testing/selftests/Makefile | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index afbab4aeef3c8..8a917cb4426a0 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -77,8 +77,10 @@ TARGETS += zram TARGETS_HOTPLUG = cpu-hotplug TARGETS_HOTPLUG += memory-hotplug -# User can optionally provide a TARGETS skiplist. -SKIP_TARGETS ?= +# User can optionally provide a TARGETS skiplist. By default we skip +# BPF since it has cutting edge build time dependencies which require +# more effort to install. +SKIP_TARGETS ?= bpf ifneq ($(SKIP_TARGETS),) TMP := $(filter-out $(SKIP_TARGETS), $(TARGETS)) override TARGETS := $(TMP) -- GitLab From 10f42b3e648377b2f2f323a5530354710616c6cc Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Tue, 22 Dec 2020 19:00:48 +0100 Subject: [PATCH 0193/2012] libceph: zero out session key and connection secret Try and avoid leaving bits and pieces of session key and connection secret (gets split into GCM key and a pair of GCM IVs) around. Signed-off-by: Ilya Dryomov Reviewed-by: Jeff Layton --- net/ceph/auth_x.c | 57 ++++++++++++++++++++++++----------------- net/ceph/crypto.c | 3 ++- net/ceph/messenger_v2.c | 45 ++++++++++++++++++-------------- 3 files changed, 62 insertions(+), 43 deletions(-) diff --git a/net/ceph/auth_x.c b/net/ceph/auth_x.c index 9815cfe42af09..ca44c327baced 100644 --- a/net/ceph/auth_x.c +++ b/net/ceph/auth_x.c @@ -569,6 +569,34 @@ e_range: return -ERANGE; } +static int decode_con_secret(void **p, void *end, u8 *con_secret, + int *con_secret_len) +{ + int len; + + ceph_decode_32_safe(p, end, len, bad); + ceph_decode_need(p, end, len, bad); + + dout("%s len %d\n", __func__, len); + if (con_secret) { + if (len > CEPH_MAX_CON_SECRET_LEN) { + pr_err("connection secret too big %d\n", len); + goto bad_memzero; + } + memcpy(con_secret, *p, len); + *con_secret_len = len; + } + memzero_explicit(*p, len); + *p += len; + return 0; + +bad_memzero: + memzero_explicit(*p, len); +bad: + pr_err("failed to decode connection secret\n"); + return -EINVAL; +} + static int handle_auth_session_key(struct ceph_auth_client *ac, void **p, void *end, u8 *session_key, int *session_key_len, @@ -612,17 +640,9 @@ static int handle_auth_session_key(struct ceph_auth_client *ac, dout("%s decrypted %d bytes\n", __func__, ret); dend = dp + ret; - ceph_decode_32_safe(&dp, dend, len, e_inval); - if (len > CEPH_MAX_CON_SECRET_LEN) { - pr_err("connection secret too big %d\n", len); - return -EINVAL; - } - - dout("%s connection secret len %d\n", __func__, len); - if (con_secret) { - memcpy(con_secret, dp, len); - *con_secret_len = len; - } + ret = decode_con_secret(&dp, dend, con_secret, con_secret_len); + if (ret) + return ret; } /* service tickets */ @@ -828,7 +848,6 @@ static int decrypt_authorizer_reply(struct ceph_crypto_key *secret, { void *dp, *dend; u8 struct_v; - int len; int ret; dp = *p + ceph_x_encrypt_offset(); @@ -843,17 +862,9 @@ static int decrypt_authorizer_reply(struct ceph_crypto_key *secret, ceph_decode_64_safe(&dp, dend, *nonce_plus_one, e_inval); dout("%s nonce_plus_one %llu\n", __func__, *nonce_plus_one); if (struct_v >= 2) { - ceph_decode_32_safe(&dp, dend, len, e_inval); - if (len > CEPH_MAX_CON_SECRET_LEN) { - pr_err("connection secret too big %d\n", len); - return -EINVAL; - } - - dout("%s connection secret len %d\n", __func__, len); - if (con_secret) { - memcpy(con_secret, dp, len); - *con_secret_len = len; - } + ret = decode_con_secret(&dp, dend, con_secret, con_secret_len); + if (ret) + return ret; } return 0; diff --git a/net/ceph/crypto.c b/net/ceph/crypto.c index 4f75df40fb121..92d89b3316459 100644 --- a/net/ceph/crypto.c +++ b/net/ceph/crypto.c @@ -96,6 +96,7 @@ int ceph_crypto_key_decode(struct ceph_crypto_key *key, void **p, void *end) key->len = ceph_decode_16(p); ceph_decode_need(p, end, key->len, bad); ret = set_secret(key, *p); + memzero_explicit(*p, key->len); *p += key->len; return ret; @@ -134,7 +135,7 @@ int ceph_crypto_key_unarmor(struct ceph_crypto_key *key, const char *inkey) void ceph_crypto_key_destroy(struct ceph_crypto_key *key) { if (key) { - kfree(key->key); + kfree_sensitive(key->key); key->key = NULL; if (key->tfm) { crypto_free_sync_skcipher(key->tfm); diff --git a/net/ceph/messenger_v2.c b/net/ceph/messenger_v2.c index c38d8de938363..cc40ce4e02fbc 100644 --- a/net/ceph/messenger_v2.c +++ b/net/ceph/messenger_v2.c @@ -689,11 +689,10 @@ static int verify_epilogue_crcs(struct ceph_connection *con, u32 front_crc, } static int setup_crypto(struct ceph_connection *con, - u8 *session_key, int session_key_len, - u8 *con_secret, int con_secret_len) + const u8 *session_key, int session_key_len, + const u8 *con_secret, int con_secret_len) { unsigned int noio_flag; - void *p; int ret; dout("%s con %p con_mode %d session_key_len %d con_secret_len %d\n", @@ -751,15 +750,14 @@ static int setup_crypto(struct ceph_connection *con, return ret; } - p = con_secret; - WARN_ON((unsigned long)p & crypto_aead_alignmask(con->v2.gcm_tfm)); - ret = crypto_aead_setkey(con->v2.gcm_tfm, p, CEPH_GCM_KEY_LEN); + WARN_ON((unsigned long)con_secret & + crypto_aead_alignmask(con->v2.gcm_tfm)); + ret = crypto_aead_setkey(con->v2.gcm_tfm, con_secret, CEPH_GCM_KEY_LEN); if (ret) { pr_err("failed to set gcm key: %d\n", ret); return ret; } - p += CEPH_GCM_KEY_LEN; WARN_ON(crypto_aead_ivsize(con->v2.gcm_tfm) != CEPH_GCM_IV_LEN); ret = crypto_aead_setauthsize(con->v2.gcm_tfm, CEPH_GCM_TAG_LEN); if (ret) { @@ -777,8 +775,11 @@ static int setup_crypto(struct ceph_connection *con, aead_request_set_callback(con->v2.gcm_req, CRYPTO_TFM_REQ_MAY_BACKLOG, crypto_req_done, &con->v2.gcm_wait); - memcpy(&con->v2.in_gcm_nonce, p, CEPH_GCM_IV_LEN); - memcpy(&con->v2.out_gcm_nonce, p + CEPH_GCM_IV_LEN, CEPH_GCM_IV_LEN); + memcpy(&con->v2.in_gcm_nonce, con_secret + CEPH_GCM_KEY_LEN, + CEPH_GCM_IV_LEN); + memcpy(&con->v2.out_gcm_nonce, + con_secret + CEPH_GCM_KEY_LEN + CEPH_GCM_IV_LEN, + CEPH_GCM_IV_LEN); return 0; /* auth_x, secure mode */ } @@ -800,7 +801,7 @@ static int hmac_sha256(struct ceph_connection *con, const struct kvec *kvecs, desc->tfm = con->v2.hmac_tfm; ret = crypto_shash_init(desc); if (ret) - return ret; + goto out; for (i = 0; i < kvec_cnt; i++) { WARN_ON((unsigned long)kvecs[i].iov_base & @@ -808,15 +809,14 @@ static int hmac_sha256(struct ceph_connection *con, const struct kvec *kvecs, ret = crypto_shash_update(desc, kvecs[i].iov_base, kvecs[i].iov_len); if (ret) - return ret; + goto out; } ret = crypto_shash_final(desc, hmac); - if (ret) - return ret; +out: shash_desc_zero(desc); - return 0; /* auth_x, both plain and secure modes */ + return ret; /* auth_x, both plain and secure modes */ } static void gcm_inc_nonce(struct ceph_gcm_nonce *nonce) @@ -2072,27 +2072,32 @@ static int process_auth_done(struct ceph_connection *con, void *p, void *end) if (con->state != CEPH_CON_S_V2_AUTH) { dout("%s con %p state changed to %d\n", __func__, con, con->state); - return -EAGAIN; + ret = -EAGAIN; + goto out; } dout("%s con %p handle_auth_done ret %d\n", __func__, con, ret); if (ret) - return ret; + goto out; ret = setup_crypto(con, session_key, session_key_len, con_secret, con_secret_len); if (ret) - return ret; + goto out; reset_out_kvecs(con); ret = prepare_auth_signature(con); if (ret) { pr_err("prepare_auth_signature failed: %d\n", ret); - return ret; + goto out; } con->state = CEPH_CON_S_V2_AUTH_SIGNATURE; - return 0; + +out: + memzero_explicit(session_key_buf, sizeof(session_key_buf)); + memzero_explicit(con_secret_buf, sizeof(con_secret_buf)); + return ret; bad: pr_err("failed to decode auth_done\n"); @@ -3436,6 +3441,8 @@ void ceph_con_v2_reset_protocol(struct ceph_connection *con) } con->v2.con_mode = CEPH_CON_MODE_UNKNOWN; + memzero_explicit(&con->v2.in_gcm_nonce, CEPH_GCM_IV_LEN); + memzero_explicit(&con->v2.out_gcm_nonce, CEPH_GCM_IV_LEN); if (con->v2.hmac_tfm) { crypto_free_shash(con->v2.hmac_tfm); -- GitLab From 4972cf605f8a10784bb9ec9bdf3465892fb547c8 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Wed, 23 Dec 2020 16:32:05 +0100 Subject: [PATCH 0194/2012] libceph, ceph: disambiguate ceph_connection_operations handlers Since a few years, kernel addresses are no longer included in oops dumps, at least on x86. All we get is a symbol name with offset and size. This is a problem for ceph_connection_operations handlers, especially con->ops->dispatch(). All three handlers have the same name and there is little context to disambiguate between e.g. monitor and OSD clients because almost everything is inlined. gdb sneakily stops at the first matching symbol, so one has to resort to nm and addr2line. Some of these are already prefixed with mon_, osd_ or mds_. Let's do the same for all others. Signed-off-by: Ilya Dryomov Acked-by: Jeff Layton --- fs/ceph/mds_client.c | 34 +++++++++++++++++----------------- net/ceph/mon_client.c | 14 +++++++------- net/ceph/osd_client.c | 40 ++++++++++++++++++++-------------------- 3 files changed, 44 insertions(+), 44 deletions(-) diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 840587037b59b..d87bd852ed961 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -5038,7 +5038,7 @@ bad: return; } -static struct ceph_connection *con_get(struct ceph_connection *con) +static struct ceph_connection *mds_get_con(struct ceph_connection *con) { struct ceph_mds_session *s = con->private; @@ -5047,7 +5047,7 @@ static struct ceph_connection *con_get(struct ceph_connection *con) return NULL; } -static void con_put(struct ceph_connection *con) +static void mds_put_con(struct ceph_connection *con) { struct ceph_mds_session *s = con->private; @@ -5058,7 +5058,7 @@ static void con_put(struct ceph_connection *con) * if the client is unresponsive for long enough, the mds will kill * the session entirely. */ -static void peer_reset(struct ceph_connection *con) +static void mds_peer_reset(struct ceph_connection *con) { struct ceph_mds_session *s = con->private; struct ceph_mds_client *mdsc = s->s_mdsc; @@ -5067,7 +5067,7 @@ static void peer_reset(struct ceph_connection *con) send_mds_reconnect(mdsc, s); } -static void dispatch(struct ceph_connection *con, struct ceph_msg *msg) +static void mds_dispatch(struct ceph_connection *con, struct ceph_msg *msg) { struct ceph_mds_session *s = con->private; struct ceph_mds_client *mdsc = s->s_mdsc; @@ -5125,8 +5125,8 @@ out: * Note: returned pointer is the address of a structure that's * managed separately. Caller must *not* attempt to free it. */ -static struct ceph_auth_handshake *get_authorizer(struct ceph_connection *con, - int *proto, int force_new) +static struct ceph_auth_handshake * +mds_get_authorizer(struct ceph_connection *con, int *proto, int force_new) { struct ceph_mds_session *s = con->private; struct ceph_mds_client *mdsc = s->s_mdsc; @@ -5142,7 +5142,7 @@ static struct ceph_auth_handshake *get_authorizer(struct ceph_connection *con, return auth; } -static int add_authorizer_challenge(struct ceph_connection *con, +static int mds_add_authorizer_challenge(struct ceph_connection *con, void *challenge_buf, int challenge_buf_len) { struct ceph_mds_session *s = con->private; @@ -5153,7 +5153,7 @@ static int add_authorizer_challenge(struct ceph_connection *con, challenge_buf, challenge_buf_len); } -static int verify_authorizer_reply(struct ceph_connection *con) +static int mds_verify_authorizer_reply(struct ceph_connection *con) { struct ceph_mds_session *s = con->private; struct ceph_mds_client *mdsc = s->s_mdsc; @@ -5165,7 +5165,7 @@ static int verify_authorizer_reply(struct ceph_connection *con) NULL, NULL, NULL, NULL); } -static int invalidate_authorizer(struct ceph_connection *con) +static int mds_invalidate_authorizer(struct ceph_connection *con) { struct ceph_mds_session *s = con->private; struct ceph_mds_client *mdsc = s->s_mdsc; @@ -5288,15 +5288,15 @@ static int mds_check_message_signature(struct ceph_msg *msg) } static const struct ceph_connection_operations mds_con_ops = { - .get = con_get, - .put = con_put, - .dispatch = dispatch, - .get_authorizer = get_authorizer, - .add_authorizer_challenge = add_authorizer_challenge, - .verify_authorizer_reply = verify_authorizer_reply, - .invalidate_authorizer = invalidate_authorizer, - .peer_reset = peer_reset, + .get = mds_get_con, + .put = mds_put_con, .alloc_msg = mds_alloc_msg, + .dispatch = mds_dispatch, + .peer_reset = mds_peer_reset, + .get_authorizer = mds_get_authorizer, + .add_authorizer_challenge = mds_add_authorizer_challenge, + .verify_authorizer_reply = mds_verify_authorizer_reply, + .invalidate_authorizer = mds_invalidate_authorizer, .sign_message = mds_sign_message, .check_message_signature = mds_check_message_signature, .get_auth_request = mds_get_auth_request, diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c index b9d54ed9f3384..195ceb8afb061 100644 --- a/net/ceph/mon_client.c +++ b/net/ceph/mon_client.c @@ -1433,7 +1433,7 @@ static int mon_handle_auth_bad_method(struct ceph_connection *con, /* * handle incoming message */ -static void dispatch(struct ceph_connection *con, struct ceph_msg *msg) +static void mon_dispatch(struct ceph_connection *con, struct ceph_msg *msg) { struct ceph_mon_client *monc = con->private; int type = le16_to_cpu(msg->hdr.type); @@ -1565,21 +1565,21 @@ static void mon_fault(struct ceph_connection *con) * will come from the messenger workqueue, which is drained prior to * mon_client destruction. */ -static struct ceph_connection *con_get(struct ceph_connection *con) +static struct ceph_connection *mon_get_con(struct ceph_connection *con) { return con; } -static void con_put(struct ceph_connection *con) +static void mon_put_con(struct ceph_connection *con) { } static const struct ceph_connection_operations mon_con_ops = { - .get = con_get, - .put = con_put, - .dispatch = dispatch, - .fault = mon_fault, + .get = mon_get_con, + .put = mon_put_con, .alloc_msg = mon_alloc_msg, + .dispatch = mon_dispatch, + .fault = mon_fault, .get_auth_request = mon_get_auth_request, .handle_auth_reply_more = mon_handle_auth_reply_more, .handle_auth_done = mon_handle_auth_done, diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 61229c5e22cb8..ff8624a7c9643 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -5412,7 +5412,7 @@ void ceph_osdc_cleanup(void) /* * handle incoming message */ -static void dispatch(struct ceph_connection *con, struct ceph_msg *msg) +static void osd_dispatch(struct ceph_connection *con, struct ceph_msg *msg) { struct ceph_osd *osd = con->private; struct ceph_osd_client *osdc = osd->o_osdc; @@ -5534,9 +5534,9 @@ static struct ceph_msg *alloc_msg_with_page_vector(struct ceph_msg_header *hdr) return m; } -static struct ceph_msg *alloc_msg(struct ceph_connection *con, - struct ceph_msg_header *hdr, - int *skip) +static struct ceph_msg *osd_alloc_msg(struct ceph_connection *con, + struct ceph_msg_header *hdr, + int *skip) { struct ceph_osd *osd = con->private; int type = le16_to_cpu(hdr->type); @@ -5560,7 +5560,7 @@ static struct ceph_msg *alloc_msg(struct ceph_connection *con, /* * Wrappers to refcount containing ceph_osd struct */ -static struct ceph_connection *get_osd_con(struct ceph_connection *con) +static struct ceph_connection *osd_get_con(struct ceph_connection *con) { struct ceph_osd *osd = con->private; if (get_osd(osd)) @@ -5568,7 +5568,7 @@ static struct ceph_connection *get_osd_con(struct ceph_connection *con) return NULL; } -static void put_osd_con(struct ceph_connection *con) +static void osd_put_con(struct ceph_connection *con) { struct ceph_osd *osd = con->private; put_osd(osd); @@ -5582,8 +5582,8 @@ static void put_osd_con(struct ceph_connection *con) * Note: returned pointer is the address of a structure that's * managed separately. Caller must *not* attempt to free it. */ -static struct ceph_auth_handshake *get_authorizer(struct ceph_connection *con, - int *proto, int force_new) +static struct ceph_auth_handshake * +osd_get_authorizer(struct ceph_connection *con, int *proto, int force_new) { struct ceph_osd *o = con->private; struct ceph_osd_client *osdc = o->o_osdc; @@ -5599,7 +5599,7 @@ static struct ceph_auth_handshake *get_authorizer(struct ceph_connection *con, return auth; } -static int add_authorizer_challenge(struct ceph_connection *con, +static int osd_add_authorizer_challenge(struct ceph_connection *con, void *challenge_buf, int challenge_buf_len) { struct ceph_osd *o = con->private; @@ -5610,7 +5610,7 @@ static int add_authorizer_challenge(struct ceph_connection *con, challenge_buf, challenge_buf_len); } -static int verify_authorizer_reply(struct ceph_connection *con) +static int osd_verify_authorizer_reply(struct ceph_connection *con) { struct ceph_osd *o = con->private; struct ceph_osd_client *osdc = o->o_osdc; @@ -5622,7 +5622,7 @@ static int verify_authorizer_reply(struct ceph_connection *con) NULL, NULL, NULL, NULL); } -static int invalidate_authorizer(struct ceph_connection *con) +static int osd_invalidate_authorizer(struct ceph_connection *con) { struct ceph_osd *o = con->private; struct ceph_osd_client *osdc = o->o_osdc; @@ -5731,18 +5731,18 @@ static int osd_check_message_signature(struct ceph_msg *msg) } static const struct ceph_connection_operations osd_con_ops = { - .get = get_osd_con, - .put = put_osd_con, - .dispatch = dispatch, - .get_authorizer = get_authorizer, - .add_authorizer_challenge = add_authorizer_challenge, - .verify_authorizer_reply = verify_authorizer_reply, - .invalidate_authorizer = invalidate_authorizer, - .alloc_msg = alloc_msg, + .get = osd_get_con, + .put = osd_put_con, + .alloc_msg = osd_alloc_msg, + .dispatch = osd_dispatch, + .fault = osd_fault, .reencode_message = osd_reencode_message, + .get_authorizer = osd_get_authorizer, + .add_authorizer_challenge = osd_add_authorizer_challenge, + .verify_authorizer_reply = osd_verify_authorizer_reply, + .invalidate_authorizer = osd_invalidate_authorizer, .sign_message = osd_sign_message, .check_message_signature = osd_check_message_signature, - .fault = osd_fault, .get_auth_request = osd_get_auth_request, .handle_auth_reply_more = osd_handle_auth_reply_more, .handle_auth_done = osd_handle_auth_done, -- GitLab From 8cbebc4118b5933b3ae6351ceb433f75ac6b7c6b Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 4 Jan 2021 16:50:16 +0000 Subject: [PATCH 0195/2012] KVM: arm64: Replace KVM_ARM_PMU with HW_PERF_EVENTS KVM_ARM_PMU only existed for the benefit of 32bit ARM hosts, and makes no sense now that we are 64bit only. Get rid of it. Signed-off-by: Marc Zyngier --- arch/arm64/kvm/Kconfig | 8 -------- arch/arm64/kvm/Makefile | 2 +- include/kvm/arm_pmu.h | 2 +- 3 files changed, 2 insertions(+), 10 deletions(-) diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig index 043756db8f6ec..3964acf5451ea 100644 --- a/arch/arm64/kvm/Kconfig +++ b/arch/arm64/kvm/Kconfig @@ -49,14 +49,6 @@ if KVM source "virt/kvm/Kconfig" -config KVM_ARM_PMU - bool "Virtual Performance Monitoring Unit (PMU) support" - depends on HW_PERF_EVENTS - default y - help - Adds support for a virtual Performance Monitoring Unit (PMU) in - virtual machines. - endif # KVM endif # VIRTUALIZATION diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile index 60fd181df6243..13b017284bf96 100644 --- a/arch/arm64/kvm/Makefile +++ b/arch/arm64/kvm/Makefile @@ -24,4 +24,4 @@ kvm-y := $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o $(KVM)/eventfd.o \ vgic/vgic-mmio-v3.o vgic/vgic-kvm-device.o \ vgic/vgic-its.o vgic/vgic-debug.o -kvm-$(CONFIG_KVM_ARM_PMU) += pmu-emul.o +kvm-$(CONFIG_HW_PERF_EVENTS) += pmu-emul.o diff --git a/include/kvm/arm_pmu.h b/include/kvm/arm_pmu.h index fc85f50fa0e96..8dcb3e1477bc9 100644 --- a/include/kvm/arm_pmu.h +++ b/include/kvm/arm_pmu.h @@ -13,7 +13,7 @@ #define ARMV8_PMU_CYCLE_IDX (ARMV8_PMU_MAX_COUNTERS - 1) #define ARMV8_PMU_MAX_COUNTER_PAIRS ((ARMV8_PMU_MAX_COUNTERS + 1) >> 1) -#ifdef CONFIG_KVM_ARM_PMU +#ifdef CONFIG_HW_PERF_EVENTS struct kvm_pmc { u8 idx; /* index into the pmu->pmc array */ -- GitLab From 0b884fe71f9ee6a5df35e677154256ea2099ebb8 Mon Sep 17 00:00:00 2001 From: Chunyan Zhang Date: Mon, 14 Dec 2020 12:58:50 +0800 Subject: [PATCH 0196/2012] i2c: sprd: use a specific timeout to avoid system hang up issue If the i2c device SCL bus being pulled up due to some exception before message transfer done, the system cannot receive the completing interrupt signal any more, it would not exit waiting loop until MAX_SCHEDULE_TIMEOUT jiffies eclipse, that would make the system seemed hang up. To avoid that happen, this patch adds a specific timeout for message transfer. Fixes: 8b9ec0719834 ("i2c: Add Spreadtrum I2C controller driver") Signed-off-by: Linhua Xu Signed-off-by: Chunyan Zhang [wsa: changed errno to ETIMEDOUT] Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-sprd.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-sprd.c b/drivers/i2c/busses/i2c-sprd.c index 19cda6742423d..2917fecf6c80d 100644 --- a/drivers/i2c/busses/i2c-sprd.c +++ b/drivers/i2c/busses/i2c-sprd.c @@ -72,6 +72,8 @@ /* timeout (ms) for pm runtime autosuspend */ #define SPRD_I2C_PM_TIMEOUT 1000 +/* timeout (ms) for transfer message */ +#define I2C_XFER_TIMEOUT 1000 /* SPRD i2c data structure */ struct sprd_i2c { @@ -244,6 +246,7 @@ static int sprd_i2c_handle_msg(struct i2c_adapter *i2c_adap, struct i2c_msg *msg, bool is_last_msg) { struct sprd_i2c *i2c_dev = i2c_adap->algo_data; + unsigned long time_left; i2c_dev->msg = msg; i2c_dev->buf = msg->buf; @@ -273,7 +276,10 @@ static int sprd_i2c_handle_msg(struct i2c_adapter *i2c_adap, sprd_i2c_opt_start(i2c_dev); - wait_for_completion(&i2c_dev->complete); + time_left = wait_for_completion_timeout(&i2c_dev->complete, + msecs_to_jiffies(I2C_XFER_TIMEOUT)); + if (!time_left) + return -ETIMEDOUT; return i2c_dev->err; } -- GitLab From 0b3ea2a06de1f52ea30865e227e109a5fd3b6214 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 21 Dec 2020 14:42:25 +0100 Subject: [PATCH 0197/2012] i2c: i801: Fix the i2c-mux gpiod_lookup_table not being properly terminated gpiod_add_lookup_table() expects the gpiod_lookup_table->table passed to it to be terminated with a zero-ed out entry. So we need to allocate one more entry then we will use. Fixes: d308dfbf62ef ("i2c: mux/i801: Switch to use descriptor passing") Signed-off-by: Hans de Goede Reviewed-by: Mika Westerberg Acked-by: Jean Delvare Reviewed-by: Linus Walleij Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-i801.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-i801.c b/drivers/i2c/busses/i2c-i801.c index ae90713443fa6..877fe3733a42b 100644 --- a/drivers/i2c/busses/i2c-i801.c +++ b/drivers/i2c/busses/i2c-i801.c @@ -1449,7 +1449,7 @@ static int i801_add_mux(struct i801_priv *priv) /* Register GPIO descriptor lookup table */ lookup = devm_kzalloc(dev, - struct_size(lookup, table, mux_config->n_gpios), + struct_size(lookup, table, mux_config->n_gpios + 1), GFP_KERNEL); if (!lookup) return -ENOMEM; -- GitLab From cc07d72bf350b77faeffee1c37bc52197171473f Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Thu, 24 Sep 2020 13:14:52 -0400 Subject: [PATCH 0198/2012] dm raid: fix discard limits for raid1 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Block core warned that discard_granularity was 0 for dm-raid with personality of raid1. Reason is that raid_io_hints() was incorrectly special-casing raid1 rather than raid0. Fix raid_io_hints() by removing discard limits settings for raid1. Check for raid0 instead. Fixes: 61697a6abd24a ("dm: eliminate 'split_discard_bios' flag from DM target interface") Cc: stable@vger.kernel.org Reported-by: Zdenek Kabelac Reported-by: Mikulas Patocka Reported-by: Stephan Bärwolf Signed-off-by: Mike Snitzer --- drivers/md/dm-raid.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index 23c38777e8f63..cab12b2251bac 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -3729,10 +3729,10 @@ static void raid_io_hints(struct dm_target *ti, struct queue_limits *limits) blk_limits_io_opt(limits, chunk_size_bytes * mddev_data_stripes(rs)); /* - * RAID1 and RAID10 personalities require bio splitting, - * RAID0/4/5/6 don't and process large discard bios properly. + * RAID0 and RAID10 personalities require bio splitting, + * RAID1/4/5/6 don't and process large discard bios properly. */ - if (rs_is_raid1(rs) || rs_is_raid10(rs)) { + if (rs_is_raid0(rs) || rs_is_raid10(rs)) { limits->discard_granularity = chunk_size_bytes; limits->max_discard_sectors = rs->md.chunk_sectors; } -- GitLab From f7b347acb5f6c29d9229bb64893d8b6a2c7949fb Mon Sep 17 00:00:00 2001 From: Anthony Iliopoulos Date: Mon, 14 Dec 2020 18:18:11 +0100 Subject: [PATCH 0199/2012] dm integrity: select CRYPTO_SKCIPHER The integrity target relies on skcipher for encryption/decryption, but certain kernel configurations may not enable CRYPTO_SKCIPHER, leading to compilation errors due to unresolved symbols. Explicitly select CRYPTO_SKCIPHER for DM_INTEGRITY, since it is unconditionally dependent on it. Signed-off-by: Anthony Iliopoulos Signed-off-by: Mike Snitzer --- drivers/md/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig index b7e2d96666142..a378a89670949 100644 --- a/drivers/md/Kconfig +++ b/drivers/md/Kconfig @@ -605,6 +605,7 @@ config DM_INTEGRITY select BLK_DEV_INTEGRITY select DM_BUFIO select CRYPTO + select CRYPTO_SKCIPHER select ASYNC_XOR help This device-mapper target emulates a block device that has -- GitLab From b690bd546b227c32b860dae985a18bed8aa946fe Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Sun, 3 Jan 2021 22:40:51 +0100 Subject: [PATCH 0200/2012] dm zoned: select CONFIG_CRC32 Without crc32 support, this driver fails to link: arm-linux-gnueabi-ld: drivers/md/dm-zoned-metadata.o: in function `dmz_write_sb': dm-zoned-metadata.c:(.text+0xe98): undefined reference to `crc32_le' arm-linux-gnueabi-ld: drivers/md/dm-zoned-metadata.o: in function `dmz_check_sb': dm-zoned-metadata.c:(.text+0x7978): undefined reference to `crc32_le' Fixes: 3b1a94c88b79 ("dm zoned: drive-managed zoned block device target") Signed-off-by: Arnd Bergmann Reviewed-by: Damien Le Moal Signed-off-by: Mike Snitzer --- drivers/md/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig index a378a89670949..9e44c09f64108 100644 --- a/drivers/md/Kconfig +++ b/drivers/md/Kconfig @@ -623,6 +623,7 @@ config DM_ZONED tristate "Drive-managed zoned block device target support" depends on BLK_DEV_DM depends on BLK_DEV_ZONED + select CRC32 help This device-mapper target takes a host-managed or host-aware zoned block device and exposes most of its capacity as a regular block -- GitLab From 8abec36d1274bbd5ae8f36f3658b9abb3db56c31 Mon Sep 17 00:00:00 2001 From: Ignat Korchagin Date: Mon, 4 Jan 2021 14:59:47 +0000 Subject: [PATCH 0201/2012] dm crypt: do not wait for backlogged crypto request completion in softirq Commit 39d42fa96ba1 ("dm crypt: add flags to optionally bypass kcryptd workqueues") made it possible for some code paths in dm-crypt to be executed in softirq context, when the underlying driver processes IO requests in interrupt/softirq context. When Crypto API backlogs a crypto request, dm-crypt uses wait_for_completion to avoid sending further requests to an already overloaded crypto driver. However, if the code is executing in softirq context, we might get the following stacktrace: [ 210.235213][ C0] BUG: scheduling while atomic: fio/2602/0x00000102 [ 210.236701][ C0] Modules linked in: [ 210.237566][ C0] CPU: 0 PID: 2602 Comm: fio Tainted: G W 5.10.0+ #50 [ 210.239292][ C0] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 0.0.0 02/06/2015 [ 210.241233][ C0] Call Trace: [ 210.241946][ C0] [ 210.242561][ C0] dump_stack+0x7d/0xa3 [ 210.243466][ C0] __schedule_bug.cold+0xb3/0xc2 [ 210.244539][ C0] __schedule+0x156f/0x20d0 [ 210.245518][ C0] ? io_schedule_timeout+0x140/0x140 [ 210.246660][ C0] schedule+0xd0/0x270 [ 210.247541][ C0] schedule_timeout+0x1fb/0x280 [ 210.248586][ C0] ? usleep_range+0x150/0x150 [ 210.249624][ C0] ? unpoison_range+0x3a/0x60 [ 210.250632][ C0] ? ____kasan_kmalloc.constprop.0+0x82/0xa0 [ 210.251949][ C0] ? unpoison_range+0x3a/0x60 [ 210.252958][ C0] ? __prepare_to_swait+0xa7/0x190 [ 210.254067][ C0] do_wait_for_common+0x2ab/0x370 [ 210.255158][ C0] ? usleep_range+0x150/0x150 [ 210.256192][ C0] ? bit_wait_io_timeout+0x160/0x160 [ 210.257358][ C0] ? blk_update_request+0x757/0x1150 [ 210.258582][ C0] ? _raw_spin_lock_irq+0x82/0xd0 [ 210.259674][ C0] ? _raw_read_unlock_irqrestore+0x30/0x30 [ 210.260917][ C0] wait_for_completion+0x4c/0x90 [ 210.261971][ C0] crypt_convert+0x19a6/0x4c00 [ 210.263033][ C0] ? _raw_spin_lock_irqsave+0x87/0xe0 [ 210.264193][ C0] ? kasan_set_track+0x1c/0x30 [ 210.265191][ C0] ? crypt_iv_tcw_ctr+0x4a0/0x4a0 [ 210.266283][ C0] ? kmem_cache_free+0x104/0x470 [ 210.267363][ C0] ? crypt_endio+0x91/0x180 [ 210.268327][ C0] kcryptd_crypt_read_convert+0x30e/0x420 [ 210.269565][ C0] blk_update_request+0x757/0x1150 [ 210.270563][ C0] blk_mq_end_request+0x4b/0x480 [ 210.271680][ C0] blk_done_softirq+0x21d/0x340 [ 210.272775][ C0] ? _raw_spin_lock+0x81/0xd0 [ 210.273847][ C0] ? blk_mq_stop_hw_queue+0x30/0x30 [ 210.275031][ C0] ? _raw_read_lock_irq+0x40/0x40 [ 210.276182][ C0] __do_softirq+0x190/0x611 [ 210.277203][ C0] ? handle_edge_irq+0x221/0xb60 [ 210.278340][ C0] asm_call_irq_on_stack+0x12/0x20 [ 210.279514][ C0] [ 210.280164][ C0] do_softirq_own_stack+0x37/0x40 [ 210.281281][ C0] irq_exit_rcu+0x110/0x1b0 [ 210.282286][ C0] common_interrupt+0x74/0x120 [ 210.283376][ C0] asm_common_interrupt+0x1e/0x40 [ 210.284496][ C0] RIP: 0010:_aesni_enc1+0x65/0xb0 Fix this by making crypt_convert function reentrant from the point of a single bio and make dm-crypt defer further bio processing to a workqueue, if Crypto API backlogs a request in interrupt context. Fixes: 39d42fa96ba1 ("dm crypt: add flags to optionally bypass kcryptd workqueues") Cc: stable@vger.kernel.org # v5.9+ Signed-off-by: Ignat Korchagin Acked-by: Mikulas Patocka Signed-off-by: Mike Snitzer --- drivers/md/dm-crypt.c | 103 ++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 98 insertions(+), 5 deletions(-) diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 53791138d78bf..527b1475b7a07 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -1529,13 +1529,19 @@ static void crypt_free_req(struct crypt_config *cc, void *req, struct bio *base_ * Encrypt / decrypt data from one bio to another one (can be the same one) */ static blk_status_t crypt_convert(struct crypt_config *cc, - struct convert_context *ctx, bool atomic) + struct convert_context *ctx, bool atomic, bool reset_pending) { unsigned int tag_offset = 0; unsigned int sector_step = cc->sector_size >> SECTOR_SHIFT; int r; - atomic_set(&ctx->cc_pending, 1); + /* + * if reset_pending is set we are dealing with the bio for the first time, + * else we're continuing to work on the previous bio, so don't mess with + * the cc_pending counter + */ + if (reset_pending) + atomic_set(&ctx->cc_pending, 1); while (ctx->iter_in.bi_size && ctx->iter_out.bi_size) { @@ -1553,7 +1559,25 @@ static blk_status_t crypt_convert(struct crypt_config *cc, * but the driver request queue is full, let's wait. */ case -EBUSY: - wait_for_completion(&ctx->restart); + if (in_interrupt()) { + if (try_wait_for_completion(&ctx->restart)) { + /* + * we don't have to block to wait for completion, + * so proceed + */ + } else { + /* + * we can't wait for completion without blocking + * exit and continue processing in a workqueue + */ + ctx->r.req = NULL; + ctx->cc_sector += sector_step; + tag_offset++; + return BLK_STS_DEV_RESOURCE; + } + } else { + wait_for_completion(&ctx->restart); + } reinit_completion(&ctx->restart); fallthrough; /* @@ -1945,6 +1969,37 @@ static bool kcryptd_crypt_write_inline(struct crypt_config *cc, } } +static void kcryptd_crypt_write_continue(struct work_struct *work) +{ + struct dm_crypt_io *io = container_of(work, struct dm_crypt_io, work); + struct crypt_config *cc = io->cc; + struct convert_context *ctx = &io->ctx; + int crypt_finished; + sector_t sector = io->sector; + blk_status_t r; + + wait_for_completion(&ctx->restart); + reinit_completion(&ctx->restart); + + r = crypt_convert(cc, &io->ctx, true, false); + if (r) + io->error = r; + crypt_finished = atomic_dec_and_test(&ctx->cc_pending); + if (!crypt_finished && kcryptd_crypt_write_inline(cc, ctx)) { + /* Wait for completion signaled by kcryptd_async_done() */ + wait_for_completion(&ctx->restart); + crypt_finished = 1; + } + + /* Encryption was already finished, submit io now */ + if (crypt_finished) { + kcryptd_crypt_write_io_submit(io, 0); + io->sector = sector; + } + + crypt_dec_pending(io); +} + static void kcryptd_crypt_write_convert(struct dm_crypt_io *io) { struct crypt_config *cc = io->cc; @@ -1973,7 +2028,17 @@ static void kcryptd_crypt_write_convert(struct dm_crypt_io *io) crypt_inc_pending(io); r = crypt_convert(cc, ctx, - test_bit(DM_CRYPT_NO_WRITE_WORKQUEUE, &cc->flags)); + test_bit(DM_CRYPT_NO_WRITE_WORKQUEUE, &cc->flags), true); + /* + * Crypto API backlogged the request, because its queue was full + * and we're in softirq context, so continue from a workqueue + * (TODO: is it actually possible to be in softirq in the write path?) + */ + if (r == BLK_STS_DEV_RESOURCE) { + INIT_WORK(&io->work, kcryptd_crypt_write_continue); + queue_work(cc->crypt_queue, &io->work); + return; + } if (r) io->error = r; crypt_finished = atomic_dec_and_test(&ctx->cc_pending); @@ -1998,6 +2063,25 @@ static void kcryptd_crypt_read_done(struct dm_crypt_io *io) crypt_dec_pending(io); } +static void kcryptd_crypt_read_continue(struct work_struct *work) +{ + struct dm_crypt_io *io = container_of(work, struct dm_crypt_io, work); + struct crypt_config *cc = io->cc; + blk_status_t r; + + wait_for_completion(&io->ctx.restart); + reinit_completion(&io->ctx.restart); + + r = crypt_convert(cc, &io->ctx, true, false); + if (r) + io->error = r; + + if (atomic_dec_and_test(&io->ctx.cc_pending)) + kcryptd_crypt_read_done(io); + + crypt_dec_pending(io); +} + static void kcryptd_crypt_read_convert(struct dm_crypt_io *io) { struct crypt_config *cc = io->cc; @@ -2009,7 +2093,16 @@ static void kcryptd_crypt_read_convert(struct dm_crypt_io *io) io->sector); r = crypt_convert(cc, &io->ctx, - test_bit(DM_CRYPT_NO_READ_WORKQUEUE, &cc->flags)); + test_bit(DM_CRYPT_NO_READ_WORKQUEUE, &cc->flags), true); + /* + * Crypto API backlogged the request, because its queue was full + * and we're in softirq context, so continue from a workqueue + */ + if (r == BLK_STS_DEV_RESOURCE) { + INIT_WORK(&io->work, kcryptd_crypt_read_continue); + queue_work(cc->crypt_queue, &io->work); + return; + } if (r) io->error = r; -- GitLab From d68b29584c25dbacd01ed44a3e45abb35353f1de Mon Sep 17 00:00:00 2001 From: Ignat Korchagin Date: Mon, 4 Jan 2021 14:59:48 +0000 Subject: [PATCH 0202/2012] dm crypt: use GFP_ATOMIC when allocating crypto requests from softirq Commit 39d42fa96ba1 ("dm crypt: add flags to optionally bypass kcryptd workqueues") made it possible for some code paths in dm-crypt to be executed in softirq context, when the underlying driver processes IO requests in interrupt/softirq context. In this case sometimes when allocating a new crypto request we may get a stacktrace like below: [ 210.103008][ C0] BUG: sleeping function called from invalid context at mm/mempool.c:381 [ 210.104746][ C0] in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 2602, name: fio [ 210.106599][ C0] CPU: 0 PID: 2602 Comm: fio Tainted: G W 5.10.0+ #50 [ 210.108331][ C0] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 0.0.0 02/06/2015 [ 210.110212][ C0] Call Trace: [ 210.110921][ C0] [ 210.111527][ C0] dump_stack+0x7d/0xa3 [ 210.112411][ C0] ___might_sleep.cold+0x122/0x151 [ 210.113527][ C0] mempool_alloc+0x16b/0x2f0 [ 210.114524][ C0] ? __queue_work+0x515/0xde0 [ 210.115553][ C0] ? mempool_resize+0x700/0x700 [ 210.116586][ C0] ? crypt_endio+0x91/0x180 [ 210.117479][ C0] ? blk_update_request+0x757/0x1150 [ 210.118513][ C0] ? blk_mq_end_request+0x4b/0x480 [ 210.119572][ C0] ? blk_done_softirq+0x21d/0x340 [ 210.120628][ C0] ? __do_softirq+0x190/0x611 [ 210.121626][ C0] crypt_convert+0x29f9/0x4c00 [ 210.122668][ C0] ? _raw_spin_lock_irqsave+0x87/0xe0 [ 210.123824][ C0] ? kasan_set_track+0x1c/0x30 [ 210.124858][ C0] ? crypt_iv_tcw_ctr+0x4a0/0x4a0 [ 210.125930][ C0] ? kmem_cache_free+0x104/0x470 [ 210.126973][ C0] ? crypt_endio+0x91/0x180 [ 210.127947][ C0] kcryptd_crypt_read_convert+0x30e/0x420 [ 210.129165][ C0] blk_update_request+0x757/0x1150 [ 210.130231][ C0] blk_mq_end_request+0x4b/0x480 [ 210.131294][ C0] blk_done_softirq+0x21d/0x340 [ 210.132332][ C0] ? _raw_spin_lock+0x81/0xd0 [ 210.133289][ C0] ? blk_mq_stop_hw_queue+0x30/0x30 [ 210.134399][ C0] ? _raw_read_lock_irq+0x40/0x40 [ 210.135458][ C0] __do_softirq+0x190/0x611 [ 210.136409][ C0] ? handle_edge_irq+0x221/0xb60 [ 210.137447][ C0] asm_call_irq_on_stack+0x12/0x20 [ 210.138507][ C0] [ 210.139118][ C0] do_softirq_own_stack+0x37/0x40 [ 210.140191][ C0] irq_exit_rcu+0x110/0x1b0 [ 210.141151][ C0] common_interrupt+0x74/0x120 [ 210.142171][ C0] asm_common_interrupt+0x1e/0x40 Fix this by allocating crypto requests with GFP_ATOMIC mask in interrupt context. Fixes: 39d42fa96ba1 ("dm crypt: add flags to optionally bypass kcryptd workqueues") Cc: stable@vger.kernel.org # v5.9+ Reported-by: Maciej S. Szmigiero Signed-off-by: Ignat Korchagin Acked-by: Mikulas Patocka Signed-off-by: Mike Snitzer --- drivers/md/dm-crypt.c | 35 +++++++++++++++++++++++++---------- 1 file changed, 25 insertions(+), 10 deletions(-) diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 527b1475b7a07..d2d6d3000f5bd 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -1454,13 +1454,16 @@ static int crypt_convert_block_skcipher(struct crypt_config *cc, static void kcryptd_async_done(struct crypto_async_request *async_req, int error); -static void crypt_alloc_req_skcipher(struct crypt_config *cc, +static int crypt_alloc_req_skcipher(struct crypt_config *cc, struct convert_context *ctx) { unsigned key_index = ctx->cc_sector & (cc->tfms_count - 1); - if (!ctx->r.req) - ctx->r.req = mempool_alloc(&cc->req_pool, GFP_NOIO); + if (!ctx->r.req) { + ctx->r.req = mempool_alloc(&cc->req_pool, in_interrupt() ? GFP_ATOMIC : GFP_NOIO); + if (!ctx->r.req) + return -ENOMEM; + } skcipher_request_set_tfm(ctx->r.req, cc->cipher_tfm.tfms[key_index]); @@ -1471,13 +1474,18 @@ static void crypt_alloc_req_skcipher(struct crypt_config *cc, skcipher_request_set_callback(ctx->r.req, CRYPTO_TFM_REQ_MAY_BACKLOG, kcryptd_async_done, dmreq_of_req(cc, ctx->r.req)); + + return 0; } -static void crypt_alloc_req_aead(struct crypt_config *cc, +static int crypt_alloc_req_aead(struct crypt_config *cc, struct convert_context *ctx) { - if (!ctx->r.req_aead) - ctx->r.req_aead = mempool_alloc(&cc->req_pool, GFP_NOIO); + if (!ctx->r.req) { + ctx->r.req = mempool_alloc(&cc->req_pool, in_interrupt() ? GFP_ATOMIC : GFP_NOIO); + if (!ctx->r.req) + return -ENOMEM; + } aead_request_set_tfm(ctx->r.req_aead, cc->cipher_tfm.tfms_aead[0]); @@ -1488,15 +1496,17 @@ static void crypt_alloc_req_aead(struct crypt_config *cc, aead_request_set_callback(ctx->r.req_aead, CRYPTO_TFM_REQ_MAY_BACKLOG, kcryptd_async_done, dmreq_of_req(cc, ctx->r.req_aead)); + + return 0; } -static void crypt_alloc_req(struct crypt_config *cc, +static int crypt_alloc_req(struct crypt_config *cc, struct convert_context *ctx) { if (crypt_integrity_aead(cc)) - crypt_alloc_req_aead(cc, ctx); + return crypt_alloc_req_aead(cc, ctx); else - crypt_alloc_req_skcipher(cc, ctx); + return crypt_alloc_req_skcipher(cc, ctx); } static void crypt_free_req_skcipher(struct crypt_config *cc, @@ -1545,7 +1555,12 @@ static blk_status_t crypt_convert(struct crypt_config *cc, while (ctx->iter_in.bi_size && ctx->iter_out.bi_size) { - crypt_alloc_req(cc, ctx); + r = crypt_alloc_req(cc, ctx); + if (r) { + complete(&ctx->restart); + return BLK_STS_DEV_RESOURCE; + } + atomic_inc(&ctx->cc_pending); if (crypt_integrity_aead(cc)) -- GitLab From a0a6df9afcaf439a6b4c88a3b522e3d05fdef46f Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 4 Jan 2021 15:25:34 -0500 Subject: [PATCH 0203/2012] umount(2): move the flag validity checks first Unfortunately, there's userland code that used to rely upon these checks being done before anything else to check for UMOUNT_NOFOLLOW support. That broke in 41525f56e256 ("fs: refactor ksys_umount"). Separate those from the rest of checks and move them to ksys_umount(); unlike everything else in there, this can be sanely done there. Reported-by: Sargun Dhillon Fixes: 41525f56e256 ("fs: refactor ksys_umount") Signed-off-by: Al Viro --- fs/namespace.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/fs/namespace.c b/fs/namespace.c index d2db7dfe232b3..9d33909d0f9e3 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1713,8 +1713,6 @@ static int can_umount(const struct path *path, int flags) { struct mount *mnt = real_mount(path->mnt); - if (flags & ~(MNT_FORCE | MNT_DETACH | MNT_EXPIRE | UMOUNT_NOFOLLOW)) - return -EINVAL; if (!may_mount()) return -EPERM; if (path->dentry != path->mnt->mnt_root) @@ -1728,6 +1726,7 @@ static int can_umount(const struct path *path, int flags) return 0; } +// caller is responsible for flags being sane int path_umount(struct path *path, int flags) { struct mount *mnt = real_mount(path->mnt); @@ -1749,6 +1748,10 @@ static int ksys_umount(char __user *name, int flags) struct path path; int ret; + // basic validity checks done first + if (flags & ~(MNT_FORCE | MNT_DETACH | MNT_EXPIRE | UMOUNT_NOFOLLOW)) + return -EINVAL; + if (!(flags & UMOUNT_NOFOLLOW)) lookup_flags |= LOOKUP_FOLLOW; ret = user_path_at(AT_FDCWD, name, lookup_flags, &path); -- GitLab From 81b6d05ccad4f3d8a9dfb091fb46ad6978ee40e4 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Mon, 4 Jan 2021 20:36:35 +0000 Subject: [PATCH 0204/2012] io_uring: synchronise IOPOLL on task_submit fail io_req_task_submit() might be called for IOPOLL, do the fail path under uring_lock to comply with IOPOLL synchronisation based solely on it. Cc: stable@vger.kernel.org # 5.5+ Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io_uring.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index ca46f314640b1..5be33fd8b6bc6 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -2126,15 +2126,16 @@ static void io_req_task_cancel(struct callback_head *cb) static void __io_req_task_submit(struct io_kiocb *req) { struct io_ring_ctx *ctx = req->ctx; + bool fail; - if (!__io_sq_thread_acquire_mm(ctx) && - !__io_sq_thread_acquire_files(ctx)) { - mutex_lock(&ctx->uring_lock); + fail = __io_sq_thread_acquire_mm(ctx) || + __io_sq_thread_acquire_files(ctx); + mutex_lock(&ctx->uring_lock); + if (!fail) __io_queue_sqe(req, NULL); - mutex_unlock(&ctx->uring_lock); - } else { + else __io_req_task_cancel(req, -EFAULT); - } + mutex_unlock(&ctx->uring_lock); } static void io_req_task_submit(struct callback_head *cb) -- GitLab From 6c503150ae33ee19036255cfda0998463613352c Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Mon, 4 Jan 2021 20:36:36 +0000 Subject: [PATCH 0205/2012] io_uring: patch up IOPOLL overflow_flush sync IOPOLL skips completion locking but keeps it under uring_lock, thus io_cqring_overflow_flush() and so io_cqring_events() need additional locking with uring_lock in some cases for IOPOLL. Remove __io_cqring_overflow_flush() from io_cqring_events(), introduce a wrapper around flush doing needed synchronisation and call it by hand. Cc: stable@vger.kernel.org # 5.5+ Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io_uring.c | 78 +++++++++++++++++++++++++++------------------------ 1 file changed, 41 insertions(+), 37 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 5be33fd8b6bc6..445035b24a507 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -1713,9 +1713,9 @@ static void io_cqring_ev_posted(struct io_ring_ctx *ctx) } /* Returns true if there are no backlogged entries after the flush */ -static bool io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force, - struct task_struct *tsk, - struct files_struct *files) +static bool __io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force, + struct task_struct *tsk, + struct files_struct *files) { struct io_rings *rings = ctx->rings; struct io_kiocb *req, *tmp; @@ -1768,6 +1768,20 @@ static bool io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force, return all_flushed; } +static void io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force, + struct task_struct *tsk, + struct files_struct *files) +{ + if (test_bit(0, &ctx->cq_check_overflow)) { + /* iopoll syncs against uring_lock, not completion_lock */ + if (ctx->flags & IORING_SETUP_IOPOLL) + mutex_lock(&ctx->uring_lock); + __io_cqring_overflow_flush(ctx, force, tsk, files); + if (ctx->flags & IORING_SETUP_IOPOLL) + mutex_unlock(&ctx->uring_lock); + } +} + static void __io_cqring_fill_event(struct io_kiocb *req, long res, long cflags) { struct io_ring_ctx *ctx = req->ctx; @@ -2314,20 +2328,8 @@ static void io_double_put_req(struct io_kiocb *req) io_free_req(req); } -static unsigned io_cqring_events(struct io_ring_ctx *ctx, bool noflush) +static unsigned io_cqring_events(struct io_ring_ctx *ctx) { - if (test_bit(0, &ctx->cq_check_overflow)) { - /* - * noflush == true is from the waitqueue handler, just ensure - * we wake up the task, and the next invocation will flush the - * entries. We cannot safely to it from here. - */ - if (noflush) - return -1U; - - io_cqring_overflow_flush(ctx, false, NULL, NULL); - } - /* See comment at the top of this file */ smp_rmb(); return __io_cqring_events(ctx); @@ -2552,7 +2554,9 @@ static int io_iopoll_check(struct io_ring_ctx *ctx, long min) * If we do, we can potentially be spinning for commands that * already triggered a CQE (eg in error). */ - if (io_cqring_events(ctx, false)) + if (test_bit(0, &ctx->cq_check_overflow)) + __io_cqring_overflow_flush(ctx, false, NULL, NULL); + if (io_cqring_events(ctx)) break; /* @@ -6827,7 +6831,7 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr) /* if we have a backlog and couldn't flush it all, return BUSY */ if (test_bit(0, &ctx->sq_check_overflow)) { - if (!io_cqring_overflow_flush(ctx, false, NULL, NULL)) + if (!__io_cqring_overflow_flush(ctx, false, NULL, NULL)) return -EBUSY; } @@ -7090,7 +7094,7 @@ struct io_wait_queue { unsigned nr_timeouts; }; -static inline bool io_should_wake(struct io_wait_queue *iowq, bool noflush) +static inline bool io_should_wake(struct io_wait_queue *iowq) { struct io_ring_ctx *ctx = iowq->ctx; @@ -7099,7 +7103,7 @@ static inline bool io_should_wake(struct io_wait_queue *iowq, bool noflush) * started waiting. For timeouts, we always want to return to userspace, * regardless of event count. */ - return io_cqring_events(ctx, noflush) >= iowq->to_wait || + return io_cqring_events(ctx) >= iowq->to_wait || atomic_read(&ctx->cq_timeouts) != iowq->nr_timeouts; } @@ -7109,11 +7113,13 @@ static int io_wake_function(struct wait_queue_entry *curr, unsigned int mode, struct io_wait_queue *iowq = container_of(curr, struct io_wait_queue, wq); - /* use noflush == true, as we can't safely rely on locking context */ - if (!io_should_wake(iowq, true)) - return -1; - - return autoremove_wake_function(curr, mode, wake_flags, key); + /* + * Cannot safely flush overflowed CQEs from here, ensure we wake up + * the task, and the next invocation will do it. + */ + if (io_should_wake(iowq) || test_bit(0, &iowq->ctx->cq_check_overflow)) + return autoremove_wake_function(curr, mode, wake_flags, key); + return -1; } static int io_run_task_work_sig(void) @@ -7150,7 +7156,8 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, int ret = 0; do { - if (io_cqring_events(ctx, false) >= min_events) + io_cqring_overflow_flush(ctx, false, NULL, NULL); + if (io_cqring_events(ctx) >= min_events) return 0; if (!io_run_task_work()) break; @@ -7178,6 +7185,7 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, iowq.nr_timeouts = atomic_read(&ctx->cq_timeouts); trace_io_uring_cqring_wait(ctx, min_events); do { + io_cqring_overflow_flush(ctx, false, NULL, NULL); prepare_to_wait_exclusive(&ctx->wait, &iowq.wq, TASK_INTERRUPTIBLE); /* make sure we run task_work before checking for signals */ @@ -7186,8 +7194,10 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, continue; else if (ret < 0) break; - if (io_should_wake(&iowq, false)) + if (io_should_wake(&iowq)) break; + if (test_bit(0, &ctx->cq_check_overflow)) + continue; if (uts) { timeout = schedule_timeout(timeout); if (timeout == 0) { @@ -8625,7 +8635,8 @@ static __poll_t io_uring_poll(struct file *file, poll_table *wait) smp_rmb(); if (!io_sqring_full(ctx)) mask |= EPOLLOUT | EPOLLWRNORM; - if (io_cqring_events(ctx, false)) + io_cqring_overflow_flush(ctx, false, NULL, NULL); + if (io_cqring_events(ctx)) mask |= EPOLLIN | EPOLLRDNORM; return mask; @@ -8683,7 +8694,7 @@ static void io_ring_ctx_wait_and_kill(struct io_ring_ctx *ctx) /* if force is set, the ring is going away. always drop after that */ ctx->cq_overflow_flushed = 1; if (ctx->rings) - io_cqring_overflow_flush(ctx, true, NULL, NULL); + __io_cqring_overflow_flush(ctx, true, NULL, NULL); mutex_unlock(&ctx->uring_lock); io_kill_timeouts(ctx, NULL, NULL); @@ -8857,9 +8868,7 @@ static void io_uring_cancel_task_requests(struct io_ring_ctx *ctx, } io_cancel_defer_files(ctx, task, files); - io_ring_submit_lock(ctx, (ctx->flags & IORING_SETUP_IOPOLL)); io_cqring_overflow_flush(ctx, true, task, files); - io_ring_submit_unlock(ctx, (ctx->flags & IORING_SETUP_IOPOLL)); if (!files) __io_uring_cancel_task_requests(ctx, task); @@ -9195,13 +9204,8 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit, */ ret = 0; if (ctx->flags & IORING_SETUP_SQPOLL) { - if (!list_empty_careful(&ctx->cq_overflow_list)) { - bool needs_lock = ctx->flags & IORING_SETUP_IOPOLL; + io_cqring_overflow_flush(ctx, false, NULL, NULL); - io_ring_submit_lock(ctx, needs_lock); - io_cqring_overflow_flush(ctx, false, NULL, NULL); - io_ring_submit_unlock(ctx, needs_lock); - } if (flags & IORING_ENTER_SQ_WAKEUP) wake_up(&ctx->sq_data->wait); if (flags & IORING_ENTER_SQ_WAIT) -- GitLab From de7f1d9e99d8b99e4e494ad8fcd91f0c4c5c9357 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Mon, 4 Jan 2021 20:43:29 +0000 Subject: [PATCH 0206/2012] io_uring: drop file refs after task cancel io_uring fds marked O_CLOEXEC and we explicitly cancel all requests before going through exec, so we don't want to leave task's file references to not our anymore io_uring instances. Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io_uring.c | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 445035b24a507..85de42c424335 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -8958,6 +8958,15 @@ static void io_uring_attempt_task_drop(struct file *file) io_uring_del_task_file(file); } +static void io_uring_remove_task_files(struct io_uring_task *tctx) +{ + struct file *file; + unsigned long index; + + xa_for_each(&tctx->xa, index, file) + io_uring_del_task_file(file); +} + void __io_uring_files_cancel(struct files_struct *files) { struct io_uring_task *tctx = current->io_uring; @@ -8966,16 +8975,12 @@ void __io_uring_files_cancel(struct files_struct *files) /* make sure overflow events are dropped */ atomic_inc(&tctx->in_idle); - - xa_for_each(&tctx->xa, index, file) { - struct io_ring_ctx *ctx = file->private_data; - - io_uring_cancel_task_requests(ctx, files); - if (files) - io_uring_del_task_file(file); - } - + xa_for_each(&tctx->xa, index, file) + io_uring_cancel_task_requests(file->private_data, files); atomic_dec(&tctx->in_idle); + + if (files) + io_uring_remove_task_files(tctx); } static s64 tctx_inflight(struct io_uring_task *tctx) @@ -9038,6 +9043,8 @@ void __io_uring_task_cancel(void) } while (1); atomic_dec(&tctx->in_idle); + + io_uring_remove_task_files(tctx); } static int io_uring_flush(struct file *file, void *data) -- GitLab From 90df08538c07b7135703358a0c8c08d97889a704 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Mon, 4 Jan 2021 20:43:30 +0000 Subject: [PATCH 0207/2012] io_uring: cancel more aggressively in exit_work While io_ring_exit_work() is running new requests of all sorts may be issued, so it should do a bit more to cancel them, otherwise they may just get stuck. e.g. in io-wq, in poll lists, etc. Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io_uring.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 85de42c424335..5bccb235271f7 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -992,6 +992,9 @@ enum io_mem_account { ACCT_PINNED, }; +static void __io_uring_cancel_task_requests(struct io_ring_ctx *ctx, + struct task_struct *task); + static void destroy_fixed_file_ref_node(struct fixed_file_ref_node *ref_node); static struct fixed_file_ref_node *alloc_fixed_file_ref_node( struct io_ring_ctx *ctx); @@ -8675,7 +8678,7 @@ static void io_ring_exit_work(struct work_struct *work) * as nobody else will be looking for them. */ do { - io_iopoll_try_reap_events(ctx); + __io_uring_cancel_task_requests(ctx, NULL); } while (!wait_for_completion_timeout(&ctx->ref_comp, HZ/20)); io_ring_ctx_free(ctx); } @@ -8830,9 +8833,11 @@ static void __io_uring_cancel_task_requests(struct io_ring_ctx *ctx, enum io_wq_cancel cret; bool ret = false; - cret = io_wq_cancel_cb(ctx->io_wq, io_cancel_task_cb, &cancel, true); - if (cret != IO_WQ_CANCEL_NOTFOUND) - ret = true; + if (ctx->io_wq) { + cret = io_wq_cancel_cb(ctx->io_wq, io_cancel_task_cb, + &cancel, true); + ret |= (cret != IO_WQ_CANCEL_NOTFOUND); + } /* SQPOLL thread does its own polling */ if (!(ctx->flags & IORING_SETUP_SQPOLL)) { -- GitLab From 6337c2353a069b6f1276dc35421e421ef6c1ead9 Mon Sep 17 00:00:00 2001 From: Marco Felsch Date: Wed, 2 Dec 2020 19:05:58 +0100 Subject: [PATCH 0208/2012] ARM: dts: imx6qdl-kontron-samx6i: fix pwms for lcd-backlight MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The pwms property have to specify the no-/inverted flag since commit fa28d8212ede ("ARM: dts: imx: default to #pwm-cells = <3> in the SoC dtsi files"). Fixes: fa28d8212ede ("ARM: dts: imx: default to #pwm-cells = <3> in the SoC dtsi files") Signed-off-by: Marco Felsch Reviewed-by: Uwe Kleine-König Signed-off-by: Shawn Guo --- arch/arm/boot/dts/imx6qdl-kontron-samx6i.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/imx6qdl-kontron-samx6i.dtsi b/arch/arm/boot/dts/imx6qdl-kontron-samx6i.dtsi index d6df598bd1c29..36c2f0d9ce164 100644 --- a/arch/arm/boot/dts/imx6qdl-kontron-samx6i.dtsi +++ b/arch/arm/boot/dts/imx6qdl-kontron-samx6i.dtsi @@ -137,7 +137,7 @@ lcd_backlight: lcd-backlight { compatible = "pwm-backlight"; - pwms = <&pwm4 0 5000000>; + pwms = <&pwm4 0 5000000 0>; pwm-names = "LCD_BKLT_PWM"; brightness-levels = <0 10 20 30 40 50 60 70 80 90 100>; -- GitLab From 75353bcd2184010f08a3ed2f0da019bd9d604e1e Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Thu, 24 Dec 2020 15:13:57 +0000 Subject: [PATCH 0209/2012] drm/i915: clear the shadow batch The shadow batch is an internal object, which doesn't have any page clearing, and since the batch_len can be smaller than the object, we should take care to clear it. Testcase: igt/gen9_exec_parse/shadow-peek Fixes: 4f7af1948abc ("drm/i915: Support ro ppgtt mapped cmdparser shadow buffers") Signed-off-by: Matthew Auld Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20201224151358.401345-1-matthew.auld@intel.com Cc: stable@vger.kernel.org (cherry picked from commit eeb52ee6c4a429ec301faf1dc48988744960786e) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_cmd_parser.c | 27 +++++++++----------------- 1 file changed, 9 insertions(+), 18 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c index 93265951fdbbd..b0899b665e852 100644 --- a/drivers/gpu/drm/i915/i915_cmd_parser.c +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c @@ -1166,7 +1166,7 @@ static u32 *copy_batch(struct drm_i915_gem_object *dst_obj, } } if (IS_ERR(src)) { - unsigned long x, n; + unsigned long x, n, remain; void *ptr; /* @@ -1177,14 +1177,15 @@ static u32 *copy_batch(struct drm_i915_gem_object *dst_obj, * We don't care about copying too much here as we only * validate up to the end of the batch. */ + remain = length; if (!(dst_obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ)) - length = round_up(length, + remain = round_up(remain, boot_cpu_data.x86_clflush_size); ptr = dst; x = offset_in_page(offset); - for (n = offset >> PAGE_SHIFT; length; n++) { - int len = min(length, PAGE_SIZE - x); + for (n = offset >> PAGE_SHIFT; remain; n++) { + int len = min(remain, PAGE_SIZE - x); src = kmap_atomic(i915_gem_object_get_page(src_obj, n)); if (needs_clflush) @@ -1193,13 +1194,15 @@ static u32 *copy_batch(struct drm_i915_gem_object *dst_obj, kunmap_atomic(src); ptr += len; - length -= len; + remain -= len; x = 0; } } i915_gem_object_unpin_pages(src_obj); + memset32(dst + length, 0, (dst_obj->base.size - length) / sizeof(u32)); + /* dst_obj is returned with vmap pinned */ return dst; } @@ -1392,11 +1395,6 @@ static unsigned long *alloc_whitelist(u32 batch_length) #define LENGTH_BIAS 2 -static bool shadow_needs_clflush(struct drm_i915_gem_object *obj) -{ - return !(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE); -} - /** * intel_engine_cmd_parser() - parse a batch buffer for privilege violations * @engine: the engine on which the batch is to execute @@ -1538,16 +1536,9 @@ int intel_engine_cmd_parser(struct intel_engine_cs *engine, ret = 0; /* allow execution */ } } - - if (shadow_needs_clflush(shadow->obj)) - drm_clflush_virt_range(batch_end, 8); } - if (shadow_needs_clflush(shadow->obj)) { - void *ptr = page_mask_bits(shadow->obj->mm.mapping); - - drm_clflush_virt_range(ptr, (void *)(cmd + 1) - ptr); - } + i915_gem_object_flush_map(shadow->obj); if (!IS_ERR_OR_NULL(jump_whitelist)) kfree(jump_whitelist); -- GitLab From 641382e9b44fba81a0778e1914ee35b8471121f9 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Thu, 24 Dec 2020 15:13:58 +0000 Subject: [PATCH 0210/2012] drm/i915: clear the gpu reloc batch The reloc batch is short lived but can exist in the user visible ppGTT, and since it's backed by an internal object, which lacks page clearing, we should take care to clear it upfront. Signed-off-by: Matthew Auld Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20201224151358.401345-2-matthew.auld@intel.com Cc: stable@vger.kernel.org (cherry picked from commit 26ebc511e799f621357982ccc37a7987a56a00f4) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index bcc80f428172b..bd3046e5a9348 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -1046,7 +1046,7 @@ static void reloc_gpu_flush(struct i915_execbuffer *eb, struct reloc_cache *cach GEM_BUG_ON(cache->rq_size >= obj->base.size / sizeof(u32)); cache->rq_cmd[cache->rq_size] = MI_BATCH_BUFFER_END; - __i915_gem_object_flush_map(obj, 0, sizeof(u32) * (cache->rq_size + 1)); + i915_gem_object_flush_map(obj); i915_gem_object_unpin_map(obj); intel_gt_chipset_flush(cache->rq->engine->gt); @@ -1296,6 +1296,8 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb, goto err_pool; } + memset32(cmd, 0, pool->obj->base.size / sizeof(u32)); + batch = i915_vma_instance(pool->obj, vma->vm, NULL); if (IS_ERR(batch)) { err = PTR_ERR(batch); -- GitLab From 557862535c2cad6de6f6fb12312b7a6d09c06407 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 29 Dec 2020 12:08:28 +0000 Subject: [PATCH 0211/2012] drm/i915/gt: Define guc firmware blob for older Cometlakes When splitting the Coffeelake define to also identify Cometlakes, I missed the double fw_def for Coffeelake. That is only newer Cometlakes use the cml specific guc firmware, older Cometlakes should use kbl firmware. Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/2859 Fixes: 5f4ae2704d59 ("drm/i915: Identify Cometlake platform") Signed-off-by: Chris Wilson Cc: # v5.9+ Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20201229120828.29931-1-chris@chris-wilson.co.uk (cherry picked from commit 70960ab27542d8dc322f909f516391f331fbd3f1) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c index 180c23e2e25e4..602f1a0bc5871 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c @@ -53,6 +53,7 @@ void intel_uc_fw_change_status(struct intel_uc_fw *uc_fw, fw_def(ELKHARTLAKE, 0, guc_def(ehl, 49, 0, 1), huc_def(ehl, 9, 0, 0)) \ fw_def(ICELAKE, 0, guc_def(icl, 49, 0, 1), huc_def(icl, 9, 0, 0)) \ fw_def(COMETLAKE, 5, guc_def(cml, 49, 0, 1), huc_def(cml, 4, 0, 0)) \ + fw_def(COMETLAKE, 0, guc_def(kbl, 49, 0, 1), huc_def(kbl, 4, 0, 0)) \ fw_def(COFFEELAKE, 0, guc_def(kbl, 49, 0, 1), huc_def(kbl, 4, 0, 0)) \ fw_def(GEMINILAKE, 0, guc_def(glk, 49, 0, 1), huc_def(glk, 4, 0, 0)) \ fw_def(KABYLAKE, 0, guc_def(kbl, 49, 0, 1), huc_def(kbl, 4, 0, 0)) \ -- GitLab From 9397d66212cdf7a21c66523f1583e5d63a609e84 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 30 Dec 2020 20:23:09 +0000 Subject: [PATCH 0212/2012] drm/i915/dp: Track pm_qos per connector MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since multiple connectors may run intel_dp_aux_xfer conncurrently, a single global pm_qos does not suffice. (One connector may disable the dma-latency boost prematurely while the second is still depending on it.) Instead of a single global pm_qos, track the pm_qos request for each intel_dp. v2: Move the pm_qos setup/teardown to intel_dp_aux_init/fini Fixes: 9ee32fea5fe8 ("drm/i915: irq-drive the dp aux communication") Signed-off-by: Chris Wilson Cc: Ville Syrjälä Cc: Imre Deak Reviewed-by: Imre Deak Link: https://patchwork.freedesktop.org/patch/msgid/20201230202309.23982-1-chris@chris-wilson.co.uk (cherry picked from commit b3304591f14b437b6bccd8dbff06006c11837031) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_display_types.h | 3 +++ drivers/gpu/drm/i915/display/intel_dp.c | 8 ++++++-- drivers/gpu/drm/i915/i915_drv.c | 5 ----- drivers/gpu/drm/i915/i915_drv.h | 3 --- 4 files changed, 9 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display_types.h b/drivers/gpu/drm/i915/display/intel_display_types.h index ce82d654d0f24..34d78c654df3b 100644 --- a/drivers/gpu/drm/i915/display/intel_display_types.h +++ b/drivers/gpu/drm/i915/display/intel_display_types.h @@ -1436,6 +1436,9 @@ struct intel_dp { bool ycbcr_444_to_420; } dfp; + /* To control wakeup latency, e.g. for irq-driven dp aux transfers. */ + struct pm_qos_request pm_qos; + /* Display stream compression testing */ bool force_dsc_en; diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index 2165398d2c7cd..37f1a10fd0217 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -1489,7 +1489,7 @@ intel_dp_aux_xfer(struct intel_dp *intel_dp, * lowest possible wakeup latency and so prevent the cpu from going into * deep sleep states. */ - cpu_latency_qos_update_request(&i915->pm_qos, 0); + cpu_latency_qos_update_request(&intel_dp->pm_qos, 0); intel_dp_check_edp(intel_dp); @@ -1622,7 +1622,7 @@ done: ret = recv_bytes; out: - cpu_latency_qos_update_request(&i915->pm_qos, PM_QOS_DEFAULT_VALUE); + cpu_latency_qos_update_request(&intel_dp->pm_qos, PM_QOS_DEFAULT_VALUE); if (vdd) edp_panel_vdd_off(intel_dp, false); @@ -1898,6 +1898,9 @@ static i915_reg_t tgl_aux_data_reg(struct intel_dp *intel_dp, int index) static void intel_dp_aux_fini(struct intel_dp *intel_dp) { + if (cpu_latency_qos_request_active(&intel_dp->pm_qos)) + cpu_latency_qos_remove_request(&intel_dp->pm_qos); + kfree(intel_dp->aux.name); } @@ -1950,6 +1953,7 @@ intel_dp_aux_init(struct intel_dp *intel_dp) encoder->base.name); intel_dp->aux.transfer = intel_dp_aux_transfer; + cpu_latency_qos_add_request(&intel_dp->pm_qos, PM_QOS_DEFAULT_VALUE); } bool intel_dp_source_supports_hbr2(struct intel_dp *intel_dp) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 320856b665a17..88ad754962af1 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -578,8 +578,6 @@ static int i915_driver_hw_probe(struct drm_i915_private *dev_priv) pci_set_master(pdev); - cpu_latency_qos_add_request(&dev_priv->pm_qos, PM_QOS_DEFAULT_VALUE); - intel_gt_init_workarounds(dev_priv); /* On the 945G/GM, the chipset reports the MSI capability on the @@ -626,7 +624,6 @@ static int i915_driver_hw_probe(struct drm_i915_private *dev_priv) err_msi: if (pdev->msi_enabled) pci_disable_msi(pdev); - cpu_latency_qos_remove_request(&dev_priv->pm_qos); err_mem_regions: intel_memory_regions_driver_release(dev_priv); err_ggtt: @@ -648,8 +645,6 @@ static void i915_driver_hw_remove(struct drm_i915_private *dev_priv) if (pdev->msi_enabled) pci_disable_msi(pdev); - - cpu_latency_qos_remove_request(&dev_priv->pm_qos); } /** diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 0a3ee4f9dc0a7..632c713227dc7 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -891,9 +891,6 @@ struct drm_i915_private { bool display_irqs_enabled; - /* To control wakeup latency, e.g. for irq-driven dp aux transfers. */ - struct pm_qos_request pm_qos; - /* Sideband mailbox protection */ struct mutex sb_lock; struct pm_qos_request sb_qos; -- GitLab From 05f6f7271a38c482c5021967433f7b698e102c45 Mon Sep 17 00:00:00 2001 From: Qii Wang Date: Thu, 24 Dec 2020 20:26:07 +0800 Subject: [PATCH 0213/2012] i2c: mediatek: Fix apdma and i2c hand-shake timeout With the apdma remove hand-shake signal, it requirs special operation timing to reset i2c manually, otherwise the interrupt will not be triggered, i2c transmission will be timeout. Fixes: 8426fe70cfa4("i2c: mediatek: Add apdma sync in i2c driver") Signed-off-by: Qii Wang Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-mt65xx.c | 27 ++++++++++++++++++++++----- 1 file changed, 22 insertions(+), 5 deletions(-) diff --git a/drivers/i2c/busses/i2c-mt65xx.c b/drivers/i2c/busses/i2c-mt65xx.c index 33de99b7bc20c..0818d3e507347 100644 --- a/drivers/i2c/busses/i2c-mt65xx.c +++ b/drivers/i2c/busses/i2c-mt65xx.c @@ -38,6 +38,7 @@ #define I2C_IO_CONFIG_OPEN_DRAIN 0x0003 #define I2C_IO_CONFIG_PUSH_PULL 0x0000 #define I2C_SOFT_RST 0x0001 +#define I2C_HANDSHAKE_RST 0x0020 #define I2C_FIFO_ADDR_CLR 0x0001 #define I2C_DELAY_LEN 0x0002 #define I2C_TIME_CLR_VALUE 0x0000 @@ -45,6 +46,7 @@ #define I2C_WRRD_TRANAC_VALUE 0x0002 #define I2C_RD_TRANAC_VALUE 0x0001 #define I2C_SCL_MIS_COMP_VALUE 0x0000 +#define I2C_CHN_CLR_FLAG 0x0000 #define I2C_DMA_CON_TX 0x0000 #define I2C_DMA_CON_RX 0x0001 @@ -54,7 +56,9 @@ #define I2C_DMA_START_EN 0x0001 #define I2C_DMA_INT_FLAG_NONE 0x0000 #define I2C_DMA_CLR_FLAG 0x0000 +#define I2C_DMA_WARM_RST 0x0001 #define I2C_DMA_HARD_RST 0x0002 +#define I2C_DMA_HANDSHAKE_RST 0x0004 #define MAX_SAMPLE_CNT_DIV 8 #define MAX_STEP_CNT_DIV 64 @@ -475,11 +479,24 @@ static void mtk_i2c_init_hw(struct mtk_i2c *i2c) { u16 control_reg; - writel(I2C_DMA_HARD_RST, i2c->pdmabase + OFFSET_RST); - udelay(50); - writel(I2C_DMA_CLR_FLAG, i2c->pdmabase + OFFSET_RST); - - mtk_i2c_writew(i2c, I2C_SOFT_RST, OFFSET_SOFTRESET); + if (i2c->dev_comp->dma_sync) { + writel(I2C_DMA_WARM_RST, i2c->pdmabase + OFFSET_RST); + udelay(10); + writel(I2C_DMA_CLR_FLAG, i2c->pdmabase + OFFSET_RST); + udelay(10); + writel(I2C_DMA_HANDSHAKE_RST | I2C_DMA_HARD_RST, + i2c->pdmabase + OFFSET_RST); + mtk_i2c_writew(i2c, I2C_HANDSHAKE_RST | I2C_SOFT_RST, + OFFSET_SOFTRESET); + udelay(10); + writel(I2C_DMA_CLR_FLAG, i2c->pdmabase + OFFSET_RST); + mtk_i2c_writew(i2c, I2C_CHN_CLR_FLAG, OFFSET_SOFTRESET); + } else { + writel(I2C_DMA_HARD_RST, i2c->pdmabase + OFFSET_RST); + udelay(50); + writel(I2C_DMA_CLR_FLAG, i2c->pdmabase + OFFSET_RST); + mtk_i2c_writew(i2c, I2C_SOFT_RST, OFFSET_SOFTRESET); + } /* Set ioconfig */ if (i2c->use_push_pull) -- GitLab From d1c5246e08eb64991001d97a3bd119c93edbc79a Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 2 Dec 2020 22:28:12 -0800 Subject: [PATCH 0214/2012] x86/mm: Fix leak of pmd ptlock Commit 28ee90fe6048 ("x86/mm: implement free pmd/pte page interfaces") introduced a new location where a pmd was released, but neglected to run the pmd page destructor. In fact, this happened previously for a different pmd release path and was fixed by commit: c283610e44ec ("x86, mm: do not leak page->ptl for pmd page tables"). This issue was hidden until recently because the failure mode is silent, but commit: b2b29d6d0119 ("mm: account PMD tables like PTE tables") turns the failure mode into this signature: BUG: Bad page state in process lt-pmem-ns pfn:15943d page:000000007262ed7b refcount:0 mapcount:-1024 mapping:0000000000000000 index:0x0 pfn:0x15943d flags: 0xaffff800000000() raw: 00affff800000000 dead000000000100 0000000000000000 0000000000000000 raw: 0000000000000000 ffff913a029bcc08 00000000fffffbff 0000000000000000 page dumped because: nonzero mapcount [..] dump_stack+0x8b/0xb0 bad_page.cold+0x63/0x94 free_pcp_prepare+0x224/0x270 free_unref_page+0x18/0xd0 pud_free_pmd_page+0x146/0x160 ioremap_pud_range+0xe3/0x350 ioremap_page_range+0x108/0x160 __ioremap_caller.constprop.0+0x174/0x2b0 ? memremap+0x7a/0x110 memremap+0x7a/0x110 devm_memremap+0x53/0xa0 pmem_attach_disk+0x4ed/0x530 [nd_pmem] ? __devm_release_region+0x52/0x80 nvdimm_bus_probe+0x85/0x210 [libnvdimm] Given this is a repeat occurrence it seemed prudent to look for other places where this destructor might be missing and whether a better helper is needed. try_to_free_pmd_page() looks like a candidate, but testing with setting up and tearing down pmd mappings via the dax unit tests is thus far not triggering the failure. As for a better helper pmd_free() is close, but it is a messy fit due to requiring an @mm arg. Also, ___pmd_free_tlb() wants to call paravirt_tlb_remove_table() instead of free_page(), so open-coded pgtable_pmd_page_dtor() seems the best way forward for now. Debugged together with Matthew Wilcox . Fixes: 28ee90fe6048 ("x86/mm: implement free pmd/pte page interfaces") Signed-off-by: Dan Williams Signed-off-by: Borislav Petkov Tested-by: Yi Zhang Acked-by: Peter Zijlstra (Intel) Cc: Link: https://lkml.kernel.org/r/160697689204.605323.17629854984697045602.stgit@dwillia2-desk3.amr.corp.intel.com --- arch/x86/mm/pgtable.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index dfd82f51ba66b..f6a9e2e366425 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c @@ -829,6 +829,8 @@ int pud_free_pmd_page(pud_t *pud, unsigned long addr) } free_page((unsigned long)pmd_sv); + + pgtable_pmd_page_dtor(virt_to_page(pmd)); free_page((unsigned long)pmd); return 1; -- GitLab From 4cc99d03757df10a4064ba28bf6021406b04d6a9 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Tue, 5 Jan 2021 10:56:51 +0800 Subject: [PATCH 0215/2012] irqchip/loongson-liointc: Fix build warnings Fix build warnings as below: >> drivers/irqchip/irq-loongson-liointc.c:134:12: warning: no previous prototype for 'liointc_of_init' [-Wmissing-prototypes] 134 | int __init liointc_of_init(struct device_node *node, | ^~~~~~~~~~~~~~~ Fixes: dbb152267908c4b2c3639492a94 ("irqchip: Add driver for Loongson I/O Local Interrupt Controller") Reported-by: kernel test robot Signed-off-by: Huacai Chen Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210105025651.772024-1-chenhuacai@loongson.cn --- drivers/irqchip/irq-loongson-liointc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/irqchip/irq-loongson-liointc.c b/drivers/irqchip/irq-loongson-liointc.c index 9ed1bc4736634..09b91b81851cc 100644 --- a/drivers/irqchip/irq-loongson-liointc.c +++ b/drivers/irqchip/irq-loongson-liointc.c @@ -142,8 +142,8 @@ static void liointc_resume(struct irq_chip_generic *gc) static const char * const parent_names[] = {"int0", "int1", "int2", "int3"}; -int __init liointc_of_init(struct device_node *node, - struct device_node *parent) +static int __init liointc_of_init(struct device_node *node, + struct device_node *parent) { struct irq_chip_generic *gc; struct irq_domain *domain; -- GitLab From 311bea3cb9ee20ef150ca76fc60a592bf6b159f5 Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Thu, 17 Dec 2020 16:24:32 -0800 Subject: [PATCH 0216/2012] arm64: link with -z norelro for LLD or aarch64-elf MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With GNU binutils 2.35+, linking with BFD produces warnings for vmlinux: aarch64-linux-gnu-ld: warning: -z norelro ignored BFD can produce this warning when the target emulation mode does not support RELRO program headers, and -z relro or -z norelro is passed. Alan Modra clarifies: The default linker emulation for an aarch64-linux ld.bfd is -maarch64linux, the default for an aarch64-elf linker is -maarch64elf. They are not equivalent. If you choose -maarch64elf you get an emulation that doesn't support -z relro. The ARCH=arm64 kernel prefers -maarch64elf, but may fall back to -maarch64linux based on the toolchain configuration. LLD will always create RELRO program header regardless of target emulation. To avoid the above warning when linking with BFD, pass -z norelro only when linking with LLD or with -maarch64linux. Fixes: 3b92fa7485eb ("arm64: link with -z norelro regardless of CONFIG_RELOCATABLE") Fixes: 3bbd3db86470 ("arm64: relocatable: fix inconsistencies in linker script and options") Cc: # 5.0.x- Reported-by: kernelci.org bot Reported-by: Quentin Perret Signed-off-by: Nick Desaulniers Reviewed-by: Nathan Chancellor Acked-by: Ard Biesheuvel Cc: Alan Modra Cc: Fāng-ruì Sòng Link: https://lore.kernel.org/r/20201218002432.788499-1-ndesaulniers@google.com Signed-off-by: Catalin Marinas --- arch/arm64/Makefile | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index 6be9b37502503..90309208bb28d 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -10,7 +10,7 @@ # # Copyright (C) 1995-2001 by Russell King -LDFLAGS_vmlinux :=--no-undefined -X -z norelro +LDFLAGS_vmlinux :=--no-undefined -X ifeq ($(CONFIG_RELOCATABLE), y) # Pass --no-apply-dynamic-relocs to restore pre-binutils-2.27 behaviour @@ -115,16 +115,20 @@ KBUILD_CPPFLAGS += -mbig-endian CHECKFLAGS += -D__AARCH64EB__ # Prefer the baremetal ELF build target, but not all toolchains include # it so fall back to the standard linux version if needed. -KBUILD_LDFLAGS += -EB $(call ld-option, -maarch64elfb, -maarch64linuxb) +KBUILD_LDFLAGS += -EB $(call ld-option, -maarch64elfb, -maarch64linuxb -z norelro) UTS_MACHINE := aarch64_be else KBUILD_CPPFLAGS += -mlittle-endian CHECKFLAGS += -D__AARCH64EL__ # Same as above, prefer ELF but fall back to linux target if needed. -KBUILD_LDFLAGS += -EL $(call ld-option, -maarch64elf, -maarch64linux) +KBUILD_LDFLAGS += -EL $(call ld-option, -maarch64elf, -maarch64linux -z norelro) UTS_MACHINE := aarch64 endif +ifeq ($(CONFIG_LD_IS_LLD), y) +KBUILD_LDFLAGS += -z norelro +endif + CHECKFLAGS += -D__aarch64__ ifeq ($(CONFIG_DYNAMIC_FTRACE_WITH_REGS),y) -- GitLab From 96ebc9c871d8a28fb22aa758dd9188a4732df482 Mon Sep 17 00:00:00 2001 From: Thinh Nguyen Date: Mon, 4 Jan 2021 20:07:15 -0800 Subject: [PATCH 0217/2012] usb: uas: Add PNY USB Portable SSD to unusual_uas Here's another variant PNY Pro Elite USB 3.1 Gen 2 portable SSD that hangs and doesn't respond to ATA_1x pass-through commands. If it doesn't support these commands, it should respond properly to the host. Add it to the unusual uas list to be able to move forward with other operations. Cc: stable@vger.kernel.org Reviewed-by: Hans de Goede Acked-by: Oliver Neukum Signed-off-by: Thinh Nguyen Link: https://lore.kernel.org/r/2edc7af892d0913bf06f5b35e49ec463f03d5ed8.1609819418.git.Thinh.Nguyen@synopsys.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/storage/unusual_uas.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/usb/storage/unusual_uas.h b/drivers/usb/storage/unusual_uas.h index 870e9cf3d5dc4..f9677a5ec31b2 100644 --- a/drivers/usb/storage/unusual_uas.h +++ b/drivers/usb/storage/unusual_uas.h @@ -90,6 +90,13 @@ UNUSUAL_DEV(0x152d, 0x0578, 0x0000, 0x9999, USB_SC_DEVICE, USB_PR_DEVICE, NULL, US_FL_BROKEN_FUA), +/* Reported-by: Thinh Nguyen */ +UNUSUAL_DEV(0x154b, 0xf00b, 0x0000, 0x9999, + "PNY", + "Pro Elite SSD", + USB_SC_DEVICE, USB_PR_DEVICE, NULL, + US_FL_NO_ATA_1X), + /* Reported-by: Thinh Nguyen */ UNUSUAL_DEV(0x154b, 0xf00d, 0x0000, 0x9999, "PNY", -- GitLab From 45ba7b195a369f35cb39094fdb32efe5908b34ad Mon Sep 17 00:00:00 2001 From: Shannon Zhao Date: Mon, 4 Jan 2021 19:38:44 +0800 Subject: [PATCH 0218/2012] arm64: cpufeature: remove non-exist CONFIG_KVM_ARM_HOST Commit d82755b2e781 ("KVM: arm64: Kill off CONFIG_KVM_ARM_HOST") deletes CONFIG_KVM_ARM_HOST option, it should use CONFIG_KVM instead. Just remove CONFIG_KVM_ARM_HOST here. Fixes: d82755b2e781 ("KVM: arm64: Kill off CONFIG_KVM_ARM_HOST") Signed-off-by: Shannon Zhao Acked-by: Catalin Marinas Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/1609760324-92271-1-git-send-email-shannon.zhao@linux.alibaba.com --- arch/arm64/kernel/cpufeature.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index d96f4554282df..bc3549663957c 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -2558,7 +2558,7 @@ static void verify_hyp_capabilities(void) int parange, ipa_max; unsigned int safe_vmid_bits, vmid_bits; - if (!IS_ENABLED(CONFIG_KVM) || !IS_ENABLED(CONFIG_KVM_ARM_HOST)) + if (!IS_ENABLED(CONFIG_KVM)) return; safe_mmfr1 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1); -- GitLab From c9c48bb701ba78df7d4652146b12bcf3ad716507 Mon Sep 17 00:00:00 2001 From: Samuel Thibault Date: Tue, 22 Dec 2020 02:47:56 +0100 Subject: [PATCH 0219/2012] speakup: Add github repository URL and bug tracker We have set up a repository for users to try newer releases more easily, and keep records of known bugs. Signed-off-by: Samuel Thibault Link: https://lore.kernel.org/r/20201222014756.ov5vi6fywylbp5n6@function Signed-off-by: Greg Kroah-Hartman --- MAINTAINERS | 2 ++ 1 file changed, 2 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 546aa66428c9f..62934e7706460 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -16710,6 +16710,8 @@ M: Samuel Thibault L: speakup@linux-speakup.org S: Odd Fixes W: http://www.linux-speakup.org/ +W: https://github.com/linux-speakup/speakup +B: https://github.com/linux-speakup/speakup/issues F: drivers/accessibility/speakup/ SPEAR CLOCK FRAMEWORK SUPPORT -- GitLab From dfe94d4086e40e92b1926bddcefa629b791e9b28 Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Mon, 21 Dec 2020 22:55:41 -0800 Subject: [PATCH 0220/2012] x86/hyperv: Fix kexec panic/hang issues Currently the kexec kernel can panic or hang due to 2 causes: 1) hv_cpu_die() is not called upon kexec, so the hypervisor corrupts the old VP Assist Pages when the kexec kernel runs. The same issue is fixed for hibernation in commit 421f090c819d ("x86/hyperv: Suspend/resume the VP assist page for hibernation"). Now fix it for kexec. 2) hyperv_cleanup() is called too early. In the kexec path, the other CPUs are stopped in hv_machine_shutdown() -> native_machine_shutdown(), so between hv_kexec_handler() and native_machine_shutdown(), the other CPUs can still try to access the hypercall page and cause panic. The workaround "hv_hypercall_pg = NULL;" in hyperv_cleanup() is unreliabe. Move hyperv_cleanup() to a better place. Signed-off-by: Dexuan Cui Reviewed-by: Michael Kelley Link: https://lore.kernel.org/r/20201222065541.24312-1-decui@microsoft.com Signed-off-by: Wei Liu --- arch/x86/hyperv/hv_init.c | 4 ++++ arch/x86/include/asm/mshyperv.h | 2 ++ arch/x86/kernel/cpu/mshyperv.c | 18 ++++++++++++++++++ drivers/hv/vmbus_drv.c | 2 -- 4 files changed, 24 insertions(+), 2 deletions(-) diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index e04d90af4c27c..4638a52d8eaea 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -26,6 +27,8 @@ #include #include +int hyperv_init_cpuhp; + void *hv_hypercall_pg; EXPORT_SYMBOL_GPL(hv_hypercall_pg); @@ -401,6 +404,7 @@ void __init hyperv_init(void) register_syscore_ops(&hv_syscore_ops); + hyperv_init_cpuhp = cpuhp; return; remove_cpuhp_state: diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index ffc289992d1b0..30f76b9668579 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -74,6 +74,8 @@ static inline void hv_disable_stimer0_percpu_irq(int irq) {} #if IS_ENABLED(CONFIG_HYPERV) +extern int hyperv_init_cpuhp; + extern void *hv_hypercall_pg; extern void __percpu **hyperv_pcpu_input_arg; diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index f628e3dc150f3..43b54bef5448f 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -135,14 +135,32 @@ static void hv_machine_shutdown(void) { if (kexec_in_progress && hv_kexec_handler) hv_kexec_handler(); + + /* + * Call hv_cpu_die() on all the CPUs, otherwise later the hypervisor + * corrupts the old VP Assist Pages and can crash the kexec kernel. + */ + if (kexec_in_progress && hyperv_init_cpuhp > 0) + cpuhp_remove_state(hyperv_init_cpuhp); + + /* The function calls stop_other_cpus(). */ native_machine_shutdown(); + + /* Disable the hypercall page when there is only 1 active CPU. */ + if (kexec_in_progress) + hyperv_cleanup(); } static void hv_machine_crash_shutdown(struct pt_regs *regs) { if (hv_crash_handler) hv_crash_handler(regs); + + /* The function calls crash_smp_send_stop(). */ native_machine_crash_shutdown(regs); + + /* Disable the hypercall page when there is only 1 active CPU. */ + hyperv_cleanup(); } #endif /* CONFIG_KEXEC_CORE */ #endif /* CONFIG_HYPERV */ diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c index 502f8cd95f6d4..d491fdcee61f0 100644 --- a/drivers/hv/vmbus_drv.c +++ b/drivers/hv/vmbus_drv.c @@ -2550,7 +2550,6 @@ static void hv_kexec_handler(void) /* Make sure conn_state is set as hv_synic_cleanup checks for it */ mb(); cpuhp_remove_state(hyperv_cpuhp_online); - hyperv_cleanup(); }; static void hv_crash_handler(struct pt_regs *regs) @@ -2566,7 +2565,6 @@ static void hv_crash_handler(struct pt_regs *regs) cpu = smp_processor_id(); hv_stimer_cleanup(cpu); hv_synic_disable_regs(cpu); - hyperv_cleanup(); }; static int hv_synic_suspend(void) -- GitLab From 3fb6819f411b5a89afb5726afafacf0c4b62844f Mon Sep 17 00:00:00 2001 From: Tian Tao Date: Mon, 28 Dec 2020 15:05:08 +0800 Subject: [PATCH 0221/2012] arm64: traps: remove duplicate include statement asm/exception.h is included more than once. Remove the one that isn't necessary. Signed-off-by: Tian Tao Link: https://lore.kernel.org/r/1609139108-10819-1-git-send-email-tiantao6@hisilicon.com Signed-off-by: Catalin Marinas --- arch/arm64/kernel/traps.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 08156be755691..6895ce777e7f2 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -42,7 +42,6 @@ #include #include #include -#include #include #include -- GitLab From e2bba5f92354488c331b7821d873db7c388e31aa Mon Sep 17 00:00:00 2001 From: Peter Collingbourne Date: Wed, 30 Dec 2020 14:19:54 -0800 Subject: [PATCH 0222/2012] arm64: vdso: disable .eh_frame_hdr via /DISCARD/ instead of --no-eh-frame-hdr Currently with ld.lld we emit an empty .eh_frame_hdr section (and a corresponding program header) into the vDSO. With ld.bfd the section is not emitted but the program header is, with p_vaddr set to 0. This can lead to unwinders attempting to interpret the data at whichever location the program header happens to point to as an unwind info header. This happens to be mostly harmless as long as the byte at that location (interpreted as a version number) has a value other than 1, causing both libgcc and LLVM libunwind to ignore the section (in libunwind's case, after printing an error message to stderr), but it could lead to worse problems if the byte happened to be 1 or the program header points to non-readable memory (e.g. if the empty section was placed at a page boundary). Instead of disabling .eh_frame_hdr via --no-eh-frame-hdr (which also has the downside of being unsupported by older versions of GNU binutils), disable it by discarding the section, and stop emitting the program header that points to it. I understand that we intend to emit valid unwind info for the vDSO at some point. Once that happens this patch can be reverted. Signed-off-by: Peter Collingbourne Acked-by: Ard Biesheuvel Acked-by: Vincenzo Frascino Link: https://linux-review.googlesource.com/id/If745fd9cadcb31b4010acbf5693727fe111b0863 Link: https://lore.kernel.org/r/20201230221954.2007257-1-pcc@google.com Signed-off-by: Catalin Marinas --- arch/arm64/kernel/vdso/Makefile | 3 +-- arch/arm64/kernel/vdso/vdso.lds.S | 5 +---- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile index a8f8e409e2bfb..cd9c3fa25902f 100644 --- a/arch/arm64/kernel/vdso/Makefile +++ b/arch/arm64/kernel/vdso/Makefile @@ -24,8 +24,7 @@ btildflags-$(CONFIG_ARM64_BTI_KERNEL) += -z force-bti # routines, as x86 does (see 6f121e548f83 ("x86, vdso: Reimplement vdso.so # preparation in build-time C")). ldflags-y := -shared -nostdlib -soname=linux-vdso.so.1 --hash-style=sysv \ - -Bsymbolic $(call ld-option, --no-eh-frame-hdr) --build-id=sha1 -n \ - $(btildflags-y) -T + -Bsymbolic --build-id=sha1 -n $(btildflags-y) -T ccflags-y := -fno-common -fno-builtin -fno-stack-protector -ffixed-x18 ccflags-y += -DDISABLE_BRANCH_PROFILING -DBUILD_VDSO diff --git a/arch/arm64/kernel/vdso/vdso.lds.S b/arch/arm64/kernel/vdso/vdso.lds.S index d808ad31e01f7..61dbb4c838ef7 100644 --- a/arch/arm64/kernel/vdso/vdso.lds.S +++ b/arch/arm64/kernel/vdso/vdso.lds.S @@ -40,9 +40,6 @@ SECTIONS PROVIDE (_etext = .); PROVIDE (etext = .); - .eh_frame_hdr : { *(.eh_frame_hdr) } :text :eh_frame_hdr - .eh_frame : { KEEP (*(.eh_frame)) } :text - .dynamic : { *(.dynamic) } :text :dynamic .rodata : { *(.rodata*) } :text @@ -54,6 +51,7 @@ SECTIONS *(.note.GNU-stack) *(.data .data.* .gnu.linkonce.d.* .sdata*) *(.bss .sbss .dynbss .dynsbss) + *(.eh_frame .eh_frame_hdr) } } @@ -66,7 +64,6 @@ PHDRS text PT_LOAD FLAGS(5) FILEHDR PHDRS; /* PF_R|PF_X */ dynamic PT_DYNAMIC FLAGS(4); /* PF_R */ note PT_NOTE FLAGS(4); /* PF_R */ - eh_frame_hdr PT_GNU_EH_FRAME; } /* -- GitLab From f34d93f30d6a72f6b15ba24b6994b746df0c30de Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Mon, 4 Jan 2021 12:15:41 +0000 Subject: [PATCH 0223/2012] arm64: kasan: Set TCR_EL1.TBID1 when KASAN_HW_TAGS is enabled Commit 49b3cf035edc ("kasan: arm64: set TCR_EL1.TBID1 when enabled") set the TBID1 bit for the KASAN_SW_TAGS configuration, freeing up 8 bits to be used by PAC. With in-kernel MTE now in mainline, also set this bit for the KASAN_HW_TAGS configuration. Signed-off-by: Catalin Marinas Cc: Peter Collingbourne Acked-by: Vincenzo Frascino Acked-by: Andrey Konovalov --- arch/arm64/mm/proc.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index 37a54b57178a7..1f7ee8c8b7b81 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -46,7 +46,7 @@ #endif #ifdef CONFIG_KASAN_HW_TAGS -#define TCR_KASAN_HW_FLAGS SYS_TCR_EL1_TCMA1 | TCR_TBI1 +#define TCR_KASAN_HW_FLAGS SYS_TCR_EL1_TCMA1 | TCR_TBI1 | TCR_TBID1 #else #define TCR_KASAN_HW_FLAGS 0 #endif -- GitLab From a8f7e08a81708920a928664a865208fdf451c49f Mon Sep 17 00:00:00 2001 From: Peter Gonda Date: Tue, 5 Jan 2021 08:33:11 -0800 Subject: [PATCH 0224/2012] x86/sev-es: Fix SEV-ES OUT/IN immediate opcode vc handling The IN and OUT instructions with port address as an immediate operand only use an 8-bit immediate (imm8). The current VC handler uses the entire 32-bit immediate value but these instructions only set the first bytes. Cast the operand to an u8 for that. [ bp: Massage commit message. ] Fixes: 25189d08e5168 ("x86/sev-es: Add support for handling IOIO exceptions") Signed-off-by: Peter Gonda Signed-off-by: Borislav Petkov Acked-by: David Rientjes Link: https://lkml.kernel.org/r/20210105163311.221490-1-pgonda@google.com --- arch/x86/kernel/sev-es-shared.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/sev-es-shared.c b/arch/x86/kernel/sev-es-shared.c index 7d04b356d44d3..cdc04d0912423 100644 --- a/arch/x86/kernel/sev-es-shared.c +++ b/arch/x86/kernel/sev-es-shared.c @@ -305,14 +305,14 @@ static enum es_result vc_ioio_exitinfo(struct es_em_ctxt *ctxt, u64 *exitinfo) case 0xe4: case 0xe5: *exitinfo |= IOIO_TYPE_IN; - *exitinfo |= (u64)insn->immediate.value << 16; + *exitinfo |= (u8)insn->immediate.value << 16; break; /* OUT immediate opcodes */ case 0xe6: case 0xe7: *exitinfo |= IOIO_TYPE_OUT; - *exitinfo |= (u64)insn->immediate.value << 16; + *exitinfo |= (u8)insn->immediate.value << 16; break; /* IN register opcodes */ -- GitLab From d16baa3f1453c14d680c5fee01cd122a22d0e0ce Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 5 Jan 2021 12:37:23 -0500 Subject: [PATCH 0225/2012] blk-iocost: fix NULL iocg deref from racing against initialization When initializing iocost for a queue, its rqos should be registered before the blkcg policy is activated to allow policy data initiailization to lookup the associated ioc. This unfortunately means that the rqos methods can be called on bios before iocgs are attached to all existing blkgs. While the race is theoretically possible on ioc_rqos_throttle(), it mostly happened in ioc_rqos_merge() due to the difference in how they lookup ioc. The former determines it from the passed in @rqos and then bails before dereferencing iocg if the looked up ioc is disabled, which most likely is the case if initialization is still in progress. The latter looked up ioc by dereferencing the possibly NULL iocg making it a lot more prone to actually triggering the bug. * Make ioc_rqos_merge() use the same method as ioc_rqos_throttle() to look up ioc for consistency. * Make ioc_rqos_throttle() and ioc_rqos_merge() test for NULL iocg before dereferencing it. * Explain the danger of NULL iocgs in blk_iocost_init(). Signed-off-by: Tejun Heo Reported-by: Jonathan Lemon Cc: stable@vger.kernel.org # v5.4+ Signed-off-by: Jens Axboe --- block/blk-iocost.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/block/blk-iocost.c b/block/blk-iocost.c index ac6078a349394..98d656bdb42b7 100644 --- a/block/blk-iocost.c +++ b/block/blk-iocost.c @@ -2551,8 +2551,8 @@ static void ioc_rqos_throttle(struct rq_qos *rqos, struct bio *bio) bool use_debt, ioc_locked; unsigned long flags; - /* bypass IOs if disabled or for root cgroup */ - if (!ioc->enabled || !iocg->level) + /* bypass IOs if disabled, still initializing, or for root cgroup */ + if (!ioc->enabled || !iocg || !iocg->level) return; /* calculate the absolute vtime cost */ @@ -2679,14 +2679,14 @@ static void ioc_rqos_merge(struct rq_qos *rqos, struct request *rq, struct bio *bio) { struct ioc_gq *iocg = blkg_to_iocg(bio->bi_blkg); - struct ioc *ioc = iocg->ioc; + struct ioc *ioc = rqos_to_ioc(rqos); sector_t bio_end = bio_end_sector(bio); struct ioc_now now; u64 vtime, abs_cost, cost; unsigned long flags; - /* bypass if disabled or for root cgroup */ - if (!ioc->enabled || !iocg->level) + /* bypass if disabled, still initializing, or for root cgroup */ + if (!ioc->enabled || !iocg || !iocg->level) return; abs_cost = calc_vtime_cost(bio, iocg, true); @@ -2863,6 +2863,12 @@ static int blk_iocost_init(struct request_queue *q) ioc_refresh_params(ioc, true); spin_unlock_irq(&ioc->lock); + /* + * rqos must be added before activation to allow iocg_pd_init() to + * lookup the ioc from q. This means that the rqos methods may get + * called before policy activation completion, can't assume that the + * target bio has an iocg associated and need to test for NULL iocg. + */ rq_qos_add(q, rqos); ret = blkcg_activate_policy(q, &blkcg_policy_iocost); if (ret) { -- GitLab From 6d4d273588378c65915acaf7b2ee74e9dd9c130a Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 10 Dec 2020 10:44:33 +0100 Subject: [PATCH 0226/2012] bfq: Fix computation of shallow depth BFQ computes number of tags it allows to be allocated for each request type based on tag bitmap. However it uses 1 << bitmap.shift as number of available tags which is wrong. 'shift' is just an internal bitmap value containing logarithm of how many bits bitmap uses in each bitmap word. Thus number of tags allowed for some request types can be far to low. Use proper bitmap.depth which has the number of tags instead. Signed-off-by: Jan Kara Signed-off-by: Jens Axboe --- block/bfq-iosched.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c index 9e81d1052091f..9e4eb0fc1c16e 100644 --- a/block/bfq-iosched.c +++ b/block/bfq-iosched.c @@ -6332,13 +6332,13 @@ static unsigned int bfq_update_depths(struct bfq_data *bfqd, * limit 'something'. */ /* no more than 50% of tags for async I/O */ - bfqd->word_depths[0][0] = max((1U << bt->sb.shift) >> 1, 1U); + bfqd->word_depths[0][0] = max(bt->sb.depth >> 1, 1U); /* * no more than 75% of tags for sync writes (25% extra tags * w.r.t. async I/O, to prevent async I/O from starving sync * writes) */ - bfqd->word_depths[0][1] = max(((1U << bt->sb.shift) * 3) >> 2, 1U); + bfqd->word_depths[0][1] = max((bt->sb.depth * 3) >> 2, 1U); /* * In-word depths in case some bfq_queue is being weight- @@ -6348,9 +6348,9 @@ static unsigned int bfq_update_depths(struct bfq_data *bfqd, * shortage. */ /* no more than ~18% of tags for async I/O */ - bfqd->word_depths[1][0] = max(((1U << bt->sb.shift) * 3) >> 4, 1U); + bfqd->word_depths[1][0] = max((bt->sb.depth * 3) >> 4, 1U); /* no more than ~37% of tags for sync writes (~20% extra tags) */ - bfqd->word_depths[1][1] = max(((1U << bt->sb.shift) * 6) >> 4, 1U); + bfqd->word_depths[1][1] = max((bt->sb.depth * 6) >> 4, 1U); for (i = 0; i < 2; i++) for (j = 0; j < 2; j++) -- GitLab From 170b3bbda08852277b97f4f0516df0785c939764 Mon Sep 17 00:00:00 2001 From: Ye Bin Date: Tue, 5 Jan 2021 21:53:40 +0800 Subject: [PATCH 0227/2012] =?UTF-8?q?io=5Furing:=20Delete=20useless=20vari?= =?UTF-8?q?able=20=E2=80=98id=E2=80=99=20in=20io=5Fprep=5Fasync=5Fwork?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix follow warning: fs/io_uring.c:1523:22: warning: variable ‘id’ set but not used [-Wunused-but-set-variable] struct io_identity *id; ^~ Reported-by: Hulk Robot Signed-off-by: Ye Bin Signed-off-by: Jens Axboe --- fs/io_uring.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 5bccb235271f7..dc92ca5090a39 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -1523,10 +1523,8 @@ static void io_prep_async_work(struct io_kiocb *req) { const struct io_op_def *def = &io_op_defs[req->opcode]; struct io_ring_ctx *ctx = req->ctx; - struct io_identity *id; io_req_init_async(req); - id = req->work.identity; if (req->flags & REQ_F_FORCE_ASYNC) req->work.flags |= IO_WQ_WORK_CONCURRENT; -- GitLab From aebf5db917055b38f4945ed6d621d9f07a44ff30 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Mon, 21 Dec 2020 12:33:35 +0800 Subject: [PATCH 0228/2012] block: fix use-after-free in disk_part_iter_next Make sure that bdgrab() is done on the 'block_device' instance before referring to it for avoiding use-after-free. Cc: Reported-by: syzbot+825f0f9657d4e528046e@syzkaller.appspotmail.com Signed-off-by: Ming Lei Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/genhd.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/block/genhd.c b/block/genhd.c index 73faec438e49a..419548e92d82f 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -246,15 +246,18 @@ struct block_device *disk_part_iter_next(struct disk_part_iter *piter) part = rcu_dereference(ptbl->part[piter->idx]); if (!part) continue; + piter->part = bdgrab(part); + if (!piter->part) + continue; if (!bdev_nr_sectors(part) && !(piter->flags & DISK_PITER_INCL_EMPTY) && !(piter->flags & DISK_PITER_INCL_EMPTY_PART0 && - piter->idx == 0)) + piter->idx == 0)) { + bdput(piter->part); + piter->part = NULL; continue; + } - piter->part = bdgrab(part); - if (!piter->part) - continue; piter->idx += inc; break; } -- GitLab From 6775ae901ffd130d0be9c32837f88d1f9d560189 Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Tue, 22 Dec 2020 17:42:32 +0100 Subject: [PATCH 0229/2012] iommu/iova: fix 'domain' typos Replace misspelled 'doamin' with 'domain' in several comments. Signed-off-by: Stefano Garzarella Link: https://lore.kernel.org/r/20201222164232.88795-1-sgarzare@redhat.com Signed-off-by: Will Deacon --- drivers/iommu/iova.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c index 4bb3293ae4d73..d20b8b333d30d 100644 --- a/drivers/iommu/iova.c +++ b/drivers/iommu/iova.c @@ -358,7 +358,7 @@ static void private_free_iova(struct iova_domain *iovad, struct iova *iova) * @iovad: - iova domain in question. * @pfn: - page frame number * This function finds and returns an iova belonging to the - * given doamin which matches the given pfn. + * given domain which matches the given pfn. */ struct iova *find_iova(struct iova_domain *iovad, unsigned long pfn) { @@ -601,7 +601,7 @@ void queue_iova(struct iova_domain *iovad, EXPORT_SYMBOL_GPL(queue_iova); /** - * put_iova_domain - destroys the iova doamin + * put_iova_domain - destroys the iova domain * @iovad: - iova domain in question. * All the iova's in that domain are destroyed. */ @@ -712,9 +712,9 @@ EXPORT_SYMBOL_GPL(reserve_iova); /** * copy_reserved_iova - copies the reserved between domains - * @from: - source doamin from where to copy + * @from: - source domain from where to copy * @to: - destination domin where to copy - * This function copies reserved iova's from one doamin to + * This function copies reserved iova's from one domain to * other. */ void -- GitLab From ff2b46d7cff80d27d82f7f3252711f4ca1666129 Mon Sep 17 00:00:00 2001 From: Dinghao Liu Date: Tue, 5 Jan 2021 13:18:37 +0800 Subject: [PATCH 0230/2012] iommu/intel: Fix memleak in intel_irq_remapping_alloc When irq_domain_get_irq_data() or irqd_cfg() fails at i == 0, data allocated by kzalloc() has not been freed before returning, which leads to memleak. Fixes: b106ee63abcc ("irq_remapping/vt-d: Enhance Intel IR driver to support hierarchical irqdomains") Signed-off-by: Dinghao Liu Acked-by: Lu Baolu Link: https://lore.kernel.org/r/20210105051837.32118-1-dinghao.liu@zju.edu.cn Signed-off-by: Will Deacon --- drivers/iommu/intel/irq_remapping.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/iommu/intel/irq_remapping.c b/drivers/iommu/intel/irq_remapping.c index aeffda92b10b7..685200a5cff0f 100644 --- a/drivers/iommu/intel/irq_remapping.c +++ b/drivers/iommu/intel/irq_remapping.c @@ -1353,6 +1353,8 @@ static int intel_irq_remapping_alloc(struct irq_domain *domain, irq_data = irq_domain_get_irq_data(domain, virq + i); irq_cfg = irqd_cfg(irq_data); if (!irq_data || !irq_cfg) { + if (!i) + kfree(data); ret = -EINVAL; goto out_free_data; } -- GitLab From 12bc4570c14e24e6244d66466aeda994f805634b Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Tue, 5 Jan 2021 01:32:51 +0000 Subject: [PATCH 0231/2012] iommu/amd: Set iommu->int_enabled consistently when interrupts are set up When I made the INTCAPXT support stop gratuitously pretending to be MSI, I missed the fact that iommu_setup_msi() also sets the ->int_enabled flag. I missed this in the iommu_setup_intcapxt() code path, which means that a resume from suspend will try to allocate the IRQ domains again, accidentally re-enabling interrupts as it does, resulting in much sadness. Lift out the bit which sets iommu->int_enabled into the iommu_init_irq() function which is also where it gets checked. Link: https://lore.kernel.org/r/20210104132250.GE32151@zn.tnic/ Fixes: d1adcfbb520c ("iommu/amd: Fix IOMMU interrupt generation in X2APIC mode") Reported-by: Borislav Petkov Signed-off-by: David Woodhouse Tested-by: Borislav Petkov Link: https://lore.kernel.org/r/50cd5f55be8ead0937ac315cd2f5b89364f6a9a5.camel@infradead.org Signed-off-by: Will Deacon --- drivers/iommu/amd/init.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index f54cd79b43e40..6a1f7048dacc6 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -1973,8 +1973,6 @@ static int iommu_setup_msi(struct amd_iommu *iommu) return r; } - iommu->int_enabled = true; - return 0; } @@ -2169,6 +2167,7 @@ static int iommu_init_irq(struct amd_iommu *iommu) if (ret) return ret; + iommu->int_enabled = true; enable_faults: iommu_feature_enable(iommu, CONTROL_EVT_INT_EN); -- GitLab From b34f10c2dc5961021850c3c15f46a84b56a0c0e8 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Tue, 5 Jan 2021 01:36:13 +0000 Subject: [PATCH 0232/2012] iommu/amd: Stop irq_remapping_select() matching when remapping is disabled The AMD IOMMU initialisation registers the IRQ remapping domain for each IOMMU before doing the final sanity check that every I/OAPIC is covered. This means that the AMD irq_remapping_select() function gets invoked even when IRQ remapping has been disabled, eventually leading to a NULL pointer dereference in alloc_irq_table(). Unfortunately, the IVRS isn't fully parsed early enough that the sanity check can be done in time to registering the IRQ domain altogether. Doing that would be nice, but is a larger and more error-prone task. The simple fix is just for irq_remapping_select() to refuse to report a match when IRQ remapping has disabled. Link: https://lore.kernel.org/lkml/ed4be9b4-24ac-7128-c522-7ef359e8185d@gmx.at Fixes: a1a785b57242 ("iommu/amd: Implement select() method on remapping irqdomain") Reported-by: Johnathan Smithinovic Signed-off-by: David Woodhouse Link: https://lore.kernel.org/r/04bbe8bca87f81a3cfa93ec4299e53f47e00e5b3.camel@infradead.org Signed-off-by: Will Deacon --- drivers/iommu/amd/iommu.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index 7e2c445a1faec..f0adbc48fd179 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -3854,6 +3854,9 @@ static int irq_remapping_select(struct irq_domain *d, struct irq_fwspec *fwspec, struct amd_iommu *iommu; int devid = -1; + if (!amd_iommu_irq_remap) + return 0; + if (x86_fwspec_is_ioapic(fwspec)) devid = get_ioapic_devid(fwspec->param[0]); else if (x86_fwspec_is_hpet(fwspec)) -- GitLab From 8a48c0a3360bf2bf4f40c980d0ec216e770e58ee Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 4 Jan 2021 19:44:53 -0800 Subject: [PATCH 0233/2012] arch/arc: add copy_user_page() to to fix build error on ARC fs/dax.c uses copy_user_page() but ARC does not provide that interface, resulting in a build error. Provide copy_user_page() in . ../fs/dax.c: In function 'copy_cow_page_dax': ../fs/dax.c:702:2: error: implicit declaration of function 'copy_user_page'; did you mean 'copy_to_user_page'? [-Werror=implicit-function-declaration] Reported-by: kernel test robot Signed-off-by: Randy Dunlap Cc: Vineet Gupta Cc: linux-snps-arc@lists.infradead.org Cc: Dan Williams #Acked-by: Vineet Gupta # v1 Cc: Andrew Morton Cc: Matthew Wilcox Cc: Jan Kara Cc: linux-fsdevel@vger.kernel.org Cc: linux-nvdimm@lists.01.org #Reviewed-by: Ira Weiny # v2 Signed-off-by: Vineet Gupta --- arch/arc/include/asm/page.h | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arc/include/asm/page.h b/arch/arc/include/asm/page.h index 23e41e890eda7..ad9b7fe4dba36 100644 --- a/arch/arc/include/asm/page.h +++ b/arch/arc/include/asm/page.h @@ -10,6 +10,7 @@ #ifndef __ASSEMBLY__ #define clear_page(paddr) memset((paddr), 0, PAGE_SIZE) +#define copy_user_page(to, from, vaddr, pg) copy_page(to, from) #define copy_page(to, from) memcpy((to), (from), PAGE_SIZE) struct vm_area_struct; -- GitLab From f4d9359de8ac0fb64a5ecc9c34833705eb53327b Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Thu, 5 Nov 2020 13:22:10 -0800 Subject: [PATCH 0234/2012] include/soc: remove headers for EZChip NPS NPS platform has been removed from ARC port and there are no in-tree user of it now . So RIP ! Signed-off-by: Vineet Gupta --- include/soc/nps/common.h | 172 --------------------------------------- include/soc/nps/mtm.h | 59 -------------- 2 files changed, 231 deletions(-) delete mode 100644 include/soc/nps/common.h delete mode 100644 include/soc/nps/mtm.h diff --git a/include/soc/nps/common.h b/include/soc/nps/common.h deleted file mode 100644 index 8c18dc6d3fde5..0000000000000 --- a/include/soc/nps/common.h +++ /dev/null @@ -1,172 +0,0 @@ -/* - * Copyright (c) 2016, Mellanox Technologies. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef SOC_NPS_COMMON_H -#define SOC_NPS_COMMON_H - -#ifdef CONFIG_SMP -#define NPS_IPI_IRQ 5 -#endif - -#define NPS_HOST_REG_BASE 0xF6000000 - -#define NPS_MSU_BLKID 0x018 - -#define CTOP_INST_RSPI_GIC_0_R12 0x3C56117E -#define CTOP_INST_MOV2B_FLIP_R3_B1_B2_INST 0x5B60 -#define CTOP_INST_MOV2B_FLIP_R3_B1_B2_LIMM 0x00010422 - -#ifndef AUX_IENABLE -#define AUX_IENABLE 0x40c -#endif - -#define CTOP_AUX_IACK (0xFFFFF800 + 0x088) - -#ifndef __ASSEMBLY__ - -/* In order to increase compilation test coverage */ -#ifdef CONFIG_ARC -static inline void nps_ack_gic(void) -{ - __asm__ __volatile__ ( - " .word %0\n" - : - : "i"(CTOP_INST_RSPI_GIC_0_R12) - : "memory"); -} -#else -static inline void nps_ack_gic(void) { } -#define write_aux_reg(r, v) -#define read_aux_reg(r) 0 -#endif - -/* CPU global ID */ -struct global_id { - union { - struct { -#ifdef CONFIG_EZNPS_MTM_EXT - u32 __reserved:20, cluster:4, core:4, thread:4; -#else - u32 __reserved:24, cluster:4, core:4; -#endif - }; - u32 value; - }; -}; - -/* - * Convert logical to physical CPU IDs - * - * The conversion swap bits 1 and 2 of cluster id (out of 4 bits) - * Now quad of logical clusters id's are adjacent physically, - * and not like the id's physically came with each cluster. - * Below table is 4x4 mesh of core clusters as it layout on chip. - * Cluster ids are in format: logical (physical) - * - * ----------------- ------------------ - * 3 | 5 (3) 7 (7) | | 13 (11) 15 (15)| - * - * 2 | 4 (2) 6 (6) | | 12 (10) 14 (14)| - * ----------------- ------------------ - * 1 | 1 (1) 3 (5) | | 9 (9) 11 (13)| - * - * 0 | 0 (0) 2 (4) | | 8 (8) 10 (12)| - * ----------------- ------------------ - * 0 1 2 3 - */ -static inline int nps_cluster_logic_to_phys(int cluster) -{ -#ifdef __arc__ - __asm__ __volatile__( - " mov r3,%0\n" - " .short %1\n" - " .word %2\n" - " mov %0,r3\n" - : "+r"(cluster) - : "i"(CTOP_INST_MOV2B_FLIP_R3_B1_B2_INST), - "i"(CTOP_INST_MOV2B_FLIP_R3_B1_B2_LIMM) - : "r3"); -#endif - - return cluster; -} - -#define NPS_CPU_TO_CLUSTER_NUM(cpu) \ - ({ struct global_id gid; gid.value = cpu; \ - nps_cluster_logic_to_phys(gid.cluster); }) - -struct nps_host_reg_address { - union { - struct { - u32 base:8, cl_x:4, cl_y:4, - blkid:6, reg:8, __reserved:2; - }; - u32 value; - }; -}; - -struct nps_host_reg_address_non_cl { - union { - struct { - u32 base:7, blkid:11, reg:12, __reserved:2; - }; - u32 value; - }; -}; - -static inline void *nps_host_reg_non_cl(u32 blkid, u32 reg) -{ - struct nps_host_reg_address_non_cl reg_address; - - reg_address.value = NPS_HOST_REG_BASE; - reg_address.blkid = blkid; - reg_address.reg = reg; - - return (void *)reg_address.value; -} - -static inline void *nps_host_reg(u32 cpu, u32 blkid, u32 reg) -{ - struct nps_host_reg_address reg_address; - u32 cl = NPS_CPU_TO_CLUSTER_NUM(cpu); - - reg_address.value = NPS_HOST_REG_BASE; - reg_address.cl_x = (cl >> 2) & 0x3; - reg_address.cl_y = cl & 0x3; - reg_address.blkid = blkid; - reg_address.reg = reg; - - return (void *)reg_address.value; -} -#endif /* __ASSEMBLY__ */ - -#endif /* SOC_NPS_COMMON_H */ diff --git a/include/soc/nps/mtm.h b/include/soc/nps/mtm.h deleted file mode 100644 index d2f5e7e3703ef..0000000000000 --- a/include/soc/nps/mtm.h +++ /dev/null @@ -1,59 +0,0 @@ -/* - * Copyright (c) 2016, Mellanox Technologies. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef SOC_NPS_MTM_H -#define SOC_NPS_MTM_H - -#define CTOP_INST_HWSCHD_OFF_R3 0x3B6F00BF -#define CTOP_INST_HWSCHD_RESTORE_R3 0x3E6F70C3 - -static inline void hw_schd_save(unsigned int *flags) -{ - __asm__ __volatile__( - " .word %1\n" - " st r3,[%0]\n" - : - : "r"(flags), "i"(CTOP_INST_HWSCHD_OFF_R3) - : "r3", "memory"); -} - -static inline void hw_schd_restore(unsigned int flags) -{ - __asm__ __volatile__( - " mov r3, %0\n" - " .word %1\n" - : - : "r"(flags), "i"(CTOP_INST_HWSCHD_RESTORE_R3) - : "r3"); -} - -#endif /* SOC_NPS_MTM_H */ -- GitLab From 2f9d9a852f426cdc56ebd5c05c2333ea2012cc97 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Tue, 29 Dec 2020 13:47:10 -0700 Subject: [PATCH 0235/2012] pinctrl: nomadik: Remove unused variable in nmk_gpio_dbg_show_one Clang warns: drivers/pinctrl/nomadik/pinctrl-nomadik.c:952:8: warning: unused variable 'wake' [-Wunused-variable] bool wake; ^ 1 warning generated. There were two wake declarations added to nmk_gpio_dbg_show_one when converting it to use irq_has_action but only one is used within its scope. Remove the unused one so there is no more warning. Fixes: f3925032d7fd ("pinctrl: nomadik: Use irq_has_action()") Reported-by: Arnd Bergmann Tested-by: Andrew Halaney Reviewed-by: Andrew Halaney Reported-by: Hulk Robot Reported-by: Ye Bin Signed-off-by: Nathan Chancellor Link: https://lore.kernel.org/r/20201229204710.1129033-1-natechancellor@gmail.com Signed-off-by: Linus Walleij --- drivers/pinctrl/nomadik/pinctrl-nomadik.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/pinctrl/nomadik/pinctrl-nomadik.c b/drivers/pinctrl/nomadik/pinctrl-nomadik.c index d4ea10803fd90..abfe11c7b49fb 100644 --- a/drivers/pinctrl/nomadik/pinctrl-nomadik.c +++ b/drivers/pinctrl/nomadik/pinctrl-nomadik.c @@ -949,7 +949,6 @@ static void nmk_gpio_dbg_show_one(struct seq_file *s, } else { int irq = chip->to_irq(chip, offset); const int pullidx = pull ? 1 : 0; - bool wake; int val; static const char * const pulls[] = { "none ", -- GitLab From 81bd1579b43e0e285cba667399f1b063f1ce7672 Mon Sep 17 00:00:00 2001 From: Hsin-Yi Wang Date: Mon, 28 Dec 2020 17:04:25 +0800 Subject: [PATCH 0236/2012] pinctrl: mediatek: Fix fallback call path Some SoCs, eg. mt8183, are using a pinconfig operation bias_set_combo. The fallback path in mtk_pinconf_adv_pull_set() should also try this operation. Fixes: cafe19db7751 ("pinctrl: mediatek: Backward compatible to previous Mediatek's bias-pull usage") Signed-off-by: Hsin-Yi Wang Acked-by: Sean Wang Link: https://lore.kernel.org/r/20201228090425.2130569-1-hsinyi@chromium.org Signed-off-by: Linus Walleij --- drivers/pinctrl/mediatek/pinctrl-mtk-common-v2.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/pinctrl/mediatek/pinctrl-mtk-common-v2.c b/drivers/pinctrl/mediatek/pinctrl-mtk-common-v2.c index 7aeb552d16ce9..72f17f26acd80 100644 --- a/drivers/pinctrl/mediatek/pinctrl-mtk-common-v2.c +++ b/drivers/pinctrl/mediatek/pinctrl-mtk-common-v2.c @@ -920,6 +920,10 @@ int mtk_pinconf_adv_pull_set(struct mtk_pinctrl *hw, err = hw->soc->bias_set(hw, desc, pullup); if (err) return err; + } else if (hw->soc->bias_set_combo) { + err = hw->soc->bias_set_combo(hw, desc, pullup, arg); + if (err) + return err; } else { return -ENOTSUPP; } -- GitLab From 1d53864c3617f5235f891ca0fbe9347c4cd35d46 Mon Sep 17 00:00:00 2001 From: Stanley Chu Date: Tue, 22 Dec 2020 15:29:04 +0800 Subject: [PATCH 0237/2012] scsi: ufs: Fix possible power drain during system suspend Currently if device needs to do flush or BKOP operations, the device VCC power is kept during runtime-suspend period. However, if system suspend is happening while device is runtime-suspended, such power may not be disabled successfully. The reasons may be, 1. If current PM level is the same as SPM level, device will keep runtime-suspended by ufshcd_system_suspend(). 2. Flush recheck work may not be scheduled successfully during system suspend period. If it can wake up the system, this is also not the intention of the recheck work. To fix this issue, simply runtime-resume the device if the flush is allowed during runtime suspend period. Flush capability will be disabled while leaving runtime suspend, and also not be allowed in system suspend period. Link: https://lore.kernel.org/r/20201222072905.32221-2-stanley.chu@mediatek.com Fixes: 51dd905bd2f6 ("scsi: ufs: Fix WriteBooster flush during runtime suspend") Reviewed-by: Chaotian Jing Reviewed-by: Can Guo Signed-off-by: Stanley Chu Signed-off-by: Martin K. Petersen --- drivers/scsi/ufs/ufshcd.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index 82ad31781bc9e..9db348650f17c 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -8938,7 +8938,8 @@ int ufshcd_system_suspend(struct ufs_hba *hba) if ((ufs_get_pm_lvl_to_dev_pwr_mode(hba->spm_lvl) == hba->curr_dev_pwr_mode) && (ufs_get_pm_lvl_to_link_pwr_state(hba->spm_lvl) == - hba->uic_link_state)) + hba->uic_link_state) && + !hba->dev_info.b_rpm_dev_flush_capable) goto out; if (pm_runtime_suspended(hba->dev)) { -- GitLab From 21acf4601cc63cf564c6fc1a74d81b191313c929 Mon Sep 17 00:00:00 2001 From: Stanley Chu Date: Tue, 22 Dec 2020 15:29:05 +0800 Subject: [PATCH 0238/2012] scsi: ufs: Relax the condition of UFSHCI_QUIRK_SKIP_MANUAL_WB_FLUSH_CTRL UFSHCI_QUIRK_SKIP_MANUAL_WB_FLUSH_CTRL is intended to skip enabling fWriteBoosterBufferFlushEn while WriteBooster is initializing. Therefore it is better to apply the checking during WriteBooster initialization only. Link: https://lore.kernel.org/r/20201222072905.32221-3-stanley.chu@mediatek.com Reviewed-by: Can Guo Signed-off-by: Stanley Chu Signed-off-by: Martin K. Petersen --- drivers/scsi/ufs/ufshcd.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index 9db348650f17c..04ab6f2ccac6d 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -289,7 +289,8 @@ static inline void ufshcd_wb_config(struct ufs_hba *hba) if (ret) dev_err(hba->dev, "%s: En WB flush during H8: failed: %d\n", __func__, ret); - ufshcd_wb_toggle_flush(hba, true); + if (!(hba->quirks & UFSHCI_QUIRK_SKIP_MANUAL_WB_FLUSH_CTRL)) + ufshcd_wb_toggle_flush(hba, true); } static void ufshcd_scsi_unblock_requests(struct ufs_hba *hba) @@ -5436,9 +5437,6 @@ static int ufshcd_wb_toggle_flush_during_h8(struct ufs_hba *hba, bool set) static inline void ufshcd_wb_toggle_flush(struct ufs_hba *hba, bool enable) { - if (hba->quirks & UFSHCI_QUIRK_SKIP_MANUAL_WB_FLUSH_CTRL) - return; - if (enable) ufshcd_wb_buf_flush_enable(hba); else -- GitLab From 4ceb06e7c336f4a8d3f3b6ac9a4fea2e9c97dc07 Mon Sep 17 00:00:00 2001 From: Colin Xu Date: Tue, 1 Dec 2020 14:03:29 +0800 Subject: [PATCH 0239/2012] drm/i915/gvt: Fix vfio_edid issue for BXT/APL BXT/APL has different isr/irr/hpd regs compared with other GEN9. If not setting these regs bits correctly according to the emulated monitor (currently a DP on PORT_B), although gvt still triggers a virtual HPD event, the guest driver won't detect a valid HPD pulse thus no full display detection will be executed to read the updated EDID. With this patch, the vfio_edid is enabled again on BXT/APL, which is previously disabled. Fixes: 642403e3599e ("drm/i915/gvt: Temporarily disable vfio_edid for BXT/APL") Signed-off-by: Colin Xu Signed-off-by: Zhenyu Wang Link: http://patchwork.freedesktop.org/patch/msgid/20201201060329.142375-1-colin.xu@intel.com Reviewed-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/display.c | 81 +++++++++++++++++++++--------- drivers/gpu/drm/i915/gvt/vgpu.c | 5 +- 2 files changed, 59 insertions(+), 27 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/display.c b/drivers/gpu/drm/i915/gvt/display.c index a15f875396576..62a5b0dd2003b 100644 --- a/drivers/gpu/drm/i915/gvt/display.c +++ b/drivers/gpu/drm/i915/gvt/display.c @@ -217,6 +217,15 @@ static void emulate_monitor_status_change(struct intel_vgpu *vgpu) DDI_BUF_CTL_ENABLE); vgpu_vreg_t(vgpu, DDI_BUF_CTL(port)) |= DDI_BUF_IS_IDLE; } + vgpu_vreg_t(vgpu, PCH_PORT_HOTPLUG) &= + ~(PORTA_HOTPLUG_ENABLE | PORTA_HOTPLUG_STATUS_MASK); + vgpu_vreg_t(vgpu, PCH_PORT_HOTPLUG) &= + ~(PORTB_HOTPLUG_ENABLE | PORTB_HOTPLUG_STATUS_MASK); + vgpu_vreg_t(vgpu, PCH_PORT_HOTPLUG) &= + ~(PORTC_HOTPLUG_ENABLE | PORTC_HOTPLUG_STATUS_MASK); + /* No hpd_invert set in vgpu vbt, need to clear invert mask */ + vgpu_vreg_t(vgpu, PCH_PORT_HOTPLUG) &= ~BXT_DDI_HPD_INVERT_MASK; + vgpu_vreg_t(vgpu, GEN8_DE_PORT_ISR) &= ~BXT_DE_PORT_HOTPLUG_MASK; vgpu_vreg_t(vgpu, BXT_P_CR_GT_DISP_PWRON) &= ~(BIT(0) | BIT(1)); vgpu_vreg_t(vgpu, BXT_PORT_CL1CM_DW0(DPIO_PHY0)) &= @@ -273,6 +282,8 @@ static void emulate_monitor_status_change(struct intel_vgpu *vgpu) vgpu_vreg_t(vgpu, TRANS_DDI_FUNC_CTL(TRANSCODER_EDP)) |= (TRANS_DDI_BPC_8 | TRANS_DDI_MODE_SELECT_DP_SST | TRANS_DDI_FUNC_ENABLE); + vgpu_vreg_t(vgpu, PCH_PORT_HOTPLUG) |= + PORTA_HOTPLUG_ENABLE; vgpu_vreg_t(vgpu, GEN8_DE_PORT_ISR) |= GEN8_DE_PORT_HOTPLUG(HPD_PORT_A); } @@ -301,6 +312,8 @@ static void emulate_monitor_status_change(struct intel_vgpu *vgpu) (TRANS_DDI_BPC_8 | TRANS_DDI_MODE_SELECT_DP_SST | (PORT_B << TRANS_DDI_PORT_SHIFT) | TRANS_DDI_FUNC_ENABLE); + vgpu_vreg_t(vgpu, PCH_PORT_HOTPLUG) |= + PORTB_HOTPLUG_ENABLE; vgpu_vreg_t(vgpu, GEN8_DE_PORT_ISR) |= GEN8_DE_PORT_HOTPLUG(HPD_PORT_B); } @@ -329,6 +342,8 @@ static void emulate_monitor_status_change(struct intel_vgpu *vgpu) (TRANS_DDI_BPC_8 | TRANS_DDI_MODE_SELECT_DP_SST | (PORT_B << TRANS_DDI_PORT_SHIFT) | TRANS_DDI_FUNC_ENABLE); + vgpu_vreg_t(vgpu, PCH_PORT_HOTPLUG) |= + PORTC_HOTPLUG_ENABLE; vgpu_vreg_t(vgpu, GEN8_DE_PORT_ISR) |= GEN8_DE_PORT_HOTPLUG(HPD_PORT_C); } @@ -661,44 +676,62 @@ void intel_vgpu_emulate_hotplug(struct intel_vgpu *vgpu, bool connected) PORTD_HOTPLUG_STATUS_MASK; intel_vgpu_trigger_virtual_event(vgpu, DP_D_HOTPLUG); } else if (IS_BROXTON(i915)) { - if (connected) { - if (intel_vgpu_has_monitor_on_port(vgpu, PORT_A)) { + if (intel_vgpu_has_monitor_on_port(vgpu, PORT_A)) { + if (connected) { vgpu_vreg_t(vgpu, GEN8_DE_PORT_ISR) |= GEN8_DE_PORT_HOTPLUG(HPD_PORT_A); + } else { + vgpu_vreg_t(vgpu, GEN8_DE_PORT_ISR) &= + ~GEN8_DE_PORT_HOTPLUG(HPD_PORT_A); } - if (intel_vgpu_has_monitor_on_port(vgpu, PORT_B)) { - vgpu_vreg_t(vgpu, SFUSE_STRAP) |= - SFUSE_STRAP_DDIB_DETECTED; + vgpu_vreg_t(vgpu, GEN8_DE_PORT_IIR) |= + GEN8_DE_PORT_HOTPLUG(HPD_PORT_A); + vgpu_vreg_t(vgpu, PCH_PORT_HOTPLUG) &= + ~PORTA_HOTPLUG_STATUS_MASK; + vgpu_vreg_t(vgpu, PCH_PORT_HOTPLUG) |= + PORTA_HOTPLUG_LONG_DETECT; + intel_vgpu_trigger_virtual_event(vgpu, DP_A_HOTPLUG); + } + if (intel_vgpu_has_monitor_on_port(vgpu, PORT_B)) { + if (connected) { vgpu_vreg_t(vgpu, GEN8_DE_PORT_ISR) |= GEN8_DE_PORT_HOTPLUG(HPD_PORT_B); - } - if (intel_vgpu_has_monitor_on_port(vgpu, PORT_C)) { vgpu_vreg_t(vgpu, SFUSE_STRAP) |= - SFUSE_STRAP_DDIC_DETECTED; - vgpu_vreg_t(vgpu, GEN8_DE_PORT_ISR) |= - GEN8_DE_PORT_HOTPLUG(HPD_PORT_C); - } - } else { - if (intel_vgpu_has_monitor_on_port(vgpu, PORT_A)) { + SFUSE_STRAP_DDIB_DETECTED; + } else { vgpu_vreg_t(vgpu, GEN8_DE_PORT_ISR) &= - ~GEN8_DE_PORT_HOTPLUG(HPD_PORT_A); - } - if (intel_vgpu_has_monitor_on_port(vgpu, PORT_B)) { + ~GEN8_DE_PORT_HOTPLUG(HPD_PORT_B); vgpu_vreg_t(vgpu, SFUSE_STRAP) &= ~SFUSE_STRAP_DDIB_DETECTED; - vgpu_vreg_t(vgpu, GEN8_DE_PORT_ISR) &= - ~GEN8_DE_PORT_HOTPLUG(HPD_PORT_B); } - if (intel_vgpu_has_monitor_on_port(vgpu, PORT_C)) { - vgpu_vreg_t(vgpu, SFUSE_STRAP) &= - ~SFUSE_STRAP_DDIC_DETECTED; + vgpu_vreg_t(vgpu, GEN8_DE_PORT_IIR) |= + GEN8_DE_PORT_HOTPLUG(HPD_PORT_B); + vgpu_vreg_t(vgpu, PCH_PORT_HOTPLUG) &= + ~PORTB_HOTPLUG_STATUS_MASK; + vgpu_vreg_t(vgpu, PCH_PORT_HOTPLUG) |= + PORTB_HOTPLUG_LONG_DETECT; + intel_vgpu_trigger_virtual_event(vgpu, DP_B_HOTPLUG); + } + if (intel_vgpu_has_monitor_on_port(vgpu, PORT_C)) { + if (connected) { + vgpu_vreg_t(vgpu, GEN8_DE_PORT_ISR) |= + GEN8_DE_PORT_HOTPLUG(HPD_PORT_C); + vgpu_vreg_t(vgpu, SFUSE_STRAP) |= + SFUSE_STRAP_DDIC_DETECTED; + } else { vgpu_vreg_t(vgpu, GEN8_DE_PORT_ISR) &= ~GEN8_DE_PORT_HOTPLUG(HPD_PORT_C); + vgpu_vreg_t(vgpu, SFUSE_STRAP) &= + ~SFUSE_STRAP_DDIC_DETECTED; } + vgpu_vreg_t(vgpu, GEN8_DE_PORT_IIR) |= + GEN8_DE_PORT_HOTPLUG(HPD_PORT_C); + vgpu_vreg_t(vgpu, PCH_PORT_HOTPLUG) &= + ~PORTC_HOTPLUG_STATUS_MASK; + vgpu_vreg_t(vgpu, PCH_PORT_HOTPLUG) |= + PORTC_HOTPLUG_LONG_DETECT; + intel_vgpu_trigger_virtual_event(vgpu, DP_C_HOTPLUG); } - vgpu_vreg_t(vgpu, PCH_PORT_HOTPLUG) |= - PORTB_HOTPLUG_STATUS_MASK; - intel_vgpu_trigger_virtual_event(vgpu, DP_B_HOTPLUG); } } diff --git a/drivers/gpu/drm/i915/gvt/vgpu.c b/drivers/gpu/drm/i915/gvt/vgpu.c index e49944fde3339..cbe5931906e0a 100644 --- a/drivers/gpu/drm/i915/gvt/vgpu.c +++ b/drivers/gpu/drm/i915/gvt/vgpu.c @@ -437,10 +437,9 @@ static struct intel_vgpu *__intel_gvt_create_vgpu(struct intel_gvt *gvt, if (ret) goto out_clean_sched_policy; - if (IS_BROADWELL(dev_priv)) + if (IS_BROADWELL(dev_priv) || IS_BROXTON(dev_priv)) ret = intel_gvt_hypervisor_set_edid(vgpu, PORT_B); - /* FixMe: Re-enable APL/BXT once vfio_edid enabled */ - else if (!IS_BROXTON(dev_priv)) + else ret = intel_gvt_hypervisor_set_edid(vgpu, PORT_D); if (ret) goto out_clean_sched_policy; -- GitLab From 6948a96a0d69b7e8203758f44849ce4ab06ff788 Mon Sep 17 00:00:00 2001 From: Kiwoong Kim Date: Sat, 19 Dec 2020 15:40:39 +0900 Subject: [PATCH 0240/2012] scsi: ufs: Relocate flush of exceptional event The current flush location does not guarantee disabling BKOPS for the case of requesting device power off. 1) The exceptional event handler is queued 2) ufs suspend starts with a request of device power off 3) BKOPS is disabled in ufs suspend 4) The queued work for the handler is done and BKOPS is re-enabled Relocate the flush statement to ensure BKOPS remain disabled. Link: https://lore.kernel.org/r/1608360039-16390-1-git-send-email-kwmad.kim@samsung.com Reviewed-by: Can Guo Reviewed-by: Stanley Chu Signed-off-by: Kiwoong Kim Signed-off-by: Martin K. Petersen --- drivers/scsi/ufs/ufshcd.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index 04ab6f2ccac6d..00ee8cabf8e00 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -8696,6 +8696,8 @@ static int ufshcd_suspend(struct ufs_hba *hba, enum ufs_pm_op pm_op) ufshcd_wb_need_flush(hba)); } + flush_work(&hba->eeh_work); + if (req_dev_pwr_mode != hba->curr_dev_pwr_mode) { if (!ufshcd_is_runtime_pm(pm_op)) /* ensure that bkops is disabled */ @@ -8708,8 +8710,6 @@ static int ufshcd_suspend(struct ufs_hba *hba, enum ufs_pm_op pm_op) } } - flush_work(&hba->eeh_work); - /* * In the case of DeepSleep, the device is expected to remain powered * with the link off, so do not check for bkops. -- GitLab From 35fc4cd34426c242ab015ef280853b7bff101f48 Mon Sep 17 00:00:00 2001 From: Can Guo Date: Mon, 28 Dec 2020 04:04:36 -0800 Subject: [PATCH 0241/2012] scsi: ufs: Correct the LUN used in eh_device_reset_handler() callback Users can initiate resets to specific SCSI device/target/host through IOCTL. When this happens, the SCSI cmd passed to eh_device/target/host _reset_handler() callbacks is initialized with a request whose tag is -1. In this case it is not right for eh_device_reset_handler() callback to count on the LUN get from hba->lrb[-1]. Fix it by getting LUN from the SCSI device associated with the SCSI cmd. Link: https://lore.kernel.org/r/1609157080-26283-1-git-send-email-cang@codeaurora.org Reviewed-by: Avri Altman Reviewed-by: Stanley Chu Signed-off-by: Can Guo Signed-off-by: Martin K. Petersen --- drivers/scsi/ufs/ufshcd.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index 00ee8cabf8e00..e31d2c5c7b23b 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -6659,19 +6659,16 @@ static int ufshcd_eh_device_reset_handler(struct scsi_cmnd *cmd) { struct Scsi_Host *host; struct ufs_hba *hba; - unsigned int tag; u32 pos; int err; - u8 resp = 0xF; - struct ufshcd_lrb *lrbp; + u8 resp = 0xF, lun; unsigned long flags; host = cmd->device->host; hba = shost_priv(host); - tag = cmd->request->tag; - lrbp = &hba->lrb[tag]; - err = ufshcd_issue_tm_cmd(hba, lrbp->lun, 0, UFS_LOGICAL_RESET, &resp); + lun = ufshcd_scsi_to_upiu_lun(cmd->device->lun); + err = ufshcd_issue_tm_cmd(hba, lun, 0, UFS_LOGICAL_RESET, &resp); if (err || resp != UPIU_TASK_MANAGEMENT_FUNC_COMPL) { if (!err) err = resp; @@ -6680,7 +6677,7 @@ static int ufshcd_eh_device_reset_handler(struct scsi_cmnd *cmd) /* clear the commands that were pending for corresponding LUN */ for_each_set_bit(pos, &hba->outstanding_reqs, hba->nutrs) { - if (hba->lrb[pos].lun == lrbp->lun) { + if (hba->lrb[pos].lun == lun) { err = ufshcd_clear_cmd(hba, pos); if (err) break; -- GitLab From d50c7986fbf0e2167279e110a2ed5bd8e811c660 Mon Sep 17 00:00:00 2001 From: Nilesh Javali Date: Thu, 17 Dec 2020 02:51:44 -0800 Subject: [PATCH 0242/2012] scsi: qedi: Correct max length of CHAP secret The CHAP secret displayed garbage characters causing iSCSI login authentication failure. Correct the CHAP password max length. Link: https://lore.kernel.org/r/20201217105144.8055-1-njavali@marvell.com Reviewed-by: Lee Duncan Signed-off-by: Nilesh Javali Signed-off-by: Martin K. Petersen --- drivers/scsi/qedi/qedi_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/qedi/qedi_main.c b/drivers/scsi/qedi/qedi_main.c index f5fc7f518f8af..47ad64b066236 100644 --- a/drivers/scsi/qedi/qedi_main.c +++ b/drivers/scsi/qedi/qedi_main.c @@ -2245,7 +2245,7 @@ qedi_show_boot_tgt_info(struct qedi_ctx *qedi, int type, chap_name); break; case ISCSI_BOOT_TGT_CHAP_SECRET: - rc = sprintf(buf, "%.*s\n", NVM_ISCSI_CFG_CHAP_NAME_MAX_LEN, + rc = sprintf(buf, "%.*s\n", NVM_ISCSI_CFG_CHAP_PWD_MAX_LEN, chap_secret); break; case ISCSI_BOOT_TGT_REV_CHAP_NAME: @@ -2253,7 +2253,7 @@ qedi_show_boot_tgt_info(struct qedi_ctx *qedi, int type, mchap_name); break; case ISCSI_BOOT_TGT_REV_CHAP_SECRET: - rc = sprintf(buf, "%.*s\n", NVM_ISCSI_CFG_CHAP_NAME_MAX_LEN, + rc = sprintf(buf, "%.*s\n", NVM_ISCSI_CFG_CHAP_PWD_MAX_LEN, mchap_secret); break; case ISCSI_BOOT_TGT_FLAGS: -- GitLab From 39718fe7adb1a79f78be23f058299bc038cbe161 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 17 Dec 2020 17:20:19 +0000 Subject: [PATCH 0243/2012] scsi: mpt3sas: Fix spelling mistake in Kconfig "compatiblity" -> "compatibility" There is a spelling mistake in the Kconfig help text. Fix it. Link: https://lore.kernel.org/r/20201217172019.57768-1-colin.king@canonical.com Signed-off-by: Colin Ian King Signed-off-by: Martin K. Petersen --- drivers/scsi/mpt3sas/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/mpt3sas/Kconfig b/drivers/scsi/mpt3sas/Kconfig index 86209455172d6..c299f7e078fb9 100644 --- a/drivers/scsi/mpt3sas/Kconfig +++ b/drivers/scsi/mpt3sas/Kconfig @@ -79,5 +79,5 @@ config SCSI_MPT2SAS select SCSI_MPT3SAS depends on PCI && SCSI help - Dummy config option for backwards compatiblity: configure the MPT3SAS + Dummy config option for backwards compatibility: configure the MPT3SAS driver instead. -- GitLab From 3b01d7ea4dae907d34fa0eeb3f17bacd714c6d0c Mon Sep 17 00:00:00 2001 From: Dinghao Liu Date: Sat, 26 Dec 2020 14:15:03 +0800 Subject: [PATCH 0244/2012] scsi: scsi_debug: Fix memleak in scsi_debug_init() When sdeb_zbc_model does not match BLK_ZONED_NONE, BLK_ZONED_HA or BLK_ZONED_HM, we should free sdebug_q_arr to prevent memleak. Also there is no need to execute sdebug_erase_store() on failure of sdeb_zbc_model_str(). Link: https://lore.kernel.org/r/20201226061503.20050-1-dinghao.liu@zju.edu.cn Acked-by: Douglas Gilbert Signed-off-by: Dinghao Liu Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_debug.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c index 24c0f7ec03511..4a08c450b756f 100644 --- a/drivers/scsi/scsi_debug.c +++ b/drivers/scsi/scsi_debug.c @@ -6740,7 +6740,7 @@ static int __init scsi_debug_init(void) k = sdeb_zbc_model_str(sdeb_zbc_model_s); if (k < 0) { ret = k; - goto free_vm; + goto free_q_arr; } sdeb_zbc_model = k; switch (sdeb_zbc_model) { @@ -6753,7 +6753,8 @@ static int __init scsi_debug_init(void) break; default: pr_err("Invalid ZBC model\n"); - return -EINVAL; + ret = -EINVAL; + goto free_q_arr; } } if (sdeb_zbc_model != BLK_ZONED_NONE) { -- GitLab From e5cc9002caafacbaa8dab878d17a313192c3b03b Mon Sep 17 00:00:00 2001 From: "Ewan D. Milne" Date: Mon, 7 Dec 2020 17:10:21 -0500 Subject: [PATCH 0245/2012] scsi: sd: Suppress spurious errors when WRITE SAME is being disabled The block layer code will split a large zeroout request into multiple bios and if WRITE SAME is disabled because the storage device reports that it does not support it (or support the length used), we can get an error message from the block layer despite the setting of RQF_QUIET on the first request. This is because more than one request may have already been submitted. Fix this by setting RQF_QUIET when BLK_STS_TARGET is returned to fail the request early, we don't need to log a message because we did not actually submit the command to the device, and the block layer code will handle the error by submitting individual write bios. Link: https://lore.kernel.org/r/20201207221021.28243-1-emilne@redhat.com Reviewed-by: Christoph Hellwig Signed-off-by: Ewan D. Milne Signed-off-by: Martin K. Petersen --- drivers/scsi/sd.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 679c2c0250476..b766ad54e4a54 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -984,8 +984,10 @@ static blk_status_t sd_setup_write_zeroes_cmnd(struct scsi_cmnd *cmd) } } - if (sdp->no_write_same) + if (sdp->no_write_same) { + rq->rq_flags |= RQF_QUIET; return BLK_STS_TARGET; + } if (sdkp->ws16 || lba > 0xffffffff || nr_blocks > 0xffff) return sd_setup_write_same16_cmnd(cmd, false); -- GitLab From be2553358cd40c0db11d1aa96f819c07413b2aae Mon Sep 17 00:00:00 2001 From: Lukas Bulwahn Date: Mon, 14 Dec 2020 10:54:24 +0100 Subject: [PATCH 0246/2012] scsi: sd: Remove obsolete variable in sd_remove() Commit 996e509bbc95 ("sd: use __register_blkdev to avoid a modprobe for an unregistered dev_t") removed blk_register_region(devt, ...) in sd_remove() and since then, devt is unused in sd_remove(). Hence, make W=1 warns: drivers/scsi/sd.c:3516:8: warning: variable 'devt' set but not used [-Wunused-but-set-variable] Simply remove this obsolete variable. [mkp: fixed commit sha] Link: https://lore.kernel.org/r/20201214095424.12479-1-lukas.bulwahn@gmail.com Reviewed-by: Christoph Hellwig Reviewed-by: Nathan Chancellor Acked-by: Martin K. Petersen Signed-off-by: Lukas Bulwahn Signed-off-by: Martin K. Petersen --- drivers/scsi/sd.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index b766ad54e4a54..a3d2d4bc4a3dc 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -3512,10 +3512,8 @@ static int sd_probe(struct device *dev) static int sd_remove(struct device *dev) { struct scsi_disk *sdkp; - dev_t devt; sdkp = dev_get_drvdata(dev); - devt = disk_devt(sdkp->disk); scsi_autopm_get_device(sdkp->device); async_synchronize_full_domain(&scsi_sd_pm_domain); -- GitLab From 19fce0470f05031e6af36e49ce222d0f0050d432 Mon Sep 17 00:00:00 2001 From: James Smart Date: Tue, 1 Dec 2020 17:52:43 -0800 Subject: [PATCH 0247/2012] nvme-fc: avoid calling _nvme_fc_abort_outstanding_ios from interrupt context Recent patches changed calling sequences. nvme_fc_abort_outstanding_ios used to be called from a timeout or work context. Now it is being called in an io completion context, which can be an interrupt handler. Unfortunately, the abort outstanding ios routine attempts to stop nvme queues and nested routines that may try to sleep, which is in conflict with the interrupt handler. Correct replacing the direct call with a work element scheduling, and the abort outstanding ios routine will be called in the work element. Fixes: 95ced8a2c72d ("nvme-fc: eliminate terminate_io use by nvme_fc_error_recovery") Signed-off-by: James Smart Reported-by: Daniel Wagner Tested-by: Daniel Wagner Signed-off-by: Christoph Hellwig --- drivers/nvme/host/fc.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 38373a0e86efb..5f36cfa8136c0 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -166,6 +166,7 @@ struct nvme_fc_ctrl { struct blk_mq_tag_set admin_tag_set; struct blk_mq_tag_set tag_set; + struct work_struct ioerr_work; struct delayed_work connect_work; struct kref ref; @@ -1888,6 +1889,15 @@ __nvme_fc_fcpop_chk_teardowns(struct nvme_fc_ctrl *ctrl, } } +static void +nvme_fc_ctrl_ioerr_work(struct work_struct *work) +{ + struct nvme_fc_ctrl *ctrl = + container_of(work, struct nvme_fc_ctrl, ioerr_work); + + nvme_fc_error_recovery(ctrl, "transport detected io error"); +} + static void nvme_fc_fcpio_done(struct nvmefc_fcp_req *req) { @@ -2046,7 +2056,7 @@ done: check_error: if (terminate_assoc) - nvme_fc_error_recovery(ctrl, "transport detected io error"); + queue_work(nvme_reset_wq, &ctrl->ioerr_work); } static int @@ -3233,6 +3243,7 @@ nvme_fc_delete_ctrl(struct nvme_ctrl *nctrl) { struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl); + cancel_work_sync(&ctrl->ioerr_work); cancel_delayed_work_sync(&ctrl->connect_work); /* * kill the association on the link side. this will block @@ -3449,6 +3460,7 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, INIT_WORK(&ctrl->ctrl.reset_work, nvme_fc_reset_ctrl_work); INIT_DELAYED_WORK(&ctrl->connect_work, nvme_fc_connect_ctrl_work); + INIT_WORK(&ctrl->ioerr_work, nvme_fc_ctrl_ioerr_work); spin_lock_init(&ctrl->lock); /* io queue count */ @@ -3540,6 +3552,7 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, fail_ctrl: nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_DELETING); + cancel_work_sync(&ctrl->ioerr_work); cancel_work_sync(&ctrl->ctrl.reset_work); cancel_delayed_work_sync(&ctrl->connect_work); -- GitLab From 2b54996b7d56badc563755840838614f2fa9c4de Mon Sep 17 00:00:00 2001 From: James Smart Date: Mon, 7 Dec 2020 12:29:40 -0800 Subject: [PATCH 0248/2012] nvme-fcloop: Fix sscanf type and list_first_entry_or_null warnings Kernel robot had the following warnings: >> fcloop.c:1506:6: warning: %x in format string (no. 1) requires >> 'unsigned int *' but the argument type is 'signed int *'. >> [invalidScanfArgType_int] >> if (sscanf(buf, "%x:%d:%d", &opcode, &starting, &amount) != 3) >> ^ Resolve by changing opcode from and int to an unsigned int and >> fcloop.c:1632:32: warning: Uninitialized variable: lport [uninitvar] >> ret = __wait_localport_unreg(lport); >> ^ >> fcloop.c:1615:28: warning: Uninitialized variable: nport [uninitvar] >> ret = __remoteport_unreg(nport, rport); >> ^ These aren't actual issues as the values are assigned prior to use. It appears the tool doesn't understand list_first_entry_or_null(). Regardless, quiet the tool by initializing the pointers to NULL at declaration. Signed-off-by: James Smart Signed-off-by: Christoph Hellwig --- drivers/nvme/target/fcloop.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/nvme/target/fcloop.c b/drivers/nvme/target/fcloop.c index 733d9363900e4..68213f0a052bb 100644 --- a/drivers/nvme/target/fcloop.c +++ b/drivers/nvme/target/fcloop.c @@ -1501,7 +1501,8 @@ static ssize_t fcloop_set_cmd_drop(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { - int opcode, starting, amount; + unsigned int opcode; + int starting, amount; if (sscanf(buf, "%x:%d:%d", &opcode, &starting, &amount) != 3) return -EBADRQC; @@ -1588,8 +1589,8 @@ out_destroy_class: static void __exit fcloop_exit(void) { - struct fcloop_lport *lport; - struct fcloop_nport *nport; + struct fcloop_lport *lport = NULL; + struct fcloop_nport *nport = NULL; struct fcloop_tport *tport; struct fcloop_rport *rport; unsigned long flags; -- GitLab From 7ee5c78ca3895d44e918c38332921983ed678be0 Mon Sep 17 00:00:00 2001 From: Gopal Tiwari Date: Fri, 4 Dec 2020 21:46:57 +0530 Subject: [PATCH 0249/2012] nvme-pci: mark Samsung PM1725a as IGNORE_DEV_SUBNQN A system with more than one of these SSDs will only have one usable. Hence the kernel fails to detect nvme devices due to duplicate cntlids. [ 6.274554] nvme nvme1: Duplicate cntlid 33 with nvme0, rejecting [ 6.274566] nvme nvme1: Removing after probe failure status: -22 Adding the NVME_QUIRK_IGNORE_DEV_SUBNQN quirk to resolves the issue. Signed-off-by: Gopal Tiwari Signed-off-by: Christoph Hellwig --- drivers/nvme/host/pci.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index b4385cb0ff609..553871e6962b8 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -3196,7 +3196,8 @@ static const struct pci_device_id nvme_id_table[] = { { PCI_DEVICE(0x144d, 0xa821), /* Samsung PM1725 */ .driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, }, { PCI_DEVICE(0x144d, 0xa822), /* Samsung PM1725a */ - .driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, }, + .driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY | + NVME_QUIRK_IGNORE_DEV_SUBNQN, }, { PCI_DEVICE(0x1d1d, 0x1f1f), /* LighNVM qemu device */ .driver_data = NVME_QUIRK_LIGHTNVM, }, { PCI_DEVICE(0x1d1d, 0x2807), /* CNEX WL */ -- GitLab From 5c11f7d9f843bdd24cd29b95401938bc3f168070 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Mon, 21 Dec 2020 00:03:39 -0800 Subject: [PATCH 0250/2012] nvme-tcp: Fix possible race of io_work and direct send We may send a request (with or without its data) from two paths: 1. From our I/O context nvme_tcp_io_work which is triggered from: - queue_rq - r2t reception - socket data_ready and write_space callbacks 2. Directly from queue_rq if the send_list is empty (because we want to save the context switch associated with scheduling our io_work). However, given that now we have the send_mutex, we may run into a race condition where none of these contexts will send the pending payload to the controller. Both io_work send path and queue_rq send path opportunistically attempt to acquire the send_mutex however queue_rq only attempts to send a single request, and if io_work context fails to acquire the send_mutex it will complete without rescheduling itself. The race can trigger with the following sequence: 1. queue_rq sends request (no incapsule data) and blocks 2. RX path receives r2t - prepares data PDU to send, adds h2cdata PDU to the send_list and schedules io_work 3. io_work triggers and cannot acquire the send_mutex - because of (1), ends without self rescheduling 4. queue_rq completes the send, and completes ==> no context will send the h2cdata - timeout. Fix this by having queue_rq sending as much as it can from the send_list such that if it still has any left, its because the socket buffer is full and the socket write_space callback will trigger, thus guaranteeing that a context will be scheduled to send the h2cdata PDU. Fixes: db5ad6b7f8cd ("nvme-tcp: try to send request in queue_rq context") Reported-by: Potnuri Bharat Teja Reported-by: Samuel Jones Signed-off-by: Sagi Grimberg Tested-by: Potnuri Bharat Teja Signed-off-by: Christoph Hellwig --- drivers/nvme/host/tcp.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 1ba6599274427..979ee31b8dd1c 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -262,6 +262,16 @@ static inline void nvme_tcp_advance_req(struct nvme_tcp_request *req, } } +static inline void nvme_tcp_send_all(struct nvme_tcp_queue *queue) +{ + int ret; + + /* drain the send queue as much as we can... */ + do { + ret = nvme_tcp_try_send(queue); + } while (ret > 0); +} + static inline void nvme_tcp_queue_request(struct nvme_tcp_request *req, bool sync, bool last) { @@ -279,7 +289,7 @@ static inline void nvme_tcp_queue_request(struct nvme_tcp_request *req, if (queue->io_cpu == smp_processor_id() && sync && empty && mutex_trylock(&queue->send_mutex)) { queue->more_requests = !last; - nvme_tcp_try_send(queue); + nvme_tcp_send_all(queue); queue->more_requests = false; mutex_unlock(&queue->send_mutex); } else if (last) { -- GitLab From 62df80165d7f197c9c0652e7416164f294a96661 Mon Sep 17 00:00:00 2001 From: Lalithambika Krishnakumar Date: Wed, 23 Dec 2020 14:09:00 -0800 Subject: [PATCH 0251/2012] nvme: avoid possible double fetch in handling CQE While handling the completion queue, keep a local copy of the command id from the DMA-accessible completion entry. This silences a time-of-check to time-of-use (TOCTOU) warning from KF/x[1], with respect to a Thunderclap[2] vulnerability analysis. The double-read impact appears benign. There may be a theoretical window for @command_id to be used as an adversary-controlled array-index-value for mounting a speculative execution attack, but that mitigation is saved for a potential follow-on. A man-in-the-middle attack on the data payload is out of scope for this analysis and is hopefully mitigated by filesystem integrity mechanisms. [1] https://github.com/intel/kernel-fuzzer-for-xen-project [2] http://thunderclap.io/thunderclap-paper-ndss2019.pdf Signed-off-by: Lalithambika Krishna Kumar Signed-off-by: Christoph Hellwig --- drivers/nvme/host/pci.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 553871e6962b8..50d9a20568a28 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -967,6 +967,7 @@ static inline struct blk_mq_tags *nvme_queue_tagset(struct nvme_queue *nvmeq) static inline void nvme_handle_cqe(struct nvme_queue *nvmeq, u16 idx) { struct nvme_completion *cqe = &nvmeq->cqes[idx]; + __u16 command_id = READ_ONCE(cqe->command_id); struct request *req; /* @@ -975,17 +976,17 @@ static inline void nvme_handle_cqe(struct nvme_queue *nvmeq, u16 idx) * aborts. We don't even bother to allocate a struct request * for them but rather special case them here. */ - if (unlikely(nvme_is_aen_req(nvmeq->qid, cqe->command_id))) { + if (unlikely(nvme_is_aen_req(nvmeq->qid, command_id))) { nvme_complete_async_event(&nvmeq->dev->ctrl, cqe->status, &cqe->result); return; } - req = blk_mq_tag_to_rq(nvme_queue_tagset(nvmeq), cqe->command_id); + req = blk_mq_tag_to_rq(nvme_queue_tagset(nvmeq), command_id); if (unlikely(!req)) { dev_warn(nvmeq->dev->ctrl.device, "invalid id %d completed on queue %d\n", - cqe->command_id, le16_to_cpu(cqe->sq_id)); + command_id, le16_to_cpu(cqe->sq_id)); return; } -- GitLab From 9b66fc02bec0ca613bc6d4c1d0049f727a95567d Mon Sep 17 00:00:00 2001 From: Minwoo Im Date: Wed, 30 Dec 2020 20:22:44 +0900 Subject: [PATCH 0252/2012] nvme: unexport functions with no external caller There are no callers for nvme_reset_ctrl_sync() and nvme_alloc_request_qid() so that we keep the symbols exported. Unexport those functions, mark them static and update the header file respectively. Signed-off-by: Minwoo Im Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 6 ++---- drivers/nvme/host/nvme.h | 3 --- 2 files changed, 2 insertions(+), 7 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index ce1b615194413..70a63d7c1d028 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -179,7 +179,7 @@ int nvme_reset_ctrl(struct nvme_ctrl *ctrl) } EXPORT_SYMBOL_GPL(nvme_reset_ctrl); -int nvme_reset_ctrl_sync(struct nvme_ctrl *ctrl) +static int nvme_reset_ctrl_sync(struct nvme_ctrl *ctrl) { int ret; @@ -192,7 +192,6 @@ int nvme_reset_ctrl_sync(struct nvme_ctrl *ctrl) return ret; } -EXPORT_SYMBOL_GPL(nvme_reset_ctrl_sync); static void nvme_do_delete_ctrl(struct nvme_ctrl *ctrl) { @@ -578,7 +577,7 @@ struct request *nvme_alloc_request(struct request_queue *q, } EXPORT_SYMBOL_GPL(nvme_alloc_request); -struct request *nvme_alloc_request_qid(struct request_queue *q, +static struct request *nvme_alloc_request_qid(struct request_queue *q, struct nvme_command *cmd, blk_mq_req_flags_t flags, int qid) { struct request *req; @@ -589,7 +588,6 @@ struct request *nvme_alloc_request_qid(struct request_queue *q, nvme_init_request(req, cmd); return req; } -EXPORT_SYMBOL_GPL(nvme_alloc_request_qid); static int nvme_toggle_streams(struct nvme_ctrl *ctrl, bool enable) { diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 7e49f61f81df8..9c4fbfe44c006 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -610,8 +610,6 @@ void nvme_start_freeze(struct nvme_ctrl *ctrl); #define NVME_QID_ANY -1 struct request *nvme_alloc_request(struct request_queue *q, struct nvme_command *cmd, blk_mq_req_flags_t flags); -struct request *nvme_alloc_request_qid(struct request_queue *q, - struct nvme_command *cmd, blk_mq_req_flags_t flags, int qid); void nvme_cleanup_cmd(struct request *req); blk_status_t nvme_setup_cmd(struct nvme_ns *ns, struct request *req, struct nvme_command *cmd); @@ -630,7 +628,6 @@ int nvme_get_features(struct nvme_ctrl *dev, unsigned int fid, int nvme_set_queue_count(struct nvme_ctrl *ctrl, int *count); void nvme_stop_keep_alive(struct nvme_ctrl *ctrl); int nvme_reset_ctrl(struct nvme_ctrl *ctrl); -int nvme_reset_ctrl_sync(struct nvme_ctrl *ctrl); int nvme_try_sched_reset(struct nvme_ctrl *ctrl); int nvme_delete_ctrl(struct nvme_ctrl *ctrl); -- GitLab From 9ceb7863537748c67fa43ac4f2f565819bbd36e4 Mon Sep 17 00:00:00 2001 From: Israel Rukshin Date: Tue, 5 Jan 2021 10:46:54 +0200 Subject: [PATCH 0253/2012] nvmet-rdma: Fix list_del corruption on queue establishment failure When a queue is in NVMET_RDMA_Q_CONNECTING state, it may has some requests at rsp_wait_list. In case a disconnect occurs at this state, no one will empty this list and will return the requests to free_rsps list. Normally nvmet_rdma_queue_established() free those requests after moving the queue to NVMET_RDMA_Q_LIVE state, but in this case __nvmet_rdma_queue_disconnect() is called before. The crash happens at nvmet_rdma_free_rsps() when calling list_del(&rsp->free_list), because the request exists only at the wait list. To fix the issue, simply clear rsp_wait_list when destroying the queue. Signed-off-by: Israel Rukshin Reviewed-by: Max Gurtovoy Signed-off-by: Christoph Hellwig --- drivers/nvme/target/rdma.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c index 5c1e7cb7fe0de..bdfc22eb2a10f 100644 --- a/drivers/nvme/target/rdma.c +++ b/drivers/nvme/target/rdma.c @@ -1641,6 +1641,16 @@ static void __nvmet_rdma_queue_disconnect(struct nvmet_rdma_queue *queue) spin_lock_irqsave(&queue->state_lock, flags); switch (queue->state) { case NVMET_RDMA_Q_CONNECTING: + while (!list_empty(&queue->rsp_wait_list)) { + struct nvmet_rdma_rsp *rsp; + + rsp = list_first_entry(&queue->rsp_wait_list, + struct nvmet_rdma_rsp, + wait_list); + list_del(&rsp->wait_list); + nvmet_rdma_put_rsp(rsp); + } + fallthrough; case NVMET_RDMA_Q_LIVE: queue->state = NVMET_RDMA_Q_DISCONNECTING; disconnect = true; -- GitLab From 2b59787a223b79228fed9ade1bf6936194ddb8cd Mon Sep 17 00:00:00 2001 From: Max Gurtovoy Date: Tue, 5 Jan 2021 10:34:02 +0000 Subject: [PATCH 0254/2012] nvme: remove the unused status argument from nvme_trace_bio_complete The only used argument in this function is the "req". Signed-off-by: Max Gurtovoy Reviewed-by: Minwoo Im Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 2 +- drivers/nvme/host/nvme.h | 6 ++---- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 70a63d7c1d028..f320273fc6727 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -330,7 +330,7 @@ static inline void nvme_end_req(struct request *req) req->__sector = nvme_lba_to_sect(req->q->queuedata, le64_to_cpu(nvme_req(req)->result.u64)); - nvme_trace_bio_complete(req, status); + nvme_trace_bio_complete(req); blk_mq_end_request(req, status); } diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 9c4fbfe44c006..88a6b97247f50 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -672,8 +672,7 @@ static inline void nvme_mpath_check_last_path(struct nvme_ns *ns) kblockd_schedule_work(&head->requeue_work); } -static inline void nvme_trace_bio_complete(struct request *req, - blk_status_t status) +static inline void nvme_trace_bio_complete(struct request *req) { struct nvme_ns *ns = req->q->queuedata; @@ -728,8 +727,7 @@ static inline void nvme_mpath_clear_ctrl_paths(struct nvme_ctrl *ctrl) static inline void nvme_mpath_check_last_path(struct nvme_ns *ns) { } -static inline void nvme_trace_bio_complete(struct request *req, - blk_status_t status) +static inline void nvme_trace_bio_complete(struct request *req) { } static inline int nvme_mpath_init(struct nvme_ctrl *ctrl, -- GitLab From 3ce47d95b7346dcafd9bed3556a8d072cb2b8571 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Mon, 4 Jan 2021 13:59:53 -0700 Subject: [PATCH 0255/2012] powerpc: Handle .text.{hot,unlikely}.* in linker script Commit eff8728fe698 ("vmlinux.lds.h: Add PGO and AutoFDO input sections") added ".text.unlikely.*" and ".text.hot.*" due to an LLVM change [1]. After another LLVM change [2], these sections are seen in some PowerPC builds, where there is a orphan section warning then build failure: $ make -skj"$(nproc)" \ ARCH=powerpc CROSS_COMPILE=powerpc64le-linux-gnu- LLVM=1 O=out \ distclean powernv_defconfig zImage.epapr ld.lld: warning: kernel/built-in.a(panic.o):(.text.unlikely.) is being placed in '.text.unlikely.' ... ld.lld: warning: address (0xc000000000009314) of section .text is not a multiple of alignment (256) ... ERROR: start_text address is c000000000009400, should be c000000000008000 ERROR: try to enable LD_HEAD_STUB_CATCH config option ERROR: see comments in arch/powerpc/tools/head_check.sh ... Explicitly handle these sections like in the main linker script so there is no more build failure. [1]: https://reviews.llvm.org/D79600 [2]: https://reviews.llvm.org/D92493 Fixes: 83a092cf95f2 ("powerpc: Link warning for orphan sections") Cc: stable@vger.kernel.org Signed-off-by: Nathan Chancellor Signed-off-by: Michael Ellerman Link: https://github.com/ClangBuiltLinux/linux/issues/1218 Link: https://lore.kernel.org/r/20210104205952.1399409-1-natechancellor@gmail.com --- arch/powerpc/kernel/vmlinux.lds.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index 0318ba436f34e..8e0b1298bf19b 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -85,7 +85,7 @@ SECTIONS ALIGN_FUNCTION(); #endif /* careful! __ftr_alt_* sections need to be close to .text */ - *(.text.hot TEXT_MAIN .text.fixup .text.unlikely .fixup __ftr_alt_* .ref.text); + *(.text.hot .text.hot.* TEXT_MAIN .text.fixup .text.unlikely .text.unlikely.* .fixup __ftr_alt_* .ref.text); #ifdef CONFIG_PPC64 *(.tramp.ftrace.text); #endif -- GitLab From ad0a6bad44758afa3b440c254a24999a0c7e35d5 Mon Sep 17 00:00:00 2001 From: Wei Liu Date: Tue, 5 Jan 2021 17:50:43 +0000 Subject: [PATCH 0256/2012] x86/hyperv: check cpu mask after interrupt has been disabled We've observed crashes due to an empty cpu mask in hyperv_flush_tlb_others. Obviously the cpu mask in question is changed between the cpumask_empty call at the beginning of the function and when it is actually used later. One theory is that an interrupt comes in between and a code path ends up changing the mask. Move the check after interrupt has been disabled to see if it fixes the issue. Signed-off-by: Wei Liu Cc: stable@kernel.org Link: https://lore.kernel.org/r/20210105175043.28325-1-wei.liu@kernel.org Reviewed-by: Michael Kelley --- arch/x86/hyperv/mmu.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/arch/x86/hyperv/mmu.c b/arch/x86/hyperv/mmu.c index 5208ba49c89a9..2c87350c1fb09 100644 --- a/arch/x86/hyperv/mmu.c +++ b/arch/x86/hyperv/mmu.c @@ -66,11 +66,17 @@ static void hyperv_flush_tlb_others(const struct cpumask *cpus, if (!hv_hypercall_pg) goto do_native; - if (cpumask_empty(cpus)) - return; - local_irq_save(flags); + /* + * Only check the mask _after_ interrupt has been disabled to avoid the + * mask changing under our feet. + */ + if (cpumask_empty(cpus)) { + local_irq_restore(flags); + return; + } + flush_pcpu = (struct hv_tlb_flush **) this_cpu_ptr(hyperv_pcpu_input_arg); -- GitLab From cb7f4a8b1fb426a175d1708f05581939c61329d4 Mon Sep 17 00:00:00 2001 From: Ying-Tsun Huang Date: Tue, 15 Dec 2020 15:07:20 +0800 Subject: [PATCH 0257/2012] x86/mtrr: Correct the range check before performing MTRR type lookups In mtrr_type_lookup(), if the input memory address region is not in the MTRR, over 4GB, and not over the top of memory, a write-back attribute is returned. These condition checks are for ensuring the input memory address region is actually mapped to the physical memory. However, if the end address is just aligned with the top of memory, the condition check treats the address is over the top of memory, and write-back attribute is not returned. And this hits in a real use case with NVDIMM: the nd_pmem module tries to map NVDIMMs as cacheable memories when NVDIMMs are connected. If a NVDIMM is the last of the DIMMs, the performance of this NVDIMM becomes very low since it is aligned with the top of memory and its memory type is uncached-minus. Move the input end address change to inclusive up into mtrr_type_lookup(), before checking for the top of memory in either mtrr_type_lookup_{variable,fixed}() helpers. [ bp: Massage commit message. ] Fixes: 0cc705f56e40 ("x86/mm/mtrr: Clean up mtrr_type_lookup()") Signed-off-by: Ying-Tsun Huang Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20201215070721.4349-1-ying-tsun.huang@amd.com --- arch/x86/kernel/cpu/mtrr/generic.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c index 23ad8e953dfb1..a29997e6cf9e6 100644 --- a/arch/x86/kernel/cpu/mtrr/generic.c +++ b/arch/x86/kernel/cpu/mtrr/generic.c @@ -167,9 +167,6 @@ static u8 mtrr_type_lookup_variable(u64 start, u64 end, u64 *partial_end, *repeat = 0; *uniform = 1; - /* Make end inclusive instead of exclusive */ - end--; - prev_match = MTRR_TYPE_INVALID; for (i = 0; i < num_var_ranges; ++i) { unsigned short start_state, end_state, inclusive; @@ -261,6 +258,9 @@ u8 mtrr_type_lookup(u64 start, u64 end, u8 *uniform) int repeat; u64 partial_end; + /* Make end inclusive instead of exclusive */ + end--; + if (!mtrr_state_set) return MTRR_TYPE_INVALID; -- GitLab From 3e2224c5867fead6c0b94b84727cc676ac6353a3 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 6 Jan 2021 16:09:26 +0000 Subject: [PATCH 0258/2012] io_uring: Fix return value from alloc_fixed_file_ref_node alloc_fixed_file_ref_node() currently returns an ERR_PTR on failure. io_sqe_files_unregister() expects it to return NULL and since it can only return -ENOMEM, it makes more sense to change alloc_fixed_file_ref_node() to behave that way. Fixes: 1ffc54220c44 ("io_uring: fix io_sqe_files_unregister() hangs") Reported-by: Dan Carpenter Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Jens Axboe --- fs/io_uring.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index dc92ca5090a39..27a8c226abf8c 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -7696,12 +7696,12 @@ static struct fixed_file_ref_node *alloc_fixed_file_ref_node( ref_node = kzalloc(sizeof(*ref_node), GFP_KERNEL); if (!ref_node) - return ERR_PTR(-ENOMEM); + return NULL; if (percpu_ref_init(&ref_node->refs, io_file_data_ref_zero, 0, GFP_KERNEL)) { kfree(ref_node); - return ERR_PTR(-ENOMEM); + return NULL; } INIT_LIST_HEAD(&ref_node->node); INIT_LIST_HEAD(&ref_node->file_list); @@ -7795,9 +7795,9 @@ static int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg, } ref_node = alloc_fixed_file_ref_node(ctx); - if (IS_ERR(ref_node)) { + if (!ref_node) { io_sqe_files_unregister(ctx); - return PTR_ERR(ref_node); + return -ENOMEM; } io_sqe_files_set_node(file_data, ref_node); @@ -7897,8 +7897,8 @@ static int __io_sqe_files_update(struct io_ring_ctx *ctx, return -EINVAL; ref_node = alloc_fixed_file_ref_node(ctx); - if (IS_ERR(ref_node)) - return PTR_ERR(ref_node); + if (!ref_node) + return -ENOMEM; done = 0; fds = u64_to_user_ptr(up->fds); -- GitLab From 00b8c557d096f0930d5c07df618223d3d06902d6 Mon Sep 17 00:00:00 2001 From: Matthias Maennich Date: Wed, 6 Jan 2021 15:52:01 +0000 Subject: [PATCH 0259/2012] staging: ION: remove some references to CONFIG_ION With commit e722a295cf49 ("staging: ion: remove from the tree"), ION and its corresponding config CONFIG_ION is gone. Remove stale references from drivers/staging/media/atomisp/pci and from the recommended Android kernel config. Fixes: e722a295cf49 ("staging: ion: remove from the tree") Cc: Hridya Valsaraju Cc: Rob Herring Cc: linux-media@vger.kernel.org Cc: devel@driverdev.osuosl.org Signed-off-by: Matthias Maennich Link: https://lore.kernel.org/r/20210106155201.2845319-1-maennich@google.com Signed-off-by: Greg Kroah-Hartman --- .../media/atomisp/pci/atomisp_subdev.c | 20 ------------------- kernel/configs/android-recommended.config | 1 - 2 files changed, 21 deletions(-) diff --git a/drivers/staging/media/atomisp/pci/atomisp_subdev.c b/drivers/staging/media/atomisp/pci/atomisp_subdev.c index 52b9fb18c87f0..b666cb23e5ca1 100644 --- a/drivers/staging/media/atomisp/pci/atomisp_subdev.c +++ b/drivers/staging/media/atomisp/pci/atomisp_subdev.c @@ -1062,26 +1062,6 @@ static const struct v4l2_ctrl_config ctrl_select_isp_version = { .def = 0, }; -#if 0 /* #ifdef CONFIG_ION */ -/* - * Control for ISP ion device fd - * - * userspace will open ion device and pass the fd to kernel. - * this fd will be used to map shared fd to buffer. - */ -/* V4L2_CID_ATOMISP_ION_DEVICE_FD is not defined */ -static const struct v4l2_ctrl_config ctrl_ion_dev_fd = { - .ops = &ctrl_ops, - .id = V4L2_CID_ATOMISP_ION_DEVICE_FD, - .type = V4L2_CTRL_TYPE_INTEGER, - .name = "Ion Device Fd", - .min = -1, - .max = 1024, - .step = 1, - .def = ION_FD_UNSET -}; -#endif - static void atomisp_init_subdev_pipe(struct atomisp_sub_device *asd, struct atomisp_video_pipe *pipe, enum v4l2_buf_type buf_type) { diff --git a/kernel/configs/android-recommended.config b/kernel/configs/android-recommended.config index 53d688bdd894c..eb0029c9a6a63 100644 --- a/kernel/configs/android-recommended.config +++ b/kernel/configs/android-recommended.config @@ -81,7 +81,6 @@ CONFIG_INPUT_JOYSTICK=y CONFIG_INPUT_MISC=y CONFIG_INPUT_TABLET=y CONFIG_INPUT_UINPUT=y -CONFIG_ION=y CONFIG_JOYSTICK_XPAD=y CONFIG_JOYSTICK_XPAD_FF=y CONFIG_JOYSTICK_XPAD_LEDS=y -- GitLab From 3d1a90ab0ed93362ec8ac85cf291243c87260c21 Mon Sep 17 00:00:00 2001 From: Dave Wysochanski Date: Fri, 11 Dec 2020 05:12:51 -0500 Subject: [PATCH 0260/2012] NFS4: Fix use-after-free in trace_event_raw_event_nfs4_set_lock It is only safe to call the tracepoint before rpc_put_task() because 'data' is freed inside nfs4_lock_release (rpc_release). Fixes: 48c9579a1afe ("Adding stateid information to tracepoints") Signed-off-by: Dave Wysochanski Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 0ce04e0e5d829..14acd2f791072 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -7111,9 +7111,9 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *f data->arg.new_lock_owner, ret); } else data->cancelled = true; + trace_nfs4_set_lock(fl, state, &data->res.stateid, cmd, ret); rpc_put_task(task); dprintk("%s: done, ret = %d!\n", __func__, ret); - trace_nfs4_set_lock(fl, state, &data->res.stateid, cmd, ret); return ret; } -- GitLab From 0e61f09af48beb41be0954e7be7d3ba2d18c9946 Mon Sep 17 00:00:00 2001 From: Xiaojian Du Date: Mon, 14 Dec 2020 17:05:55 +0800 Subject: [PATCH 0261/2012] drm/amd/pm: correct the sensor value of power for vangogh This patch is to correct the sensor value of power for vangogh. Signed-off-by: Xiaojian Du Reviewed-by: Evan Quan Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c index 8cb4fcee9a2c3..5c1482d4ca43e 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c @@ -252,7 +252,8 @@ static int vangogh_get_smu_metrics_data(struct smu_context *smu, *value = metrics->UvdActivity; break; case METRICS_AVERAGE_SOCKETPOWER: - *value = metrics->CurrentSocketPower; + *value = (metrics->CurrentSocketPower << 8) / + 1000 ; break; case METRICS_TEMPERATURE_EDGE: *value = metrics->GfxTemperature / 100 * -- GitLab From 37030aba0f362cf8b16eb2347c7430b2e9ef719e Mon Sep 17 00:00:00 2001 From: Xiaojian Du Date: Fri, 18 Dec 2020 14:32:02 +0800 Subject: [PATCH 0262/2012] drm/amd/pm: improve the fine grain tuning function for RV/RV2/PCO This patch is to improve the fine grain tuning function for RV/RV2/PCO. This patch adds two new commands: "restore" and "commit". This function uses the pp_od_clk_voltage sysfs file to configure the min and max value of gfx clock frequency manually or restore the default value. Command guide: echo "s level value" > pp_od_clk_voltage "s" - set the sclk frequency "level" - 0 or 1, "0" represents the min value, "1" represents the max value "value" - the target value of sclk frequency, it should be limited in the safe range echo "r" > pp_od_clk_voltage "r" - reset the sclk frequency, restore the default value instantly echo "c" > pp_od_clk_voltage "c" - commit the min and max value of sclk frequency to the system only after the commit command, the target values set by "s" command will take effect. Example: 1)change power profile from "auto" to "manual" $ cat power_dpm_force_performance_level auto $ echo "manual" > power_dpm_force_performance_level $ cat power_dpm_force_performance_level manual 2)check the default sclk frequency $ cat pp_od_clk_voltage OD_SCLK: 0: 200Mhz 1: 1400Mhz OD_RANGE: SCLK: 200MHz 1400MHz 3)use "s" -- set command to configure the min and max sclk frequency $ echo "s 0 600" > pp_od_clk_voltage $ echo "s 1 1000" > pp_od_clk_voltage $ echo "c" > pp_od_clk_voltage $ cat pp_od_clk_voltage OD_SCLK: 0: 600Mhz 1: 1000Mhz OD_RANGE: SCLK: 200MHz 1400MHz 4)use "r" -- reset command to restore the min or max sclk frequency $ echo "r" > pp_od_clk_voltage $ cat pp_od_clk_voltage OD_SCLK: 0: 200Mhz 1: 1400Mhz OD_RANGE: SCLK: 200MHz 1400MHz Signed-off-by: Xiaojian Du Reviewed-by: Evan Quan Signed-off-by: Alex Deucher --- .../drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.c | 114 +++++++++++++++--- .../drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.h | 1 + 2 files changed, 98 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.c index e57e64bbacdc2..0cf899566e31c 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.c +++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.c @@ -251,7 +251,7 @@ static int smu10_set_hard_min_gfxclk_by_freq(struct pp_hwmgr *hwmgr, uint32_t cl smu10_data->gfx_actual_soft_min_freq = clock; smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_SetHardMinGfxClk, - smu10_data->gfx_actual_soft_min_freq, + clock, NULL); } return 0; @@ -948,6 +948,8 @@ static int smu10_print_clock_levels(struct pp_hwmgr *hwmgr, struct smu10_voltage_dependency_table *mclk_table = data->clock_vol_info.vdd_dep_on_fclk; uint32_t i, now, size = 0; + uint32_t min_freq, max_freq = 0; + uint32_t ret = 0; switch (type) { case PP_SCLK: @@ -983,18 +985,28 @@ static int smu10_print_clock_levels(struct pp_hwmgr *hwmgr, break; case OD_SCLK: if (hwmgr->od_enabled) { - size = sprintf(buf, "%s:\n", "OD_SCLK"); + ret = smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetMinGfxclkFrequency, &min_freq); + if (ret) + return ret; + ret = smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetMaxGfxclkFrequency, &max_freq); + if (ret) + return ret; + size = sprintf(buf, "%s:\n", "OD_SCLK"); size += sprintf(buf + size, "0: %10uMhz\n", - (data->gfx_actual_soft_min_freq > 0) ? data->gfx_actual_soft_min_freq : data->gfx_min_freq_limit/100); - size += sprintf(buf + size, "1: %10uMhz\n", data->gfx_max_freq_limit/100); + (data->gfx_actual_soft_min_freq > 0) ? data->gfx_actual_soft_min_freq : min_freq); + size += sprintf(buf + size, "1: %10uMhz\n", + (data->gfx_actual_soft_max_freq > 0) ? data->gfx_actual_soft_max_freq : max_freq); } break; case OD_RANGE: if (hwmgr->od_enabled) { - uint32_t min_freq, max_freq = 0; - smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetMinGfxclkFrequency, &min_freq); - smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetMaxGfxclkFrequency, &max_freq); + ret = smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetMinGfxclkFrequency, &min_freq); + if (ret) + return ret; + ret = smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetMaxGfxclkFrequency, &max_freq); + if (ret) + return ret; size = sprintf(buf, "%s:\n", "OD_RANGE"); size += sprintf(buf + size, "SCLK: %7uMHz %10uMHz\n", @@ -1414,23 +1426,91 @@ static int smu10_set_fine_grain_clk_vol(struct pp_hwmgr *hwmgr, enum PP_OD_DPM_TABLE_COMMAND type, long *input, uint32_t size) { + uint32_t min_freq, max_freq = 0; + struct smu10_hwmgr *smu10_data = (struct smu10_hwmgr *)(hwmgr->backend); + int ret = 0; + if (!hwmgr->od_enabled) { pr_err("Fine grain not support\n"); return -EINVAL; } - if (size != 2) { - pr_err("Input parameter number not correct\n"); - return -EINVAL; - } - if (type == PP_OD_EDIT_SCLK_VDDC_TABLE) { - if (input[0] == 0) - smu10_set_hard_min_gfxclk_by_freq(hwmgr, input[1]); - else if (input[0] == 1) - smu10_set_soft_max_gfxclk_by_freq(hwmgr, input[1]); - else + if (size != 2) { + pr_err("Input parameter number not correct\n"); + return -EINVAL; + } + + if (input[0] == 0) { + smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetMinGfxclkFrequency, &min_freq); + if (input[1] < min_freq) { + pr_err("Fine grain setting minimum sclk (%ld) MHz is less than the minimum allowed (%d) MHz\n", + input[1], min_freq); + return -EINVAL; + } + smu10_data->gfx_actual_soft_min_freq = input[1]; + } else if (input[0] == 1) { + smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetMaxGfxclkFrequency, &max_freq); + if (input[1] > max_freq) { + pr_err("Fine grain setting maximum sclk (%ld) MHz is greater than the maximum allowed (%d) MHz\n", + input[1], max_freq); + return -EINVAL; + } + smu10_data->gfx_actual_soft_max_freq = input[1]; + } else { + return -EINVAL; + } + } else if (type == PP_OD_RESTORE_DEFAULT_TABLE) { + if (size != 0) { + pr_err("Input parameter number not correct\n"); + return -EINVAL; + } + smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetMinGfxclkFrequency, &min_freq); + smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetMaxGfxclkFrequency, &max_freq); + + smu10_data->gfx_actual_soft_min_freq = min_freq; + smu10_data->gfx_actual_soft_max_freq = max_freq; + + ret = smum_send_msg_to_smc_with_parameter(hwmgr, + PPSMC_MSG_SetHardMinGfxClk, + min_freq, + NULL); + if (ret) + return ret; + + ret = smum_send_msg_to_smc_with_parameter(hwmgr, + PPSMC_MSG_SetSoftMaxGfxClk, + max_freq, + NULL); + if (ret) + return ret; + } else if (type == PP_OD_COMMIT_DPM_TABLE) { + if (size != 0) { + pr_err("Input parameter number not correct\n"); + return -EINVAL; + } + + if (smu10_data->gfx_actual_soft_min_freq > smu10_data->gfx_actual_soft_max_freq) { + pr_err("The setting minimun sclk (%d) MHz is greater than the setting maximum sclk (%d) MHz\n", + smu10_data->gfx_actual_soft_min_freq, smu10_data->gfx_actual_soft_max_freq); return -EINVAL; + } + + ret = smum_send_msg_to_smc_with_parameter(hwmgr, + PPSMC_MSG_SetHardMinGfxClk, + smu10_data->gfx_actual_soft_min_freq, + NULL); + if (ret) + return ret; + + ret = smum_send_msg_to_smc_with_parameter(hwmgr, + PPSMC_MSG_SetSoftMaxGfxClk, + smu10_data->gfx_actual_soft_max_freq, + NULL); + if (ret) + return ret; + } else { + return -EINVAL; } return 0; diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.h b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.h index 6c9b5f060902b..28d86d354d50b 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.h +++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.h @@ -283,6 +283,7 @@ struct smu10_hwmgr { uint32_t vclk_soft_min; uint32_t dclk_soft_min; uint32_t gfx_actual_soft_min_freq; + uint32_t gfx_actual_soft_max_freq; uint32_t gfx_min_freq_limit; uint32_t gfx_max_freq_limit; /* in 10Khz*/ -- GitLab From fc996f952df1c63b57e3a08ac612db53bf8abadc Mon Sep 17 00:00:00 2001 From: John Clements Date: Fri, 25 Dec 2020 12:22:51 +0800 Subject: [PATCH 0263/2012] drm/amd/pm: updated PM to I2C controller port on sienna cichlid sienna cichlid interfaces with RAS eeprom on I2C controller port 1 Reviewed-by: Hawking Zhang Signed-off-by: John Clements Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c index 9608745d732fb..12b36eb0ff6a5 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c @@ -2372,7 +2372,7 @@ static void sienna_cichlid_fill_i2c_req(SwI2cRequest_t *req, bool write, { int i; - req->I2CcontrollerPort = 0; + req->I2CcontrollerPort = 1; req->I2CSpeed = 2; req->SlaveAddress = address; req->NumCmds = numbytes; -- GitLab From a7b5d9dd57298333e6e9f4c167f01385d922bbfb Mon Sep 17 00:00:00 2001 From: Kevin Wang Date: Tue, 29 Dec 2020 14:10:28 +0800 Subject: [PATCH 0264/2012] drm/amd/display: fix sysfs amdgpu_current_backlight_pwm NULL pointer issue fix NULL pointer issue when read sysfs amdgpu_current_backlight_pwm sysfs node. Call Trace: [ 248.273833] BUG: kernel NULL pointer dereference, address: 0000000000000130 [ 248.273930] #PF: supervisor read access in kernel mode [ 248.273993] #PF: error_code(0x0000) - not-present page [ 248.274054] PGD 0 P4D 0 [ 248.274092] Oops: 0000 [#1] SMP PTI [ 248.274138] CPU: 2 PID: 1377 Comm: cat Tainted: G OE 5.9.0-rc5-drm-next-5.9+ #1 [ 248.274233] Hardware name: System manufacturer System Product Name/Z170-A, BIOS 3802 03/15/2018 [ 248.274641] RIP: 0010:dc_link_get_backlight_level+0x5/0x70 [amdgpu] [ 248.274718] Code: 67 ff ff ff 41 b9 03 00 00 00 e9 45 ff ff ff d1 ea e9 55 ff ff ff 0f 1f 44 00 00 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 <48> 8b 87 30 01 00 00 48 8b 00 48 8b 88 88 03 00 00 48 8d 81 e8 01 [ 248.274919] RSP: 0018:ffffb5ad809b3df0 EFLAGS: 00010203 [ 248.274982] RAX: ffffa0f77d1c0010 RBX: ffffa0f793ae9168 RCX: 0000000000000001 [ 248.275064] RDX: ffffa0f79753db00 RSI: 0000000000000001 RDI: 0000000000000000 [ 248.275145] RBP: ffffb5ad809b3e00 R08: ffffb5ad809b3da0 R09: 0000000000000000 [ 248.275225] R10: ffffb5ad809b3e68 R11: 0000000000000000 R12: ffffa0f793ae9190 [ 248.275306] R13: ffffb5ad809b3ef0 R14: 0000000000000001 R15: ffffa0f793ae9168 [ 248.275388] FS: 00007f5f1ec4d540(0000) GS:ffffa0f79ec80000(0000) knlGS:0000000000000000 [ 248.275480] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 248.275547] CR2: 0000000000000130 CR3: 000000042a03c005 CR4: 00000000003706e0 [ 248.275628] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 248.275708] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 248.275789] Call Trace: [ 248.276124] ? current_backlight_read+0x24/0x40 [amdgpu] [ 248.276194] seq_read+0xc3/0x3f0 [ 248.276240] full_proxy_read+0x5c/0x90 [ 248.276290] vfs_read+0xa7/0x190 [ 248.276334] ksys_read+0xa7/0xe0 [ 248.276379] __x64_sys_read+0x1a/0x20 [ 248.276429] do_syscall_64+0x37/0x80 [ 248.276477] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 248.276538] RIP: 0033:0x7f5f1e75c191 [ 248.276585] Code: fe ff ff 48 8d 3d b7 9d 0a 00 48 83 ec 08 e8 46 4d 02 00 66 0f 1f 44 00 00 48 8d 05 71 07 2e 00 8b 00 85 c0 75 13 31 c0 0f 05 <48> 3d 00 f0 ff ff 77 57 f3 c3 0f 1f 44 00 00 41 54 55 49 89 d4 53Hw [ 248.276784] RSP: 002b:00007ffcb1fc3f38 EFLAGS: 00000246 ORIG_RAX: 0000000000000000 [ 248.276872] RAX: ffffffffffffffda RBX: 0000000000020000 RCX: 00007f5f1e75c191 [ 248.276953] RDX: 0000000000020000 RSI: 00007f5f1ec2b000 RDI: 0000000000000003 [ 248.277034] RBP: 0000000000020000 R08: 00000000ffffffff R09: 0000000000000000 [ 248.277115] R10: 0000000000000022 R11: 0000000000000246 R12: 00007f5f1ec2b000 [ 248.277195] R13: 0000000000000003 R14: 00007f5f1ec2b00f R15: 0000000000020000 [ 248.277279] Modules linked in: amdgpu(OE) iommu_v2 gpu_sched ttm(OE) drm_kms_helper cec drm i2c_algo_bit fb_sys_fops syscopyarea sysfillrect sysimgblt rpcsec_gss_krb5 auth_rpcgss nfsv4 nfs lockd grace fscache nls_iso8859_1 snd_hda_codec_realtek snd_hda_codec_hdmi snd_hda_codec_generic ledtrig_audio intel_rapl_msr intel_rapl_common snd_hda_intel snd_intel_dspcfg x86_pkg_temp_thermal intel_powerclamp snd_hda_codec snd_hda_core snd_hwdep snd_pcm snd_seq_midi snd_seq_midi_event mei_hdcp coretemp snd_rawmidi snd_seq kvm_intel kvm snd_seq_device snd_timer irqbypass joydev snd input_leds soundcore crct10dif_pclmul crc32_pclmul ghash_clmulni_intel aesni_intel crypto_simd cryptd glue_helper rapl intel_cstate mac_hid mei_me serio_raw mei eeepc_wmi wmi_bmof asus_wmi mxm_wmi intel_wmi_thunderbolt acpi_pad sparse_keymap efi_pstore sch_fq_codel parport_pc ppdev lp parport sunrpc ip_tables x_tables autofs4 hid_logitech_hidpp hid_logitech_dj hid_generic usbhid hid e1000e psmouse ahci libahci wmi video [ 248.278211] CR2: 0000000000000130 [ 248.278221] ---[ end trace 1fbe72fe6f91091d ]--- [ 248.357226] RIP: 0010:dc_link_get_backlight_level+0x5/0x70 [amdgpu] [ 248.357272] Code: 67 ff ff ff 41 b9 03 00 00 00 e9 45 ff ff ff d1 ea e9 55 ff ff ff 0f 1f 44 00 00 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 <48> 8b 87 30 01 00 00 48 8b 00 48 8b 88 88 03 00 00 48 8d 81 e8 01 Signed-off-by: Kevin Wang Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_link.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c b/drivers/gpu/drm/amd/display/dc/core/dc_link.c index 9e1071b2181ff..f4a2088ab1792 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c @@ -2487,9 +2487,14 @@ enum dc_status dc_link_validate_mode_timing( static struct abm *get_abm_from_stream_res(const struct dc_link *link) { int i; - struct dc *dc = link->ctx->dc; + struct dc *dc = NULL; struct abm *abm = NULL; + if (!link || !link->ctx) + return NULL; + + dc = link->ctx->dc; + for (i = 0; i < MAX_PIPES; i++) { struct pipe_ctx pipe_ctx = dc->current_state->res_ctx.pipe_ctx[i]; struct dc_stream_state *stream = pipe_ctx.stream; -- GitLab From 8ae291cc95e49011b736b641b0cfad502b7a1526 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 5 Jan 2021 13:13:27 +0200 Subject: [PATCH 0265/2012] RDMA/ucma: Do not miss ctx destruction steps in some cases The destruction flow is very complicated here because the cm_id can be destroyed from the event handler at any time if the device is hot-removed. This leaves behind a partial ctx with no cm_id in the xarray, and will let user space leak memory. Make everything consistent in this flow in all places: - Return the xarray back to XA_ZERO_ENTRY before beginning any destruction. The thread that reaches this first is responsible to kfree, everyone else does nothing. - Test the xarray during the special hot-removal case to block the queue_work, this has much simpler locking and doesn't require a 'destroying' - Fix the ref initialization so that it is only positive if cm_id != NULL, then rely on that to guide the destruction process in all cases. Now the new ucma_destroy_private_ctx() can be called in all places that want to free the ctx, including all the error unwinds, and none of the details are missed. Fixes: a1d33b70dbbc ("RDMA/ucma: Rework how new connections are passed through event delivery") Link: https://lore.kernel.org/r/20210105111327.230270-1-leon@kernel.org Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/ucma.c | 135 ++++++++++++++++++--------------- 1 file changed, 72 insertions(+), 63 deletions(-) diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c index 7dab9a27a145a..da2512c30ffd5 100644 --- a/drivers/infiniband/core/ucma.c +++ b/drivers/infiniband/core/ucma.c @@ -95,8 +95,6 @@ struct ucma_context { u64 uid; struct list_head list; - /* sync between removal event and id destroy, protected by file mut */ - int destroying; struct work_struct close_work; }; @@ -122,7 +120,7 @@ static DEFINE_XARRAY_ALLOC(ctx_table); static DEFINE_XARRAY_ALLOC(multicast_table); static const struct file_operations ucma_fops; -static int __destroy_id(struct ucma_context *ctx); +static int ucma_destroy_private_ctx(struct ucma_context *ctx); static inline struct ucma_context *_ucma_find_context(int id, struct ucma_file *file) @@ -179,19 +177,14 @@ static void ucma_close_id(struct work_struct *work) /* once all inflight tasks are finished, we close all underlying * resources. The context is still alive till its explicit destryoing - * by its creator. + * by its creator. This puts back the xarray's reference. */ ucma_put_ctx(ctx); wait_for_completion(&ctx->comp); /* No new events will be generated after destroying the id. */ rdma_destroy_id(ctx->cm_id); - /* - * At this point ctx->ref is zero so the only place the ctx can be is in - * a uevent or in __destroy_id(). Since the former doesn't touch - * ctx->cm_id and the latter sync cancels this, there is no races with - * this store. - */ + /* Reading the cm_id without holding a positive ref is not allowed */ ctx->cm_id = NULL; } @@ -204,7 +197,6 @@ static struct ucma_context *ucma_alloc_ctx(struct ucma_file *file) return NULL; INIT_WORK(&ctx->close_work, ucma_close_id); - refcount_set(&ctx->ref, 1); init_completion(&ctx->comp); /* So list_del() will work if we don't do ucma_finish_ctx() */ INIT_LIST_HEAD(&ctx->list); @@ -218,6 +210,13 @@ static struct ucma_context *ucma_alloc_ctx(struct ucma_file *file) return ctx; } +static void ucma_set_ctx_cm_id(struct ucma_context *ctx, + struct rdma_cm_id *cm_id) +{ + refcount_set(&ctx->ref, 1); + ctx->cm_id = cm_id; +} + static void ucma_finish_ctx(struct ucma_context *ctx) { lockdep_assert_held(&ctx->file->mut); @@ -303,7 +302,7 @@ static int ucma_connect_event_handler(struct rdma_cm_id *cm_id, ctx = ucma_alloc_ctx(listen_ctx->file); if (!ctx) goto err_backlog; - ctx->cm_id = cm_id; + ucma_set_ctx_cm_id(ctx, cm_id); uevent = ucma_create_uevent(listen_ctx, event); if (!uevent) @@ -321,8 +320,7 @@ static int ucma_connect_event_handler(struct rdma_cm_id *cm_id, return 0; err_alloc: - xa_erase(&ctx_table, ctx->id); - kfree(ctx); + ucma_destroy_private_ctx(ctx); err_backlog: atomic_inc(&listen_ctx->backlog); /* Returning error causes the new ID to be destroyed */ @@ -356,8 +354,12 @@ static int ucma_event_handler(struct rdma_cm_id *cm_id, wake_up_interruptible(&ctx->file->poll_wait); } - if (event->event == RDMA_CM_EVENT_DEVICE_REMOVAL && !ctx->destroying) - queue_work(system_unbound_wq, &ctx->close_work); + if (event->event == RDMA_CM_EVENT_DEVICE_REMOVAL) { + xa_lock(&ctx_table); + if (xa_load(&ctx_table, ctx->id) == ctx) + queue_work(system_unbound_wq, &ctx->close_work); + xa_unlock(&ctx_table); + } return 0; } @@ -461,13 +463,12 @@ static ssize_t ucma_create_id(struct ucma_file *file, const char __user *inbuf, ret = PTR_ERR(cm_id); goto err1; } - ctx->cm_id = cm_id; + ucma_set_ctx_cm_id(ctx, cm_id); resp.id = ctx->id; if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof(resp))) { - xa_erase(&ctx_table, ctx->id); - __destroy_id(ctx); + ucma_destroy_private_ctx(ctx); return -EFAULT; } @@ -477,8 +478,7 @@ static ssize_t ucma_create_id(struct ucma_file *file, const char __user *inbuf, return 0; err1: - xa_erase(&ctx_table, ctx->id); - kfree(ctx); + ucma_destroy_private_ctx(ctx); return ret; } @@ -516,68 +516,73 @@ static void ucma_cleanup_mc_events(struct ucma_multicast *mc) rdma_unlock_handler(mc->ctx->cm_id); } -/* - * ucma_free_ctx is called after the underlying rdma CM-ID is destroyed. At - * this point, no new events will be reported from the hardware. However, we - * still need to cleanup the UCMA context for this ID. Specifically, there - * might be events that have not yet been consumed by the user space software. - * mutex. After that we release them as needed. - */ -static int ucma_free_ctx(struct ucma_context *ctx) +static int ucma_cleanup_ctx_events(struct ucma_context *ctx) { int events_reported; struct ucma_event *uevent, *tmp; LIST_HEAD(list); - ucma_cleanup_multicast(ctx); - - /* Cleanup events not yet reported to the user. */ + /* Cleanup events not yet reported to the user.*/ mutex_lock(&ctx->file->mut); list_for_each_entry_safe(uevent, tmp, &ctx->file->event_list, list) { - if (uevent->ctx == ctx || uevent->conn_req_ctx == ctx) + if (uevent->ctx != ctx) + continue; + + if (uevent->resp.event == RDMA_CM_EVENT_CONNECT_REQUEST && + xa_cmpxchg(&ctx_table, uevent->conn_req_ctx->id, + uevent->conn_req_ctx, XA_ZERO_ENTRY, + GFP_KERNEL) == uevent->conn_req_ctx) { list_move_tail(&uevent->list, &list); + continue; + } + list_del(&uevent->list); + kfree(uevent); } list_del(&ctx->list); events_reported = ctx->events_reported; mutex_unlock(&ctx->file->mut); /* - * If this was a listening ID then any connections spawned from it - * that have not been delivered to userspace are cleaned up too. - * Must be done outside any locks. + * If this was a listening ID then any connections spawned from it that + * have not been delivered to userspace are cleaned up too. Must be done + * outside any locks. */ list_for_each_entry_safe(uevent, tmp, &list, list) { - list_del(&uevent->list); - if (uevent->resp.event == RDMA_CM_EVENT_CONNECT_REQUEST && - uevent->conn_req_ctx != ctx) - __destroy_id(uevent->conn_req_ctx); + ucma_destroy_private_ctx(uevent->conn_req_ctx); kfree(uevent); } - - mutex_destroy(&ctx->mutex); - kfree(ctx); return events_reported; } -static int __destroy_id(struct ucma_context *ctx) +/* + * When this is called the xarray must have a XA_ZERO_ENTRY in the ctx->id (ie + * the ctx is not public to the user). This either because: + * - ucma_finish_ctx() hasn't been called + * - xa_cmpxchg() succeed to remove the entry (only one thread can succeed) + */ +static int ucma_destroy_private_ctx(struct ucma_context *ctx) { + int events_reported; + /* - * If the refcount is already 0 then ucma_close_id() has already - * destroyed the cm_id, otherwise holding the refcount keeps cm_id - * valid. Prevent queue_work() from being called. + * Destroy the underlying cm_id. New work queuing is prevented now by + * the removal from the xarray. Once the work is cancled ref will either + * be 0 because the work ran to completion and consumed the ref from the + * xarray, or it will be positive because we still have the ref from the + * xarray. This can also be 0 in cases where cm_id was never set */ - if (refcount_inc_not_zero(&ctx->ref)) { - rdma_lock_handler(ctx->cm_id); - ctx->destroying = 1; - rdma_unlock_handler(ctx->cm_id); - ucma_put_ctx(ctx); - } - cancel_work_sync(&ctx->close_work); - /* At this point it's guaranteed that there is no inflight closing task */ - if (ctx->cm_id) + if (refcount_read(&ctx->ref)) ucma_close_id(&ctx->close_work); - return ucma_free_ctx(ctx); + + events_reported = ucma_cleanup_ctx_events(ctx); + ucma_cleanup_multicast(ctx); + + WARN_ON(xa_cmpxchg(&ctx_table, ctx->id, XA_ZERO_ENTRY, NULL, + GFP_KERNEL) != NULL); + mutex_destroy(&ctx->mutex); + kfree(ctx); + return events_reported; } static ssize_t ucma_destroy_id(struct ucma_file *file, const char __user *inbuf, @@ -596,14 +601,17 @@ static ssize_t ucma_destroy_id(struct ucma_file *file, const char __user *inbuf, xa_lock(&ctx_table); ctx = _ucma_find_context(cmd.id, file); - if (!IS_ERR(ctx)) - __xa_erase(&ctx_table, ctx->id); + if (!IS_ERR(ctx)) { + if (__xa_cmpxchg(&ctx_table, ctx->id, ctx, XA_ZERO_ENTRY, + GFP_KERNEL) != ctx) + ctx = ERR_PTR(-ENOENT); + } xa_unlock(&ctx_table); if (IS_ERR(ctx)) return PTR_ERR(ctx); - resp.events_reported = __destroy_id(ctx); + resp.events_reported = ucma_destroy_private_ctx(ctx); if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof(resp))) ret = -EFAULT; @@ -1777,15 +1785,16 @@ static int ucma_close(struct inode *inode, struct file *filp) * prevented by this being a FD release function. The list_add_tail() in * ucma_connect_event_handler() can run concurrently, however it only * adds to the list *after* a listening ID. By only reading the first of - * the list, and relying on __destroy_id() to block + * the list, and relying on ucma_destroy_private_ctx() to block * ucma_connect_event_handler(), no additional locking is needed. */ while (!list_empty(&file->ctx_list)) { struct ucma_context *ctx = list_first_entry( &file->ctx_list, struct ucma_context, list); - xa_erase(&ctx_table, ctx->id); - __destroy_id(ctx); + WARN_ON(xa_cmpxchg(&ctx_table, ctx->id, ctx, XA_ZERO_ENTRY, + GFP_KERNEL) != ctx); + ucma_destroy_private_ctx(ctx); } kfree(file); return 0; -- GitLab From ed1df58585632dff96cc01e14857175dfdf67376 Mon Sep 17 00:00:00 2001 From: Hawking Zhang Date: Thu, 31 Dec 2020 13:05:09 +0800 Subject: [PATCH 0266/2012] drm/amdgpu: switched to cached noretry setting for vangogh global noretry setting is cached to gmc.noretry Signed-off-by: Hawking Zhang Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c index b72c8e4ca36bd..07104a1de3082 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c @@ -310,7 +310,7 @@ static void mmhub_v2_3_setup_vmid_config(struct amdgpu_device *adev) /* Send no-retry XNACK on fault to suppress VM fault storm. */ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, - !amdgpu_noretry); + !adev->gmc.noretry); WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT1_CNTL, i * hub->ctx_distance, tmp); WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32, -- GitLab From 9a029a3facc4d333100308a8e283d9210a36b94c Mon Sep 17 00:00:00 2001 From: Dennis Li Date: Wed, 30 Dec 2020 10:27:42 +0800 Subject: [PATCH 0267/2012] drm/amdgpu: fix a memory protection fault when remove amdgpu device ASD and TA share the same firmware in SIENNA_CICHLID and only TA firmware is requested during boot, so only need release TA firmware when remove device. [ 83.877150] general protection fault, probably for non-canonical address 0x1269f97e6ed04095: 0000 [#1] SMP PTI [ 83.888076] CPU: 0 PID: 1312 Comm: modprobe Tainted: G W OE 5.9.0-rc5-deli-amd-vangogh-0.0.6.6-114-gdd99d5669a96-dirty #2 [ 83.901160] Hardware name: System manufacturer System Product Name/TUF Z370-PLUS GAMING II, BIOS 0411 09/21/2018 [ 83.912353] RIP: 0010:free_fw_priv+0xc/0x120 [ 83.917531] Code: e8 99 cd b0 ff b8 a1 ff ff ff eb 9f 4c 89 f7 e8 8a cd b0 ff b8 f4 ff ff ff eb 90 0f 1f 00 0f 1f 44 00 00 55 48 89 e5 41 54 53 <4c> 8b 67 18 48 89 fb 4c 89 e7 e8 45 94 41 00 b8 ff ff ff ff f0 0f [ 83.937576] RSP: 0018:ffffbc34c13a3ce0 EFLAGS: 00010206 [ 83.943699] RAX: ffffffffbb681850 RBX: ffffa047f117eb60 RCX: 0000000080800055 [ 83.951879] RDX: ffffbc34c1d5f000 RSI: 0000000080800055 RDI: 1269f97e6ed04095 [ 83.959955] RBP: ffffbc34c13a3cf0 R08: 0000000000000000 R09: 0000000000000001 [ 83.968107] R10: ffffbc34c13a3cc8 R11: 00000000ffffff00 R12: ffffa047d6b23378 [ 83.976166] R13: ffffa047d6b23338 R14: ffffa047d6b240c8 R15: 0000000000000000 [ 83.984295] FS: 00007f74f6712540(0000) GS:ffffa047fbe00000(0000) knlGS:0000000000000000 [ 83.993323] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 84.000056] CR2: 0000556a1cca4e18 CR3: 000000021faa8004 CR4: 00000000003706f0 [ 84.008128] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 84.016155] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 84.024174] Call Trace: [ 84.027514] release_firmware.part.11+0x4b/0x70 [ 84.033017] release_firmware+0x13/0x20 [ 84.037803] psp_sw_fini+0x77/0xb0 [amdgpu] [ 84.042857] amdgpu_device_fini+0x38c/0x5d0 [amdgpu] [ 84.048815] amdgpu_driver_unload_kms+0x43/0x70 [amdgpu] [ 84.055055] drm_dev_unregister+0x73/0xb0 [drm] [ 84.060499] drm_dev_unplug+0x28/0x30 [drm] [ 84.065598] amdgpu_dev_uninit+0x1b/0x40 [amdgpu] [ 84.071223] amdgpu_pci_remove+0x4e/0x70 [amdgpu] [ 84.076835] pci_device_remove+0x3e/0xc0 [ 84.081609] device_release_driver_internal+0xfb/0x1c0 [ 84.087558] driver_detach+0x4d/0xa0 [ 84.092041] bus_remove_driver+0x5f/0xe0 [ 84.096854] driver_unregister+0x2f/0x50 [ 84.101594] pci_unregister_driver+0x22/0xa0 [ 84.106806] amdgpu_exit+0x15/0x2b [amdgpu] Signed-off-by: Dennis Li Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index 523d22db094b6..5d6fc369e32c1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -563,7 +563,7 @@ static int psp_asd_load(struct psp_context *psp) * add workaround to bypass it for sriov now. * TODO: add version check to make it common */ - if (amdgpu_sriov_vf(psp->adev) || !psp->asd_fw) + if (amdgpu_sriov_vf(psp->adev) || !psp->asd_ucode_size) return 0; cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL); @@ -2589,11 +2589,10 @@ static int parse_ta_bin_descriptor(struct psp_context *psp, switch (desc->fw_type) { case TA_FW_TYPE_PSP_ASD: - psp->asd_fw_version = le32_to_cpu(desc->fw_version); + psp->asd_fw_version = le32_to_cpu(desc->fw_version); psp->asd_feature_version = le32_to_cpu(desc->fw_version); - psp->asd_ucode_size = le32_to_cpu(desc->size_bytes); + psp->asd_ucode_size = le32_to_cpu(desc->size_bytes); psp->asd_start_addr = ucode_start_addr; - psp->asd_fw = psp->ta_fw; break; case TA_FW_TYPE_PSP_XGMI: psp->ta_xgmi_ucode_version = le32_to_cpu(desc->fw_version); -- GitLab From 88e21af1b3f887d217f2fb14fc7e7d3cd87ebf57 Mon Sep 17 00:00:00 2001 From: Dennis Li Date: Wed, 30 Dec 2020 19:45:15 +0800 Subject: [PATCH 0268/2012] drm/amdgpu: fix a GPU hang issue when remove device When GFXOFF is enabled and GPU is idle, driver will fail to access some registers. Therefore change to disable power gating before all access registers with MMIO. Dmesg log is as following: amdgpu 0000:03:00.0: amdgpu: amdgpu: finishing device. amdgpu: cp queue pipe 4 queue 0 preemption failed amdgpu 0000:03:00.0: amdgpu: failed to write reg 2890 wait reg 28a2 amdgpu 0000:03:00.0: amdgpu: failed to write reg 1a6f4 wait reg 1a706 amdgpu 0000:03:00.0: amdgpu: failed to write reg 2890 wait reg 28a2 amdgpu 0000:03:00.0: amdgpu: failed to write reg 1a6f4 wait reg 1a706 Signed-off-by: Dennis Li Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 1cb7d73f7317b..b69c34074d8d7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -2548,11 +2548,11 @@ static int amdgpu_device_ip_fini(struct amdgpu_device *adev) if (adev->gmc.xgmi.num_physical_nodes > 1) amdgpu_xgmi_remove_device(adev); - amdgpu_amdkfd_device_fini(adev); - amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE); amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE); + amdgpu_amdkfd_device_fini(adev); + /* need to disable SMC first */ for (i = 0; i < adev->num_ip_blocks; i++) { if (!adev->ip_blocks[i].status.hw) -- GitLab From 44cb39e19a05ca711bcb6e776e0a4399223204a0 Mon Sep 17 00:00:00 2001 From: Xiaojian Du Date: Wed, 30 Dec 2020 18:08:23 +0800 Subject: [PATCH 0269/2012] drm/amd/pm: fix the failure when change power profile for renoir This patch is to fix the failure when change power profile to "profile_peak" for renoir. Signed-off-by: Xiaojian Du Reviewed-by: Huang Rui Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c | 1 + drivers/gpu/drm/amd/pm/swsmu/smu12/smu_v12_0.c | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c index dc75db8af3715..f743685a20e8a 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c @@ -188,6 +188,7 @@ static int renoir_get_dpm_clk_limited(struct smu_context *smu, enum smu_clk_type return -EINVAL; *freq = clk_table->SocClocks[dpm_level].Freq; break; + case SMU_UCLK: case SMU_MCLK: if (dpm_level >= NUM_FCLK_DPM_LEVELS) return -EINVAL; diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu12/smu_v12_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu12/smu_v12_0.c index 522d550046555..06abf2a7ce9e9 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu12/smu_v12_0.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu12/smu_v12_0.c @@ -225,6 +225,7 @@ int smu_v12_0_set_soft_freq_limited_range(struct smu_context *smu, enum smu_clk_ break; case SMU_FCLK: case SMU_MCLK: + case SMU_UCLK: ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_SetHardMinFclkByFreq, min, NULL); if (ret) return ret; -- GitLab From 98b64762080b96b0f8608da5fe161f1a7ab6f5de Mon Sep 17 00:00:00 2001 From: Xiaojian Du Date: Tue, 29 Dec 2020 17:19:37 +0800 Subject: [PATCH 0270/2012] drm/amd/pm: improve the fine grain tuning function for RV/RV2/PCO This patch is to improve the fine grain tuning function for RV/RV2/PCO. The fine grain tuning function uses the sysfs node -- pp_od_clk_voltage to config gfxclk. Meanwhile, another sysfs node -- power_dpm_force_perfomance_level also affects the gfx clk. It will cause confusion when these two sysfs nodes works together. So this patch adds one flag to avoid this confusion, the flag will make these two sysfs nodes work separately. The flag is set as "disabled" by default, so the fine grain tuning function will be disabled by default. Only when power_dpm_force_perfomance_level is changed to "manual" mode, the flag will be set as "enabled", and the fine grain tuning function will be enabled. In other profile modes, including "auto", "high", "low", "profile_peak", "profile_standard", "profile_min_sclk", "profile_min_mclk", the flag will be set as "disabled", and the od range of fine grain tuning function will be restored default value. Signed-off-by: Xiaojian Du Reviewed-by: Huang Rui Signed-off-by: Alex Deucher --- .../drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.c | 58 ++++++++++++++++++- .../drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.h | 2 + 2 files changed, 59 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.c index 0cf899566e31c..88322781e447b 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.c +++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.c @@ -558,7 +558,8 @@ static int smu10_hwmgr_backend_init(struct pp_hwmgr *hwmgr) /* enable the pp_od_clk_voltage sysfs file */ hwmgr->od_enabled = 1; - + /* disabled fine grain tuning function by default */ + data->fine_grain_enabled = 0; return result; } @@ -597,6 +598,7 @@ static int smu10_dpm_force_dpm_level(struct pp_hwmgr *hwmgr, uint32_t min_mclk = hwmgr->display_config->min_mem_set_clock/100; uint32_t index_fclk = data->clock_vol_info.vdd_dep_on_fclk->count - 1; uint32_t index_socclk = data->clock_vol_info.vdd_dep_on_socclk->count - 1; + uint32_t fine_grain_min_freq = 0, fine_grain_max_freq = 0; if (hwmgr->smu_version < 0x1E3700) { pr_info("smu firmware version too old, can not set dpm level\n"); @@ -613,6 +615,14 @@ static int smu10_dpm_force_dpm_level(struct pp_hwmgr *hwmgr, switch (level) { case AMD_DPM_FORCED_LEVEL_HIGH: case AMD_DPM_FORCED_LEVEL_PROFILE_PEAK: + data->fine_grain_enabled = 0; + + smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetMinGfxclkFrequency, &fine_grain_min_freq); + smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetMaxGfxclkFrequency, &fine_grain_max_freq); + + data->gfx_actual_soft_min_freq = fine_grain_min_freq; + data->gfx_actual_soft_max_freq = fine_grain_max_freq; + smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_SetHardMinGfxClk, data->gfx_max_freq_limit/100, @@ -648,6 +658,14 @@ static int smu10_dpm_force_dpm_level(struct pp_hwmgr *hwmgr, NULL); break; case AMD_DPM_FORCED_LEVEL_PROFILE_MIN_SCLK: + data->fine_grain_enabled = 0; + + smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetMinGfxclkFrequency, &fine_grain_min_freq); + smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetMaxGfxclkFrequency, &fine_grain_max_freq); + + data->gfx_actual_soft_min_freq = fine_grain_min_freq; + data->gfx_actual_soft_max_freq = fine_grain_max_freq; + smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_SetHardMinGfxClk, min_sclk, @@ -658,6 +676,14 @@ static int smu10_dpm_force_dpm_level(struct pp_hwmgr *hwmgr, NULL); break; case AMD_DPM_FORCED_LEVEL_PROFILE_MIN_MCLK: + data->fine_grain_enabled = 0; + + smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetMinGfxclkFrequency, &fine_grain_min_freq); + smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetMaxGfxclkFrequency, &fine_grain_max_freq); + + data->gfx_actual_soft_min_freq = fine_grain_min_freq; + data->gfx_actual_soft_max_freq = fine_grain_max_freq; + smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_SetHardMinFclkByFreq, min_mclk, @@ -668,6 +694,14 @@ static int smu10_dpm_force_dpm_level(struct pp_hwmgr *hwmgr, NULL); break; case AMD_DPM_FORCED_LEVEL_PROFILE_STANDARD: + data->fine_grain_enabled = 0; + + smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetMinGfxclkFrequency, &fine_grain_min_freq); + smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetMaxGfxclkFrequency, &fine_grain_max_freq); + + data->gfx_actual_soft_min_freq = fine_grain_min_freq; + data->gfx_actual_soft_max_freq = fine_grain_max_freq; + smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_SetHardMinGfxClk, SMU10_UMD_PSTATE_GFXCLK, @@ -703,6 +737,14 @@ static int smu10_dpm_force_dpm_level(struct pp_hwmgr *hwmgr, NULL); break; case AMD_DPM_FORCED_LEVEL_AUTO: + data->fine_grain_enabled = 0; + + smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetMinGfxclkFrequency, &fine_grain_min_freq); + smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetMaxGfxclkFrequency, &fine_grain_max_freq); + + data->gfx_actual_soft_min_freq = fine_grain_min_freq; + data->gfx_actual_soft_max_freq = fine_grain_max_freq; + smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_SetHardMinGfxClk, min_sclk, @@ -741,6 +783,14 @@ static int smu10_dpm_force_dpm_level(struct pp_hwmgr *hwmgr, NULL); break; case AMD_DPM_FORCED_LEVEL_LOW: + data->fine_grain_enabled = 0; + + smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetMinGfxclkFrequency, &fine_grain_min_freq); + smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetMaxGfxclkFrequency, &fine_grain_max_freq); + + data->gfx_actual_soft_min_freq = fine_grain_min_freq; + data->gfx_actual_soft_max_freq = fine_grain_max_freq; + smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_SetHardMinGfxClk, data->gfx_min_freq_limit/100, @@ -759,6 +809,7 @@ static int smu10_dpm_force_dpm_level(struct pp_hwmgr *hwmgr, NULL); break; case AMD_DPM_FORCED_LEVEL_MANUAL: + data->fine_grain_enabled = 1; case AMD_DPM_FORCED_LEVEL_PROFILE_EXIT: default: break; @@ -1435,6 +1486,11 @@ static int smu10_set_fine_grain_clk_vol(struct pp_hwmgr *hwmgr, return -EINVAL; } + if (!smu10_data->fine_grain_enabled) { + pr_err("Fine grain not started\n"); + return -EINVAL; + } + if (type == PP_OD_EDIT_SCLK_VDDC_TABLE) { if (size != 2) { pr_err("Input parameter number not correct\n"); diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.h b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.h index 28d86d354d50b..808e0ecbe1f09 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.h +++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.h @@ -300,6 +300,8 @@ struct smu10_hwmgr { bool need_min_deep_sleep_dcefclk; uint32_t deep_sleep_dcefclk; uint32_t num_active_display; + + bool fine_grain_enabled; }; struct pp_hwmgr; -- GitLab From 4f6a05501eb9c57fb4c9efed70840aee523a393b Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Sun, 3 Jan 2021 15:02:32 +0100 Subject: [PATCH 0271/2012] drm/amd/display: Fix unused variable warning Some of the newly added code is hidden inside of #ifdef blocks, but one variable is unused when debugfs is disabled: drivers/gpu/drm/amd/amdgpu/../display/amdgpu_dm/amdgpu_dm.c:8370:8: error: unused variable 'configure_crc' [-Werror,-Wunused-variable] Change the #ifdef to an if(IS_ENABLED()) check to fix the warning and avoid adding more #ifdefs. Fixes: c920888c604d ("drm/amd/display: Expose new CRC window property") Reviewed-by: Wayne Lin Signed-off-by: Arnd Bergmann Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 4 +--- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.h | 2 +- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 519080e9a2338..3fb6baf9b0bae 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -8379,8 +8379,7 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state) acrtc->dm_irq_params.stream = dm_new_crtc_state->stream; manage_dm_interrupts(adev, acrtc, true); } -#ifdef CONFIG_DEBUG_FS - if (new_crtc_state->active && + if (IS_ENABLED(CONFIG_DEBUG_FS) && new_crtc_state->active && amdgpu_dm_is_valid_crc_source(dm_new_crtc_state->crc_src)) { /** * Frontend may have changed so reapply the CRC capture @@ -8401,7 +8400,6 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state) amdgpu_dm_crtc_configure_crc_source( crtc, dm_new_crtc_state, dm_new_crtc_state->crc_src); } -#endif } for_each_new_crtc_in_state(state, crtc, new_crtc_state, j) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.h index 0235bfb246e5d..eba2f1d35d075 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.h @@ -46,13 +46,13 @@ static inline bool amdgpu_dm_is_valid_crc_source(enum amdgpu_dm_pipe_crc_source } /* amdgpu_dm_crc.c */ -#ifdef CONFIG_DEBUG_FS bool amdgpu_dm_crc_window_is_default(struct dm_crtc_state *dm_crtc_state); bool amdgpu_dm_crc_window_changed(struct dm_crtc_state *dm_new_crtc_state, struct dm_crtc_state *dm_old_crtc_state); int amdgpu_dm_crtc_configure_crc_source(struct drm_crtc *crtc, struct dm_crtc_state *dm_crtc_state, enum amdgpu_dm_pipe_crc_source source); +#ifdef CONFIG_DEBUG_FS int amdgpu_dm_crtc_set_crc_source(struct drm_crtc *crtc, const char *src_name); int amdgpu_dm_crtc_verify_crc_source(struct drm_crtc *crtc, const char *src_name, -- GitLab From e6d5c64efaa34aae3815a9afeb1314a976142e83 Mon Sep 17 00:00:00 2001 From: Jiawei Gu Date: Tue, 29 Dec 2020 20:35:33 +0800 Subject: [PATCH 0272/2012] drm/amdgpu: fix potential memory leak during navi12 deinitialization Navi12 HDCP & DTM deinitialization needs continue to free bo if already created though initialized flag is not set. Reviewed-by: Alex Deucher Signed-off-by: Jiawei Gu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index 5d6fc369e32c1..347fec6694248 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -1315,8 +1315,12 @@ static int psp_hdcp_terminate(struct psp_context *psp) if (amdgpu_sriov_vf(psp->adev)) return 0; - if (!psp->hdcp_context.hdcp_initialized) - return 0; + if (!psp->hdcp_context.hdcp_initialized) { + if (psp->hdcp_context.hdcp_shared_buf) + goto out; + else + return 0; + } ret = psp_hdcp_unload(psp); if (ret) @@ -1324,6 +1328,7 @@ static int psp_hdcp_terminate(struct psp_context *psp) psp->hdcp_context.hdcp_initialized = false; +out: /* free hdcp shared memory */ amdgpu_bo_free_kernel(&psp->hdcp_context.hdcp_shared_bo, &psp->hdcp_context.hdcp_shared_mc_addr, @@ -1462,8 +1467,12 @@ static int psp_dtm_terminate(struct psp_context *psp) if (amdgpu_sriov_vf(psp->adev)) return 0; - if (!psp->dtm_context.dtm_initialized) - return 0; + if (!psp->dtm_context.dtm_initialized) { + if (psp->dtm_context.dtm_shared_buf) + goto out; + else + return 0; + } ret = psp_dtm_unload(psp); if (ret) @@ -1471,6 +1480,7 @@ static int psp_dtm_terminate(struct psp_context *psp) psp->dtm_context.dtm_initialized = false; +out: /* free hdcp shared memory */ amdgpu_bo_free_kernel(&psp->dtm_context.dtm_shared_bo, &psp->dtm_context.dtm_shared_mc_addr, -- GitLab From 8a82b347e8732fd2b68d26a6e9f0d9a1c397560d Mon Sep 17 00:00:00 2001 From: Dennis Li Date: Tue, 5 Jan 2021 08:37:21 +0800 Subject: [PATCH 0273/2012] drm/amdgpu: fix no bad_pages issue after umc ue injection old code wrongly used the bad page status as the function return value, which cause amdgpu_ras_badpages_read always return failed. Signed-off-by: Dennis Li Reviewed-by: Guchun Chen Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index c136bd4497446..82e952696d24f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -1518,7 +1518,7 @@ static int amdgpu_ras_badpages_read(struct amdgpu_device *adev, struct amdgpu_ras *con = amdgpu_ras_get_context(adev); struct ras_err_handler_data *data; int i = 0; - int ret = 0; + int ret = 0, status; if (!con || !con->eh_data || !bps || !count) return -EINVAL; @@ -1543,12 +1543,12 @@ static int amdgpu_ras_badpages_read(struct amdgpu_device *adev, .size = AMDGPU_GPU_PAGE_SIZE, .flags = AMDGPU_RAS_RETIRE_PAGE_RESERVED, }; - ret = amdgpu_vram_mgr_query_page_status( + status = amdgpu_vram_mgr_query_page_status( ttm_manager_type(&adev->mman.bdev, TTM_PL_VRAM), data->bps[i].retired_page); - if (ret == -EBUSY) + if (status == -EBUSY) (*bps)[i].flags = AMDGPU_RAS_RETIRE_PAGE_PENDING; - else if (ret == -ENOENT) + else if (status == -ENOENT) (*bps)[i].flags = AMDGPU_RAS_RETIRE_PAGE_FAULT; } -- GitLab From 3851c90b7aa8f0c275d14636f0e7ccca69a2bf84 Mon Sep 17 00:00:00 2001 From: John Clements Date: Tue, 5 Jan 2021 14:53:14 +0800 Subject: [PATCH 0274/2012] drm/amdgpu: enable ras eeprom support for sienna cichlid added I2C address and asic support flag Reviewed-by: Hawking Zhang Signed-off-by: John Clements Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c index 1dd040166c635..19d9aa76cfbfb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c @@ -30,6 +30,7 @@ #define EEPROM_I2C_TARGET_ADDR_VEGA20 0xA0 #define EEPROM_I2C_TARGET_ADDR_ARCTURUS 0xA8 #define EEPROM_I2C_TARGET_ADDR_ARCTURUS_D342 0xA0 +#define EEPROM_I2C_TARGET_ADDR_SIENNA_CICHLID 0xA0 /* * The 2 macros bellow represent the actual size in bytes that @@ -62,7 +63,8 @@ static bool __is_ras_eeprom_supported(struct amdgpu_device *adev) { if ((adev->asic_type == CHIP_VEGA20) || - (adev->asic_type == CHIP_ARCTURUS)) + (adev->asic_type == CHIP_ARCTURUS) || + (adev->asic_type == CHIP_SIENNA_CICHLID)) return true; return false; @@ -100,6 +102,10 @@ static bool __get_eeprom_i2c_addr(struct amdgpu_device *adev, case CHIP_ARCTURUS: return __get_eeprom_i2c_addr_arct(adev, i2c_addr); + case CHIP_SIENNA_CICHLID: + *i2c_addr = EEPROM_I2C_TARGET_ADDR_SIENNA_CICHLID; + break; + default: return false; } -- GitLab From c241ed2f0ea549c18cff62a3708b43846b84dae3 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 4 Jan 2021 11:24:20 -0500 Subject: [PATCH 0275/2012] drm/amdgpu/display: drop DCN support for aarch64 From Ard: "Simply disabling -mgeneral-regs-only left and right is risky, given that the standard AArch64 ABI permits the use of FP/SIMD registers anywhere, and GCC is known to use SIMD registers for spilling, and may invent other uses of the FP/SIMD register file that have nothing to do with the floating point code in question. Note that putting kernel_neon_begin() and kernel_neon_end() around the code that does use FP is not sufficient here, the problem is in all the other code that may be emitted with references to SIMD registers in it. So the only way to do this properly is to put all floating point code in a separate compilation unit, and only compile that unit with -mgeneral-regs-only." Disable support until the code can be properly refactored to support this properly on aarch64. Acked-by: Will Deacon Reported-by: Ard Biesheuvel Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/Kconfig | 2 +- drivers/gpu/drm/amd/display/dc/calcs/Makefile | 4 ---- .../gpu/drm/amd/display/dc/clk_mgr/Makefile | 21 ------------------- drivers/gpu/drm/amd/display/dc/dcn10/Makefile | 7 ------- .../drm/amd/display/dc/dcn10/dcn10_resource.c | 7 ------- drivers/gpu/drm/amd/display/dc/dcn20/Makefile | 4 ---- drivers/gpu/drm/amd/display/dc/dcn21/Makefile | 4 ---- drivers/gpu/drm/amd/display/dc/dcn30/Makefile | 5 ----- .../gpu/drm/amd/display/dc/dcn301/Makefile | 4 ---- .../gpu/drm/amd/display/dc/dcn302/Makefile | 4 ---- drivers/gpu/drm/amd/display/dc/dml/Makefile | 4 ---- drivers/gpu/drm/amd/display/dc/dsc/Makefile | 4 ---- drivers/gpu/drm/amd/display/dc/os_types.h | 4 ---- 13 files changed, 1 insertion(+), 73 deletions(-) diff --git a/drivers/gpu/drm/amd/display/Kconfig b/drivers/gpu/drm/amd/display/Kconfig index 797b5d4b43e5e..e509a175ed175 100644 --- a/drivers/gpu/drm/amd/display/Kconfig +++ b/drivers/gpu/drm/amd/display/Kconfig @@ -6,7 +6,7 @@ config DRM_AMD_DC bool "AMD DC - Enable new display engine" default y select SND_HDA_COMPONENT if SND_HDA_CORE - select DRM_AMD_DC_DCN if (X86 || PPC64 || (ARM64 && KERNEL_MODE_NEON)) && !(KCOV_INSTRUMENT_ALL && KCOV_ENABLE_COMPARISONS) + select DRM_AMD_DC_DCN if (X86 || PPC64) && !(KCOV_INSTRUMENT_ALL && KCOV_ENABLE_COMPARISONS) help Choose this option if you want to use the new display engine support for AMDGPU. This adds required support for Vega and diff --git a/drivers/gpu/drm/amd/display/dc/calcs/Makefile b/drivers/gpu/drm/amd/display/dc/calcs/Makefile index 64f515d744103..f3c00f479e1cb 100644 --- a/drivers/gpu/drm/amd/display/dc/calcs/Makefile +++ b/drivers/gpu/drm/amd/display/dc/calcs/Makefile @@ -33,10 +33,6 @@ ifdef CONFIG_PPC64 calcs_ccflags := -mhard-float -maltivec endif -ifdef CONFIG_ARM64 -calcs_rcflags := -mgeneral-regs-only -endif - ifdef CONFIG_CC_IS_GCC ifeq ($(call cc-ifversion, -lt, 0701, y), y) IS_OLD_GCC = 1 diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile b/drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile index d59b380e7b7fb..ff96bee57bfc5 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile @@ -104,13 +104,6 @@ ifdef CONFIG_PPC64 CFLAGS_$(AMDDALPATH)/dc/clk_mgr/dcn21/rn_clk_mgr.o := $(call cc-option,-mno-gnu-attribute) endif -# prevent build errors: -# ...: '-mgeneral-regs-only' is incompatible with the use of floating-point types -# this file is unused on arm64, just like on ppc64 -ifdef CONFIG_ARM64 -CFLAGS_REMOVE_$(AMDDALPATH)/dc/clk_mgr/dcn21/rn_clk_mgr.o := -mgeneral-regs-only -endif - AMD_DAL_CLK_MGR_DCN21 = $(addprefix $(AMDDALPATH)/dc/clk_mgr/dcn21/,$(CLK_MGR_DCN21)) AMD_DISPLAY_FILES += $(AMD_DAL_CLK_MGR_DCN21) @@ -125,13 +118,6 @@ ifdef CONFIG_PPC64 CFLAGS_$(AMDDALPATH)/dc/clk_mgr/dcn30/dcn30_clk_mgr.o := $(call cc-option,-mno-gnu-attribute) endif -# prevent build errors: -# ...: '-mgeneral-regs-only' is incompatible with the use of floating-point types -# this file is unused on arm64, just like on ppc64 -ifdef CONFIG_ARM64 -CFLAGS_REMOVE_$(AMDDALPATH)/dc/clk_mgr/dcn30/dcn30_clk_mgr.o := -mgeneral-regs-only -endif - AMD_DAL_CLK_MGR_DCN30 = $(addprefix $(AMDDALPATH)/dc/clk_mgr/dcn30/,$(CLK_MGR_DCN30)) AMD_DISPLAY_FILES += $(AMD_DAL_CLK_MGR_DCN30) @@ -146,13 +132,6 @@ ifdef CONFIG_PPC64 CFLAGS_$(AMDDALPATH)/dc/clk_mgr/dcn301/vg_clk_mgr.o := $(call cc-option,-mno-gnu-attribute) endif -# prevent build errors: -# ...: '-mgeneral-regs-only' is incompatible with the use of floating-point types -# this file is unused on arm64, just like on ppc64 -ifdef CONFIG_ARM64 -CFLAGS_REMOVE_$(AMDDALPATH)/dc/clk_mgr/dcn301/vg_clk_mgr.o := -mgeneral-regs-only -endif - AMD_DAL_CLK_MGR_DCN301 = $(addprefix $(AMDDALPATH)/dc/clk_mgr/dcn301/,$(CLK_MGR_DCN301)) AMD_DISPLAY_FILES += $(AMD_DAL_CLK_MGR_DCN301) diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/Makefile b/drivers/gpu/drm/amd/display/dc/dcn10/Makefile index 733e6e6e43bd6..62ad1a11bff9c 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dcn10/Makefile @@ -31,11 +31,4 @@ DCN10 = dcn10_init.o dcn10_resource.o dcn10_ipp.o dcn10_hw_sequencer.o \ AMD_DAL_DCN10 = $(addprefix $(AMDDALPATH)/dc/dcn10/,$(DCN10)) -# fix: -# ...: '-mgeneral-regs-only' is incompatible with the use of floating-point types -# aarch64 does not support soft-float, so use hard-float and handle this in code -ifdef CONFIG_ARM64 -CFLAGS_REMOVE_$(AMDDALPATH)/dc/dcn10/dcn10_resource.o := -mgeneral-regs-only -endif - AMD_DISPLAY_FILES += $(AMD_DAL_DCN10) diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c index bdc37831535e4..36745193c391c 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c @@ -1534,15 +1534,8 @@ static bool dcn10_resource_construct( memcpy(dc->dcn_ip, &dcn10_ip_defaults, sizeof(dcn10_ip_defaults)); memcpy(dc->dcn_soc, &dcn10_soc_defaults, sizeof(dcn10_soc_defaults)); -#if defined(CONFIG_ARM64) - /* Aarch64 does not support -msoft-float/-mfloat-abi=soft */ - DC_FP_START(); - dcn10_resource_construct_fp(dc); - DC_FP_END(); -#else /* Other architectures we build for build this with soft-float */ dcn10_resource_construct_fp(dc); -#endif pool->base.pp_smu = dcn10_pp_smu_create(ctx); diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/Makefile b/drivers/gpu/drm/amd/display/dc/dcn20/Makefile index 624cb1341ef14..5fcaf78334ff9 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dcn20/Makefile @@ -17,10 +17,6 @@ ifdef CONFIG_PPC64 CFLAGS_$(AMDDALPATH)/dc/dcn20/dcn20_resource.o := -mhard-float -maltivec endif -ifdef CONFIG_ARM64 -CFLAGS_REMOVE_$(AMDDALPATH)/dc/dcn20/dcn20_resource.o := -mgeneral-regs-only -endif - ifdef CONFIG_CC_IS_GCC ifeq ($(call cc-ifversion, -lt, 0701, y), y) IS_OLD_GCC = 1 diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/Makefile b/drivers/gpu/drm/amd/display/dc/dcn21/Makefile index 1ee5fc03b7b3d..bb8c951410822 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn21/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dcn21/Makefile @@ -13,10 +13,6 @@ ifdef CONFIG_PPC64 CFLAGS_$(AMDDALPATH)/dc/dcn21/dcn21_resource.o := -mhard-float -maltivec endif -ifdef CONFIG_ARM64 -CFLAGS_REMOVE_$(AMDDALPATH)/dc/dcn21/dcn21_resource.o := -mgeneral-regs-only -endif - ifdef CONFIG_CC_IS_GCC ifeq ($(call cc-ifversion, -lt, 0701, y), y) IS_OLD_GCC = 1 diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/Makefile b/drivers/gpu/drm/amd/display/dc/dcn30/Makefile index 248c2711aacef..c20331eb62e01 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dcn30/Makefile @@ -41,11 +41,6 @@ CFLAGS_$(AMDDALPATH)/dc/dcn30/dcn30_resource.o := -mhard-float -maltivec CFLAGS_$(AMDDALPATH)/dc/dcn30/dcn30_optc.o := -mhard-float -maltivec endif -ifdef CONFIG_ARM64 -CFLAGS_REMOVE_$(AMDDALPATH)/dc/dcn30/dcn30_resource.o := -mgeneral-regs-only -CFLAGS_REMOVE_$(AMDDALPATH)/dc/dcn30/dcn30_optc.o := -mgeneral-regs-only -endif - ifdef CONFIG_CC_IS_GCC ifeq ($(call cc-ifversion, -lt, 0701, y), y) IS_OLD_GCC = 1 diff --git a/drivers/gpu/drm/amd/display/dc/dcn301/Makefile b/drivers/gpu/drm/amd/display/dc/dcn301/Makefile index 2fd5d34e4ba6f..3ca7d911d25c4 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn301/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dcn301/Makefile @@ -21,10 +21,6 @@ ifdef CONFIG_PPC64 CFLAGS_$(AMDDALPATH)/dc/dcn301/dcn301_resource.o := -mhard-float -maltivec endif -ifdef CONFIG_ARM64 -CFLAGS_REMOVE_$(AMDDALPATH)/dc/dcn301/dcn301_resource.o := -mgeneral-regs-only -endif - ifdef CONFIG_CC_IS_GCC ifeq ($(call cc-ifversion, -lt, 0701, y), y) IS_OLD_GCC = 1 diff --git a/drivers/gpu/drm/amd/display/dc/dcn302/Makefile b/drivers/gpu/drm/amd/display/dc/dcn302/Makefile index 36e44e1b07faf..8d4924b7dc221 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn302/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dcn302/Makefile @@ -20,10 +20,6 @@ ifdef CONFIG_PPC64 CFLAGS_$(AMDDALPATH)/dc/dcn302/dcn302_resource.o := -mhard-float -maltivec endif -ifdef CONFIG_ARM64 -CFLAGS_REMOVE_$(AMDDALPATH)/dc/dcn302/dcn302_resource.o := -mgeneral-regs-only -endif - ifdef CONFIG_CC_IS_GCC ifeq ($(call cc-ifversion, -lt, 0701, y), y) IS_OLD_GCC = 1 diff --git a/drivers/gpu/drm/amd/display/dc/dml/Makefile b/drivers/gpu/drm/amd/display/dc/dml/Makefile index a02a33dcd70bd..6bb7f2905821b 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dml/Makefile @@ -33,10 +33,6 @@ ifdef CONFIG_PPC64 dml_ccflags := -mhard-float -maltivec endif -ifdef CONFIG_ARM64 -dml_rcflags := -mgeneral-regs-only -endif - ifdef CONFIG_CC_IS_GCC ifeq ($(call cc-ifversion, -lt, 0701, y), y) IS_OLD_GCC = 1 diff --git a/drivers/gpu/drm/amd/display/dc/dsc/Makefile b/drivers/gpu/drm/amd/display/dc/dsc/Makefile index f2624a1156e5c..8d31eb75c6a6e 100644 --- a/drivers/gpu/drm/amd/display/dc/dsc/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dsc/Makefile @@ -10,10 +10,6 @@ ifdef CONFIG_PPC64 dsc_ccflags := -mhard-float -maltivec endif -ifdef CONFIG_ARM64 -dsc_rcflags := -mgeneral-regs-only -endif - ifdef CONFIG_CC_IS_GCC ifeq ($(call cc-ifversion, -lt, 0701, y), y) IS_OLD_GCC = 1 diff --git a/drivers/gpu/drm/amd/display/dc/os_types.h b/drivers/gpu/drm/amd/display/dc/os_types.h index 95cb56929e79e..126c2f3a4dd3b 100644 --- a/drivers/gpu/drm/amd/display/dc/os_types.h +++ b/drivers/gpu/drm/amd/display/dc/os_types.h @@ -55,10 +55,6 @@ #include #define DC_FP_START() kernel_fpu_begin() #define DC_FP_END() kernel_fpu_end() -#elif defined(CONFIG_ARM64) -#include -#define DC_FP_START() kernel_neon_begin() -#define DC_FP_END() kernel_neon_end() #elif defined(CONFIG_PPC64) #include #include -- GitLab From 5efc1f4b454c6179d35e7b0c3eda0ad5763a00fc Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 5 Jan 2021 11:42:08 -0500 Subject: [PATCH 0276/2012] Revert "drm/amd/display: Fix memory leaks in S3 resume" This reverts commit a135a1b4c4db1f3b8cbed9676a40ede39feb3362. This leads to blank screens on some boards after replugging a display. Revert until we understand the root cause and can fix both the leak and the blank screen after replug. Bug: https://bugzilla.kernel.org/show_bug.cgi?id=211033 Bug: https://gitlab.freedesktop.org/drm/amd/-/issues/1427 Cc: Stylon Wang Cc: Harry Wentland Cc: Nicholas Kazlauskas Cc: Andre Tomt Cc: Oleksandr Natalenko Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 3fb6baf9b0bae..146486071d01d 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -2386,8 +2386,7 @@ void amdgpu_dm_update_connector_after_detect( drm_connector_update_edid_property(connector, aconnector->edid); - aconnector->num_modes = drm_add_edid_modes(connector, aconnector->edid); - drm_connector_list_update(connector); + drm_add_edid_modes(connector, aconnector->edid); if (aconnector->dc_link->aux_mode) drm_dp_cec_set_edid(&aconnector->dm_dp_aux.aux, -- GitLab From fcc42338375a1e67b8568dbb558f8b784d0f3b01 Mon Sep 17 00:00:00 2001 From: Akilesh Kailash Date: Mon, 28 Dec 2020 07:14:07 +0000 Subject: [PATCH 0277/2012] dm snapshot: flush merged data before committing metadata If the origin device has a volatile write-back cache and the following events occur: 1: After finishing merge operation of one set of exceptions, merge_callback() is invoked. 2: Update the metadata in COW device tracking the merge completion. This update to COW device is flushed cleanly. 3: System crashes and the origin device's cache where the recent merge was completed has not been flushed. During the next cycle when we read the metadata from the COW device, we will skip reading those metadata whose merge was completed in step (1). This will lead to data loss/corruption. To address this, flush the origin device post merge IO before updating the metadata. Cc: stable@vger.kernel.org Signed-off-by: Akilesh Kailash Signed-off-by: Mike Snitzer --- drivers/md/dm-snap.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index 4668b2cd98f4e..11890db71f3fe 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -141,6 +141,11 @@ struct dm_snapshot { * for them to be committed. */ struct bio_list bios_queued_during_merge; + + /* + * Flush data after merge. + */ + struct bio flush_bio; }; /* @@ -1121,6 +1126,17 @@ shut: static void error_bios(struct bio *bio); +static int flush_data(struct dm_snapshot *s) +{ + struct bio *flush_bio = &s->flush_bio; + + bio_reset(flush_bio); + bio_set_dev(flush_bio, s->origin->bdev); + flush_bio->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH; + + return submit_bio_wait(flush_bio); +} + static void merge_callback(int read_err, unsigned long write_err, void *context) { struct dm_snapshot *s = context; @@ -1134,6 +1150,11 @@ static void merge_callback(int read_err, unsigned long write_err, void *context) goto shut; } + if (flush_data(s) < 0) { + DMERR("Flush after merge failed: shutting down merge"); + goto shut; + } + if (s->store->type->commit_merge(s->store, s->num_merging_chunks) < 0) { DMERR("Write error in exception store: shutting down merge"); @@ -1318,6 +1339,7 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) s->first_merging_chunk = 0; s->num_merging_chunks = 0; bio_list_init(&s->bios_queued_during_merge); + bio_init(&s->flush_bio, NULL, 0); /* Allocate hash table for COW data */ if (init_hash_tables(s)) { @@ -1504,6 +1526,8 @@ static void snapshot_dtr(struct dm_target *ti) dm_exception_store_destroy(s->store); + bio_uninit(&s->flush_bio); + dm_put_device(ti, s->cow); dm_put_device(ti, s->origin); -- GitLab From f477a538c14d07f8c45e554c8c5208d588514e98 Mon Sep 17 00:00:00 2001 From: Necip Fazil Yildiran Date: Thu, 17 Sep 2020 18:45:48 +0300 Subject: [PATCH 0278/2012] sh: dma: fix kconfig dependency for G2_DMA When G2_DMA is enabled and SH_DMA is disabled, it results in the following Kbuild warning: WARNING: unmet direct dependencies detected for SH_DMA_API Depends on [n]: SH_DMA [=n] Selected by [y]: - G2_DMA [=y] && SH_DREAMCAST [=y] The reason is that G2_DMA selects SH_DMA_API without depending on or selecting SH_DMA while SH_DMA_API depends on SH_DMA. When G2_DMA was first introduced with commit 40f49e7ed77f ("sh: dma: Make G2 DMA configurable."), this wasn't an issue since SH_DMA_API didn't have such dependency, and this way was the only way to enable it since SH_DMA_API was non-visible. However, later SH_DMA_API was made visible and dependent on SH_DMA with commit d8902adcc1a9 ("dmaengine: sh: Add Support SuperH DMA Engine driver"). Let G2_DMA depend on SH_DMA_API instead to avoid Kbuild issues. Fixes: d8902adcc1a9 ("dmaengine: sh: Add Support SuperH DMA Engine driver") Signed-off-by: Necip Fazil Yildiran Signed-off-by: Rich Felker --- arch/sh/drivers/dma/Kconfig | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/sh/drivers/dma/Kconfig b/arch/sh/drivers/dma/Kconfig index d0de378beefe5..7d54f284ce10f 100644 --- a/arch/sh/drivers/dma/Kconfig +++ b/arch/sh/drivers/dma/Kconfig @@ -63,8 +63,7 @@ config PVR2_DMA config G2_DMA tristate "G2 Bus DMA support" - depends on SH_DREAMCAST - select SH_DMA_API + depends on SH_DREAMCAST && SH_DMA_API help This enables support for the DMA controller for the Dreamcast's G2 bus. Drivers that want this will generally enable this on -- GitLab From 7fb0a1a5e56779c427b409d6e53889d46519755e Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Thu, 17 Sep 2020 19:14:04 -0700 Subject: [PATCH 0279/2012] arch/sh: hyphenate Non-Uniform in Kconfig prompt Hyphenate Non-Uniform in the NUMA kconfig prompt. Signed-off-by: Randy Dunlap Cc: Yoshinori Sato Cc: Rich Felker Cc: linux-sh@vger.kernel.org Cc: Jiri Kosina Signed-off-by: Rich Felker --- arch/sh/mm/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/sh/mm/Kconfig b/arch/sh/mm/Kconfig index 703d3069997cb..77aa2f802d8d1 100644 --- a/arch/sh/mm/Kconfig +++ b/arch/sh/mm/Kconfig @@ -105,7 +105,7 @@ config VSYSCALL (the default value) say Y. config NUMA - bool "Non Uniform Memory Access (NUMA) Support" + bool "Non-Uniform Memory Access (NUMA) Support" depends on MMU && SYS_SUPPORTS_NUMA select ARCH_WANT_NUMA_VARIABLE_LOCALITY default n -- GitLab From 5c5dc5f8dccbafaacc8c97bbe7762986bdda6f63 Mon Sep 17 00:00:00 2001 From: Qinglang Miao Date: Sat, 19 Sep 2020 10:52:06 +0800 Subject: [PATCH 0280/2012] sh: intc: Convert to DEFINE_SHOW_ATTRIBUTE Use DEFINE_SHOW_ATTRIBUTE macro to simplify the code. Signed-off-by: Qinglang Miao Signed-off-by: Rich Felker --- drivers/sh/intc/virq-debugfs.c | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/drivers/sh/intc/virq-debugfs.c b/drivers/sh/intc/virq-debugfs.c index 9e62ba9311f07..939915a07d999 100644 --- a/drivers/sh/intc/virq-debugfs.c +++ b/drivers/sh/intc/virq-debugfs.c @@ -16,7 +16,7 @@ #include #include "internals.h" -static int intc_irq_xlate_debug(struct seq_file *m, void *priv) +static int intc_irq_xlate_show(struct seq_file *m, void *priv) { int i; @@ -37,17 +37,7 @@ static int intc_irq_xlate_debug(struct seq_file *m, void *priv) return 0; } -static int intc_irq_xlate_open(struct inode *inode, struct file *file) -{ - return single_open(file, intc_irq_xlate_debug, inode->i_private); -} - -static const struct file_operations intc_irq_xlate_fops = { - .open = intc_irq_xlate_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; +DEFINE_SHOW_ATTRIBUTE(intc_irq_xlate); static int __init intc_irq_xlate_init(void) { -- GitLab From a1153636e904faf2b30fae3fb6ee3f4f4d0175c8 Mon Sep 17 00:00:00 2001 From: Qinglang Miao Date: Sat, 19 Sep 2020 10:52:05 +0800 Subject: [PATCH 0281/2012] sh: mm: Convert to DEFINE_SHOW_ATTRIBUTE Use DEFINE_SHOW_ATTRIBUTE macro to simplify the code. Signed-off-by: Qinglang Miao Signed-off-by: Rich Felker --- arch/sh/mm/asids-debugfs.c | 15 ++------------- arch/sh/mm/cache-debugfs.c | 15 ++------------- arch/sh/mm/pmb.c | 15 ++------------- 3 files changed, 6 insertions(+), 39 deletions(-) diff --git a/arch/sh/mm/asids-debugfs.c b/arch/sh/mm/asids-debugfs.c index 4c1ca197e9c5f..d16d6f5ec7749 100644 --- a/arch/sh/mm/asids-debugfs.c +++ b/arch/sh/mm/asids-debugfs.c @@ -26,7 +26,7 @@ #include #include -static int asids_seq_show(struct seq_file *file, void *iter) +static int asids_debugfs_show(struct seq_file *file, void *iter) { struct task_struct *p; @@ -48,18 +48,7 @@ static int asids_seq_show(struct seq_file *file, void *iter) return 0; } -static int asids_debugfs_open(struct inode *inode, struct file *file) -{ - return single_open(file, asids_seq_show, inode->i_private); -} - -static const struct file_operations asids_debugfs_fops = { - .owner = THIS_MODULE, - .open = asids_debugfs_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; +DEFINE_SHOW_ATTRIBUTE(asids_debugfs); static int __init asids_debugfs_init(void) { diff --git a/arch/sh/mm/cache-debugfs.c b/arch/sh/mm/cache-debugfs.c index 17d7807944971..b0f185169dfa0 100644 --- a/arch/sh/mm/cache-debugfs.c +++ b/arch/sh/mm/cache-debugfs.c @@ -22,7 +22,7 @@ enum cache_type { CACHE_TYPE_UNIFIED, }; -static int cache_seq_show(struct seq_file *file, void *iter) +static int cache_debugfs_show(struct seq_file *file, void *iter) { unsigned int cache_type = (unsigned int)file->private; struct cache_info *cache; @@ -94,18 +94,7 @@ static int cache_seq_show(struct seq_file *file, void *iter) return 0; } -static int cache_debugfs_open(struct inode *inode, struct file *file) -{ - return single_open(file, cache_seq_show, inode->i_private); -} - -static const struct file_operations cache_debugfs_fops = { - .owner = THIS_MODULE, - .open = cache_debugfs_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; +DEFINE_SHOW_ATTRIBUTE(cache_debugfs); static int __init cache_debugfs_init(void) { diff --git a/arch/sh/mm/pmb.c b/arch/sh/mm/pmb.c index b20aba6e1b37c..68eb7cc6e5643 100644 --- a/arch/sh/mm/pmb.c +++ b/arch/sh/mm/pmb.c @@ -812,7 +812,7 @@ bool __in_29bit_mode(void) return (__raw_readl(PMB_PASCR) & PASCR_SE) == 0; } -static int pmb_seq_show(struct seq_file *file, void *iter) +static int pmb_debugfs_show(struct seq_file *file, void *iter) { int i; @@ -846,18 +846,7 @@ static int pmb_seq_show(struct seq_file *file, void *iter) return 0; } -static int pmb_debugfs_open(struct inode *inode, struct file *file) -{ - return single_open(file, pmb_seq_show, NULL); -} - -static const struct file_operations pmb_debugfs_fops = { - .owner = THIS_MODULE, - .open = pmb_debugfs_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; +DEFINE_SHOW_ATTRIBUTE(pmb_debugfs); static int __init pmb_debugfs_init(void) { -- GitLab From b7aaf16d10bd9f1fbc5beefb9496e029fd1424ba Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 24 Sep 2020 06:31:39 +0200 Subject: [PATCH 0282/2012] sh: remove CONFIG_IDE from most defconfig Remove CONFIG_IDE from defconfigs that did not actually select chipset drivers, and switch ones that have libata drivers to libata. Signed-off-by: Christoph Hellwig Signed-off-by: Rich Felker --- arch/sh/configs/landisk_defconfig | 9 ++++----- arch/sh/configs/microdev_defconfig | 2 -- arch/sh/configs/sdk7780_defconfig | 6 ++---- arch/sh/configs/sdk7786_defconfig | 3 --- arch/sh/configs/se7750_defconfig | 1 - arch/sh/configs/sh03_defconfig | 3 --- 6 files changed, 6 insertions(+), 18 deletions(-) diff --git a/arch/sh/configs/landisk_defconfig b/arch/sh/configs/landisk_defconfig index ba6ec042606f5..e6c5ddf070c00 100644 --- a/arch/sh/configs/landisk_defconfig +++ b/arch/sh/configs/landisk_defconfig @@ -27,13 +27,12 @@ CONFIG_NETFILTER=y CONFIG_ATALK=m CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_RAM=y -CONFIG_IDE=y -CONFIG_BLK_DEV_IDECD=y -CONFIG_BLK_DEV_OFFBOARD=y -CONFIG_BLK_DEV_GENERIC=y -CONFIG_BLK_DEV_AEC62XX=y +CONFIG_ATA=y +CONFIG_ATA_GENERIC=y +CONFIG_PATA_ATP867X=y CONFIG_SCSI=y CONFIG_BLK_DEV_SD=y +CONFIG_BLK_DEV_SR=y CONFIG_SCSI_MULTI_LUN=y CONFIG_MD=y CONFIG_BLK_DEV_MD=m diff --git a/arch/sh/configs/microdev_defconfig b/arch/sh/configs/microdev_defconfig index c65667d00313b..e9825196dd66a 100644 --- a/arch/sh/configs/microdev_defconfig +++ b/arch/sh/configs/microdev_defconfig @@ -20,8 +20,6 @@ CONFIG_IP_PNP=y # CONFIG_IPV6 is not set # CONFIG_FW_LOADER is not set CONFIG_BLK_DEV_RAM=y -CONFIG_IDE=y -CONFIG_BLK_DEV_IDECD=y CONFIG_NETDEVICES=y CONFIG_NET_ETHERNET=y CONFIG_SMC91X=y diff --git a/arch/sh/configs/sdk7780_defconfig b/arch/sh/configs/sdk7780_defconfig index d10a0414123a5..d00376eb044f8 100644 --- a/arch/sh/configs/sdk7780_defconfig +++ b/arch/sh/configs/sdk7780_defconfig @@ -44,16 +44,14 @@ CONFIG_NET_SCHED=y CONFIG_PARPORT=y CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_RAM=y -CONFIG_IDE=y -CONFIG_BLK_DEV_IDECD=y -CONFIG_BLK_DEV_PLATFORM=y -CONFIG_BLK_DEV_GENERIC=y CONFIG_BLK_DEV_SD=y CONFIG_BLK_DEV_SR=y CONFIG_CHR_DEV_SG=y CONFIG_SCSI_SPI_ATTRS=y CONFIG_SCSI_FC_ATTRS=y CONFIG_ATA=y +CONFIG_ATA_GENERIC=y +CONFIG_PATA_PLATFORM=y CONFIG_MD=y CONFIG_BLK_DEV_DM=y CONFIG_NETDEVICES=y diff --git a/arch/sh/configs/sdk7786_defconfig b/arch/sh/configs/sdk7786_defconfig index 61bec46ebd66a..4a44cac640bc9 100644 --- a/arch/sh/configs/sdk7786_defconfig +++ b/arch/sh/configs/sdk7786_defconfig @@ -116,9 +116,6 @@ CONFIG_MTD_UBI_GLUEBI=m CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_CRYPTOLOOP=y CONFIG_BLK_DEV_RAM=y -CONFIG_IDE=y -CONFIG_BLK_DEV_IDECD=y -CONFIG_BLK_DEV_PLATFORM=y CONFIG_BLK_DEV_SD=y CONFIG_BLK_DEV_SR=y CONFIG_SCSI_MULTI_LUN=y diff --git a/arch/sh/configs/se7750_defconfig b/arch/sh/configs/se7750_defconfig index 3f1c13799d799..4defc7628a498 100644 --- a/arch/sh/configs/se7750_defconfig +++ b/arch/sh/configs/se7750_defconfig @@ -29,7 +29,6 @@ CONFIG_MTD_BLOCK=y CONFIG_MTD_CFI=y CONFIG_MTD_CFI_AMDSTD=y CONFIG_MTD_ROM=y -CONFIG_IDE=y CONFIG_SCSI=y CONFIG_NETDEVICES=y CONFIG_NET_ETHERNET=y diff --git a/arch/sh/configs/sh03_defconfig b/arch/sh/configs/sh03_defconfig index f0073ed399477..48b457d59e790 100644 --- a/arch/sh/configs/sh03_defconfig +++ b/arch/sh/configs/sh03_defconfig @@ -39,9 +39,6 @@ CONFIG_IP_PNP_RARP=y CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_NBD=y CONFIG_BLK_DEV_RAM=y -CONFIG_IDE=y -CONFIG_BLK_DEV_IDECD=m -CONFIG_BLK_DEV_IDETAPE=m CONFIG_SCSI=m CONFIG_BLK_DEV_SD=m CONFIG_BLK_DEV_SR=m -- GitLab From 19170492735be935747b0545b7eed8bb40cc1209 Mon Sep 17 00:00:00 2001 From: Jinyang He Date: Mon, 12 Oct 2020 11:50:24 +0800 Subject: [PATCH 0283/2012] sh: Remove unused HAVE_COPY_THREAD_TLS macro Fixes: e1cc9d8d596e ("sh: switch to copy_thread_tls()") Signed-off-by: Jinyang He Signed-off-by: Rich Felker --- arch/sh/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig index 5fa580219a864..52646f52f130f 100644 --- a/arch/sh/Kconfig +++ b/arch/sh/Kconfig @@ -29,7 +29,6 @@ config SUPERH select HAVE_ARCH_KGDB select HAVE_ARCH_SECCOMP_FILTER select HAVE_ARCH_TRACEHOOK - select HAVE_COPY_THREAD_TLS select HAVE_DEBUG_BUGVERBOSE select HAVE_DEBUG_KMEMLEAK select HAVE_DYNAMIC_FTRACE -- GitLab From 542baf5108e052684c3abdeea57861f12f89a6b9 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 12 Oct 2020 18:40:50 +0300 Subject: [PATCH 0284/2012] sh: Drop ARCH_NR_GPIOS definition The default by generic header is the same, hence drop unnecessary definition. Signed-off-by: Andy Shevchenko Signed-off-by: Rich Felker --- arch/sh/include/asm/gpio.h | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/sh/include/asm/gpio.h b/arch/sh/include/asm/gpio.h index 351918894e867..d643250f0a0fa 100644 --- a/arch/sh/include/asm/gpio.h +++ b/arch/sh/include/asm/gpio.h @@ -16,7 +16,6 @@ #include #endif -#define ARCH_NR_GPIOS 512 #include #ifdef CONFIG_GPIOLIB -- GitLab From 7a202ec74c151e30edc1d17e3209fe6d6fe50eee Mon Sep 17 00:00:00 2001 From: Wang Qing Date: Mon, 9 Nov 2020 10:45:51 +0800 Subject: [PATCH 0285/2012] arch: sh: remove duplicate include Remove duplicate header which is included twice. Signed-off-by: Wang Qing Signed-off-by: Rich Felker --- arch/sh/kernel/cpu/sh3/entry.S | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/sh/kernel/cpu/sh3/entry.S b/arch/sh/kernel/cpu/sh3/entry.S index 25eb809054160..e48b3dd996f58 100644 --- a/arch/sh/kernel/cpu/sh3/entry.S +++ b/arch/sh/kernel/cpu/sh3/entry.S @@ -14,7 +14,6 @@ #include #include #include -#include ! NOTE: ! GNU as (as of 2.9.1) changes bf/s into bt/s and bra, when the address -- GitLab From a118584e7e60fa72ee441055b33b41c3354dba7e Mon Sep 17 00:00:00 2001 From: Wang Qing Date: Mon, 9 Nov 2020 10:35:01 +0800 Subject: [PATCH 0286/2012] sh: mach-sh03: remove duplicate include Remove duplicate header which is included twice. Signed-off-by: Wang Qing Signed-off-by: Rich Felker --- arch/sh/boards/mach-sh03/rtc.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/sh/boards/mach-sh03/rtc.c b/arch/sh/boards/mach-sh03/rtc.c index 8b23ed7c201c6..7fb474844a2d1 100644 --- a/arch/sh/boards/mach-sh03/rtc.c +++ b/arch/sh/boards/mach-sh03/rtc.c @@ -11,7 +11,6 @@ #include #include #include -#include #include #include #include -- GitLab From b89bc060b53e7054e5c8ca11feea4bc884d83611 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 10 Nov 2020 16:49:39 +0100 Subject: [PATCH 0287/2012] sh/intc: Restore devm_ioremap() alignment Restore alignment of the continuation of the devm_ioremap() call in register_intc_controller(). Fixes: 4bdc0d676a643140 ("remove ioremap_nocache and devm_ioremap_nocache") Signed-off-by: Geert Uytterhoeven Signed-off-by: Rich Felker --- drivers/sh/intc/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/sh/intc/core.c b/drivers/sh/intc/core.c index f8e070d67fa32..a14684ffe4c1a 100644 --- a/drivers/sh/intc/core.c +++ b/drivers/sh/intc/core.c @@ -214,7 +214,7 @@ int __init register_intc_controller(struct intc_desc *desc) d->window[k].phys = res->start; d->window[k].size = resource_size(res); d->window[k].virt = ioremap(res->start, - resource_size(res)); + resource_size(res)); if (!d->window[k].virt) goto err2; } -- GitLab From b33cf814b1eb65ef5c939ced8266bb3df18444ef Mon Sep 17 00:00:00 2001 From: Shengjiu Wang Date: Mon, 7 Dec 2020 14:53:24 +0800 Subject: [PATCH 0288/2012] arm64: dts: imx8mn: Fix duplicate node name Error log: sysfs: cannot create duplicate filename '/bus/platform/devices/30000000.bus' The spba bus name is duplicate with aips bus name. Refine spba bus name to fix this issue. Fixes: 970406eaef3a ("arm64: dts: imx8mn: Enable Asynchronous Sample Rate Converter") Signed-off-by: Shengjiu Wang Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/imx8mn.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/freescale/imx8mn.dtsi b/arch/arm64/boot/dts/freescale/imx8mn.dtsi index ee1790230490d..2a79e89f821ee 100644 --- a/arch/arm64/boot/dts/freescale/imx8mn.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mn.dtsi @@ -253,7 +253,7 @@ #size-cells = <1>; ranges; - spba: bus@30000000 { + spba: spba-bus@30000000 { compatible = "fsl,spba-bus", "simple-bus"; #address-cells = <1>; #size-cells = <1>; -- GitLab From a876e7e2a8e62712425be178d483ffdff09f0853 Mon Sep 17 00:00:00 2001 From: Tom Rix Date: Tue, 15 Dec 2020 06:54:54 -0800 Subject: [PATCH 0289/2012] HID: uclogic: remove h from printk format specifier See Documentation/core-api/printk-formats.rst. h should no longer be used in the format specifier for printk. Signed-off-by: Tom Rix Signed-off-by: Jiri Kosina --- drivers/hid/hid-uclogic-params.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hid/hid-uclogic-params.c b/drivers/hid/hid-uclogic-params.c index d26d8cd98efcf..56406cee401ff 100644 --- a/drivers/hid/hid-uclogic-params.c +++ b/drivers/hid/hid-uclogic-params.c @@ -90,7 +90,7 @@ static int uclogic_params_get_str_desc(__u8 **pbuf, struct hid_device *hdev, goto cleanup; } else if (rc < 0) { hid_err(hdev, - "failed retrieving string descriptor #%hhu: %d\n", + "failed retrieving string descriptor #%u: %d\n", idx, rc); goto cleanup; } -- GitLab From 4d2b71634b5ad142617e430bc6ef659331a576d0 Mon Sep 17 00:00:00 2001 From: Tom Rix Date: Tue, 15 Dec 2020 06:59:28 -0800 Subject: [PATCH 0290/2012] HID: wiimote: remove h from printk format specifier See Documentation/core-api/printk-formats.rst. h should no longer be used in the format specifier for printk. Signed-off-by: Tom Rix Signed-off-by: Jiri Kosina --- drivers/hid/hid-wiimote-core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hid/hid-wiimote-core.c b/drivers/hid/hid-wiimote-core.c index 41012681cafd1..4399d6c6afef2 100644 --- a/drivers/hid/hid-wiimote-core.c +++ b/drivers/hid/hid-wiimote-core.c @@ -1482,7 +1482,7 @@ static void handler_return(struct wiimote_data *wdata, const __u8 *payload) wdata->state.cmd_err = err; wiimote_cmd_complete(wdata); } else if (err) { - hid_warn(wdata->hdev, "Remote error %hhu on req %hhu\n", err, + hid_warn(wdata->hdev, "Remote error %u on req %u\n", err, cmd); } } -- GitLab From 91bc156817a3c2007332b64b4f85c32aafbbbea6 Mon Sep 17 00:00:00 2001 From: Jeremy Szu Date: Wed, 6 Jan 2021 21:05:46 +0800 Subject: [PATCH 0291/2012] ALSA: hda/realtek: fix right sounds and mute/micmute LEDs for HP machines * The HP ZBook Fury 15/17 G7 Mobile Workstation are using ALC285 codec which is using 0x04 to control mute LED and 0x01 to control micmute LED. * The right channel speaker is no sound and it needs to expose GPIO1 for initialing AMP. Add quirks to support them. Signed-off-by: Jeremy Szu Cc: Link: https://lore.kernel.org/r/20210106130549.100532-1-jeremy.szu@canonical.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 3c1d2a3fb1a4f..dd82ff2bd5d65 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -7970,6 +7970,10 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x8760, "HP", ALC285_FIXUP_HP_MUTE_LED), SND_PCI_QUIRK(0x103c, 0x877a, "HP", ALC285_FIXUP_HP_MUTE_LED), SND_PCI_QUIRK(0x103c, 0x877d, "HP", ALC236_FIXUP_HP_MUTE_LED), + SND_PCI_QUIRK(0x103c, 0x8780, "HP ZBook Fury 17 G7 Mobile Workstation", + ALC285_FIXUP_HP_GPIO_AMP_INIT), + SND_PCI_QUIRK(0x103c, 0x8783, "HP ZBook Fury 15 G7 Mobile Workstation", + ALC285_FIXUP_HP_GPIO_AMP_INIT), SND_PCI_QUIRK(0x103c, 0x87c8, "HP", ALC287_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x87f4, "HP", ALC287_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x87f5, "HP", ALC287_FIXUP_HP_GPIO_LED), -- GitLab From 83b5bd628f65e6b4d1924b307d6a88a57827bdb0 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Mon, 4 Jan 2021 12:36:14 +0000 Subject: [PATCH 0292/2012] arm64: Move PSTATE.TCO setting to separate functions For consistency with __uaccess_{disable,enable}_hw_pan(), move the PSTATE.TCO setting into dedicated __uaccess_{disable,enable}_tco() functions. Signed-off-by: Catalin Marinas Acked-by: Vincenzo Frascino Acked-by: Mark Rutland --- arch/arm64/include/asm/uaccess.h | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h index 6f986e09a7815..f0fe0cc6abe0b 100644 --- a/arch/arm64/include/asm/uaccess.h +++ b/arch/arm64/include/asm/uaccess.h @@ -176,10 +176,21 @@ static inline void __uaccess_enable_hw_pan(void) * The Tag check override (TCO) bit disables temporarily the tag checking * preventing the issue. */ -static inline void uaccess_disable_privileged(void) +static inline void __uaccess_disable_tco(void) { asm volatile(ALTERNATIVE("nop", SET_PSTATE_TCO(0), ARM64_MTE, CONFIG_KASAN_HW_TAGS)); +} + +static inline void __uaccess_enable_tco(void) +{ + asm volatile(ALTERNATIVE("nop", SET_PSTATE_TCO(1), + ARM64_MTE, CONFIG_KASAN_HW_TAGS)); +} + +static inline void uaccess_disable_privileged(void) +{ + __uaccess_disable_tco(); if (uaccess_ttbr0_disable()) return; @@ -189,8 +200,7 @@ static inline void uaccess_disable_privileged(void) static inline void uaccess_enable_privileged(void) { - asm volatile(ALTERNATIVE("nop", SET_PSTATE_TCO(1), - ARM64_MTE, CONFIG_KASAN_HW_TAGS)); + __uaccess_enable_tco(); if (uaccess_ttbr0_enable()) return; -- GitLab From c14556fc0c7c115ffb4a287560e1ec9f7869aac3 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Thu, 17 Dec 2020 12:39:11 +0300 Subject: [PATCH 0293/2012] thunderbolt: Drop duplicated 0x prefix from format string The tb_dbg() call is using %#x that already adds the 0x prefix so don't duplicate it. Fixes: 9039387e166e ("thunderbolt: Add USB4 router operation proxy for firmware connection manager") Signed-off-by: Mika Westerberg Reviewed-by: Yehezkel Bernat --- drivers/thunderbolt/icm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/thunderbolt/icm.c b/drivers/thunderbolt/icm.c index 8b7f941a9bb7f..b8c4159bc32d0 100644 --- a/drivers/thunderbolt/icm.c +++ b/drivers/thunderbolt/icm.c @@ -2316,7 +2316,7 @@ static int icm_usb4_switch_nvm_authenticate_status(struct tb_switch *sw, if (auth && auth->reply.route_hi == sw->config.route_hi && auth->reply.route_lo == sw->config.route_lo) { - tb_dbg(tb, "NVM_AUTH found for %llx flags 0x%#x status %#x\n", + tb_dbg(tb, "NVM_AUTH found for %llx flags %#x status %#x\n", tb_route(sw), auth->reply.hdr.flags, auth->reply.status); if (auth->reply.hdr.flags & ICM_FLAGS_ERROR) ret = -EIO; -- GitLab From 05cd84691eafcd7959a1e120d5e72c0dd98c5d91 Mon Sep 17 00:00:00 2001 From: Charan Teja Reddy Date: Tue, 5 Jan 2021 20:06:39 +0530 Subject: [PATCH 0294/2012] dmabuf: fix use-after-free of dmabuf's file->f_inode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It is observed 'use-after-free' on the dmabuf's file->f_inode with the race between closing the dmabuf file and reading the dmabuf's debug info. Consider the below scenario where P1 is closing the dma_buf file and P2 is reading the dma_buf's debug info in the system: P1 P2 dma_buf_debug_show() dma_buf_put() __fput() file->f_op->release() dput() .... dentry_unlink_inode() iput(dentry->d_inode) (where the inode is freed) mutex_lock(&db_list.lock) read 'dma_buf->file->f_inode' (the same inode is freed by P1) mutex_unlock(&db_list.lock) dentry->d_op->d_release()--> dma_buf_release() ..... mutex_lock(&db_list.lock) removes the dmabuf from the list mutex_unlock(&db_list.lock) In the above scenario, when dma_buf_put() is called on a dma_buf, it first frees the dma_buf's file->f_inode(=dentry->d_inode) and then removes this dma_buf from the system db_list. In between P2 traversing the db_list tries to access this dma_buf's file->f_inode that was freed by P1 which is a use-after-free case. Since, __fput() calls f_op->release first and then later calls the d_op->d_release, move the dma_buf's db_list removal from d_release() to f_op->release(). This ensures that dma_buf's file->f_inode is not accessed after it is released. Cc: # 5.4.x- Fixes: 4ab59c3c638c ("dma-buf: Move dma_buf_release() from fops to dentry_ops") Acked-by: Christian König Signed-off-by: Charan Teja Reddy Signed-off-by: Sumit Semwal Signed-off-by: Thomas Zimmermann Link: https://patchwork.freedesktop.org/patch/msgid/1609857399-31549-1-git-send-email-charante@codeaurora.org --- drivers/dma-buf/dma-buf.c | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c index e63684d4cd904..9ad6397aaa97e 100644 --- a/drivers/dma-buf/dma-buf.c +++ b/drivers/dma-buf/dma-buf.c @@ -76,10 +76,6 @@ static void dma_buf_release(struct dentry *dentry) dmabuf->ops->release(dmabuf); - mutex_lock(&db_list.lock); - list_del(&dmabuf->list_node); - mutex_unlock(&db_list.lock); - if (dmabuf->resv == (struct dma_resv *)&dmabuf[1]) dma_resv_fini(dmabuf->resv); @@ -88,6 +84,22 @@ static void dma_buf_release(struct dentry *dentry) kfree(dmabuf); } +static int dma_buf_file_release(struct inode *inode, struct file *file) +{ + struct dma_buf *dmabuf; + + if (!is_dma_buf_file(file)) + return -EINVAL; + + dmabuf = file->private_data; + + mutex_lock(&db_list.lock); + list_del(&dmabuf->list_node); + mutex_unlock(&db_list.lock); + + return 0; +} + static const struct dentry_operations dma_buf_dentry_ops = { .d_dname = dmabuffs_dname, .d_release = dma_buf_release, @@ -413,6 +425,7 @@ static void dma_buf_show_fdinfo(struct seq_file *m, struct file *file) } static const struct file_operations dma_buf_fops = { + .release = dma_buf_file_release, .mmap = dma_buf_mmap_internal, .llseek = dma_buf_llseek, .poll = dma_buf_poll, -- GitLab From e89eed02a5f1b864fa5abafc8e8e71bd9fd66d1f Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Tue, 5 Jan 2021 20:53:42 +0100 Subject: [PATCH 0295/2012] kcov, usb: hide in_serving_softirq checks in __usb_hcd_giveback_urb Done opencode in_serving_softirq() checks in in_serving_softirq() to avoid cluttering the code, hide them in kcov helpers instead. Fixes: aee9ddb1d371 ("kcov, usb: only collect coverage from __usb_hcd_giveback_urb in softirq") Signed-off-by: Andrey Konovalov Link: https://lore.kernel.org/r/aeb430c5bb90b0ccdf1ec302c70831c1a47b9c45.1609876340.git.andreyknvl@google.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/hcd.c | 8 +++----- include/linux/kcov.h | 21 +++++++++++++++++++++ 2 files changed, 24 insertions(+), 5 deletions(-) diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c index 60886a7464c3a..ad5a0f405a75c 100644 --- a/drivers/usb/core/hcd.c +++ b/drivers/usb/core/hcd.c @@ -1649,14 +1649,12 @@ static void __usb_hcd_giveback_urb(struct urb *urb) urb->status = status; /* * This function can be called in task context inside another remote - * coverage collection section, but KCOV doesn't support that kind of + * coverage collection section, but kcov doesn't support that kind of * recursion yet. Only collect coverage in softirq context for now. */ - if (in_serving_softirq()) - kcov_remote_start_usb((u64)urb->dev->bus->busnum); + kcov_remote_start_usb_softirq((u64)urb->dev->bus->busnum); urb->complete(urb); - if (in_serving_softirq()) - kcov_remote_stop(); + kcov_remote_stop_softirq(); usb_anchor_resume_wakeups(anchor); atomic_dec(&urb->use_count); diff --git a/include/linux/kcov.h b/include/linux/kcov.h index a10e84707d820..4e3037dc12048 100644 --- a/include/linux/kcov.h +++ b/include/linux/kcov.h @@ -52,6 +52,25 @@ static inline void kcov_remote_start_usb(u64 id) kcov_remote_start(kcov_remote_handle(KCOV_SUBSYSTEM_USB, id)); } +/* + * The softirq flavor of kcov_remote_*() functions is introduced as a temporary + * work around for kcov's lack of nested remote coverage sections support in + * task context. Adding suport for nested sections is tracked in: + * https://bugzilla.kernel.org/show_bug.cgi?id=210337 + */ + +static inline void kcov_remote_start_usb_softirq(u64 id) +{ + if (in_serving_softirq()) + kcov_remote_start_usb(id); +} + +static inline void kcov_remote_stop_softirq(void) +{ + if (in_serving_softirq()) + kcov_remote_stop(); +} + #else static inline void kcov_task_init(struct task_struct *t) {} @@ -66,6 +85,8 @@ static inline u64 kcov_common_handle(void) } static inline void kcov_remote_start_common(u64 id) {} static inline void kcov_remote_start_usb(u64 id) {} +static inline void kcov_remote_start_usb_softirq(u64 id) {} +static inline void kcov_remote_stop_softirq(void) {} #endif /* CONFIG_KCOV */ #endif /* _LINUX_KCOV_H */ -- GitLab From e2459108b5a0604c4b472cae2b3cb8d3444c77fb Mon Sep 17 00:00:00 2001 From: "taehyun.cho" Date: Thu, 7 Jan 2021 00:46:25 +0900 Subject: [PATCH 0296/2012] usb: gadget: enable super speed plus Enable Super speed plus in configfs to support USB3.1 Gen2. This ensures that when a USB gadget is plugged in, it is enumerated as Gen 2 and connected at 10 Gbps if the host and cable are capable of it. Many in-tree gadget functions (fs, midi, acm, ncm, mass_storage, etc.) already have SuperSpeed Plus support. Tested: plugged gadget into Linux host and saw: [284907.385986] usb 8-2: new SuperSpeedPlus Gen 2 USB device number 3 using xhci_hcd Tested-by: Lorenzo Colitti Acked-by: Felipe Balbi Signed-off-by: taehyun.cho Signed-off-by: Lorenzo Colitti Link: https://lore.kernel.org/r/20210106154625.2801030-1-lorenzo@google.com Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/configfs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/usb/gadget/configfs.c b/drivers/usb/gadget/configfs.c index 408a5332a975b..36ffb43f9c1a0 100644 --- a/drivers/usb/gadget/configfs.c +++ b/drivers/usb/gadget/configfs.c @@ -1543,7 +1543,7 @@ static const struct usb_gadget_driver configfs_driver_template = { .suspend = configfs_composite_suspend, .resume = configfs_composite_resume, - .max_speed = USB_SPEED_SUPER, + .max_speed = USB_SPEED_SUPER_PLUS, .driver = { .owner = THIS_MODULE, .name = "configfs-gadget", @@ -1583,7 +1583,7 @@ static struct config_group *gadgets_make( gi->composite.unbind = configfs_do_nothing; gi->composite.suspend = NULL; gi->composite.resume = NULL; - gi->composite.max_speed = USB_SPEED_SUPER; + gi->composite.max_speed = USB_SPEED_SUPER_PLUS; spin_lock_init(&gi->spinlock); mutex_init(&gi->lock); -- GitLab From 41952a66015466c3208aac96b14ffd92e0943589 Mon Sep 17 00:00:00 2001 From: Peter Robinson Date: Wed, 6 Jan 2021 00:16:05 +0000 Subject: [PATCH 0297/2012] usb: typec: Fix copy paste error for NVIDIA alt-mode description The name of the module for the NVIDIA alt-mode is incorrect as it looks to be a copy-paste error from the entry above, update it to the correct typec_nvidia module name. Cc: Ajay Gupta Cc: Heikki Krogerus Signed-off-by: Peter Robinson Link: https://lore.kernel.org/r/20210106001605.167917-1-pbrobinson@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/altmodes/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/typec/altmodes/Kconfig b/drivers/usb/typec/altmodes/Kconfig index 187690fd1a5bd..60d375e9c3c7c 100644 --- a/drivers/usb/typec/altmodes/Kconfig +++ b/drivers/usb/typec/altmodes/Kconfig @@ -20,6 +20,6 @@ config TYPEC_NVIDIA_ALTMODE to enable support for VirtualLink devices with NVIDIA GPUs. To compile this driver as a module, choose M here: the - module will be called typec_displayport. + module will be called typec_nvidia. endmenu -- GitLab From 6c75c2bad36cfb43b144e6a0a76a69993c72097f Mon Sep 17 00:00:00 2001 From: Prashant Malani Date: Wed, 6 Jan 2021 19:49:04 -0800 Subject: [PATCH 0298/2012] usb: typec: Send uevent for num_altmodes update Generate a change uevent when the "number_of_alternate_modes" sysfs file for partners and plugs is updated by a port driver. Cc: Heikki Krogerus Cc: Benson Leung Signed-off-by: Prashant Malani Link: https://lore.kernel.org/r/20210107034904.4112029-1-pmalani@chromium.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/class.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/typec/class.c b/drivers/usb/typec/class.c index ebfd3113a9a80..8f77669f9cf4f 100644 --- a/drivers/usb/typec/class.c +++ b/drivers/usb/typec/class.c @@ -766,6 +766,7 @@ int typec_partner_set_num_altmodes(struct typec_partner *partner, int num_altmod return ret; sysfs_notify(&partner->dev.kobj, NULL, "number_of_alternate_modes"); + kobject_uevent(&partner->dev.kobj, KOBJ_CHANGE); return 0; } @@ -923,6 +924,7 @@ int typec_plug_set_num_altmodes(struct typec_plug *plug, int num_altmodes) return ret; sysfs_notify(&plug->dev.kobj, NULL, "number_of_alternate_modes"); + kobject_uevent(&plug->dev.kobj, KOBJ_CHANGE); return 0; } -- GitLab From a5c7682aaaa10e42928d73de1c9e1e02d2b14c2e Mon Sep 17 00:00:00 2001 From: Thinh Nguyen Date: Mon, 4 Jan 2021 22:42:39 -0800 Subject: [PATCH 0299/2012] usb: dwc3: gadget: Clear wait flag on dequeue If an active transfer is dequeued, then the endpoint is freed to start a new transfer. Make sure to clear the endpoint's transfer wait flag for this case. Fixes: e0d19563eb6c ("usb: dwc3: gadget: Wait for transfer completion") Cc: stable@vger.kernel.org Acked-by: Felipe Balbi Signed-off-by: Thinh Nguyen Link: https://lore.kernel.org/r/b81cd5b5281cfbfdadb002c4bcf5c9be7c017cfd.1609828485.git.Thinh.Nguyen@synopsys.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/gadget.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 25f654b79e480..ee44321fee386 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -1763,6 +1763,8 @@ static int dwc3_gadget_ep_dequeue(struct usb_ep *ep, list_for_each_entry_safe(r, t, &dep->started_list, list) dwc3_gadget_move_cancelled_request(r); + dep->flags &= ~DWC3_EP_WAIT_TRANSFER_COMPLETE; + goto out; } } -- GitLab From e0658f970a7f3d85431c6803b7d5169444fb11b0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Tue, 5 Jan 2021 18:55:47 +0100 Subject: [PATCH 0300/2012] drm/radeon: stop re-init the TTM page pool MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Drivers are not supposed to init the page pool directly any more. Signed-off-by: Christian König Reviewed-by: Huang Rui Link: https://patchwork.freedesktop.org/patch/412153/ --- drivers/gpu/drm/radeon/radeon_ttm.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c index d59ef6e92a402..23195d5d4e919 100644 --- a/drivers/gpu/drm/radeon/radeon_ttm.c +++ b/drivers/gpu/drm/radeon/radeon_ttm.c @@ -730,9 +730,6 @@ int radeon_ttm_init(struct radeon_device *rdev) } rdev->mman.initialized = true; - ttm_pool_init(&rdev->mman.bdev.pool, rdev->dev, rdev->need_swiotlb, - dma_addressing_limited(&rdev->pdev->dev)); - r = radeon_ttm_init_vram(rdev); if (r) { DRM_ERROR("Failed initializing VRAM heap.\n"); -- GitLab From a73858ef4d5e1d425e171f0f6a52864176a6a979 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Tue, 5 Jan 2021 18:56:56 +0100 Subject: [PATCH 0301/2012] drm/ttm: unexport ttm_pool_init/fini MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Drivers are not supposed to use this directly any more. Signed-off-by: Christian König Reviewed-by: Huang Rui Link: https://patchwork.freedesktop.org/patch/412156/ --- drivers/gpu/drm/ttm/ttm_pool.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c index 7b2f60616750d..a00b7ab9c14cf 100644 --- a/drivers/gpu/drm/ttm/ttm_pool.c +++ b/drivers/gpu/drm/ttm/ttm_pool.c @@ -507,7 +507,6 @@ void ttm_pool_init(struct ttm_pool *pool, struct device *dev, ttm_pool_type_init(&pool->caching[i].orders[j], pool, i, j); } -EXPORT_SYMBOL(ttm_pool_init); /** * ttm_pool_fini - Cleanup a pool @@ -525,7 +524,6 @@ void ttm_pool_fini(struct ttm_pool *pool) for (j = 0; j < MAX_ORDER; ++j) ttm_pool_type_fini(&pool->caching[i].orders[j]); } -EXPORT_SYMBOL(ttm_pool_fini); #ifdef CONFIG_DEBUG_FS /* Count the number of pages available in a pool_type */ -- GitLab From 1efd17e7acb6692bffc6c58718f41f27fdfd62f5 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Thu, 31 Dec 2020 08:53:19 +0800 Subject: [PATCH 0302/2012] iommu/vt-d: Fix misuse of ALIGN in qi_flush_piotlb() Use IS_ALIGNED() instead. Otherwise, an unaligned address will be ignored. Fixes: 33cd6e642d6a ("iommu/vt-d: Flush PASID-based iotlb for iova over first level") Signed-off-by: Lu Baolu Link: https://lore.kernel.org/r/20201231005323.2178523-1-baolu.lu@linux.intel.com Signed-off-by: Will Deacon --- drivers/iommu/intel/dmar.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/intel/dmar.c b/drivers/iommu/intel/dmar.c index b46dbfa6d0ed6..004feaed3c72c 100644 --- a/drivers/iommu/intel/dmar.c +++ b/drivers/iommu/intel/dmar.c @@ -1461,8 +1461,8 @@ void qi_flush_piotlb(struct intel_iommu *iommu, u16 did, u32 pasid, u64 addr, int mask = ilog2(__roundup_pow_of_two(npages)); unsigned long align = (1ULL << (VTD_PAGE_SHIFT + mask)); - if (WARN_ON_ONCE(!ALIGN(addr, align))) - addr &= ~(align - 1); + if (WARN_ON_ONCE(!IS_ALIGNED(addr, align))) + addr = ALIGN_DOWN(addr, align); desc.qw0 = QI_EIOTLB_PASID(pasid) | QI_EIOTLB_DID(did) | -- GitLab From 4df7b2268ad81a74168130e1fb04550a8bc980e1 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Thu, 31 Dec 2020 08:53:22 +0800 Subject: [PATCH 0303/2012] Revert "iommu: Add quirk for Intel graphic devices in map_sg" This reverts commit 65f746e8285f0a67d43517d86fedb9e29ead49f2. As commit 8a473dbadccf ("drm/i915: Fix DMA mapped scatterlist walks") and commit 934941ed5a30 ("drm/i915: Fix DMA mapped scatterlist lookup") fixed the DMA scatterlist limitations in the i915 driver, remove this temporary workaround. Cc: Tvrtko Ursulin Cc: Tom Murphy Cc: Logan Gunthorpe Signed-off-by: Lu Baolu Link: https://lore.kernel.org/r/20201231005323.2178523-4-baolu.lu@linux.intel.com Signed-off-by: Will Deacon --- drivers/iommu/dma-iommu.c | 27 --------------------------- 1 file changed, 27 deletions(-) diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index f0305e6aac1b8..4078358ed66ea 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -863,33 +863,6 @@ static int __finalise_sg(struct device *dev, struct scatterlist *sg, int nents, unsigned int cur_len = 0, max_len = dma_get_max_seg_size(dev); int i, count = 0; - /* - * The Intel graphic driver is used to assume that the returned - * sg list is not combound. This blocks the efforts of converting - * Intel IOMMU driver to dma-iommu api's. Add this quirk to make the - * device driver work and should be removed once it's fixed in i915 - * driver. - */ - if (IS_ENABLED(CONFIG_DRM_I915) && dev_is_pci(dev) && - to_pci_dev(dev)->vendor == PCI_VENDOR_ID_INTEL && - (to_pci_dev(dev)->class >> 16) == PCI_BASE_CLASS_DISPLAY) { - for_each_sg(sg, s, nents, i) { - unsigned int s_iova_off = sg_dma_address(s); - unsigned int s_length = sg_dma_len(s); - unsigned int s_iova_len = s->length; - - s->offset += s_iova_off; - s->length = s_length; - sg_dma_address(s) = dma_addr + s_iova_off; - sg_dma_len(s) = s_length; - dma_addr += s_iova_len; - - pr_info_once("sg combining disabled due to i915 driver\n"); - } - - return nents; - } - for_each_sg(sg, s, nents, i) { /* Restore this segment's original unaligned fields first */ unsigned int s_iova_off = sg_dma_address(s); -- GitLab From 420d42f6f9db27d88bc4f83e3e668fcdacbf7e29 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Thu, 31 Dec 2020 08:53:23 +0800 Subject: [PATCH 0304/2012] iommu/vt-d: Fix lockdep splat in sva bind()/unbind() Lock(&iommu->lock) without disabling irq causes lockdep warnings. ======================================================== WARNING: possible irq lock inversion dependency detected 5.11.0-rc1+ #828 Not tainted -------------------------------------------------------- kworker/0:1H/120 just changed the state of lock: ffffffffad9ea1b8 (device_domain_lock){..-.}-{2:2}, at: iommu_flush_dev_iotlb.part.0+0x32/0x120 but this lock took another, SOFTIRQ-unsafe lock in the past: (&iommu->lock){+.+.}-{2:2} and interrupts could create inverse lock ordering between them. other info that might help us debug this: Possible interrupt unsafe locking scenario: CPU0 CPU1 ---- ---- lock(&iommu->lock); local_irq_disable(); lock(device_domain_lock); lock(&iommu->lock); lock(device_domain_lock); *** DEADLOCK *** Signed-off-by: Lu Baolu Link: https://lore.kernel.org/r/20201231005323.2178523-5-baolu.lu@linux.intel.com Signed-off-by: Will Deacon --- drivers/iommu/intel/svm.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c index 4fa248b98031c..9bcedd360235d 100644 --- a/drivers/iommu/intel/svm.c +++ b/drivers/iommu/intel/svm.c @@ -281,6 +281,7 @@ int intel_svm_bind_gpasid(struct iommu_domain *domain, struct device *dev, struct dmar_domain *dmar_domain; struct device_domain_info *info; struct intel_svm *svm = NULL; + unsigned long iflags; int ret = 0; if (WARN_ON(!iommu) || !data) @@ -381,12 +382,12 @@ int intel_svm_bind_gpasid(struct iommu_domain *domain, struct device *dev, * each bind of a new device even with an existing PASID, we need to * call the nested mode setup function here. */ - spin_lock(&iommu->lock); + spin_lock_irqsave(&iommu->lock, iflags); ret = intel_pasid_setup_nested(iommu, dev, (pgd_t *)(uintptr_t)data->gpgd, data->hpasid, &data->vendor.vtd, dmar_domain, data->addr_width); - spin_unlock(&iommu->lock); + spin_unlock_irqrestore(&iommu->lock, iflags); if (ret) { dev_err_ratelimited(dev, "Failed to set up PASID %llu in nested mode, Err %d\n", data->hpasid, ret); @@ -486,6 +487,7 @@ intel_svm_bind_mm(struct device *dev, unsigned int flags, struct device_domain_info *info; struct intel_svm_dev *sdev; struct intel_svm *svm = NULL; + unsigned long iflags; int pasid_max; int ret; @@ -605,14 +607,14 @@ intel_svm_bind_mm(struct device *dev, unsigned int flags, } } - spin_lock(&iommu->lock); + spin_lock_irqsave(&iommu->lock, iflags); ret = intel_pasid_setup_first_level(iommu, dev, mm ? mm->pgd : init_mm.pgd, svm->pasid, FLPT_DEFAULT_DID, (mm ? 0 : PASID_FLAG_SUPERVISOR_MODE) | (cpu_feature_enabled(X86_FEATURE_LA57) ? PASID_FLAG_FL5LP : 0)); - spin_unlock(&iommu->lock); + spin_unlock_irqrestore(&iommu->lock, iflags); if (ret) { if (mm) mmu_notifier_unregister(&svm->notifier, mm); @@ -632,14 +634,14 @@ intel_svm_bind_mm(struct device *dev, unsigned int flags, * Binding a new device with existing PASID, need to setup * the PASID entry. */ - spin_lock(&iommu->lock); + spin_lock_irqsave(&iommu->lock, iflags); ret = intel_pasid_setup_first_level(iommu, dev, mm ? mm->pgd : init_mm.pgd, svm->pasid, FLPT_DEFAULT_DID, (mm ? 0 : PASID_FLAG_SUPERVISOR_MODE) | (cpu_feature_enabled(X86_FEATURE_LA57) ? PASID_FLAG_FL5LP : 0)); - spin_unlock(&iommu->lock); + spin_unlock_irqrestore(&iommu->lock, iflags); if (ret) { kfree(sdev); goto out; -- GitLab From aded8c7c2b72f846a07a2c736b8e75bb8cf50a87 Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Tue, 5 Jan 2021 16:50:38 -0800 Subject: [PATCH 0305/2012] iommu/arm-smmu-qcom: Initialize SCTLR of the bypass context On SM8150 it's occasionally observed that the boot hangs in between the writing of SMEs and context banks in arm_smmu_device_reset(). The problem seems to coincide with a display refresh happening after updating the stream mapping, but before clearing - and there by disabling translation - the context bank picked to emulate translation bypass. Resolve this by explicitly disabling the bypass context already in cfg_probe. Fixes: f9081b8ff593 ("iommu/arm-smmu-qcom: Implement S2CR quirk") Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20210106005038.4152731-1-bjorn.andersson@linaro.org Signed-off-by: Will Deacon --- drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c index 5dff7ffbef119..1b83d140742f3 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c @@ -196,6 +196,8 @@ static int qcom_smmu_cfg_probe(struct arm_smmu_device *smmu) set_bit(qsmmu->bypass_cbndx, smmu->context_map); + arm_smmu_cb_write(smmu, qsmmu->bypass_cbndx, ARM_SMMU_CB_SCTLR, 0); + reg = FIELD_PREP(ARM_SMMU_CBAR_TYPE, CBAR_TYPE_S1_TRANS_S2_BYPASS); arm_smmu_gr1_write(smmu, ARM_SMMU_GR1_CBAR(qsmmu->bypass_cbndx), reg); } -- GitLab From 9ad9f45b3b91162b33abfe175ae75ab65718dbf5 Mon Sep 17 00:00:00 2001 From: Liu Yi L Date: Thu, 7 Jan 2021 00:03:55 +0800 Subject: [PATCH 0306/2012] iommu/vt-d: Move intel_iommu info from struct intel_svm to struct intel_svm_dev 'struct intel_svm' is shared by all devices bound to a give process, but records only a single pointer to a 'struct intel_iommu'. Consequently, cache invalidations may only be applied to a single DMAR unit, and are erroneously skipped for the other devices. In preparation for fixing this, rework the structures so that the iommu pointer resides in 'struct intel_svm_dev', allowing 'struct intel_svm' to track them in its device list. Fixes: 1c4f88b7f1f9 ("iommu/vt-d: Shared virtual address in scalable mode") Cc: Lu Baolu Cc: Jacob Pan Cc: Raj Ashok Cc: David Woodhouse Reported-by: Guo Kaijie Reported-by: Xin Zeng Signed-off-by: Guo Kaijie Signed-off-by: Xin Zeng Signed-off-by: Liu Yi L Tested-by: Guo Kaijie Cc: stable@vger.kernel.org # v5.0+ Acked-by: Lu Baolu Link: https://lore.kernel.org/r/1609949037-25291-2-git-send-email-yi.l.liu@intel.com Signed-off-by: Will Deacon --- drivers/iommu/intel/svm.c | 9 +++++---- include/linux/intel-iommu.h | 2 +- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c index 9bcedd360235d..790ef3497e7e3 100644 --- a/drivers/iommu/intel/svm.c +++ b/drivers/iommu/intel/svm.c @@ -142,7 +142,7 @@ static void intel_flush_svm_range_dev (struct intel_svm *svm, struct intel_svm_d } desc.qw2 = 0; desc.qw3 = 0; - qi_submit_sync(svm->iommu, &desc, 1, 0); + qi_submit_sync(sdev->iommu, &desc, 1, 0); if (sdev->dev_iotlb) { desc.qw0 = QI_DEV_EIOTLB_PASID(svm->pasid) | @@ -166,7 +166,7 @@ static void intel_flush_svm_range_dev (struct intel_svm *svm, struct intel_svm_d } desc.qw2 = 0; desc.qw3 = 0; - qi_submit_sync(svm->iommu, &desc, 1, 0); + qi_submit_sync(sdev->iommu, &desc, 1, 0); } } @@ -211,7 +211,7 @@ static void intel_mm_release(struct mmu_notifier *mn, struct mm_struct *mm) */ rcu_read_lock(); list_for_each_entry_rcu(sdev, &svm->devs, list) - intel_pasid_tear_down_entry(svm->iommu, sdev->dev, + intel_pasid_tear_down_entry(sdev->iommu, sdev->dev, svm->pasid, true); rcu_read_unlock(); @@ -364,6 +364,7 @@ int intel_svm_bind_gpasid(struct iommu_domain *domain, struct device *dev, } sdev->dev = dev; sdev->sid = PCI_DEVID(info->bus, info->devfn); + sdev->iommu = iommu; /* Only count users if device has aux domains */ if (iommu_dev_feature_enabled(dev, IOMMU_DEV_FEAT_AUX)) @@ -548,6 +549,7 @@ intel_svm_bind_mm(struct device *dev, unsigned int flags, goto out; } sdev->dev = dev; + sdev->iommu = iommu; ret = intel_iommu_enable_pasid(iommu, dev); if (ret) { @@ -577,7 +579,6 @@ intel_svm_bind_mm(struct device *dev, unsigned int flags, kfree(sdev); goto out; } - svm->iommu = iommu; if (pasid_max > intel_pasid_max_id) pasid_max = intel_pasid_max_id; diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index d956987ed032d..94522685a0d94 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -758,6 +758,7 @@ struct intel_svm_dev { struct list_head list; struct rcu_head rcu; struct device *dev; + struct intel_iommu *iommu; struct svm_dev_ops *ops; struct iommu_sva sva; u32 pasid; @@ -771,7 +772,6 @@ struct intel_svm { struct mmu_notifier notifier; struct mm_struct *mm; - struct intel_iommu *iommu; unsigned int flags; u32 pasid; int gpasid; /* In case that guest PASID is different from host PASID */ -- GitLab From 18abda7a2d555783d28ea1701f3ec95e96237a86 Mon Sep 17 00:00:00 2001 From: Liu Yi L Date: Thu, 7 Jan 2021 00:03:56 +0800 Subject: [PATCH 0307/2012] iommu/vt-d: Fix general protection fault in aux_detach_device() The aux-domain attach/detach are not tracked, some data structures might be used after free. This causes general protection faults when multiple subdevices are created and assigned to a same guest machine: | general protection fault, probably for non-canonical address 0xdead000000000100: 0000 [#1] SMP NOPTI | RIP: 0010:intel_iommu_aux_detach_device+0x12a/0x1f0 | [...] | Call Trace: | iommu_aux_detach_device+0x24/0x70 | vfio_mdev_detach_domain+0x3b/0x60 | ? vfio_mdev_set_domain+0x50/0x50 | iommu_group_for_each_dev+0x4f/0x80 | vfio_iommu_detach_group.isra.0+0x22/0x30 | vfio_iommu_type1_detach_group.cold+0x71/0x211 | ? find_exported_symbol_in_section+0x4a/0xd0 | ? each_symbol_section+0x28/0x50 | __vfio_group_unset_container+0x4d/0x150 | vfio_group_try_dissolve_container+0x25/0x30 | vfio_group_put_external_user+0x13/0x20 | kvm_vfio_group_put_external_user+0x27/0x40 [kvm] | kvm_vfio_destroy+0x45/0xb0 [kvm] | kvm_put_kvm+0x1bb/0x2e0 [kvm] | kvm_vm_release+0x22/0x30 [kvm] | __fput+0xcc/0x260 | ____fput+0xe/0x10 | task_work_run+0x8f/0xb0 | do_exit+0x358/0xaf0 | ? wake_up_state+0x10/0x20 | ? signal_wake_up_state+0x1a/0x30 | do_group_exit+0x47/0xb0 | __x64_sys_exit_group+0x18/0x20 | do_syscall_64+0x57/0x1d0 | entry_SYSCALL_64_after_hwframe+0x44/0xa9 Fix the crash by tracking the subdevices when attaching and detaching aux-domains. Fixes: 67b8e02b5e76 ("iommu/vt-d: Aux-domain specific domain attach/detach") Co-developed-by: Xin Zeng Signed-off-by: Xin Zeng Signed-off-by: Liu Yi L Acked-by: Lu Baolu Link: https://lore.kernel.org/r/1609949037-25291-3-git-send-email-yi.l.liu@intel.com Signed-off-by: Will Deacon --- drivers/iommu/intel/iommu.c | 95 +++++++++++++++++++++++++++---------- include/linux/intel-iommu.h | 16 +++++-- 2 files changed, 82 insertions(+), 29 deletions(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 788119c5b021b..d7720a8362682 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -1877,6 +1877,7 @@ static struct dmar_domain *alloc_domain(int flags) domain->flags |= DOMAIN_FLAG_USE_FIRST_LEVEL; domain->has_iotlb_device = false; INIT_LIST_HEAD(&domain->devices); + INIT_LIST_HEAD(&domain->subdevices); return domain; } @@ -2547,7 +2548,7 @@ static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu, info->iommu = iommu; info->pasid_table = NULL; info->auxd_enabled = 0; - INIT_LIST_HEAD(&info->auxiliary_domains); + INIT_LIST_HEAD(&info->subdevices); if (dev && dev_is_pci(dev)) { struct pci_dev *pdev = to_pci_dev(info->dev); @@ -4475,33 +4476,61 @@ is_aux_domain(struct device *dev, struct iommu_domain *domain) domain->type == IOMMU_DOMAIN_UNMANAGED; } -static void auxiliary_link_device(struct dmar_domain *domain, - struct device *dev) +static inline struct subdev_domain_info * +lookup_subdev_info(struct dmar_domain *domain, struct device *dev) +{ + struct subdev_domain_info *sinfo; + + if (!list_empty(&domain->subdevices)) { + list_for_each_entry(sinfo, &domain->subdevices, link_domain) { + if (sinfo->pdev == dev) + return sinfo; + } + } + + return NULL; +} + +static int auxiliary_link_device(struct dmar_domain *domain, + struct device *dev) { struct device_domain_info *info = get_domain_info(dev); + struct subdev_domain_info *sinfo = lookup_subdev_info(domain, dev); assert_spin_locked(&device_domain_lock); if (WARN_ON(!info)) - return; + return -EINVAL; + + if (!sinfo) { + sinfo = kzalloc(sizeof(*sinfo), GFP_ATOMIC); + sinfo->domain = domain; + sinfo->pdev = dev; + list_add(&sinfo->link_phys, &info->subdevices); + list_add(&sinfo->link_domain, &domain->subdevices); + } - domain->auxd_refcnt++; - list_add(&domain->auxd, &info->auxiliary_domains); + return ++sinfo->users; } -static void auxiliary_unlink_device(struct dmar_domain *domain, - struct device *dev) +static int auxiliary_unlink_device(struct dmar_domain *domain, + struct device *dev) { struct device_domain_info *info = get_domain_info(dev); + struct subdev_domain_info *sinfo = lookup_subdev_info(domain, dev); + int ret; assert_spin_locked(&device_domain_lock); - if (WARN_ON(!info)) - return; + if (WARN_ON(!info || !sinfo || sinfo->users <= 0)) + return -EINVAL; - list_del(&domain->auxd); - domain->auxd_refcnt--; + ret = --sinfo->users; + if (!ret) { + list_del(&sinfo->link_phys); + list_del(&sinfo->link_domain); + kfree(sinfo); + } - if (!domain->auxd_refcnt && domain->default_pasid > 0) - ioasid_put(domain->default_pasid); + return ret; } static int aux_domain_add_dev(struct dmar_domain *domain, @@ -4530,6 +4559,19 @@ static int aux_domain_add_dev(struct dmar_domain *domain, } spin_lock_irqsave(&device_domain_lock, flags); + ret = auxiliary_link_device(domain, dev); + if (ret <= 0) + goto link_failed; + + /* + * Subdevices from the same physical device can be attached to the + * same domain. For such cases, only the first subdevice attachment + * needs to go through the full steps in this function. So if ret > + * 1, just goto out. + */ + if (ret > 1) + goto out; + /* * iommu->lock must be held to attach domain to iommu and setup the * pasid entry for second level translation. @@ -4548,10 +4590,9 @@ static int aux_domain_add_dev(struct dmar_domain *domain, domain->default_pasid); if (ret) goto table_failed; - spin_unlock(&iommu->lock); - - auxiliary_link_device(domain, dev); + spin_unlock(&iommu->lock); +out: spin_unlock_irqrestore(&device_domain_lock, flags); return 0; @@ -4560,8 +4601,10 @@ table_failed: domain_detach_iommu(domain, iommu); attach_failed: spin_unlock(&iommu->lock); + auxiliary_unlink_device(domain, dev); +link_failed: spin_unlock_irqrestore(&device_domain_lock, flags); - if (!domain->auxd_refcnt && domain->default_pasid > 0) + if (list_empty(&domain->subdevices) && domain->default_pasid > 0) ioasid_put(domain->default_pasid); return ret; @@ -4581,14 +4624,18 @@ static void aux_domain_remove_dev(struct dmar_domain *domain, info = get_domain_info(dev); iommu = info->iommu; - auxiliary_unlink_device(domain, dev); - - spin_lock(&iommu->lock); - intel_pasid_tear_down_entry(iommu, dev, domain->default_pasid, false); - domain_detach_iommu(domain, iommu); - spin_unlock(&iommu->lock); + if (!auxiliary_unlink_device(domain, dev)) { + spin_lock(&iommu->lock); + intel_pasid_tear_down_entry(iommu, dev, + domain->default_pasid, false); + domain_detach_iommu(domain, iommu); + spin_unlock(&iommu->lock); + } spin_unlock_irqrestore(&device_domain_lock, flags); + + if (list_empty(&domain->subdevices) && domain->default_pasid > 0) + ioasid_put(domain->default_pasid); } static int prepare_domain_attach_device(struct iommu_domain *domain, diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index 94522685a0d94..09c6a0bf38928 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -533,11 +533,10 @@ struct dmar_domain { /* Domain ids per IOMMU. Use u16 since * domain ids are 16 bit wide according * to VT-d spec, section 9.3 */ - unsigned int auxd_refcnt; /* Refcount of auxiliary attaching */ bool has_iotlb_device; struct list_head devices; /* all devices' list */ - struct list_head auxd; /* link to device's auxiliary list */ + struct list_head subdevices; /* all subdevices' list */ struct iova_domain iovad; /* iova's that belong to this domain */ struct dma_pte *pgd; /* virtual address */ @@ -610,14 +609,21 @@ struct intel_iommu { struct dmar_drhd_unit *drhd; }; +/* Per subdevice private data */ +struct subdev_domain_info { + struct list_head link_phys; /* link to phys device siblings */ + struct list_head link_domain; /* link to domain siblings */ + struct device *pdev; /* physical device derived from */ + struct dmar_domain *domain; /* aux-domain */ + int users; /* user count */ +}; + /* PCI domain-device relationship */ struct device_domain_info { struct list_head link; /* link to domain siblings */ struct list_head global; /* link to global list */ struct list_head table; /* link to pasid table */ - struct list_head auxiliary_domains; /* auxiliary domains - * attached to this device - */ + struct list_head subdevices; /* subdevices sibling */ u32 segment; /* PCI segment number */ u8 bus; /* PCI bus number */ u8 devfn; /* PCI devfn number */ -- GitLab From 7c29ada5e70083805bc3a68daa23441df421fbee Mon Sep 17 00:00:00 2001 From: Liu Yi L Date: Thu, 7 Jan 2021 00:03:57 +0800 Subject: [PATCH 0308/2012] iommu/vt-d: Fix ineffective devTLB invalidation for subdevices iommu_flush_dev_iotlb() is called to invalidate caches on a device but only loops over the devices which are fully-attached to the domain. For sub-devices, this is ineffective and can result in invalid caching entries left on the device. Fix the missing invalidation by adding a loop over the subdevices and ensuring that 'domain->has_iotlb_device' is updated when attaching to subdevices. Fixes: 67b8e02b5e76 ("iommu/vt-d: Aux-domain specific domain attach/detach") Signed-off-by: Liu Yi L Acked-by: Lu Baolu Link: https://lore.kernel.org/r/1609949037-25291-4-git-send-email-yi.l.liu@intel.com Signed-off-by: Will Deacon --- drivers/iommu/intel/iommu.c | 53 ++++++++++++++++++++++++++----------- 1 file changed, 37 insertions(+), 16 deletions(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index d7720a8362682..65cf06d70bf4e 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -719,6 +719,8 @@ static int domain_update_device_node(struct dmar_domain *domain) return nid; } +static void domain_update_iotlb(struct dmar_domain *domain); + /* Some capabilities may be different across iommus */ static void domain_update_iommu_cap(struct dmar_domain *domain) { @@ -744,6 +746,8 @@ static void domain_update_iommu_cap(struct dmar_domain *domain) domain->domain.geometry.aperture_end = __DOMAIN_MAX_ADDR(domain->gaw - 1); else domain->domain.geometry.aperture_end = __DOMAIN_MAX_ADDR(domain->gaw); + + domain_update_iotlb(domain); } struct context_entry *iommu_context_addr(struct intel_iommu *iommu, u8 bus, @@ -1464,17 +1468,22 @@ static void domain_update_iotlb(struct dmar_domain *domain) assert_spin_locked(&device_domain_lock); - list_for_each_entry(info, &domain->devices, link) { - struct pci_dev *pdev; - - if (!info->dev || !dev_is_pci(info->dev)) - continue; - - pdev = to_pci_dev(info->dev); - if (pdev->ats_enabled) { + list_for_each_entry(info, &domain->devices, link) + if (info->ats_enabled) { has_iotlb_device = true; break; } + + if (!has_iotlb_device) { + struct subdev_domain_info *sinfo; + + list_for_each_entry(sinfo, &domain->subdevices, link_domain) { + info = get_domain_info(sinfo->pdev); + if (info && info->ats_enabled) { + has_iotlb_device = true; + break; + } + } } domain->has_iotlb_device = has_iotlb_device; @@ -1555,25 +1564,37 @@ static void iommu_disable_dev_iotlb(struct device_domain_info *info) #endif } +static void __iommu_flush_dev_iotlb(struct device_domain_info *info, + u64 addr, unsigned int mask) +{ + u16 sid, qdep; + + if (!info || !info->ats_enabled) + return; + + sid = info->bus << 8 | info->devfn; + qdep = info->ats_qdep; + qi_flush_dev_iotlb(info->iommu, sid, info->pfsid, + qdep, addr, mask); +} + static void iommu_flush_dev_iotlb(struct dmar_domain *domain, u64 addr, unsigned mask) { - u16 sid, qdep; unsigned long flags; struct device_domain_info *info; + struct subdev_domain_info *sinfo; if (!domain->has_iotlb_device) return; spin_lock_irqsave(&device_domain_lock, flags); - list_for_each_entry(info, &domain->devices, link) { - if (!info->ats_enabled) - continue; + list_for_each_entry(info, &domain->devices, link) + __iommu_flush_dev_iotlb(info, addr, mask); - sid = info->bus << 8 | info->devfn; - qdep = info->ats_qdep; - qi_flush_dev_iotlb(info->iommu, sid, info->pfsid, - qdep, addr, mask); + list_for_each_entry(sinfo, &domain->subdevices, link_domain) { + info = get_domain_info(sinfo->pdev); + __iommu_flush_dev_iotlb(info, addr, mask); } spin_unlock_irqrestore(&device_domain_lock, flags); } -- GitLab From ff474acc4b1a9a15e29c42a41942e6932fa4f01f Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Fri, 11 Dec 2020 23:21:28 +0100 Subject: [PATCH 0309/2012] media: ccs-pll: Fix link frequency for C-PHY The highest fundamental frequency signal for C-PHY is half of the symbol rate which is similar to D-PHY. Take this into account in ccs-pll. Also remove the outdated comment. Fixes: 8030aa4f9c51 ("media: ccs-pll: Add C-PHY support") Signed-off-by: Sakari Ailus Signed-off-by: Mauro Carvalho Chehab --- drivers/media/i2c/ccs-pll.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/drivers/media/i2c/ccs-pll.c b/drivers/media/i2c/ccs-pll.c index eb7b6f01f6231..58ca47e904a14 100644 --- a/drivers/media/i2c/ccs-pll.c +++ b/drivers/media/i2c/ccs-pll.c @@ -772,14 +772,8 @@ int ccs_pll_calculate(struct device *dev, const struct ccs_pll_limits *lim, switch (pll->bus_type) { case CCS_PLL_BUS_TYPE_CSI2_DPHY: - /* CSI transfers 2 bits per clock per lane; thus times 2 */ - op_sys_clk_freq_hz_sdr = pll->link_freq * 2 - * (pll->flags & CCS_PLL_FLAG_LANE_SPEED_MODEL ? - 1 : pll->csi2.lanes); - break; case CCS_PLL_BUS_TYPE_CSI2_CPHY: - op_sys_clk_freq_hz_sdr = - pll->link_freq + op_sys_clk_freq_hz_sdr = pll->link_freq * 2 * (pll->flags & CCS_PLL_FLAG_LANE_SPEED_MODEL ? 1 : pll->csi2.lanes); break; -- GitLab From 1bc0b1baf26efa23c0fd6fdcc24297e7d94f37ac Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Tue, 15 Dec 2020 21:31:12 +0100 Subject: [PATCH 0310/2012] media: ccs: Get static data version minor correctly Fix obtaining CCS static data version minor part correctly. Instead, the upper 8 bits were obtained from the major version number. Fixes: a6b396f410b1 ("media: ccs: Add CCS static data parser library") Signed-off-by: Sakari Ailus Signed-off-by: Mauro Carvalho Chehab --- drivers/media/i2c/ccs/ccs-data.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/i2c/ccs/ccs-data.c b/drivers/media/i2c/ccs/ccs-data.c index 9a6097b088bdf..6555bd4b325a6 100644 --- a/drivers/media/i2c/ccs/ccs-data.c +++ b/drivers/media/i2c/ccs/ccs-data.c @@ -152,7 +152,7 @@ static int ccs_data_parse_version(struct bin_container *bin, vv->version_major = ((u16)v->static_data_version_major[0] << 8) + v->static_data_version_major[1]; vv->version_minor = ((u16)v->static_data_version_minor[0] << 8) + - v->static_data_version_major[1]; + v->static_data_version_minor[1]; vv->date_year = ((u16)v->year[0] << 8) + v->year[1]; vv->date_month = v->month; vv->date_day = v->day; -- GitLab From e99a8f0f6344fee25cd30907c30ac0ca2f02804d Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 4 Dec 2020 18:38:33 +0100 Subject: [PATCH 0311/2012] media: rcar-vin: fix return, use ret instead of zero MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently the return error code is in ret is being assigned but not used. It and should be returned by the return statement and currently just 0 is being returned. Fix this. Addresses-Coverity: ("Unused value") Fixes: b9ad52aafe38 ("media: rcar-vin: Rework parallel firmware parsing") Signed-off-by: Colin Ian King Reviewed-by: Niklas Söderlund Signed-off-by: Sakari Ailus Signed-off-by: Mauro Carvalho Chehab --- drivers/media/platform/rcar-vin/rcar-core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/platform/rcar-vin/rcar-core.c b/drivers/media/platform/rcar-vin/rcar-core.c index 98bff765b02e6..e48d666f2c63a 100644 --- a/drivers/media/platform/rcar-vin/rcar-core.c +++ b/drivers/media/platform/rcar-vin/rcar-core.c @@ -654,7 +654,7 @@ static int rvin_parallel_parse_of(struct rvin_dev *vin) out: fwnode_handle_put(fwnode); - return 0; + return ret; } static int rvin_parallel_init(struct rvin_dev *vin) -- GitLab From 2984a99ff1c071c85dc09451c8adc859c22fbb96 Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Thu, 17 Dec 2020 18:57:05 +0100 Subject: [PATCH 0312/2012] media: v4l: common: Fix naming of v4l2_get_link_rate Rename v4l2_get_link_rate() as v4l2_get_link_freq(). What the function returns is the frequency of the link; rename it to reflect the name of the control where the information is obtained. Fixes: 1b888b3cebef ("media: v4l: Add a helper for obtaining the link frequency") Signed-off-by: Sakari Ailus Signed-off-by: Mauro Carvalho Chehab --- drivers/media/pci/intel/ipu3/ipu3-cio2.c | 2 +- drivers/media/v4l2-core/v4l2-common.c | 4 ++-- include/media/v4l2-common.h | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/media/pci/intel/ipu3/ipu3-cio2.c b/drivers/media/pci/intel/ipu3/ipu3-cio2.c index 36e354ecf71ec..6cada8a6e50cc 100644 --- a/drivers/media/pci/intel/ipu3/ipu3-cio2.c +++ b/drivers/media/pci/intel/ipu3/ipu3-cio2.c @@ -302,7 +302,7 @@ static int cio2_csi2_calc_timing(struct cio2_device *cio2, struct cio2_queue *q, if (!q->sensor) return -ENODEV; - freq = v4l2_get_link_rate(q->sensor->ctrl_handler, bpp, lanes); + freq = v4l2_get_link_freq(q->sensor->ctrl_handler, bpp, lanes); if (freq < 0) { dev_err(dev, "error %lld, invalid link_freq\n", freq); return freq; diff --git a/drivers/media/v4l2-core/v4l2-common.c b/drivers/media/v4l2-core/v4l2-common.c index 78007dba4677c..133d20e40f82a 100644 --- a/drivers/media/v4l2-core/v4l2-common.c +++ b/drivers/media/v4l2-core/v4l2-common.c @@ -442,7 +442,7 @@ int v4l2_fill_pixfmt(struct v4l2_pix_format *pixfmt, u32 pixelformat, } EXPORT_SYMBOL_GPL(v4l2_fill_pixfmt); -s64 v4l2_get_link_rate(struct v4l2_ctrl_handler *handler, unsigned int mul, +s64 v4l2_get_link_freq(struct v4l2_ctrl_handler *handler, unsigned int mul, unsigned int div) { struct v4l2_ctrl *ctrl; @@ -473,4 +473,4 @@ s64 v4l2_get_link_rate(struct v4l2_ctrl_handler *handler, unsigned int mul, return freq > 0 ? freq : -EINVAL; } -EXPORT_SYMBOL_GPL(v4l2_get_link_rate); +EXPORT_SYMBOL_GPL(v4l2_get_link_freq); diff --git a/include/media/v4l2-common.h b/include/media/v4l2-common.h index be36cbdcc1bd2..3eb202259e8cc 100644 --- a/include/media/v4l2-common.h +++ b/include/media/v4l2-common.h @@ -520,7 +520,7 @@ int v4l2_fill_pixfmt_mp(struct v4l2_pix_format_mplane *pixfmt, u32 pixelformat, u32 width, u32 height); /** - * v4l2_get_link_rate - Get link rate from transmitter + * v4l2_get_link_freq - Get link rate from transmitter * * @handler: The transmitter's control handler * @mul: The multiplier between pixel rate and link frequency. Bits per pixel on @@ -537,7 +537,7 @@ int v4l2_fill_pixfmt_mp(struct v4l2_pix_format_mplane *pixfmt, u32 pixelformat, * -ENOENT: Link frequency or pixel rate control not found * -EINVAL: Invalid link frequency value */ -s64 v4l2_get_link_rate(struct v4l2_ctrl_handler *handler, unsigned int mul, +s64 v4l2_get_link_freq(struct v4l2_ctrl_handler *handler, unsigned int mul, unsigned int div); static inline u64 v4l2_buffer_get_timestamp(const struct v4l2_buffer *buf) -- GitLab From 80c18e4ac20c9cde420cb3ffab48c936147cf07d Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Thu, 7 Jan 2021 03:15:41 +0000 Subject: [PATCH 0313/2012] io_uring: trigger eventfd for IOPOLL Make sure io_iopoll_complete() tries to wake up eventfd, which currently is skipped together with io_cqring_ev_posted() for non-SQPOLL IOPOLL. Add an iopoll version of io_cqring_ev_posted(), duplicates a bit of code, but they actually use different sets of wait queues may be for better. Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io_uring.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 27a8c226abf8c..91e517ad1421c 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -1713,6 +1713,16 @@ static void io_cqring_ev_posted(struct io_ring_ctx *ctx) eventfd_signal(ctx->cq_ev_fd, 1); } +static void io_cqring_ev_posted_iopoll(struct io_ring_ctx *ctx) +{ + if (ctx->flags & IORING_SETUP_SQPOLL) { + if (waitqueue_active(&ctx->wait)) + wake_up(&ctx->wait); + } + if (io_should_trigger_evfd(ctx)) + eventfd_signal(ctx->cq_ev_fd, 1); +} + /* Returns true if there are no backlogged entries after the flush */ static bool __io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force, struct task_struct *tsk, @@ -2428,8 +2438,7 @@ static void io_iopoll_complete(struct io_ring_ctx *ctx, unsigned int *nr_events, } io_commit_cqring(ctx); - if (ctx->flags & IORING_SETUP_SQPOLL) - io_cqring_ev_posted(ctx); + io_cqring_ev_posted_iopoll(ctx); io_req_free_batch_finish(ctx, &rb); if (!list_empty(&again)) -- GitLab From 4aa84f2ffa81f71e15e5cffc2cc6090dbee78f8e Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Thu, 7 Jan 2021 03:15:42 +0000 Subject: [PATCH 0314/2012] io_uring: dont kill fasync under completion_lock CPU0 CPU1 ---- ---- lock(&new->fa_lock); local_irq_disable(); lock(&ctx->completion_lock); lock(&new->fa_lock); lock(&ctx->completion_lock); *** DEADLOCK *** Move kill_fasync() out of io_commit_cqring() to io_cqring_ev_posted(), so it doesn't hold completion_lock while doing it. That saves from the reported deadlock, and it's just nice to shorten the locking time and untangle nested locks (compl_lock -> wq_head::lock). Reported-by: syzbot+91ca3f25bd7f795f019c@syzkaller.appspotmail.com Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io_uring.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 91e517ad1421c..401316fe2ae2a 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -1345,11 +1345,6 @@ static void __io_commit_cqring(struct io_ring_ctx *ctx) /* order cqe stores with ring update */ smp_store_release(&rings->cq.tail, ctx->cached_cq_tail); - - if (wq_has_sleeper(&ctx->cq_wait)) { - wake_up_interruptible(&ctx->cq_wait); - kill_fasync(&ctx->cq_fasync, SIGIO, POLL_IN); - } } static void io_put_identity(struct io_uring_task *tctx, struct io_kiocb *req) @@ -1711,6 +1706,10 @@ static void io_cqring_ev_posted(struct io_ring_ctx *ctx) wake_up(&ctx->sq_data->wait); if (io_should_trigger_evfd(ctx)) eventfd_signal(ctx->cq_ev_fd, 1); + if (wq_has_sleeper(&ctx->cq_wait)) { + wake_up_interruptible(&ctx->cq_wait); + kill_fasync(&ctx->cq_fasync, SIGIO, POLL_IN); + } } static void io_cqring_ev_posted_iopoll(struct io_ring_ctx *ctx) @@ -1721,6 +1720,10 @@ static void io_cqring_ev_posted_iopoll(struct io_ring_ctx *ctx) } if (io_should_trigger_evfd(ctx)) eventfd_signal(ctx->cq_ev_fd, 1); + if (wq_has_sleeper(&ctx->cq_wait)) { + wake_up_interruptible(&ctx->cq_wait); + kill_fasync(&ctx->cq_fasync, SIGIO, POLL_IN); + } } /* Returns true if there are no backlogged entries after the flush */ -- GitLab From b1445e59cc9a10fdb8f83810ae1f4feb941ab36b Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Thu, 7 Jan 2021 03:15:43 +0000 Subject: [PATCH 0315/2012] io_uring: synchronise ev_posted() with waitqueues waitqueue_active() needs smp_mb() to be in sync with waitqueues modification, but we miss it in io_cqring_ev_posted*() apart from cq_wait() case. Take an smb_mb() out of wq_has_sleeper() making it waitqueue_active(), and place it a few lines before, so it can synchronise other waitqueue_active() as well. The patch doesn't add any additional overhead, so even if there are no problems currently, it's just safer to have it this way. Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io_uring.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 401316fe2ae2a..cb57e0360fcbb 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -1700,13 +1700,16 @@ static inline unsigned __io_cqring_events(struct io_ring_ctx *ctx) static void io_cqring_ev_posted(struct io_ring_ctx *ctx) { + /* see waitqueue_active() comment */ + smp_mb(); + if (waitqueue_active(&ctx->wait)) wake_up(&ctx->wait); if (ctx->sq_data && waitqueue_active(&ctx->sq_data->wait)) wake_up(&ctx->sq_data->wait); if (io_should_trigger_evfd(ctx)) eventfd_signal(ctx->cq_ev_fd, 1); - if (wq_has_sleeper(&ctx->cq_wait)) { + if (waitqueue_active(&ctx->cq_wait)) { wake_up_interruptible(&ctx->cq_wait); kill_fasync(&ctx->cq_fasync, SIGIO, POLL_IN); } @@ -1714,13 +1717,16 @@ static void io_cqring_ev_posted(struct io_ring_ctx *ctx) static void io_cqring_ev_posted_iopoll(struct io_ring_ctx *ctx) { + /* see waitqueue_active() comment */ + smp_mb(); + if (ctx->flags & IORING_SETUP_SQPOLL) { if (waitqueue_active(&ctx->wait)) wake_up(&ctx->wait); } if (io_should_trigger_evfd(ctx)) eventfd_signal(ctx->cq_ev_fd, 1); - if (wq_has_sleeper(&ctx->cq_wait)) { + if (waitqueue_active(&ctx->cq_wait)) { wake_up_interruptible(&ctx->cq_wait); kill_fasync(&ctx->cq_fasync, SIGIO, POLL_IN); } -- GitLab From 71008734d27f2276fcef23a5e546d358430f2d52 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 16 Dec 2020 11:18:44 -0500 Subject: [PATCH 0316/2012] btrfs: print the actual offset in btrfs_root_name We're supposed to print the root_key.offset in btrfs_root_name in the case of a reloc root, not the objectid. Fix this helper to take the key so we have access to the offset when we need it. Fixes: 457f1864b569 ("btrfs: pretty print leaked root name") Reviewed-by: Qu Wenruo Reviewed-by: Nikolay Borisov Signed-off-by: Josef Bacik Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/disk-io.c | 2 +- fs/btrfs/print-tree.c | 10 +++++----- fs/btrfs/print-tree.h | 2 +- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 1dfd4b2d0e1e8..6b35b7e881369 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1457,7 +1457,7 @@ void btrfs_check_leaked_roots(struct btrfs_fs_info *fs_info) root = list_first_entry(&fs_info->allocated_roots, struct btrfs_root, leak_list); btrfs_err(fs_info, "leaked root %s refcount %d", - btrfs_root_name(root->root_key.objectid, buf), + btrfs_root_name(&root->root_key, buf), refcount_read(&root->refs)); while (refcount_read(&root->refs) > 1) btrfs_put_root(root); diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c index fe5e0026129d5..aae1027bd76a1 100644 --- a/fs/btrfs/print-tree.c +++ b/fs/btrfs/print-tree.c @@ -26,22 +26,22 @@ static const struct root_name_map root_map[] = { { BTRFS_DATA_RELOC_TREE_OBJECTID, "DATA_RELOC_TREE" }, }; -const char *btrfs_root_name(u64 objectid, char *buf) +const char *btrfs_root_name(const struct btrfs_key *key, char *buf) { int i; - if (objectid == BTRFS_TREE_RELOC_OBJECTID) { + if (key->objectid == BTRFS_TREE_RELOC_OBJECTID) { snprintf(buf, BTRFS_ROOT_NAME_BUF_LEN, - "TREE_RELOC offset=%llu", objectid); + "TREE_RELOC offset=%llu", key->offset); return buf; } for (i = 0; i < ARRAY_SIZE(root_map); i++) { - if (root_map[i].id == objectid) + if (root_map[i].id == key->objectid) return root_map[i].name; } - snprintf(buf, BTRFS_ROOT_NAME_BUF_LEN, "%llu", objectid); + snprintf(buf, BTRFS_ROOT_NAME_BUF_LEN, "%llu", key->objectid); return buf; } diff --git a/fs/btrfs/print-tree.h b/fs/btrfs/print-tree.h index 78b99385a503f..8c3e9319ec4ef 100644 --- a/fs/btrfs/print-tree.h +++ b/fs/btrfs/print-tree.h @@ -11,6 +11,6 @@ void btrfs_print_leaf(struct extent_buffer *l); void btrfs_print_tree(struct extent_buffer *c, bool follow); -const char *btrfs_root_name(u64 objectid, char *buf); +const char *btrfs_root_name(const struct btrfs_key *key, char *buf); #endif -- GitLab From 29b665cc51e8b602bf2a275734349494776e3dbc Mon Sep 17 00:00:00 2001 From: Su Yue Date: Sun, 3 Jan 2021 17:28:03 +0800 Subject: [PATCH 0317/2012] btrfs: prevent NULL pointer dereference in extent_io_tree_panic Some extent io trees are initialized with NULL private member (e.g. btrfs_device::alloc_state and btrfs_fs_info::excluded_extents). Dereference of a NULL tree->private as inode pointer will cause panic. Pass tree->fs_info as it's known to be valid in all cases. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=208929 Fixes: 05912a3c04eb ("btrfs: drop extent_io_ops::tree_fs_info callback") CC: stable@vger.kernel.org # 4.19+ Reviewed-by: Anand Jain Signed-off-by: Su Yue Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/extent_io.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 6e3b72e63e422..c9cee458e001b 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -676,9 +676,7 @@ alloc_extent_state_atomic(struct extent_state *prealloc) static void extent_io_tree_panic(struct extent_io_tree *tree, int err) { - struct inode *inode = tree->private_data; - - btrfs_panic(btrfs_sb(inode->i_sb), err, + btrfs_panic(tree->fs_info, err, "locking error: extent tree was modified by another thread while locked"); } -- GitLab From 347fb0cfc9bab5195c6701e62eda488310d7938f Mon Sep 17 00:00:00 2001 From: Su Yue Date: Sun, 3 Jan 2021 17:28:04 +0800 Subject: [PATCH 0318/2012] btrfs: tree-checker: check if chunk item end overflows While mounting a crafted image provided by user, kernel panics due to the invalid chunk item whose end is less than start. [66.387422] loop: module loaded [66.389773] loop0: detected capacity change from 262144 to 0 [66.427708] BTRFS: device fsid a62e00e8-e94e-4200-8217-12444de93c2e devid 1 transid 12 /dev/loop0 scanned by mount (613) [66.431061] BTRFS info (device loop0): disk space caching is enabled [66.431078] BTRFS info (device loop0): has skinny extents [66.437101] BTRFS error: insert state: end < start 29360127 37748736 [66.437136] ------------[ cut here ]------------ [66.437140] WARNING: CPU: 16 PID: 613 at fs/btrfs/extent_io.c:557 insert_state.cold+0x1a/0x46 [btrfs] [66.437369] CPU: 16 PID: 613 Comm: mount Tainted: G O 5.11.0-rc1-custom #45 [66.437374] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS ArchLinux 1.14.0-1 04/01/2014 [66.437378] RIP: 0010:insert_state.cold+0x1a/0x46 [btrfs] [66.437420] RSP: 0018:ffff93e5414c3908 EFLAGS: 00010286 [66.437427] RAX: 0000000000000000 RBX: 0000000001bfffff RCX: 0000000000000000 [66.437431] RDX: 0000000000000000 RSI: ffffffffb90d4660 RDI: 00000000ffffffff [66.437434] RBP: ffff93e5414c3938 R08: 0000000000000001 R09: 0000000000000001 [66.437438] R10: ffff93e5414c3658 R11: 0000000000000000 R12: ffff8ec782d72aa0 [66.437441] R13: ffff8ec78bc71628 R14: 0000000000000000 R15: 0000000002400000 [66.437447] FS: 00007f01386a8580(0000) GS:ffff8ec809000000(0000) knlGS:0000000000000000 [66.437451] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [66.437455] CR2: 00007f01382fa000 CR3: 0000000109a34000 CR4: 0000000000750ee0 [66.437460] PKRU: 55555554 [66.437464] Call Trace: [66.437475] set_extent_bit+0x652/0x740 [btrfs] [66.437539] set_extent_bits_nowait+0x1d/0x20 [btrfs] [66.437576] add_extent_mapping+0x1e0/0x2f0 [btrfs] [66.437621] read_one_chunk+0x33c/0x420 [btrfs] [66.437674] btrfs_read_chunk_tree+0x6a4/0x870 [btrfs] [66.437708] ? kvm_sched_clock_read+0x18/0x40 [66.437739] open_ctree+0xb32/0x1734 [btrfs] [66.437781] ? bdi_register_va+0x1b/0x20 [66.437788] ? super_setup_bdi_name+0x79/0xd0 [66.437810] btrfs_mount_root.cold+0x12/0xeb [btrfs] [66.437854] ? __kmalloc_track_caller+0x217/0x3b0 [66.437873] legacy_get_tree+0x34/0x60 [66.437880] vfs_get_tree+0x2d/0xc0 [66.437888] vfs_kern_mount.part.0+0x78/0xc0 [66.437897] vfs_kern_mount+0x13/0x20 [66.437902] btrfs_mount+0x11f/0x3c0 [btrfs] [66.437940] ? kfree+0x5ff/0x670 [66.437944] ? __kmalloc_track_caller+0x217/0x3b0 [66.437962] legacy_get_tree+0x34/0x60 [66.437974] vfs_get_tree+0x2d/0xc0 [66.437983] path_mount+0x48c/0xd30 [66.437998] __x64_sys_mount+0x108/0x140 [66.438011] do_syscall_64+0x38/0x50 [66.438018] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [66.438023] RIP: 0033:0x7f0138827f6e [66.438033] RSP: 002b:00007ffecd79edf8 EFLAGS: 00000246 ORIG_RAX: 00000000000000a5 [66.438040] RAX: ffffffffffffffda RBX: 00007f013894c264 RCX: 00007f0138827f6e [66.438044] RDX: 00005593a4a41360 RSI: 00005593a4a33690 RDI: 00005593a4a3a6c0 [66.438047] RBP: 00005593a4a33440 R08: 0000000000000000 R09: 0000000000000001 [66.438050] R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 [66.438054] R13: 00005593a4a3a6c0 R14: 00005593a4a41360 R15: 00005593a4a33440 [66.438078] irq event stamp: 18169 [66.438082] hardirqs last enabled at (18175): [] console_unlock+0x4ff/0x5f0 [66.438088] hardirqs last disabled at (18180): [] console_unlock+0x467/0x5f0 [66.438092] softirqs last enabled at (16910): [] asm_call_irq_on_stack+0x12/0x20 [66.438097] softirqs last disabled at (16905): [] asm_call_irq_on_stack+0x12/0x20 [66.438103] ---[ end trace e114b111db64298b ]--- [66.438107] BTRFS error: found node 12582912 29360127 on insert of 37748736 29360127 [66.438127] BTRFS critical: panic in extent_io_tree_panic:679: locking error: extent tree was modified by another thread while locked (errno=-17 Object already exists) [66.441069] ------------[ cut here ]------------ [66.441072] kernel BUG at fs/btrfs/extent_io.c:679! [66.442064] invalid opcode: 0000 [#1] PREEMPT SMP NOPTI [66.443018] CPU: 16 PID: 613 Comm: mount Tainted: G W O 5.11.0-rc1-custom #45 [66.444538] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS ArchLinux 1.14.0-1 04/01/2014 [66.446223] RIP: 0010:extent_io_tree_panic.isra.0+0x23/0x25 [btrfs] [66.450878] RSP: 0018:ffff93e5414c3948 EFLAGS: 00010246 [66.451840] RAX: 0000000000000000 RBX: 0000000001bfffff RCX: 0000000000000000 [66.453141] RDX: 0000000000000000 RSI: ffffffffb90d4660 RDI: 00000000ffffffff [66.454445] RBP: ffff93e5414c3948 R08: 0000000000000001 R09: 0000000000000001 [66.455743] R10: ffff93e5414c3658 R11: 0000000000000000 R12: ffff8ec782d728c0 [66.457055] R13: ffff8ec78bc71628 R14: ffff8ec782d72aa0 R15: 0000000002400000 [66.458356] FS: 00007f01386a8580(0000) GS:ffff8ec809000000(0000) knlGS:0000000000000000 [66.459841] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [66.460895] CR2: 00007f01382fa000 CR3: 0000000109a34000 CR4: 0000000000750ee0 [66.462196] PKRU: 55555554 [66.462692] Call Trace: [66.463139] set_extent_bit.cold+0x30/0x98 [btrfs] [66.464049] set_extent_bits_nowait+0x1d/0x20 [btrfs] [66.490466] add_extent_mapping+0x1e0/0x2f0 [btrfs] [66.514097] read_one_chunk+0x33c/0x420 [btrfs] [66.534976] btrfs_read_chunk_tree+0x6a4/0x870 [btrfs] [66.555718] ? kvm_sched_clock_read+0x18/0x40 [66.575758] open_ctree+0xb32/0x1734 [btrfs] [66.595272] ? bdi_register_va+0x1b/0x20 [66.614638] ? super_setup_bdi_name+0x79/0xd0 [66.633809] btrfs_mount_root.cold+0x12/0xeb [btrfs] [66.652938] ? __kmalloc_track_caller+0x217/0x3b0 [66.671925] legacy_get_tree+0x34/0x60 [66.690300] vfs_get_tree+0x2d/0xc0 [66.708221] vfs_kern_mount.part.0+0x78/0xc0 [66.725808] vfs_kern_mount+0x13/0x20 [66.742730] btrfs_mount+0x11f/0x3c0 [btrfs] [66.759350] ? kfree+0x5ff/0x670 [66.775441] ? __kmalloc_track_caller+0x217/0x3b0 [66.791750] legacy_get_tree+0x34/0x60 [66.807494] vfs_get_tree+0x2d/0xc0 [66.823349] path_mount+0x48c/0xd30 [66.838753] __x64_sys_mount+0x108/0x140 [66.854412] do_syscall_64+0x38/0x50 [66.869673] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [66.885093] RIP: 0033:0x7f0138827f6e [66.945613] RSP: 002b:00007ffecd79edf8 EFLAGS: 00000246 ORIG_RAX: 00000000000000a5 [66.977214] RAX: ffffffffffffffda RBX: 00007f013894c264 RCX: 00007f0138827f6e [66.994266] RDX: 00005593a4a41360 RSI: 00005593a4a33690 RDI: 00005593a4a3a6c0 [67.011544] RBP: 00005593a4a33440 R08: 0000000000000000 R09: 0000000000000001 [67.028836] R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 [67.045812] R13: 00005593a4a3a6c0 R14: 00005593a4a41360 R15: 00005593a4a33440 [67.216138] ---[ end trace e114b111db64298c ]--- [67.237089] RIP: 0010:extent_io_tree_panic.isra.0+0x23/0x25 [btrfs] [67.325317] RSP: 0018:ffff93e5414c3948 EFLAGS: 00010246 [67.347946] RAX: 0000000000000000 RBX: 0000000001bfffff RCX: 0000000000000000 [67.371343] RDX: 0000000000000000 RSI: ffffffffb90d4660 RDI: 00000000ffffffff [67.394757] RBP: ffff93e5414c3948 R08: 0000000000000001 R09: 0000000000000001 [67.418409] R10: ffff93e5414c3658 R11: 0000000000000000 R12: ffff8ec782d728c0 [67.441906] R13: ffff8ec78bc71628 R14: ffff8ec782d72aa0 R15: 0000000002400000 [67.465436] FS: 00007f01386a8580(0000) GS:ffff8ec809000000(0000) knlGS:0000000000000000 [67.511660] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [67.535047] CR2: 00007f01382fa000 CR3: 0000000109a34000 CR4: 0000000000750ee0 [67.558449] PKRU: 55555554 [67.581146] note: mount[613] exited with preempt_count 2 The image has a chunk item which has a logical start 37748736 and length 18446744073701163008 (-8M). The calculated end 29360127 overflows. EEXIST was caught by insert_state() because of the duplicate end and extent_io_tree_panic() was called. Add overflow check of chunk item end to tree checker so it can be detected early at mount time. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=208929 CC: stable@vger.kernel.org # 4.19+ Reviewed-by: Anand Jain Signed-off-by: Su Yue Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/tree-checker.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c index 028e733e42f3b..582061c7b5471 100644 --- a/fs/btrfs/tree-checker.c +++ b/fs/btrfs/tree-checker.c @@ -760,6 +760,7 @@ int btrfs_check_chunk_valid(struct extent_buffer *leaf, { struct btrfs_fs_info *fs_info = leaf->fs_info; u64 length; + u64 chunk_end; u64 stripe_len; u16 num_stripes; u16 sub_stripes; @@ -814,6 +815,12 @@ int btrfs_check_chunk_valid(struct extent_buffer *leaf, "invalid chunk length, have %llu", length); return -EUCLEAN; } + if (unlikely(check_add_overflow(logical, length, &chunk_end))) { + chunk_err(leaf, chunk, logical, +"invalid chunk logical start and length, have logical start %llu length %llu", + logical, length); + return -EUCLEAN; + } if (unlikely(!is_power_of_2(stripe_len) || stripe_len != BTRFS_STRIPE_LEN)) { chunk_err(leaf, chunk, logical, "invalid chunk stripe length: %llu", -- GitLab From 50e31ef486afe60f128d42fb9620e2a63172c15c Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Tue, 29 Dec 2020 21:29:34 +0800 Subject: [PATCH 0319/2012] btrfs: reloc: fix wrong file extent type check to avoid false ENOENT MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [BUG] There are several bug reports about recent kernel unable to relocate certain data block groups. Sometimes the error just goes away, but there is one reporter who can reproduce it reliably. The dmesg would look like: [438.260483] BTRFS info (device dm-10): balance: start -dvrange=34625344765952..34625344765953 [438.269018] BTRFS info (device dm-10): relocating block group 34625344765952 flags data|raid1 [450.439609] BTRFS info (device dm-10): found 167 extents, stage: move data extents [463.501781] BTRFS info (device dm-10): balance: ended with status: -2 [CAUSE] The ENOENT error is returned from the following call chain: add_data_references() |- delete_v1_space_cache(); |- if (!found) return -ENOENT; The variable @found is set to true if we find a data extent whose disk bytenr matches parameter @data_bytes. With extra debugging, the offending tree block looks like this: leaf bytenr = 42676709441536, data_bytenr = 34626327621632 ctime 1567904822.739884119 (2019-09-08 03:07:02) mtime 0.0 (1970-01-01 01:00:00) otime 0.0 (1970-01-01 01:00:00) item 27 key (51933 EXTENT_DATA 0) itemoff 9854 itemsize 53 generation 1517381 type 2 (prealloc) prealloc data disk byte 34626327621632 nr 262144 <<< prealloc data offset 0 nr 262144 item 28 key (52262 ROOT_ITEM 0) itemoff 9415 itemsize 439 generation 2618893 root_dirid 256 bytenr 42677048360960 level 3 refs 1 lastsnap 2618893 byte_limit 0 bytes_used 5557338112 flags 0x0(none) uuid d0d4361f-d231-6d40-8901-fe506e4b2b53 Although item 27 has disk bytenr 34626327621632, which matches the data_bytenr, its type is prealloc, not reg. This makes the existing code skip that item, and return ENOENT. [FIX] The code is modified in commit 19b546d7a1b2 ("btrfs: relocation: Use btrfs_find_all_leafs to locate data extent parent tree leaves"), before that commit, we use something like "if (type == BTRFS_FILE_EXTENT_INLINE) continue;" But in that offending commit, we use (type == BTRFS_FILE_EXTENT_REG), ignoring BTRFS_FILE_EXTENT_PREALLOC. Fix it by also checking BTRFS_FILE_EXTENT_PREALLOC. Reported-by: Stéphane Lesimple Link: https://lore.kernel.org/linux-btrfs/505cabfa88575ed6dbe7cb922d8914fb@lesimple.fr Fixes: 19b546d7a1b2 ("btrfs: relocation: Use btrfs_find_all_leafs to locate data extent parent tree leaves") CC: stable@vger.kernel.org # 5.6+ Tested-By: Stéphane Lesimple Reviewed-by: Su Yue Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/relocation.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 19b7db8b21171..df63ef64c5c0d 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -2975,11 +2975,16 @@ static int delete_v1_space_cache(struct extent_buffer *leaf, return 0; for (i = 0; i < btrfs_header_nritems(leaf); i++) { + u8 type; + btrfs_item_key_to_cpu(leaf, &key, i); if (key.type != BTRFS_EXTENT_DATA_KEY) continue; ei = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item); - if (btrfs_file_extent_type(leaf, ei) == BTRFS_FILE_EXTENT_REG && + type = btrfs_file_extent_type(leaf, ei); + + if ((type == BTRFS_FILE_EXTENT_REG || + type == BTRFS_FILE_EXTENT_PREALLOC) && btrfs_file_extent_disk_bytenr(leaf, ei) == data_bytenr) { found = true; space_cache_ino = key.objectid; -- GitLab From 04a6a536bc3fd1436fc78c546c6b3ecdccbfaf6d Mon Sep 17 00:00:00 2001 From: Satya Tangirala Date: Thu, 24 Dec 2020 04:49:54 +0000 Subject: [PATCH 0320/2012] fs: Fix freeze_bdev()/thaw_bdev() accounting of bd_fsfreeze_sb freeze/thaw_bdev() currently use bdev->bd_fsfreeze_count to infer whether or not bdev->bd_fsfreeze_sb is valid (it's valid iff bd_fsfreeze_count is non-zero). thaw_bdev() doesn't nullify bd_fsfreeze_sb. But this means a freeze_bdev() call followed by a thaw_bdev() call can leave bd_fsfreeze_sb with a non-null value, while bd_fsfreeze_count is zero. If freeze_bdev() is called again, and this time get_active_super() returns NULL (e.g. because the FS is unmounted), we'll end up with bd_fsfreeze_count > 0, but bd_fsfreeze_sb is *untouched* - it stays the same (now garbage) value. A subsequent thaw_bdev() will decide that the bd_fsfreeze_sb value is legitimate (since bd_fsfreeze_count > 0), and attempt to use it. Fix this by always setting bd_fsfreeze_sb to NULL when bd_fsfreeze_count is successfully decremented to 0 in thaw_sb(). Alternatively, we could set bd_fsfreeze_sb to whatever get_active_super() returns in freeze_bdev() whenever bd_fsfreeze_count is successfully incremented to 1 from 0 (which can be achieved cleanly by moving the line currently setting bd_fsfreeze_sb to immediately after the "sync:" label, but it might be a little too subtle/easily overlooked in future). This fixes the currently panicking xfstests generic/085. Fixes: 040f04bd2e82 ("fs: simplify freeze_bdev/thaw_bdev") Signed-off-by: Satya Tangirala Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Jens Axboe --- fs/block_dev.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/block_dev.c b/fs/block_dev.c index 3e5b02f6606c4..e454c5a810436 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -605,6 +605,8 @@ int thaw_bdev(struct block_device *bdev) error = thaw_super(sb); if (error) bdev->bd_fsfreeze_count++; + else + bdev->bd_fsfreeze_sb = NULL; out: mutex_unlock(&bdev->bd_fsfreeze_mutex); return error; -- GitLab From 17ffd35809c34b9564edb10727d02eb62958ba5c Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 5 Jan 2021 19:20:29 +0100 Subject: [PATCH 0321/2012] cpufreq: intel_pstate: Use HWP capabilities in intel_cpufreq_adjust_perf() If turbo P-states cannot be used, either due to the configuration of the processor, or because intel_pstate is not allowed to used them, the maximum available P-state with HWP enabled corresponds to the HWP_CAP.GUARANTEED value which is not static. It can be adjusted by an out-of-band agent or during an Intel Speed Select performance level change, so long as it remains less than or equal to HWP_CAP.MAX. However, if turbo P-states cannot be used, intel_cpufreq_adjust_perf() always uses pstate.max_pstate (set during the initialization of the driver only) as the maximum available P-state, so it may miss a change of the HWP_CAP.GUARANTEED value. Prevent that from happening by modifyig intel_cpufreq_adjust_perf() to always read the "guaranteed" and "maximum turbo" performance levels from the cached HWP_CAP value. Fixes: a365ab6b9dfb ("cpufreq: intel_pstate: Implement the ->adjust_perf() callback") Signed-off-by: Rafael J. Wysocki Acked-by: Srinivas Pandruvada --- drivers/cpufreq/intel_pstate.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 1a660466dd756..32bc11851b8d3 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -2653,12 +2653,13 @@ static void intel_cpufreq_adjust_perf(unsigned int cpunum, unsigned long capacity) { struct cpudata *cpu = all_cpu_data[cpunum]; + u64 hwp_cap = READ_ONCE(cpu->hwp_cap_cached); int old_pstate = cpu->pstate.current_pstate; int cap_pstate, min_pstate, max_pstate, target_pstate; update_turbo_state(); - cap_pstate = global.turbo_disabled ? cpu->pstate.max_pstate : - cpu->pstate.turbo_pstate; + cap_pstate = global.turbo_disabled ? HWP_GUARANTEED_PERF(hwp_cap) : + HWP_HIGHEST_PERF(hwp_cap); /* Optimization: Avoid unnecessary divisions. */ -- GitLab From 943bdd0cecad06da8392a33093230e30e501eccc Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 5 Jan 2021 10:19:57 +0000 Subject: [PATCH 0322/2012] cpufreq: powernow-k8: pass policy rather than use cpufreq_cpu_get() Currently there is an unlikely case where cpufreq_cpu_get() returns a NULL policy and this will cause a NULL pointer dereference later on. Fix this by passing the policy to transition_frequency_fidvid() from the caller and hence eliminating the need for the cpufreq_cpu_get() and cpufreq_cpu_put(). Thanks to Viresh Kumar for suggesting the fix. Addresses-Coverity: ("Dereference null return") Fixes: b43a7ffbf33b ("cpufreq: Notify all policy->cpus in cpufreq_notify_transition()") Suggested-by: Viresh Kumar Signed-off-by: Colin Ian King Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/powernow-k8.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/drivers/cpufreq/powernow-k8.c b/drivers/cpufreq/powernow-k8.c index 0acc9e241cd7d..b9ccb6a3dad98 100644 --- a/drivers/cpufreq/powernow-k8.c +++ b/drivers/cpufreq/powernow-k8.c @@ -878,9 +878,9 @@ static int get_transition_latency(struct powernow_k8_data *data) /* Take a frequency, and issue the fid/vid transition command */ static int transition_frequency_fidvid(struct powernow_k8_data *data, - unsigned int index) + unsigned int index, + struct cpufreq_policy *policy) { - struct cpufreq_policy *policy; u32 fid = 0; u32 vid = 0; int res; @@ -912,9 +912,6 @@ static int transition_frequency_fidvid(struct powernow_k8_data *data, freqs.old = find_khz_freq_from_fid(data->currfid); freqs.new = find_khz_freq_from_fid(fid); - policy = cpufreq_cpu_get(smp_processor_id()); - cpufreq_cpu_put(policy); - cpufreq_freq_transition_begin(policy, &freqs); res = transition_fid_vid(data, fid, vid); cpufreq_freq_transition_end(policy, &freqs, res); @@ -969,7 +966,7 @@ static long powernowk8_target_fn(void *arg) powernow_k8_acpi_pst_values(data, newstate); - ret = transition_frequency_fidvid(data, newstate); + ret = transition_frequency_fidvid(data, newstate, pol); if (ret) { pr_err("transition frequency failed\n"); -- GitLab From aa7a1bb02bb44399be69b0a1cbb6495d9eec29fc Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 5 Jan 2021 19:19:18 +0100 Subject: [PATCH 0323/2012] ACPI: PM: s2idle: Drop unused local variables and related code Two local variables in drivers/acpi/x86/s2idle.c are never read, so drop them along with the code updating their values (in vain). Fixes: fef98671194b ("ACPI: PM: s2idle: Move x86-specific code to the x86 directory") Signed-off-by: Rafael J. Wysocki --- drivers/acpi/x86/s2idle.c | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) diff --git a/drivers/acpi/x86/s2idle.c b/drivers/acpi/x86/s2idle.c index 25fea34b544c6..2b69536cdccba 100644 --- a/drivers/acpi/x86/s2idle.c +++ b/drivers/acpi/x86/s2idle.c @@ -105,18 +105,8 @@ static void lpi_device_get_constraints_amd(void) for (i = 0; i < out_obj->package.count; i++) { union acpi_object *package = &out_obj->package.elements[i]; - struct lpi_device_info_amd info = { }; - if (package->type == ACPI_TYPE_INTEGER) { - switch (i) { - case 0: - info.revision = package->integer.value; - break; - case 1: - info.count = package->integer.value; - break; - } - } else if (package->type == ACPI_TYPE_PACKAGE) { + if (package->type == ACPI_TYPE_PACKAGE) { lpi_constraints_table = kcalloc(package->package.count, sizeof(*lpi_constraints_table), GFP_KERNEL); @@ -135,12 +125,10 @@ static void lpi_device_get_constraints_amd(void) for (k = 0; k < info_obj->package.count; ++k) { union acpi_object *obj = &info_obj->package.elements[k]; - union acpi_object *obj_new; list = &lpi_constraints_table[lpi_constraints_table_size]; list->min_dstate = -1; - obj_new = &obj[k]; switch (k) { case 0: dev_info.enabled = obj->integer.value; -- GitLab From ee61cfd955a64a58ed35cbcfc54068fcbd486945 Mon Sep 17 00:00:00 2001 From: Shawn Guo Date: Thu, 31 Dec 2020 19:35:25 +0800 Subject: [PATCH 0324/2012] ACPI: scan: add stub acpi_create_platform_device() for !CONFIG_ACPI It adds a stub acpi_create_platform_device() for !CONFIG_ACPI build, so that caller doesn't have to deal with !CONFIG_ACPI build issue. Reported-by: kernel test robot Signed-off-by: Shawn Guo Signed-off-by: Rafael J. Wysocki --- include/linux/acpi.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 2630c2e953f73..053bf05fb1f76 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -885,6 +885,13 @@ static inline int acpi_device_modalias(struct device *dev, return -ENODEV; } +static inline struct platform_device * +acpi_create_platform_device(struct acpi_device *adev, + struct property_entry *properties) +{ + return NULL; +} + static inline bool acpi_dma_supported(struct acpi_device *adev) { return false; -- GitLab From 240bdc605e6a9d0309bd003de3413f6f729eca18 Mon Sep 17 00:00:00 2001 From: Peter Robinson Date: Tue, 29 Dec 2020 11:17:59 +0000 Subject: [PATCH 0325/2012] ACPI: Update Kconfig help text for items that are no longer modular The CONTAINER and HOTPLUG_MEMORY options mention modules but are bool only, so if selected are always built in. Drop the help text about modules. Signed-off-by: Peter Robinson Signed-off-by: Rafael J. Wysocki --- drivers/acpi/Kconfig | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig index edf1558c11052..ebcf534514be3 100644 --- a/drivers/acpi/Kconfig +++ b/drivers/acpi/Kconfig @@ -395,9 +395,6 @@ config ACPI_CONTAINER This helps support hotplug of nodes, CPUs, and memory. - To compile this driver as a module, choose M here: - the module will be called container. - config ACPI_HOTPLUG_MEMORY bool "Memory Hotplug" depends on MEMORY_HOTPLUG @@ -411,9 +408,6 @@ config ACPI_HOTPLUG_MEMORY removing memory devices at runtime, you need not enable this driver. - To compile this driver as a module, choose M here: - the module will be called acpi_memhotplug. - config ACPI_HOTPLUG_IOAPIC bool depends on PCI -- GitLab From 47f4469970d8861bc06d2d4d45ac8200ff07c693 Mon Sep 17 00:00:00 2001 From: Bard Liao Date: Tue, 5 Jan 2021 17:11:45 +0800 Subject: [PATCH 0326/2012] Revert "device property: Keep secondary firmware node secondary by type" While commit d5dcce0c414f ("device property: Keep secondary firmware node secondary by type") describes everything correct in its commit message, the change it made does the opposite and original commit c15e1bdda436 ("device property: Fix the secondary firmware node handling in set_primary_fwnode()") was fully correct. Revert the former one here and improve documentation in the next patch. Fixes: d5dcce0c414f ("device property: Keep secondary firmware node secondary by type") Signed-off-by: Bard Liao Reviewed-by: Andy Shevchenko Reviewed-by: Heikki Krogerus Cc: 5.10+ # 5.10+ Signed-off-by: Rafael J. Wysocki --- drivers/base/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/base/core.c b/drivers/base/core.c index 25e08e5f40bd9..51b9545a050b7 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -4433,7 +4433,7 @@ void set_primary_fwnode(struct device *dev, struct fwnode_handle *fwnode) if (fwnode_is_primary(fn)) { dev->fwnode = fn->secondary; if (!(parent && fn == parent->fwnode)) - fn->secondary = ERR_PTR(-ENODEV); + fn->secondary = NULL; } else { dev->fwnode = NULL; } -- GitLab From 3f7bddaf5d5a83aa2eb1e6d72db221d3ec43c813 Mon Sep 17 00:00:00 2001 From: Bard Liao Date: Tue, 5 Jan 2021 17:11:46 +0800 Subject: [PATCH 0327/2012] device property: add description of fwnode cases There are only four valid fwnode cases which are - primary --> secondary --> -ENODEV - primary --> NULL - secondary --> -ENODEV - NULL dev->fwnode should be converted between the 4 cases above no matter how/when set_primary_fwnode() and set_secondary_fwnode() are called. Describe it in the code so people will keep it in mind. Signed-off-by: Bard Liao Reviewed-by: Andy Shevchenko Reviewed-by: Heikki Krogerus [ rjw: Comment edit ] Signed-off-by: Rafael J. Wysocki --- drivers/base/core.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/base/core.c b/drivers/base/core.c index 51b9545a050b7..14f1658167425 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -4414,6 +4414,12 @@ static inline bool fwnode_is_primary(struct fwnode_handle *fwnode) * * Set the device's firmware node pointer to @fwnode, but if a secondary * firmware node of the device is present, preserve it. + * + * Valid fwnode cases are: + * - primary --> secondary --> -ENODEV + * - primary --> NULL + * - secondary --> -ENODEV + * - NULL */ void set_primary_fwnode(struct device *dev, struct fwnode_handle *fwnode) { @@ -4432,6 +4438,7 @@ void set_primary_fwnode(struct device *dev, struct fwnode_handle *fwnode) } else { if (fwnode_is_primary(fn)) { dev->fwnode = fn->secondary; + /* Set fn->secondary = NULL, so fn remains the primary fwnode */ if (!(parent && fn == parent->fwnode)) fn->secondary = NULL; } else { -- GitLab From 2b5f09cadfc576817c0450e01d454f750909b103 Mon Sep 17 00:00:00 2001 From: Kuogee Hsieh Date: Fri, 18 Dec 2020 09:53:40 -0800 Subject: [PATCH 0328/2012] drm/msm/dp: postpone irq_hpd event during connection pending state irq_hpd event can only be executed at connected state. Therefore irq_hpd event should be postponed if it happened at connection pending state. This patch also make sure both link rate and lane are valid before start link training. Signed-off-by: Kuogee Hsieh Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/dp/dp_display.c | 7 +++++++ drivers/gpu/drm/msm/dp/dp_panel.c | 12 +++++++++--- 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/msm/dp/dp_display.c b/drivers/gpu/drm/msm/dp/dp_display.c index 6e971d552911f..3bc7ed21de286 100644 --- a/drivers/gpu/drm/msm/dp/dp_display.c +++ b/drivers/gpu/drm/msm/dp/dp_display.c @@ -693,6 +693,13 @@ static int dp_irq_hpd_handle(struct dp_display_private *dp, u32 data) return 0; } + if (state == ST_CONNECT_PENDING) { + /* wait until ST_CONNECTED */ + dp_add_event(dp, EV_IRQ_HPD_INT, 0, 1); /* delay = 1 */ + mutex_unlock(&dp->event_mutex); + return 0; + } + ret = dp_display_usbpd_attention_cb(&dp->pdev->dev); if (ret == -ECONNRESET) { /* cable unplugged */ dp->core_initialized = false; diff --git a/drivers/gpu/drm/msm/dp/dp_panel.c b/drivers/gpu/drm/msm/dp/dp_panel.c index 97dca3e378b7b..d1780bcac8cc8 100644 --- a/drivers/gpu/drm/msm/dp/dp_panel.c +++ b/drivers/gpu/drm/msm/dp/dp_panel.c @@ -167,12 +167,18 @@ int dp_panel_read_sink_caps(struct dp_panel *dp_panel, panel = container_of(dp_panel, struct dp_panel_private, dp_panel); rc = dp_panel_read_dpcd(dp_panel); + if (rc) { + DRM_ERROR("read dpcd failed %d\n", rc); + return rc; + } + bw_code = drm_dp_link_rate_to_bw_code(dp_panel->link_info.rate); - if (rc || !is_link_rate_valid(bw_code) || + if (!is_link_rate_valid(bw_code) || !is_lane_count_valid(dp_panel->link_info.num_lanes) || (bw_code > dp_panel->max_bw_code)) { - DRM_ERROR("read dpcd failed %d\n", rc); - return rc; + DRM_ERROR("Illegal link rate=%d lane=%d\n", dp_panel->link_info.rate, + dp_panel->link_info.num_lanes); + return -EINVAL; } if (dp_panel->dfp_present) { -- GitLab From d863f0c7b536288e2bd40cbc01c10465dd226b11 Mon Sep 17 00:00:00 2001 From: Craig Tatlor Date: Wed, 30 Dec 2020 17:29:42 +0200 Subject: [PATCH 0329/2012] drm/msm: Call msm_init_vram before binding the gpu vram.size is needed when binding a gpu without an iommu and is defined in msm_init_vram(), so run that before binding it. Signed-off-by: Craig Tatlor Reviewed-by: Brian Masney Tested-by: Alexey Minnekhanov Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/msm_drv.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c index ce9bb6e929c22..549ffb60e9ca1 100644 --- a/drivers/gpu/drm/msm/msm_drv.c +++ b/drivers/gpu/drm/msm/msm_drv.c @@ -457,14 +457,14 @@ static int msm_drm_init(struct device *dev, struct drm_driver *drv) drm_mode_config_init(ddev); - /* Bind all our sub-components: */ - ret = component_bind_all(dev, ddev); + ret = msm_init_vram(ddev); if (ret) goto err_destroy_mdss; - ret = msm_init_vram(ddev); + /* Bind all our sub-components: */ + ret = component_bind_all(dev, ddev); if (ret) - goto err_msm_uninit; + goto err_destroy_mdss; dma_set_max_seg_size(dev, UINT_MAX); -- GitLab From 3f7759e7b7585a0bffda06d4eddc6b0b850ef6c3 Mon Sep 17 00:00:00 2001 From: Iskren Chernev Date: Wed, 30 Dec 2020 17:29:43 +0200 Subject: [PATCH 0330/2012] drm/msm: Add modparam to allow vram carveout Using the GPU with a VRAM Carveout is a security vulnerability. Nevertheless it is sometimes required, especially when no IOMMU implementation is available for a certain platform. Signed-off-by: Iskren Chernev Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/adreno/a2xx_gpu.c | 6 ++++-- drivers/gpu/drm/msm/adreno/a3xx_gpu.c | 6 ++++-- drivers/gpu/drm/msm/adreno/a4xx_gpu.c | 6 ++++-- drivers/gpu/drm/msm/adreno/adreno_device.c | 4 ++++ drivers/gpu/drm/msm/adreno/adreno_gpu.h | 1 + 5 files changed, 17 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/msm/adreno/a2xx_gpu.c b/drivers/gpu/drm/msm/adreno/a2xx_gpu.c index 7e82c41a85f1a..bdc989183c648 100644 --- a/drivers/gpu/drm/msm/adreno/a2xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a2xx_gpu.c @@ -534,8 +534,10 @@ struct msm_gpu *a2xx_gpu_init(struct drm_device *dev) if (!gpu->aspace) { dev_err(dev->dev, "No memory protection without MMU\n"); - ret = -ENXIO; - goto fail; + if (!allow_vram_carveout) { + ret = -ENXIO; + goto fail; + } } return gpu; diff --git a/drivers/gpu/drm/msm/adreno/a3xx_gpu.c b/drivers/gpu/drm/msm/adreno/a3xx_gpu.c index 93da6683a8661..4534633fe7cdb 100644 --- a/drivers/gpu/drm/msm/adreno/a3xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a3xx_gpu.c @@ -564,8 +564,10 @@ struct msm_gpu *a3xx_gpu_init(struct drm_device *dev) * implement a cmdstream validator. */ DRM_DEV_ERROR(dev->dev, "No memory protection without IOMMU\n"); - ret = -ENXIO; - goto fail; + if (!allow_vram_carveout) { + ret = -ENXIO; + goto fail; + } } icc_path = devm_of_icc_get(&pdev->dev, "gfx-mem"); diff --git a/drivers/gpu/drm/msm/adreno/a4xx_gpu.c b/drivers/gpu/drm/msm/adreno/a4xx_gpu.c index c0be3a0f36b2c..82bebb40234de 100644 --- a/drivers/gpu/drm/msm/adreno/a4xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a4xx_gpu.c @@ -692,8 +692,10 @@ struct msm_gpu *a4xx_gpu_init(struct drm_device *dev) * implement a cmdstream validator. */ DRM_DEV_ERROR(dev->dev, "No memory protection without IOMMU\n"); - ret = -ENXIO; - goto fail; + if (!allow_vram_carveout) { + ret = -ENXIO; + goto fail; + } } icc_path = devm_of_icc_get(&pdev->dev, "gfx-mem"); diff --git a/drivers/gpu/drm/msm/adreno/adreno_device.c b/drivers/gpu/drm/msm/adreno/adreno_device.c index 87c8b033ad1a6..12e75ba360f95 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_device.c +++ b/drivers/gpu/drm/msm/adreno/adreno_device.c @@ -18,6 +18,10 @@ bool snapshot_debugbus = false; MODULE_PARM_DESC(snapshot_debugbus, "Include debugbus sections in GPU devcoredump (if not fused off)"); module_param_named(snapshot_debugbus, snapshot_debugbus, bool, 0600); +bool allow_vram_carveout = false; +MODULE_PARM_DESC(allow_vram_carveout, "Allow using VRAM Carveout, in place of IOMMU"); +module_param_named(allow_vram_carveout, allow_vram_carveout, bool, 0600); + static const struct adreno_info gpulist[] = { { .rev = ADRENO_REV(2, 0, 0, 0), diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.h b/drivers/gpu/drm/msm/adreno/adreno_gpu.h index c3775f79525a7..fe5444a1482ae 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.h +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.h @@ -18,6 +18,7 @@ #include "adreno_pm4.xml.h" extern bool snapshot_debugbus; +extern bool allow_vram_carveout; enum { ADRENO_FW_PM4 = 0, -- GitLab From c4151604f0603d5700072183a05828ff87d764e4 Mon Sep 17 00:00:00 2001 From: Lukas Bulwahn Date: Mon, 21 Dec 2020 06:13:20 +0100 Subject: [PATCH 0331/2012] cpufreq: intel_pstate: remove obsolete functions percent_fp() was used in intel_pstate_pid_reset(), which was removed in commit 9d0ef7af1f2d ("cpufreq: intel_pstate: Do not use PID-based P-state selection") and hence, percent_fp() is unused since then. percent_ext_fp() was last used in intel_pstate_update_perf_limits(), which was refactored in commit 1a4fe38add8b ("cpufreq: intel_pstate: Remove max/min fractions to limit performance"), and hence, percent_ext_fp() is unused since then. make CC=clang W=1 points us those unused functions: drivers/cpufreq/intel_pstate.c:79:23: warning: unused function 'percent_fp' [-Wunused-function] static inline int32_t percent_fp(int percent) ^ drivers/cpufreq/intel_pstate.c:94:23: warning: unused function 'percent_ext_fp' [-Wunused-function] static inline int32_t percent_ext_fp(int percent) ^ Remove those obsolete functions. Signed-off-by: Lukas Bulwahn Reviewed-by: Nathan Chancellor Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/intel_pstate.c | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 32bc11851b8d3..be05e038d956c 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -76,11 +76,6 @@ static inline int ceiling_fp(int32_t x) return ret; } -static inline int32_t percent_fp(int percent) -{ - return div_fp(percent, 100); -} - static inline u64 mul_ext_fp(u64 x, u64 y) { return (x * y) >> EXT_FRAC_BITS; @@ -91,11 +86,6 @@ static inline u64 div_ext_fp(u64 x, u64 y) return div64_u64(x << EXT_FRAC_BITS, y); } -static inline int32_t percent_ext_fp(int percent) -{ - return div_ext_fp(percent, 100); -} - /** * struct sample - Store performance sample * @core_avg_perf: Ratio of APERF/MPERF which is the actual average -- GitLab From 00fd44a1a4700718d5d962432b55c09820f7e709 Mon Sep 17 00:00:00 2001 From: Konrad Dybcio Date: Mon, 4 Jan 2021 20:30:41 +0100 Subject: [PATCH 0332/2012] drm/msm: Only enable A6xx LLCC code on A6xx Using this code on A5xx (and probably older too) causes a smmu bug. Fixes: 474dadb8b0d5 ("drm/msm/a6xx: Add support for using system cache(LLC)") Signed-off-by: Konrad Dybcio Tested-by: AngeloGioacchino Del Regno Reviewed-by: Jordan Crouse Reviewed-by: Sai Prakash Ranjan Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/adreno/adreno_gpu.c | 21 ++++++++++++--------- drivers/gpu/drm/msm/adreno/adreno_gpu.h | 5 +++++ 2 files changed, 17 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c index 6cf9975e951ed..f09175698827a 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c @@ -191,8 +191,6 @@ adreno_iommu_create_address_space(struct msm_gpu *gpu, struct platform_device *pdev) { struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); - struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); - struct io_pgtable_domain_attr pgtbl_cfg; struct iommu_domain *iommu; struct msm_mmu *mmu; struct msm_gem_address_space *aspace; @@ -202,13 +200,18 @@ adreno_iommu_create_address_space(struct msm_gpu *gpu, if (!iommu) return NULL; - /* - * This allows GPU to set the bus attributes required to use system - * cache on behalf of the iommu page table walker. - */ - if (!IS_ERR(a6xx_gpu->htw_llc_slice)) { - pgtbl_cfg.quirks = IO_PGTABLE_QUIRK_ARM_OUTER_WBWA; - iommu_domain_set_attr(iommu, DOMAIN_ATTR_IO_PGTABLE_CFG, &pgtbl_cfg); + + if (adreno_is_a6xx(adreno_gpu)) { + struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); + struct io_pgtable_domain_attr pgtbl_cfg; + /* + * This allows GPU to set the bus attributes required to use system + * cache on behalf of the iommu page table walker. + */ + if (!IS_ERR(a6xx_gpu->htw_llc_slice)) { + pgtbl_cfg.quirks = IO_PGTABLE_QUIRK_ARM_OUTER_WBWA; + iommu_domain_set_attr(iommu, DOMAIN_ATTR_IO_PGTABLE_CFG, &pgtbl_cfg); + } } mmu = msm_iommu_new(&pdev->dev, iommu); diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.h b/drivers/gpu/drm/msm/adreno/adreno_gpu.h index fe5444a1482ae..b3d9a333591b2 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.h +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.h @@ -212,6 +212,11 @@ static inline int adreno_is_a540(struct adreno_gpu *gpu) return gpu->revn == 540; } +static inline bool adreno_is_a6xx(struct adreno_gpu *gpu) +{ + return ((gpu->revn < 700 && gpu->revn > 599)); +} + static inline int adreno_is_a618(struct adreno_gpu *gpu) { return gpu->revn == 618; -- GitLab From 3c638cdb8ecc0442552156e0fed8708dd2c7f35b Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Wed, 16 Dec 2020 12:07:53 +0200 Subject: [PATCH 0333/2012] RDMA/restrack: Don't treat as an error allocation ID wrapping xa_alloc_cyclic() call returns positive number if ID allocation succeeded but wrapped. It is not an error, so normalize the "ret" variable to zero as marker of not-an-error. drivers/infiniband/core/restrack.c:261 rdma_restrack_add() warn: 'ret' can be either negative or positive Fixes: fd47c2f99f04 ("RDMA/restrack: Convert internal DB from hash to XArray") Link: https://lore.kernel.org/r/20201216100753.1127638-1-leon@kernel.org Reported-by: Dan Carpenter Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/restrack.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/infiniband/core/restrack.c b/drivers/infiniband/core/restrack.c index e0a41c8670023..ff1551b3cf619 100644 --- a/drivers/infiniband/core/restrack.c +++ b/drivers/infiniband/core/restrack.c @@ -254,6 +254,7 @@ void rdma_restrack_add(struct rdma_restrack_entry *res) } else { ret = xa_alloc_cyclic(&rt->xa, &res->id, res, xa_limit_32b, &rt->next_id, GFP_KERNEL); + ret = (ret < 0) ? ret : 0; } out: -- GitLab From afded6d83aa7b35dab675c730528109cc58d6847 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 28 Dec 2020 20:43:13 +0200 Subject: [PATCH 0334/2012] misc: pvpanic: Check devm_ioport_map() for NULL Inconveniently devm_ioport_map() and devm_ioremap_resource() return errors differently, i.e. former uses simply NULL pointer, while the latter an error pointer. Due to this, we have to check each of them separately. Fixes: f104060813fe ("misc: pvpanic: Combine ACPI and platform drivers") Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20201228184313.57610-1-andriy.shevchenko@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/misc/pvpanic.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/drivers/misc/pvpanic.c b/drivers/misc/pvpanic.c index 951b37da5e3ca..41cab297d66e7 100644 --- a/drivers/misc/pvpanic.c +++ b/drivers/misc/pvpanic.c @@ -55,12 +55,23 @@ static int pvpanic_mmio_probe(struct platform_device *pdev) struct resource *res; res = platform_get_mem_or_io(pdev, 0); - if (res && resource_type(res) == IORESOURCE_IO) + if (!res) + return -EINVAL; + + switch (resource_type(res)) { + case IORESOURCE_IO: base = devm_ioport_map(dev, res->start, resource_size(res)); - else + if (!base) + return -ENOMEM; + break; + case IORESOURCE_MEM: base = devm_ioremap_resource(dev, res); - if (IS_ERR(base)) - return PTR_ERR(base); + if (IS_ERR(base)) + return PTR_ERR(base); + break; + default: + return -EINVAL; + } atomic_notifier_chain_register(&panic_notifier_list, &pvpanic_panic_nb); -- GitLab From 384b77fd48fd683a82760bc88bef8611cba997fc Mon Sep 17 00:00:00 2001 From: Amanoel Dawod Date: Sat, 26 Dec 2020 18:58:40 -0500 Subject: [PATCH 0335/2012] Fonts: font_ter16x32: Update font with new upstream Terminus release This is just a maintenance patch to update font_ter16x32.c with changes and minor fixes added in new upstream Terminus v4.49. >From release notes of new version 4.49, this brings: - Altered ascii grave in some sizes to be more useful as a back quote. - Fixed 21B5, added 21B2 and 21B3. Just as my initial submission of the font, above changes were obtained from new ter-i32b.psf font source. Terminus font sources are available for download at SourceForge: https://sourceforge.net/projects/terminus-font/files/terminus-font-4.49/ Simply running `make` in source directory will build the .psf font files. Signed-off-by: Amanoel Dawod Link: https://lore.kernel.org/r/20201226235840.26290-1-kernel@amanoeldawod.com Signed-off-by: Greg Kroah-Hartman --- lib/fonts/font_ter16x32.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/fonts/font_ter16x32.c b/lib/fonts/font_ter16x32.c index 1955d624177cf..5baedc573dd6b 100644 --- a/lib/fonts/font_ter16x32.c +++ b/lib/fonts/font_ter16x32.c @@ -774,8 +774,8 @@ static const struct font_data fontdata_ter16x32 = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7f, 0xfc, 0x7f, 0xfc, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 95 */ - 0x00, 0x00, 0x1c, 0x00, 0x0e, 0x00, 0x07, 0x00, - 0x03, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x0e, 0x00, + 0x07, 0x00, 0x03, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, @@ -1169,7 +1169,7 @@ static const struct font_data fontdata_ter16x32 = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x7f, 0xf8, 0x7f, 0xfc, 0x03, 0x9e, 0x03, 0x8e, + 0x7e, 0xf8, 0x7f, 0xfc, 0x03, 0x9e, 0x03, 0x8e, 0x03, 0x8e, 0x3f, 0x8e, 0x7f, 0xfe, 0xf3, 0xfe, 0xe3, 0x80, 0xe3, 0x80, 0xe3, 0x80, 0xf3, 0xce, 0x7f, 0xfe, 0x3e, 0xfc, 0x00, 0x00, 0x00, 0x00, -- GitLab From a306aba9c8d869b1fdfc8ad9237f1ed718ea55e6 Mon Sep 17 00:00:00 2001 From: Dinghao Liu Date: Sat, 26 Dec 2020 15:42:48 +0800 Subject: [PATCH 0336/2012] RDMA/usnic: Fix memleak in find_free_vf_and_create_qp_grp If usnic_ib_qp_grp_create() fails at the first call, dev_list will not be freed on error, which leads to memleak. Fixes: e3cf00d0a87f ("IB/usnic: Add Cisco VIC low-level hardware driver") Link: https://lore.kernel.org/r/20201226074248.2893-1-dinghao.liu@zju.edu.cn Signed-off-by: Dinghao Liu Reviewed-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/usnic/usnic_ib_verbs.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c index 38a37770c0162..3705c6b8b2237 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c +++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c @@ -214,6 +214,7 @@ find_free_vf_and_create_qp_grp(struct usnic_ib_dev *us_ibdev, } usnic_uiom_free_dev_list(dev_list); + dev_list = NULL; } /* Try to find resources on an unused vf */ @@ -239,6 +240,8 @@ find_free_vf_and_create_qp_grp(struct usnic_ib_dev *us_ibdev, qp_grp_check: if (IS_ERR_OR_NULL(qp_grp)) { usnic_err("Failed to allocate qp_grp\n"); + if (usnic_ib_share_vf) + usnic_uiom_free_dev_list(dev_list); return ERR_PTR(qp_grp ? PTR_ERR(qp_grp) : -ENOMEM); } return qp_grp; -- GitLab From f2bc3af6353cb2a33dfa9d270d999d839eef54cb Mon Sep 17 00:00:00 2001 From: Tom Rix Date: Tue, 29 Dec 2020 18:46:53 -0800 Subject: [PATCH 0337/2012] RDMA/ocrdma: Fix use after free in ocrdma_dealloc_ucontext_pd() In ocrdma_dealloc_ucontext_pd() uctx->cntxt_pd is assigned to the variable pd and then after uctx->cntxt_pd is freed, the variable pd is passed to function _ocrdma_dealloc_pd() which dereferences pd directly or through its call to ocrdma_mbx_dealloc_pd(). Reorder the free using the variable pd. Cc: stable@vger.kernel.org Fixes: 21a428a019c9 ("RDMA: Handle PD allocations by IB/core") Link: https://lore.kernel.org/r/20201230024653.1516495-1-trix@redhat.com Signed-off-by: Tom Rix Reviewed-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/ocrdma/ocrdma_verbs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c index bc98bd950d99f..3acb5c10b1553 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c @@ -434,9 +434,9 @@ static void ocrdma_dealloc_ucontext_pd(struct ocrdma_ucontext *uctx) pr_err("%s(%d) Freeing in use pdid=0x%x.\n", __func__, dev->id, pd->id); } - kfree(uctx->cntxt_pd); uctx->cntxt_pd = NULL; _ocrdma_dealloc_pd(dev, pd); + kfree(pd); } static struct ocrdma_pd *ocrdma_get_ucontext_pd(struct ocrdma_ucontext *uctx) -- GitLab From 0ef597c3ac49a62e1a2c1c10f88dd76fde1e1636 Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Tue, 5 Jan 2021 06:58:15 +0100 Subject: [PATCH 0338/2012] docs: remove mention of ENABLE_MUST_CHECK We removed ENABLE_MUST_CHECK in 196793946264 ("Compiler Attributes: remove CONFIG_ENABLE_MUST_CHECK"), so let's remove docs' mentions. At the same time, fix the outdated text related to ENABLE_WARN_DEPRECATED that wasn't removed in 3337d5cfe5e08 ("configs: get rid of obsolete CONFIG_ENABLE_WARN_DEPRECATED"). Finally, reflow the paragraph. Signed-off-by: Miguel Ojeda Reviewed-by: Nathan Chancellor Reviewed-by: Nick Desaulniers Link: https://lore.kernel.org/r/20210105055815.GA5173@kernel.org Signed-off-by: Jonathan Corbet --- Documentation/process/4.Coding.rst | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/Documentation/process/4.Coding.rst b/Documentation/process/4.Coding.rst index c27e59d2f7029..0825dc496f22d 100644 --- a/Documentation/process/4.Coding.rst +++ b/Documentation/process/4.Coding.rst @@ -249,10 +249,8 @@ features; most of these are found in the "kernel hacking" submenu. Several of these options should be turned on for any kernel used for development or testing purposes. In particular, you should turn on: - - ENABLE_MUST_CHECK and FRAME_WARN to get an - extra set of warnings for problems like the use of deprecated interfaces - or ignoring an important return value from a function. The output - generated by these warnings can be verbose, but one need not worry about + - FRAME_WARN to get warnings for stack frames larger than a given amount. + The output generated can be verbose, but one need not worry about warnings from other parts of the kernel. - DEBUG_OBJECTS will add code to track the lifetime of various objects -- GitLab From a734a7235ef3768dd3c9b7034f663ae6b260375f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonathan=20Neusch=C3=A4fer?= Date: Fri, 1 Jan 2021 22:14:47 +0100 Subject: [PATCH 0339/2012] docs: binfmt-misc: Fix .rst formatting MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit "name below" is not part of the /proc path and should not be formatted in monospace. "doesn``t" is rendered in HTML with a double backtick. Revert it back to "doesn't". Signed-off-by: Jonathan Neuschäfer Link: https://lore.kernel.org/r/20210101211447.1021412-1-j.neuschaefer@gmx.net Signed-off-by: Jonathan Corbet --- Documentation/admin-guide/binfmt-misc.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/admin-guide/binfmt-misc.rst b/Documentation/admin-guide/binfmt-misc.rst index 7a864131e5ea7..59cd902e35497 100644 --- a/Documentation/admin-guide/binfmt-misc.rst +++ b/Documentation/admin-guide/binfmt-misc.rst @@ -23,7 +23,7 @@ Here is what the fields mean: - ``name`` is an identifier string. A new /proc file will be created with this - ``name below /proc/sys/fs/binfmt_misc``; cannot contain slashes ``/`` for + name below ``/proc/sys/fs/binfmt_misc``; cannot contain slashes ``/`` for obvious reasons. - ``type`` is the type of recognition. Give ``M`` for magic and ``E`` for extension. @@ -83,7 +83,7 @@ Here is what the fields mean: ``F`` - fix binary The usual behaviour of binfmt_misc is to spawn the binary lazily when the misc format file is invoked. However, - this doesn``t work very well in the face of mount namespaces and + this doesn't work very well in the face of mount namespaces and changeroots, so the ``F`` mode opens the binary as soon as the emulation is installed and uses the opened image to spawn the emulator, meaning it is always available once installed, -- GitLab From 25942e5ecbac33918ec2f0869ca9a374dbb023f2 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Thu, 31 Dec 2020 20:08:31 -0800 Subject: [PATCH 0340/2012] Documentation/admin-guide: kernel-parameters: hyphenate comma-separated Hyphenate "comma separated" when it is used as a compound adjective. hyphenated. Signed-off-by: Randy Dunlap Link: https://lore.kernel.org/r/20210101040831.4148-1-rdunlap@infradead.org Signed-off-by: Jonathan Corbet --- Documentation/admin-guide/kernel-parameters.txt | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index c722ec19cd004..9e3cdb271d06c 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -1385,7 +1385,7 @@ ftrace_filter=[function-list] [FTRACE] Limit the functions traced by the function - tracer at boot up. function-list is a comma separated + tracer at boot up. function-list is a comma-separated list of functions. This list can be changed at run time by the set_ftrace_filter file in the debugfs tracing directory. @@ -1399,13 +1399,13 @@ ftrace_graph_filter=[function-list] [FTRACE] Limit the top level callers functions traced by the function graph tracer at boot up. - function-list is a comma separated list of functions + function-list is a comma-separated list of functions that can be changed at run time by the set_graph_function file in the debugfs tracing directory. ftrace_graph_notrace=[function-list] [FTRACE] Do not trace from the functions specified in - function-list. This list is a comma separated list of + function-list. This list is a comma-separated list of functions that can be changed at run time by the set_graph_notrace file in the debugfs tracing directory. @@ -2421,7 +2421,7 @@ when set. Format: - libata.force= [LIBATA] Force configurations. The format is comma + libata.force= [LIBATA] Force configurations. The format is comma- separated list of "[ID:]VAL" where ID is PORT[.DEVICE]. PORT and DEVICE are decimal numbers matching port, link or device. Basically, it matches @@ -5145,7 +5145,7 @@ stacktrace_filter=[function-list] [FTRACE] Limit the functions that the stack tracer - will trace at boot up. function-list is a comma separated + will trace at boot up. function-list is a comma-separated list of functions. This list can be changed at run time by the stack_trace_filter file in the debugfs tracing directory. Note, this enables stack tracing @@ -5348,7 +5348,7 @@ trace_event=[event-list] [FTRACE] Set and start specified trace events in order to facilitate early boot debugging. The event-list is a - comma separated list of trace events to enable. See + comma-separated list of trace events to enable. See also Documentation/trace/events.rst trace_options=[option-list] -- GitLab From 9d54ee78aef62c29b15ae2f58a70b1d1cd63a8f0 Mon Sep 17 00:00:00 2001 From: Bhaskar Chowdhury Date: Thu, 7 Jan 2021 18:26:10 +0530 Subject: [PATCH 0341/2012] docs: admin-guide: bootconfig: Fix feils to fails s/feils/fails/p Signed-off-by: Bhaskar Chowdhury Acked-by: Randy Dunlap Link: https://lore.kernel.org/r/20210107125610.1576368-1-unixbhaskar@gmail.com Signed-off-by: Jonathan Corbet --- Documentation/admin-guide/bootconfig.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/admin-guide/bootconfig.rst b/Documentation/admin-guide/bootconfig.rst index 9b90efcc3a35e..452b7dcd7f6be 100644 --- a/Documentation/admin-guide/bootconfig.rst +++ b/Documentation/admin-guide/bootconfig.rst @@ -154,7 +154,7 @@ get the boot configuration data. Because of this "piggyback" method, there is no need to change or update the boot loader and the kernel image itself as long as the boot loader passes the correct initrd file size. If by any chance, the boot -loader passes a longer size, the kernel feils to find the bootconfig data. +loader passes a longer size, the kernel fails to find the bootconfig data. To do this operation, Linux kernel provides "bootconfig" command under tools/bootconfig, which allows admin to apply or delete the config file -- GitLab From bb12433bf56e76789c6b08b36c546f745a6aa6e1 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Wed, 6 Jan 2021 12:34:36 -0800 Subject: [PATCH 0342/2012] ARC: unbork 5.11 bootup: fix snafu in _TIF_NOTIFY_SIGNAL handling Linux 5.11.rcX was failing to boot on ARC HSDK board. Turns out we have a couple of issues, this being the first one, and I'm to blame as I didn't pay attention during review. TIF_NOTIFY_SIGNAL support requires checking multiple TIF_* bits in kernel return code path. Old code only needed to check a single bit so BBIT0 worked. New code needs to check multiple bits so AND instruction. So needs to use bit mask variant _TIF_SIGPENDING Cc: Jens Axboe Fixes: 53855e12588743ea128 ("arc: add support for TIF_NOTIFY_SIGNAL") Link: https://github.com/foss-for-synopsys-dwc-arc-processors/linux/issues/34 Signed-off-by: Vineet Gupta --- arch/arc/kernel/entry.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arc/kernel/entry.S b/arch/arc/kernel/entry.S index 1f5308abf36d6..1743506081da6 100644 --- a/arch/arc/kernel/entry.S +++ b/arch/arc/kernel/entry.S @@ -307,7 +307,7 @@ resume_user_mode_begin: mov r0, sp ; pt_regs for arg to do_signal()/do_notify_resume() GET_CURR_THR_INFO_FLAGS r9 - and.f 0, r9, TIF_SIGPENDING|TIF_NOTIFY_SIGNAL + and.f 0, r9, _TIF_SIGPENDING|_TIF_NOTIFY_SIGNAL bz .Lchk_notify_resume ; Normal Trap/IRQ entry only saves Scratch (caller-saved) regs -- GitLab From 2aa078932ff6c66bf10cc5b3144440dbfa7d813d Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 17 Dec 2020 16:31:36 -0800 Subject: [PATCH 0343/2012] KVM: x86/mmu: Use -1 to flag an undefined spte in get_mmio_spte() Return -1 from the get_walk() helpers if the shadow walk doesn't fill at least one spte, which can theoretically happen if the walk hits a not-present PDPTR. Returning the root level in such a case will cause get_mmio_spte() to return garbage (uninitialized stack data). In practice, such a scenario should be impossible as KVM shouldn't get a reserved-bit page fault with a not-present PDPTR. Note, using mmu->root_level in get_walk() is wrong for other reasons, too, but that's now a moot point. Fixes: 95fb5b0258b7 ("kvm: x86/mmu: Support MMIO in the TDP MMU") Cc: Ben Gardon Cc: stable@vger.kernel.org Signed-off-by: Sean Christopherson Message-Id: <20201218003139.2167891-2-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/mmu.c | 7 ++++++- arch/x86/kvm/mmu/tdp_mmu.c | 2 +- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 7a6ae9e90bd70..a48cd12c01d75 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -3488,7 +3488,7 @@ static bool mmio_info_in_cache(struct kvm_vcpu *vcpu, u64 addr, bool direct) static int get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes) { struct kvm_shadow_walk_iterator iterator; - int leaf = vcpu->arch.mmu->root_level; + int leaf = -1; u64 spte; @@ -3532,6 +3532,11 @@ static bool get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep) else leaf = get_walk(vcpu, addr, sptes); + if (unlikely(leaf < 0)) { + *sptep = 0ull; + return reserved; + } + rsvd_check = &vcpu->arch.mmu->shadow_zero_check; for (level = root; level >= leaf; level--) { diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c index 84c8f06bec261..50cec7a15ddb0 100644 --- a/arch/x86/kvm/mmu/tdp_mmu.c +++ b/arch/x86/kvm/mmu/tdp_mmu.c @@ -1152,8 +1152,8 @@ int kvm_tdp_mmu_get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes) { struct tdp_iter iter; struct kvm_mmu *mmu = vcpu->arch.mmu; - int leaf = vcpu->arch.mmu->shadow_root_level; gfn_t gfn = addr >> PAGE_SHIFT; + int leaf = -1; tdp_mmu_for_each_pte(iter, mmu, gfn, gfn + 1) { leaf = iter.level; -- GitLab From 39b4d43e6003cee51cd119596d3c33d0449eb44c Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 17 Dec 2020 16:31:37 -0800 Subject: [PATCH 0344/2012] KVM: x86/mmu: Get root level from walkers when retrieving MMIO SPTE Get the so called "root" level from the low level shadow page table walkers instead of manually attempting to calculate it higher up the stack, e.g. in get_mmio_spte(). When KVM is using PAE shadow paging, the starting level of the walk, from the callers perspective, is not the CR3 root but rather the PDPTR "root". Checking for reserved bits from the CR3 root causes get_mmio_spte() to consume uninitialized stack data due to indexing into sptes[] for a level that was not filled by get_walk(). This can result in false positives and/or negatives depending on what garbage happens to be on the stack. Opportunistically nuke a few extra newlines. Fixes: 95fb5b0258b7 ("kvm: x86/mmu: Support MMIO in the TDP MMU") Reported-by: Richard Herbert Cc: Ben Gardon Cc: stable@vger.kernel.org Signed-off-by: Sean Christopherson Message-Id: <20201218003139.2167891-3-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/mmu.c | 15 ++++++--------- arch/x86/kvm/mmu/tdp_mmu.c | 5 ++++- arch/x86/kvm/mmu/tdp_mmu.h | 4 +++- 3 files changed, 13 insertions(+), 11 deletions(-) diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index a48cd12c01d75..52f36c8790862 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -3485,16 +3485,16 @@ static bool mmio_info_in_cache(struct kvm_vcpu *vcpu, u64 addr, bool direct) * Return the level of the lowest level SPTE added to sptes. * That SPTE may be non-present. */ -static int get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes) +static int get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes, int *root_level) { struct kvm_shadow_walk_iterator iterator; int leaf = -1; u64 spte; - walk_shadow_page_lockless_begin(vcpu); - for (shadow_walk_init(&iterator, vcpu, addr); + for (shadow_walk_init(&iterator, vcpu, addr), + *root_level = iterator.level; shadow_walk_okay(&iterator); __shadow_walk_next(&iterator, spte)) { leaf = iterator.level; @@ -3504,7 +3504,6 @@ static int get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes) if (!is_shadow_present_pte(spte)) break; - } walk_shadow_page_lockless_end(vcpu); @@ -3517,9 +3516,7 @@ static bool get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep) { u64 sptes[PT64_ROOT_MAX_LEVEL]; struct rsvd_bits_validate *rsvd_check; - int root = vcpu->arch.mmu->shadow_root_level; - int leaf; - int level; + int root, leaf, level; bool reserved = false; if (!VALID_PAGE(vcpu->arch.mmu->root_hpa)) { @@ -3528,9 +3525,9 @@ static bool get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep) } if (is_tdp_mmu_root(vcpu->kvm, vcpu->arch.mmu->root_hpa)) - leaf = kvm_tdp_mmu_get_walk(vcpu, addr, sptes); + leaf = kvm_tdp_mmu_get_walk(vcpu, addr, sptes, &root); else - leaf = get_walk(vcpu, addr, sptes); + leaf = get_walk(vcpu, addr, sptes, &root); if (unlikely(leaf < 0)) { *sptep = 0ull; diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c index 50cec7a15ddb0..a4f9447f83276 100644 --- a/arch/x86/kvm/mmu/tdp_mmu.c +++ b/arch/x86/kvm/mmu/tdp_mmu.c @@ -1148,13 +1148,16 @@ bool kvm_tdp_mmu_write_protect_gfn(struct kvm *kvm, * Return the level of the lowest level SPTE added to sptes. * That SPTE may be non-present. */ -int kvm_tdp_mmu_get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes) +int kvm_tdp_mmu_get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes, + int *root_level) { struct tdp_iter iter; struct kvm_mmu *mmu = vcpu->arch.mmu; gfn_t gfn = addr >> PAGE_SHIFT; int leaf = -1; + *root_level = vcpu->arch.mmu->shadow_root_level; + tdp_mmu_for_each_pte(iter, mmu, gfn, gfn + 1) { leaf = iter.level; sptes[leaf - 1] = iter.old_spte; diff --git a/arch/x86/kvm/mmu/tdp_mmu.h b/arch/x86/kvm/mmu/tdp_mmu.h index 556e065503f69..cbbdbadd1526f 100644 --- a/arch/x86/kvm/mmu/tdp_mmu.h +++ b/arch/x86/kvm/mmu/tdp_mmu.h @@ -44,5 +44,7 @@ void kvm_tdp_mmu_zap_collapsible_sptes(struct kvm *kvm, bool kvm_tdp_mmu_write_protect_gfn(struct kvm *kvm, struct kvm_memory_slot *slot, gfn_t gfn); -int kvm_tdp_mmu_get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes); +int kvm_tdp_mmu_get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes, + int *root_level); + #endif /* __KVM_X86_MMU_TDP_MMU_H */ -- GitLab From dde81f9477d018a96fba991c5928c6ab8cc109f8 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 17 Dec 2020 16:31:38 -0800 Subject: [PATCH 0345/2012] KVM: x86/mmu: Use raw level to index into MMIO walks' sptes array Bump the size of the sptes array by one and use the raw level of the SPTE to index into the sptes array. Using the SPTE level directly improves readability by eliminating the need to reason out why the level is being adjusted when indexing the array. The array is on the stack and is not explicitly initialized; bumping its size is nothing more than a superficial adjustment to the stack frame. Signed-off-by: Sean Christopherson Message-Id: <20201218003139.2167891-4-seanjc@google.com> Reviewed-by: Vitaly Kuznetsov Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/mmu.c | 15 +++++++-------- arch/x86/kvm/mmu/tdp_mmu.c | 2 +- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 52f36c8790862..4798a4472066d 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -3500,7 +3500,7 @@ static int get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes, int *root_level leaf = iterator.level; spte = mmu_spte_get_lockless(iterator.sptep); - sptes[leaf - 1] = spte; + sptes[leaf] = spte; if (!is_shadow_present_pte(spte)) break; @@ -3514,7 +3514,7 @@ static int get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes, int *root_level /* return true if reserved bit is detected on spte. */ static bool get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep) { - u64 sptes[PT64_ROOT_MAX_LEVEL]; + u64 sptes[PT64_ROOT_MAX_LEVEL + 1]; struct rsvd_bits_validate *rsvd_check; int root, leaf, level; bool reserved = false; @@ -3537,16 +3537,15 @@ static bool get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep) rsvd_check = &vcpu->arch.mmu->shadow_zero_check; for (level = root; level >= leaf; level--) { - if (!is_shadow_present_pte(sptes[level - 1])) + if (!is_shadow_present_pte(sptes[level])) break; /* * Use a bitwise-OR instead of a logical-OR to aggregate the * reserved bit and EPT's invalid memtype/XWR checks to avoid * adding a Jcc in the loop. */ - reserved |= __is_bad_mt_xwr(rsvd_check, sptes[level - 1]) | - __is_rsvd_bits_set(rsvd_check, sptes[level - 1], - level); + reserved |= __is_bad_mt_xwr(rsvd_check, sptes[level]) | + __is_rsvd_bits_set(rsvd_check, sptes[level], level); } if (reserved) { @@ -3554,10 +3553,10 @@ static bool get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep) __func__, addr); for (level = root; level >= leaf; level--) pr_err("------ spte 0x%llx level %d.\n", - sptes[level - 1], level); + sptes[level], level); } - *sptep = sptes[leaf - 1]; + *sptep = sptes[leaf]; return reserved; } diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c index a4f9447f83276..efef571806ad6 100644 --- a/arch/x86/kvm/mmu/tdp_mmu.c +++ b/arch/x86/kvm/mmu/tdp_mmu.c @@ -1160,7 +1160,7 @@ int kvm_tdp_mmu_get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes, tdp_mmu_for_each_pte(iter, mmu, gfn, gfn + 1) { leaf = iter.level; - sptes[leaf - 1] = iter.old_spte; + sptes[leaf] = iter.old_spte; } return leaf; -- GitLab From 9aa418792f5f11ef5d6f72265e1f8ae07efd5784 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 17 Dec 2020 16:31:39 -0800 Subject: [PATCH 0346/2012] KVM: x86/mmu: Optimize not-present/MMIO SPTE check in get_mmio_spte() Check only the terminal leaf for a "!PRESENT || MMIO" SPTE when looking for reserved bits on valid, non-MMIO SPTEs. The get_walk() helpers terminate their walks if a not-present or MMIO SPTE is encountered, i.e. the non-terminal SPTEs have already been verified to be regular SPTEs. This eliminates an extra check-and-branch in a relatively hot loop. Signed-off-by: Sean Christopherson Message-Id: <20201218003139.2167891-5-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/mmu.c | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 4798a4472066d..769855f5f0a12 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -3511,7 +3511,7 @@ static int get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes, int *root_level return leaf; } -/* return true if reserved bit is detected on spte. */ +/* return true if reserved bit(s) are detected on a valid, non-MMIO SPTE. */ static bool get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep) { u64 sptes[PT64_ROOT_MAX_LEVEL + 1]; @@ -3534,11 +3534,20 @@ static bool get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep) return reserved; } + *sptep = sptes[leaf]; + + /* + * Skip reserved bits checks on the terminal leaf if it's not a valid + * SPTE. Note, this also (intentionally) skips MMIO SPTEs, which, by + * design, always have reserved bits set. The purpose of the checks is + * to detect reserved bits on non-MMIO SPTEs. i.e. buggy SPTEs. + */ + if (!is_shadow_present_pte(sptes[leaf])) + leaf++; + rsvd_check = &vcpu->arch.mmu->shadow_zero_check; - for (level = root; level >= leaf; level--) { - if (!is_shadow_present_pte(sptes[level])) - break; + for (level = root; level >= leaf; level--) /* * Use a bitwise-OR instead of a logical-OR to aggregate the * reserved bit and EPT's invalid memtype/XWR checks to avoid @@ -3546,7 +3555,6 @@ static bool get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep) */ reserved |= __is_bad_mt_xwr(rsvd_check, sptes[level]) | __is_rsvd_bits_set(rsvd_check, sptes[level], level); - } if (reserved) { pr_err("%s: detect reserved bits on spte, addr 0x%llx, dump hierarchy:\n", @@ -3556,8 +3564,6 @@ static bool get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep) sptes[level], level); } - *sptep = sptes[leaf]; - return reserved; } -- GitLab From f65cf84ee769767536dc367acc9568ddb6e4c9f4 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Fri, 18 Dec 2020 23:37:11 -0700 Subject: [PATCH 0347/2012] KVM: SVM: Add register operand to vmsave call in sev_es_vcpu_load When using LLVM's integrated assembler (LLVM_IAS=1) while building x86_64_defconfig + CONFIG_KVM=y + CONFIG_KVM_AMD=y, the following build error occurs: $ make LLVM=1 LLVM_IAS=1 arch/x86/kvm/svm/sev.o arch/x86/kvm/svm/sev.c:2004:15: error: too few operands for instruction asm volatile(__ex("vmsave") : : "a" (__sme_page_pa(sd->save_area)) : "memory"); ^ arch/x86/kvm/svm/sev.c:28:17: note: expanded from macro '__ex' #define __ex(x) __kvm_handle_fault_on_reboot(x) ^ ./arch/x86/include/asm/kvm_host.h:1646:10: note: expanded from macro '__kvm_handle_fault_on_reboot' "666: \n\t" \ ^ :2:2: note: instantiated into assembly here vmsave ^ 1 error generated. This happens because LLVM currently does not support calling vmsave without the fixed register operand (%rax for 64-bit and %eax for 32-bit). This will be fixed in LLVM 12 but the kernel currently supports LLVM 10.0.1 and newer so this needs to be handled. Add the proper register using the _ASM_AX macro, which matches the vmsave call in vmenter.S. Fixes: 861377730aa9 ("KVM: SVM: Provide support for SEV-ES vCPU loading") Link: https://reviews.llvm.org/D93524 Link: https://github.com/ClangBuiltLinux/linux/issues/1216 Signed-off-by: Nathan Chancellor Message-Id: <20201219063711.3526947-1-natechancellor@gmail.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/sev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 9858d5ae9dddd..563ced07b0b85 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -2001,7 +2001,7 @@ void sev_es_vcpu_load(struct vcpu_svm *svm, int cpu) * of which one step is to perform a VMLOAD. Since hardware does not * perform a VMSAVE on VMRUN, the host savearea must be updated. */ - asm volatile(__ex("vmsave") : : "a" (__sme_page_pa(sd->save_area)) : "memory"); + asm volatile(__ex("vmsave %0") : : "a" (__sme_page_pa(sd->save_area)) : "memory"); /* * Certain MSRs are restored on VMEXIT, only save ones that aren't -- GitLab From 52782d5b63725a6c4bf642557c83507430064110 Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Sun, 20 Dec 2020 21:03:39 +0100 Subject: [PATCH 0348/2012] KVM/SVM: Remove leftover __svm_vcpu_run prototype from svm.c Commit 16809ecdc1e8a moved __svm_vcpu_run the prototype to svm.h, but forgot to remove the original from svm.c. Fixes: 16809ecdc1e8a ("KVM: SVM: Provide an updated VMRUN invocation for SEV-ES guests") Cc: Tom Lendacky Cc: Paolo Bonzini Signed-off-by: Uros Bizjak Message-Id: <20201220200339.65115-1-ubizjak@gmail.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/svm.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index cce0143a6f801..6824d611dc5dd 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -3677,8 +3677,6 @@ static fastpath_t svm_exit_handlers_fastpath(struct kvm_vcpu *vcpu) return EXIT_FASTPATH_NONE; } -void __svm_vcpu_run(unsigned long vmcb_pa, unsigned long *regs); - static noinstr void svm_vcpu_enter_exit(struct kvm_vcpu *vcpu, struct vcpu_svm *svm) { -- GitLab From e42ac777d661e878c3b9bac56df11e226cab3010 Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Fri, 18 Dec 2020 15:17:32 +0100 Subject: [PATCH 0349/2012] KVM: selftests: Factor out guest mode code demand_paging_test, dirty_log_test, and dirty_log_perf_test have redundant guest mode code. Factor it out. Also, while adding a new include, remove the ones we don't need. Reviewed-by: Ben Gardon Reviewed-by: Peter Xu Signed-off-by: Andrew Jones Message-Id: <20201218141734.54359-2-drjones@redhat.com> Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/Makefile | 2 +- .../selftests/kvm/demand_paging_test.c | 107 ++++----------- .../selftests/kvm/dirty_log_perf_test.c | 121 +++++------------ tools/testing/selftests/kvm/dirty_log_test.c | 125 ++++++------------ .../selftests/kvm/include/guest_modes.h | 21 +++ tools/testing/selftests/kvm/lib/guest_modes.c | 70 ++++++++++ 6 files changed, 189 insertions(+), 257 deletions(-) create mode 100644 tools/testing/selftests/kvm/include/guest_modes.h create mode 100644 tools/testing/selftests/kvm/lib/guest_modes.c diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index c7ca4faba2721..a7286a08c3ae9 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -33,7 +33,7 @@ ifeq ($(ARCH),s390) UNAME_M := s390x endif -LIBKVM = lib/assert.c lib/elf.c lib/io.c lib/kvm_util.c lib/sparsebit.c lib/test_util.c +LIBKVM = lib/assert.c lib/elf.c lib/io.c lib/kvm_util.c lib/sparsebit.c lib/test_util.c lib/guest_modes.c LIBKVM_x86_64 = lib/x86_64/processor.c lib/x86_64/vmx.c lib/x86_64/svm.c lib/x86_64/ucall.c lib/x86_64/handlers.S LIBKVM_aarch64 = lib/aarch64/processor.c lib/aarch64/ucall.c LIBKVM_s390x = lib/s390x/processor.c lib/s390x/ucall.c lib/s390x/diag318_test_handler.c diff --git a/tools/testing/selftests/kvm/demand_paging_test.c b/tools/testing/selftests/kvm/demand_paging_test.c index 3d96a7bfaff30..946161a9ce2d8 100644 --- a/tools/testing/selftests/kvm/demand_paging_test.c +++ b/tools/testing/selftests/kvm/demand_paging_test.c @@ -7,23 +7,20 @@ * Copyright (C) 2019, Google, Inc. */ -#define _GNU_SOURCE /* for program_invocation_name */ +#define _GNU_SOURCE /* for program_invocation_name and pipe2 */ #include #include -#include -#include -#include #include #include #include -#include -#include #include +#include -#include "perf_test_util.h" -#include "processor.h" +#include "kvm_util.h" #include "test_util.h" +#include "perf_test_util.h" +#include "guest_modes.h" #ifdef __NR_userfaultfd @@ -248,9 +245,14 @@ static int setup_demand_paging(struct kvm_vm *vm, return 0; } -static void run_test(enum vm_guest_mode mode, bool use_uffd, - useconds_t uffd_delay) +struct test_params { + bool use_uffd; + useconds_t uffd_delay; +}; + +static void run_test(enum vm_guest_mode mode, void *arg) { + struct test_params *p = arg; pthread_t *vcpu_threads; pthread_t *uffd_handler_threads = NULL; struct uffd_handler_args *uffd_args = NULL; @@ -275,7 +277,7 @@ static void run_test(enum vm_guest_mode mode, bool use_uffd, add_vcpus(vm, nr_vcpus, guest_percpu_mem_size); - if (use_uffd) { + if (p->use_uffd) { uffd_handler_threads = malloc(nr_vcpus * sizeof(*uffd_handler_threads)); TEST_ASSERT(uffd_handler_threads, "Memory allocation failed"); @@ -308,7 +310,7 @@ static void run_test(enum vm_guest_mode mode, bool use_uffd, r = setup_demand_paging(vm, &uffd_handler_threads[vcpu_id], pipefds[vcpu_id * 2], - uffd_delay, &uffd_args[vcpu_id], + p->uffd_delay, &uffd_args[vcpu_id], vcpu_hva, guest_percpu_mem_size); if (r < 0) exit(-r); @@ -339,7 +341,7 @@ static void run_test(enum vm_guest_mode mode, bool use_uffd, pr_info("All vCPU threads joined\n"); - if (use_uffd) { + if (p->use_uffd) { char c; /* Tell the user fault fd handler threads to quit */ @@ -362,38 +364,19 @@ static void run_test(enum vm_guest_mode mode, bool use_uffd, free(guest_data_prototype); free(vcpu_threads); - if (use_uffd) { + if (p->use_uffd) { free(uffd_handler_threads); free(uffd_args); free(pipefds); } } -struct guest_mode { - bool supported; - bool enabled; -}; -static struct guest_mode guest_modes[NUM_VM_MODES]; - -#define guest_mode_init(mode, supported, enabled) ({ \ - guest_modes[mode] = (struct guest_mode){ supported, enabled }; \ -}) - static void help(char *name) { - int i; - puts(""); printf("usage: %s [-h] [-m mode] [-u] [-d uffd_delay_usec]\n" " [-b memory] [-v vcpus]\n", name); - printf(" -m: specify the guest mode ID to test\n" - " (default: test all supported modes)\n" - " This option may be used multiple times.\n" - " Guest mode IDs:\n"); - for (i = 0; i < NUM_VM_MODES; ++i) { - printf(" %d: %s%s\n", i, vm_guest_mode_string(i), - guest_modes[i].supported ? " (supported)" : ""); - } + guest_modes_help(); printf(" -u: use User Fault FD to handle vCPU page\n" " faults.\n"); printf(" -d: add a delay in usec to the User Fault\n" @@ -410,53 +393,22 @@ static void help(char *name) int main(int argc, char *argv[]) { int max_vcpus = kvm_check_cap(KVM_CAP_MAX_VCPUS); - bool mode_selected = false; - unsigned int mode; - int opt, i; - bool use_uffd = false; - useconds_t uffd_delay = 0; - -#ifdef __x86_64__ - guest_mode_init(VM_MODE_PXXV48_4K, true, true); -#endif -#ifdef __aarch64__ - guest_mode_init(VM_MODE_P40V48_4K, true, true); - guest_mode_init(VM_MODE_P40V48_64K, true, true); - { - unsigned int limit = kvm_check_cap(KVM_CAP_ARM_VM_IPA_SIZE); - - if (limit >= 52) - guest_mode_init(VM_MODE_P52V48_64K, true, true); - if (limit >= 48) { - guest_mode_init(VM_MODE_P48V48_4K, true, true); - guest_mode_init(VM_MODE_P48V48_64K, true, true); - } - } -#endif -#ifdef __s390x__ - guest_mode_init(VM_MODE_P40V48_4K, true, true); -#endif + struct test_params p = {}; + int opt; + + guest_modes_append_default(); while ((opt = getopt(argc, argv, "hm:ud:b:v:")) != -1) { switch (opt) { case 'm': - if (!mode_selected) { - for (i = 0; i < NUM_VM_MODES; ++i) - guest_modes[i].enabled = false; - mode_selected = true; - } - mode = strtoul(optarg, NULL, 10); - TEST_ASSERT(mode < NUM_VM_MODES, - "Guest mode ID %d too big", mode); - guest_modes[mode].enabled = true; + guest_modes_cmdline(optarg); break; case 'u': - use_uffd = true; + p.use_uffd = true; break; case 'd': - uffd_delay = strtoul(optarg, NULL, 0); - TEST_ASSERT(uffd_delay >= 0, - "A negative UFFD delay is not supported."); + p.uffd_delay = strtoul(optarg, NULL, 0); + TEST_ASSERT(p.uffd_delay >= 0, "A negative UFFD delay is not supported."); break; case 'b': guest_percpu_mem_size = parse_size(optarg); @@ -473,14 +425,7 @@ int main(int argc, char *argv[]) } } - for (i = 0; i < NUM_VM_MODES; ++i) { - if (!guest_modes[i].enabled) - continue; - TEST_ASSERT(guest_modes[i].supported, - "Guest mode ID %d (%s) not supported.", - i, vm_guest_mode_string(i)); - run_test(i, use_uffd, uffd_delay); - } + for_each_guest_mode(run_test, &p); return 0; } diff --git a/tools/testing/selftests/kvm/dirty_log_perf_test.c b/tools/testing/selftests/kvm/dirty_log_perf_test.c index 9c6a7be31e033..506741eb5d7f3 100644 --- a/tools/testing/selftests/kvm/dirty_log_perf_test.c +++ b/tools/testing/selftests/kvm/dirty_log_perf_test.c @@ -12,16 +12,14 @@ #include #include -#include #include #include #include -#include #include "kvm_util.h" -#include "perf_test_util.h" -#include "processor.h" #include "test_util.h" +#include "perf_test_util.h" +#include "guest_modes.h" /* How many host loops to run by default (one KVM_GET_DIRTY_LOG for each loop)*/ #define TEST_HOST_LOOP_N 2UL @@ -89,9 +87,15 @@ static void *vcpu_worker(void *data) return NULL; } -static void run_test(enum vm_guest_mode mode, unsigned long iterations, - uint64_t phys_offset, int wr_fract) +struct test_params { + unsigned long iterations; + uint64_t phys_offset; + int wr_fract; +}; + +static void run_test(enum vm_guest_mode mode, void *arg) { + struct test_params *p = arg; pthread_t *vcpu_threads; struct kvm_vm *vm; unsigned long *bmap; @@ -108,7 +112,7 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations, vm = create_vm(mode, nr_vcpus, guest_percpu_mem_size); - perf_test_args.wr_fract = wr_fract; + perf_test_args.wr_fract = p->wr_fract; guest_num_pages = (nr_vcpus * guest_percpu_mem_size) >> vm_get_page_shift(vm); guest_num_pages = vm_adjust_num_guest_pages(mode, guest_num_pages); @@ -156,7 +160,7 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations, pr_info("Enabling dirty logging time: %ld.%.9lds\n\n", ts_diff.tv_sec, ts_diff.tv_nsec); - while (iteration < iterations) { + while (iteration < p->iterations) { /* * Incrementing the iteration number will start the vCPUs * dirtying memory again. @@ -210,15 +214,15 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations, pr_info("Disabling dirty logging time: %ld.%.9lds\n", ts_diff.tv_sec, ts_diff.tv_nsec); - avg = timespec_div(get_dirty_log_total, iterations); + avg = timespec_div(get_dirty_log_total, p->iterations); pr_info("Get dirty log over %lu iterations took %ld.%.9lds. (Avg %ld.%.9lds/iteration)\n", - iterations, get_dirty_log_total.tv_sec, + p->iterations, get_dirty_log_total.tv_sec, get_dirty_log_total.tv_nsec, avg.tv_sec, avg.tv_nsec); if (dirty_log_manual_caps) { - avg = timespec_div(clear_dirty_log_total, iterations); + avg = timespec_div(clear_dirty_log_total, p->iterations); pr_info("Clear dirty log over %lu iterations took %ld.%.9lds. (Avg %ld.%.9lds/iteration)\n", - iterations, clear_dirty_log_total.tv_sec, + p->iterations, clear_dirty_log_total.tv_sec, clear_dirty_log_total.tv_nsec, avg.tv_sec, avg.tv_nsec); } @@ -228,20 +232,8 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations, kvm_vm_free(vm); } -struct guest_mode { - bool supported; - bool enabled; -}; -static struct guest_mode guest_modes[NUM_VM_MODES]; - -#define guest_mode_init(mode, supported, enabled) ({ \ - guest_modes[mode] = (struct guest_mode){ supported, enabled }; \ -}) - static void help(char *name) { - int i; - puts(""); printf("usage: %s [-h] [-i iterations] [-p offset] " "[-m mode] [-b vcpu bytes] [-v vcpus]\n", name); @@ -250,14 +242,7 @@ static void help(char *name) TEST_HOST_LOOP_N); printf(" -p: specify guest physical test memory offset\n" " Warning: a low offset can conflict with the loaded test code.\n"); - printf(" -m: specify the guest mode ID to test " - "(default: test all supported modes)\n" - " This option may be used multiple times.\n" - " Guest mode IDs:\n"); - for (i = 0; i < NUM_VM_MODES; ++i) { - printf(" %d: %s%s\n", i, vm_guest_mode_string(i), - guest_modes[i].supported ? " (supported)" : ""); - } + guest_modes_help(); printf(" -b: specify the size of the memory region which should be\n" " dirtied by each vCPU. e.g. 10M or 3G.\n" " (default: 1G)\n"); @@ -272,74 +257,43 @@ static void help(char *name) int main(int argc, char *argv[]) { - unsigned long iterations = TEST_HOST_LOOP_N; - bool mode_selected = false; - uint64_t phys_offset = 0; - unsigned int mode; - int opt, i; - int wr_fract = 1; + int max_vcpus = kvm_check_cap(KVM_CAP_MAX_VCPUS); + struct test_params p = { + .iterations = TEST_HOST_LOOP_N, + .wr_fract = 1, + }; + int opt; dirty_log_manual_caps = kvm_check_cap(KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2); dirty_log_manual_caps &= (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE | KVM_DIRTY_LOG_INITIALLY_SET); -#ifdef __x86_64__ - guest_mode_init(VM_MODE_PXXV48_4K, true, true); -#endif -#ifdef __aarch64__ - guest_mode_init(VM_MODE_P40V48_4K, true, true); - guest_mode_init(VM_MODE_P40V48_64K, true, true); - - { - unsigned int limit = kvm_check_cap(KVM_CAP_ARM_VM_IPA_SIZE); - - if (limit >= 52) - guest_mode_init(VM_MODE_P52V48_64K, true, true); - if (limit >= 48) { - guest_mode_init(VM_MODE_P48V48_4K, true, true); - guest_mode_init(VM_MODE_P48V48_64K, true, true); - } - } -#endif -#ifdef __s390x__ - guest_mode_init(VM_MODE_P40V48_4K, true, true); -#endif + guest_modes_append_default(); while ((opt = getopt(argc, argv, "hi:p:m:b:f:v:")) != -1) { switch (opt) { case 'i': - iterations = strtol(optarg, NULL, 10); + p.iterations = strtol(optarg, NULL, 10); break; case 'p': - phys_offset = strtoull(optarg, NULL, 0); + p.phys_offset = strtoull(optarg, NULL, 0); break; case 'm': - if (!mode_selected) { - for (i = 0; i < NUM_VM_MODES; ++i) - guest_modes[i].enabled = false; - mode_selected = true; - } - mode = strtoul(optarg, NULL, 10); - TEST_ASSERT(mode < NUM_VM_MODES, - "Guest mode ID %d too big", mode); - guest_modes[mode].enabled = true; + guest_modes_cmdline(optarg); break; case 'b': guest_percpu_mem_size = parse_size(optarg); break; case 'f': - wr_fract = atoi(optarg); - TEST_ASSERT(wr_fract >= 1, + p.wr_fract = atoi(optarg); + TEST_ASSERT(p.wr_fract >= 1, "Write fraction cannot be less than one"); break; case 'v': nr_vcpus = atoi(optarg); - TEST_ASSERT(nr_vcpus > 0, - "Must have a positive number of vCPUs"); - TEST_ASSERT(nr_vcpus <= MAX_VCPUS, - "This test does not currently support\n" - "more than %d vCPUs.", MAX_VCPUS); + TEST_ASSERT(nr_vcpus > 0 && nr_vcpus <= max_vcpus, + "Invalid number of vcpus, must be between 1 and %d", max_vcpus); break; case 'h': default: @@ -348,18 +302,11 @@ int main(int argc, char *argv[]) } } - TEST_ASSERT(iterations >= 2, "The test should have at least two iterations"); + TEST_ASSERT(p.iterations >= 2, "The test should have at least two iterations"); - pr_info("Test iterations: %"PRIu64"\n", iterations); + pr_info("Test iterations: %"PRIu64"\n", p.iterations); - for (i = 0; i < NUM_VM_MODES; ++i) { - if (!guest_modes[i].enabled) - continue; - TEST_ASSERT(guest_modes[i].supported, - "Guest mode ID %d (%s) not supported.", - i, vm_guest_mode_string(i)); - run_test(i, iterations, phys_offset, wr_fract); - } + for_each_guest_mode(run_test, &p); return 0; } diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c index 471baecb7772a..bb2752d78fe3a 100644 --- a/tools/testing/selftests/kvm/dirty_log_test.c +++ b/tools/testing/selftests/kvm/dirty_log_test.c @@ -9,8 +9,6 @@ #include #include -#include -#include #include #include #include @@ -20,8 +18,9 @@ #include #include -#include "test_util.h" #include "kvm_util.h" +#include "test_util.h" +#include "guest_modes.h" #include "processor.h" #define VCPU_ID 1 @@ -673,9 +672,15 @@ static struct kvm_vm *create_vm(enum vm_guest_mode mode, uint32_t vcpuid, #define DIRTY_MEM_BITS 30 /* 1G */ #define PAGE_SHIFT_4K 12 -static void run_test(enum vm_guest_mode mode, unsigned long iterations, - unsigned long interval, uint64_t phys_offset) +struct test_params { + unsigned long iterations; + unsigned long interval; + uint64_t phys_offset; +}; + +static void run_test(enum vm_guest_mode mode, void *arg) { + struct test_params *p = arg; struct kvm_vm *vm; unsigned long *bmap; @@ -709,12 +714,12 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations, host_page_size = getpagesize(); host_num_pages = vm_num_host_pages(mode, guest_num_pages); - if (!phys_offset) { + if (!p->phys_offset) { guest_test_phys_mem = (vm_get_max_gfn(vm) - guest_num_pages) * guest_page_size; guest_test_phys_mem &= ~(host_page_size - 1); } else { - guest_test_phys_mem = phys_offset; + guest_test_phys_mem = p->phys_offset; } #ifdef __s390x__ @@ -758,9 +763,9 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations, pthread_create(&vcpu_thread, NULL, vcpu_worker, vm); - while (iteration < iterations) { + while (iteration < p->iterations) { /* Give the vcpu thread some time to dirty some pages */ - usleep(interval * 1000); + usleep(p->interval * 1000); log_mode_collect_dirty_pages(vm, TEST_MEM_SLOT_INDEX, bmap, host_num_pages); vm_dirty_log_verify(mode, bmap); @@ -783,20 +788,8 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations, kvm_vm_free(vm); } -struct guest_mode { - bool supported; - bool enabled; -}; -static struct guest_mode guest_modes[NUM_VM_MODES]; - -#define guest_mode_init(mode, supported, enabled) ({ \ - guest_modes[mode] = (struct guest_mode){ supported, enabled }; \ -}) - static void help(char *name) { - int i; - puts(""); printf("usage: %s [-h] [-i iterations] [-I interval] " "[-p offset] [-m mode]\n", name); @@ -813,51 +806,23 @@ static void help(char *name) printf(" -M: specify the host logging mode " "(default: run all log modes). Supported modes: \n\t"); log_modes_dump(); - printf(" -m: specify the guest mode ID to test " - "(default: test all supported modes)\n" - " This option may be used multiple times.\n" - " Guest mode IDs:\n"); - for (i = 0; i < NUM_VM_MODES; ++i) { - printf(" %d: %s%s\n", i, vm_guest_mode_string(i), - guest_modes[i].supported ? " (supported)" : ""); - } + guest_modes_help(); puts(""); exit(0); } int main(int argc, char *argv[]) { - unsigned long iterations = TEST_HOST_LOOP_N; - unsigned long interval = TEST_HOST_LOOP_INTERVAL; - bool mode_selected = false; - uint64_t phys_offset = 0; - unsigned int mode; - int opt, i, j; + struct test_params p = { + .iterations = TEST_HOST_LOOP_N, + .interval = TEST_HOST_LOOP_INTERVAL, + }; + int opt, i; sem_init(&dirty_ring_vcpu_stop, 0, 0); sem_init(&dirty_ring_vcpu_cont, 0, 0); -#ifdef __x86_64__ - guest_mode_init(VM_MODE_PXXV48_4K, true, true); -#endif -#ifdef __aarch64__ - guest_mode_init(VM_MODE_P40V48_4K, true, true); - guest_mode_init(VM_MODE_P40V48_64K, true, true); - - { - unsigned int limit = kvm_check_cap(KVM_CAP_ARM_VM_IPA_SIZE); - - if (limit >= 52) - guest_mode_init(VM_MODE_P52V48_64K, true, true); - if (limit >= 48) { - guest_mode_init(VM_MODE_P48V48_4K, true, true); - guest_mode_init(VM_MODE_P48V48_64K, true, true); - } - } -#endif -#ifdef __s390x__ - guest_mode_init(VM_MODE_P40V48_4K, true, true); -#endif + guest_modes_append_default(); while ((opt = getopt(argc, argv, "c:hi:I:p:m:M:")) != -1) { switch (opt) { @@ -865,24 +830,16 @@ int main(int argc, char *argv[]) test_dirty_ring_count = strtol(optarg, NULL, 10); break; case 'i': - iterations = strtol(optarg, NULL, 10); + p.iterations = strtol(optarg, NULL, 10); break; case 'I': - interval = strtol(optarg, NULL, 10); + p.interval = strtol(optarg, NULL, 10); break; case 'p': - phys_offset = strtoull(optarg, NULL, 0); + p.phys_offset = strtoull(optarg, NULL, 0); break; case 'm': - if (!mode_selected) { - for (i = 0; i < NUM_VM_MODES; ++i) - guest_modes[i].enabled = false; - mode_selected = true; - } - mode = strtoul(optarg, NULL, 10); - TEST_ASSERT(mode < NUM_VM_MODES, - "Guest mode ID %d too big", mode); - guest_modes[mode].enabled = true; + guest_modes_cmdline(optarg); break; case 'M': if (!strcmp(optarg, "all")) { @@ -911,32 +868,24 @@ int main(int argc, char *argv[]) } } - TEST_ASSERT(iterations > 2, "Iterations must be greater than two"); - TEST_ASSERT(interval > 0, "Interval must be greater than zero"); + TEST_ASSERT(p.iterations > 2, "Iterations must be greater than two"); + TEST_ASSERT(p.interval > 0, "Interval must be greater than zero"); pr_info("Test iterations: %"PRIu64", interval: %"PRIu64" (ms)\n", - iterations, interval); + p.iterations, p.interval); srandom(time(0)); - for (i = 0; i < NUM_VM_MODES; ++i) { - if (!guest_modes[i].enabled) - continue; - TEST_ASSERT(guest_modes[i].supported, - "Guest mode ID %d (%s) not supported.", - i, vm_guest_mode_string(i)); - if (host_log_mode_option == LOG_MODE_ALL) { - /* Run each log mode */ - for (j = 0; j < LOG_MODE_NUM; j++) { - pr_info("Testing Log Mode '%s'\n", - log_modes[j].name); - host_log_mode = j; - run_test(i, iterations, interval, phys_offset); - } - } else { - host_log_mode = host_log_mode_option; - run_test(i, iterations, interval, phys_offset); + if (host_log_mode_option == LOG_MODE_ALL) { + /* Run each log mode */ + for (i = 0; i < LOG_MODE_NUM; i++) { + pr_info("Testing Log Mode '%s'\n", log_modes[i].name); + host_log_mode = i; + for_each_guest_mode(run_test, &p); } + } else { + host_log_mode = host_log_mode_option; + for_each_guest_mode(run_test, &p); } return 0; diff --git a/tools/testing/selftests/kvm/include/guest_modes.h b/tools/testing/selftests/kvm/include/guest_modes.h new file mode 100644 index 0000000000000..b691df33e64e1 --- /dev/null +++ b/tools/testing/selftests/kvm/include/guest_modes.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020, Red Hat, Inc. + */ +#include "kvm_util.h" + +struct guest_mode { + bool supported; + bool enabled; +}; + +extern struct guest_mode guest_modes[NUM_VM_MODES]; + +#define guest_mode_append(mode, supported, enabled) ({ \ + guest_modes[mode] = (struct guest_mode){ supported, enabled }; \ +}) + +void guest_modes_append_default(void); +void for_each_guest_mode(void (*func)(enum vm_guest_mode, void *), void *arg); +void guest_modes_help(void); +void guest_modes_cmdline(const char *arg); diff --git a/tools/testing/selftests/kvm/lib/guest_modes.c b/tools/testing/selftests/kvm/lib/guest_modes.c new file mode 100644 index 0000000000000..25bff307c71f2 --- /dev/null +++ b/tools/testing/selftests/kvm/lib/guest_modes.c @@ -0,0 +1,70 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2020, Red Hat, Inc. + */ +#include "guest_modes.h" + +struct guest_mode guest_modes[NUM_VM_MODES]; + +void guest_modes_append_default(void) +{ + guest_mode_append(VM_MODE_DEFAULT, true, true); + +#ifdef __aarch64__ + guest_mode_append(VM_MODE_P40V48_64K, true, true); + { + unsigned int limit = kvm_check_cap(KVM_CAP_ARM_VM_IPA_SIZE); + if (limit >= 52) + guest_mode_append(VM_MODE_P52V48_64K, true, true); + if (limit >= 48) { + guest_mode_append(VM_MODE_P48V48_4K, true, true); + guest_mode_append(VM_MODE_P48V48_64K, true, true); + } + } +#endif +} + +void for_each_guest_mode(void (*func)(enum vm_guest_mode, void *), void *arg) +{ + int i; + + for (i = 0; i < NUM_VM_MODES; ++i) { + if (!guest_modes[i].enabled) + continue; + TEST_ASSERT(guest_modes[i].supported, + "Guest mode ID %d (%s) not supported.", + i, vm_guest_mode_string(i)); + func(i, arg); + } +} + +void guest_modes_help(void) +{ + int i; + + printf(" -m: specify the guest mode ID to test\n" + " (default: test all supported modes)\n" + " This option may be used multiple times.\n" + " Guest mode IDs:\n"); + for (i = 0; i < NUM_VM_MODES; ++i) { + printf(" %d: %s%s\n", i, vm_guest_mode_string(i), + guest_modes[i].supported ? " (supported)" : ""); + } +} + +void guest_modes_cmdline(const char *arg) +{ + static bool mode_selected; + unsigned int mode; + int i; + + if (!mode_selected) { + for (i = 0; i < NUM_VM_MODES; ++i) + guest_modes[i].enabled = false; + mode_selected = true; + } + + mode = strtoul(optarg, NULL, 10); + TEST_ASSERT(mode < NUM_VM_MODES, "Guest mode ID %d too big", mode); + guest_modes[mode].enabled = true; +} -- GitLab From 1133e17ea7c9929ff7b90e81d8926f9e870748e9 Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Fri, 18 Dec 2020 15:17:33 +0100 Subject: [PATCH 0350/2012] KVM: selftests: Use vm_create_with_vcpus in create_vm Reviewed-by: Ben Gardon Signed-off-by: Andrew Jones Message-Id: <20201218141734.54359-3-drjones@redhat.com> Signed-off-by: Paolo Bonzini --- .../selftests/kvm/demand_paging_test.c | 2 +- .../selftests/kvm/dirty_log_perf_test.c | 2 - .../testing/selftests/kvm/include/kvm_util.h | 8 ++++ .../selftests/kvm/include/perf_test_util.h | 47 +++++-------------- tools/testing/selftests/kvm/lib/kvm_util.c | 9 +--- 5 files changed, 21 insertions(+), 47 deletions(-) diff --git a/tools/testing/selftests/kvm/demand_paging_test.c b/tools/testing/selftests/kvm/demand_paging_test.c index 946161a9ce2d8..b0c41de32e9bb 100644 --- a/tools/testing/selftests/kvm/demand_paging_test.c +++ b/tools/testing/selftests/kvm/demand_paging_test.c @@ -7,7 +7,7 @@ * Copyright (C) 2019, Google, Inc. */ -#define _GNU_SOURCE /* for program_invocation_name and pipe2 */ +#define _GNU_SOURCE /* for pipe2 */ #include #include diff --git a/tools/testing/selftests/kvm/dirty_log_perf_test.c b/tools/testing/selftests/kvm/dirty_log_perf_test.c index 506741eb5d7f3..36bea75a8d6fa 100644 --- a/tools/testing/selftests/kvm/dirty_log_perf_test.c +++ b/tools/testing/selftests/kvm/dirty_log_perf_test.c @@ -8,8 +8,6 @@ * Copyright (C) 2020, Google, Inc. */ -#define _GNU_SOURCE /* for program_invocation_name */ - #include #include #include diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h index dfa9d369e8fc6..149766ecd68be 100644 --- a/tools/testing/selftests/kvm/include/kvm_util.h +++ b/tools/testing/selftests/kvm/include/kvm_util.h @@ -70,6 +70,14 @@ enum vm_guest_mode { #define vm_guest_mode_string(m) vm_guest_mode_string[m] extern const char * const vm_guest_mode_string[]; +struct vm_guest_mode_params { + unsigned int pa_bits; + unsigned int va_bits; + unsigned int page_size; + unsigned int page_shift; +}; +extern const struct vm_guest_mode_params vm_guest_mode_params[]; + enum vm_mem_backing_src_type { VM_MEM_SRC_ANONYMOUS, VM_MEM_SRC_ANONYMOUS_THP, diff --git a/tools/testing/selftests/kvm/include/perf_test_util.h b/tools/testing/selftests/kvm/include/perf_test_util.h index 239421e4f6b81..cd4c258f458d3 100644 --- a/tools/testing/selftests/kvm/include/perf_test_util.h +++ b/tools/testing/selftests/kvm/include/perf_test_util.h @@ -13,9 +13,6 @@ #define MAX_VCPUS 512 -#define PAGE_SHIFT_4K 12 -#define PTES_PER_4K_PT 512 - #define TEST_MEM_SLOT_INDEX 1 /* Default guest test virtual memory offset */ @@ -94,41 +91,26 @@ static struct kvm_vm *create_vm(enum vm_guest_mode mode, int vcpus, uint64_t vcpu_memory_bytes) { struct kvm_vm *vm; - uint64_t pages = DEFAULT_GUEST_PHY_PAGES; uint64_t guest_num_pages; - /* Account for a few pages per-vCPU for stacks */ - pages += DEFAULT_STACK_PGS * vcpus; - - /* - * Reserve twice the ammount of memory needed to map the test region and - * the page table / stacks region, at 4k, for page tables. Do the - * calculation with 4K page size: the smallest of all archs. (e.g., 64K - * page size guest will need even less memory for page tables). - */ - pages += (2 * pages) / PTES_PER_4K_PT; - pages += ((2 * vcpus * vcpu_memory_bytes) >> PAGE_SHIFT_4K) / - PTES_PER_4K_PT; - pages = vm_adjust_num_guest_pages(mode, pages); - pr_info("Testing guest mode: %s\n", vm_guest_mode_string(mode)); - vm = vm_create(mode, pages, O_RDWR); - kvm_vm_elf_load(vm, program_invocation_name, 0, 0); -#ifdef __x86_64__ - vm_create_irqchip(vm); -#endif - - perf_test_args.vm = vm; - perf_test_args.guest_page_size = vm_get_page_size(vm); perf_test_args.host_page_size = getpagesize(); + perf_test_args.guest_page_size = vm_guest_mode_params[mode].page_size; + guest_num_pages = vm_adjust_num_guest_pages(mode, + (vcpus * vcpu_memory_bytes) / perf_test_args.guest_page_size); + + TEST_ASSERT(vcpu_memory_bytes % perf_test_args.host_page_size == 0, + "Guest memory size is not host page size aligned."); TEST_ASSERT(vcpu_memory_bytes % perf_test_args.guest_page_size == 0, "Guest memory size is not guest page size aligned."); - guest_num_pages = (vcpus * vcpu_memory_bytes) / - perf_test_args.guest_page_size; - guest_num_pages = vm_adjust_num_guest_pages(mode, guest_num_pages); + vm = vm_create_with_vcpus(mode, vcpus, + (vcpus * vcpu_memory_bytes) / perf_test_args.guest_page_size, + 0, guest_code, NULL); + + perf_test_args.vm = vm; /* * If there should be more memory in the guest test region than there @@ -140,18 +122,13 @@ static struct kvm_vm *create_vm(enum vm_guest_mode mode, int vcpus, guest_num_pages, vm_get_max_gfn(vm), vcpus, vcpu_memory_bytes); - TEST_ASSERT(vcpu_memory_bytes % perf_test_args.host_page_size == 0, - "Guest memory size is not host page size aligned."); - guest_test_phys_mem = (vm_get_max_gfn(vm) - guest_num_pages) * perf_test_args.guest_page_size; guest_test_phys_mem &= ~(perf_test_args.host_page_size - 1); - #ifdef __s390x__ /* Align to 1M (segment size) */ guest_test_phys_mem &= ~((1 << 20) - 1); #endif - pr_info("guest physical test memory offset: 0x%lx\n", guest_test_phys_mem); /* Add an extra memory slot for testing */ @@ -177,8 +154,6 @@ static void add_vcpus(struct kvm_vm *vm, int vcpus, uint64_t vcpu_memory_bytes) for (vcpu_id = 0; vcpu_id < vcpus; vcpu_id++) { vcpu_args = &perf_test_args.vcpu_args[vcpu_id]; - vm_vcpu_add_default(vm, vcpu_id, guest_code); - vcpu_args->vcpu_id = vcpu_id; vcpu_args->gva = guest_test_virt_mem + (vcpu_id * vcpu_memory_bytes); diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index 88ef7067f1e66..fa5a90e6c6f07 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -153,14 +153,7 @@ const char * const vm_guest_mode_string[] = { _Static_assert(sizeof(vm_guest_mode_string)/sizeof(char *) == NUM_VM_MODES, "Missing new mode strings?"); -struct vm_guest_mode_params { - unsigned int pa_bits; - unsigned int va_bits; - unsigned int page_size; - unsigned int page_shift; -}; - -static const struct vm_guest_mode_params vm_guest_mode_params[] = { +const struct vm_guest_mode_params vm_guest_mode_params[] = { { 52, 48, 0x1000, 12 }, { 52, 48, 0x10000, 16 }, { 48, 48, 0x1000, 12 }, -- GitLab From b268b6f0bd36322358accb15c45683a9e1220231 Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Fri, 18 Dec 2020 15:17:34 +0100 Subject: [PATCH 0351/2012] KVM: selftests: Implement perf_test_util more conventionally It's not conventional C to put non-inline functions in header files. Create a source file for the functions instead. Also reduce the amount of globals and rename the functions to something less generic. Reviewed-by: Ben Gardon Reviewed-by: Peter Xu Signed-off-by: Andrew Jones Message-Id: <20201218141734.54359-4-drjones@redhat.com> Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/Makefile | 2 +- .../selftests/kvm/demand_paging_test.c | 11 +- .../selftests/kvm/dirty_log_perf_test.c | 22 +-- .../testing/selftests/kvm/include/kvm_util.h | 1 + .../selftests/kvm/include/perf_test_util.h | 142 ++---------------- .../selftests/kvm/lib/perf_test_util.c | 134 +++++++++++++++++ 6 files changed, 166 insertions(+), 146 deletions(-) create mode 100644 tools/testing/selftests/kvm/lib/perf_test_util.c diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index a7286a08c3ae9..fe41c6a0fa67d 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -33,7 +33,7 @@ ifeq ($(ARCH),s390) UNAME_M := s390x endif -LIBKVM = lib/assert.c lib/elf.c lib/io.c lib/kvm_util.c lib/sparsebit.c lib/test_util.c lib/guest_modes.c +LIBKVM = lib/assert.c lib/elf.c lib/io.c lib/kvm_util.c lib/sparsebit.c lib/test_util.c lib/guest_modes.c lib/perf_test_util.c LIBKVM_x86_64 = lib/x86_64/processor.c lib/x86_64/vmx.c lib/x86_64/svm.c lib/x86_64/ucall.c lib/x86_64/handlers.S LIBKVM_aarch64 = lib/aarch64/processor.c lib/aarch64/ucall.c LIBKVM_s390x = lib/s390x/processor.c lib/s390x/ucall.c lib/s390x/diag318_test_handler.c diff --git a/tools/testing/selftests/kvm/demand_paging_test.c b/tools/testing/selftests/kvm/demand_paging_test.c index b0c41de32e9bb..cdad1eca72f74 100644 --- a/tools/testing/selftests/kvm/demand_paging_test.c +++ b/tools/testing/selftests/kvm/demand_paging_test.c @@ -36,12 +36,14 @@ #define PER_VCPU_DEBUG(...) _no_printf(__VA_ARGS__) #endif +static int nr_vcpus = 1; +static uint64_t guest_percpu_mem_size = DEFAULT_PER_VCPU_MEM_SIZE; static char *guest_data_prototype; static void *vcpu_worker(void *data) { int ret; - struct vcpu_args *vcpu_args = (struct vcpu_args *)data; + struct perf_test_vcpu_args *vcpu_args = (struct perf_test_vcpu_args *)data; int vcpu_id = vcpu_args->vcpu_id; struct kvm_vm *vm = perf_test_args.vm; struct kvm_run *run; @@ -263,7 +265,7 @@ static void run_test(enum vm_guest_mode mode, void *arg) int vcpu_id; int r; - vm = create_vm(mode, nr_vcpus, guest_percpu_mem_size); + vm = perf_test_create_vm(mode, nr_vcpus, guest_percpu_mem_size); perf_test_args.wr_fract = 1; @@ -275,7 +277,7 @@ static void run_test(enum vm_guest_mode mode, void *arg) vcpu_threads = malloc(nr_vcpus * sizeof(*vcpu_threads)); TEST_ASSERT(vcpu_threads, "Memory allocation failed"); - add_vcpus(vm, nr_vcpus, guest_percpu_mem_size); + perf_test_setup_vcpus(vm, nr_vcpus, guest_percpu_mem_size); if (p->use_uffd) { uffd_handler_threads = @@ -359,8 +361,7 @@ static void run_test(enum vm_guest_mode mode, void *arg) perf_test_args.vcpu_args[0].pages * nr_vcpus / ((double)ts_diff.tv_sec + (double)ts_diff.tv_nsec / 100000000.0)); - ucall_uninit(vm); - kvm_vm_free(vm); + perf_test_destroy_vm(vm); free(guest_data_prototype); free(vcpu_threads); diff --git a/tools/testing/selftests/kvm/dirty_log_perf_test.c b/tools/testing/selftests/kvm/dirty_log_perf_test.c index 36bea75a8d6fa..2283a0ec74a97 100644 --- a/tools/testing/selftests/kvm/dirty_log_perf_test.c +++ b/tools/testing/selftests/kvm/dirty_log_perf_test.c @@ -22,11 +22,14 @@ /* How many host loops to run by default (one KVM_GET_DIRTY_LOG for each loop)*/ #define TEST_HOST_LOOP_N 2UL +static int nr_vcpus = 1; +static uint64_t guest_percpu_mem_size = DEFAULT_PER_VCPU_MEM_SIZE; + /* Host variables */ static u64 dirty_log_manual_caps; static bool host_quit; static uint64_t iteration; -static uint64_t vcpu_last_completed_iteration[MAX_VCPUS]; +static uint64_t vcpu_last_completed_iteration[KVM_MAX_VCPUS]; static void *vcpu_worker(void *data) { @@ -38,7 +41,7 @@ static void *vcpu_worker(void *data) struct timespec ts_diff; struct timespec total = (struct timespec){0}; struct timespec avg; - struct vcpu_args *vcpu_args = (struct vcpu_args *)data; + struct perf_test_vcpu_args *vcpu_args = (struct perf_test_vcpu_args *)data; int vcpu_id = vcpu_args->vcpu_id; vcpu_args_set(vm, vcpu_id, 1, vcpu_id); @@ -108,7 +111,7 @@ static void run_test(enum vm_guest_mode mode, void *arg) struct kvm_enable_cap cap = {}; struct timespec clear_dirty_log_total = (struct timespec){0}; - vm = create_vm(mode, nr_vcpus, guest_percpu_mem_size); + vm = perf_test_create_vm(mode, nr_vcpus, guest_percpu_mem_size); perf_test_args.wr_fract = p->wr_fract; @@ -126,7 +129,7 @@ static void run_test(enum vm_guest_mode mode, void *arg) vcpu_threads = malloc(nr_vcpus * sizeof(*vcpu_threads)); TEST_ASSERT(vcpu_threads, "Memory allocation failed"); - add_vcpus(vm, nr_vcpus, guest_percpu_mem_size); + perf_test_setup_vcpus(vm, nr_vcpus, guest_percpu_mem_size); sync_global_to_guest(vm, perf_test_args); @@ -152,7 +155,7 @@ static void run_test(enum vm_guest_mode mode, void *arg) /* Enable dirty logging */ clock_gettime(CLOCK_MONOTONIC, &start); - vm_mem_region_set_flags(vm, TEST_MEM_SLOT_INDEX, + vm_mem_region_set_flags(vm, PERF_TEST_MEM_SLOT_INDEX, KVM_MEM_LOG_DIRTY_PAGES); ts_diff = timespec_diff_now(start); pr_info("Enabling dirty logging time: %ld.%.9lds\n\n", @@ -179,7 +182,7 @@ static void run_test(enum vm_guest_mode mode, void *arg) iteration, ts_diff.tv_sec, ts_diff.tv_nsec); clock_gettime(CLOCK_MONOTONIC, &start); - kvm_vm_get_dirty_log(vm, TEST_MEM_SLOT_INDEX, bmap); + kvm_vm_get_dirty_log(vm, PERF_TEST_MEM_SLOT_INDEX, bmap); ts_diff = timespec_diff_now(start); get_dirty_log_total = timespec_add(get_dirty_log_total, @@ -189,7 +192,7 @@ static void run_test(enum vm_guest_mode mode, void *arg) if (dirty_log_manual_caps) { clock_gettime(CLOCK_MONOTONIC, &start); - kvm_vm_clear_dirty_log(vm, TEST_MEM_SLOT_INDEX, bmap, 0, + kvm_vm_clear_dirty_log(vm, PERF_TEST_MEM_SLOT_INDEX, bmap, 0, host_num_pages); ts_diff = timespec_diff_now(start); @@ -207,7 +210,7 @@ static void run_test(enum vm_guest_mode mode, void *arg) /* Disable dirty logging */ clock_gettime(CLOCK_MONOTONIC, &start); - vm_mem_region_set_flags(vm, TEST_MEM_SLOT_INDEX, 0); + vm_mem_region_set_flags(vm, PERF_TEST_MEM_SLOT_INDEX, 0); ts_diff = timespec_diff_now(start); pr_info("Disabling dirty logging time: %ld.%.9lds\n", ts_diff.tv_sec, ts_diff.tv_nsec); @@ -226,8 +229,7 @@ static void run_test(enum vm_guest_mode mode, void *arg) free(bmap); free(vcpu_threads); - ucall_uninit(vm); - kvm_vm_free(vm); + perf_test_destroy_vm(vm); } static void help(char *name) diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h index 149766ecd68be..5cbb861525edf 100644 --- a/tools/testing/selftests/kvm/include/kvm_util.h +++ b/tools/testing/selftests/kvm/include/kvm_util.h @@ -16,6 +16,7 @@ #include "sparsebit.h" +#define KVM_MAX_VCPUS 512 /* * Callers of kvm_util only have an incomplete/opaque description of the diff --git a/tools/testing/selftests/kvm/include/perf_test_util.h b/tools/testing/selftests/kvm/include/perf_test_util.h index cd4c258f458d3..b1188823c31b7 100644 --- a/tools/testing/selftests/kvm/include/perf_test_util.h +++ b/tools/testing/selftests/kvm/include/perf_test_util.h @@ -9,35 +9,15 @@ #define SELFTEST_KVM_PERF_TEST_UTIL_H #include "kvm_util.h" -#include "processor.h" - -#define MAX_VCPUS 512 - -#define TEST_MEM_SLOT_INDEX 1 /* Default guest test virtual memory offset */ #define DEFAULT_GUEST_TEST_MEM 0xc0000000 #define DEFAULT_PER_VCPU_MEM_SIZE (1 << 30) /* 1G */ -/* - * Guest physical memory offset of the testing memory slot. - * This will be set to the topmost valid physical address minus - * the test memory size. - */ -static uint64_t guest_test_phys_mem; - -/* - * Guest virtual memory offset of the testing memory slot. - * Must not conflict with identity mapped test code. - */ -static uint64_t guest_test_virt_mem = DEFAULT_GUEST_TEST_MEM; -static uint64_t guest_percpu_mem_size = DEFAULT_PER_VCPU_MEM_SIZE; - -/* Number of VCPUs for the test */ -static int nr_vcpus = 1; +#define PERF_TEST_MEM_SLOT_INDEX 1 -struct vcpu_args { +struct perf_test_vcpu_args { uint64_t gva; uint64_t pages; @@ -51,119 +31,21 @@ struct perf_test_args { uint64_t guest_page_size; int wr_fract; - struct vcpu_args vcpu_args[MAX_VCPUS]; + struct perf_test_vcpu_args vcpu_args[KVM_MAX_VCPUS]; }; -static struct perf_test_args perf_test_args; +extern struct perf_test_args perf_test_args; /* - * Continuously write to the first 8 bytes of each page in the - * specified region. + * Guest physical memory offset of the testing memory slot. + * This will be set to the topmost valid physical address minus + * the test memory size. */ -static void guest_code(uint32_t vcpu_id) -{ - struct vcpu_args *vcpu_args = &perf_test_args.vcpu_args[vcpu_id]; - uint64_t gva; - uint64_t pages; - int i; - - /* Make sure vCPU args data structure is not corrupt. */ - GUEST_ASSERT(vcpu_args->vcpu_id == vcpu_id); - - gva = vcpu_args->gva; - pages = vcpu_args->pages; - - while (true) { - for (i = 0; i < pages; i++) { - uint64_t addr = gva + (i * perf_test_args.guest_page_size); - - if (i % perf_test_args.wr_fract == 0) - *(uint64_t *)addr = 0x0123456789ABCDEF; - else - READ_ONCE(*(uint64_t *)addr); - } - - GUEST_SYNC(1); - } -} - -static struct kvm_vm *create_vm(enum vm_guest_mode mode, int vcpus, - uint64_t vcpu_memory_bytes) -{ - struct kvm_vm *vm; - uint64_t guest_num_pages; - - pr_info("Testing guest mode: %s\n", vm_guest_mode_string(mode)); - - perf_test_args.host_page_size = getpagesize(); - perf_test_args.guest_page_size = vm_guest_mode_params[mode].page_size; - - guest_num_pages = vm_adjust_num_guest_pages(mode, - (vcpus * vcpu_memory_bytes) / perf_test_args.guest_page_size); - - TEST_ASSERT(vcpu_memory_bytes % perf_test_args.host_page_size == 0, - "Guest memory size is not host page size aligned."); - TEST_ASSERT(vcpu_memory_bytes % perf_test_args.guest_page_size == 0, - "Guest memory size is not guest page size aligned."); - - vm = vm_create_with_vcpus(mode, vcpus, - (vcpus * vcpu_memory_bytes) / perf_test_args.guest_page_size, - 0, guest_code, NULL); - - perf_test_args.vm = vm; - - /* - * If there should be more memory in the guest test region than there - * can be pages in the guest, it will definitely cause problems. - */ - TEST_ASSERT(guest_num_pages < vm_get_max_gfn(vm), - "Requested more guest memory than address space allows.\n" - " guest pages: %lx max gfn: %x vcpus: %d wss: %lx]\n", - guest_num_pages, vm_get_max_gfn(vm), vcpus, - vcpu_memory_bytes); - - guest_test_phys_mem = (vm_get_max_gfn(vm) - guest_num_pages) * - perf_test_args.guest_page_size; - guest_test_phys_mem &= ~(perf_test_args.host_page_size - 1); -#ifdef __s390x__ - /* Align to 1M (segment size) */ - guest_test_phys_mem &= ~((1 << 20) - 1); -#endif - pr_info("guest physical test memory offset: 0x%lx\n", guest_test_phys_mem); - - /* Add an extra memory slot for testing */ - vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, - guest_test_phys_mem, - TEST_MEM_SLOT_INDEX, - guest_num_pages, 0); - - /* Do mapping for the demand paging memory slot */ - virt_map(vm, guest_test_virt_mem, guest_test_phys_mem, guest_num_pages, 0); - - ucall_init(vm, NULL); - - return vm; -} - -static void add_vcpus(struct kvm_vm *vm, int vcpus, uint64_t vcpu_memory_bytes) -{ - vm_paddr_t vcpu_gpa; - struct vcpu_args *vcpu_args; - int vcpu_id; - - for (vcpu_id = 0; vcpu_id < vcpus; vcpu_id++) { - vcpu_args = &perf_test_args.vcpu_args[vcpu_id]; - - vcpu_args->vcpu_id = vcpu_id; - vcpu_args->gva = guest_test_virt_mem + - (vcpu_id * vcpu_memory_bytes); - vcpu_args->pages = vcpu_memory_bytes / - perf_test_args.guest_page_size; +extern uint64_t guest_test_phys_mem; - vcpu_gpa = guest_test_phys_mem + (vcpu_id * vcpu_memory_bytes); - pr_debug("Added VCPU %d with test mem gpa [%lx, %lx)\n", - vcpu_id, vcpu_gpa, vcpu_gpa + vcpu_memory_bytes); - } -} +struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int vcpus, + uint64_t vcpu_memory_bytes); +void perf_test_destroy_vm(struct kvm_vm *vm); +void perf_test_setup_vcpus(struct kvm_vm *vm, int vcpus, uint64_t vcpu_memory_bytes); #endif /* SELFTEST_KVM_PERF_TEST_UTIL_H */ diff --git a/tools/testing/selftests/kvm/lib/perf_test_util.c b/tools/testing/selftests/kvm/lib/perf_test_util.c new file mode 100644 index 0000000000000..9be1944c2d1c9 --- /dev/null +++ b/tools/testing/selftests/kvm/lib/perf_test_util.c @@ -0,0 +1,134 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2020, Google LLC. + */ + +#include "kvm_util.h" +#include "perf_test_util.h" +#include "processor.h" + +struct perf_test_args perf_test_args; + +uint64_t guest_test_phys_mem; + +/* + * Guest virtual memory offset of the testing memory slot. + * Must not conflict with identity mapped test code. + */ +static uint64_t guest_test_virt_mem = DEFAULT_GUEST_TEST_MEM; + +/* + * Continuously write to the first 8 bytes of each page in the + * specified region. + */ +static void guest_code(uint32_t vcpu_id) +{ + struct perf_test_vcpu_args *vcpu_args = &perf_test_args.vcpu_args[vcpu_id]; + uint64_t gva; + uint64_t pages; + int i; + + /* Make sure vCPU args data structure is not corrupt. */ + GUEST_ASSERT(vcpu_args->vcpu_id == vcpu_id); + + gva = vcpu_args->gva; + pages = vcpu_args->pages; + + while (true) { + for (i = 0; i < pages; i++) { + uint64_t addr = gva + (i * perf_test_args.guest_page_size); + + if (i % perf_test_args.wr_fract == 0) + *(uint64_t *)addr = 0x0123456789ABCDEF; + else + READ_ONCE(*(uint64_t *)addr); + } + + GUEST_SYNC(1); + } +} + +struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int vcpus, + uint64_t vcpu_memory_bytes) +{ + struct kvm_vm *vm; + uint64_t guest_num_pages; + + pr_info("Testing guest mode: %s\n", vm_guest_mode_string(mode)); + + perf_test_args.host_page_size = getpagesize(); + perf_test_args.guest_page_size = vm_guest_mode_params[mode].page_size; + + guest_num_pages = vm_adjust_num_guest_pages(mode, + (vcpus * vcpu_memory_bytes) / perf_test_args.guest_page_size); + + TEST_ASSERT(vcpu_memory_bytes % perf_test_args.host_page_size == 0, + "Guest memory size is not host page size aligned."); + TEST_ASSERT(vcpu_memory_bytes % perf_test_args.guest_page_size == 0, + "Guest memory size is not guest page size aligned."); + + vm = vm_create_with_vcpus(mode, vcpus, + (vcpus * vcpu_memory_bytes) / perf_test_args.guest_page_size, + 0, guest_code, NULL); + + perf_test_args.vm = vm; + + /* + * If there should be more memory in the guest test region than there + * can be pages in the guest, it will definitely cause problems. + */ + TEST_ASSERT(guest_num_pages < vm_get_max_gfn(vm), + "Requested more guest memory than address space allows.\n" + " guest pages: %lx max gfn: %x vcpus: %d wss: %lx]\n", + guest_num_pages, vm_get_max_gfn(vm), vcpus, + vcpu_memory_bytes); + + guest_test_phys_mem = (vm_get_max_gfn(vm) - guest_num_pages) * + perf_test_args.guest_page_size; + guest_test_phys_mem &= ~(perf_test_args.host_page_size - 1); +#ifdef __s390x__ + /* Align to 1M (segment size) */ + guest_test_phys_mem &= ~((1 << 20) - 1); +#endif + pr_info("guest physical test memory offset: 0x%lx\n", guest_test_phys_mem); + + /* Add an extra memory slot for testing */ + vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, + guest_test_phys_mem, + PERF_TEST_MEM_SLOT_INDEX, + guest_num_pages, 0); + + /* Do mapping for the demand paging memory slot */ + virt_map(vm, guest_test_virt_mem, guest_test_phys_mem, guest_num_pages, 0); + + ucall_init(vm, NULL); + + return vm; +} + +void perf_test_destroy_vm(struct kvm_vm *vm) +{ + ucall_uninit(vm); + kvm_vm_free(vm); +} + +void perf_test_setup_vcpus(struct kvm_vm *vm, int vcpus, uint64_t vcpu_memory_bytes) +{ + vm_paddr_t vcpu_gpa; + struct perf_test_vcpu_args *vcpu_args; + int vcpu_id; + + for (vcpu_id = 0; vcpu_id < vcpus; vcpu_id++) { + vcpu_args = &perf_test_args.vcpu_args[vcpu_id]; + + vcpu_args->vcpu_id = vcpu_id; + vcpu_args->gva = guest_test_virt_mem + + (vcpu_id * vcpu_memory_bytes); + vcpu_args->pages = vcpu_memory_bytes / + perf_test_args.guest_page_size; + + vcpu_gpa = guest_test_phys_mem + (vcpu_id * vcpu_memory_bytes); + pr_debug("Added VCPU %d with test mem gpa [%lx, %lx)\n", + vcpu_id, vcpu_gpa, vcpu_gpa + vcpu_memory_bytes); + } +} -- GitLab From 2f80d502d627f30257ba7e3655e71c373b7d1a5a Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 22 Dec 2020 05:20:43 -0500 Subject: [PATCH 0352/2012] KVM: x86: fix shift out of bounds reported by UBSAN Since we know that e >= s, we can reassociate the left shift, changing the shifted number from 1 to 2 in exchange for decreasing the right hand side by 1. Reported-by: syzbot+e87846c48bf72bc85311@syzkaller.appspotmail.com Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h index 9c4a9c8e43d90..581925e476d6c 100644 --- a/arch/x86/kvm/mmu.h +++ b/arch/x86/kvm/mmu.h @@ -49,7 +49,7 @@ static inline u64 rsvd_bits(int s, int e) if (e < s) return 0; - return ((1ULL << (e - s + 1)) - 1) << s; + return ((2ULL << (e - s)) - 1) << s; } void kvm_mmu_set_mmio_spte_mask(u64 mmio_value, u64 access_mask); -- GitLab From 7f0c1f1a8277de906a242a6ef907476149f006de Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 6 Jan 2021 10:29:16 -0800 Subject: [PATCH 0353/2012] MAINTAINERS: Really update email address for Sean Christopherson Use my @google.com address in MAINTAINERS, somehow only the .mailmap entry was added when the original update patch was applied. Fixes: c2b1209d852f ("MAINTAINERS: Update email address for Sean Christopherson") Cc: kvm@vger.kernel.org Reported-by: Nathan Chancellor Signed-off-by: Sean Christopherson Message-Id: <20210106182916.331743-1-seanjc@google.com> Signed-off-by: Paolo Bonzini --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 281de213ef478..9d8b77332ae38 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -9672,7 +9672,7 @@ F: tools/testing/selftests/kvm/s390x/ KERNEL VIRTUAL MACHINE FOR X86 (KVM/x86) M: Paolo Bonzini -R: Sean Christopherson +R: Sean Christopherson R: Vitaly Kuznetsov R: Wanpeng Li R: Jim Mattson -- GitLab From de7860c8a388e4cb757c7da26889b9e2641ffcfe Mon Sep 17 00:00:00 2001 From: Stephen Zhang Date: Fri, 18 Dec 2020 15:51:37 +0800 Subject: [PATCH 0354/2012] KVM: x86: change in pv_eoi_get_pending() to make code more readable Signed-off-by: Stephen Zhang Message-Id: <1608277897-1932-1-git-send-email-stephenzhangzsd@gmail.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/lapic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 3136e05831cf3..78823227c5929 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -674,7 +674,7 @@ static bool pv_eoi_get_pending(struct kvm_vcpu *vcpu) (unsigned long long)vcpu->arch.pv_eoi.msr_val); return false; } - return val & 0x1; + return val & KVM_PV_EOI_ENABLED; } static void pv_eoi_set_pending(struct kvm_vcpu *vcpu) -- GitLab From 88bf56d04bc3564542049ec4ec168a8b60d0b48c Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Thu, 17 Dec 2020 23:41:18 +0800 Subject: [PATCH 0355/2012] kvm: check tlbs_dirty directly In kvm_mmu_notifier_invalidate_range_start(), tlbs_dirty is used as: need_tlb_flush |= kvm->tlbs_dirty; with need_tlb_flush's type being int and tlbs_dirty's type being long. It means that tlbs_dirty is always used as int and the higher 32 bits is useless. We need to check tlbs_dirty in a correct way and this change checks it directly without propagating it to need_tlb_flush. Note: it's _extremely_ unlikely this neglecting of higher 32 bits can cause problems in practice. It would require encountering tlbs_dirty on a 4 billion count boundary, and KVM would need to be using shadow paging or be running a nested guest. Cc: stable@vger.kernel.org Fixes: a4ee1ca4a36e ("KVM: MMU: delay flush all tlbs on sync_page path") Signed-off-by: Lai Jiangshan Message-Id: <20201217154118.16497-1-jiangshanlai@gmail.com> Signed-off-by: Paolo Bonzini --- virt/kvm/kvm_main.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 3abcb2ce5b7db..19dae28904f71 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -485,9 +485,8 @@ static int kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn, kvm->mmu_notifier_count++; need_tlb_flush = kvm_unmap_hva_range(kvm, range->start, range->end, range->flags); - need_tlb_flush |= kvm->tlbs_dirty; /* we've to flush the tlb before the pages can be freed */ - if (need_tlb_flush) + if (need_tlb_flush || kvm->tlbs_dirty) kvm_flush_remote_tlbs(kvm); spin_unlock(&kvm->mmu_lock); -- GitLab From a889ea54b3daa63ee1463dc19ed699407d61458b Mon Sep 17 00:00:00 2001 From: Ben Gardon Date: Wed, 6 Jan 2021 16:19:34 -0800 Subject: [PATCH 0356/2012] KVM: x86/mmu: Ensure TDP MMU roots are freed after yield Many TDP MMU functions which need to perform some action on all TDP MMU roots hold a reference on that root so that they can safely drop the MMU lock in order to yield to other threads. However, when releasing the reference on the root, there is a bug: the root will not be freed even if its reference count (root_count) is reduced to 0. To simplify acquiring and releasing references on TDP MMU root pages, and to ensure that these roots are properly freed, move the get/put operations into another TDP MMU root iterator macro. Moving the get/put operations into an iterator macro also helps simplify control flow when a root does need to be freed. Note that using the list_for_each_entry_safe macro would not have been appropriate in this situation because it could keep a pointer to the next root across an MMU lock release + reacquire, during which time that root could be freed. Reported-by: Maciej S. Szmigiero Suggested-by: Paolo Bonzini Fixes: faaf05b00aec ("kvm: x86/mmu: Support zapping SPTEs in the TDP MMU") Fixes: 063afacd8730 ("kvm: x86/mmu: Support invalidate range MMU notifier for TDP MMU") Fixes: a6a0b05da9f3 ("kvm: x86/mmu: Support dirty logging for the TDP MMU") Fixes: 14881998566d ("kvm: x86/mmu: Support disabling dirty logging for the tdp MMU") Signed-off-by: Ben Gardon Message-Id: <20210107001935.3732070-1-bgardon@google.com> Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/tdp_mmu.c | 104 +++++++++++++++++-------------------- 1 file changed, 48 insertions(+), 56 deletions(-) diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c index 6574af2d09944..2ef8615f9dba8 100644 --- a/arch/x86/kvm/mmu/tdp_mmu.c +++ b/arch/x86/kvm/mmu/tdp_mmu.c @@ -44,7 +44,48 @@ void kvm_mmu_uninit_tdp_mmu(struct kvm *kvm) WARN_ON(!list_empty(&kvm->arch.tdp_mmu_roots)); } -#define for_each_tdp_mmu_root(_kvm, _root) \ +static void tdp_mmu_put_root(struct kvm *kvm, struct kvm_mmu_page *root) +{ + if (kvm_mmu_put_root(kvm, root)) + kvm_tdp_mmu_free_root(kvm, root); +} + +static inline bool tdp_mmu_next_root_valid(struct kvm *kvm, + struct kvm_mmu_page *root) +{ + lockdep_assert_held(&kvm->mmu_lock); + + if (list_entry_is_head(root, &kvm->arch.tdp_mmu_roots, link)) + return false; + + kvm_mmu_get_root(kvm, root); + return true; + +} + +static inline struct kvm_mmu_page *tdp_mmu_next_root(struct kvm *kvm, + struct kvm_mmu_page *root) +{ + struct kvm_mmu_page *next_root; + + next_root = list_next_entry(root, link); + tdp_mmu_put_root(kvm, root); + return next_root; +} + +/* + * Note: this iterator gets and puts references to the roots it iterates over. + * This makes it safe to release the MMU lock and yield within the loop, but + * if exiting the loop early, the caller must drop the reference to the most + * recent root. (Unless keeping a live reference is desirable.) + */ +#define for_each_tdp_mmu_root_yield_safe(_kvm, _root) \ + for (_root = list_first_entry(&_kvm->arch.tdp_mmu_roots, \ + typeof(*_root), link); \ + tdp_mmu_next_root_valid(_kvm, _root); \ + _root = tdp_mmu_next_root(_kvm, _root)) + +#define for_each_tdp_mmu_root(_kvm, _root) \ list_for_each_entry(_root, &_kvm->arch.tdp_mmu_roots, link) bool is_tdp_mmu_root(struct kvm *kvm, hpa_t hpa) @@ -447,18 +488,9 @@ bool kvm_tdp_mmu_zap_gfn_range(struct kvm *kvm, gfn_t start, gfn_t end) struct kvm_mmu_page *root; bool flush = false; - for_each_tdp_mmu_root(kvm, root) { - /* - * Take a reference on the root so that it cannot be freed if - * this thread releases the MMU lock and yields in this loop. - */ - kvm_mmu_get_root(kvm, root); - + for_each_tdp_mmu_root_yield_safe(kvm, root) flush |= zap_gfn_range(kvm, root, start, end, true); - kvm_mmu_put_root(kvm, root); - } - return flush; } @@ -619,13 +651,7 @@ static int kvm_tdp_mmu_handle_hva_range(struct kvm *kvm, unsigned long start, int ret = 0; int as_id; - for_each_tdp_mmu_root(kvm, root) { - /* - * Take a reference on the root so that it cannot be freed if - * this thread releases the MMU lock and yields in this loop. - */ - kvm_mmu_get_root(kvm, root); - + for_each_tdp_mmu_root_yield_safe(kvm, root) { as_id = kvm_mmu_page_as_id(root); slots = __kvm_memslots(kvm, as_id); kvm_for_each_memslot(memslot, slots) { @@ -647,8 +673,6 @@ static int kvm_tdp_mmu_handle_hva_range(struct kvm *kvm, unsigned long start, ret |= handler(kvm, memslot, root, gfn_start, gfn_end, data); } - - kvm_mmu_put_root(kvm, root); } return ret; @@ -838,21 +862,13 @@ bool kvm_tdp_mmu_wrprot_slot(struct kvm *kvm, struct kvm_memory_slot *slot, int root_as_id; bool spte_set = false; - for_each_tdp_mmu_root(kvm, root) { + for_each_tdp_mmu_root_yield_safe(kvm, root) { root_as_id = kvm_mmu_page_as_id(root); if (root_as_id != slot->as_id) continue; - /* - * Take a reference on the root so that it cannot be freed if - * this thread releases the MMU lock and yields in this loop. - */ - kvm_mmu_get_root(kvm, root); - spte_set |= wrprot_gfn_range(kvm, root, slot->base_gfn, slot->base_gfn + slot->npages, min_level); - - kvm_mmu_put_root(kvm, root); } return spte_set; @@ -906,21 +922,13 @@ bool kvm_tdp_mmu_clear_dirty_slot(struct kvm *kvm, struct kvm_memory_slot *slot) int root_as_id; bool spte_set = false; - for_each_tdp_mmu_root(kvm, root) { + for_each_tdp_mmu_root_yield_safe(kvm, root) { root_as_id = kvm_mmu_page_as_id(root); if (root_as_id != slot->as_id) continue; - /* - * Take a reference on the root so that it cannot be freed if - * this thread releases the MMU lock and yields in this loop. - */ - kvm_mmu_get_root(kvm, root); - spte_set |= clear_dirty_gfn_range(kvm, root, slot->base_gfn, slot->base_gfn + slot->npages); - - kvm_mmu_put_root(kvm, root); } return spte_set; @@ -1029,21 +1037,13 @@ bool kvm_tdp_mmu_slot_set_dirty(struct kvm *kvm, struct kvm_memory_slot *slot) int root_as_id; bool spte_set = false; - for_each_tdp_mmu_root(kvm, root) { + for_each_tdp_mmu_root_yield_safe(kvm, root) { root_as_id = kvm_mmu_page_as_id(root); if (root_as_id != slot->as_id) continue; - /* - * Take a reference on the root so that it cannot be freed if - * this thread releases the MMU lock and yields in this loop. - */ - kvm_mmu_get_root(kvm, root); - spte_set |= set_dirty_gfn_range(kvm, root, slot->base_gfn, slot->base_gfn + slot->npages); - - kvm_mmu_put_root(kvm, root); } return spte_set; } @@ -1089,21 +1089,13 @@ void kvm_tdp_mmu_zap_collapsible_sptes(struct kvm *kvm, struct kvm_mmu_page *root; int root_as_id; - for_each_tdp_mmu_root(kvm, root) { + for_each_tdp_mmu_root_yield_safe(kvm, root) { root_as_id = kvm_mmu_page_as_id(root); if (root_as_id != slot->as_id) continue; - /* - * Take a reference on the root so that it cannot be freed if - * this thread releases the MMU lock and yields in this loop. - */ - kvm_mmu_get_root(kvm, root); - zap_collapsible_spte_range(kvm, root, slot->base_gfn, slot->base_gfn + slot->npages); - - kvm_mmu_put_root(kvm, root); } } -- GitLab From c0dba6e46825716db15c4b3a8f05c85b4a59edda Mon Sep 17 00:00:00 2001 From: Ben Gardon Date: Wed, 6 Jan 2021 16:19:35 -0800 Subject: [PATCH 0357/2012] KVM: x86/mmu: Clarify TDP MMU page list invariants The tdp_mmu_roots and tdp_mmu_pages in struct kvm_arch should only contain pages with tdp_mmu_page set to true. tdp_mmu_pages should not contain any pages with a non-zero root_count and tdp_mmu_roots should only contain pages with a positive root_count, unless a thread holds the MMU lock and is in the process of modifying the list. Various functions expect these invariants to be maintained, but they are not explictily documented. Add to the comments on both fields to document the above invariants. Signed-off-by: Ben Gardon Message-Id: <20210107001935.3732070-2-bgardon@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 3ab7b46087b74..afed3da3b3a09 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1010,9 +1010,21 @@ struct kvm_arch { */ bool tdp_mmu_enabled; - /* List of struct tdp_mmu_pages being used as roots */ + /* + * List of struct kvmp_mmu_pages being used as roots. + * All struct kvm_mmu_pages in the list should have + * tdp_mmu_page set. + * All struct kvm_mmu_pages in the list should have a positive + * root_count except when a thread holds the MMU lock and is removing + * an entry from the list. + */ struct list_head tdp_mmu_roots; - /* List of struct tdp_mmu_pages not being used as roots */ + + /* + * List of struct kvmp_mmu_pages not being used as roots. + * All struct kvm_mmu_pages in the list should have + * tdp_mmu_page set and a root_count of 0. + */ struct list_head tdp_mmu_pages; }; -- GitLab From 81f76adad560dfc39cb9625cf1e00a7e2b7b88df Mon Sep 17 00:00:00 2001 From: Maxim Levitsky Date: Thu, 7 Jan 2021 11:38:52 +0200 Subject: [PATCH 0358/2012] KVM: nSVM: correctly restore nested_run_pending on migration The code to store it on the migration exists, but no code was restoring it. One of the side effects of fixing this is that L1->L2 injected events are no longer lost when migration happens with nested run pending. Signed-off-by: Maxim Levitsky Message-Id: <20210107093854.882483-3-mlevitsk@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/nested.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index b0b667456b2e7..a466336aab43a 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -1194,6 +1194,10 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu, * in the registers, the save area of the nested state instead * contains saved L1 state. */ + + svm->nested.nested_run_pending = + !!(kvm_state->flags & KVM_STATE_NESTED_RUN_PENDING); + copy_vmcb_control_area(&hsave->control, &svm->vmcb->control); hsave->save = *save; -- GitLab From 56fe28de8c4f0167275c411c0daa5709e9a47bd7 Mon Sep 17 00:00:00 2001 From: Maxim Levitsky Date: Thu, 7 Jan 2021 11:38:54 +0200 Subject: [PATCH 0359/2012] KVM: nSVM: mark vmcb as dirty when forcingly leaving the guest mode We overwrite most of vmcb fields while doing so, so we must mark it as dirty. Signed-off-by: Maxim Levitsky Message-Id: <20210107093854.882483-5-mlevitsk@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/nested.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index a466336aab43a..a622e63739b4a 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -754,6 +754,7 @@ void svm_leave_nested(struct vcpu_svm *svm) leave_guest_mode(&svm->vcpu); copy_vmcb_control_area(&vmcb->control, &hsave->control); nested_svm_uninit_mmu_context(&svm->vcpu); + vmcb_mark_all_dirty(svm->vmcb); } kvm_clear_request(KVM_REQ_GET_NESTED_STATE_PAGES, &svm->vcpu); -- GitLab From f2c7ef3ba9556d62a7e2bb23b563c6510007d55c Mon Sep 17 00:00:00 2001 From: Maxim Levitsky Date: Thu, 7 Jan 2021 11:38:51 +0200 Subject: [PATCH 0360/2012] KVM: nSVM: cancel KVM_REQ_GET_NESTED_STATE_PAGES on nested vmexit It is possible to exit the nested guest mode, entered by svm_set_nested_state prior to first vm entry to it (e.g due to pending event) if the nested run was not pending during the migration. In this case we must not switch to the nested msr permission bitmap. Also add a warning to catch similar cases in the future. Fixes: a7d5c7ce41ac1 ("KVM: nSVM: delay MSR permission processing to first nested VM run") Signed-off-by: Maxim Levitsky Message-Id: <20210107093854.882483-2-mlevitsk@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/nested.c | 3 +++ arch/x86/kvm/vmx/nested.c | 2 ++ arch/x86/kvm/x86.c | 4 +++- 3 files changed, 8 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index a622e63739b4a..cb4c6ee10029c 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -199,6 +199,7 @@ static bool nested_svm_vmrun_msrpm(struct vcpu_svm *svm) static bool svm_get_nested_state_pages(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); + if (!nested_svm_vmrun_msrpm(svm)) { vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; vcpu->run->internal.suberror = @@ -595,6 +596,8 @@ int nested_svm_vmexit(struct vcpu_svm *svm) svm->nested.vmcb12_gpa = 0; WARN_ON_ONCE(svm->nested.nested_run_pending); + kvm_clear_request(KVM_REQ_GET_NESTED_STATE_PAGES, &svm->vcpu); + /* in case we halted in L2 */ svm->vcpu.arch.mp_state = KVM_MP_STATE_RUNNABLE; diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index e2f26564a12de..0fbb46990dfce 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -4442,6 +4442,8 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 vm_exit_reason, /* trying to cancel vmlaunch/vmresume is a bug */ WARN_ON_ONCE(vmx->nested.nested_run_pending); + kvm_clear_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu); + /* Service the TLB flush request for L2 before switching to L1. */ if (kvm_check_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu)) kvm_vcpu_flush_tlb_current(vcpu); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index e28ab76b80dc9..f6e7b25c40e23 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -8789,7 +8789,9 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) if (kvm_request_pending(vcpu)) { if (kvm_check_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu)) { - if (unlikely(!kvm_x86_ops.nested_ops->get_nested_state_pages(vcpu))) { + if (WARN_ON_ONCE(!is_guest_mode(vcpu))) + ; + else if (unlikely(!kvm_x86_ops.nested_ops->get_nested_state_pages(vcpu))) { r = 0; goto out; } -- GitLab From 647daca25d24fb6eadc7b6cd680ad3e6eed0f3d5 Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Mon, 4 Jan 2021 14:20:01 -0600 Subject: [PATCH 0361/2012] KVM: SVM: Add support for booting APs in an SEV-ES guest Typically under KVM, an AP is booted using the INIT-SIPI-SIPI sequence, where the guest vCPU register state is updated and then the vCPU is VMRUN to begin execution of the AP. For an SEV-ES guest, this won't work because the guest register state is encrypted. Following the GHCB specification, the hypervisor must not alter the guest register state, so KVM must track an AP/vCPU boot. Should the guest want to park the AP, it must use the AP Reset Hold exit event in place of, for example, a HLT loop. First AP boot (first INIT-SIPI-SIPI sequence): Execute the AP (vCPU) as it was initialized and measured by the SEV-ES support. It is up to the guest to transfer control of the AP to the proper location. Subsequent AP boot: KVM will expect to receive an AP Reset Hold exit event indicating that the vCPU is being parked and will require an INIT-SIPI-SIPI sequence to awaken it. When the AP Reset Hold exit event is received, KVM will place the vCPU into a simulated HLT mode. Upon receiving the INIT-SIPI-SIPI sequence, KVM will make the vCPU runnable. It is again up to the guest to then transfer control of the AP to the proper location. To differentiate between an actual HLT and an AP Reset Hold, a new MP state is introduced, KVM_MP_STATE_AP_RESET_HOLD, which the vCPU is placed in upon receiving the AP Reset Hold exit event. Additionally, to communicate the AP Reset Hold exit event up to userspace (if needed), a new exit reason is introduced, KVM_EXIT_AP_RESET_HOLD. A new x86 ops function is introduced, vcpu_deliver_sipi_vector, in order to accomplish AP booting. For VMX, vcpu_deliver_sipi_vector is set to the original SIPI delivery function, kvm_vcpu_deliver_sipi_vector(). SVM adds a new function that, for non SEV-ES guests, invokes the original SIPI delivery function, kvm_vcpu_deliver_sipi_vector(), but for SEV-ES guests, implements the logic above. Signed-off-by: Tom Lendacky Message-Id: Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 3 +++ arch/x86/kvm/lapic.c | 2 +- arch/x86/kvm/svm/sev.c | 22 ++++++++++++++++++++++ arch/x86/kvm/svm/svm.c | 10 ++++++++++ arch/x86/kvm/svm/svm.h | 2 ++ arch/x86/kvm/vmx/vmx.c | 2 ++ arch/x86/kvm/x86.c | 26 +++++++++++++++++++++----- include/uapi/linux/kvm.h | 2 ++ 8 files changed, 63 insertions(+), 6 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index afed3da3b3a09..3d6616f6f6ef8 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1299,6 +1299,8 @@ struct kvm_x86_ops { void (*migrate_timers)(struct kvm_vcpu *vcpu); void (*msr_filter_changed)(struct kvm_vcpu *vcpu); int (*complete_emulated_msr)(struct kvm_vcpu *vcpu, int err); + + void (*vcpu_deliver_sipi_vector)(struct kvm_vcpu *vcpu, u8 vector); }; struct kvm_x86_nested_ops { @@ -1480,6 +1482,7 @@ int kvm_fast_pio(struct kvm_vcpu *vcpu, int size, unsigned short port, int in); int kvm_emulate_cpuid(struct kvm_vcpu *vcpu); int kvm_emulate_halt(struct kvm_vcpu *vcpu); int kvm_vcpu_halt(struct kvm_vcpu *vcpu); +int kvm_emulate_ap_reset_hold(struct kvm_vcpu *vcpu); int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu); void kvm_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg); diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 78823227c5929..43cceadd073ed 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -2898,7 +2898,7 @@ void kvm_apic_accept_events(struct kvm_vcpu *vcpu) /* evaluate pending_events before reading the vector */ smp_rmb(); sipi_vector = apic->sipi_vector; - kvm_vcpu_deliver_sipi_vector(vcpu, sipi_vector); + kvm_x86_ops.vcpu_deliver_sipi_vector(vcpu, sipi_vector); vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; } } diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 563ced07b0b85..c8ffdbc81709e 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -1563,6 +1563,7 @@ static int sev_es_validate_vmgexit(struct vcpu_svm *svm) goto vmgexit_err; break; case SVM_VMGEXIT_NMI_COMPLETE: + case SVM_VMGEXIT_AP_HLT_LOOP: case SVM_VMGEXIT_AP_JUMP_TABLE: case SVM_VMGEXIT_UNSUPPORTED_EVENT: break; @@ -1888,6 +1889,9 @@ int sev_handle_vmgexit(struct vcpu_svm *svm) case SVM_VMGEXIT_NMI_COMPLETE: ret = svm_invoke_exit_handler(svm, SVM_EXIT_IRET); break; + case SVM_VMGEXIT_AP_HLT_LOOP: + ret = kvm_emulate_ap_reset_hold(&svm->vcpu); + break; case SVM_VMGEXIT_AP_JUMP_TABLE: { struct kvm_sev_info *sev = &to_kvm_svm(svm->vcpu.kvm)->sev_info; @@ -2040,3 +2044,21 @@ void sev_es_vcpu_put(struct vcpu_svm *svm) wrmsrl(host_save_user_msrs[i].index, svm->host_user_msrs[i]); } } + +void sev_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector) +{ + struct vcpu_svm *svm = to_svm(vcpu); + + /* First SIPI: Use the values as initially set by the VMM */ + if (!svm->received_first_sipi) { + svm->received_first_sipi = true; + return; + } + + /* + * Subsequent SIPI: Return from an AP Reset Hold VMGEXIT, where + * the guest will set the CS and RIP. Set SW_EXIT_INFO_2 to a + * non-zero value. + */ + ghcb_set_sw_exit_info_2(svm->ghcb, 1); +} diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 6824d611dc5dd..7ef171790d02b 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -4382,6 +4382,14 @@ static bool svm_apic_init_signal_blocked(struct kvm_vcpu *vcpu) (vmcb_is_intercept(&svm->vmcb->control, INTERCEPT_INIT)); } +static void svm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector) +{ + if (!sev_es_guest(vcpu->kvm)) + return kvm_vcpu_deliver_sipi_vector(vcpu, vector); + + sev_vcpu_deliver_sipi_vector(vcpu, vector); +} + static void svm_vm_destroy(struct kvm *kvm) { avic_vm_destroy(kvm); @@ -4524,6 +4532,8 @@ static struct kvm_x86_ops svm_x86_ops __initdata = { .msr_filter_changed = svm_msr_filter_changed, .complete_emulated_msr = svm_complete_emulated_msr, + + .vcpu_deliver_sipi_vector = svm_vcpu_deliver_sipi_vector, }; static struct kvm_x86_init_ops svm_init_ops __initdata = { diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index 5431e6335e2e8..0fe874ae54982 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -185,6 +185,7 @@ struct vcpu_svm { struct vmcb_save_area *vmsa; struct ghcb *ghcb; struct kvm_host_map ghcb_map; + bool received_first_sipi; /* SEV-ES scratch area support */ void *ghcb_sa; @@ -591,6 +592,7 @@ void sev_es_init_vmcb(struct vcpu_svm *svm); void sev_es_create_vcpu(struct vcpu_svm *svm); void sev_es_vcpu_load(struct vcpu_svm *svm, int cpu); void sev_es_vcpu_put(struct vcpu_svm *svm); +void sev_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector); /* vmenter.S */ diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 75c9c6a0a3a45..2af05d3b05909 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -7707,6 +7707,8 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = { .msr_filter_changed = vmx_msr_filter_changed, .complete_emulated_msr = kvm_complete_insn_gp, .cpu_dirty_log_size = vmx_cpu_dirty_log_size, + + .vcpu_deliver_sipi_vector = kvm_vcpu_deliver_sipi_vector, }; static __init int hardware_setup(void) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index f6e7b25c40e23..0287840b93e09 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -7976,17 +7976,22 @@ void kvm_arch_exit(void) kmem_cache_destroy(x86_fpu_cache); } -int kvm_vcpu_halt(struct kvm_vcpu *vcpu) +int __kvm_vcpu_halt(struct kvm_vcpu *vcpu, int state, int reason) { ++vcpu->stat.halt_exits; if (lapic_in_kernel(vcpu)) { - vcpu->arch.mp_state = KVM_MP_STATE_HALTED; + vcpu->arch.mp_state = state; return 1; } else { - vcpu->run->exit_reason = KVM_EXIT_HLT; + vcpu->run->exit_reason = reason; return 0; } } + +int kvm_vcpu_halt(struct kvm_vcpu *vcpu) +{ + return __kvm_vcpu_halt(vcpu, KVM_MP_STATE_HALTED, KVM_EXIT_HLT); +} EXPORT_SYMBOL_GPL(kvm_vcpu_halt); int kvm_emulate_halt(struct kvm_vcpu *vcpu) @@ -8000,6 +8005,14 @@ int kvm_emulate_halt(struct kvm_vcpu *vcpu) } EXPORT_SYMBOL_GPL(kvm_emulate_halt); +int kvm_emulate_ap_reset_hold(struct kvm_vcpu *vcpu) +{ + int ret = kvm_skip_emulated_instruction(vcpu); + + return __kvm_vcpu_halt(vcpu, KVM_MP_STATE_AP_RESET_HOLD, KVM_EXIT_AP_RESET_HOLD) && ret; +} +EXPORT_SYMBOL_GPL(kvm_emulate_ap_reset_hold); + #ifdef CONFIG_X86_64 static int kvm_pv_clock_pairing(struct kvm_vcpu *vcpu, gpa_t paddr, unsigned long clock_type) @@ -9096,6 +9109,7 @@ static inline int vcpu_block(struct kvm *kvm, struct kvm_vcpu *vcpu) kvm_apic_accept_events(vcpu); switch(vcpu->arch.mp_state) { case KVM_MP_STATE_HALTED: + case KVM_MP_STATE_AP_RESET_HOLD: vcpu->arch.pv.pv_unhalted = false; vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; @@ -9522,8 +9536,9 @@ int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, kvm_load_guest_fpu(vcpu); kvm_apic_accept_events(vcpu); - if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED && - vcpu->arch.pv.pv_unhalted) + if ((vcpu->arch.mp_state == KVM_MP_STATE_HALTED || + vcpu->arch.mp_state == KVM_MP_STATE_AP_RESET_HOLD) && + vcpu->arch.pv.pv_unhalted) mp_state->mp_state = KVM_MP_STATE_RUNNABLE; else mp_state->mp_state = vcpu->arch.mp_state; @@ -10154,6 +10169,7 @@ void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector) kvm_set_segment(vcpu, &cs, VCPU_SREG_CS); kvm_rip_write(vcpu, 0); } +EXPORT_SYMBOL_GPL(kvm_vcpu_deliver_sipi_vector); int kvm_arch_hardware_enable(void) { diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 886802b8ffba3..374c67875cdbd 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -251,6 +251,7 @@ struct kvm_hyperv_exit { #define KVM_EXIT_X86_RDMSR 29 #define KVM_EXIT_X86_WRMSR 30 #define KVM_EXIT_DIRTY_RING_FULL 31 +#define KVM_EXIT_AP_RESET_HOLD 32 /* For KVM_EXIT_INTERNAL_ERROR */ /* Emulate instruction failed. */ @@ -573,6 +574,7 @@ struct kvm_vapic_addr { #define KVM_MP_STATE_CHECK_STOP 6 #define KVM_MP_STATE_OPERATING 7 #define KVM_MP_STATE_LOAD 8 +#define KVM_MP_STATE_AP_RESET_HOLD 9 struct kvm_mp_state { __u32 mp_state; -- GitLab From 491b1bea00040233b791dc8fea1608ac6a7003bc Mon Sep 17 00:00:00 2001 From: Peter Chen Date: Fri, 8 Jan 2021 08:35:30 +0800 Subject: [PATCH 0362/2012] MAINTAINERS: update Peter Chen's email address Using kernel.org as my email address. Signed-off-by: Peter Chen --- MAINTAINERS | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index afe3a5c66bc90..c16a2e83e1764 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3881,7 +3881,7 @@ F: Documentation/devicetree/bindings/mtd/cadence-nand-controller.txt F: drivers/mtd/nand/raw/cadence-nand-controller.c CADENCE USB3 DRD IP DRIVER -M: Peter Chen +M: Peter Chen M: Pawel Laszczak R: Roger Quadros R: Aswath Govindraju @@ -4163,7 +4163,7 @@ S: Maintained F: Documentation/translations/zh_CN/ CHIPIDEA USB HIGH SPEED DUAL ROLE CONTROLLER -M: Peter Chen +M: Peter Chen L: linux-usb@vger.kernel.org S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/peter.chen/usb.git @@ -18404,7 +18404,7 @@ F: Documentation/usb/ohci.rst F: drivers/usb/host/ohci* USB OTG FSM (Finite State Machine) -M: Peter Chen +M: Peter Chen L: linux-usb@vger.kernel.org S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/peter.chen/usb.git -- GitLab From cf7b2ae4d70432fa94ebba3fbaab825481ae7189 Mon Sep 17 00:00:00 2001 From: Andreas Schwab Date: Mon, 21 Dec 2020 23:52:00 +0100 Subject: [PATCH 0363/2012] riscv: return -ENOSYS for syscall -1 Properly return -ENOSYS for syscall -1 instead of leaving the return value uninitialized. This fixes the strace teststuite. Fixes: 5340627e3fe0 ("riscv: add support for SECCOMP and SECCOMP_FILTER") Cc: stable@vger.kernel.org Signed-off-by: Andreas Schwab Reviewed-by: Tycho Andersen Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/entry.S | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S index 524d918f3601b..d07763001eb0c 100644 --- a/arch/riscv/kernel/entry.S +++ b/arch/riscv/kernel/entry.S @@ -186,14 +186,7 @@ check_syscall_nr: * Syscall number held in a7. * If syscall number is above allowed value, redirect to ni_syscall. */ - bge a7, t0, 1f - /* - * Check if syscall is rejected by tracer, i.e., a7 == -1. - * If yes, we pretend it was executed. - */ - li t1, -1 - beq a7, t1, ret_from_syscall_rejected - blt a7, t1, 1f + bgeu a7, t0, 1f /* Call syscall */ la s0, sys_call_table slli t0, a7, RISCV_LGPTR -- GitLab From 11f4c2e940e2f317c9d8fb5a79702f2a4a02ff98 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Sun, 13 Dec 2020 22:50:34 +0900 Subject: [PATCH 0364/2012] riscv: Fix kernel time_init() If of_clk_init() is not called in time_init(), clock providers defined in the system device tree are not initialized, resulting in failures for other devices to initialize due to missing clocks. Similarly to other architectures and to the default kernel time_init() implementation, call of_clk_init() before executing timer_probe() in time_init(). Signed-off-by: Damien Le Moal Acked-by: Stephen Boyd Reviewed-by: Palmer Dabbelt Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/time.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/riscv/kernel/time.c b/arch/riscv/kernel/time.c index 4d3a1048ad8b1..8a5cf99c07762 100644 --- a/arch/riscv/kernel/time.c +++ b/arch/riscv/kernel/time.c @@ -4,6 +4,7 @@ * Copyright (C) 2017 SiFive */ +#include #include #include #include @@ -24,6 +25,8 @@ void __init time_init(void) riscv_timebase = prop; lpj_fine = riscv_timebase / HZ; + + of_clk_init(NULL); timer_probe(); } -- GitLab From 1f1496a923b6ba16679074fe77100e1b53cdb880 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Sun, 13 Dec 2020 22:50:35 +0900 Subject: [PATCH 0365/2012] riscv: Fix sifive serial driver Setup the port uartclk in sifive_serial_probe() so that the base baud rate is correctly printed during device probe instead of always showing "0". I.e. the probe message is changed from 38000000.serial: ttySIF0 at MMIO 0x38000000 (irq = 1, base_baud = 0) is a SiFive UART v0 to the correct: 38000000.serial: ttySIF0 at MMIO 0x38000000 (irq = 1, base_baud = 115200) is a SiFive UART v0 Signed-off-by: Damien Le Moal Reviewed-by: Palmer Dabbelt Acked-by: Palmer Dabbelt Signed-off-by: Palmer Dabbelt --- drivers/tty/serial/sifive.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/tty/serial/sifive.c b/drivers/tty/serial/sifive.c index 1066eebe3b28b..328d5a78792fe 100644 --- a/drivers/tty/serial/sifive.c +++ b/drivers/tty/serial/sifive.c @@ -1000,6 +1000,7 @@ static int sifive_serial_probe(struct platform_device *pdev) /* Set up clock divider */ ssp->clkin_rate = clk_get_rate(ssp->clk); ssp->baud_rate = SIFIVE_DEFAULT_BAUD_RATE; + ssp->port.uartclk = ssp->baud_rate * 16; __ssp_update_div(ssp); platform_set_drvdata(pdev, ssp); -- GitLab From 643437b996bac9267785e0bd528332e2d5811067 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Sun, 13 Dec 2020 22:50:36 +0900 Subject: [PATCH 0366/2012] riscv: Enable interrupts during syscalls with M-Mode When running is M-Mode (no MMU config), MPIE does not get set. This results in all syscalls being executed with interrupts disabled as handle_exception never sets SR_IE as it always sees SR_PIE being cleared. Fix this by always force enabling interrupts in handle_syscall when CONFIG_RISCV_M_MODE is enabled. Signed-off-by: Damien Le Moal Reviewed-by: Palmer Dabbelt Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/entry.S | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S index d07763001eb0c..48de316c68c18 100644 --- a/arch/riscv/kernel/entry.S +++ b/arch/riscv/kernel/entry.S @@ -155,6 +155,15 @@ skip_context_tracking: tail do_trap_unknown handle_syscall: +#ifdef CONFIG_RISCV_M_MODE + /* + * When running is M-Mode (no MMU config), MPIE does not get set. + * As a result, we need to force enable interrupts here because + * handle_exception did not do set SR_IE as it always sees SR_PIE + * being cleared. + */ + csrs CSR_STATUS, SR_IE +#endif #if defined(CONFIG_TRACE_IRQFLAGS) || defined(CONFIG_CONTEXT_TRACKING) /* Recover a0 - a7 for system calls */ REG_L a0, PT_A0(sp) -- GitLab From 0b2894cd0fdf8ccc8a9b4e28563db9ac0ecb62b2 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Mon, 4 Jan 2021 17:50:26 +0200 Subject: [PATCH 0367/2012] scsi: docs: ABI: sysfs-driver-ufs: Add DeepSleep power mode Update sysfs documentation for addition of DeepSleep power mode. Link: https://lore.kernel.org/r/20210104155026.16417-1-adrian.hunter@intel.com Fixes: fe1d4c2ebcae ("scsi: ufs: Add DeepSleep feature") Signed-off-by: Adrian Hunter Signed-off-by: Martin K. Petersen --- Documentation/ABI/testing/sysfs-driver-ufs | 34 +++++++++++++--------- 1 file changed, 20 insertions(+), 14 deletions(-) diff --git a/Documentation/ABI/testing/sysfs-driver-ufs b/Documentation/ABI/testing/sysfs-driver-ufs index adc0d0e916078..e77fa784d6d81 100644 --- a/Documentation/ABI/testing/sysfs-driver-ufs +++ b/Documentation/ABI/testing/sysfs-driver-ufs @@ -916,21 +916,24 @@ Date: September 2014 Contact: Subhash Jadavani Description: This entry could be used to set or show the UFS device runtime power management level. The current driver - implementation supports 6 levels with next target states: + implementation supports 7 levels with next target states: == ==================================================== - 0 an UFS device will stay active, an UIC link will + 0 UFS device will stay active, UIC link will stay active - 1 an UFS device will stay active, an UIC link will + 1 UFS device will stay active, UIC link will hibernate - 2 an UFS device will moved to sleep, an UIC link will + 2 UFS device will be moved to sleep, UIC link will stay active - 3 an UFS device will moved to sleep, an UIC link will + 3 UFS device will be moved to sleep, UIC link will hibernate - 4 an UFS device will be powered off, an UIC link will + 4 UFS device will be powered off, UIC link will hibernate - 5 an UFS device will be powered off, an UIC link will + 5 UFS device will be powered off, UIC link will be powered off + 6 UFS device will be moved to deep sleep, UIC link + will be powered off. Note, deep sleep might not be + supported in which case this value will not be accepted == ==================================================== What: /sys/bus/platform/drivers/ufshcd/*/rpm_target_dev_state @@ -954,21 +957,24 @@ Date: September 2014 Contact: Subhash Jadavani Description: This entry could be used to set or show the UFS device system power management level. The current driver - implementation supports 6 levels with next target states: + implementation supports 7 levels with next target states: == ==================================================== - 0 an UFS device will stay active, an UIC link will + 0 UFS device will stay active, UIC link will stay active - 1 an UFS device will stay active, an UIC link will + 1 UFS device will stay active, UIC link will hibernate - 2 an UFS device will moved to sleep, an UIC link will + 2 UFS device will be moved to sleep, UIC link will stay active - 3 an UFS device will moved to sleep, an UIC link will + 3 UFS device will be moved to sleep, UIC link will hibernate - 4 an UFS device will be powered off, an UIC link will + 4 UFS device will be powered off, UIC link will hibernate - 5 an UFS device will be powered off, an UIC link will + 5 UFS device will be powered off, UIC link will be powered off + 6 UFS device will be moved to deep sleep, UIC link + will be powered off. Note, deep sleep might not be + supported in which case this value will not be accepted == ==================================================== What: /sys/bus/platform/drivers/ufshcd/*/spm_target_dev_state -- GitLab From b112036535eda34460677ea883eaecc3a45a435d Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 5 Jan 2021 00:41:04 +0100 Subject: [PATCH 0368/2012] scsi: megaraid_sas: Fix MEGASAS_IOC_FIRMWARE regression Phil Oester reported that a fix for a possible buffer overrun that I sent caused a regression that manifests in this output: Event Message: A PCI parity error was detected on a component at bus 0 device 5 function 0. Severity: Critical Message ID: PCI1308 The original code tried to handle the sense data pointer differently when using 32-bit 64-bit DMA addressing, which would lead to a 32-bit dma_addr_t value of 0x11223344 to get stored 32-bit kernel: 44 33 22 11 ?? ?? ?? ?? 64-bit LE kernel: 44 33 22 11 00 00 00 00 64-bit BE kernel: 00 00 00 00 44 33 22 11 or a 64-bit dma_addr_t value of 0x1122334455667788 to get stored as 32-bit kernel: 88 77 66 55 ?? ?? ?? ?? 64-bit kernel: 88 77 66 55 44 33 22 11 In my patch, I tried to ensure that the same value is used on both 32-bit and 64-bit kernels, and picked what seemed to be the most sensible combination, storing 32-bit addresses in the first four bytes (as 32-bit kernels already did), and 64-bit addresses in eight consecutive bytes (as 64-bit kernels already did), but evidently this was incorrect. Always storing the dma_addr_t pointer as 64-bit little-endian, i.e. initializing the second four bytes to zero in case of 32-bit addressing, apparently solved the problem for Phil, and is consistent with what all 64-bit little-endian machines did before. I also checked in the history that in previous versions of the code, the pointer was always in the first four bytes without padding, and that previous attempts to fix 64-bit user space, big-endian architectures and 64-bit DMA were clearly flawed and seem to have introduced made this worse. Link: https://lore.kernel.org/r/20210104234137.438275-1-arnd@kernel.org Fixes: 381d34e376e3 ("scsi: megaraid_sas: Check user-provided offsets") Fixes: 107a60dd71b5 ("scsi: megaraid_sas: Add support for 64bit consistent DMA") Fixes: 94cd65ddf4d7 ("[SCSI] megaraid_sas: addded support for big endian architecture") Fixes: 7b2519afa1ab ("[SCSI] megaraid_sas: fix 64 bit sense pointer truncation") Reported-by: Phil Oester Tested-by: Phil Oester Signed-off-by: Arnd Bergmann Signed-off-by: Martin K. Petersen --- drivers/scsi/megaraid/megaraid_sas_base.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/scsi/megaraid/megaraid_sas_base.c b/drivers/scsi/megaraid/megaraid_sas_base.c index af192096a82b1..63a4f48bdc755 100644 --- a/drivers/scsi/megaraid/megaraid_sas_base.c +++ b/drivers/scsi/megaraid/megaraid_sas_base.c @@ -8244,11 +8244,9 @@ megasas_mgmt_fw_ioctl(struct megasas_instance *instance, goto out; } + /* always store 64 bits regardless of addressing */ sense_ptr = (void *)cmd->frame + ioc->sense_off; - if (instance->consistent_mask_64bit) - put_unaligned_le64(sense_handle, sense_ptr); - else - put_unaligned_le32(sense_handle, sense_ptr); + put_unaligned_le64(sense_handle, sense_ptr); } /* -- GitLab From 5e6ddadf7637d336acaad1df1f3bcbb07f7d104d Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 5 Jan 2021 20:08:22 -0800 Subject: [PATCH 0369/2012] scsi: ufs: ufshcd-pltfrm depends on HAS_IOMEM Building ufshcd-pltfrm.c on arch/s390/ has a linker error since S390 does not support IOMEM, so add a dependency on HAS_IOMEM. s390-linux-ld: drivers/scsi/ufs/ufshcd-pltfrm.o: in function `ufshcd_pltfrm_init': ufshcd-pltfrm.c:(.text+0x38e): undefined reference to `devm_platform_ioremap_resource' where that devm_ function is inside an #ifdef CONFIG_HAS_IOMEM/#endif block. Link: lore.kernel.org/r/202101031125.ZEFCUiKi-lkp@intel.com Link: https://lore.kernel.org/r/20210106040822.933-1-rdunlap@infradead.org Fixes: 03b1781aa978 ("[SCSI] ufs: Add Platform glue driver for ufshcd") Cc: "James E.J. Bottomley" Cc: "Martin K. Petersen" Cc: Alim Akhtar Cc: Avri Altman Cc: linux-scsi@vger.kernel.org Reported-by: kernel test robot Signed-off-by: Randy Dunlap Signed-off-by: Martin K. Petersen --- drivers/scsi/ufs/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/scsi/ufs/Kconfig b/drivers/scsi/ufs/Kconfig index 3f6dfed4fe84b..b915b38c2b277 100644 --- a/drivers/scsi/ufs/Kconfig +++ b/drivers/scsi/ufs/Kconfig @@ -72,6 +72,7 @@ config SCSI_UFS_DWC_TC_PCI config SCSI_UFSHCD_PLATFORM tristate "Platform bus based UFS Controller support" depends on SCSI_UFSHCD + depends on HAS_IOMEM help This selects the UFS host controller support. Select this if you have an UFS controller on Platform bus. -- GitLab From 901d01c8e50c35a182073219a38b9c6391e59144 Mon Sep 17 00:00:00 2001 From: Tyrel Datwyler Date: Wed, 6 Jan 2021 14:37:21 -0600 Subject: [PATCH 0370/2012] scsi: ibmvfc: Fix missing cast of ibmvfc_event pointer to u64 handle Commit 2aa0102c6688 ("scsi: ibmvfc: Use correlation token to tag commands") sets the vfcFrame correlation token to the pointer handle of the associated ibmvfc_event. However, that commit failed to cast the pointer to an appropriate type which in this case is a u64. As such sparse warnings are generated for both correlation token assignments. ibmvfc.c:2375:36: sparse: incorrect type in argument 1 (different base types) ibmvfc.c:2375:36: sparse: expected unsigned long long [usertype] val ibmvfc.c:2375:36: sparse: got struct ibmvfc_event *[assigned] evt Add the appropriate u64 casts when assigning an ibmvfc_event as a correlation token. Link: https://lore.kernel.org/r/20210106203721.1054693-1-tyreld@linux.ibm.com Fixes: 2aa0102c6688 ("scsi: ibmvfc: Use correlation token to tag commands") Reported-by: kernel test robot Signed-off-by: Tyrel Datwyler Signed-off-by: Martin K. Petersen --- drivers/scsi/ibmvscsi/ibmvfc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/ibmvscsi/ibmvfc.c b/drivers/scsi/ibmvscsi/ibmvfc.c index 42e4d35e0d355..7312f31df878c 100644 --- a/drivers/scsi/ibmvscsi/ibmvfc.c +++ b/drivers/scsi/ibmvscsi/ibmvfc.c @@ -1744,7 +1744,7 @@ static int ibmvfc_queuecommand_lck(struct scsi_cmnd *cmnd, iu->pri_task_attr = IBMVFC_SIMPLE_TASK; } - vfc_cmd->correlation = cpu_to_be64(evt); + vfc_cmd->correlation = cpu_to_be64((u64)evt); if (likely(!(rc = ibmvfc_map_sg_data(cmnd, evt, vfc_cmd, vhost->dev)))) return ibmvfc_send_event(evt, vhost, 0); @@ -2418,7 +2418,7 @@ static int ibmvfc_abort_task_set(struct scsi_device *sdev) tmf->flags = cpu_to_be16((IBMVFC_NO_MEM_DESC | IBMVFC_TMF)); evt->sync_iu = &rsp_iu; - tmf->correlation = cpu_to_be64(evt); + tmf->correlation = cpu_to_be64((u64)evt); init_completion(&evt->comp); rsp_rc = ibmvfc_send_event(evt, vhost, default_timeout); -- GitLab From 4ee7ee530bc2bae6268247988d86722c65d02a37 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 7 Jan 2021 10:53:15 -0800 Subject: [PATCH 0371/2012] scsi: ufs: Fix livelock of ufshcd_clear_ua_wluns() When gate_work/ungate_work experience an error during hibern8_enter or exit we can livelock: ufshcd_err_handler() ufshcd_scsi_block_requests() ufshcd_reset_and_restore() ufshcd_clear_ua_wluns() -> stuck ufshcd_scsi_unblock_requests() In order to avoid this, ufshcd_clear_ua_wluns() can be called per recovery flows such as suspend/resume, link_recovery, and error_handler. Link: https://lore.kernel.org/r/20210107185316.788815-2-jaegeuk@kernel.org Fixes: 1918651f2d7e ("scsi: ufs: Clear UAC for RPMB after ufshcd resets") Reviewed-by: Can Guo Signed-off-by: Jaegeuk Kim Signed-off-by: Martin K. Petersen --- drivers/scsi/ufs/ufshcd.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index e31d2c5c7b23b..cff4f52a91f0a 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -3996,6 +3996,8 @@ int ufshcd_link_recovery(struct ufs_hba *hba) if (ret) dev_err(hba->dev, "%s: link recovery failed, err %d", __func__, ret); + else + ufshcd_clear_ua_wluns(hba); return ret; } @@ -6001,6 +6003,9 @@ skip_err_handling: ufshcd_scsi_unblock_requests(hba); ufshcd_err_handling_unprepare(hba); up(&hba->eh_sem); + + if (!err && needs_reset) + ufshcd_clear_ua_wluns(hba); } /** @@ -6938,14 +6943,11 @@ static int ufshcd_host_reset_and_restore(struct ufs_hba *hba) ufshcd_set_clk_freq(hba, true); err = ufshcd_hba_enable(hba); - if (err) - goto out; /* Establish the link again and restore the device */ - err = ufshcd_probe_hba(hba, false); if (!err) - ufshcd_clear_ua_wluns(hba); -out: + err = ufshcd_probe_hba(hba, false); + if (err) dev_err(hba->dev, "%s: Host init failed %d\n", __func__, err); ufshcd_update_evt_hist(hba, UFS_EVT_HOST_RESET, (u32)err); @@ -7716,6 +7718,8 @@ static int ufshcd_add_lus(struct ufs_hba *hba) if (ret) goto out; + ufshcd_clear_ua_wluns(hba); + /* Initialize devfreq after UFS device is detected */ if (ufshcd_is_clkscaling_supported(hba)) { memcpy(&hba->clk_scaling.saved_pwr_info.info, @@ -7917,8 +7921,6 @@ out: pm_runtime_put_sync(hba->dev); ufshcd_exit_clk_scaling(hba); ufshcd_hba_exit(hba); - } else { - ufshcd_clear_ua_wluns(hba); } } @@ -8775,6 +8777,7 @@ enable_gating: ufshcd_resume_clkscaling(hba); hba->clk_gating.is_suspended = false; hba->dev_info.b_rpm_dev_flush_capable = false; + ufshcd_clear_ua_wluns(hba); ufshcd_release(hba); out: if (hba->dev_info.b_rpm_dev_flush_capable) { @@ -8885,6 +8888,8 @@ static int ufshcd_resume(struct ufs_hba *hba, enum ufs_pm_op pm_op) cancel_delayed_work(&hba->rpm_dev_flush_recheck_work); } + ufshcd_clear_ua_wluns(hba); + /* Schedule clock gating in case of no access to UFS device yet */ ufshcd_release(hba); -- GitLab From eeb1b55b6e25c5f7265ff45cd050f3bc2cc423a4 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 7 Jan 2021 10:53:16 -0800 Subject: [PATCH 0372/2012] scsi: ufs: Fix tm request when non-fatal error happens When non-fatal error like line-reset happens, ufshcd_err_handler() starts to abort tasks by ufshcd_try_to_abort_task(). When it tries to issue a task management request, we hit two warnings: WARNING: CPU: 7 PID: 7 at block/blk-core.c:630 blk_get_request+0x68/0x70 WARNING: CPU: 4 PID: 157 at block/blk-mq-tag.c:82 blk_mq_get_tag+0x438/0x46c After fixing the above warnings we hit another tm_cmd timeout which may be caused by unstable controller state: __ufshcd_issue_tm_cmd: task management cmd 0x80 timed-out Then, ufshcd_err_handler() enters full reset, and kernel gets stuck. It turned out ufshcd_print_trs() printed too many messages on console which requires CPU locks. Likewise hba->silence_err_logs, we need to avoid too verbose messages. This is actually not an error case. Link: https://lore.kernel.org/r/20210107185316.788815-3-jaegeuk@kernel.org Fixes: 69a6c269c097 ("scsi: ufs: Use blk_{get,put}_request() to allocate and free TMFs") Reviewed-by: Can Guo Signed-off-by: Jaegeuk Kim Signed-off-by: Martin K. Petersen --- drivers/scsi/ufs/ufshcd.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index cff4f52a91f0a..fb32d122f2e38 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -4994,7 +4994,8 @@ ufshcd_transfer_rsp_status(struct ufs_hba *hba, struct ufshcd_lrb *lrbp) break; } /* end of switch */ - if ((host_byte(result) != DID_OK) && !hba->silence_err_logs) + if ((host_byte(result) != DID_OK) && + (host_byte(result) != DID_REQUEUE) && !hba->silence_err_logs) ufshcd_print_trs(hba, 1 << lrbp->task_tag, true); return result; } @@ -6300,9 +6301,13 @@ static irqreturn_t ufshcd_intr(int irq, void *__hba) intr_status = ufshcd_readl(hba, REG_INTERRUPT_STATUS); } - if (enabled_intr_status && retval == IRQ_NONE) { - dev_err(hba->dev, "%s: Unhandled interrupt 0x%08x\n", - __func__, intr_status); + if (enabled_intr_status && retval == IRQ_NONE && + !ufshcd_eh_in_progress(hba)) { + dev_err(hba->dev, "%s: Unhandled interrupt 0x%08x (0x%08x, 0x%08x)\n", + __func__, + intr_status, + hba->ufs_stats.last_intr_status, + enabled_intr_status); ufshcd_dump_regs(hba, 0, UFSHCI_REG_SPACE_SIZE, "host_regs: "); } @@ -6346,7 +6351,10 @@ static int __ufshcd_issue_tm_cmd(struct ufs_hba *hba, * Even though we use wait_event() which sleeps indefinitely, * the maximum wait time is bounded by %TM_CMD_TIMEOUT. */ - req = blk_get_request(q, REQ_OP_DRV_OUT, BLK_MQ_REQ_RESERVED); + req = blk_get_request(q, REQ_OP_DRV_OUT, 0); + if (IS_ERR(req)) + return PTR_ERR(req); + req->end_io_data = &wait; free_slot = req->tag; WARN_ON_ONCE(free_slot < 0 || free_slot >= hba->nutmrs); -- GitLab From 2d2f6f1b4799428d160c021dd652bc3e3593945e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 7 Jan 2021 19:36:40 +0100 Subject: [PATCH 0373/2012] block: pre-initialize struct block_device in bdev_alloc_inode bdev_evict_inode and bdev_free_inode are also called for the root inode of bdevfs, for which bdev_alloc is never called. Move the zeroing o f struct block_device and the initialization of the bd_bdi field into bdev_alloc_inode to make sure they are initialized for the root inode as well. Fixes: e6cb53827ed6 ("block: initialize struct block_device in bdev_alloc") Reported-by: Alexey Kardashevskiy Tested-by: Alexey Kardashevskiy Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- fs/block_dev.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/block_dev.c b/fs/block_dev.c index e454c5a810436..3b8963e228a1b 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -776,8 +776,11 @@ static struct kmem_cache * bdev_cachep __read_mostly; static struct inode *bdev_alloc_inode(struct super_block *sb) { struct bdev_inode *ei = kmem_cache_alloc(bdev_cachep, GFP_KERNEL); + if (!ei) return NULL; + memset(&ei->bdev, 0, sizeof(ei->bdev)); + ei->bdev.bd_bdi = &noop_backing_dev_info; return &ei->vfs_inode; } @@ -871,14 +874,12 @@ struct block_device *bdev_alloc(struct gendisk *disk, u8 partno) mapping_set_gfp_mask(&inode->i_data, GFP_USER); bdev = I_BDEV(inode); - memset(bdev, 0, sizeof(*bdev)); mutex_init(&bdev->bd_mutex); mutex_init(&bdev->bd_fsfreeze_mutex); spin_lock_init(&bdev->bd_size_lock); bdev->bd_disk = disk; bdev->bd_partno = partno; bdev->bd_inode = inode; - bdev->bd_bdi = &noop_backing_dev_info; #ifdef CONFIG_SYSFS INIT_LIST_HEAD(&bdev->bd_holder_disks); #endif -- GitLab From 3c02e04fd4f57130e4fa75fab6f528f7a52db9b5 Mon Sep 17 00:00:00 2001 From: Kirill Tkhai Date: Thu, 31 Dec 2020 00:33:18 +0300 Subject: [PATCH 0374/2012] crypto: xor - Fix divide error in do_xor_speed() crypto: Fix divide error in do_xor_speed() From: Kirill Tkhai Latest (but not only latest) linux-next panics with divide error on my QEMU setup. The patch at the bottom of this message fixes the problem. xor: measuring software checksum speed divide error: 0000 [#1] PREEMPT SMP KASAN PREEMPT SMP KASAN CPU: 3 PID: 1 Comm: swapper/0 Not tainted 5.10.0-next-20201223+ #2177 RIP: 0010:do_xor_speed+0xbb/0xf3 Code: 41 ff cc 75 b5 bf 01 00 00 00 e8 3d 23 8b fe 65 8b 05 f6 49 83 7d 85 c0 75 05 e8 84 70 81 fe b8 00 00 50 c3 31 d2 48 8d 7b 10 f5 41 89 c4 e8 58 07 a2 fe 44 89 63 10 48 8d 7b 08 e8 cb 07 a2 RSP: 0000:ffff888100137dc8 EFLAGS: 00010246 RAX: 00000000c3500000 RBX: ffffffff823f0160 RCX: 0000000000000000 RDX: 0000000000000000 RSI: 0000000000000808 RDI: ffffffff823f0170 RBP: 0000000000000000 R08: ffffffff8109c50f R09: ffffffff824bb6f7 R10: fffffbfff04976de R11: 0000000000000001 R12: 0000000000000000 R13: ffff888101997000 R14: ffff888101994000 R15: ffffffff823f0178 FS: 0000000000000000(0000) GS:ffff8881f7780000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 000000000220e000 CR4: 00000000000006a0 Call Trace: calibrate_xor_blocks+0x13c/0x1c4 ? do_xor_speed+0xf3/0xf3 do_one_initcall+0xc1/0x1b7 ? start_kernel+0x373/0x373 ? unpoison_range+0x3a/0x60 kernel_init_freeable+0x1dd/0x238 ? rest_init+0xc6/0xc6 kernel_init+0x8/0x10a ret_from_fork+0x1f/0x30 ---[ end trace 5bd3c1d0b77772da ]--- Fixes: c055e3eae0f1 ("crypto: xor - use ktime for template benchmarking") Cc: Signed-off-by: Kirill Tkhai Acked-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- crypto/xor.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/crypto/xor.c b/crypto/xor.c index eacbf4f939900..8f899f898ec9f 100644 --- a/crypto/xor.c +++ b/crypto/xor.c @@ -107,6 +107,8 @@ do_xor_speed(struct xor_block_template *tmpl, void *b1, void *b2) preempt_enable(); // bytes/ns == GB/s, multiply by 1000 to get MB/s [not MiB/s] + if (!min) + min = 1; speed = (1000 * REPS * BENCH_SIZE) / (unsigned int)ktime_to_ns(min); tmpl->speed = speed; -- GitLab From 382811940303f7cd01d0f3dcdf432dfd89c5a98e Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Sun, 3 Jan 2021 15:03:04 +0100 Subject: [PATCH 0375/2012] crypto: omap-sham - Fix link error without crypto-engine The driver was converted to use the crypto engine helper but is missing the corresponding Kconfig statement to ensure it is available: arm-linux-gnueabi-ld: drivers/crypto/omap-sham.o: in function `omap_sham_probe': omap-sham.c:(.text+0x374): undefined reference to `crypto_engine_alloc_init' arm-linux-gnueabi-ld: omap-sham.c:(.text+0x384): undefined reference to `crypto_engine_start' arm-linux-gnueabi-ld: omap-sham.c:(.text+0x510): undefined reference to `crypto_engine_exit' arm-linux-gnueabi-ld: drivers/crypto/omap-sham.o: in function `omap_sham_finish_req': omap-sham.c:(.text+0x98c): undefined reference to `crypto_finalize_hash_request' arm-linux-gnueabi-ld: omap-sham.c:(.text+0x9a0): undefined reference to `crypto_transfer_hash_request_to_engine' arm-linux-gnueabi-ld: drivers/crypto/omap-sham.o: in function `omap_sham_update': omap-sham.c:(.text+0xf24): undefined reference to `crypto_transfer_hash_request_to_engine' arm-linux-gnueabi-ld: drivers/crypto/omap-sham.o: in function `omap_sham_final': omap-sham.c:(.text+0x1020): undefined reference to `crypto_transfer_hash_request_to_engine' Fixes: 133c3d434d91 ("crypto: omap-sham - convert to use crypto engine") Signed-off-by: Arnd Bergmann Signed-off-by: Herbert Xu --- drivers/crypto/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig index bbd51703e738b..e535f28a80283 100644 --- a/drivers/crypto/Kconfig +++ b/drivers/crypto/Kconfig @@ -366,6 +366,7 @@ if CRYPTO_DEV_OMAP config CRYPTO_DEV_OMAP_SHAM tristate "Support for OMAP MD5/SHA1/SHA2 hw accelerator" depends on ARCH_OMAP2PLUS + select CRYPTO_ENGINE select CRYPTO_SHA1 select CRYPTO_MD5 select CRYPTO_SHA256 -- GitLab From ae28d1aae48a1258bd09a6f707ebb4231d79a761 Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Thu, 17 Dec 2020 14:31:18 -0800 Subject: [PATCH 0376/2012] x86/resctrl: Use an IPI instead of task_work_add() to update PQR_ASSOC MSR Currently, when moving a task to a resource group the PQR_ASSOC MSR is updated with the new closid and rmid in an added task callback. If the task is running, the work is run as soon as possible. If the task is not running, the work is executed later in the kernel exit path when the kernel returns to the task again. Updating the PQR_ASSOC MSR as soon as possible on the CPU a moved task is running is the right thing to do. Queueing work for a task that is not running is unnecessary (the PQR_ASSOC MSR is already updated when the task is scheduled in) and causing system resource waste with the way in which it is implemented: Work to update the PQR_ASSOC register is queued every time the user writes a task id to the "tasks" file, even if the task already belongs to the resource group. This could result in multiple pending work items associated with a single task even if they are all identical and even though only a single update with most recent values is needed. Specifically, even if a task is moved between different resource groups while it is sleeping then it is only the last move that is relevant but yet a work item is queued during each move. This unnecessary queueing of work items could result in significant system resource waste, especially on tasks sleeping for a long time. For example, as demonstrated by Shakeel Butt in [1] writing the same task id to the "tasks" file can quickly consume significant memory. The same problem (wasted system resources) occurs when moving a task between different resource groups. As pointed out by Valentin Schneider in [2] there is an additional issue with the way in which the queueing of work is done in that the task_struct update is currently done after the work is queued, resulting in a race with the register update possibly done before the data needed by the update is available. To solve these issues, update the PQR_ASSOC MSR in a synchronous way right after the new closid and rmid are ready during the task movement, only if the task is running. If a moved task is not running nothing is done since the PQR_ASSOC MSR will be updated next time the task is scheduled. This is the same way used to update the register when tasks are moved as part of resource group removal. [1] https://lore.kernel.org/lkml/CALvZod7E9zzHwenzf7objzGKsdBmVwTgEJ0nPgs0LUFU3SN5Pw@mail.gmail.com/ [2] https://lore.kernel.org/lkml/20201123022433.17905-1-valentin.schneider@arm.com [ bp: Massage commit message and drop the two update_task_closid_rmid() variants. ] Fixes: e02737d5b826 ("x86/intel_rdt: Add tasks files") Reported-by: Shakeel Butt Reported-by: Valentin Schneider Signed-off-by: Fenghua Yu Signed-off-by: Reinette Chatre Signed-off-by: Borislav Petkov Reviewed-by: Tony Luck Reviewed-by: James Morse Reviewed-by: Valentin Schneider Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/17aa2fb38fc12ce7bb710106b3e7c7b45acb9e94.1608243147.git.reinette.chatre@intel.com --- arch/x86/kernel/cpu/resctrl/rdtgroup.c | 112 ++++++++++--------------- 1 file changed, 43 insertions(+), 69 deletions(-) diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c index 29ffb95b25fff..1c6f8a60ac52a 100644 --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c @@ -525,89 +525,63 @@ static void rdtgroup_remove(struct rdtgroup *rdtgrp) kfree(rdtgrp); } -struct task_move_callback { - struct callback_head work; - struct rdtgroup *rdtgrp; -}; - -static void move_myself(struct callback_head *head) +static void _update_task_closid_rmid(void *task) { - struct task_move_callback *callback; - struct rdtgroup *rdtgrp; - - callback = container_of(head, struct task_move_callback, work); - rdtgrp = callback->rdtgrp; - /* - * If resource group was deleted before this task work callback - * was invoked, then assign the task to root group and free the - * resource group. + * If the task is still current on this CPU, update PQR_ASSOC MSR. + * Otherwise, the MSR is updated when the task is scheduled in. */ - if (atomic_dec_and_test(&rdtgrp->waitcount) && - (rdtgrp->flags & RDT_DELETED)) { - current->closid = 0; - current->rmid = 0; - rdtgroup_remove(rdtgrp); - } - - if (unlikely(current->flags & PF_EXITING)) - goto out; - - preempt_disable(); - /* update PQR_ASSOC MSR to make resource group go into effect */ - resctrl_sched_in(); - preempt_enable(); + if (task == current) + resctrl_sched_in(); +} -out: - kfree(callback); +static void update_task_closid_rmid(struct task_struct *t) +{ + if (IS_ENABLED(CONFIG_SMP) && task_curr(t)) + smp_call_function_single(task_cpu(t), _update_task_closid_rmid, t, 1); + else + _update_task_closid_rmid(t); } static int __rdtgroup_move_task(struct task_struct *tsk, struct rdtgroup *rdtgrp) { - struct task_move_callback *callback; - int ret; - - callback = kzalloc(sizeof(*callback), GFP_KERNEL); - if (!callback) - return -ENOMEM; - callback->work.func = move_myself; - callback->rdtgrp = rdtgrp; - /* - * Take a refcount, so rdtgrp cannot be freed before the - * callback has been invoked. + * Set the task's closid/rmid before the PQR_ASSOC MSR can be + * updated by them. + * + * For ctrl_mon groups, move both closid and rmid. + * For monitor groups, can move the tasks only from + * their parent CTRL group. */ - atomic_inc(&rdtgrp->waitcount); - ret = task_work_add(tsk, &callback->work, TWA_RESUME); - if (ret) { - /* - * Task is exiting. Drop the refcount and free the callback. - * No need to check the refcount as the group cannot be - * deleted before the write function unlocks rdtgroup_mutex. - */ - atomic_dec(&rdtgrp->waitcount); - kfree(callback); - rdt_last_cmd_puts("Task exited\n"); - } else { - /* - * For ctrl_mon groups move both closid and rmid. - * For monitor groups, can move the tasks only from - * their parent CTRL group. - */ - if (rdtgrp->type == RDTCTRL_GROUP) { - tsk->closid = rdtgrp->closid; + + if (rdtgrp->type == RDTCTRL_GROUP) { + tsk->closid = rdtgrp->closid; + tsk->rmid = rdtgrp->mon.rmid; + } else if (rdtgrp->type == RDTMON_GROUP) { + if (rdtgrp->mon.parent->closid == tsk->closid) { tsk->rmid = rdtgrp->mon.rmid; - } else if (rdtgrp->type == RDTMON_GROUP) { - if (rdtgrp->mon.parent->closid == tsk->closid) { - tsk->rmid = rdtgrp->mon.rmid; - } else { - rdt_last_cmd_puts("Can't move task to different control group\n"); - ret = -EINVAL; - } + } else { + rdt_last_cmd_puts("Can't move task to different control group\n"); + return -EINVAL; } } - return ret; + + /* + * Ensure the task's closid and rmid are written before determining if + * the task is current that will decide if it will be interrupted. + */ + barrier(); + + /* + * By now, the task's closid and rmid are set. If the task is current + * on a CPU, the PQR_ASSOC MSR needs to be updated to make the resource + * group go into effect. If the task is not current, the MSR will be + * updated when the task is scheduled in. + */ + update_task_closid_rmid(tsk); + + return 0; } static bool is_closid_match(struct task_struct *t, struct rdtgroup *r) -- GitLab From a0195f314a25582b38993bf30db11c300f4f4611 Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Thu, 17 Dec 2020 14:31:19 -0800 Subject: [PATCH 0377/2012] x86/resctrl: Don't move a task to the same resource group Shakeel Butt reported in [1] that a user can request a task to be moved to a resource group even if the task is already in the group. It just wastes time to do the move operation which could be costly to send IPI to a different CPU. Add a sanity check to ensure that the move operation only happens when the task is not already in the resource group. [1] https://lore.kernel.org/lkml/CALvZod7E9zzHwenzf7objzGKsdBmVwTgEJ0nPgs0LUFU3SN5Pw@mail.gmail.com/ Fixes: e02737d5b826 ("x86/intel_rdt: Add tasks files") Reported-by: Shakeel Butt Signed-off-by: Fenghua Yu Signed-off-by: Reinette Chatre Signed-off-by: Borislav Petkov Reviewed-by: Tony Luck Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/962ede65d8e95be793cb61102cca37f7bb018e66.1608243147.git.reinette.chatre@intel.com --- arch/x86/kernel/cpu/resctrl/rdtgroup.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c index 1c6f8a60ac52a..460f3e0df106c 100644 --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c @@ -546,6 +546,13 @@ static void update_task_closid_rmid(struct task_struct *t) static int __rdtgroup_move_task(struct task_struct *tsk, struct rdtgroup *rdtgrp) { + /* If the task is already in rdtgrp, no need to move the task. */ + if ((rdtgrp->type == RDTCTRL_GROUP && tsk->closid == rdtgrp->closid && + tsk->rmid == rdtgrp->mon.rmid) || + (rdtgrp->type == RDTMON_GROUP && tsk->rmid == rdtgrp->mon.rmid && + tsk->closid == rdtgrp->mon.parent->closid)) + return 0; + /* * Set the task's closid/rmid before the PQR_ASSOC MSR can be * updated by them. -- GitLab From 872f36eb0b0f4f0e3a81ea1e51a6bdf58ccfdc6e Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 8 Jan 2021 05:54:44 -0500 Subject: [PATCH 0378/2012] KVM: x86: __kvm_vcpu_halt can be static Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 0287840b93e09..a480804ae27a3 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -7976,7 +7976,7 @@ void kvm_arch_exit(void) kmem_cache_destroy(x86_fpu_cache); } -int __kvm_vcpu_halt(struct kvm_vcpu *vcpu, int state, int reason) +static int __kvm_vcpu_halt(struct kvm_vcpu *vcpu, int state, int reason) { ++vcpu->stat.halt_exits; if (lapic_in_kernel(vcpu)) { -- GitLab From 2a0435df963f996ca870a2ef1cbf1773dc0ea25a Mon Sep 17 00:00:00 2001 From: Stephan Gerhold Date: Thu, 7 Jan 2021 17:51:31 +0100 Subject: [PATCH 0379/2012] ASoC: hdmi-codec: Fix return value in hdmi_codec_set_jack() Sound is broken on the DragonBoard 410c (apq8016_sbc) since 5.10: hdmi-audio-codec hdmi-audio-codec.1.auto: ASoC: error at snd_soc_component_set_jack on hdmi-audio-codec.1.auto: -95 qcom-apq8016-sbc 7702000.sound: Failed to set jack: -95 ADV7533: ASoC: error at snd_soc_link_init on ADV7533: -95 hdmi-audio-codec hdmi-audio-codec.1.auto: ASoC: error at snd_soc_component_set_jack on hdmi-audio-codec.1.auto: -95 qcom-apq8016-sbc: probe of 7702000.sound failed with error -95 This happens because apq8016_sbc calls snd_soc_component_set_jack() on all codec DAIs and attempts to ignore failures with return code -ENOTSUPP. -ENOTSUPP is also excluded from error logging in soc_component_ret(). However, hdmi_codec_set_jack() returns -E*OP*NOTSUPP if jack detection is not supported, which is not handled in apq8016_sbc and soc_component_ret(). Make it return -ENOTSUPP instead to fix sound and silence the errors. Cc: Cheng-Yi Chiang Cc: Srinivas Kandagatla Fixes: 55c5cc63ab32 ("ASoC: hdmi-codec: Use set_jack ops to set jack") Signed-off-by: Stephan Gerhold Acked-by: Nicolin Chen Link: https://lore.kernel.org/r/20210107165131.2535-1-stephan@gerhold.net Signed-off-by: Mark Brown --- sound/soc/codecs/hdmi-codec.c | 2 +- sound/soc/fsl/imx-hdmi.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/codecs/hdmi-codec.c b/sound/soc/codecs/hdmi-codec.c index d5fcc4db8284a..0f3ac22f2cf8e 100644 --- a/sound/soc/codecs/hdmi-codec.c +++ b/sound/soc/codecs/hdmi-codec.c @@ -717,7 +717,7 @@ static int hdmi_codec_set_jack(struct snd_soc_component *component, void *data) { struct hdmi_codec_priv *hcp = snd_soc_component_get_drvdata(component); - int ret = -EOPNOTSUPP; + int ret = -ENOTSUPP; if (hcp->hcd.ops->hook_plugged_cb) { hcp->jack = jack; diff --git a/sound/soc/fsl/imx-hdmi.c b/sound/soc/fsl/imx-hdmi.c index ede4a9ad1054c..dbbb7618351c7 100644 --- a/sound/soc/fsl/imx-hdmi.c +++ b/sound/soc/fsl/imx-hdmi.c @@ -90,7 +90,7 @@ static int imx_hdmi_init(struct snd_soc_pcm_runtime *rtd) } ret = snd_soc_component_set_jack(component, &data->hdmi_jack, NULL); - if (ret && ret != -EOPNOTSUPP) { + if (ret && ret != -ENOTSUPP) { dev_err(card->dev, "Can't set HDMI Jack %d\n", ret); return ret; } -- GitLab From e400071a805d6229223a98899e9da8c6233704a1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Filipe=20La=C3=ADns?= Date: Mon, 4 Jan 2021 20:47:17 +0000 Subject: [PATCH 0380/2012] HID: logitech-dj: add the G602 receiver MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Tested. The device gets correctly exported to userspace and I can see mouse and keyboard events. Signed-off-by: Filipe Laíns Signed-off-by: Jiri Kosina --- drivers/hid/hid-logitech-dj.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/hid/hid-logitech-dj.c b/drivers/hid/hid-logitech-dj.c index 1ffcfc9a1e033..45e7e0bdd382b 100644 --- a/drivers/hid/hid-logitech-dj.c +++ b/drivers/hid/hid-logitech-dj.c @@ -1869,6 +1869,10 @@ static const struct hid_device_id logi_dj_receivers[] = { HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, 0xc531), .driver_data = recvr_type_gaming_hidpp}, + { /* Logitech G602 receiver (0xc537) */ + HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, + 0xc537), + .driver_data = recvr_type_gaming_hidpp}, { /* Logitech lightspeed receiver (0xc539) */ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_NANO_RECEIVER_LIGHTSPEED_1), -- GitLab From 74acfa996b2aec2a4ea8587104c7e2f8d4c6aec2 Mon Sep 17 00:00:00 2001 From: Jack Wang Date: Fri, 8 Jan 2021 15:36:30 +0100 Subject: [PATCH 0381/2012] block/rnbd: Select SG_POOL for RNBD_CLIENT lkp reboot following build error: drivers/block/rnbd/rnbd-clt.c: In function 'rnbd_softirq_done_fn': >> drivers/block/rnbd/rnbd-clt.c:387:2: error: implicit declaration of function 'sg_free_table_chained' [-Werror=implicit-function-declaration] 387 | sg_free_table_chained(&iu->sgt, RNBD_INLINE_SG_CNT); | ^~~~~~~~~~~~~~~~~~~~~ The reason is CONFIG_SG_POOL is not enabled in the config, to avoid such failure, select SG_POOL in Kconfig for RNBD_CLIENT. Fixes: 5a1328d0c3a7 ("block/rnbd-clt: Dynamically allocate sglist for rnbd_iu") Reported-by: kernel test robot Signed-off-by: Jack Wang Signed-off-by: Jens Axboe --- drivers/block/rnbd/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/block/rnbd/Kconfig b/drivers/block/rnbd/Kconfig index 4b6d3d816d1f5..2ff05a0d26461 100644 --- a/drivers/block/rnbd/Kconfig +++ b/drivers/block/rnbd/Kconfig @@ -7,6 +7,7 @@ config BLK_DEV_RNBD_CLIENT tristate "RDMA Network Block Device driver client" depends on INFINIBAND_RTRS_CLIENT select BLK_DEV_RNBD + select SG_POOL help RNBD client is a network block device driver using rdma transport. -- GitLab From 1a84e7c629f8f288e02236bc799f9b0be1cab4a7 Mon Sep 17 00:00:00 2001 From: Jack Wang Date: Fri, 8 Jan 2021 15:36:31 +0100 Subject: [PATCH 0382/2012] block/rnbd-srv: Fix use after free in rnbd_srv_sess_dev_force_close KASAN detect following BUG: [ 778.215311] ================================================================== [ 778.216696] BUG: KASAN: use-after-free in rnbd_srv_sess_dev_force_close+0x38/0x60 [rnbd_server] [ 778.219037] Read of size 8 at addr ffff88b1d6516c28 by task tee/8842 [ 778.220500] CPU: 37 PID: 8842 Comm: tee Kdump: loaded Not tainted 5.10.0-pserver #5.10.0-1+feature+linux+next+20201214.1025+0910d71 [ 778.220529] Hardware name: Supermicro Super Server/X11DDW-L, BIOS 3.3 02/21/2020 [ 778.220555] Call Trace: [ 778.220609] dump_stack+0x99/0xcb [ 778.220667] ? rnbd_srv_sess_dev_force_close+0x38/0x60 [rnbd_server] [ 778.220715] print_address_description.constprop.7+0x1e/0x230 [ 778.220750] ? freeze_kernel_threads+0x73/0x73 [ 778.220896] ? rnbd_srv_sess_dev_force_close+0x38/0x60 [rnbd_server] [ 778.220932] ? rnbd_srv_sess_dev_force_close+0x38/0x60 [rnbd_server] [ 778.220994] kasan_report.cold.9+0x37/0x7c [ 778.221066] ? kobject_put+0x80/0x270 [ 778.221102] ? rnbd_srv_sess_dev_force_close+0x38/0x60 [rnbd_server] [ 778.221184] rnbd_srv_sess_dev_force_close+0x38/0x60 [rnbd_server] [ 778.221240] rnbd_srv_dev_session_force_close_store+0x6a/0xc0 [rnbd_server] [ 778.221304] ? sysfs_file_ops+0x90/0x90 [ 778.221353] kernfs_fop_write+0x141/0x240 [ 778.221451] vfs_write+0x142/0x4d0 [ 778.221553] ksys_write+0xc0/0x160 [ 778.221602] ? __ia32_sys_read+0x50/0x50 [ 778.221684] ? lockdep_hardirqs_on_prepare+0x13d/0x210 [ 778.221718] ? syscall_enter_from_user_mode+0x1c/0x50 [ 778.221821] do_syscall_64+0x33/0x40 [ 778.221862] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 778.221896] RIP: 0033:0x7f4affdd9504 [ 778.221928] Code: 00 f7 d8 64 89 02 48 c7 c0 ff ff ff ff eb b3 0f 1f 80 00 00 00 00 48 8d 05 f9 61 0d 00 8b 00 85 c0 75 13 b8 01 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 54 c3 0f 1f 00 41 54 49 89 d4 55 48 89 f5 53 [ 778.221956] RSP: 002b:00007fffebb36b28 EFLAGS: 00000246 ORIG_RAX: 0000000000000001 [ 778.222011] RAX: ffffffffffffffda RBX: 0000000000000002 RCX: 00007f4affdd9504 [ 778.222038] RDX: 0000000000000002 RSI: 00007fffebb36c50 RDI: 0000000000000003 [ 778.222066] RBP: 00007fffebb36c50 R08: 0000556a151aa600 R09: 00007f4affeb1540 [ 778.222094] R10: fffffffffffffc19 R11: 0000000000000246 R12: 0000556a151aa520 [ 778.222121] R13: 0000000000000002 R14: 00007f4affea6760 R15: 0000000000000002 [ 778.222764] Allocated by task 3212: [ 778.223285] kasan_save_stack+0x19/0x40 [ 778.223316] __kasan_kmalloc.constprop.7+0xc1/0xd0 [ 778.223347] kmem_cache_alloc_trace+0x186/0x350 [ 778.223382] rnbd_srv_rdma_ev+0xf16/0x1690 [rnbd_server] [ 778.223422] process_io_req+0x4d1/0x670 [rtrs_server] [ 778.223573] __ib_process_cq+0x10a/0x350 [ib_core] [ 778.223709] ib_cq_poll_work+0x31/0xb0 [ib_core] [ 778.223743] process_one_work+0x521/0xa90 [ 778.223773] worker_thread+0x65/0x5b0 [ 778.223802] kthread+0x1f2/0x210 [ 778.223833] ret_from_fork+0x22/0x30 [ 778.224296] Freed by task 8842: [ 778.224800] kasan_save_stack+0x19/0x40 [ 778.224829] kasan_set_track+0x1c/0x30 [ 778.224860] kasan_set_free_info+0x1b/0x30 [ 778.224889] __kasan_slab_free+0x108/0x150 [ 778.224919] slab_free_freelist_hook+0x64/0x190 [ 778.224947] kfree+0xe2/0x650 [ 778.224982] rnbd_destroy_sess_dev+0x2fa/0x3b0 [rnbd_server] [ 778.225011] kobject_put+0xda/0x270 [ 778.225046] rnbd_srv_sess_dev_force_close+0x30/0x60 [rnbd_server] [ 778.225081] rnbd_srv_dev_session_force_close_store+0x6a/0xc0 [rnbd_server] [ 778.225111] kernfs_fop_write+0x141/0x240 [ 778.225140] vfs_write+0x142/0x4d0 [ 778.225169] ksys_write+0xc0/0x160 [ 778.225198] do_syscall_64+0x33/0x40 [ 778.225227] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 778.226506] The buggy address belongs to the object at ffff88b1d6516c00 which belongs to the cache kmalloc-512 of size 512 [ 778.227464] The buggy address is located 40 bytes inside of 512-byte region [ffff88b1d6516c00, ffff88b1d6516e00) The problem is in the sess_dev release function we call rnbd_destroy_sess_dev, and could free the sess_dev already, but we still set the keep_id in rnbd_srv_sess_dev_force_close, which lead to use after free. To fix it, move the keep_id before the sysfs removal, and cache the rnbd_srv_session for lock accessing, Fixes: 786998050cbc ("block/rnbd-srv: close a mapped device from server side.") Signed-off-by: Jack Wang Reviewed-by: Guoqing Jiang Signed-off-by: Jens Axboe --- drivers/block/rnbd/rnbd-srv.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/block/rnbd/rnbd-srv.c b/drivers/block/rnbd/rnbd-srv.c index b8e44331e4944..a6a68d44f517c 100644 --- a/drivers/block/rnbd/rnbd-srv.c +++ b/drivers/block/rnbd/rnbd-srv.c @@ -338,10 +338,12 @@ static int rnbd_srv_link_ev(struct rtrs_srv *rtrs, void rnbd_srv_sess_dev_force_close(struct rnbd_srv_sess_dev *sess_dev) { - mutex_lock(&sess_dev->sess->lock); - rnbd_srv_destroy_dev_session_sysfs(sess_dev); - mutex_unlock(&sess_dev->sess->lock); + struct rnbd_srv_session *sess = sess_dev->sess; + sess_dev->keep_id = true; + mutex_lock(&sess->lock); + rnbd_srv_destroy_dev_session_sysfs(sess_dev); + mutex_unlock(&sess->lock); } static int process_msg_close(struct rtrs_srv *rtrs, -- GitLab From 80f99093d81370c5cec37fca3b5a6bdf6bddf0f6 Mon Sep 17 00:00:00 2001 From: Guoqing Jiang Date: Fri, 8 Jan 2021 15:36:32 +0100 Subject: [PATCH 0383/2012] block/rnbd-clt: Fix sg table use after free Since dynamically allocate sglist is used for rnbd_iu, we can't free sg table after send_usr_msg since the callback function (cqe.done) could still access the sglist. Otherwise KASAN reports UAF issue: [ 4856.600257] BUG: KASAN: use-after-free in dma_direct_unmap_sg+0x53/0x290 [ 4856.600772] Read of size 4 at addr ffff888206af3a98 by task swapper/1/0 [ 4856.601729] CPU: 1 PID: 0 Comm: swapper/1 Kdump: loaded Tainted: G W 5.10.0-pserver #5.10.0-1+feature+linux+next+20201214.1025+0910d71 [ 4856.601748] Hardware name: Supermicro Super Server/X11DDW-L, BIOS 3.3 02/21/2020 [ 4856.601766] Call Trace: [ 4856.601785] [ 4856.601822] dump_stack+0x99/0xcb [ 4856.601856] ? dma_direct_unmap_sg+0x53/0x290 [ 4856.601888] print_address_description.constprop.7+0x1e/0x230 [ 4856.601913] ? freeze_kernel_threads+0x73/0x73 [ 4856.601965] ? mark_held_locks+0x29/0xa0 [ 4856.602019] ? dma_direct_unmap_sg+0x53/0x290 [ 4856.602039] ? dma_direct_unmap_sg+0x53/0x290 [ 4856.602079] kasan_report.cold.9+0x37/0x7c [ 4856.602188] ? mlx5_ib_post_recv+0x430/0x520 [mlx5_ib] [ 4856.602209] ? dma_direct_unmap_sg+0x53/0x290 [ 4856.602256] dma_direct_unmap_sg+0x53/0x290 [ 4856.602366] complete_rdma_req+0x188/0x4b0 [rtrs_client] [ 4856.602451] ? rtrs_clt_close+0x80/0x80 [rtrs_client] [ 4856.602535] ? mlx5_ib_poll_cq+0x48b/0x16e0 [mlx5_ib] [ 4856.602589] ? radix_tree_insert+0x3a0/0x3a0 [ 4856.602610] ? do_raw_spin_lock+0x119/0x1d0 [ 4856.602647] ? rwlock_bug.part.1+0x60/0x60 [ 4856.602740] rtrs_clt_rdma_done+0x3f7/0x670 [rtrs_client] [ 4856.602804] ? rtrs_clt_rdma_cm_handler+0xda0/0xda0 [rtrs_client] [ 4856.602857] ? check_flags.part.31+0x6c/0x1f0 [ 4856.602927] ? rcu_read_lock_sched_held+0xaf/0xe0 [ 4856.602963] ? rcu_read_lock_bh_held+0xc0/0xc0 [ 4856.603137] __ib_process_cq+0x10a/0x350 [ib_core] [ 4856.603309] ib_poll_handler+0x41/0x1c0 [ib_core] [ 4856.603358] irq_poll_softirq+0xe6/0x280 [ 4856.603392] ? lockdep_hardirqs_on_prepare+0x111/0x210 [ 4856.603446] __do_softirq+0x10d/0x646 [ 4856.603540] asm_call_irq_on_stack+0x12/0x20 [ 4856.603563] [ 4856.605096] Allocated by task 8914: [ 4856.605510] kasan_save_stack+0x19/0x40 [ 4856.605532] __kasan_kmalloc.constprop.7+0xc1/0xd0 [ 4856.605552] __kmalloc+0x155/0x320 [ 4856.605574] __sg_alloc_table+0x155/0x1c0 [ 4856.605594] sg_alloc_table+0x1f/0x50 [ 4856.605620] send_msg_sess_info+0x119/0x2e0 [rnbd_client] [ 4856.605646] remap_devs+0x71/0x210 [rnbd_client] [ 4856.605676] init_sess+0xad8/0xe10 [rtrs_client] [ 4856.605706] rtrs_clt_reconnect_work+0xd6/0x170 [rtrs_client] [ 4856.605728] process_one_work+0x521/0xa90 [ 4856.605748] worker_thread+0x65/0x5b0 [ 4856.605769] kthread+0x1f2/0x210 [ 4856.605789] ret_from_fork+0x22/0x30 [ 4856.606159] Freed by task 8914: [ 4856.606559] kasan_save_stack+0x19/0x40 [ 4856.606580] kasan_set_track+0x1c/0x30 [ 4856.606601] kasan_set_free_info+0x1b/0x30 [ 4856.606622] __kasan_slab_free+0x108/0x150 [ 4856.606642] slab_free_freelist_hook+0x64/0x190 [ 4856.606661] kfree+0xe2/0x650 [ 4856.606681] __sg_free_table+0xa4/0x100 [ 4856.606707] send_msg_sess_info+0x1d6/0x2e0 [rnbd_client] [ 4856.606733] remap_devs+0x71/0x210 [rnbd_client] [ 4856.606763] init_sess+0xad8/0xe10 [rtrs_client] [ 4856.606792] rtrs_clt_reconnect_work+0xd6/0x170 [rtrs_client] [ 4856.606813] process_one_work+0x521/0xa90 [ 4856.606833] worker_thread+0x65/0x5b0 [ 4856.606853] kthread+0x1f2/0x210 [ 4856.606872] ret_from_fork+0x22/0x30 The solution is to free iu's sgtable after the iu is not used anymore. And also move sg_alloc_table into rnbd_get_iu accordingly. Fixes: 5a1328d0c3a7 ("block/rnbd-clt: Dynamically allocate sglist for rnbd_iu") Signed-off-by: Guoqing Jiang Signed-off-by: Jack Wang Signed-off-by: Jens Axboe --- drivers/block/rnbd/rnbd-clt.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/drivers/block/rnbd/rnbd-clt.c b/drivers/block/rnbd/rnbd-clt.c index 96e3f9fe82418..506e9094487f6 100644 --- a/drivers/block/rnbd/rnbd-clt.c +++ b/drivers/block/rnbd/rnbd-clt.c @@ -375,12 +375,19 @@ static struct rnbd_iu *rnbd_get_iu(struct rnbd_clt_session *sess, init_waitqueue_head(&iu->comp.wait); iu->comp.errno = INT_MAX; + if (sg_alloc_table(&iu->sgt, 1, GFP_KERNEL)) { + rnbd_put_permit(sess, permit); + kfree(iu); + return NULL; + } + return iu; } static void rnbd_put_iu(struct rnbd_clt_session *sess, struct rnbd_iu *iu) { if (atomic_dec_and_test(&iu->refcount)) { + sg_free_table(&iu->sgt); rnbd_put_permit(sess, iu->permit); kfree(iu); } @@ -487,8 +494,6 @@ static int send_msg_close(struct rnbd_clt_dev *dev, u32 device_id, bool wait) iu->buf = NULL; iu->dev = dev; - sg_alloc_table(&iu->sgt, 1, GFP_KERNEL); - msg.hdr.type = cpu_to_le16(RNBD_MSG_CLOSE); msg.device_id = cpu_to_le32(device_id); @@ -502,7 +507,6 @@ static int send_msg_close(struct rnbd_clt_dev *dev, u32 device_id, bool wait) err = errno; } - sg_free_table(&iu->sgt); rnbd_put_iu(sess, iu); return err; } @@ -575,7 +579,6 @@ static int send_msg_open(struct rnbd_clt_dev *dev, bool wait) iu->buf = rsp; iu->dev = dev; - sg_alloc_table(&iu->sgt, 1, GFP_KERNEL); sg_init_one(iu->sgt.sgl, rsp, sizeof(*rsp)); msg.hdr.type = cpu_to_le16(RNBD_MSG_OPEN); @@ -594,7 +597,6 @@ static int send_msg_open(struct rnbd_clt_dev *dev, bool wait) err = errno; } - sg_free_table(&iu->sgt); rnbd_put_iu(sess, iu); return err; } @@ -622,8 +624,6 @@ static int send_msg_sess_info(struct rnbd_clt_session *sess, bool wait) iu->buf = rsp; iu->sess = sess; - - sg_alloc_table(&iu->sgt, 1, GFP_KERNEL); sg_init_one(iu->sgt.sgl, rsp, sizeof(*rsp)); msg.hdr.type = cpu_to_le16(RNBD_MSG_SESS_INFO); @@ -650,7 +650,6 @@ put_iu: } else { err = errno; } - sg_free_table(&iu->sgt); rnbd_put_iu(sess, iu); return err; } -- GitLab From ef8048dd2345d070c41bc7df16763fd4d8fac296 Mon Sep 17 00:00:00 2001 From: Swapnil Ingle Date: Fri, 8 Jan 2021 15:36:33 +0100 Subject: [PATCH 0384/2012] block/rnbd: Adding name to the Contributors List Adding name to the Contributors List Signed-off-by: Swapnil Ingle Acked-by: Jack Wang Acked-by: Danil Kipnis Signed-off-by: Jack Wang Signed-off-by: Jens Axboe --- drivers/block/rnbd/README | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/block/rnbd/README b/drivers/block/rnbd/README index 1773c0aa0bd43..080f58a5400ad 100644 --- a/drivers/block/rnbd/README +++ b/drivers/block/rnbd/README @@ -90,3 +90,4 @@ Kleber Souza Lutz Pogrell Milind Dumbare Roman Penyaev +Swapnil Ingle -- GitLab From 3a21777c6ee99749bac10727b3c17e5bcfebe5c1 Mon Sep 17 00:00:00 2001 From: Jack Wang Date: Fri, 8 Jan 2021 15:36:34 +0100 Subject: [PATCH 0385/2012] block/rnbd-clt: avoid module unload race with close confirmation We had kernel panic, it is caused by unload module and last close confirmation. call trace: [1196029.743127] free_sess+0x15/0x50 [rtrs_client] [1196029.743128] rtrs_clt_close+0x4c/0x70 [rtrs_client] [1196029.743129] ? rnbd_clt_unmap_device+0x1b0/0x1b0 [rnbd_client] [1196029.743130] close_rtrs+0x25/0x50 [rnbd_client] [1196029.743131] rnbd_client_exit+0x93/0xb99 [rnbd_client] [1196029.743132] __x64_sys_delete_module+0x190/0x260 And in the crashdump confirmation kworker is also running. PID: 6943 TASK: ffff9e2ac8098000 CPU: 4 COMMAND: "kworker/4:2" #0 [ffffb206cf337c30] __schedule at ffffffff9f93f891 #1 [ffffb206cf337cc8] schedule at ffffffff9f93fe98 #2 [ffffb206cf337cd0] schedule_timeout at ffffffff9f943938 #3 [ffffb206cf337d50] wait_for_completion at ffffffff9f9410a7 #4 [ffffb206cf337da0] __flush_work at ffffffff9f08ce0e #5 [ffffb206cf337e20] rtrs_clt_close_conns at ffffffffc0d5f668 [rtrs_client] #6 [ffffb206cf337e48] rtrs_clt_close at ffffffffc0d5f801 [rtrs_client] #7 [ffffb206cf337e68] close_rtrs at ffffffffc0d26255 [rnbd_client] #8 [ffffb206cf337e78] free_sess at ffffffffc0d262ad [rnbd_client] #9 [ffffb206cf337e88] rnbd_clt_put_dev at ffffffffc0d266a7 [rnbd_client] The problem is both code path try to close same session, which lead to panic. To fix it, just skip the sess if the refcount already drop to 0. Fixes: f7a7a5c228d4 ("block/rnbd: client: main functionality") Signed-off-by: Jack Wang Reviewed-by: Gioh Kim Signed-off-by: Jens Axboe --- drivers/block/rnbd/rnbd-clt.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/block/rnbd/rnbd-clt.c b/drivers/block/rnbd/rnbd-clt.c index 506e9094487f6..45a4700766524 100644 --- a/drivers/block/rnbd/rnbd-clt.c +++ b/drivers/block/rnbd/rnbd-clt.c @@ -1697,7 +1697,8 @@ static void rnbd_destroy_sessions(void) */ list_for_each_entry_safe(sess, sn, &sess_list, list) { - WARN_ON(!rnbd_clt_get_sess(sess)); + if (!rnbd_clt_get_sess(sess)) + continue; close_rtrs(sess); list_for_each_entry_safe(dev, tn, &sess->devs_list, list) { /* -- GitLab From 02f938e9fed1681791605ca8b96c2d9da9355f6a Mon Sep 17 00:00:00 2001 From: John Garry Date: Fri, 8 Jan 2021 16:55:37 +0800 Subject: [PATCH 0386/2012] blk-mq-debugfs: Add decode for BLK_MQ_F_TAG_HCTX_SHARED Showing the hctx flags for when BLK_MQ_F_TAG_HCTX_SHARED is set gives something like: root@debian:/home/john# more /sys/kernel/debug/block/sda/hctx0/flags alloc_policy=FIFO SHOULD_MERGE|TAG_QUEUE_SHARED|3 Add the decoding for that flag. Fixes: 32bc15afed04b ("blk-mq: Facilitate a shared sbitmap per tagset") Signed-off-by: John Garry Signed-off-by: Jens Axboe --- block/blk-mq-debugfs.c | 1 + 1 file changed, 1 insertion(+) diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c index e21eed20a1551..712a95f74ccb8 100644 --- a/block/blk-mq-debugfs.c +++ b/block/blk-mq-debugfs.c @@ -246,6 +246,7 @@ static const char *const hctx_flag_name[] = { HCTX_FLAG_NAME(BLOCKING), HCTX_FLAG_NAME(NO_SCHED), HCTX_FLAG_NAME(STACKING), + HCTX_FLAG_NAME(TAG_HCTX_SHARED), }; #undef HCTX_FLAG_NAME -- GitLab From bac717171971176b78c72d15a8b6961764ab197f Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 30 Dec 2020 16:20:05 +0100 Subject: [PATCH 0387/2012] ARM: picoxcell: fix missing interrupt-parent properties dtc points out that the interrupts for some devices are not parsable: picoxcell-pc3x2.dtsi:45.19-49.5: Warning (interrupts_property): /paxi/gem@30000: Missing interrupt-parent picoxcell-pc3x2.dtsi:51.21-55.5: Warning (interrupts_property): /paxi/dmac@40000: Missing interrupt-parent picoxcell-pc3x2.dtsi:57.21-61.5: Warning (interrupts_property): /paxi/dmac@50000: Missing interrupt-parent picoxcell-pc3x2.dtsi:233.21-237.5: Warning (interrupts_property): /rwid-axi/axi2pico@c0000000: Missing interrupt-parent There are two VIC instances, so it's not clear which one needs to be used. I found the BSP sources that reference VIC0, so use that: https://github.com/r1mikey/meta-picoxcell/blob/master/recipes-kernel/linux/linux-picochip-3.0/0001-picoxcell-support-for-Picochip-picoXcell-SoC.patch Acked-by: Jamie Iles Link: https://lore.kernel.org/r/20201230152010.3914962-1-arnd@kernel.org' Signed-off-by: Arnd Bergmann --- arch/arm/boot/dts/picoxcell-pc3x2.dtsi | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm/boot/dts/picoxcell-pc3x2.dtsi b/arch/arm/boot/dts/picoxcell-pc3x2.dtsi index c4c6c7e9e37b6..5898879a3038e 100644 --- a/arch/arm/boot/dts/picoxcell-pc3x2.dtsi +++ b/arch/arm/boot/dts/picoxcell-pc3x2.dtsi @@ -45,18 +45,21 @@ emac: gem@30000 { compatible = "cadence,gem"; reg = <0x30000 0x10000>; + interrupt-parent = <&vic0>; interrupts = <31>; }; dmac1: dmac@40000 { compatible = "snps,dw-dmac"; reg = <0x40000 0x10000>; + interrupt-parent = <&vic0>; interrupts = <25>; }; dmac2: dmac@50000 { compatible = "snps,dw-dmac"; reg = <0x50000 0x10000>; + interrupt-parent = <&vic0>; interrupts = <26>; }; @@ -233,6 +236,7 @@ axi2pico@c0000000 { compatible = "picochip,axi2pico-pc3x2"; reg = <0xc0000000 0x10000>; + interrupt-parent = <&vic0>; interrupts = <13 14 15 16 17 18 19 20 21>; }; }; -- GitLab From 84e261553e6f919bf0b4d65244599ab2b41f1da5 Mon Sep 17 00:00:00 2001 From: David Arcari Date: Thu, 7 Jan 2021 09:47:07 -0500 Subject: [PATCH 0388/2012] hwmon: (amd_energy) fix allocation of hwmon_channel_info config hwmon, specifically hwmon_num_channel_attrs, expects the config array in the hwmon_channel_info structure to be terminated by a zero entry. amd_energy does not honor this convention. As result, a KASAN warning is possible. Fix this by adding an additional entry and setting it to zero. Fixes: 8abee9566b7e ("hwmon: Add amd_energy driver to report energy counters") Signed-off-by: David Arcari Cc: Naveen Krishna Chatradhi Cc: Jean Delvare Cc: Guenter Roeck Cc: linux-kernel@vger.kernel.org Cc: stable@vger.kernel.org Signed-off-by: David Arcari Acked-by: Naveen Krishna Chatradhi Link: https://lore.kernel.org/r/20210107144707.6927-1-darcari@redhat.com Signed-off-by: Guenter Roeck --- drivers/hwmon/amd_energy.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/hwmon/amd_energy.c b/drivers/hwmon/amd_energy.c index 9b306448b7a0f..822c2e74b98d4 100644 --- a/drivers/hwmon/amd_energy.c +++ b/drivers/hwmon/amd_energy.c @@ -222,7 +222,7 @@ static int amd_create_sensor(struct device *dev, */ cpus = num_present_cpus() / num_siblings; - s_config = devm_kcalloc(dev, cpus + sockets, + s_config = devm_kcalloc(dev, cpus + sockets + 1, sizeof(u32), GFP_KERNEL); if (!s_config) return -ENOMEM; @@ -254,6 +254,7 @@ static int amd_create_sensor(struct device *dev, scnprintf(label_l[i], 10, "Esocket%u", (i - cpus)); } + s_config[i] = 0; return 0; } -- GitLab From d0243bbd5dd3ebbd49dafa8b56bb911d971131d0 Mon Sep 17 00:00:00 2001 From: Meng Li Date: Tue, 5 Jan 2021 15:09:27 +0800 Subject: [PATCH 0389/2012] drivers core: Free dma_range_map when driver probe failed There will be memory leak if driver probe failed. Trace as below: backtrace: [<000000002415258f>] kmemleak_alloc+0x3c/0x50 [<00000000f447ebe4>] __kmalloc+0x208/0x530 [<0000000048bc7b3a>] of_dma_get_range+0xe4/0x1b0 [<0000000041e39065>] of_dma_configure_id+0x58/0x27c [<000000006356866a>] platform_dma_configure+0x2c/0x40 ...... [<000000000afcf9b5>] ret_from_fork+0x10/0x3c This issue is introduced by commit e0d072782c73("dma-mapping: introduce DMA range map, supplanting dma_pfn_offset "). It doesn't free dma_range_map when driver probe failed and cause above memory leak. So, add code to free it in error path. Fixes: e0d072782c73 ("dma-mapping: introduce DMA range map, supplanting dma_pfn_offset ") Cc: stable@vger.kernel.org Signed-off-by: Meng Li Link: https://lore.kernel.org/r/20210105070927.14968-1-Meng.Li@windriver.com Signed-off-by: Greg Kroah-Hartman --- drivers/base/dd.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/base/dd.c b/drivers/base/dd.c index 2f32f38a11ed0..0b76b54237781 100644 --- a/drivers/base/dd.c +++ b/drivers/base/dd.c @@ -619,6 +619,8 @@ dev_groups_failed: else if (drv->remove) drv->remove(dev); probe_failed: + kfree(dev->dma_range_map); + dev->dma_range_map = NULL; if (dev->bus) blocking_notifier_call_chain(&dev->bus->p->bus_notifier, BUS_NOTIFY_DRIVER_NOT_BOUND, dev); -- GitLab From e076ab2a2ca70a0270232067cd49f76cd92efe64 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Thu, 7 Jan 2021 17:08:30 -0500 Subject: [PATCH 0390/2012] btrfs: shrink delalloc pages instead of full inodes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 38d715f494f2 ("btrfs: use btrfs_start_delalloc_roots in shrink_delalloc") cleaned up how we do delalloc shrinking by utilizing some infrastructure we have in place to flush inodes that we use for device replace and snapshot. However this introduced a pretty serious performance regression. To reproduce the user untarred the source tarball of Firefox (360MiB xz compressed/1.5GiB uncompressed), and would see it take anywhere from 5 to 20 times as long to untar in 5.10 compared to 5.9. This was observed on fast devices (SSD and better) and not on HDD. The root cause is because before we would generally use the normal writeback path to reclaim delalloc space, and for this we would provide it with the number of pages we wanted to flush. The referenced commit changed this to flush that many inodes, which drastically increased the amount of space we were flushing in certain cases, which severely affected performance. We cannot revert this patch unfortunately because of 3d45f221ce62 ("btrfs: fix deadlock when cloning inline extent and low on free metadata space") which requires the ability to skip flushing inodes that are being cloned in certain scenarios, which means we need to keep using our flushing infrastructure or risk re-introducing the deadlock. Instead to fix this problem we can go back to providing btrfs_start_delalloc_roots with a number of pages to flush, and then set up a writeback_control and utilize sync_inode() to handle the flushing for us. This gives us the same behavior we had prior to the fix, while still allowing us to avoid the deadlock that was fixed by Filipe. I redid the users original test and got the following results on one of our test machines (256GiB of ram, 56 cores, 2TiB Intel NVMe drive) 5.9 0m54.258s 5.10 1m26.212s 5.10+patch 0m38.800s 5.10+patch is significantly faster than plain 5.9 because of my patch series "Change data reservations to use the ticketing infra" which contained the patch that introduced the regression, but generally improved the overall ENOSPC flushing mechanisms. Additional testing on consumer-grade SSD (8GiB ram, 8 CPU) confirm the results: 5.10.5 4m00s 5.10.5+patch 1m08s 5.11-rc2 5m14s 5.11-rc2+patch 1m30s Reported-by: René Rebe Fixes: 38d715f494f2 ("btrfs: use btrfs_start_delalloc_roots in shrink_delalloc") CC: stable@vger.kernel.org # 5.10 Signed-off-by: Josef Bacik Tested-by: David Sterba Reviewed-by: David Sterba [ add my test results ] Signed-off-by: David Sterba --- fs/btrfs/inode.c | 60 +++++++++++++++++++++++++++++++------------ fs/btrfs/space-info.c | 4 ++- 2 files changed, 46 insertions(+), 18 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 070716650df87..a8e0a6b038d3e 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -9390,7 +9390,8 @@ static struct btrfs_delalloc_work *btrfs_alloc_delalloc_work(struct inode *inode * some fairly slow code that needs optimization. This walks the list * of all the inodes with pending delalloc and forces them to disk. */ -static int start_delalloc_inodes(struct btrfs_root *root, u64 *nr, bool snapshot, +static int start_delalloc_inodes(struct btrfs_root *root, + struct writeback_control *wbc, bool snapshot, bool in_reclaim_context) { struct btrfs_inode *binode; @@ -9399,6 +9400,7 @@ static int start_delalloc_inodes(struct btrfs_root *root, u64 *nr, bool snapshot struct list_head works; struct list_head splice; int ret = 0; + bool full_flush = wbc->nr_to_write == LONG_MAX; INIT_LIST_HEAD(&works); INIT_LIST_HEAD(&splice); @@ -9427,18 +9429,24 @@ static int start_delalloc_inodes(struct btrfs_root *root, u64 *nr, bool snapshot if (snapshot) set_bit(BTRFS_INODE_SNAPSHOT_FLUSH, &binode->runtime_flags); - work = btrfs_alloc_delalloc_work(inode); - if (!work) { - iput(inode); - ret = -ENOMEM; - goto out; - } - list_add_tail(&work->list, &works); - btrfs_queue_work(root->fs_info->flush_workers, - &work->work); - if (*nr != U64_MAX) { - (*nr)--; - if (*nr == 0) + if (full_flush) { + work = btrfs_alloc_delalloc_work(inode); + if (!work) { + iput(inode); + ret = -ENOMEM; + goto out; + } + list_add_tail(&work->list, &works); + btrfs_queue_work(root->fs_info->flush_workers, + &work->work); + } else { + ret = sync_inode(inode, wbc); + if (!ret && + test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT, + &BTRFS_I(inode)->runtime_flags)) + ret = sync_inode(inode, wbc); + btrfs_add_delayed_iput(inode); + if (ret || wbc->nr_to_write <= 0) goto out; } cond_resched(); @@ -9464,18 +9472,29 @@ out: int btrfs_start_delalloc_snapshot(struct btrfs_root *root) { + struct writeback_control wbc = { + .nr_to_write = LONG_MAX, + .sync_mode = WB_SYNC_NONE, + .range_start = 0, + .range_end = LLONG_MAX, + }; struct btrfs_fs_info *fs_info = root->fs_info; - u64 nr = U64_MAX; if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) return -EROFS; - return start_delalloc_inodes(root, &nr, true, false); + return start_delalloc_inodes(root, &wbc, true, false); } int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, u64 nr, bool in_reclaim_context) { + struct writeback_control wbc = { + .nr_to_write = (nr == U64_MAX) ? LONG_MAX : (unsigned long)nr, + .sync_mode = WB_SYNC_NONE, + .range_start = 0, + .range_end = LLONG_MAX, + }; struct btrfs_root *root; struct list_head splice; int ret; @@ -9489,6 +9508,13 @@ int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, u64 nr, spin_lock(&fs_info->delalloc_root_lock); list_splice_init(&fs_info->delalloc_roots, &splice); while (!list_empty(&splice) && nr) { + /* + * Reset nr_to_write here so we know that we're doing a full + * flush. + */ + if (nr == U64_MAX) + wbc.nr_to_write = LONG_MAX; + root = list_first_entry(&splice, struct btrfs_root, delalloc_root); root = btrfs_grab_root(root); @@ -9497,9 +9523,9 @@ int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, u64 nr, &fs_info->delalloc_roots); spin_unlock(&fs_info->delalloc_root_lock); - ret = start_delalloc_inodes(root, &nr, false, in_reclaim_context); + ret = start_delalloc_inodes(root, &wbc, false, in_reclaim_context); btrfs_put_root(root); - if (ret < 0) + if (ret < 0 || wbc.nr_to_write <= 0) goto out; spin_lock(&fs_info->delalloc_root_lock); } diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c index 67e55c5479b8e..e8347461c8ddd 100644 --- a/fs/btrfs/space-info.c +++ b/fs/btrfs/space-info.c @@ -532,7 +532,9 @@ static void shrink_delalloc(struct btrfs_fs_info *fs_info, loops = 0; while ((delalloc_bytes || dio_bytes) && loops < 3) { - btrfs_start_delalloc_roots(fs_info, items, true); + u64 nr_pages = min(delalloc_bytes, to_reclaim) >> PAGE_SHIFT; + + btrfs_start_delalloc_roots(fs_info, nr_pages, true); loops++; if (wait_ordered && !trans) { -- GitLab From 29f7c54b253fc18bff9bf7e9f303b75deb285c7a Mon Sep 17 00:00:00 2001 From: John Garry Date: Mon, 21 Dec 2020 22:30:55 +0800 Subject: [PATCH 0391/2012] Driver core: platform: Add extra error check in devm_platform_get_irqs_affinity() The current check of nvec < minvec for nvec returned from platform_irq_count() will not detect a negative error code in nvec. This is because minvec is unsigned, and, as such, nvec is promoted to unsigned in that check, which will make it a huge number (if it contained -EPROBE_DEFER). In practice, an error should not occur in nvec for the only in-tree user, but add a check anyway. Fixes: e15f2fa959f2 ("driver core: platform: Add devm_platform_get_irqs_affinity()") Reported-by: Dan Carpenter Signed-off-by: John Garry Link: https://lore.kernel.org/r/1608561055-231244-1-git-send-email-john.garry@huawei.com Signed-off-by: Greg Kroah-Hartman --- drivers/base/platform.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/base/platform.c b/drivers/base/platform.c index 95fd1549f87de..8456d8384ac8e 100644 --- a/drivers/base/platform.c +++ b/drivers/base/platform.c @@ -366,6 +366,8 @@ int devm_platform_get_irqs_affinity(struct platform_device *dev, return -ERANGE; nvec = platform_irq_count(dev); + if (nvec < 0) + return nvec; if (nvec < minvec) return -ENOSPC; -- GitLab From 7c38e769d5c508939ce5dc26df72602f3c902342 Mon Sep 17 00:00:00 2001 From: Seth Miller Date: Mon, 4 Jan 2021 22:58:12 -0600 Subject: [PATCH 0392/2012] HID: Ignore battery for Elan touchscreen on ASUS UX550 Battery status is being reported for the Elan touchscreen on ASUS UX550 laptops despite not having a batter. It always shows either 0 or 1%. Signed-off-by: Seth Miller Signed-off-by: Jiri Kosina --- drivers/hid/hid-ids.h | 1 + drivers/hid/hid-input.c | 2 ++ 2 files changed, 3 insertions(+) diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index 4c5f23640f9c7..5ba0aa1d23353 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -389,6 +389,7 @@ #define USB_DEVICE_ID_TOSHIBA_CLICK_L9W 0x0401 #define USB_DEVICE_ID_HP_X2 0x074d #define USB_DEVICE_ID_HP_X2_10_COVER 0x0755 +#define USB_DEVICE_ID_ASUS_UX550_TOUCHSCREEN 0x2706 #define USB_VENDOR_ID_ELECOM 0x056e #define USB_DEVICE_ID_ELECOM_BM084 0x0061 diff --git a/drivers/hid/hid-input.c b/drivers/hid/hid-input.c index dc7f6b4a775c9..f23027d2795ba 100644 --- a/drivers/hid/hid-input.c +++ b/drivers/hid/hid-input.c @@ -322,6 +322,8 @@ static const struct hid_device_id hid_battery_quirks[] = { { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_DINOVO_EDGE_KBD), HID_BATTERY_QUIRK_IGNORE }, + { HID_USB_DEVICE(USB_VENDOR_ID_ELAN, USB_DEVICE_ID_ASUS_UX550_TOUCHSCREEN), + HID_BATTERY_QUIRK_IGNORE }, {} }; -- GitLab From 35d0b389f3b23439ad15b610d6e43fc72fc75779 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 5 Jan 2021 11:32:43 -0700 Subject: [PATCH 0393/2012] task_work: unconditionally run task_work from get_signal() Song reported a boot regression in a kvm image with 5.11-rc, and bisected it down to the below patch. Debugging this issue, turns out that the boot stalled when a task is waiting on a pipe being released. As we no longer run task_work from get_signal() unless it's queued with TWA_SIGNAL, the task goes idle without running the task_work. This prevents ->release() from being called on the pipe, which another boot task is waiting on. For now, re-instate the unconditional task_work run from get_signal(). For 5.12, we'll collapse TWA_RESUME and TWA_SIGNAL, as it no longer makes sense to have a distinction between the two. This will turn task_work notification into a simple boolean, whether to notify or not. Fixes: 98b89b649fce ("signal: kill JOBCTL_TASK_WORK") Reported-by: Song Liu Tested-by: John Stultz Tested-by: Douglas Anderson Tested-by: Sedat Dilek # LLVM/Clang version 11.0.1 Signed-off-by: Jens Axboe --- kernel/signal.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/kernel/signal.c b/kernel/signal.c index 5736c55aaa1af..6b9c431da08fe 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -2550,6 +2550,9 @@ bool get_signal(struct ksignal *ksig) struct signal_struct *signal = current->signal; int signr; + if (unlikely(current->task_works)) + task_work_run(); + /* * For non-generic architectures, check for TIF_NOTIFY_SIGNAL so * that the arch handlers don't all have to do it. If we get here -- GitLab From eaa7995c529b54d68d97a30f6344cc6ca2f214a7 Mon Sep 17 00:00:00 2001 From: David Collins Date: Thu, 7 Jan 2021 17:16:02 -0800 Subject: [PATCH 0394/2012] regulator: core: avoid regulator_resolve_supply() race condition The final step in regulator_register() is to call regulator_resolve_supply() for each registered regulator (including the one in the process of being registered). The regulator_resolve_supply() function first checks if rdev->supply is NULL, then it performs various steps to try to find the supply. If successful, rdev->supply is set inside of set_supply(). This procedure can encounter a race condition if two concurrent tasks call regulator_register() near to each other on separate CPUs and one of the regulators has rdev->supply_name specified. There is currently nothing guaranteeing atomicity between the rdev->supply check and set steps. Thus, both tasks can observe rdev->supply==NULL in their regulator_resolve_supply() calls. This then results in both creating a struct regulator for the supply. One ends up actually stored in rdev->supply and the other is lost (though still present in the supply's consumer_list). Here is a kernel log snippet showing the issue: [ 12.421768] gpu_cc_gx_gdsc: supplied by pm8350_s5_level [ 12.425854] gpu_cc_gx_gdsc: supplied by pm8350_s5_level [ 12.429064] debugfs: Directory 'regulator.4-SUPPLY' with parent '17a00000.rsc:rpmh-regulator-gfxlvl-pm8350_s5_level' already present! Avoid this race condition by holding the rdev->mutex lock inside of regulator_resolve_supply() while checking and setting rdev->supply. Signed-off-by: David Collins Link: https://lore.kernel.org/r/1610068562-4410-1-git-send-email-collinsd@codeaurora.org Signed-off-by: Mark Brown --- drivers/regulator/core.c | 39 ++++++++++++++++++++++++++++----------- 1 file changed, 28 insertions(+), 11 deletions(-) diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index ca03d8e70bd13..cfc3bc9df93a0 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -1813,23 +1813,34 @@ static int regulator_resolve_supply(struct regulator_dev *rdev) { struct regulator_dev *r; struct device *dev = rdev->dev.parent; - int ret; + int ret = 0; /* No supply to resolve? */ if (!rdev->supply_name) return 0; - /* Supply already resolved? */ + /* Supply already resolved? (fast-path without locking contention) */ if (rdev->supply) return 0; + /* + * Recheck rdev->supply with rdev->mutex lock held to avoid a race + * between rdev->supply null check and setting rdev->supply in + * set_supply() from concurrent tasks. + */ + regulator_lock(rdev); + + /* Supply just resolved by a concurrent task? */ + if (rdev->supply) + goto out; + r = regulator_dev_lookup(dev, rdev->supply_name); if (IS_ERR(r)) { ret = PTR_ERR(r); /* Did the lookup explicitly defer for us? */ if (ret == -EPROBE_DEFER) - return ret; + goto out; if (have_full_constraints()) { r = dummy_regulator_rdev; @@ -1837,15 +1848,18 @@ static int regulator_resolve_supply(struct regulator_dev *rdev) } else { dev_err(dev, "Failed to resolve %s-supply for %s\n", rdev->supply_name, rdev->desc->name); - return -EPROBE_DEFER; + ret = -EPROBE_DEFER; + goto out; } } if (r == rdev) { dev_err(dev, "Supply for %s (%s) resolved to itself\n", rdev->desc->name, rdev->supply_name); - if (!have_full_constraints()) - return -EINVAL; + if (!have_full_constraints()) { + ret = -EINVAL; + goto out; + } r = dummy_regulator_rdev; get_device(&r->dev); } @@ -1859,7 +1873,8 @@ static int regulator_resolve_supply(struct regulator_dev *rdev) if (r->dev.parent && r->dev.parent != rdev->dev.parent) { if (!device_is_bound(r->dev.parent)) { put_device(&r->dev); - return -EPROBE_DEFER; + ret = -EPROBE_DEFER; + goto out; } } @@ -1867,13 +1882,13 @@ static int regulator_resolve_supply(struct regulator_dev *rdev) ret = regulator_resolve_supply(r); if (ret < 0) { put_device(&r->dev); - return ret; + goto out; } ret = set_supply(rdev, r); if (ret < 0) { put_device(&r->dev); - return ret; + goto out; } /* @@ -1886,11 +1901,13 @@ static int regulator_resolve_supply(struct regulator_dev *rdev) if (ret < 0) { _regulator_put(rdev->supply); rdev->supply = NULL; - return ret; + goto out; } } - return 0; +out: + regulator_unlock(rdev); + return ret; } /* Internal regulator request function */ -- GitLab From 70b6ff35d62050d1573876cc0e1e078acd3e6008 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 6 Jan 2021 23:47:41 +0100 Subject: [PATCH 0395/2012] cfg80211/mac80211: fix kernel-doc for SAR APIs A stray @ caused the kernel-doc parser to not understand this, fix that. Also add some missing kernel-doc. Reported-by: Stephen Rothwell Fixes: 6bdb68cef7bf ("nl80211: add common API to configure SAR power limitations") Fixes: c534e093d865 ("mac80211: add ieee80211_set_sar_specs") Signed-off-by: Johannes Berg Tested-by: Stephen Rothwell # build only Link: https://lore.kernel.org/r/20210106234740.96827c18f9bd.I8b9f0a9cbfe186931ef9640046f414371f216914@changeid Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 5 ++++- include/net/mac80211.h | 1 + 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 9a4bbccddc7f7..1b3954afcda42 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1756,7 +1756,7 @@ struct cfg80211_sar_specs { /** - * @struct cfg80211_sar_chan_ranges - sar frequency ranges + * struct cfg80211_sar_chan_ranges - sar frequency ranges * @start_freq: start range edge frequency * @end_freq: end range edge frequency */ @@ -3972,6 +3972,8 @@ struct mgmt_frame_regs { * This callback may sleep. * @reset_tid_config: Reset TID specific configuration for the peer, for the * given TIDs. This callback may sleep. + * + * @set_sar_specs: Update the SAR (TX power) settings. */ struct cfg80211_ops { int (*suspend)(struct wiphy *wiphy, struct cfg80211_wowlan *wow); @@ -4929,6 +4931,7 @@ struct wiphy_iftype_akm_suites { * @max_data_retry_count: maximum supported per TID retry count for * configuration through the %NL80211_TID_CONFIG_ATTR_RETRY_SHORT and * %NL80211_TID_CONFIG_ATTR_RETRY_LONG attributes + * @sar_capa: SAR control capabilities */ struct wiphy { /* assign these fields before you register the wiphy */ diff --git a/include/net/mac80211.h b/include/net/mac80211.h index d315740581f1e..2bdbf62f4ecd7 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -3880,6 +3880,7 @@ enum ieee80211_reconfig_type { * This callback may sleep. * @sta_set_4addr: Called to notify the driver when a station starts/stops using * 4-address mode + * @set_sar_specs: Update the SAR (TX power) settings. */ struct ieee80211_ops { void (*tx)(struct ieee80211_hw *hw, -- GitLab From 51d62f2f2c501a93d9a6a46f43731f984e227764 Mon Sep 17 00:00:00 2001 From: Ilan Peer Date: Tue, 5 Jan 2021 16:56:57 +0200 Subject: [PATCH 0396/2012] cfg80211: Save the regulatory domain with a lock Saving the regulatory domain while setting custom regulatory domain was done while accessing a RCU protected pointer but without any protection. Fix this by using RTNL while accessing the pointer. Signed-off-by: Ilan Peer Reported-by: syzbot+27771d4abcd9b7a1f5d3@syzkaller.appspotmail.com Reported-by: syzbot+db4035751c56c0079282@syzkaller.appspotmail.com Reported-by: Hans de Goede Fixes: beee24695157 ("cfg80211: Save the regulatory domain when setting custom regulatory") Signed-off-by: Luca Coelho Link: https://lore.kernel.org/r/iwlwifi.20210105165657.613e9a876829.Ia38d27dbebea28bf9c56d70691d243186ede70e7@changeid Signed-off-by: Johannes Berg --- net/wireless/reg.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/net/wireless/reg.c b/net/wireless/reg.c index bb72447ad9602..8114bba8556c7 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -5,7 +5,7 @@ * Copyright 2008-2011 Luis R. Rodriguez * Copyright 2013-2014 Intel Mobile Communications GmbH * Copyright 2017 Intel Deutschland GmbH - * Copyright (C) 2018 - 2019 Intel Corporation + * Copyright (C) 2018 - 2021 Intel Corporation * * Permission to use, copy, modify, and/or distribute this software for any * purpose with or without fee is hereby granted, provided that the above @@ -139,6 +139,11 @@ static const struct ieee80211_regdomain *get_cfg80211_regdom(void) return rcu_dereference_rtnl(cfg80211_regdomain); } +/* + * Returns the regulatory domain associated with the wiphy. + * + * Requires either RTNL or RCU protection + */ const struct ieee80211_regdomain *get_wiphy_regdom(struct wiphy *wiphy) { return rcu_dereference_rtnl(wiphy->regd); @@ -2571,9 +2576,13 @@ void wiphy_apply_custom_regulatory(struct wiphy *wiphy, if (IS_ERR(new_regd)) return; + rtnl_lock(); + tmp = get_wiphy_regdom(wiphy); rcu_assign_pointer(wiphy->regd, new_regd); rcu_free_regdom(tmp); + + rtnl_unlock(); } EXPORT_SYMBOL(wiphy_apply_custom_regulatory); -- GitLab From 0378c625afe80eb3f212adae42cc33c9f6f31abf Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Wed, 6 Jan 2021 18:19:05 -0500 Subject: [PATCH 0397/2012] dm: eliminate potential source of excessive kernel log noise There wasn't ever a real need to log an error in the kernel log for ioctls issued with insufficient permissions. Simply return an error and if an admin/user is sufficiently motivated they can enable DM's dynamic debugging to see an explanation for why the ioctls were disallowed. Reported-by: Nir Soffer Fixes: e980f62353c6 ("dm: don't allow ioctls to targets that don't map to whole devices") Signed-off-by: Mike Snitzer --- drivers/md/dm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/dm.c b/drivers/md/dm.c index b3c3c8b4cb428..7bac564f3faa6 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -562,7 +562,7 @@ static int dm_blk_ioctl(struct block_device *bdev, fmode_t mode, * subset of the parent bdev; require extra privileges. */ if (!capable(CAP_SYS_RAWIO)) { - DMWARN_LIMIT( + DMDEBUG_LIMIT( "%s: sending ioctl %x to DM device without required privilege.", current->comm, cmd); r = -ENOIOCTLCMD; -- GitLab From 9b5948267adc9e689da609eb61cf7ed49cae5fa8 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Fri, 8 Jan 2021 11:15:56 -0500 Subject: [PATCH 0398/2012] dm integrity: fix flush with external metadata device With external metadata device, flush requests are not passed down to the data device. Fix this by submitting the flush request in dm_integrity_flush_buffers. In order to not degrade performance, we overlap the data device flush with the metadata device flush. Reported-by: Lukas Straub Signed-off-by: Mikulas Patocka Cc: stable@vger.kernel.org Signed-off-by: Mike Snitzer --- drivers/md/dm-bufio.c | 6 ++++ drivers/md/dm-integrity.c | 60 ++++++++++++++++++++++++++++++++------- include/linux/dm-bufio.h | 1 + 3 files changed, 56 insertions(+), 11 deletions(-) diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c index 9c1a86bde658e..fce4cbf9529d6 100644 --- a/drivers/md/dm-bufio.c +++ b/drivers/md/dm-bufio.c @@ -1534,6 +1534,12 @@ sector_t dm_bufio_get_device_size(struct dm_bufio_client *c) } EXPORT_SYMBOL_GPL(dm_bufio_get_device_size); +struct dm_io_client *dm_bufio_get_dm_io_client(struct dm_bufio_client *c) +{ + return c->dm_io; +} +EXPORT_SYMBOL_GPL(dm_bufio_get_dm_io_client); + sector_t dm_bufio_get_block_number(struct dm_buffer *b) { return b->block; diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index 5a7a1b90e671c..11c7c538f7a98 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -1379,12 +1379,52 @@ thorough_test: #undef MAY_BE_HASH } -static void dm_integrity_flush_buffers(struct dm_integrity_c *ic) +struct flush_request { + struct dm_io_request io_req; + struct dm_io_region io_reg; + struct dm_integrity_c *ic; + struct completion comp; +}; + +static void flush_notify(unsigned long error, void *fr_) +{ + struct flush_request *fr = fr_; + if (unlikely(error != 0)) + dm_integrity_io_error(fr->ic, "flusing disk cache", -EIO); + complete(&fr->comp); +} + +static void dm_integrity_flush_buffers(struct dm_integrity_c *ic, bool flush_data) { int r; + + struct flush_request fr; + + if (!ic->meta_dev) + flush_data = false; + if (flush_data) { + fr.io_req.bi_op = REQ_OP_WRITE, + fr.io_req.bi_op_flags = REQ_PREFLUSH | REQ_SYNC, + fr.io_req.mem.type = DM_IO_KMEM, + fr.io_req.mem.ptr.addr = NULL, + fr.io_req.notify.fn = flush_notify, + fr.io_req.notify.context = &fr; + fr.io_req.client = dm_bufio_get_dm_io_client(ic->bufio), + fr.io_reg.bdev = ic->dev->bdev, + fr.io_reg.sector = 0, + fr.io_reg.count = 0, + fr.ic = ic; + init_completion(&fr.comp); + r = dm_io(&fr.io_req, 1, &fr.io_reg, NULL); + BUG_ON(r); + } + r = dm_bufio_write_dirty_buffers(ic->bufio); if (unlikely(r)) dm_integrity_io_error(ic, "writing tags", r); + + if (flush_data) + wait_for_completion(&fr.comp); } static void sleep_on_endio_wait(struct dm_integrity_c *ic) @@ -2110,7 +2150,7 @@ offload_to_thread: if (unlikely(dio->op == REQ_OP_DISCARD) && likely(ic->mode != 'D')) { integrity_metadata(&dio->work); - dm_integrity_flush_buffers(ic); + dm_integrity_flush_buffers(ic, false); dio->in_flight = (atomic_t)ATOMIC_INIT(1); dio->completion = NULL; @@ -2195,7 +2235,7 @@ static void integrity_commit(struct work_struct *w) flushes = bio_list_get(&ic->flush_bio_list); if (unlikely(ic->mode != 'J')) { spin_unlock_irq(&ic->endio_wait.lock); - dm_integrity_flush_buffers(ic); + dm_integrity_flush_buffers(ic, true); goto release_flush_bios; } @@ -2409,7 +2449,7 @@ skip_io: complete_journal_op(&comp); wait_for_completion_io(&comp.comp); - dm_integrity_flush_buffers(ic); + dm_integrity_flush_buffers(ic, true); } static void integrity_writer(struct work_struct *w) @@ -2451,7 +2491,7 @@ static void recalc_write_super(struct dm_integrity_c *ic) { int r; - dm_integrity_flush_buffers(ic); + dm_integrity_flush_buffers(ic, false); if (dm_integrity_failed(ic)) return; @@ -2654,7 +2694,7 @@ static void bitmap_flush_work(struct work_struct *work) unsigned long limit; struct bio *bio; - dm_integrity_flush_buffers(ic); + dm_integrity_flush_buffers(ic, false); range.logical_sector = 0; range.n_sectors = ic->provided_data_sectors; @@ -2663,9 +2703,7 @@ static void bitmap_flush_work(struct work_struct *work) add_new_range_and_wait(ic, &range); spin_unlock_irq(&ic->endio_wait.lock); - dm_integrity_flush_buffers(ic); - if (ic->meta_dev) - blkdev_issue_flush(ic->dev->bdev, GFP_NOIO); + dm_integrity_flush_buffers(ic, true); limit = ic->provided_data_sectors; if (ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING)) { @@ -2934,11 +2972,11 @@ static void dm_integrity_postsuspend(struct dm_target *ti) if (ic->meta_dev) queue_work(ic->writer_wq, &ic->writer_work); drain_workqueue(ic->writer_wq); - dm_integrity_flush_buffers(ic); + dm_integrity_flush_buffers(ic, true); } if (ic->mode == 'B') { - dm_integrity_flush_buffers(ic); + dm_integrity_flush_buffers(ic, true); #if 1 /* set to 0 to test bitmap replay code */ init_journal(ic, 0, ic->journal_sections, 0); diff --git a/include/linux/dm-bufio.h b/include/linux/dm-bufio.h index 29d255fdd5d64..90bd558a17f51 100644 --- a/include/linux/dm-bufio.h +++ b/include/linux/dm-bufio.h @@ -150,6 +150,7 @@ void dm_bufio_set_minimum_buffers(struct dm_bufio_client *c, unsigned n); unsigned dm_bufio_get_block_size(struct dm_bufio_client *c); sector_t dm_bufio_get_device_size(struct dm_bufio_client *c); +struct dm_io_client *dm_bufio_get_dm_io_client(struct dm_bufio_client *c); sector_t dm_bufio_get_block_number(struct dm_buffer *b); void *dm_bufio_get_block_data(struct dm_buffer *b); void *dm_bufio_get_aux_data(struct dm_buffer *b); -- GitLab From 792001f4f7aa036b1f1c1ed7bce44bb49126208a Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Fri, 18 Dec 2020 15:56:12 -0800 Subject: [PATCH 0399/2012] libbpf: Add user-space variants of BPF_CORE_READ() family of macros Add BPF_CORE_READ_USER(), BPF_CORE_READ_USER_STR() and their _INTO() variations to allow reading CO-RE-relocatable kernel data structures from the user-space. One of such cases is reading input arguments of syscalls, while reaping the benefits of CO-RE relocations w.r.t. handling 32/64 bit conversions and handling missing/new fields in UAPI data structs. Suggested-by: Gilad Reti Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20201218235614.2284956-2-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/bpf_core_read.h | 98 +++++++++++++++++++++-------------- 1 file changed, 59 insertions(+), 39 deletions(-) diff --git a/tools/lib/bpf/bpf_core_read.h b/tools/lib/bpf/bpf_core_read.h index bbcefb3ff5a57..db0c735ceb53b 100644 --- a/tools/lib/bpf/bpf_core_read.h +++ b/tools/lib/bpf/bpf_core_read.h @@ -195,17 +195,20 @@ enum bpf_enum_value_kind { * (local) BTF, used to record relocation. */ #define bpf_core_read(dst, sz, src) \ - bpf_probe_read_kernel(dst, sz, \ - (const void *)__builtin_preserve_access_index(src)) + bpf_probe_read_kernel(dst, sz, (const void *)__builtin_preserve_access_index(src)) +#define bpf_core_read_user(dst, sz, src) \ + bpf_probe_read_user(dst, sz, (const void *)__builtin_preserve_access_index(src)) /* * bpf_core_read_str() is a thin wrapper around bpf_probe_read_str() * additionally emitting BPF CO-RE field relocation for specified source * argument. */ #define bpf_core_read_str(dst, sz, src) \ - bpf_probe_read_kernel_str(dst, sz, \ - (const void *)__builtin_preserve_access_index(src)) + bpf_probe_read_kernel_str(dst, sz, (const void *)__builtin_preserve_access_index(src)) + +#define bpf_core_read_user_str(dst, sz, src) \ + bpf_probe_read_user_str(dst, sz, (const void *)__builtin_preserve_access_index(src)) #define ___concat(a, b) a ## b #define ___apply(fn, n) ___concat(fn, n) @@ -264,30 +267,29 @@ enum bpf_enum_value_kind { read_fn((void *)(dst), sizeof(*(dst)), &((src_type)(src))->accessor) /* "recursively" read a sequence of inner pointers using local __t var */ -#define ___rd_first(src, a) ___read(bpf_core_read, &__t, ___type(src), src, a); -#define ___rd_last(...) \ - ___read(bpf_core_read, &__t, \ - ___type(___nolast(__VA_ARGS__)), __t, ___last(__VA_ARGS__)); -#define ___rd_p1(...) const void *__t; ___rd_first(__VA_ARGS__) -#define ___rd_p2(...) ___rd_p1(___nolast(__VA_ARGS__)) ___rd_last(__VA_ARGS__) -#define ___rd_p3(...) ___rd_p2(___nolast(__VA_ARGS__)) ___rd_last(__VA_ARGS__) -#define ___rd_p4(...) ___rd_p3(___nolast(__VA_ARGS__)) ___rd_last(__VA_ARGS__) -#define ___rd_p5(...) ___rd_p4(___nolast(__VA_ARGS__)) ___rd_last(__VA_ARGS__) -#define ___rd_p6(...) ___rd_p5(___nolast(__VA_ARGS__)) ___rd_last(__VA_ARGS__) -#define ___rd_p7(...) ___rd_p6(___nolast(__VA_ARGS__)) ___rd_last(__VA_ARGS__) -#define ___rd_p8(...) ___rd_p7(___nolast(__VA_ARGS__)) ___rd_last(__VA_ARGS__) -#define ___rd_p9(...) ___rd_p8(___nolast(__VA_ARGS__)) ___rd_last(__VA_ARGS__) -#define ___read_ptrs(src, ...) \ - ___apply(___rd_p, ___narg(__VA_ARGS__))(src, __VA_ARGS__) - -#define ___core_read0(fn, dst, src, a) \ +#define ___rd_first(fn, src, a) ___read(fn, &__t, ___type(src), src, a); +#define ___rd_last(fn, ...) \ + ___read(fn, &__t, ___type(___nolast(__VA_ARGS__)), __t, ___last(__VA_ARGS__)); +#define ___rd_p1(fn, ...) const void *__t; ___rd_first(fn, __VA_ARGS__) +#define ___rd_p2(fn, ...) ___rd_p1(fn, ___nolast(__VA_ARGS__)) ___rd_last(fn, __VA_ARGS__) +#define ___rd_p3(fn, ...) ___rd_p2(fn, ___nolast(__VA_ARGS__)) ___rd_last(fn, __VA_ARGS__) +#define ___rd_p4(fn, ...) ___rd_p3(fn, ___nolast(__VA_ARGS__)) ___rd_last(fn, __VA_ARGS__) +#define ___rd_p5(fn, ...) ___rd_p4(fn, ___nolast(__VA_ARGS__)) ___rd_last(fn, __VA_ARGS__) +#define ___rd_p6(fn, ...) ___rd_p5(fn, ___nolast(__VA_ARGS__)) ___rd_last(fn, __VA_ARGS__) +#define ___rd_p7(fn, ...) ___rd_p6(fn, ___nolast(__VA_ARGS__)) ___rd_last(fn, __VA_ARGS__) +#define ___rd_p8(fn, ...) ___rd_p7(fn, ___nolast(__VA_ARGS__)) ___rd_last(fn, __VA_ARGS__) +#define ___rd_p9(fn, ...) ___rd_p8(fn, ___nolast(__VA_ARGS__)) ___rd_last(fn, __VA_ARGS__) +#define ___read_ptrs(fn, src, ...) \ + ___apply(___rd_p, ___narg(__VA_ARGS__))(fn, src, __VA_ARGS__) + +#define ___core_read0(fn, fn_ptr, dst, src, a) \ ___read(fn, dst, ___type(src), src, a); -#define ___core_readN(fn, dst, src, ...) \ - ___read_ptrs(src, ___nolast(__VA_ARGS__)) \ +#define ___core_readN(fn, fn_ptr, dst, src, ...) \ + ___read_ptrs(fn_ptr, src, ___nolast(__VA_ARGS__)) \ ___read(fn, dst, ___type(src, ___nolast(__VA_ARGS__)), __t, \ ___last(__VA_ARGS__)); -#define ___core_read(fn, dst, src, a, ...) \ - ___apply(___core_read, ___empty(__VA_ARGS__))(fn, dst, \ +#define ___core_read(fn, fn_ptr, dst, src, a, ...) \ + ___apply(___core_read, ___empty(__VA_ARGS__))(fn, fn_ptr, dst, \ src, a, ##__VA_ARGS__) /* @@ -295,20 +297,32 @@ enum bpf_enum_value_kind { * BPF_CORE_READ(), in which final field is read into user-provided storage. * See BPF_CORE_READ() below for more details on general usage. */ -#define BPF_CORE_READ_INTO(dst, src, a, ...) \ - ({ \ - ___core_read(bpf_core_read, dst, (src), a, ##__VA_ARGS__) \ - }) +#define BPF_CORE_READ_INTO(dst, src, a, ...) ({ \ + ___core_read(bpf_core_read, bpf_core_read, \ + dst, (src), a, ##__VA_ARGS__) \ +}) + +/* Variant of BPF_CORE_READ_INTO() for reading from user-space memory */ +#define BPF_CORE_READ_USER_INTO(dst, src, a, ...) ({ \ + ___core_read(bpf_core_read_user, bpf_core_read_user, \ + dst, (src), a, ##__VA_ARGS__) \ +}) /* * BPF_CORE_READ_STR_INTO() does same "pointer chasing" as * BPF_CORE_READ() for intermediate pointers, but then executes (and returns * corresponding error code) bpf_core_read_str() for final string read. */ -#define BPF_CORE_READ_STR_INTO(dst, src, a, ...) \ - ({ \ - ___core_read(bpf_core_read_str, dst, (src), a, ##__VA_ARGS__)\ - }) +#define BPF_CORE_READ_STR_INTO(dst, src, a, ...) ({ \ + ___core_read(bpf_core_read_str, bpf_core_read, \ + dst, (src), a, ##__VA_ARGS__) \ +}) + +/* Variant of BPF_CORE_READ_STR_INTO() for reading from user-space memory */ +#define BPF_CORE_READ_USER_STR_INTO(dst, src, a, ...) ({ \ + ___core_read(bpf_core_read_user_str, bpf_core_read_user, \ + dst, (src), a, ##__VA_ARGS__) \ +}) /* * BPF_CORE_READ() is used to simplify BPF CO-RE relocatable read, especially @@ -334,12 +348,18 @@ enum bpf_enum_value_kind { * N.B. Only up to 9 "field accessors" are supported, which should be more * than enough for any practical purpose. */ -#define BPF_CORE_READ(src, a, ...) \ - ({ \ - ___type((src), a, ##__VA_ARGS__) __r; \ - BPF_CORE_READ_INTO(&__r, (src), a, ##__VA_ARGS__); \ - __r; \ - }) +#define BPF_CORE_READ(src, a, ...) ({ \ + ___type((src), a, ##__VA_ARGS__) __r; \ + BPF_CORE_READ_INTO(&__r, (src), a, ##__VA_ARGS__); \ + __r; \ +}) + +/* Variant of BPF_CORE_READ() for reading from user-space memory */ +#define BPF_CORE_READ_USER(src, a, ...) ({ \ + ___type((src), a, ##__VA_ARGS__) __r; \ + BPF_CORE_READ_USER_INTO(&__r, (src), a, ##__VA_ARGS__); \ + __r; \ +}) #endif -- GitLab From a4b09a9ef9451b09d87550720f8db3ae280b3eea Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Fri, 18 Dec 2020 15:56:13 -0800 Subject: [PATCH 0400/2012] libbpf: Add non-CO-RE variants of BPF_CORE_READ() macro family BPF_CORE_READ(), in addition to handling CO-RE relocations, also allows much nicer way to read data structures with nested pointers. Instead of writing a sequence of bpf_probe_read() calls to follow links, one can just write BPF_CORE_READ(a, b, c, d) to effectively do a->b->c->d read. This is a welcome ability when porting BCC code, which (in most cases) allows exactly the intuitive a->b->c->d variant. This patch adds non-CO-RE variants of BPF_CORE_READ() family of macros for cases where CO-RE is not supported (e.g., old kernels). In such cases, the property of shortening a sequence of bpf_probe_read()s to a simple BPF_PROBE_READ(a, b, c, d) invocation is still desirable, especially when porting BCC code to libbpf. Yet, no CO-RE relocation is going to be emitted. Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20201218235614.2284956-3-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/bpf_core_read.h | 38 +++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/tools/lib/bpf/bpf_core_read.h b/tools/lib/bpf/bpf_core_read.h index db0c735ceb53b..9456aabcb03a2 100644 --- a/tools/lib/bpf/bpf_core_read.h +++ b/tools/lib/bpf/bpf_core_read.h @@ -308,6 +308,18 @@ enum bpf_enum_value_kind { dst, (src), a, ##__VA_ARGS__) \ }) +/* Non-CO-RE variant of BPF_CORE_READ_INTO() */ +#define BPF_PROBE_READ_INTO(dst, src, a, ...) ({ \ + ___core_read(bpf_probe_read, bpf_probe_read, \ + dst, (src), a, ##__VA_ARGS__) \ +}) + +/* Non-CO-RE variant of BPF_CORE_READ_USER_INTO() */ +#define BPF_PROBE_READ_USER_INTO(dst, src, a, ...) ({ \ + ___core_read(bpf_probe_read_user, bpf_probe_read_user, \ + dst, (src), a, ##__VA_ARGS__) \ +}) + /* * BPF_CORE_READ_STR_INTO() does same "pointer chasing" as * BPF_CORE_READ() for intermediate pointers, but then executes (and returns @@ -324,6 +336,18 @@ enum bpf_enum_value_kind { dst, (src), a, ##__VA_ARGS__) \ }) +/* Non-CO-RE variant of BPF_CORE_READ_STR_INTO() */ +#define BPF_PROBE_READ_STR_INTO(dst, src, a, ...) ({ \ + ___core_read(bpf_probe_read_str, bpf_probe_read, \ + dst, (src), a, ##__VA_ARGS__) \ +}) + +/* Non-CO-RE variant of BPF_CORE_READ_USER_STR_INTO() */ +#define BPF_PROBE_READ_USER_STR_INTO(dst, src, a, ...) ({ \ + ___core_read(bpf_probe_read_user_str, bpf_probe_read_user, \ + dst, (src), a, ##__VA_ARGS__) \ +}) + /* * BPF_CORE_READ() is used to simplify BPF CO-RE relocatable read, especially * when there are few pointer chasing steps. @@ -361,5 +385,19 @@ enum bpf_enum_value_kind { __r; \ }) +/* Non-CO-RE variant of BPF_CORE_READ() */ +#define BPF_PROBE_READ(src, a, ...) ({ \ + ___type((src), a, ##__VA_ARGS__) __r; \ + BPF_PROBE_READ_INTO(&__r, (src), a, ##__VA_ARGS__); \ + __r; \ +}) + +/* Non-CO-RE variant of BPF_CORE_READ_USER() */ +#define BPF_PROBE_READ_USER(src, a, ...) ({ \ + ___type((src), a, ##__VA_ARGS__) __r; \ + BPF_PROBE_READ_USER_INTO(&__r, (src), a, ##__VA_ARGS__); \ + __r; \ +}) + #endif -- GitLab From 9e80114b1a271803767d4c5baa11ea9e8678aac3 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Fri, 18 Dec 2020 15:56:14 -0800 Subject: [PATCH 0401/2012] selftests/bpf: Add tests for user- and non-CO-RE BPF_CORE_READ() variants Add selftests validating that newly added variations of BPF_CORE_READ(), for use with user-space addresses and for non-CO-RE reads, work as expected. Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20201218235614.2284956-4-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- .../bpf/prog_tests/core_read_macros.c | 64 +++++++++++++++++++ .../bpf/progs/test_core_read_macros.c | 50 +++++++++++++++ 2 files changed, 114 insertions(+) create mode 100644 tools/testing/selftests/bpf/prog_tests/core_read_macros.c create mode 100644 tools/testing/selftests/bpf/progs/test_core_read_macros.c diff --git a/tools/testing/selftests/bpf/prog_tests/core_read_macros.c b/tools/testing/selftests/bpf/prog_tests/core_read_macros.c new file mode 100644 index 0000000000000..96f5cf3c6fa25 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/core_read_macros.c @@ -0,0 +1,64 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ + +#include + +struct callback_head { + struct callback_head *next; + void (*func)(struct callback_head *head); +}; + +/* ___shuffled flavor is just an illusion for BPF code, it doesn't really + * exist and user-space needs to provide data in the memory layout that + * matches callback_head. We just defined ___shuffled flavor to make it easier + * to work with the skeleton + */ +struct callback_head___shuffled { + struct callback_head___shuffled *next; + void (*func)(struct callback_head *head); +}; + +#include "test_core_read_macros.skel.h" + +void test_core_read_macros(void) +{ + int duration = 0, err; + struct test_core_read_macros* skel; + struct test_core_read_macros__bss *bss; + struct callback_head u_probe_in; + struct callback_head___shuffled u_core_in; + + skel = test_core_read_macros__open_and_load(); + if (CHECK(!skel, "skel_open", "failed to open skeleton\n")) + return; + bss = skel->bss; + bss->my_pid = getpid(); + + /* next pointers have to be set from the kernel side */ + bss->k_probe_in.func = (void *)(long)0x1234; + bss->k_core_in.func = (void *)(long)0xabcd; + + u_probe_in.next = &u_probe_in; + u_probe_in.func = (void *)(long)0x5678; + bss->u_probe_in = &u_probe_in; + + u_core_in.next = &u_core_in; + u_core_in.func = (void *)(long)0xdbca; + bss->u_core_in = &u_core_in; + + err = test_core_read_macros__attach(skel); + if (CHECK(err, "skel_attach", "skeleton attach failed: %d\n", err)) + goto cleanup; + + /* trigger tracepoint */ + usleep(1); + + ASSERT_EQ(bss->k_probe_out, 0x1234, "k_probe_out"); + ASSERT_EQ(bss->k_core_out, 0xabcd, "k_core_out"); + + ASSERT_EQ(bss->u_probe_out, 0x5678, "u_probe_out"); + ASSERT_EQ(bss->u_core_out, 0xdbca, "u_core_out"); + +cleanup: + test_core_read_macros__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/progs/test_core_read_macros.c b/tools/testing/selftests/bpf/progs/test_core_read_macros.c new file mode 100644 index 0000000000000..fd54caa173198 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_core_read_macros.c @@ -0,0 +1,50 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2020 Facebook + +#include "vmlinux.h" +#include +#include + +char _license[] SEC("license") = "GPL"; + +/* shuffled layout for relocatable (CO-RE) reads */ +struct callback_head___shuffled { + void (*func)(struct callback_head___shuffled *head); + struct callback_head___shuffled *next; +}; + +struct callback_head k_probe_in = {}; +struct callback_head___shuffled k_core_in = {}; + +struct callback_head *u_probe_in = 0; +struct callback_head___shuffled *u_core_in = 0; + +long k_probe_out = 0; +long u_probe_out = 0; + +long k_core_out = 0; +long u_core_out = 0; + +int my_pid = 0; + +SEC("raw_tracepoint/sys_enter") +int handler(void *ctx) +{ + int pid = bpf_get_current_pid_tgid() >> 32; + + if (my_pid != pid) + return 0; + + /* next pointers for kernel address space have to be initialized from + * BPF side, user-space mmaped addresses are stil user-space addresses + */ + k_probe_in.next = &k_probe_in; + __builtin_preserve_access_index(({k_core_in.next = &k_core_in;})); + + k_probe_out = (long)BPF_PROBE_READ(&k_probe_in, next, next, func); + k_core_out = (long)BPF_CORE_READ(&k_core_in, next, next, func); + u_probe_out = (long)BPF_PROBE_READ_USER(u_probe_in, next, next, func); + u_core_out = (long)BPF_CORE_READ_USER(u_core_in, next, next, func); + + return 0; +} -- GitLab From 619775c3cfd2bc8559abc4395bf7d85b72bd723f Mon Sep 17 00:00:00 2001 From: Leah Neukirchen Date: Wed, 16 Dec 2020 11:03:06 +0100 Subject: [PATCH 0402/2012] bpf: Remove unnecessary include from preload/iterators This program does not use argp (which is a glibcism). Instead include directly, which was pulled in by . Signed-off-by: Leah Neukirchen Signed-off-by: Daniel Borkmann Acked-by: Song Liu Link: https://lore.kernel.org/bpf/20201216100306.30942-1-leah@vuxu.org Signed-off-by: Alexei Starovoitov --- kernel/bpf/preload/iterators/iterators.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/bpf/preload/iterators/iterators.c b/kernel/bpf/preload/iterators/iterators.c index b7ff879391722..5d872a705470a 100644 --- a/kernel/bpf/preload/iterators/iterators.c +++ b/kernel/bpf/preload/iterators/iterators.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2020 Facebook */ -#include +#include #include #include #include -- GitLab From ec24e11e0817404ef9e04b50170e1a68793cd9f5 Mon Sep 17 00:00:00 2001 From: Zheng Yongjun Date: Tue, 29 Dec 2020 21:48:34 +0800 Subject: [PATCH 0403/2012] bpf: Replace fput with sockfd_put in sock map The function sockfd_lookup uses fget on the value that is stored in the file field of the returned structure, so fput should ultimately be applied to this value. This can be done directly, but it seems better to use the specific macro sockfd_put, which does the same thing. The cleanup was done using the following semantic patch: (http://www.emn.fr/x-info/coccinelle/) // @@ expression s; @@ s = sockfd_lookup(...) ... + sockfd_put(s); ?- fput(s->file); // Signed-off-by: Zheng Yongjun Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20201229134834.22962-1-zhengyongjun3@huawei.com Signed-off-by: Alexei Starovoitov --- net/core/sock_map.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/sock_map.c b/net/core/sock_map.c index 64b5ec14ff50c..d758fb83c8841 100644 --- a/net/core/sock_map.c +++ b/net/core/sock_map.c @@ -602,7 +602,7 @@ int sock_map_update_elem_sys(struct bpf_map *map, void *key, void *value, ret = sock_hash_update_common(map, key, sk, flags); sock_map_sk_release(sk); out: - fput(sock->file); + sockfd_put(sock); return ret; } -- GitLab From 43b5169d8355ccf26d726fbc75f083b2429113e4 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Tue, 22 Dec 2020 22:09:28 +0100 Subject: [PATCH 0404/2012] net, xdp: Introduce xdp_init_buff utility routine Introduce xdp_init_buff utility routine to initialize xdp_buff fields const over NAPI iterations (e.g. frame_sz or rxq pointer). Rely on xdp_init_buff in all XDP capable drivers. Signed-off-by: Lorenzo Bianconi Signed-off-by: Daniel Borkmann Reviewed-by: Alexander Duyck Acked-by: Jesper Dangaard Brouer Acked-by: John Fastabend Acked-by: Shay Agroskin Acked-by: Martin Habets Acked-by: Camelia Groza Acked-by: Marcin Wojtas Link: https://lore.kernel.org/bpf/7f8329b6da1434dc2b05a77f2e800b29628a8913.1608670965.git.lorenzo@kernel.org Signed-off-by: Alexei Starovoitov --- drivers/net/ethernet/amazon/ena/ena_netdev.c | 3 +-- drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c | 4 ++-- drivers/net/ethernet/cavium/thunder/nicvf_main.c | 4 ++-- drivers/net/ethernet/freescale/dpaa/dpaa_eth.c | 4 ++-- drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c | 8 ++++---- drivers/net/ethernet/intel/i40e/i40e_txrx.c | 6 +++--- drivers/net/ethernet/intel/ice/ice_txrx.c | 6 +++--- drivers/net/ethernet/intel/igb/igb_main.c | 6 +++--- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 7 +++---- drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c | 7 +++---- drivers/net/ethernet/marvell/mvneta.c | 3 +-- drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c | 8 +++++--- drivers/net/ethernet/mellanox/mlx4/en_rx.c | 3 +-- drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 3 +-- drivers/net/ethernet/netronome/nfp/nfp_net_common.c | 4 ++-- drivers/net/ethernet/qlogic/qede/qede_fp.c | 3 +-- drivers/net/ethernet/sfc/rx.c | 3 +-- drivers/net/ethernet/socionext/netsec.c | 3 +-- drivers/net/ethernet/ti/cpsw.c | 4 ++-- drivers/net/ethernet/ti/cpsw_new.c | 4 ++-- drivers/net/hyperv/netvsc_bpf.c | 3 +-- drivers/net/tun.c | 7 +++---- drivers/net/veth.c | 8 ++++---- drivers/net/virtio_net.c | 6 ++---- drivers/net/xen-netfront.c | 4 ++-- include/net/xdp.h | 7 +++++++ net/bpf/test_run.c | 4 ++-- net/core/dev.c | 8 ++++---- 28 files changed, 68 insertions(+), 72 deletions(-) diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index 06596fa1f9fea..43331f6967c9e 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -1634,8 +1634,7 @@ static int ena_clean_rx_irq(struct ena_ring *rx_ring, struct napi_struct *napi, netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev, "%s qid %d\n", __func__, rx_ring->qid); res_budget = budget; - xdp.rxq = &rx_ring->xdp_rxq; - xdp.frame_sz = ENA_PAGE_SIZE; + xdp_init_buff(&xdp, ENA_PAGE_SIZE, &rx_ring->xdp_rxq); do { xdp_verdict = XDP_PASS; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c index fcc262064766a..ab805d6750e59 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c @@ -133,12 +133,12 @@ bool bnxt_rx_xdp(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, u16 cons, dma_sync_single_for_cpu(&pdev->dev, mapping + offset, *len, bp->rx_dir); txr = rxr->bnapi->tx_ring; + /* BNXT_RX_PAGE_MODE(bp) when XDP enabled */ + xdp_init_buff(&xdp, PAGE_SIZE, &rxr->xdp_rxq); xdp.data_hard_start = *data_ptr - offset; xdp.data = *data_ptr; xdp_set_data_meta_invalid(&xdp); xdp.data_end = *data_ptr + *len; - xdp.rxq = &rxr->xdp_rxq; - xdp.frame_sz = PAGE_SIZE; /* BNXT_RX_PAGE_MODE(bp) when XDP enabled */ orig_data = xdp.data; rcu_read_lock(); diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c index f3b7b443f9648..9fc672f075f2b 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c @@ -547,12 +547,12 @@ static inline bool nicvf_xdp_rx(struct nicvf *nic, struct bpf_prog *prog, cpu_addr = (u64)phys_to_virt(cpu_addr); page = virt_to_page((void *)cpu_addr); + xdp_init_buff(&xdp, RCV_FRAG_LEN + XDP_PACKET_HEADROOM, + &rq->xdp_rxq); xdp.data_hard_start = page_address(page); xdp.data = (void *)cpu_addr; xdp_set_data_meta_invalid(&xdp); xdp.data_end = xdp.data + len; - xdp.rxq = &rq->xdp_rxq; - xdp.frame_sz = RCV_FRAG_LEN + XDP_PACKET_HEADROOM; orig_data = xdp.data; rcu_read_lock(); diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c index 4360ce4d3fb6a..26e20b96fd96a 100644 --- a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c +++ b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c @@ -2532,12 +2532,12 @@ static u32 dpaa_run_xdp(struct dpaa_priv *priv, struct qm_fd *fd, void *vaddr, return XDP_PASS; } + xdp_init_buff(&xdp, DPAA_BP_RAW_SIZE - DPAA_TX_PRIV_DATA_SIZE, + &dpaa_fq->xdp_rxq); xdp.data = vaddr + fd_off; xdp.data_meta = xdp.data; xdp.data_hard_start = xdp.data - XDP_PACKET_HEADROOM; xdp.data_end = xdp.data + qm_fd_get_length(fd); - xdp.frame_sz = DPAA_BP_RAW_SIZE - DPAA_TX_PRIV_DATA_SIZE; - xdp.rxq = &dpaa_fq->xdp_rxq; /* We reserve a fixed headroom of 256 bytes under the erratum and we * offer it all to XDP programs to use. If no room is left for the diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c index fb0bcd18ec0c1..3a7892a66fe54 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c @@ -358,14 +358,14 @@ static u32 dpaa2_eth_run_xdp(struct dpaa2_eth_priv *priv, if (!xdp_prog) goto out; + xdp_init_buff(&xdp, + DPAA2_ETH_RX_BUF_RAW_SIZE - + (dpaa2_fd_get_offset(fd) - XDP_PACKET_HEADROOM), + &ch->xdp_rxq); xdp.data = vaddr + dpaa2_fd_get_offset(fd); xdp.data_end = xdp.data + dpaa2_fd_get_len(fd); xdp.data_hard_start = xdp.data - XDP_PACKET_HEADROOM; xdp_set_data_meta_invalid(&xdp); - xdp.rxq = &ch->xdp_rxq; - - xdp.frame_sz = DPAA2_ETH_RX_BUF_RAW_SIZE - - (dpaa2_fd_get_offset(fd) - XDP_PACKET_HEADROOM); xdp_act = bpf_prog_run_xdp(xdp_prog, &xdp); diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index 4aca637d4a23c..a87fb8264d0cf 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -2344,7 +2344,7 @@ static void i40e_inc_ntc(struct i40e_ring *rx_ring) **/ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget) { - unsigned int total_rx_bytes = 0, total_rx_packets = 0; + unsigned int total_rx_bytes = 0, total_rx_packets = 0, frame_sz = 0; struct sk_buff *skb = rx_ring->skb; u16 cleaned_count = I40E_DESC_UNUSED(rx_ring); unsigned int xdp_xmit = 0; @@ -2352,9 +2352,9 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget) struct xdp_buff xdp; #if (PAGE_SIZE < 8192) - xdp.frame_sz = i40e_rx_frame_truesize(rx_ring, 0); + frame_sz = i40e_rx_frame_truesize(rx_ring, 0); #endif - xdp.rxq = &rx_ring->xdp_rxq; + xdp_init_buff(&xdp, frame_sz, &rx_ring->xdp_rxq); while (likely(total_rx_packets < (unsigned int)budget)) { struct i40e_rx_buffer *rx_buffer; diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c index a2d0aad8cfdd7..500e93bf6238f 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx.c @@ -1089,18 +1089,18 @@ ice_is_non_eop(struct ice_ring *rx_ring, union ice_32b_rx_flex_desc *rx_desc, */ int ice_clean_rx_irq(struct ice_ring *rx_ring, int budget) { - unsigned int total_rx_bytes = 0, total_rx_pkts = 0; + unsigned int total_rx_bytes = 0, total_rx_pkts = 0, frame_sz = 0; u16 cleaned_count = ICE_DESC_UNUSED(rx_ring); unsigned int xdp_res, xdp_xmit = 0; struct bpf_prog *xdp_prog = NULL; struct xdp_buff xdp; bool failure; - xdp.rxq = &rx_ring->xdp_rxq; /* Frame size depend on rx_ring setup when PAGE_SIZE=4K */ #if (PAGE_SIZE < 8192) - xdp.frame_sz = ice_rx_frame_truesize(rx_ring, 0); + frame_sz = ice_rx_frame_truesize(rx_ring, 0); #endif + xdp_init_buff(&xdp, frame_sz, &rx_ring->xdp_rxq); /* start the loop to process Rx packets bounded by 'budget' */ while (likely(total_rx_pkts < (unsigned int)budget)) { diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index 03f78fdb0dcdd..cf9e8b7d2c70d 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -8681,13 +8681,13 @@ static int igb_clean_rx_irq(struct igb_q_vector *q_vector, const int budget) u16 cleaned_count = igb_desc_unused(rx_ring); unsigned int xdp_xmit = 0; struct xdp_buff xdp; - - xdp.rxq = &rx_ring->xdp_rxq; + u32 frame_sz = 0; /* Frame size depend on rx_ring setup when PAGE_SIZE=4K */ #if (PAGE_SIZE < 8192) - xdp.frame_sz = igb_rx_frame_truesize(rx_ring, 0); + frame_sz = igb_rx_frame_truesize(rx_ring, 0); #endif + xdp_init_buff(&xdp, frame_sz, &rx_ring->xdp_rxq); while (likely(total_packets < budget)) { union e1000_adv_rx_desc *rx_desc; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 6cbbe09ce8a0d..7a7c86835a87d 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -2291,7 +2291,7 @@ static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector, struct ixgbe_ring *rx_ring, const int budget) { - unsigned int total_rx_bytes = 0, total_rx_packets = 0; + unsigned int total_rx_bytes = 0, total_rx_packets = 0, frame_sz = 0; struct ixgbe_adapter *adapter = q_vector->adapter; #ifdef IXGBE_FCOE int ddp_bytes; @@ -2301,12 +2301,11 @@ static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector, unsigned int xdp_xmit = 0; struct xdp_buff xdp; - xdp.rxq = &rx_ring->xdp_rxq; - /* Frame size depend on rx_ring setup when PAGE_SIZE=4K */ #if (PAGE_SIZE < 8192) - xdp.frame_sz = ixgbe_rx_frame_truesize(rx_ring, 0); + frame_sz = ixgbe_rx_frame_truesize(rx_ring, 0); #endif + xdp_init_buff(&xdp, frame_sz, &rx_ring->xdp_rxq); while (likely(total_rx_packets < budget)) { union ixgbe_adv_rx_desc *rx_desc; diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index 4061cd7db5dd7..624efcd71569a 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -1121,19 +1121,18 @@ static int ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector, struct ixgbevf_ring *rx_ring, int budget) { - unsigned int total_rx_bytes = 0, total_rx_packets = 0; + unsigned int total_rx_bytes = 0, total_rx_packets = 0, frame_sz = 0; struct ixgbevf_adapter *adapter = q_vector->adapter; u16 cleaned_count = ixgbevf_desc_unused(rx_ring); struct sk_buff *skb = rx_ring->skb; bool xdp_xmit = false; struct xdp_buff xdp; - xdp.rxq = &rx_ring->xdp_rxq; - /* Frame size depend on rx_ring setup when PAGE_SIZE=4K */ #if (PAGE_SIZE < 8192) - xdp.frame_sz = ixgbevf_rx_frame_truesize(rx_ring, 0); + frame_sz = ixgbevf_rx_frame_truesize(rx_ring, 0); #endif + xdp_init_buff(&xdp, frame_sz, &rx_ring->xdp_rxq); while (likely(total_rx_packets < budget)) { struct ixgbevf_rx_buffer *rx_buffer; diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index bc4d8d1444019..038c6b436cba4 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -2363,9 +2363,8 @@ static int mvneta_rx_swbm(struct napi_struct *napi, u32 desc_status, frame_sz; struct xdp_buff xdp_buf; + xdp_init_buff(&xdp_buf, PAGE_SIZE, &rxq->xdp_rxq); xdp_buf.data_hard_start = NULL; - xdp_buf.frame_sz = PAGE_SIZE; - xdp_buf.rxq = &rxq->xdp_rxq; sinfo.nr_frags = 0; diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c index 4b1808acef581..5872cb011ae15 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c @@ -3563,16 +3563,18 @@ static int mvpp2_rx(struct mvpp2_port *port, struct napi_struct *napi, frag_size = bm_pool->frag_size; if (xdp_prog) { + struct xdp_rxq_info *xdp_rxq; + xdp.data_hard_start = data; xdp.data = data + MVPP2_MH_SIZE + MVPP2_SKB_HEADROOM; xdp.data_end = xdp.data + rx_bytes; - xdp.frame_sz = PAGE_SIZE; if (bm_pool->pkt_size == MVPP2_BM_SHORT_PKT_SIZE) - xdp.rxq = &rxq->xdp_rxq_short; + xdp_rxq = &rxq->xdp_rxq_short; else - xdp.rxq = &rxq->xdp_rxq_long; + xdp_rxq = &rxq->xdp_rxq_long; + xdp_init_buff(&xdp, PAGE_SIZE, xdp_rxq); xdp_set_data_meta_invalid(&xdp); ret = mvpp2_run_xdp(port, rxq, xdp_prog, &xdp, pp, &ps); diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index c1c9118a66c93..93da9c2d50c07 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -682,8 +682,7 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud /* Protect accesses to: ring->xdp_prog, priv->mac_hash list */ rcu_read_lock(); xdp_prog = rcu_dereference(ring->xdp_prog); - xdp.rxq = &ring->xdp_rxq; - xdp.frame_sz = priv->frag_info[0].frag_stride; + xdp_init_buff(&xdp, priv->frag_info[0].frag_stride, &ring->xdp_rxq); doorbell_pending = false; /* We assume a 1:1 mapping between CQEs and Rx descriptors, so Rx diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 7f5851c612181..bc7c81f2b0360 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -1126,12 +1126,11 @@ struct sk_buff *mlx5e_build_linear_skb(struct mlx5e_rq *rq, void *va, static void mlx5e_fill_xdp_buff(struct mlx5e_rq *rq, void *va, u16 headroom, u32 len, struct xdp_buff *xdp) { + xdp_init_buff(xdp, rq->buff.frame0_sz, &rq->xdp_rxq); xdp->data_hard_start = va; xdp->data = va + headroom; xdp_set_data_meta_invalid(xdp); xdp->data_end = xdp->data + len; - xdp->rxq = &rq->xdp_rxq; - xdp->frame_sz = rq->buff.frame0_sz; } static struct sk_buff * diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c index 7ba8f4c7f26d5..513bc60bcd09c 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c @@ -1822,8 +1822,8 @@ static int nfp_net_rx(struct nfp_net_rx_ring *rx_ring, int budget) rcu_read_lock(); xdp_prog = READ_ONCE(dp->xdp_prog); true_bufsz = xdp_prog ? PAGE_SIZE : dp->fl_bufsz; - xdp.frame_sz = PAGE_SIZE - NFP_NET_RX_BUF_HEADROOM; - xdp.rxq = &rx_ring->xdp_rxq; + xdp_init_buff(&xdp, PAGE_SIZE - NFP_NET_RX_BUF_HEADROOM, + &rx_ring->xdp_rxq); tx_ring = r_vec->xdp_ring; while (pkts_polled < budget) { diff --git a/drivers/net/ethernet/qlogic/qede/qede_fp.c b/drivers/net/ethernet/qlogic/qede/qede_fp.c index ca0ee29a57b50..8d0c6d62022a1 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_fp.c +++ b/drivers/net/ethernet/qlogic/qede/qede_fp.c @@ -1090,12 +1090,11 @@ static bool qede_rx_xdp(struct qede_dev *edev, struct xdp_buff xdp; enum xdp_action act; + xdp_init_buff(&xdp, rxq->rx_buf_seg_size, &rxq->xdp_rxq); xdp.data_hard_start = page_address(bd->data); xdp.data = xdp.data_hard_start + *data_offset; xdp_set_data_meta_invalid(&xdp); xdp.data_end = xdp.data + *len; - xdp.rxq = &rxq->xdp_rxq; - xdp.frame_sz = rxq->rx_buf_seg_size; /* PAGE_SIZE when XDP enabled */ /* Queues always have a full reset currently, so for the time * being until there's atomic program replace just mark read diff --git a/drivers/net/ethernet/sfc/rx.c b/drivers/net/ethernet/sfc/rx.c index aaa112877561f..eaa6650955d1a 100644 --- a/drivers/net/ethernet/sfc/rx.c +++ b/drivers/net/ethernet/sfc/rx.c @@ -293,14 +293,13 @@ static bool efx_do_xdp(struct efx_nic *efx, struct efx_channel *channel, memcpy(rx_prefix, *ehp - efx->rx_prefix_size, efx->rx_prefix_size); + xdp_init_buff(&xdp, efx->rx_page_buf_step, &rx_queue->xdp_rxq_info); xdp.data = *ehp; xdp.data_hard_start = xdp.data - EFX_XDP_HEADROOM; /* No support yet for XDP metadata */ xdp_set_data_meta_invalid(&xdp); xdp.data_end = xdp.data + rx_buf->len; - xdp.rxq = &rx_queue->xdp_rxq_info; - xdp.frame_sz = efx->rx_page_buf_step; xdp_act = bpf_prog_run_xdp(xdp_prog, &xdp); rcu_read_unlock(); diff --git a/drivers/net/ethernet/socionext/netsec.c b/drivers/net/ethernet/socionext/netsec.c index 19d20a6d0d445..945ca9517bf94 100644 --- a/drivers/net/ethernet/socionext/netsec.c +++ b/drivers/net/ethernet/socionext/netsec.c @@ -956,8 +956,7 @@ static int netsec_process_rx(struct netsec_priv *priv, int budget) u32 xdp_act = 0; int done = 0; - xdp.rxq = &dring->xdp_rxq; - xdp.frame_sz = PAGE_SIZE; + xdp_init_buff(&xdp, PAGE_SIZE, &dring->xdp_rxq); rcu_read_lock(); xdp_prog = READ_ONCE(priv->xdp_prog); diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index b0f00b4edd949..78a923391828e 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -392,6 +392,8 @@ static void cpsw_rx_handler(void *token, int len, int status) } if (priv->xdp_prog) { + xdp_init_buff(&xdp, PAGE_SIZE, &priv->xdp_rxq[ch]); + if (status & CPDMA_RX_VLAN_ENCAP) { xdp.data = pa + CPSW_HEADROOM + CPSW_RX_VLAN_ENCAP_HDR_SIZE; @@ -405,8 +407,6 @@ static void cpsw_rx_handler(void *token, int len, int status) xdp_set_data_meta_invalid(&xdp); xdp.data_hard_start = pa; - xdp.rxq = &priv->xdp_rxq[ch]; - xdp.frame_sz = PAGE_SIZE; port = priv->emac_port + cpsw->data.dual_emac; ret = cpsw_run_xdp(priv, ch, &xdp, page, port); diff --git a/drivers/net/ethernet/ti/cpsw_new.c b/drivers/net/ethernet/ti/cpsw_new.c index 2f5e0ad23ad7c..1b3385ec9645f 100644 --- a/drivers/net/ethernet/ti/cpsw_new.c +++ b/drivers/net/ethernet/ti/cpsw_new.c @@ -335,6 +335,8 @@ static void cpsw_rx_handler(void *token, int len, int status) } if (priv->xdp_prog) { + xdp_init_buff(&xdp, PAGE_SIZE, &priv->xdp_rxq[ch]); + if (status & CPDMA_RX_VLAN_ENCAP) { xdp.data = pa + CPSW_HEADROOM + CPSW_RX_VLAN_ENCAP_HDR_SIZE; @@ -348,8 +350,6 @@ static void cpsw_rx_handler(void *token, int len, int status) xdp_set_data_meta_invalid(&xdp); xdp.data_hard_start = pa; - xdp.rxq = &priv->xdp_rxq[ch]; - xdp.frame_sz = PAGE_SIZE; ret = cpsw_run_xdp(priv, ch, &xdp, page, priv->emac_port); if (ret != CPSW_XDP_PASS) diff --git a/drivers/net/hyperv/netvsc_bpf.c b/drivers/net/hyperv/netvsc_bpf.c index 440486d9c999e..14a7ee4c68998 100644 --- a/drivers/net/hyperv/netvsc_bpf.c +++ b/drivers/net/hyperv/netvsc_bpf.c @@ -44,12 +44,11 @@ u32 netvsc_run_xdp(struct net_device *ndev, struct netvsc_channel *nvchan, goto out; } + xdp_init_buff(xdp, PAGE_SIZE, &nvchan->xdp_rxq); xdp->data_hard_start = page_address(page); xdp->data = xdp->data_hard_start + NETVSC_XDP_HDRM; xdp_set_data_meta_invalid(xdp); xdp->data_end = xdp->data + len; - xdp->rxq = &nvchan->xdp_rxq; - xdp->frame_sz = PAGE_SIZE; memcpy(xdp->data, data, len); diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 02a93cfdb6b1a..3b7728433a863 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -1599,12 +1599,11 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun, struct xdp_buff xdp; u32 act; + xdp_init_buff(&xdp, buflen, &tfile->xdp_rxq); xdp.data_hard_start = buf; xdp.data = buf + pad; xdp_set_data_meta_invalid(&xdp); xdp.data_end = xdp.data + len; - xdp.rxq = &tfile->xdp_rxq; - xdp.frame_sz = buflen; act = bpf_prog_run_xdp(xdp_prog, &xdp); if (act == XDP_REDIRECT || act == XDP_TX) { @@ -2342,9 +2341,9 @@ static int tun_xdp_one(struct tun_struct *tun, skb_xdp = true; goto build; } + + xdp_init_buff(xdp, buflen, &tfile->xdp_rxq); xdp_set_data_meta_invalid(xdp); - xdp->rxq = &tfile->xdp_rxq; - xdp->frame_sz = buflen; act = bpf_prog_run_xdp(xdp_prog, xdp); err = tun_xdp_act(tun, xdp_prog, xdp, act); diff --git a/drivers/net/veth.c b/drivers/net/veth.c index 02bfcdf50a7ac..25f3601fb6dd5 100644 --- a/drivers/net/veth.c +++ b/drivers/net/veth.c @@ -654,7 +654,7 @@ static struct sk_buff *veth_xdp_rcv_skb(struct veth_rq *rq, struct veth_xdp_tx_bq *bq, struct veth_stats *stats) { - u32 pktlen, headroom, act, metalen; + u32 pktlen, headroom, act, metalen, frame_sz; void *orig_data, *orig_data_end; struct bpf_prog *xdp_prog; int mac_len, delta, off; @@ -714,11 +714,11 @@ static struct sk_buff *veth_xdp_rcv_skb(struct veth_rq *rq, xdp.data = skb_mac_header(skb); xdp.data_end = xdp.data + pktlen; xdp.data_meta = xdp.data; - xdp.rxq = &rq->xdp_rxq; /* SKB "head" area always have tailroom for skb_shared_info */ - xdp.frame_sz = (void *)skb_end_pointer(skb) - xdp.data_hard_start; - xdp.frame_sz += SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); + frame_sz = (void *)skb_end_pointer(skb) - xdp.data_hard_start; + frame_sz += SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); + xdp_init_buff(&xdp, frame_sz, &rq->xdp_rxq); orig_data = xdp.data; orig_data_end = xdp.data_end; diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 508408fbe78fb..36b0f81bcd7aa 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -689,12 +689,11 @@ static struct sk_buff *receive_small(struct net_device *dev, page = xdp_page; } + xdp_init_buff(&xdp, buflen, &rq->xdp_rxq); xdp.data_hard_start = buf + VIRTNET_RX_PAD + vi->hdr_len; xdp.data = xdp.data_hard_start + xdp_headroom; xdp.data_end = xdp.data + len; xdp.data_meta = xdp.data; - xdp.rxq = &rq->xdp_rxq; - xdp.frame_sz = buflen; orig_data = xdp.data; act = bpf_prog_run_xdp(xdp_prog, &xdp); stats->xdp_packets++; @@ -859,12 +858,11 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, * the descriptor on if we get an XDP_TX return code. */ data = page_address(xdp_page) + offset; + xdp_init_buff(&xdp, frame_sz - vi->hdr_len, &rq->xdp_rxq); xdp.data_hard_start = data - VIRTIO_XDP_HEADROOM + vi->hdr_len; xdp.data = data + vi->hdr_len; xdp.data_end = xdp.data + (len - vi->hdr_len); xdp.data_meta = xdp.data; - xdp.rxq = &rq->xdp_rxq; - xdp.frame_sz = frame_sz - vi->hdr_len; act = bpf_prog_run_xdp(xdp_prog, &xdp); stats->xdp_packets++; diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index b01848ef46493..329397c60d846 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -864,12 +864,12 @@ static u32 xennet_run_xdp(struct netfront_queue *queue, struct page *pdata, u32 act; int err; + xdp_init_buff(xdp, XEN_PAGE_SIZE - XDP_PACKET_HEADROOM, + &queue->xdp_rxq); xdp->data_hard_start = page_address(pdata); xdp->data = xdp->data_hard_start + XDP_PACKET_HEADROOM; xdp_set_data_meta_invalid(xdp); xdp->data_end = xdp->data + len; - xdp->rxq = &queue->xdp_rxq; - xdp->frame_sz = XEN_PAGE_SIZE - XDP_PACKET_HEADROOM; act = bpf_prog_run_xdp(prog, xdp); switch (act) { diff --git a/include/net/xdp.h b/include/net/xdp.h index 600acb307db61..8a589f7e08e04 100644 --- a/include/net/xdp.h +++ b/include/net/xdp.h @@ -76,6 +76,13 @@ struct xdp_buff { u32 frame_sz; /* frame size to deduce data_hard_end/reserved tailroom*/ }; +static __always_inline void +xdp_init_buff(struct xdp_buff *xdp, u32 frame_sz, struct xdp_rxq_info *rxq) +{ + xdp->frame_sz = frame_sz; + xdp->rxq = rxq; +} + /* Reserve memory area at end-of data area. * * This macro reserves tailroom in the XDP buffer by limiting the diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index c1c30a9f76f34..a8fa5a9e41375 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -640,10 +640,10 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr, xdp.data = data + headroom; xdp.data_meta = xdp.data; xdp.data_end = xdp.data + size; - xdp.frame_sz = headroom + max_data_sz + tailroom; rxqueue = __netif_get_rx_queue(current->nsproxy->net_ns->loopback_dev, 0); - xdp.rxq = &rxqueue->xdp_rxq; + xdp_init_buff(&xdp, headroom + max_data_sz + tailroom, + &rxqueue->xdp_rxq); bpf_prog_change_xdp(NULL, prog); ret = bpf_test_run(prog, &xdp, repeat, &retval, &duration, true); if (ret) diff --git a/net/core/dev.c b/net/core/dev.c index 7afbb642e203a..e6d758a3c2a92 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4606,11 +4606,11 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb, struct netdev_rx_queue *rxqueue; void *orig_data, *orig_data_end; u32 metalen, act = XDP_DROP; + u32 mac_len, frame_sz; __be16 orig_eth_type; struct ethhdr *eth; bool orig_bcast; int hlen, off; - u32 mac_len; /* Reinjected packets coming from act_mirred or similar should * not get XDP generic processing. @@ -4649,8 +4649,8 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb, xdp->data_hard_start = skb->data - skb_headroom(skb); /* SKB "head" area always have tailroom for skb_shared_info */ - xdp->frame_sz = (void *)skb_end_pointer(skb) - xdp->data_hard_start; - xdp->frame_sz += SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); + frame_sz = (void *)skb_end_pointer(skb) - xdp->data_hard_start; + frame_sz += SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); orig_data_end = xdp->data_end; orig_data = xdp->data; @@ -4659,7 +4659,7 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb, orig_eth_type = eth->h_proto; rxqueue = netif_get_rxqueue(skb); - xdp->rxq = &rxqueue->xdp_rxq; + xdp_init_buff(xdp, frame_sz, &rxqueue->xdp_rxq); act = bpf_prog_run_xdp(xdp_prog, xdp); -- GitLab From be9df4aff65f18caa79b35f88f42c3d5a43af14f Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Tue, 22 Dec 2020 22:09:29 +0100 Subject: [PATCH 0405/2012] net, xdp: Introduce xdp_prepare_buff utility routine Introduce xdp_prepare_buff utility routine to initialize per-descriptor xdp_buff fields (e.g. xdp_buff pointers). Rely on xdp_prepare_buff() in all XDP capable drivers. Signed-off-by: Lorenzo Bianconi Signed-off-by: Daniel Borkmann Reviewed-by: Alexander Duyck Acked-by: Jesper Dangaard Brouer Acked-by: John Fastabend Acked-by: Shay Agroskin Acked-by: Martin Habets Acked-by: Camelia Groza Acked-by: Marcin Wojtas Link: https://lore.kernel.org/bpf/45f46f12295972a97da8ca01990b3e71501e9d89.1608670965.git.lorenzo@kernel.org Signed-off-by: Alexei Starovoitov --- drivers/net/ethernet/amazon/ena/ena_netdev.c | 7 +++---- drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c | 5 +---- .../net/ethernet/cavium/thunder/nicvf_main.c | 8 ++++---- .../net/ethernet/freescale/dpaa/dpaa_eth.c | 6 ++---- .../net/ethernet/freescale/dpaa2/dpaa2-eth.c | 14 +++++-------- drivers/net/ethernet/intel/i40e/i40e_txrx.c | 12 +++++------ drivers/net/ethernet/intel/ice/ice_txrx.c | 9 +++++---- drivers/net/ethernet/intel/igb/igb_main.c | 12 +++++------ drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 12 +++++------ .../net/ethernet/intel/ixgbevf/ixgbevf_main.c | 12 +++++------ drivers/net/ethernet/marvell/mvneta.c | 7 ++----- .../net/ethernet/marvell/mvpp2/mvpp2_main.c | 8 +++----- drivers/net/ethernet/mellanox/mlx4/en_rx.c | 6 ++---- .../net/ethernet/mellanox/mlx5/core/en_rx.c | 5 +---- .../ethernet/netronome/nfp/nfp_net_common.c | 8 ++++---- drivers/net/ethernet/qlogic/qede/qede_fp.c | 6 ++---- drivers/net/ethernet/sfc/rx.c | 7 ++----- drivers/net/ethernet/socionext/netsec.c | 6 ++---- drivers/net/ethernet/ti/cpsw.c | 16 +++++---------- drivers/net/ethernet/ti/cpsw_new.c | 16 +++++---------- drivers/net/hyperv/netvsc_bpf.c | 5 +---- drivers/net/tun.c | 5 +---- drivers/net/veth.c | 8 ++------ drivers/net/virtio_net.c | 12 ++++------- drivers/net/xen-netfront.c | 6 ++---- include/net/xdp.h | 12 +++++++++++ net/bpf/test_run.c | 7 ++----- net/core/dev.c | 20 +++++++++---------- 28 files changed, 105 insertions(+), 152 deletions(-) diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index 43331f6967c9e..1db6cfd2b55c6 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -1585,10 +1585,9 @@ static int ena_xdp_handle_buff(struct ena_ring *rx_ring, struct xdp_buff *xdp) int ret; rx_info = &rx_ring->rx_buffer_info[rx_ring->ena_bufs[0].req_id]; - xdp->data = page_address(rx_info->page) + rx_info->page_offset; - xdp_set_data_meta_invalid(xdp); - xdp->data_hard_start = page_address(rx_info->page); - xdp->data_end = xdp->data + rx_ring->ena_bufs[0].len; + xdp_prepare_buff(xdp, page_address(rx_info->page), + rx_info->page_offset, + rx_ring->ena_bufs[0].len, false); /* If for some reason we received a bigger packet than * we expect, then we simply drop it */ diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c index ab805d6750e59..641303894341d 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c @@ -135,10 +135,7 @@ bool bnxt_rx_xdp(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, u16 cons, txr = rxr->bnapi->tx_ring; /* BNXT_RX_PAGE_MODE(bp) when XDP enabled */ xdp_init_buff(&xdp, PAGE_SIZE, &rxr->xdp_rxq); - xdp.data_hard_start = *data_ptr - offset; - xdp.data = *data_ptr; - xdp_set_data_meta_invalid(&xdp); - xdp.data_end = *data_ptr + *len; + xdp_prepare_buff(&xdp, *data_ptr - offset, offset, *len, false); orig_data = xdp.data; rcu_read_lock(); diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c index 9fc672f075f2b..c33b4e8375159 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c @@ -530,6 +530,7 @@ static inline bool nicvf_xdp_rx(struct nicvf *nic, struct bpf_prog *prog, struct cqe_rx_t *cqe_rx, struct snd_queue *sq, struct rcv_queue *rq, struct sk_buff **skb) { + unsigned char *hard_start, *data; struct xdp_buff xdp; struct page *page; u32 action; @@ -549,10 +550,9 @@ static inline bool nicvf_xdp_rx(struct nicvf *nic, struct bpf_prog *prog, xdp_init_buff(&xdp, RCV_FRAG_LEN + XDP_PACKET_HEADROOM, &rq->xdp_rxq); - xdp.data_hard_start = page_address(page); - xdp.data = (void *)cpu_addr; - xdp_set_data_meta_invalid(&xdp); - xdp.data_end = xdp.data + len; + hard_start = page_address(page); + data = (unsigned char *)cpu_addr; + xdp_prepare_buff(&xdp, hard_start, data - hard_start, len, false); orig_data = xdp.data; rcu_read_lock(); diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c index 26e20b96fd96a..d8e568f6caf30 100644 --- a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c +++ b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c @@ -2534,10 +2534,8 @@ static u32 dpaa_run_xdp(struct dpaa_priv *priv, struct qm_fd *fd, void *vaddr, xdp_init_buff(&xdp, DPAA_BP_RAW_SIZE - DPAA_TX_PRIV_DATA_SIZE, &dpaa_fq->xdp_rxq); - xdp.data = vaddr + fd_off; - xdp.data_meta = xdp.data; - xdp.data_hard_start = xdp.data - XDP_PACKET_HEADROOM; - xdp.data_end = xdp.data + qm_fd_get_length(fd); + xdp_prepare_buff(&xdp, vaddr + fd_off - XDP_PACKET_HEADROOM, + XDP_PACKET_HEADROOM, qm_fd_get_length(fd), true); /* We reserve a fixed headroom of 256 bytes under the erratum and we * offer it all to XDP programs to use. If no room is left for the diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c index 3a7892a66fe54..55b73c1136cac 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c @@ -350,7 +350,7 @@ static u32 dpaa2_eth_run_xdp(struct dpaa2_eth_priv *priv, struct bpf_prog *xdp_prog; struct xdp_buff xdp; u32 xdp_act = XDP_PASS; - int err; + int err, offset; rcu_read_lock(); @@ -358,14 +358,10 @@ static u32 dpaa2_eth_run_xdp(struct dpaa2_eth_priv *priv, if (!xdp_prog) goto out; - xdp_init_buff(&xdp, - DPAA2_ETH_RX_BUF_RAW_SIZE - - (dpaa2_fd_get_offset(fd) - XDP_PACKET_HEADROOM), - &ch->xdp_rxq); - xdp.data = vaddr + dpaa2_fd_get_offset(fd); - xdp.data_end = xdp.data + dpaa2_fd_get_len(fd); - xdp.data_hard_start = xdp.data - XDP_PACKET_HEADROOM; - xdp_set_data_meta_invalid(&xdp); + offset = dpaa2_fd_get_offset(fd) - XDP_PACKET_HEADROOM; + xdp_init_buff(&xdp, DPAA2_ETH_RX_BUF_RAW_SIZE - offset, &ch->xdp_rxq); + xdp_prepare_buff(&xdp, vaddr + offset, XDP_PACKET_HEADROOM, + dpaa2_fd_get_len(fd), false); xdp_act = bpf_prog_run_xdp(xdp_prog, &xdp); diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index a87fb8264d0cf..2574e78f75978 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -2406,12 +2406,12 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget) /* retrieve a buffer from the ring */ if (!skb) { - xdp.data = page_address(rx_buffer->page) + - rx_buffer->page_offset; - xdp.data_meta = xdp.data; - xdp.data_hard_start = xdp.data - - i40e_rx_offset(rx_ring); - xdp.data_end = xdp.data + size; + unsigned int offset = i40e_rx_offset(rx_ring); + unsigned char *hard_start; + + hard_start = page_address(rx_buffer->page) + + rx_buffer->page_offset - offset; + xdp_prepare_buff(&xdp, hard_start, offset, size, true); #if (PAGE_SIZE > 4096) /* At larger PAGE_SIZE, frame_sz depend on len size */ xdp.frame_sz = i40e_rx_frame_truesize(rx_ring, size); diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c index 500e93bf6238f..422f53997c026 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx.c @@ -1104,8 +1104,10 @@ int ice_clean_rx_irq(struct ice_ring *rx_ring, int budget) /* start the loop to process Rx packets bounded by 'budget' */ while (likely(total_rx_pkts < (unsigned int)budget)) { + unsigned int offset = ice_rx_offset(rx_ring); union ice_32b_rx_flex_desc *rx_desc; struct ice_rx_buf *rx_buf; + unsigned char *hard_start; struct sk_buff *skb; unsigned int size; u16 stat_err_bits; @@ -1151,10 +1153,9 @@ int ice_clean_rx_irq(struct ice_ring *rx_ring, int budget) goto construct_skb; } - xdp.data = page_address(rx_buf->page) + rx_buf->page_offset; - xdp.data_hard_start = xdp.data - ice_rx_offset(rx_ring); - xdp.data_meta = xdp.data; - xdp.data_end = xdp.data + size; + hard_start = page_address(rx_buf->page) + rx_buf->page_offset - + offset; + xdp_prepare_buff(&xdp, hard_start, offset, size, true); #if (PAGE_SIZE > 4096) /* At larger PAGE_SIZE, frame_sz depend on len size */ xdp.frame_sz = ice_rx_frame_truesize(rx_ring, size); diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index cf9e8b7d2c70d..6b5adbd9660b7 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -8715,12 +8715,12 @@ static int igb_clean_rx_irq(struct igb_q_vector *q_vector, const int budget) /* retrieve a buffer from the ring */ if (!skb) { - xdp.data = page_address(rx_buffer->page) + - rx_buffer->page_offset; - xdp.data_meta = xdp.data; - xdp.data_hard_start = xdp.data - - igb_rx_offset(rx_ring); - xdp.data_end = xdp.data + size; + unsigned int offset = igb_rx_offset(rx_ring); + unsigned char *hard_start; + + hard_start = page_address(rx_buffer->page) + + rx_buffer->page_offset - offset; + xdp_prepare_buff(&xdp, hard_start, offset, size, true); #if (PAGE_SIZE > 4096) /* At larger PAGE_SIZE, frame_sz depend on len size */ xdp.frame_sz = igb_rx_frame_truesize(rx_ring, size); diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 7a7c86835a87d..56dca73d158ee 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -2335,12 +2335,12 @@ static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector, /* retrieve a buffer from the ring */ if (!skb) { - xdp.data = page_address(rx_buffer->page) + - rx_buffer->page_offset; - xdp.data_meta = xdp.data; - xdp.data_hard_start = xdp.data - - ixgbe_rx_offset(rx_ring); - xdp.data_end = xdp.data + size; + unsigned int offset = ixgbe_rx_offset(rx_ring); + unsigned char *hard_start; + + hard_start = page_address(rx_buffer->page) + + rx_buffer->page_offset - offset; + xdp_prepare_buff(&xdp, hard_start, offset, size, true); #if (PAGE_SIZE > 4096) /* At larger PAGE_SIZE, frame_sz depend on len size */ xdp.frame_sz = ixgbe_rx_frame_truesize(rx_ring, size); diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index 624efcd71569a..a534a3fb392ed 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -1160,12 +1160,12 @@ static int ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector, /* retrieve a buffer from the ring */ if (!skb) { - xdp.data = page_address(rx_buffer->page) + - rx_buffer->page_offset; - xdp.data_meta = xdp.data; - xdp.data_hard_start = xdp.data - - ixgbevf_rx_offset(rx_ring); - xdp.data_end = xdp.data + size; + unsigned int offset = ixgbevf_rx_offset(rx_ring); + unsigned char *hard_start; + + hard_start = page_address(rx_buffer->page) + + rx_buffer->page_offset - offset; + xdp_prepare_buff(&xdp, hard_start, offset, size, true); #if (PAGE_SIZE > 4096) /* At larger PAGE_SIZE, frame_sz depend on len size */ xdp.frame_sz = ixgbevf_rx_frame_truesize(rx_ring, size); diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index 038c6b436cba4..6290bfb6494ea 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -2263,11 +2263,8 @@ mvneta_swbm_rx_frame(struct mvneta_port *pp, /* Prefetch header */ prefetch(data); - - xdp->data_hard_start = data; - xdp->data = data + pp->rx_offset_correction + MVNETA_MH_SIZE; - xdp->data_end = xdp->data + data_len; - xdp_set_data_meta_invalid(xdp); + xdp_prepare_buff(xdp, data, pp->rx_offset_correction + MVNETA_MH_SIZE, + data_len, false); sinfo = xdp_get_shared_info_from_buff(xdp); sinfo->nr_frags = 0; diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c index 5872cb011ae15..5272f26a3e3ec 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c @@ -3565,17 +3565,15 @@ static int mvpp2_rx(struct mvpp2_port *port, struct napi_struct *napi, if (xdp_prog) { struct xdp_rxq_info *xdp_rxq; - xdp.data_hard_start = data; - xdp.data = data + MVPP2_MH_SIZE + MVPP2_SKB_HEADROOM; - xdp.data_end = xdp.data + rx_bytes; - if (bm_pool->pkt_size == MVPP2_BM_SHORT_PKT_SIZE) xdp_rxq = &rxq->xdp_rxq_short; else xdp_rxq = &rxq->xdp_rxq_long; xdp_init_buff(&xdp, PAGE_SIZE, xdp_rxq); - xdp_set_data_meta_invalid(&xdp); + xdp_prepare_buff(&xdp, data, + MVPP2_MH_SIZE + MVPP2_SKB_HEADROOM, + rx_bytes, false); ret = mvpp2_run_xdp(port, rxq, xdp_prog, &xdp, pp, &ps); diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index 93da9c2d50c07..e35e4d7ef4d1d 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -776,10 +776,8 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud priv->frag_info[0].frag_size, DMA_FROM_DEVICE); - xdp.data_hard_start = va - frags[0].page_offset; - xdp.data = va; - xdp_set_data_meta_invalid(&xdp); - xdp.data_end = xdp.data + length; + xdp_prepare_buff(&xdp, va - frags[0].page_offset, + frags[0].page_offset, length, false); orig_data = xdp.data; act = bpf_prog_run_xdp(xdp_prog, &xdp); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index bc7c81f2b0360..a63ce7c8b98f7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -1127,10 +1127,7 @@ static void mlx5e_fill_xdp_buff(struct mlx5e_rq *rq, void *va, u16 headroom, u32 len, struct xdp_buff *xdp) { xdp_init_buff(xdp, rq->buff.frame0_sz, &rq->xdp_rxq); - xdp->data_hard_start = va; - xdp->data = va + headroom; - xdp_set_data_meta_invalid(xdp); - xdp->data_end = xdp->data + len; + xdp_prepare_buff(xdp, va, headroom, len, false); } static struct sk_buff * diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c index 513bc60bcd09c..eeb30680b4dcf 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c @@ -1914,10 +1914,10 @@ static int nfp_net_rx(struct nfp_net_rx_ring *rx_ring, int budget) unsigned int dma_off; int act; - xdp.data_hard_start = rxbuf->frag + NFP_NET_RX_BUF_HEADROOM; - xdp.data = orig_data; - xdp.data_meta = orig_data; - xdp.data_end = orig_data + pkt_len; + xdp_prepare_buff(&xdp, + rxbuf->frag + NFP_NET_RX_BUF_HEADROOM, + pkt_off - NFP_NET_RX_BUF_HEADROOM, + pkt_len, true); act = bpf_prog_run_xdp(xdp_prog, &xdp); diff --git a/drivers/net/ethernet/qlogic/qede/qede_fp.c b/drivers/net/ethernet/qlogic/qede/qede_fp.c index 8d0c6d62022a1..70c8d3cd85c0c 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_fp.c +++ b/drivers/net/ethernet/qlogic/qede/qede_fp.c @@ -1091,10 +1091,8 @@ static bool qede_rx_xdp(struct qede_dev *edev, enum xdp_action act; xdp_init_buff(&xdp, rxq->rx_buf_seg_size, &rxq->xdp_rxq); - xdp.data_hard_start = page_address(bd->data); - xdp.data = xdp.data_hard_start + *data_offset; - xdp_set_data_meta_invalid(&xdp); - xdp.data_end = xdp.data + *len; + xdp_prepare_buff(&xdp, page_address(bd->data), *data_offset, + *len, false); /* Queues always have a full reset currently, so for the time * being until there's atomic program replace just mark read diff --git a/drivers/net/ethernet/sfc/rx.c b/drivers/net/ethernet/sfc/rx.c index eaa6650955d1a..89c5c75f479f4 100644 --- a/drivers/net/ethernet/sfc/rx.c +++ b/drivers/net/ethernet/sfc/rx.c @@ -294,12 +294,9 @@ static bool efx_do_xdp(struct efx_nic *efx, struct efx_channel *channel, efx->rx_prefix_size); xdp_init_buff(&xdp, efx->rx_page_buf_step, &rx_queue->xdp_rxq_info); - xdp.data = *ehp; - xdp.data_hard_start = xdp.data - EFX_XDP_HEADROOM; - /* No support yet for XDP metadata */ - xdp_set_data_meta_invalid(&xdp); - xdp.data_end = xdp.data + rx_buf->len; + xdp_prepare_buff(&xdp, *ehp - EFX_XDP_HEADROOM, EFX_XDP_HEADROOM, + rx_buf->len, false); xdp_act = bpf_prog_run_xdp(xdp_prog, &xdp); rcu_read_unlock(); diff --git a/drivers/net/ethernet/socionext/netsec.c b/drivers/net/ethernet/socionext/netsec.c index 945ca9517bf94..3c53051bdacfa 100644 --- a/drivers/net/ethernet/socionext/netsec.c +++ b/drivers/net/ethernet/socionext/netsec.c @@ -1015,10 +1015,8 @@ static int netsec_process_rx(struct netsec_priv *priv, int budget) dma_dir); prefetch(desc->addr); - xdp.data_hard_start = desc->addr; - xdp.data = desc->addr + NETSEC_RXBUF_HEADROOM; - xdp_set_data_meta_invalid(&xdp); - xdp.data_end = xdp.data + pkt_len; + xdp_prepare_buff(&xdp, desc->addr, NETSEC_RXBUF_HEADROOM, + pkt_len, false); if (xdp_prog) { xdp_result = netsec_run_xdp(priv, xdp_prog, &xdp); diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index 78a923391828e..5239318e96869 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -392,21 +392,15 @@ static void cpsw_rx_handler(void *token, int len, int status) } if (priv->xdp_prog) { - xdp_init_buff(&xdp, PAGE_SIZE, &priv->xdp_rxq[ch]); + int headroom = CPSW_HEADROOM, size = len; + xdp_init_buff(&xdp, PAGE_SIZE, &priv->xdp_rxq[ch]); if (status & CPDMA_RX_VLAN_ENCAP) { - xdp.data = pa + CPSW_HEADROOM + - CPSW_RX_VLAN_ENCAP_HDR_SIZE; - xdp.data_end = xdp.data + len - - CPSW_RX_VLAN_ENCAP_HDR_SIZE; - } else { - xdp.data = pa + CPSW_HEADROOM; - xdp.data_end = xdp.data + len; + headroom += CPSW_RX_VLAN_ENCAP_HDR_SIZE; + size -= CPSW_RX_VLAN_ENCAP_HDR_SIZE; } - xdp_set_data_meta_invalid(&xdp); - - xdp.data_hard_start = pa; + xdp_prepare_buff(&xdp, pa, headroom, size, false); port = priv->emac_port + cpsw->data.dual_emac; ret = cpsw_run_xdp(priv, ch, &xdp, page, port); diff --git a/drivers/net/ethernet/ti/cpsw_new.c b/drivers/net/ethernet/ti/cpsw_new.c index 1b3385ec9645f..94747f82c60ba 100644 --- a/drivers/net/ethernet/ti/cpsw_new.c +++ b/drivers/net/ethernet/ti/cpsw_new.c @@ -335,21 +335,15 @@ static void cpsw_rx_handler(void *token, int len, int status) } if (priv->xdp_prog) { - xdp_init_buff(&xdp, PAGE_SIZE, &priv->xdp_rxq[ch]); + int headroom = CPSW_HEADROOM, size = len; + xdp_init_buff(&xdp, PAGE_SIZE, &priv->xdp_rxq[ch]); if (status & CPDMA_RX_VLAN_ENCAP) { - xdp.data = pa + CPSW_HEADROOM + - CPSW_RX_VLAN_ENCAP_HDR_SIZE; - xdp.data_end = xdp.data + len - - CPSW_RX_VLAN_ENCAP_HDR_SIZE; - } else { - xdp.data = pa + CPSW_HEADROOM; - xdp.data_end = xdp.data + len; + headroom += CPSW_RX_VLAN_ENCAP_HDR_SIZE; + size -= CPSW_RX_VLAN_ENCAP_HDR_SIZE; } - xdp_set_data_meta_invalid(&xdp); - - xdp.data_hard_start = pa; + xdp_prepare_buff(&xdp, pa, headroom, size, false); ret = cpsw_run_xdp(priv, ch, &xdp, page, priv->emac_port); if (ret != CPSW_XDP_PASS) diff --git a/drivers/net/hyperv/netvsc_bpf.c b/drivers/net/hyperv/netvsc_bpf.c index 14a7ee4c68998..d60dcf6c9829e 100644 --- a/drivers/net/hyperv/netvsc_bpf.c +++ b/drivers/net/hyperv/netvsc_bpf.c @@ -45,10 +45,7 @@ u32 netvsc_run_xdp(struct net_device *ndev, struct netvsc_channel *nvchan, } xdp_init_buff(xdp, PAGE_SIZE, &nvchan->xdp_rxq); - xdp->data_hard_start = page_address(page); - xdp->data = xdp->data_hard_start + NETVSC_XDP_HDRM; - xdp_set_data_meta_invalid(xdp); - xdp->data_end = xdp->data + len; + xdp_prepare_buff(xdp, page_address(page), NETVSC_XDP_HDRM, len, false); memcpy(xdp->data, data, len); diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 3b7728433a863..702215596889b 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -1600,10 +1600,7 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun, u32 act; xdp_init_buff(&xdp, buflen, &tfile->xdp_rxq); - xdp.data_hard_start = buf; - xdp.data = buf + pad; - xdp_set_data_meta_invalid(&xdp); - xdp.data_end = xdp.data + len; + xdp_prepare_buff(&xdp, buf, pad, len, false); act = bpf_prog_run_xdp(xdp_prog, &xdp); if (act == XDP_REDIRECT || act == XDP_TX) { diff --git a/drivers/net/veth.c b/drivers/net/veth.c index 25f3601fb6dd5..99caae7d16413 100644 --- a/drivers/net/veth.c +++ b/drivers/net/veth.c @@ -710,15 +710,11 @@ static struct sk_buff *veth_xdp_rcv_skb(struct veth_rq *rq, skb = nskb; } - xdp.data_hard_start = skb->head; - xdp.data = skb_mac_header(skb); - xdp.data_end = xdp.data + pktlen; - xdp.data_meta = xdp.data; - /* SKB "head" area always have tailroom for skb_shared_info */ - frame_sz = (void *)skb_end_pointer(skb) - xdp.data_hard_start; + frame_sz = skb_end_pointer(skb) - skb->head; frame_sz += SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); xdp_init_buff(&xdp, frame_sz, &rq->xdp_rxq); + xdp_prepare_buff(&xdp, skb->head, skb->mac_header, pktlen, true); orig_data = xdp.data; orig_data_end = xdp.data_end; diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 36b0f81bcd7aa..ba8e637925497 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -690,10 +690,8 @@ static struct sk_buff *receive_small(struct net_device *dev, } xdp_init_buff(&xdp, buflen, &rq->xdp_rxq); - xdp.data_hard_start = buf + VIRTNET_RX_PAD + vi->hdr_len; - xdp.data = xdp.data_hard_start + xdp_headroom; - xdp.data_end = xdp.data + len; - xdp.data_meta = xdp.data; + xdp_prepare_buff(&xdp, buf + VIRTNET_RX_PAD + vi->hdr_len, + xdp_headroom, len, true); orig_data = xdp.data; act = bpf_prog_run_xdp(xdp_prog, &xdp); stats->xdp_packets++; @@ -859,10 +857,8 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, */ data = page_address(xdp_page) + offset; xdp_init_buff(&xdp, frame_sz - vi->hdr_len, &rq->xdp_rxq); - xdp.data_hard_start = data - VIRTIO_XDP_HEADROOM + vi->hdr_len; - xdp.data = data + vi->hdr_len; - xdp.data_end = xdp.data + (len - vi->hdr_len); - xdp.data_meta = xdp.data; + xdp_prepare_buff(&xdp, data - VIRTIO_XDP_HEADROOM + vi->hdr_len, + VIRTIO_XDP_HEADROOM, len - vi->hdr_len, true); act = bpf_prog_run_xdp(xdp_prog, &xdp); stats->xdp_packets++; diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index 329397c60d846..c20b78120bb42 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -866,10 +866,8 @@ static u32 xennet_run_xdp(struct netfront_queue *queue, struct page *pdata, xdp_init_buff(xdp, XEN_PAGE_SIZE - XDP_PACKET_HEADROOM, &queue->xdp_rxq); - xdp->data_hard_start = page_address(pdata); - xdp->data = xdp->data_hard_start + XDP_PACKET_HEADROOM; - xdp_set_data_meta_invalid(xdp); - xdp->data_end = xdp->data + len; + xdp_prepare_buff(xdp, page_address(pdata), XDP_PACKET_HEADROOM, + len, false); act = bpf_prog_run_xdp(prog, xdp); switch (act) { diff --git a/include/net/xdp.h b/include/net/xdp.h index 8a589f7e08e04..0cf3976ce77cf 100644 --- a/include/net/xdp.h +++ b/include/net/xdp.h @@ -83,6 +83,18 @@ xdp_init_buff(struct xdp_buff *xdp, u32 frame_sz, struct xdp_rxq_info *rxq) xdp->rxq = rxq; } +static __always_inline void +xdp_prepare_buff(struct xdp_buff *xdp, unsigned char *hard_start, + int headroom, int data_len, const bool meta_valid) +{ + unsigned char *data = hard_start + headroom; + + xdp->data_hard_start = hard_start; + xdp->data = data; + xdp->data_end = data + data_len; + xdp->data_meta = meta_valid ? data : data + 1; +} + /* Reserve memory area at end-of data area. * * This macro reserves tailroom in the XDP buffer by limiting the diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index a8fa5a9e41375..23dfb2010ba69 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -636,14 +636,11 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr, if (IS_ERR(data)) return PTR_ERR(data); - xdp.data_hard_start = data; - xdp.data = data + headroom; - xdp.data_meta = xdp.data; - xdp.data_end = xdp.data + size; - rxqueue = __netif_get_rx_queue(current->nsproxy->net_ns->loopback_dev, 0); xdp_init_buff(&xdp, headroom + max_data_sz + tailroom, &rxqueue->xdp_rxq); + xdp_prepare_buff(&xdp, data, headroom, size, true); + bpf_prog_change_xdp(NULL, prog); ret = bpf_test_run(prog, &xdp, repeat, &retval, &duration, true); if (ret) diff --git a/net/core/dev.c b/net/core/dev.c index e6d758a3c2a92..55499b017a425 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4603,14 +4603,14 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb, struct xdp_buff *xdp, struct bpf_prog *xdp_prog) { + void *orig_data, *orig_data_end, *hard_start; struct netdev_rx_queue *rxqueue; - void *orig_data, *orig_data_end; u32 metalen, act = XDP_DROP; u32 mac_len, frame_sz; __be16 orig_eth_type; struct ethhdr *eth; bool orig_bcast; - int hlen, off; + int off; /* Reinjected packets coming from act_mirred or similar should * not get XDP generic processing. @@ -4642,25 +4642,23 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb, * header. */ mac_len = skb->data - skb_mac_header(skb); - hlen = skb_headlen(skb) + mac_len; - xdp->data = skb->data - mac_len; - xdp->data_meta = xdp->data; - xdp->data_end = xdp->data + hlen; - xdp->data_hard_start = skb->data - skb_headroom(skb); + hard_start = skb->data - skb_headroom(skb); /* SKB "head" area always have tailroom for skb_shared_info */ - frame_sz = (void *)skb_end_pointer(skb) - xdp->data_hard_start; + frame_sz = (void *)skb_end_pointer(skb) - hard_start; frame_sz += SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); + rxqueue = netif_get_rxqueue(skb); + xdp_init_buff(xdp, frame_sz, &rxqueue->xdp_rxq); + xdp_prepare_buff(xdp, hard_start, skb_headroom(skb) - mac_len, + skb_headlen(skb) + mac_len, true); + orig_data_end = xdp->data_end; orig_data = xdp->data; eth = (struct ethhdr *)xdp->data; orig_bcast = is_multicast_ether_addr_64bits(eth->h_dest); orig_eth_type = eth->h_proto; - rxqueue = netif_get_rxqueue(skb); - xdp_init_buff(xdp, frame_sz, &rxqueue->xdp_rxq); - act = bpf_prog_run_xdp(xdp_prog, xdp); /* check if bpf_xdp_adjust_head was used */ -- GitLab From 9a8120a8d7eb872da43e61d598c226f0d6b7ce5f Mon Sep 17 00:00:00 2001 From: Menglong Dong Date: Tue, 5 Jan 2021 07:20:47 -0800 Subject: [PATCH 0406/2012] selftests/bpf: Remove duplicate include in test_lsm 'unistd.h' included in 'selftests/bpf/prog_tests/test_lsm.c' is duplicated. Signed-off-by: Menglong Dong Signed-off-by: Andrii Nakryiko Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20210105152047.6070-1-dong.menglong@zte.com.cn Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/prog_tests/test_lsm.c | 1 - 1 file changed, 1 deletion(-) diff --git a/tools/testing/selftests/bpf/prog_tests/test_lsm.c b/tools/testing/selftests/bpf/prog_tests/test_lsm.c index 6ab29226c99b6..2755e4f814999 100644 --- a/tools/testing/selftests/bpf/prog_tests/test_lsm.c +++ b/tools/testing/selftests/bpf/prog_tests/test_lsm.c @@ -10,7 +10,6 @@ #include #include #include -#include #include "lsm.skel.h" -- GitLab From e8deee4f1543eda9b75278f63322f412cad52f6a Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Fri, 8 Jan 2021 13:46:55 -0800 Subject: [PATCH 0407/2012] ARC: [hsdk]: Enable FPU_SAVE_RESTORE HSDK has hardware floating point and the common use case is with glibc+hf so enable that as default. Signed-off-by: Vineet Gupta --- arch/arc/plat-hsdk/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arc/plat-hsdk/Kconfig b/arch/arc/plat-hsdk/Kconfig index 6b5c54576f54d..a2d10c29fbcc0 100644 --- a/arch/arc/plat-hsdk/Kconfig +++ b/arch/arc/plat-hsdk/Kconfig @@ -7,6 +7,7 @@ menuconfig ARC_SOC_HSDK depends on ISA_ARCV2 select ARC_HAS_ACCL_REGS select ARC_IRQ_NO_AUTOSAVE + select ARC_FPU_SAVE_RESTORE select CLK_HSDK select RESET_CONTROLLER select RESET_HSDK -- GitLab From e22d7f05e445165e58feddb4e40cc9c0f94453bc Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Fri, 8 Jan 2021 11:44:08 -0800 Subject: [PATCH 0408/2012] libbpf: Clarify kernel type use with USER variants of CORE reading macros Add comments clarifying that USER variants of CO-RE reading macro are still only going to work with kernel types, defined in kernel or kernel module BTF. This should help preventing invalid use of those macro to read user-defined types (which doesn't work with CO-RE). Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20210108194408.3468860-1-andrii@kernel.org --- tools/lib/bpf/bpf_core_read.h | 45 ++++++++++++++++++++++++++++++----- 1 file changed, 39 insertions(+), 6 deletions(-) diff --git a/tools/lib/bpf/bpf_core_read.h b/tools/lib/bpf/bpf_core_read.h index 9456aabcb03a2..53b3e199fb254 100644 --- a/tools/lib/bpf/bpf_core_read.h +++ b/tools/lib/bpf/bpf_core_read.h @@ -197,6 +197,7 @@ enum bpf_enum_value_kind { #define bpf_core_read(dst, sz, src) \ bpf_probe_read_kernel(dst, sz, (const void *)__builtin_preserve_access_index(src)) +/* NOTE: see comments for BPF_CORE_READ_USER() about the proper types use. */ #define bpf_core_read_user(dst, sz, src) \ bpf_probe_read_user(dst, sz, (const void *)__builtin_preserve_access_index(src)) /* @@ -207,6 +208,7 @@ enum bpf_enum_value_kind { #define bpf_core_read_str(dst, sz, src) \ bpf_probe_read_kernel_str(dst, sz, (const void *)__builtin_preserve_access_index(src)) +/* NOTE: see comments for BPF_CORE_READ_USER() about the proper types use. */ #define bpf_core_read_user_str(dst, sz, src) \ bpf_probe_read_user_str(dst, sz, (const void *)__builtin_preserve_access_index(src)) @@ -302,7 +304,11 @@ enum bpf_enum_value_kind { dst, (src), a, ##__VA_ARGS__) \ }) -/* Variant of BPF_CORE_READ_INTO() for reading from user-space memory */ +/* + * Variant of BPF_CORE_READ_INTO() for reading from user-space memory. + * + * NOTE: see comments for BPF_CORE_READ_USER() about the proper types use. + */ #define BPF_CORE_READ_USER_INTO(dst, src, a, ...) ({ \ ___core_read(bpf_core_read_user, bpf_core_read_user, \ dst, (src), a, ##__VA_ARGS__) \ @@ -314,7 +320,11 @@ enum bpf_enum_value_kind { dst, (src), a, ##__VA_ARGS__) \ }) -/* Non-CO-RE variant of BPF_CORE_READ_USER_INTO() */ +/* Non-CO-RE variant of BPF_CORE_READ_USER_INTO(). + * + * As no CO-RE relocations are emitted, source types can be arbitrary and are + * not restricted to kernel types only. + */ #define BPF_PROBE_READ_USER_INTO(dst, src, a, ...) ({ \ ___core_read(bpf_probe_read_user, bpf_probe_read_user, \ dst, (src), a, ##__VA_ARGS__) \ @@ -330,7 +340,11 @@ enum bpf_enum_value_kind { dst, (src), a, ##__VA_ARGS__) \ }) -/* Variant of BPF_CORE_READ_STR_INTO() for reading from user-space memory */ +/* + * Variant of BPF_CORE_READ_STR_INTO() for reading from user-space memory. + * + * NOTE: see comments for BPF_CORE_READ_USER() about the proper types use. + */ #define BPF_CORE_READ_USER_STR_INTO(dst, src, a, ...) ({ \ ___core_read(bpf_core_read_user_str, bpf_core_read_user, \ dst, (src), a, ##__VA_ARGS__) \ @@ -342,7 +356,12 @@ enum bpf_enum_value_kind { dst, (src), a, ##__VA_ARGS__) \ }) -/* Non-CO-RE variant of BPF_CORE_READ_USER_STR_INTO() */ +/* + * Non-CO-RE variant of BPF_CORE_READ_USER_STR_INTO(). + * + * As no CO-RE relocations are emitted, source types can be arbitrary and are + * not restricted to kernel types only. + */ #define BPF_PROBE_READ_USER_STR_INTO(dst, src, a, ...) ({ \ ___core_read(bpf_probe_read_user_str, bpf_probe_read_user, \ dst, (src), a, ##__VA_ARGS__) \ @@ -378,7 +397,16 @@ enum bpf_enum_value_kind { __r; \ }) -/* Variant of BPF_CORE_READ() for reading from user-space memory */ +/* + * Variant of BPF_CORE_READ() for reading from user-space memory. + * + * NOTE: all the source types involved are still *kernel types* and need to + * exist in kernel (or kernel module) BTF, otherwise CO-RE relocation will + * fail. Custom user types are not relocatable with CO-RE. + * The typical situation in which BPF_CORE_READ_USER() might be used is to + * read kernel UAPI types from the user-space memory passed in as a syscall + * input argument. + */ #define BPF_CORE_READ_USER(src, a, ...) ({ \ ___type((src), a, ##__VA_ARGS__) __r; \ BPF_CORE_READ_USER_INTO(&__r, (src), a, ##__VA_ARGS__); \ @@ -392,7 +420,12 @@ enum bpf_enum_value_kind { __r; \ }) -/* Non-CO-RE variant of BPF_CORE_READ_USER() */ +/* + * Non-CO-RE variant of BPF_CORE_READ_USER(). + * + * As no CO-RE relocations are emitted, source types can be arbitrary and are + * not restricted to kernel types only. + */ #define BPF_PROBE_READ_USER(src, a, ...) ({ \ ___type((src), a, ##__VA_ARGS__) __r; \ BPF_PROBE_READ_USER_INTO(&__r, (src), a, ##__VA_ARGS__); \ -- GitLab From b2345a8a4342cf83316a2198fa915c7c99b7d6c7 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 8 Jan 2021 08:52:15 +0100 Subject: [PATCH 0409/2012] ALSA: usb-audio: Fix the missing endpoints creations for quirks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The recent change in the endpoint management moved the endpoint object creation from the stream open time to the parser of the audio descriptor. It works fine for the standard audio, but it overlooked the other places that create audio streams via quirks (QUIRK_AUDIO_FIXED_ENDPOINT) like the reported a few Pioneer devices; those call snd_usb_add_audio_stream() manually, hence they miss the endpoints, eventually resulting in the error at opening streams. Moreover, now the sync EP setup was moved to the explicit call of snd_usb_audioformat_set_sync_ep(), and this needs to be added for those places, too. This patch addresses those regressions for quirks. It adds a local helper function add_audio_stream_from_fixed_fmt(), which does the all needed tasks, and replaces the calls of snd_usb_add_audio_stream() with this new function. Fixes: 54cb31901b83 ("ALSA: usb-audio: Create endpoint objects at parsing phase") Reported-by: František Kučera Link: https://lore.kernel.org/r/20210108075219.21463-2-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/usb/quirks.c | 54 +++++++++++++++++++++++++++++++++++++--------- 1 file changed, 44 insertions(+), 10 deletions(-) diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index e4a690bb4c996..b70e2ebc3e29e 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -120,6 +120,40 @@ static int create_standard_audio_quirk(struct snd_usb_audio *chip, return 0; } +/* create the audio stream and the corresponding endpoints from the fixed + * audioformat object; this is used for quirks with the fixed EPs + */ +static int add_audio_stream_from_fixed_fmt(struct snd_usb_audio *chip, + struct audioformat *fp) +{ + int stream, err; + + stream = (fp->endpoint & USB_DIR_IN) ? + SNDRV_PCM_STREAM_CAPTURE : SNDRV_PCM_STREAM_PLAYBACK; + + snd_usb_audioformat_set_sync_ep(chip, fp); + + err = snd_usb_add_audio_stream(chip, stream, fp); + if (err < 0) + return err; + + err = snd_usb_add_endpoint(chip, fp->endpoint, + SND_USB_ENDPOINT_TYPE_DATA); + if (err < 0) + return err; + + if (fp->sync_ep) { + err = snd_usb_add_endpoint(chip, fp->sync_ep, + fp->implicit_fb ? + SND_USB_ENDPOINT_TYPE_DATA : + SND_USB_ENDPOINT_TYPE_SYNC); + if (err < 0) + return err; + } + + return 0; +} + /* * create a stream for an endpoint/altsetting without proper descriptors */ @@ -131,8 +165,8 @@ static int create_fixed_stream_quirk(struct snd_usb_audio *chip, struct audioformat *fp; struct usb_host_interface *alts; struct usb_interface_descriptor *altsd; - int stream, err; unsigned *rate_table = NULL; + int err; fp = kmemdup(quirk->data, sizeof(*fp), GFP_KERNEL); if (!fp) @@ -153,11 +187,6 @@ static int create_fixed_stream_quirk(struct snd_usb_audio *chip, fp->rate_table = rate_table; } - stream = (fp->endpoint & USB_DIR_IN) - ? SNDRV_PCM_STREAM_CAPTURE : SNDRV_PCM_STREAM_PLAYBACK; - err = snd_usb_add_audio_stream(chip, stream, fp); - if (err < 0) - goto error; if (fp->iface != get_iface_desc(&iface->altsetting[0])->bInterfaceNumber || fp->altset_idx >= iface->num_altsetting) { err = -EINVAL; @@ -176,6 +205,13 @@ static int create_fixed_stream_quirk(struct snd_usb_audio *chip, fp->datainterval = snd_usb_parse_datainterval(chip, alts); if (fp->maxpacksize == 0) fp->maxpacksize = le16_to_cpu(get_endpoint(alts, 0)->wMaxPacketSize); + if (!fp->fmt_type) + fp->fmt_type = UAC_FORMAT_TYPE_I; + + err = add_audio_stream_from_fixed_fmt(chip, fp); + if (err < 0) + goto error; + usb_set_interface(chip->dev, fp->iface, 0); snd_usb_init_pitch(chip, fp); snd_usb_init_sample_rate(chip, fp, fp->rate_max); @@ -417,7 +453,7 @@ static int create_uaxx_quirk(struct snd_usb_audio *chip, struct usb_host_interface *alts; struct usb_interface_descriptor *altsd; struct audioformat *fp; - int stream, err; + int err; /* both PCM and MIDI interfaces have 2 or more altsettings */ if (iface->num_altsetting < 2) @@ -482,9 +518,7 @@ static int create_uaxx_quirk(struct snd_usb_audio *chip, return -ENXIO; } - stream = (fp->endpoint & USB_DIR_IN) - ? SNDRV_PCM_STREAM_CAPTURE : SNDRV_PCM_STREAM_PLAYBACK; - err = snd_usb_add_audio_stream(chip, stream, fp); + err = add_audio_stream_from_fixed_fmt(chip, fp); if (err < 0) { list_del(&fp->list); /* unlink for avoiding double-free */ kfree(fp); -- GitLab From 5d15f1eb456025cf47078fdbc230d7a9f1ee4cef Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 8 Jan 2021 08:52:16 +0100 Subject: [PATCH 0410/2012] ALSA: usb-audio: Choose audioformat of a counter-part substream The implicit feedback mode needs to handle two endpoints and the choice of the audioformat object for the sync EP is important since this determines the compatibility of the hw_params. The current code uses the same audioformat object if both the main EP and the sync EP point to the same iface/altsetting. This was done in consideration of the non-implicit-fb sync EP handling, and it doesn't match well with the cases where actually to endpoints are defined in the sameiface / altsetting like a few Pioneer devices. Modify snd_usb_find_implicit_fb_sync_format() to pick up the audioformat that is assigned in the counter-part substreams primarily, so that the actual capture stream can be opened properly. We keep the same audioformat object only as a fallback in case nothing found, though. Fixes: 9fddc15e8039 ("ALSA: usb-audio: Factor out the implicit feedback quirk code") Link: https://lore.kernel.org/r/20210108075219.21463-3-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/usb/implicit.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/sound/usb/implicit.c b/sound/usb/implicit.c index 931042a6a051e..9724efe1cdce8 100644 --- a/sound/usb/implicit.c +++ b/sound/usb/implicit.c @@ -378,20 +378,19 @@ snd_usb_find_implicit_fb_sync_format(struct snd_usb_audio *chip, int stream) { struct snd_usb_substream *subs; - const struct audioformat *fp, *sync_fmt; + const struct audioformat *fp, *sync_fmt = NULL; int score, high_score; - /* When sharing the same altset, use the original audioformat */ + /* Use the original audioformat as fallback for the shared altset */ if (target->iface == target->sync_iface && target->altsetting == target->sync_altsetting) - return target; + sync_fmt = target; subs = find_matching_substream(chip, stream, target->sync_ep, target->fmt_type); if (!subs) - return NULL; + return sync_fmt; - sync_fmt = NULL; high_score = 0; list_for_each_entry(fp, &subs->fmt_list, list) { score = match_endpoint_audioformats(subs, fp, -- GitLab From 00272c61827e37bb64c47499843d8c0d8ee136a5 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 8 Jan 2021 08:52:17 +0100 Subject: [PATCH 0411/2012] ALSA: usb-audio: Avoid unnecessary interface re-setup The current endpoint handling assumed (more or less) a unique 1:1 relation between the endpoint and the iface/altset. The exception was the sync EP without the implicit feedback which has usually the secondary EP of the same altset. This works fine for most devices, but it turned out that some unusual devices like Pinoeer's ones have both playback and capture endpoints in the same iface/altsetting and use both for the implicit feedback mode. For handling such a case, we need to extend the endpoint management to take the shared interface into account. This patch does that: it adds a new object snd_usb_iface_ref for managing the reference counts of the each USB interface that is used by each endpoint. The interface setup is performed only once for the (sharing) endpoints, and the doubly initialization is avoided. Along with this, the resource release of endpoints and interface refcounts are put into a single function, snd_usb_endpoint_free_all() instead of looping in the caller side. Fixes: bf6313a0ff76 ("ALSA: usb-audio: Refactor endpoint management") Link: https://lore.kernel.org/r/20210108075219.21463-4-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/usb/card.c | 5 ++- sound/usb/card.h | 2 ++ sound/usb/endpoint.c | 82 ++++++++++++++++++++++++++++++++++++++------ sound/usb/endpoint.h | 2 +- sound/usb/usbaudio.h | 1 + 5 files changed, 77 insertions(+), 15 deletions(-) diff --git a/sound/usb/card.c b/sound/usb/card.c index d731ca62d5994..e08fbf8e3ee0f 100644 --- a/sound/usb/card.c +++ b/sound/usb/card.c @@ -450,10 +450,8 @@ lookup_device_name(u32 id) static void snd_usb_audio_free(struct snd_card *card) { struct snd_usb_audio *chip = card->private_data; - struct snd_usb_endpoint *ep, *n; - list_for_each_entry_safe(ep, n, &chip->ep_list, list) - snd_usb_endpoint_free(ep); + snd_usb_endpoint_free_all(chip); mutex_destroy(&chip->mutex); if (!atomic_read(&chip->shutdown)) @@ -611,6 +609,7 @@ static int snd_usb_audio_create(struct usb_interface *intf, chip->usb_id = usb_id; INIT_LIST_HEAD(&chip->pcm_list); INIT_LIST_HEAD(&chip->ep_list); + INIT_LIST_HEAD(&chip->iface_ref_list); INIT_LIST_HEAD(&chip->midi_list); INIT_LIST_HEAD(&chip->mixer_list); diff --git a/sound/usb/card.h b/sound/usb/card.h index 6a027c349194a..de0d2aa883fa2 100644 --- a/sound/usb/card.h +++ b/sound/usb/card.h @@ -42,6 +42,7 @@ struct audioformat { }; struct snd_usb_substream; +struct snd_usb_iface_ref; struct snd_usb_endpoint; struct snd_usb_power_domain; @@ -58,6 +59,7 @@ struct snd_urb_ctx { struct snd_usb_endpoint { struct snd_usb_audio *chip; + struct snd_usb_iface_ref *iface_ref; int opened; /* open refcount; protect with chip->mutex */ atomic_t running; /* running status */ diff --git a/sound/usb/endpoint.c b/sound/usb/endpoint.c index 162da7a500463..ae6276aded91f 100644 --- a/sound/usb/endpoint.c +++ b/sound/usb/endpoint.c @@ -24,6 +24,14 @@ #define EP_FLAG_RUNNING 1 #define EP_FLAG_STOPPING 2 +/* interface refcounting */ +struct snd_usb_iface_ref { + unsigned char iface; + bool need_setup; + int opened; + struct list_head list; +}; + /* * snd_usb_endpoint is a model that abstracts everything related to an * USB endpoint and its streaming. @@ -488,6 +496,28 @@ exit_clear: clear_bit(ctx->index, &ep->active_mask); } +/* + * Find or create a refcount object for the given interface + * + * The objects are released altogether in snd_usb_endpoint_free_all() + */ +static struct snd_usb_iface_ref * +iface_ref_find(struct snd_usb_audio *chip, int iface) +{ + struct snd_usb_iface_ref *ip; + + list_for_each_entry(ip, &chip->iface_ref_list, list) + if (ip->iface == iface) + return ip; + + ip = kzalloc(sizeof(*ip), GFP_KERNEL); + if (!ip) + return NULL; + ip->iface = iface; + list_add_tail(&ip->list, &chip->iface_ref_list); + return ip; +} + /* * Get the existing endpoint object corresponding EP * Returns NULL if not present. @@ -520,8 +550,8 @@ snd_usb_get_endpoint(struct snd_usb_audio *chip, int ep_num) * * Returns zero on success or a negative error code. * - * New endpoints will be added to chip->ep_list and must be freed by - * calling snd_usb_endpoint_free(). + * New endpoints will be added to chip->ep_list and freed by + * calling snd_usb_endpoint_free_all(). * * For SND_USB_ENDPOINT_TYPE_SYNC, the caller needs to guarantee that * bNumEndpoints > 1 beforehand. @@ -658,6 +688,12 @@ snd_usb_endpoint_open(struct snd_usb_audio *chip, usb_audio_dbg(chip, "Open EP 0x%x, iface=%d:%d, idx=%d\n", ep_num, ep->iface, ep->altsetting, ep->ep_idx); + ep->iface_ref = iface_ref_find(chip, ep->iface); + if (!ep->iface_ref) { + ep = NULL; + goto unlock; + } + ep->cur_audiofmt = fp; ep->cur_channels = fp->channels; ep->cur_rate = params_rate(params); @@ -681,6 +717,11 @@ snd_usb_endpoint_open(struct snd_usb_audio *chip, ep->implicit_fb_sync); } else { + if (WARN_ON(!ep->iface_ref)) { + ep = NULL; + goto unlock; + } + if (!endpoint_compatible(ep, fp, params)) { usb_audio_err(chip, "Incompatible EP setup for 0x%x\n", ep_num); @@ -692,6 +733,9 @@ snd_usb_endpoint_open(struct snd_usb_audio *chip, ep_num, ep->opened); } + if (!ep->iface_ref->opened++) + ep->iface_ref->need_setup = true; + ep->opened++; unlock: @@ -760,12 +804,16 @@ void snd_usb_endpoint_close(struct snd_usb_audio *chip, mutex_lock(&chip->mutex); usb_audio_dbg(chip, "Closing EP 0x%x (count %d)\n", ep->ep_num, ep->opened); - if (!--ep->opened) { + + if (!--ep->iface_ref->opened) endpoint_set_interface(chip, ep, false); + + if (!--ep->opened) { ep->iface = 0; ep->altsetting = 0; ep->cur_audiofmt = NULL; ep->cur_rate = 0; + ep->iface_ref = NULL; usb_audio_dbg(chip, "EP 0x%x closed\n", ep->ep_num); } mutex_unlock(&chip->mutex); @@ -775,6 +823,8 @@ void snd_usb_endpoint_close(struct snd_usb_audio *chip, void snd_usb_endpoint_suspend(struct snd_usb_endpoint *ep) { ep->need_setup = true; + if (ep->iface_ref) + ep->iface_ref->need_setup = true; } /* @@ -1195,11 +1245,13 @@ int snd_usb_endpoint_configure(struct snd_usb_audio *chip, int err = 0; mutex_lock(&chip->mutex); + if (WARN_ON(!ep->iface_ref)) + goto unlock; if (!ep->need_setup) goto unlock; - /* No need to (re-)configure the sync EP belonging to the same altset */ - if (ep->ep_idx) { + /* If the interface has been already set up, just set EP parameters */ + if (!ep->iface_ref->need_setup) { err = snd_usb_endpoint_set_params(chip, ep); if (err < 0) goto unlock; @@ -1242,6 +1294,8 @@ int snd_usb_endpoint_configure(struct snd_usb_audio *chip, goto unlock; } + ep->iface_ref->need_setup = false; + done: ep->need_setup = false; err = 1; @@ -1387,15 +1441,21 @@ void snd_usb_endpoint_release(struct snd_usb_endpoint *ep) } /** - * snd_usb_endpoint_free: Free the resources of an snd_usb_endpoint + * snd_usb_endpoint_free_all: Free the resources of an snd_usb_endpoint + * @card: The chip * - * @ep: the endpoint to free - * - * This free all resources of the given ep. + * This free all endpoints and those resources */ -void snd_usb_endpoint_free(struct snd_usb_endpoint *ep) +void snd_usb_endpoint_free_all(struct snd_usb_audio *chip) { - kfree(ep); + struct snd_usb_endpoint *ep, *en; + struct snd_usb_iface_ref *ip, *in; + + list_for_each_entry_safe(ep, en, &chip->ep_list, list) + kfree(ep); + + list_for_each_entry_safe(ip, in, &chip->iface_ref_list, list) + kfree(ip); } /* diff --git a/sound/usb/endpoint.h b/sound/usb/endpoint.h index 11e3bb839fd7e..eea4ca49876d6 100644 --- a/sound/usb/endpoint.h +++ b/sound/usb/endpoint.h @@ -42,7 +42,7 @@ void snd_usb_endpoint_sync_pending_stop(struct snd_usb_endpoint *ep); void snd_usb_endpoint_suspend(struct snd_usb_endpoint *ep); int snd_usb_endpoint_activate(struct snd_usb_endpoint *ep); void snd_usb_endpoint_release(struct snd_usb_endpoint *ep); -void snd_usb_endpoint_free(struct snd_usb_endpoint *ep); +void snd_usb_endpoint_free_all(struct snd_usb_audio *chip); int snd_usb_endpoint_implicit_feedback_sink(struct snd_usb_endpoint *ep); int snd_usb_endpoint_next_packet_size(struct snd_usb_endpoint *ep, diff --git a/sound/usb/usbaudio.h b/sound/usb/usbaudio.h index 980287aadd361..215c1771dd570 100644 --- a/sound/usb/usbaudio.h +++ b/sound/usb/usbaudio.h @@ -44,6 +44,7 @@ struct snd_usb_audio { struct list_head pcm_list; /* list of pcm streams */ struct list_head ep_list; /* list of audio-related endpoints */ + struct list_head iface_ref_list; /* list of interface refcounts */ int pcm_devs; struct list_head midi_list; /* list of midi interfaces */ -- GitLab From eae4d054f909d9e9589d0940f9b5b0cd68de1e2e Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 8 Jan 2021 08:52:18 +0100 Subject: [PATCH 0412/2012] ALSA: usb-audio: Annotate the endpoint index in audioformat There are devices that have multiple endpoints sharing the same iface/altset not only for sync but also for the actual streams, and the audioformat for such an endpoint needs to be handled with the proper endpoint index; otherwise it confuses the endpoint management. This patch extends the audioformat to annotate the endpoint index, and put the proper ep_idx=1 to Pioneer device quirk entries accordingly. Fixes: bf6313a0ff76 ("ALSA: usb-audio: Refactor endpoint management") Link: https://lore.kernel.org/r/20210108075219.21463-5-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/usb/card.h | 1 + sound/usb/endpoint.c | 2 +- sound/usb/quirks-table.h | 6 ++++++ sound/usb/quirks.c | 4 ++-- 4 files changed, 10 insertions(+), 3 deletions(-) diff --git a/sound/usb/card.h b/sound/usb/card.h index de0d2aa883fa2..37091b1176143 100644 --- a/sound/usb/card.h +++ b/sound/usb/card.h @@ -18,6 +18,7 @@ struct audioformat { unsigned int frame_size; /* samples per frame for non-audio */ unsigned char iface; /* interface number */ unsigned char altsetting; /* corresponding alternate setting */ + unsigned char ep_idx; /* endpoint array index */ unsigned char altset_idx; /* array index of altenate setting */ unsigned char attributes; /* corresponding attributes of cs endpoint */ unsigned char endpoint; /* endpoint */ diff --git a/sound/usb/endpoint.c b/sound/usb/endpoint.c index ae6276aded91f..fe73fe3ff2bca 100644 --- a/sound/usb/endpoint.c +++ b/sound/usb/endpoint.c @@ -683,7 +683,7 @@ snd_usb_endpoint_open(struct snd_usb_audio *chip, } else { ep->iface = fp->iface; ep->altsetting = fp->altsetting; - ep->ep_idx = 0; + ep->ep_idx = fp->ep_idx; } usb_audio_dbg(chip, "Open EP 0x%x, iface=%d:%d, idx=%d\n", ep_num, ep->iface, ep->altsetting, ep->ep_idx); diff --git a/sound/usb/quirks-table.h b/sound/usb/quirks-table.h index 0e11cb96fa8cf..c8a4bdf18207c 100644 --- a/sound/usb/quirks-table.h +++ b/sound/usb/quirks-table.h @@ -3362,6 +3362,7 @@ AU0828_DEVICE(0x2040, 0x7270, "Hauppauge", "HVR-950Q"), .altsetting = 1, .altset_idx = 1, .endpoint = 0x86, + .ep_idx = 1, .ep_attr = USB_ENDPOINT_XFER_ISOC| USB_ENDPOINT_SYNC_ASYNC| USB_ENDPOINT_USAGE_IMPLICIT_FB, @@ -3450,6 +3451,7 @@ AU0828_DEVICE(0x2040, 0x7270, "Hauppauge", "HVR-950Q"), .altsetting = 1, .altset_idx = 1, .endpoint = 0x82, + .ep_idx = 1, .ep_attr = USB_ENDPOINT_XFER_ISOC| USB_ENDPOINT_SYNC_ASYNC| USB_ENDPOINT_USAGE_IMPLICIT_FB, @@ -3506,6 +3508,7 @@ AU0828_DEVICE(0x2040, 0x7270, "Hauppauge", "HVR-950Q"), .altsetting = 1, .altset_idx = 1, .endpoint = 0x82, + .ep_idx = 1, .ep_attr = USB_ENDPOINT_XFER_ISOC| USB_ENDPOINT_SYNC_ASYNC| USB_ENDPOINT_USAGE_IMPLICIT_FB, @@ -3562,6 +3565,7 @@ AU0828_DEVICE(0x2040, 0x7270, "Hauppauge", "HVR-950Q"), .altsetting = 1, .altset_idx = 1, .endpoint = 0x82, + .ep_idx = 1, .ep_attr = USB_ENDPOINT_XFER_ISOC| USB_ENDPOINT_SYNC_ASYNC| USB_ENDPOINT_USAGE_IMPLICIT_FB, @@ -3619,6 +3623,7 @@ AU0828_DEVICE(0x2040, 0x7270, "Hauppauge", "HVR-950Q"), .altsetting = 1, .altset_idx = 1, .endpoint = 0x82, + .ep_idx = 1, .ep_attr = USB_ENDPOINT_XFER_ISOC| USB_ENDPOINT_SYNC_ASYNC| USB_ENDPOINT_USAGE_IMPLICIT_FB, @@ -3679,6 +3684,7 @@ AU0828_DEVICE(0x2040, 0x7270, "Hauppauge", "HVR-950Q"), .altsetting = 1, .altset_idx = 1, .endpoint = 0x82, + .ep_idx = 1, .ep_attr = USB_ENDPOINT_XFER_ISOC| USB_ENDPOINT_SYNC_ASYNC| USB_ENDPOINT_USAGE_IMPLICIT_FB, diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index b70e2ebc3e29e..89e172642d98b 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -194,7 +194,7 @@ static int create_fixed_stream_quirk(struct snd_usb_audio *chip, } alts = &iface->altsetting[fp->altset_idx]; altsd = get_iface_desc(alts); - if (altsd->bNumEndpoints < 1) { + if (altsd->bNumEndpoints <= fp->ep_idx) { err = -EINVAL; goto error; } @@ -204,7 +204,7 @@ static int create_fixed_stream_quirk(struct snd_usb_audio *chip, if (fp->datainterval == 0) fp->datainterval = snd_usb_parse_datainterval(chip, alts); if (fp->maxpacksize == 0) - fp->maxpacksize = le16_to_cpu(get_endpoint(alts, 0)->wMaxPacketSize); + fp->maxpacksize = le16_to_cpu(get_endpoint(alts, fp->ep_idx)->wMaxPacketSize); if (!fp->fmt_type) fp->fmt_type = UAC_FORMAT_TYPE_I; -- GitLab From 167c9dc84ec384c0940359e067301883ad2b42a8 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 8 Jan 2021 08:52:19 +0100 Subject: [PATCH 0413/2012] ALSA: usb-audio: Fix implicit feedback sync setup for Pioneer devices MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pioneer devices have both playback and capture streams sharing the same iface/altsetting, and those need to be paired as implicit feedback. Instead of a half-baked (and broken) static quirk entry, set up more generically for those devices by checking the number of endpoints and the attribute of the secondary EP. Fixes: bf6313a0ff76 ("ALSA: usb-audio: Refactor endpoint management") Reported-by: František Kučera Link: https://lore.kernel.org/r/20210108075219.21463-6-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/usb/implicit.c | 48 +++++++++++++++++++++++++++++++++++--------- 1 file changed, 38 insertions(+), 10 deletions(-) diff --git a/sound/usb/implicit.c b/sound/usb/implicit.c index 9724efe1cdce8..1ac2cc6c33fb1 100644 --- a/sound/usb/implicit.c +++ b/sound/usb/implicit.c @@ -58,8 +58,6 @@ static const struct snd_usb_implicit_fb_match playback_implicit_fb_quirks[] = { IMPLICIT_FB_FIXED_DEV(0x0499, 0x172f, 0x81, 2), /* Steinberg UR22C */ IMPLICIT_FB_FIXED_DEV(0x0d9a, 0x00df, 0x81, 2), /* RTX6001 */ IMPLICIT_FB_FIXED_DEV(0x22f0, 0x0006, 0x81, 3), /* Allen&Heath Qu-16 */ - IMPLICIT_FB_FIXED_DEV(0x2b73, 0x000a, 0x82, 0), /* Pioneer DJ DJM-900NXS2 */ - IMPLICIT_FB_FIXED_DEV(0x2b73, 0x0017, 0x82, 0), /* Pioneer DJ DJM-250MK2 */ IMPLICIT_FB_FIXED_DEV(0x1686, 0xf029, 0x82, 2), /* Zoom UAC-2 */ IMPLICIT_FB_FIXED_DEV(0x2466, 0x8003, 0x86, 2), /* Fractal Audio Axe-Fx II */ IMPLICIT_FB_FIXED_DEV(0x0499, 0x172a, 0x86, 2), /* Yamaha MODX */ @@ -100,7 +98,7 @@ static const struct snd_usb_implicit_fb_match capture_implicit_fb_quirks[] = { /* set up sync EP information on the audioformat */ static int add_implicit_fb_sync_ep(struct snd_usb_audio *chip, struct audioformat *fmt, - int ep, int ifnum, + int ep, int ep_idx, int ifnum, const struct usb_host_interface *alts) { struct usb_interface *iface; @@ -115,7 +113,7 @@ static int add_implicit_fb_sync_ep(struct snd_usb_audio *chip, fmt->sync_ep = ep; fmt->sync_iface = ifnum; fmt->sync_altsetting = alts->desc.bAlternateSetting; - fmt->sync_ep_idx = 0; + fmt->sync_ep_idx = ep_idx; fmt->implicit_fb = 1; usb_audio_dbg(chip, "%d:%d: added %s implicit_fb sync_ep %x, iface %d:%d\n", @@ -147,7 +145,7 @@ static int add_generic_uac2_implicit_fb(struct snd_usb_audio *chip, (epd->bmAttributes & USB_ENDPOINT_USAGE_MASK) != USB_ENDPOINT_USAGE_IMPLICIT_FB) return 0; - return add_implicit_fb_sync_ep(chip, fmt, epd->bEndpointAddress, + return add_implicit_fb_sync_ep(chip, fmt, epd->bEndpointAddress, 0, ifnum, alts); } @@ -173,10 +171,32 @@ static int add_roland_implicit_fb(struct snd_usb_audio *chip, (epd->bmAttributes & USB_ENDPOINT_USAGE_MASK) != USB_ENDPOINT_USAGE_IMPLICIT_FB) return 0; - return add_implicit_fb_sync_ep(chip, fmt, epd->bEndpointAddress, + return add_implicit_fb_sync_ep(chip, fmt, epd->bEndpointAddress, 0, ifnum, alts); } +/* Pioneer devices: playback and capture streams sharing the same iface/altset + */ +static int add_pioneer_implicit_fb(struct snd_usb_audio *chip, + struct audioformat *fmt, + struct usb_host_interface *alts) +{ + struct usb_endpoint_descriptor *epd; + + if (alts->desc.bNumEndpoints != 2) + return 0; + + epd = get_endpoint(alts, 1); + if (!usb_endpoint_is_isoc_in(epd) || + (epd->bmAttributes & USB_ENDPOINT_SYNCTYPE) != USB_ENDPOINT_SYNC_ASYNC || + ((epd->bmAttributes & USB_ENDPOINT_USAGE_MASK) != + USB_ENDPOINT_USAGE_DATA && + (epd->bmAttributes & USB_ENDPOINT_USAGE_MASK) != + USB_ENDPOINT_USAGE_IMPLICIT_FB)) + return 0; + return add_implicit_fb_sync_ep(chip, fmt, epd->bEndpointAddress, 1, + alts->desc.bInterfaceNumber, alts); +} static int __add_generic_implicit_fb(struct snd_usb_audio *chip, struct audioformat *fmt, @@ -197,7 +217,7 @@ static int __add_generic_implicit_fb(struct snd_usb_audio *chip, if (!usb_endpoint_is_isoc_in(epd) || (epd->bmAttributes & USB_ENDPOINT_SYNCTYPE) != USB_ENDPOINT_SYNC_ASYNC) return 0; - return add_implicit_fb_sync_ep(chip, fmt, epd->bEndpointAddress, + return add_implicit_fb_sync_ep(chip, fmt, epd->bEndpointAddress, 0, iface, alts); } @@ -250,7 +270,7 @@ static int audioformat_implicit_fb_quirk(struct snd_usb_audio *chip, case IMPLICIT_FB_NONE: return 0; /* No quirk */ case IMPLICIT_FB_FIXED: - return add_implicit_fb_sync_ep(chip, fmt, p->ep_num, + return add_implicit_fb_sync_ep(chip, fmt, p->ep_num, 0, p->iface, NULL); } } @@ -278,6 +298,14 @@ static int audioformat_implicit_fb_quirk(struct snd_usb_audio *chip, return 1; } + /* Pioneer devices implicit feedback with vendor spec class */ + if (attr == USB_ENDPOINT_SYNC_ASYNC && + alts->desc.bInterfaceClass == USB_CLASS_VENDOR_SPEC && + USB_ID_VENDOR(chip->usb_id) == 0x2b73 /* Pioneer */) { + if (add_pioneer_implicit_fb(chip, fmt, alts)) + return 1; + } + /* Try the generic implicit fb if available */ if (chip->generic_implicit_fb) return add_generic_implicit_fb(chip, fmt, alts); @@ -295,8 +323,8 @@ static int audioformat_capture_quirk(struct snd_usb_audio *chip, p = find_implicit_fb_entry(chip, capture_implicit_fb_quirks, alts); if (p && p->type == IMPLICIT_FB_FIXED) - return add_implicit_fb_sync_ep(chip, fmt, p->ep_num, p->iface, - NULL); + return add_implicit_fb_sync_ep(chip, fmt, p->ep_num, 0, + p->iface, NULL); return 0; } -- GitLab From afba9dc1f3a5390475006061c0bdc5ad4915878e Mon Sep 17 00:00:00 2001 From: Stephan Gerhold Date: Wed, 6 Jan 2021 11:07:55 +0100 Subject: [PATCH 0414/2012] net: ipa: modem: add missing SET_NETDEV_DEV() for proper sysfs links At the moment it is quite hard to identify the network interface provided by IPA in userspace components: The network interface is created as virtual device, without any link to the IPA device. The interface name ("rmnet_ipa%d") is the only indication that the network interface belongs to IPA, but this is not very reliable. Add SET_NETDEV_DEV() to associate the network interface with the IPA parent device. This allows userspace services like ModemManager to properly identify that this network interface is provided by IPA and belongs to the modem. Cc: Alex Elder Fixes: a646d6ec9098 ("soc: qcom: ipa: modem and microcontroller") Signed-off-by: Stephan Gerhold Link: https://lore.kernel.org/r/20210106100755.56800-1-stephan@gerhold.net Signed-off-by: Jakub Kicinski --- drivers/net/ipa/ipa_modem.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ipa/ipa_modem.c b/drivers/net/ipa/ipa_modem.c index e34fe2d77324e..9b08eb8239846 100644 --- a/drivers/net/ipa/ipa_modem.c +++ b/drivers/net/ipa/ipa_modem.c @@ -216,6 +216,7 @@ int ipa_modem_start(struct ipa *ipa) ipa->name_map[IPA_ENDPOINT_AP_MODEM_TX]->netdev = netdev; ipa->name_map[IPA_ENDPOINT_AP_MODEM_RX]->netdev = netdev; + SET_NETDEV_DEV(netdev, &ipa->pdev->dev); priv = netdev_priv(netdev); priv->ipa = ipa; -- GitLab From 53475c5dd856212e91538a9501162e821cc1f791 Mon Sep 17 00:00:00 2001 From: Dongseok Yi Date: Fri, 8 Jan 2021 11:28:38 +0900 Subject: [PATCH 0415/2012] net: fix use-after-free when UDP GRO with shared fraglist skbs in fraglist could be shared by a BPF filter loaded at TC. If TC writes, it will call skb_ensure_writable -> pskb_expand_head to create a private linear section for the head_skb. And then call skb_clone_fraglist -> skb_get on each skb in the fraglist. skb_segment_list overwrites part of the skb linear section of each fragment itself. Even after skb_clone, the frag_skbs share their linear section with their clone in PF_PACKET. Both sk_receive_queue of PF_PACKET and PF_INET (or PF_INET6) can have a link for the same frag_skbs chain. If a new skb (not frags) is queued to one of the sk_receive_queue, multiple ptypes can see and release this. It causes use-after-free. [ 4443.426215] ------------[ cut here ]------------ [ 4443.426222] refcount_t: underflow; use-after-free. [ 4443.426291] WARNING: CPU: 7 PID: 28161 at lib/refcount.c:190 refcount_dec_and_test_checked+0xa4/0xc8 [ 4443.426726] pstate: 60400005 (nZCv daif +PAN -UAO) [ 4443.426732] pc : refcount_dec_and_test_checked+0xa4/0xc8 [ 4443.426737] lr : refcount_dec_and_test_checked+0xa0/0xc8 [ 4443.426808] Call trace: [ 4443.426813] refcount_dec_and_test_checked+0xa4/0xc8 [ 4443.426823] skb_release_data+0x144/0x264 [ 4443.426828] kfree_skb+0x58/0xc4 [ 4443.426832] skb_queue_purge+0x64/0x9c [ 4443.426844] packet_set_ring+0x5f0/0x820 [ 4443.426849] packet_setsockopt+0x5a4/0xcd0 [ 4443.426853] __sys_setsockopt+0x188/0x278 [ 4443.426858] __arm64_sys_setsockopt+0x28/0x38 [ 4443.426869] el0_svc_common+0xf0/0x1d0 [ 4443.426873] el0_svc_handler+0x74/0x98 [ 4443.426880] el0_svc+0x8/0xc Fixes: 3a1296a38d0c (net: Support GRO/GSO fraglist chaining.) Signed-off-by: Dongseok Yi Acked-by: Willem de Bruijn Acked-by: Daniel Borkmann Link: https://lore.kernel.org/r/1610072918-174177-1-git-send-email-dseok.yi@samsung.com Signed-off-by: Jakub Kicinski --- net/core/skbuff.c | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index f62cae3f75d87..b6f2b520a9b74 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -3655,7 +3655,8 @@ struct sk_buff *skb_segment_list(struct sk_buff *skb, unsigned int delta_truesize = 0; unsigned int delta_len = 0; struct sk_buff *tail = NULL; - struct sk_buff *nskb; + struct sk_buff *nskb, *tmp; + int err; skb_push(skb, -skb_network_offset(skb) + offset); @@ -3665,11 +3666,28 @@ struct sk_buff *skb_segment_list(struct sk_buff *skb, nskb = list_skb; list_skb = list_skb->next; + err = 0; + if (skb_shared(nskb)) { + tmp = skb_clone(nskb, GFP_ATOMIC); + if (tmp) { + consume_skb(nskb); + nskb = tmp; + err = skb_unclone(nskb, GFP_ATOMIC); + } else { + err = -ENOMEM; + } + } + if (!tail) skb->next = nskb; else tail->next = nskb; + if (unlikely(err)) { + nskb->next = list_skb; + goto err_linearize; + } + tail = nskb; delta_len += nskb->len; -- GitLab From fd2ddef043592e7de80af53f47fa46fd3573086e Mon Sep 17 00:00:00 2001 From: Baptiste Lepers Date: Thu, 7 Jan 2021 16:11:10 +1100 Subject: [PATCH 0416/2012] udp: Prevent reuseport_select_sock from reading uninitialized socks reuse->socks[] is modified concurrently by reuseport_add_sock. To prevent reading values that have not been fully initialized, only read the array up until the last known safe index instead of incorrectly re-reading the last index of the array. Fixes: acdcecc61285f ("udp: correct reuseport selection with connected sockets") Signed-off-by: Baptiste Lepers Acked-by: Willem de Bruijn Link: https://lore.kernel.org/r/20210107051110.12247-1-baptiste.lepers@gmail.com Signed-off-by: Jakub Kicinski --- net/core/sock_reuseport.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/sock_reuseport.c b/net/core/sock_reuseport.c index bbdd3c7b6cb5b..b065f0a103ed0 100644 --- a/net/core/sock_reuseport.c +++ b/net/core/sock_reuseport.c @@ -293,7 +293,7 @@ select_by_hash: i = j = reciprocal_scale(hash, socks); while (reuse->socks[i]->sk_state == TCP_ESTABLISHED) { i++; - if (i >= reuse->num_socks) + if (i >= socks) i = 0; if (i == j) goto out; -- GitLab From c1787ffd0d24eb93eefac2dbba0eac5700da9ff1 Mon Sep 17 00:00:00 2001 From: Tom Parkin Date: Thu, 7 Jan 2021 18:13:15 +0000 Subject: [PATCH 0417/2012] ppp: fix refcount underflow on channel unbridge When setting up a channel bridge, ppp_bridge_channels sets the pch->bridge field before taking the associated reference on the bridge file instance. This opens up a refcount underflow bug if ppp_bridge_channels called via. iotcl runs concurrently with ppp_unbridge_channels executing via. file release. The bug is triggered by ppp_bridge_channels taking the error path through the 'err_unset' label. In this scenario, pch->bridge is set, but the reference on the bridged channel will not be taken because the function errors out. If ppp_unbridge_channels observes pch->bridge before it is unset by the error path, it will erroneously drop the reference on the bridged channel and cause a refcount underflow. To avoid this, ensure that ppp_bridge_channels holds a reference on each channel in advance of setting the bridge pointers. Signed-off-by: Tom Parkin Fixes: 4cf476ced45d ("ppp: add PPPIOCBRIDGECHAN and PPPIOCUNBRIDGECHAN ioctls") Acked-by: Guillaume Nault Link: https://lore.kernel.org/r/20210107181315.3128-1-tparkin@katalix.com Signed-off-by: Jakub Kicinski --- drivers/net/ppp/ppp_generic.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c index 09c27f7773f95..d445ecb1d0c75 100644 --- a/drivers/net/ppp/ppp_generic.c +++ b/drivers/net/ppp/ppp_generic.c @@ -623,6 +623,7 @@ static int ppp_bridge_channels(struct channel *pch, struct channel *pchb) write_unlock_bh(&pch->upl); return -EALREADY; } + refcount_inc(&pchb->file.refcnt); rcu_assign_pointer(pch->bridge, pchb); write_unlock_bh(&pch->upl); @@ -632,19 +633,24 @@ static int ppp_bridge_channels(struct channel *pch, struct channel *pchb) write_unlock_bh(&pchb->upl); goto err_unset; } + refcount_inc(&pch->file.refcnt); rcu_assign_pointer(pchb->bridge, pch); write_unlock_bh(&pchb->upl); - refcount_inc(&pch->file.refcnt); - refcount_inc(&pchb->file.refcnt); - return 0; err_unset: write_lock_bh(&pch->upl); + /* Re-read pch->bridge with upl held in case it was modified concurrently */ + pchb = rcu_dereference_protected(pch->bridge, lockdep_is_held(&pch->upl)); RCU_INIT_POINTER(pch->bridge, NULL); write_unlock_bh(&pch->upl); synchronize_rcu(); + + if (pchb) + if (refcount_dec_and_test(&pchb->file.refcnt)) + ppp_destroy_channel(pchb); + return -EALREADY; } -- GitLab From 2b446e650b418f9a9e75f99852e2f2560cabfa17 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 6 Jan 2021 10:40:05 -0800 Subject: [PATCH 0418/2012] docs: net: explain struct net_device lifetime Explain the two basic flows of struct net_device's operation. Signed-off-by: Jakub Kicinski --- Documentation/networking/netdevices.rst | 171 +++++++++++++++++++++++- net/core/rtnetlink.c | 2 +- 2 files changed, 166 insertions(+), 7 deletions(-) diff --git a/Documentation/networking/netdevices.rst b/Documentation/networking/netdevices.rst index e65665c5ab501..17bdcb746dcf5 100644 --- a/Documentation/networking/netdevices.rst +++ b/Documentation/networking/netdevices.rst @@ -10,18 +10,177 @@ Introduction The following is a random collection of documentation regarding network devices. -struct net_device allocation rules -================================== +struct net_device lifetime rules +================================ Network device structures need to persist even after module is unloaded and must be allocated with alloc_netdev_mqs() and friends. If device has registered successfully, it will be freed on last use -by free_netdev(). This is required to handle the pathologic case cleanly -(example: rmmod mydriver needs_free_netdev = true; + } + + static void my_destructor(struct net_device *dev) + { + some_obj_destroy(priv->obj); + some_uninit(priv); + } + + int create_link() + { + struct my_device_priv *priv; + int err; + + ASSERT_RTNL(); + + dev = alloc_netdev(sizeof(*priv), "net%d", NET_NAME_UNKNOWN, my_setup); + if (!dev) + return -ENOMEM; + priv = netdev_priv(dev); + + /* Implicit constructor */ + err = some_init(priv); + if (err) + goto err_free_dev; + + priv->obj = some_obj_create(); + if (!priv->obj) { + err = -ENOMEM; + goto err_some_uninit; + } + /* End of constructor, set the destructor: */ + dev->priv_destructor = my_destructor; + + err = register_netdevice(dev); + if (err) + /* register_netdevice() calls destructor on failure */ + goto err_free_dev; + + /* If anything fails now unregister_netdevice() (or unregister_netdev()) + * will take care of calling my_destructor and free_netdev(). + */ + + return 0; + + err_some_uninit: + some_uninit(priv); + err_free_dev: + free_netdev(dev); + return err; + } + +If struct net_device.priv_destructor is set it will be called by the core +some time after unregister_netdevice(), it will also be called if +register_netdevice() fails. The callback may be invoked with or without +``rtnl_lock`` held. + +There is no explicit constructor callback, driver "constructs" the private +netdev state after allocating it and before registration. + +Setting struct net_device.needs_free_netdev makes core call free_netdevice() +automatically after unregister_netdevice() when all references to the device +are gone. It only takes effect after a successful call to register_netdevice() +so if register_netdevice() fails driver is responsible for calling +free_netdev(). + +free_netdev() is safe to call on error paths right after unregister_netdevice() +or when register_netdevice() fails. Parts of netdev (de)registration process +happen after ``rtnl_lock`` is released, therefore in those cases free_netdev() +will defer some of the processing until ``rtnl_lock`` is released. + +Devices spawned from struct rtnl_link_ops should never free the +struct net_device directly. + +.ndo_init and .ndo_uninit +~~~~~~~~~~~~~~~~~~~~~~~~~ + +``.ndo_init`` and ``.ndo_uninit`` callbacks are called during net_device +registration and de-registration, under ``rtnl_lock``. Drivers can use +those e.g. when parts of their init process need to run under ``rtnl_lock``. + +``.ndo_init`` runs before device is visible in the system, ``.ndo_uninit`` +runs during de-registering after device is closed but other subsystems +may still have outstanding references to the netdevice. MTU === diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index bb0596c41b3ef..79f514afb17d0 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -3441,7 +3441,7 @@ replay: if (ops->newlink) { err = ops->newlink(link_net ? : net, dev, tb, data, extack); - /* Drivers should call free_netdev() in ->destructor + /* Drivers should set dev->needs_free_netdev * and unregister it on failure after registration * so that device could be finally freed in rtnl_unlock. */ -- GitLab From c269a24ce057abfc31130960e96ab197ef6ab196 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 6 Jan 2021 10:40:06 -0800 Subject: [PATCH 0419/2012] net: make free_netdev() more lenient with unregistering devices There are two flavors of handling netdev registration: - ones called without holding rtnl_lock: register_netdev() and unregister_netdev(); and - those called with rtnl_lock held: register_netdevice() and unregister_netdevice(). While the semantics of the former are pretty clear, the same can't be said about the latter. The netdev_todo mechanism is utilized to perform some of the device unregistering tasks and it hooks into rtnl_unlock() so the locked variants can't actually finish the work. In general free_netdev() does not mix well with locked calls. Most drivers operating under rtnl_lock set dev->needs_free_netdev to true and expect core to make the free_netdev() call some time later. The part where this becomes most problematic is error paths. There is no way to unwind the state cleanly after a call to register_netdevice(), since unreg can't be performed fully without dropping locks. Make free_netdev() more lenient, and defer the freeing if device is being unregistered. This allows error paths to simply call free_netdev() both after register_netdevice() failed, and after a call to unregister_netdevice() but before dropping rtnl_lock. Simplify the error paths which are currently doing gymnastics around free_netdev() handling. Signed-off-by: Jakub Kicinski --- net/8021q/vlan.c | 4 +--- net/core/dev.c | 11 +++++++++++ net/core/rtnetlink.c | 23 ++++++----------------- 3 files changed, 18 insertions(+), 20 deletions(-) diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index 15bbfaf943fd1..8b644113715e9 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -284,9 +284,7 @@ static int register_vlan_device(struct net_device *real_dev, u16 vlan_id) return 0; out_free_newdev: - if (new_dev->reg_state == NETREG_UNINITIALIZED || - new_dev->reg_state == NETREG_UNREGISTERED) - free_netdev(new_dev); + free_netdev(new_dev); return err; } diff --git a/net/core/dev.c b/net/core/dev.c index 8fa739259041a..adde93cbca9f0 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -10631,6 +10631,17 @@ void free_netdev(struct net_device *dev) struct napi_struct *p, *n; might_sleep(); + + /* When called immediately after register_netdevice() failed the unwind + * handling may still be dismantling the device. Handle that case by + * deferring the free. + */ + if (dev->reg_state == NETREG_UNREGISTERING) { + ASSERT_RTNL(); + dev->needs_free_netdev = true; + return; + } + netif_free_tx_queues(dev); netif_free_rx_queues(dev); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 79f514afb17d0..3d6ab194d0f58 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -3439,26 +3439,15 @@ replay: dev->ifindex = ifm->ifi_index; - if (ops->newlink) { + if (ops->newlink) err = ops->newlink(link_net ? : net, dev, tb, data, extack); - /* Drivers should set dev->needs_free_netdev - * and unregister it on failure after registration - * so that device could be finally freed in rtnl_unlock. - */ - if (err < 0) { - /* If device is not registered at all, free it now */ - if (dev->reg_state == NETREG_UNINITIALIZED || - dev->reg_state == NETREG_UNREGISTERED) - free_netdev(dev); - goto out; - } - } else { + else err = register_netdevice(dev); - if (err < 0) { - free_netdev(dev); - goto out; - } + if (err < 0) { + free_netdev(dev); + goto out; } + err = rtnl_configure_link(dev, ifm); if (err < 0) goto out_unregister; -- GitLab From 766b0515d5bec4b780750773ed3009b148df8c0a Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 6 Jan 2021 10:40:07 -0800 Subject: [PATCH 0420/2012] net: make sure devices go through netdev_wait_all_refs If register_netdevice() fails at the very last stage - the notifier call - some subsystems may have already seen it and grabbed a reference. struct net_device can't be freed right away without calling netdev_wait_all_refs(). Now that we have a clean interface in form of dev->needs_free_netdev and lenient free_netdev() we can undo what commit 93ee31f14f6f ("[NET]: Fix free_netdev on register_netdev failure.") has done and complete the unregistration path by bringing the net_set_todo() call back. After registration fails user is still expected to explicitly free the net_device, so make sure ->needs_free_netdev is cleared, otherwise rolling back the registration will cause the old double free for callers who release rtnl_lock before the free. This also solves the problem of priv_destructor not being called on notifier error. net_set_todo() will be moved back into unregister_netdevice_queue() in a follow up. Reported-by: Hulk Robot Reported-by: Yang Yingliang Signed-off-by: Jakub Kicinski --- net/core/dev.c | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/net/core/dev.c b/net/core/dev.c index adde93cbca9f0..0071a11a6dc3c 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -10077,17 +10077,11 @@ int register_netdevice(struct net_device *dev) ret = call_netdevice_notifiers(NETDEV_REGISTER, dev); ret = notifier_to_errno(ret); if (ret) { + /* Expect explicit free_netdev() on failure */ + dev->needs_free_netdev = false; rollback_registered(dev); - rcu_barrier(); - - dev->reg_state = NETREG_UNREGISTERED; - /* We should put the kobject that hold in - * netdev_unregister_kobject(), otherwise - * the net device cannot be freed when - * driver calls free_netdev(), because the - * kobject is being hold. - */ - kobject_put(&dev->dev.kobj); + net_set_todo(dev); + goto out; } /* * Prevent userspace races by waiting until the network -- GitLab From e80927079fd97b4d5457e3af2400a0087b561564 Mon Sep 17 00:00:00 2001 From: Yi Li Date: Mon, 4 Jan 2021 15:41:18 +0800 Subject: [PATCH 0421/2012] bcache: set pdev_set_uuid before scond loop iteration There is no need to reassign pdev_set_uuid in the second loop iteration, so move it to the place before second loop. Signed-off-by: Yi Li Signed-off-by: Coly Li Signed-off-by: Jens Axboe --- drivers/md/bcache/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index a4752ac410dc4..6aa23a6fb394d 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -2644,8 +2644,8 @@ static ssize_t bch_pending_bdevs_cleanup(struct kobject *k, } list_for_each_entry_safe(pdev, tpdev, &pending_devs, list) { + char *pdev_set_uuid = pdev->dc->sb.set_uuid; list_for_each_entry_safe(c, tc, &bch_cache_sets, list) { - char *pdev_set_uuid = pdev->dc->sb.set_uuid; char *set_uuid = c->set_uuid; if (!memcmp(pdev_set_uuid, set_uuid, 16)) { -- GitLab From f7b4943dea48a572ad751ce1f18a245d43debe7e Mon Sep 17 00:00:00 2001 From: Coly Li Date: Mon, 4 Jan 2021 15:41:19 +0800 Subject: [PATCH 0422/2012] bcache: fix typo from SUUP to SUPP in features.h This patch fixes the following typos, from BCH_FEATURE_COMPAT_SUUP to BCH_FEATURE_COMPAT_SUPP from BCH_FEATURE_INCOMPAT_SUUP to BCH_FEATURE_INCOMPAT_SUPP from BCH_FEATURE_INCOMPAT_SUUP to BCH_FEATURE_RO_COMPAT_SUPP Fixes: d721a43ff69c ("bcache: increase super block version for cache device and backing device") Fixes: ffa470327572 ("bcache: add bucket_size_hi into struct cache_sb_disk for large bucket") Signed-off-by: Coly Li Cc: stable@vger.kernel.org # 5.9+ Signed-off-by: Jens Axboe --- drivers/md/bcache/features.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/md/bcache/features.h b/drivers/md/bcache/features.h index a1653c4780416..32c5bbda2f0dc 100644 --- a/drivers/md/bcache/features.h +++ b/drivers/md/bcache/features.h @@ -15,9 +15,9 @@ /* Incompat feature set */ #define BCH_FEATURE_INCOMPAT_LARGE_BUCKET 0x0001 /* 32bit bucket size */ -#define BCH_FEATURE_COMPAT_SUUP 0 -#define BCH_FEATURE_RO_COMPAT_SUUP 0 -#define BCH_FEATURE_INCOMPAT_SUUP BCH_FEATURE_INCOMPAT_LARGE_BUCKET +#define BCH_FEATURE_COMPAT_SUPP 0 +#define BCH_FEATURE_RO_COMPAT_SUPP 0 +#define BCH_FEATURE_INCOMPAT_SUPP BCH_FEATURE_INCOMPAT_LARGE_BUCKET #define BCH_HAS_COMPAT_FEATURE(sb, mask) \ ((sb)->feature_compat & (mask)) -- GitLab From 1dfc0686c29a9bbd3a446a29f9ccde3dec3bc75a Mon Sep 17 00:00:00 2001 From: Coly Li Date: Mon, 4 Jan 2021 15:41:20 +0800 Subject: [PATCH 0423/2012] bcache: check unsupported feature sets for bcache register This patch adds the check for features which is incompatible for current supported feature sets. Now if the bcache device created by bcache-tools has features that current kernel doesn't support, read_super() will fail with error messoage. E.g. if an unsupported incompatible feature detected, bcache register will fail with dmesg "bcache: register_bcache() error : Unsupported incompatible feature found". Fixes: d721a43ff69c ("bcache: increase super block version for cache device and backing device") Fixes: ffa470327572 ("bcache: add bucket_size_hi into struct cache_sb_disk for large bucket") Signed-off-by: Coly Li Cc: stable@vger.kernel.org # 5.9+ Signed-off-by: Jens Axboe --- drivers/md/bcache/features.h | 15 +++++++++++++++ drivers/md/bcache/super.c | 14 ++++++++++++++ 2 files changed, 29 insertions(+) diff --git a/drivers/md/bcache/features.h b/drivers/md/bcache/features.h index 32c5bbda2f0dc..e73724c2b49b8 100644 --- a/drivers/md/bcache/features.h +++ b/drivers/md/bcache/features.h @@ -79,6 +79,21 @@ static inline void bch_clear_feature_##name(struct cache_sb *sb) \ BCH_FEATURE_INCOMPAT_FUNCS(large_bucket, LARGE_BUCKET); +static inline bool bch_has_unknown_compat_features(struct cache_sb *sb) +{ + return ((sb->feature_compat & ~BCH_FEATURE_COMPAT_SUPP) != 0); +} + +static inline bool bch_has_unknown_ro_compat_features(struct cache_sb *sb) +{ + return ((sb->feature_ro_compat & ~BCH_FEATURE_RO_COMPAT_SUPP) != 0); +} + +static inline bool bch_has_unknown_incompat_features(struct cache_sb *sb) +{ + return ((sb->feature_incompat & ~BCH_FEATURE_INCOMPAT_SUPP) != 0); +} + int bch_print_cache_set_feature_compat(struct cache_set *c, char *buf, int size); int bch_print_cache_set_feature_ro_compat(struct cache_set *c, char *buf, int size); int bch_print_cache_set_feature_incompat(struct cache_set *c, char *buf, int size); diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index 6aa23a6fb394d..f4674a3298af5 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -228,6 +228,20 @@ static const char *read_super(struct cache_sb *sb, struct block_device *bdev, sb->feature_compat = le64_to_cpu(s->feature_compat); sb->feature_incompat = le64_to_cpu(s->feature_incompat); sb->feature_ro_compat = le64_to_cpu(s->feature_ro_compat); + + /* Check incompatible features */ + err = "Unsupported compatible feature found"; + if (bch_has_unknown_compat_features(sb)) + goto err; + + err = "Unsupported read-only compatible feature found"; + if (bch_has_unknown_ro_compat_features(sb)) + goto err; + + err = "Unsupported incompatible feature found"; + if (bch_has_unknown_incompat_features(sb)) + goto err; + err = read_super_common(sb, bdev, s); if (err) goto err; -- GitLab From b16671e8f493e3df40b1fb0dff4078f391c5099a Mon Sep 17 00:00:00 2001 From: Coly Li Date: Mon, 4 Jan 2021 15:41:21 +0800 Subject: [PATCH 0424/2012] bcache: introduce BCH_FEATURE_INCOMPAT_LOG_LARGE_BUCKET_SIZE for large bucket When large bucket feature was added, BCH_FEATURE_INCOMPAT_LARGE_BUCKET was introduced into the incompat feature set. It used bucket_size_hi (which was added at the tail of struct cache_sb_disk) to extend current 16bit bucket size to 32bit with existing bucket_size in struct cache_sb_disk. This is not a good idea, there are two obvious problems, - Bucket size is always value power of 2, if store log2(bucket size) in existing bucket_size of struct cache_sb_disk, it is unnecessary to add bucket_size_hi. - Macro csum_set() assumes d[SB_JOURNAL_BUCKETS] is the last member in struct cache_sb_disk, bucket_size_hi was added after d[] which makes csum_set calculate an unexpected super block checksum. To fix the above problems, this patch introduces a new incompat feature bit BCH_FEATURE_INCOMPAT_LOG_LARGE_BUCKET_SIZE, when this bit is set, it means bucket_size in struct cache_sb_disk stores the order of power-of-2 bucket size value. When user specifies a bucket size larger than 32768 sectors, BCH_FEATURE_INCOMPAT_LOG_LARGE_BUCKET_SIZE will be set to incompat feature set, and bucket_size stores log2(bucket size) more than store the real bucket size value. The obsoleted BCH_FEATURE_INCOMPAT_LARGE_BUCKET won't be used anymore, it is renamed to BCH_FEATURE_INCOMPAT_OBSO_LARGE_BUCKET and still only recognized by kernel driver for legacy compatible purpose. The previous bucket_size_hi is renmaed to obso_bucket_size_hi in struct cache_sb_disk and not used in bcache-tools anymore. For cache device created with BCH_FEATURE_INCOMPAT_LARGE_BUCKET feature, bcache-tools and kernel driver still recognize the feature string and display it as "obso_large_bucket". With this change, the unnecessary extra space extend of bcache on-disk super block can be avoided, and csum_set() may generate expected check sum as well. Fixes: ffa470327572 ("bcache: add bucket_size_hi into struct cache_sb_disk for large bucket") Signed-off-by: Coly Li Cc: stable@vger.kernel.org # 5.9+ Signed-off-by: Jens Axboe --- drivers/md/bcache/features.c | 2 +- drivers/md/bcache/features.h | 11 ++++++++--- drivers/md/bcache/super.c | 22 +++++++++++++++++++--- include/uapi/linux/bcache.h | 2 +- 4 files changed, 29 insertions(+), 8 deletions(-) diff --git a/drivers/md/bcache/features.c b/drivers/md/bcache/features.c index 6469223f0b777..d636b7b2d070c 100644 --- a/drivers/md/bcache/features.c +++ b/drivers/md/bcache/features.c @@ -17,7 +17,7 @@ struct feature { }; static struct feature feature_list[] = { - {BCH_FEATURE_INCOMPAT, BCH_FEATURE_INCOMPAT_LARGE_BUCKET, + {BCH_FEATURE_INCOMPAT, BCH_FEATURE_INCOMPAT_LOG_LARGE_BUCKET_SIZE, "large_bucket"}, {0, 0, 0 }, }; diff --git a/drivers/md/bcache/features.h b/drivers/md/bcache/features.h index e73724c2b49b8..84fc2c0f01015 100644 --- a/drivers/md/bcache/features.h +++ b/drivers/md/bcache/features.h @@ -13,11 +13,15 @@ /* Feature set definition */ /* Incompat feature set */ -#define BCH_FEATURE_INCOMPAT_LARGE_BUCKET 0x0001 /* 32bit bucket size */ +/* 32bit bucket size, obsoleted */ +#define BCH_FEATURE_INCOMPAT_OBSO_LARGE_BUCKET 0x0001 +/* real bucket size is (1 << bucket_size) */ +#define BCH_FEATURE_INCOMPAT_LOG_LARGE_BUCKET_SIZE 0x0002 #define BCH_FEATURE_COMPAT_SUPP 0 #define BCH_FEATURE_RO_COMPAT_SUPP 0 -#define BCH_FEATURE_INCOMPAT_SUPP BCH_FEATURE_INCOMPAT_LARGE_BUCKET +#define BCH_FEATURE_INCOMPAT_SUPP (BCH_FEATURE_INCOMPAT_OBSO_LARGE_BUCKET| \ + BCH_FEATURE_INCOMPAT_LOG_LARGE_BUCKET_SIZE) #define BCH_HAS_COMPAT_FEATURE(sb, mask) \ ((sb)->feature_compat & (mask)) @@ -77,7 +81,8 @@ static inline void bch_clear_feature_##name(struct cache_sb *sb) \ ~BCH##_FEATURE_INCOMPAT_##flagname; \ } -BCH_FEATURE_INCOMPAT_FUNCS(large_bucket, LARGE_BUCKET); +BCH_FEATURE_INCOMPAT_FUNCS(obso_large_bucket, OBSO_LARGE_BUCKET); +BCH_FEATURE_INCOMPAT_FUNCS(large_bucket, LOG_LARGE_BUCKET_SIZE); static inline bool bch_has_unknown_compat_features(struct cache_sb *sb) { diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index f4674a3298af5..3999641f17753 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -64,9 +64,25 @@ static unsigned int get_bucket_size(struct cache_sb *sb, struct cache_sb_disk *s { unsigned int bucket_size = le16_to_cpu(s->bucket_size); - if (sb->version >= BCACHE_SB_VERSION_CDEV_WITH_FEATURES && - bch_has_feature_large_bucket(sb)) - bucket_size |= le16_to_cpu(s->bucket_size_hi) << 16; + if (sb->version >= BCACHE_SB_VERSION_CDEV_WITH_FEATURES) { + if (bch_has_feature_large_bucket(sb)) { + unsigned int max, order; + + max = sizeof(unsigned int) * BITS_PER_BYTE - 1; + order = le16_to_cpu(s->bucket_size); + /* + * bcache tool will make sure the overflow won't + * happen, an error message here is enough. + */ + if (order > max) + pr_err("Bucket size (1 << %u) overflows\n", + order); + bucket_size = 1 << order; + } else if (bch_has_feature_obso_large_bucket(sb)) { + bucket_size += + le16_to_cpu(s->obso_bucket_size_hi) << 16; + } + } return bucket_size; } diff --git a/include/uapi/linux/bcache.h b/include/uapi/linux/bcache.h index 52e8bcb339811..cf7399f03b712 100644 --- a/include/uapi/linux/bcache.h +++ b/include/uapi/linux/bcache.h @@ -213,7 +213,7 @@ struct cache_sb_disk { __le16 keys; }; __le64 d[SB_JOURNAL_BUCKETS]; /* journal buckets */ - __le16 bucket_size_hi; + __le16 obso_bucket_size_hi; /* obsoleted */ }; /* -- GitLab From 5342fd4255021ef0c4ce7be52eea1c4ebda11c63 Mon Sep 17 00:00:00 2001 From: Coly Li Date: Mon, 4 Jan 2021 15:41:22 +0800 Subject: [PATCH 0425/2012] bcache: set bcache device into read-only mode for BCH_FEATURE_INCOMPAT_OBSO_LARGE_BUCKET If BCH_FEATURE_INCOMPAT_OBSO_LARGE_BUCKET is set in incompat feature set, it means the cache device is created with obsoleted layout with obso_bucket_site_hi. Now bcache does not support this feature bit, a new BCH_FEATURE_INCOMPAT_LOG_LARGE_BUCKET_SIZE incompat feature bit is added for a better layout to support large bucket size. For the legacy compatibility purpose, if a cache device created with obsoleted BCH_FEATURE_INCOMPAT_OBSO_LARGE_BUCKET feature bit, all bcache devices attached to this cache set should be set to read-only. Then the dirty data can be written back to backing device before re-create the cache device with BCH_FEATURE_INCOMPAT_LOG_LARGE_BUCKET_SIZE feature bit by the latest bcache-tools. This patch checks BCH_FEATURE_INCOMPAT_OBSO_LARGE_BUCKET feature bit when running a cache set and attach a bcache device to the cache set. If this bit is set, - When run a cache set, print an error kernel message to indicate all following attached bcache device will be read-only. - When attach a bcache device, print an error kernel message to indicate the attached bcache device will be read-only, and ask users to update to latest bcache-tools. Such change is only for cache device whose bucket size >= 32MB, this is for the zoned SSD and almost nobody uses such large bucket size at this moment. If you don't explicit set a large bucket size for a zoned SSD, such change is totally transparent to your bcache device. Fixes: ffa470327572 ("bcache: add bucket_size_hi into struct cache_sb_disk for large bucket") Signed-off-by: Coly Li Signed-off-by: Jens Axboe --- drivers/md/bcache/super.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index 3999641f17753..2047a9cccdb5d 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -1332,6 +1332,12 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c, bcache_device_link(&dc->disk, c, "bdev"); atomic_inc(&c->attached_dev_nr); + if (bch_has_feature_obso_large_bucket(&(c->cache->sb))) { + pr_err("The obsoleted large bucket layout is unsupported, set the bcache device into read-only\n"); + pr_err("Please update to the latest bcache-tools to create the cache device\n"); + set_disk_ro(dc->disk.disk, 1); + } + /* Allow the writeback thread to proceed */ up_write(&dc->writeback_lock); @@ -1554,6 +1560,12 @@ static int flash_dev_run(struct cache_set *c, struct uuid_entry *u) bcache_device_link(d, c, "volume"); + if (bch_has_feature_obso_large_bucket(&c->cache->sb)) { + pr_err("The obsoleted large bucket layout is unsupported, set the bcache device into read-only\n"); + pr_err("Please update to the latest bcache-tools to create the cache device\n"); + set_disk_ro(d->disk, 1); + } + return 0; err: kobject_put(&d->kobj); @@ -2113,6 +2125,9 @@ static int run_cache_set(struct cache_set *c) c->cache->sb.last_mount = (u32)ktime_get_real_seconds(); bcache_write_super(c); + if (bch_has_feature_obso_large_bucket(&c->cache->sb)) + pr_err("Detect obsoleted large bucket layout, all attached bcache device will be read-only\n"); + list_for_each_entry_safe(dc, t, &uncached_devices, list) bch_cached_dev_attach(dc, c, NULL); -- GitLab From 55e6ac1e1f31c7f678d9f3c8d54c6f102e5f1550 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Fri, 8 Jan 2021 20:57:22 +0000 Subject: [PATCH 0426/2012] io_uring: io_rw_reissue lockdep annotations We expect io_rw_reissue() to take place only during submission with uring_lock held. Add a lockdep annotation to check that invariant. Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io_uring.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/io_uring.c b/fs/io_uring.c index cb57e0360fcbb..55ba1922a3497 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -2692,6 +2692,8 @@ static bool io_rw_reissue(struct io_kiocb *req, long res) if ((res != -EAGAIN && res != -EOPNOTSUPP) || io_wq_current_is_worker()) return false; + lockdep_assert_held(&req->ctx->uring_lock); + ret = io_sq_thread_acquire_mm_files(req->ctx, req); if (io_resubmit_prep(req, ret)) { -- GitLab From 4f793dc40bc605b97624fd36baf085b3c35e8bfd Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Fri, 8 Jan 2021 20:57:23 +0000 Subject: [PATCH 0427/2012] io_uring: inline io_uring_attempt_task_drop() A simple preparation change inlining io_uring_attempt_task_drop() into io_uring_flush(). Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io_uring.c | 29 +++++++++++------------------ 1 file changed, 11 insertions(+), 18 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 55ba1922a3497..1c931e7a3948f 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -8964,23 +8964,6 @@ static void io_uring_del_task_file(struct file *file) fput(file); } -/* - * Drop task note for this file if we're the only ones that hold it after - * pending fput() - */ -static void io_uring_attempt_task_drop(struct file *file) -{ - if (!current->io_uring) - return; - /* - * fput() is pending, will be 2 if the only other ref is our potential - * task file note. If the task is exiting, drop regardless of count. - */ - if (fatal_signal_pending(current) || (current->flags & PF_EXITING) || - atomic_long_read(&file->f_count) == 2) - io_uring_del_task_file(file); -} - static void io_uring_remove_task_files(struct io_uring_task *tctx) { struct file *file; @@ -9072,7 +9055,17 @@ void __io_uring_task_cancel(void) static int io_uring_flush(struct file *file, void *data) { - io_uring_attempt_task_drop(file); + if (!current->io_uring) + return 0; + + /* + * fput() is pending, will be 2 if the only other ref is our potential + * task file note. If the task is exiting, drop regardless of count. + */ + if (fatal_signal_pending(current) || (current->flags & PF_EXITING) || + atomic_long_read(&file->f_count) == 2) + io_uring_del_task_file(file); + return 0; } -- GitLab From 6b5733eb638b7068ab7cb34e663b55a1d1892d85 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Fri, 8 Jan 2021 20:57:24 +0000 Subject: [PATCH 0428/2012] io_uring: add warn_once for io_uring_flush() files_cancel() should cancel all relevant requests and drop file notes, so we should never have file notes after that, including on-exit fput and flush. Add a WARN_ONCE to be sure. Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io_uring.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 1c931e7a3948f..f39671a0d84f6 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -9055,17 +9055,23 @@ void __io_uring_task_cancel(void) static int io_uring_flush(struct file *file, void *data) { - if (!current->io_uring) + struct io_uring_task *tctx = current->io_uring; + + if (!tctx) return 0; + /* we should have cancelled and erased it before PF_EXITING */ + WARN_ON_ONCE((current->flags & PF_EXITING) && + xa_load(&tctx->xa, (unsigned long)file)); + /* * fput() is pending, will be 2 if the only other ref is our potential * task file note. If the task is exiting, drop regardless of count. */ - if (fatal_signal_pending(current) || (current->flags & PF_EXITING) || - atomic_long_read(&file->f_count) == 2) - io_uring_del_task_file(file); + if (atomic_long_read(&file->f_count) != 2) + return 0; + io_uring_del_task_file(file); return 0; } -- GitLab From d9d05217cb6990b9a56e13b56e7a1b71e2551f6c Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Fri, 8 Jan 2021 20:57:25 +0000 Subject: [PATCH 0429/2012] io_uring: stop SQPOLL submit on creator's death When the creator of SQPOLL io_uring dies (i.e. sqo_task), we don't want its internals like ->files and ->mm to be poked by the SQPOLL task, it have never been nice and recently got racy. That can happen when the owner undergoes destruction and SQPOLL tasks tries to submit new requests in parallel, and so calls io_sq_thread_acquire*(). That patch halts SQPOLL submissions when sqo_task dies by introducing sqo_dead flag. Once set, the SQPOLL task must not do any submission, which is synchronised by uring_lock as well as the new flag. The tricky part is to make sure that disabling always happens, that means either the ring is discovered by creator's do_exit() -> cancel, or if the final close() happens before it's done by the creator. The last is guaranteed by the fact that for SQPOLL the creator task and only it holds exactly one file note, so either it pins up to do_exit() or removed by the creator on the final put in flush. (see comments in uring_flush() around file->f_count == 2). One more place that can trigger io_sq_thread_acquire_*() is __io_req_task_submit(). Shoot off requests on sqo_dead there, even though actually we don't need to. That's because cancellation of sqo_task should wait for the request before going any further. note 1: io_disable_sqo_submit() does io_ring_set_wakeup_flag() so the caller would enter the ring to get an error, but it still doesn't guarantee that the flag won't be cleared. note 2: if final __userspace__ close happens not from the creator task, the file note will pin the ring until the task dies. Fixed: b1b6b5a30dce8 ("kernel/io_uring: cancel io_uring before task works") Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io_uring.c | 62 +++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 53 insertions(+), 9 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index f39671a0d84f6..2f305c097bd5b 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -262,6 +262,7 @@ struct io_ring_ctx { unsigned int drain_next: 1; unsigned int eventfd_async: 1; unsigned int restricted: 1; + unsigned int sqo_dead: 1; /* * Ring buffer of indices into array of io_uring_sqe, which is @@ -2160,12 +2161,11 @@ static void io_req_task_cancel(struct callback_head *cb) static void __io_req_task_submit(struct io_kiocb *req) { struct io_ring_ctx *ctx = req->ctx; - bool fail; - fail = __io_sq_thread_acquire_mm(ctx) || - __io_sq_thread_acquire_files(ctx); mutex_lock(&ctx->uring_lock); - if (!fail) + if (!ctx->sqo_dead && + !__io_sq_thread_acquire_mm(ctx) && + !__io_sq_thread_acquire_files(ctx)) __io_queue_sqe(req, NULL); else __io_req_task_cancel(req, -EFAULT); @@ -6954,7 +6954,8 @@ static int __io_sq_thread(struct io_ring_ctx *ctx, bool cap_entries) if (!list_empty(&ctx->iopoll_list)) io_do_iopoll(ctx, &nr_events, 0); - if (to_submit && likely(!percpu_ref_is_dying(&ctx->refs))) + if (to_submit && !ctx->sqo_dead && + likely(!percpu_ref_is_dying(&ctx->refs))) ret = io_submit_sqes(ctx, to_submit); mutex_unlock(&ctx->uring_lock); } @@ -8712,6 +8713,10 @@ static void io_ring_ctx_wait_and_kill(struct io_ring_ctx *ctx) { mutex_lock(&ctx->uring_lock); percpu_ref_kill(&ctx->refs); + + if (WARN_ON_ONCE((ctx->flags & IORING_SETUP_SQPOLL) && !ctx->sqo_dead)) + ctx->sqo_dead = 1; + /* if force is set, the ring is going away. always drop after that */ ctx->cq_overflow_flushed = 1; if (ctx->rings) @@ -8874,6 +8879,18 @@ static void __io_uring_cancel_task_requests(struct io_ring_ctx *ctx, } } +static void io_disable_sqo_submit(struct io_ring_ctx *ctx) +{ + WARN_ON_ONCE(ctx->sqo_task != current); + + mutex_lock(&ctx->uring_lock); + ctx->sqo_dead = 1; + mutex_unlock(&ctx->uring_lock); + + /* make sure callers enter the ring to get error */ + io_ring_set_wakeup_flag(ctx); +} + /* * We need to iteratively cancel requests, in case a request has dependent * hard links. These persist even for failure of cancelations, hence keep @@ -8885,6 +8902,8 @@ static void io_uring_cancel_task_requests(struct io_ring_ctx *ctx, struct task_struct *task = current; if ((ctx->flags & IORING_SETUP_SQPOLL) && ctx->sq_data) { + /* for SQPOLL only sqo_task has task notes */ + io_disable_sqo_submit(ctx); task = ctx->sq_data->thread; atomic_inc(&task->io_uring->in_idle); io_sq_thread_park(ctx->sq_data); @@ -9056,6 +9075,7 @@ void __io_uring_task_cancel(void) static int io_uring_flush(struct file *file, void *data) { struct io_uring_task *tctx = current->io_uring; + struct io_ring_ctx *ctx = file->private_data; if (!tctx) return 0; @@ -9071,7 +9091,16 @@ static int io_uring_flush(struct file *file, void *data) if (atomic_long_read(&file->f_count) != 2) return 0; - io_uring_del_task_file(file); + if (ctx->flags & IORING_SETUP_SQPOLL) { + /* there is only one file note, which is owned by sqo_task */ + WARN_ON_ONCE((ctx->sqo_task == current) == + !xa_load(&tctx->xa, (unsigned long)file)); + + io_disable_sqo_submit(ctx); + } + + if (!(ctx->flags & IORING_SETUP_SQPOLL) || ctx->sqo_task == current) + io_uring_del_task_file(file); return 0; } @@ -9145,8 +9174,9 @@ static unsigned long io_uring_nommu_get_unmapped_area(struct file *file, #endif /* !CONFIG_MMU */ -static void io_sqpoll_wait_sq(struct io_ring_ctx *ctx) +static int io_sqpoll_wait_sq(struct io_ring_ctx *ctx) { + int ret = 0; DEFINE_WAIT(wait); do { @@ -9155,6 +9185,11 @@ static void io_sqpoll_wait_sq(struct io_ring_ctx *ctx) prepare_to_wait(&ctx->sqo_sq_wait, &wait, TASK_INTERRUPTIBLE); + if (unlikely(ctx->sqo_dead)) { + ret = -EOWNERDEAD; + goto out; + } + if (!io_sqring_full(ctx)) break; @@ -9162,6 +9197,8 @@ static void io_sqpoll_wait_sq(struct io_ring_ctx *ctx) } while (!signal_pending(current)); finish_wait(&ctx->sqo_sq_wait, &wait); +out: + return ret; } static int io_get_ext_arg(unsigned flags, const void __user *argp, size_t *argsz, @@ -9235,10 +9272,16 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit, if (ctx->flags & IORING_SETUP_SQPOLL) { io_cqring_overflow_flush(ctx, false, NULL, NULL); + ret = -EOWNERDEAD; + if (unlikely(ctx->sqo_dead)) + goto out; if (flags & IORING_ENTER_SQ_WAKEUP) wake_up(&ctx->sq_data->wait); - if (flags & IORING_ENTER_SQ_WAIT) - io_sqpoll_wait_sq(ctx); + if (flags & IORING_ENTER_SQ_WAIT) { + ret = io_sqpoll_wait_sq(ctx); + if (ret) + goto out; + } submitted = to_submit; } else if (to_submit) { ret = io_uring_add_task_file(ctx, f.file); @@ -9665,6 +9708,7 @@ static int io_uring_create(unsigned entries, struct io_uring_params *p, trace_io_uring_create(ret, ctx, p->sq_entries, p->cq_entries, p->flags); return ret; err: + io_disable_sqo_submit(ctx); io_ring_ctx_wait_and_kill(ctx); return ret; } -- GitLab From 6bae85bd70d063b63fbe262d943cc321eab31b17 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Fri, 8 Jan 2021 22:46:02 -0800 Subject: [PATCH 0430/2012] maintainers: update my email address Change my email contact ahead of a likely painful eleven-month migration to a certain cobalt enteprisey groupware cloud product that will totally break my workflow. Some day I may get used to having to email being sequestered behind both claret and cerulean oath2+sms 2fa layers, but for now I'll stick with keying in one password to receive an email vs. the required four. Signed-off-by: Darrick J. Wong Signed-off-by: Linus Torvalds --- MAINTAINERS | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 3d1b8fe972613..292394098e39f 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -9273,7 +9273,7 @@ F: drivers/net/ethernet/sgi/ioc3-eth.c IOMAP FILESYSTEM LIBRARY M: Christoph Hellwig -M: Darrick J. Wong +M: Darrick J. Wong M: linux-xfs@vger.kernel.org M: linux-fsdevel@vger.kernel.org L: linux-xfs@vger.kernel.org @@ -19504,7 +19504,7 @@ F: arch/x86/xen/*swiotlb* F: drivers/xen/*swiotlb* XFS FILESYSTEM -M: Darrick J. Wong +M: Darrick J. Wong M: linux-xfs@vger.kernel.org L: linux-xfs@vger.kernel.org S: Supported -- GitLab From a2bc221b972db91e4be1970e776e98f16aa87904 Mon Sep 17 00:00:00 2001 From: Manish Chopra Date: Thu, 7 Jan 2021 02:15:20 -0800 Subject: [PATCH 0431/2012] netxen_nic: fix MSI/MSI-x interrupts For all PCI functions on the netxen_nic adapter, interrupt mode (INTx or MSI) configuration is dependent on what has been configured by the PCI function zero in the shared interrupt register, as these adapters do not support mixed mode interrupts among the functions of a given adapter. Logic for setting MSI/MSI-x interrupt mode in the shared interrupt register based on PCI function id zero check is not appropriate for all family of netxen adapters, as for some of the netxen family adapters PCI function zero is not really meant to be probed/loaded in the host but rather just act as a management function on the device, which caused all the other PCI functions on the adapter to always use legacy interrupt (INTx) mode instead of choosing MSI/MSI-x interrupt mode. This patch replaces that check with port number so that for all type of adapters driver attempts for MSI/MSI-x interrupt modes. Fixes: b37eb210c076 ("netxen_nic: Avoid mixed mode interrupts") Signed-off-by: Manish Chopra Signed-off-by: Igor Russkikh Link: https://lore.kernel.org/r/20210107101520.6735-1-manishc@marvell.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c index f21847739ef1f..d258e0ccf9465 100644 --- a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c +++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c @@ -564,11 +564,6 @@ static const struct net_device_ops netxen_netdev_ops = { .ndo_set_features = netxen_set_features, }; -static inline bool netxen_function_zero(struct pci_dev *pdev) -{ - return (PCI_FUNC(pdev->devfn) == 0) ? true : false; -} - static inline void netxen_set_interrupt_mode(struct netxen_adapter *adapter, u32 mode) { @@ -664,7 +659,7 @@ static int netxen_setup_intr(struct netxen_adapter *adapter) netxen_initialize_interrupt_registers(adapter); netxen_set_msix_bit(pdev, 0); - if (netxen_function_zero(pdev)) { + if (adapter->portnum == 0) { if (!netxen_setup_msi_interrupts(adapter, num_msix)) netxen_set_interrupt_mode(adapter, NETXEN_MSI_MODE); else -- GitLab From b210de4f8c97d57de051e805686248ec4c6cfc52 Mon Sep 17 00:00:00 2001 From: Aya Levin Date: Thu, 7 Jan 2021 15:50:18 +0200 Subject: [PATCH 0432/2012] net: ipv6: Validate GSO SKB before finish IPv6 processing There are cases where GSO segment's length exceeds the egress MTU: - Forwarding of a TCP GRO skb, when DF flag is not set. - Forwarding of an skb that arrived on a virtualisation interface (virtio-net/vhost/tap) with TSO/GSO size set by other network stack. - Local GSO skb transmitted on an NETIF_F_TSO tunnel stacked over an interface with a smaller MTU. - Arriving GRO skb (or GSO skb in a virtualised environment) that is bridged to a NETIF_F_TSO tunnel stacked over an interface with an insufficient MTU. If so: - Consume the SKB and its segments. - Issue an ICMP packet with 'Packet Too Big' message containing the MTU, allowing the source host to reduce its Path MTU appropriately. Note: These cases are handled in the same manner in IPv4 output finish. This patch aligns the behavior of IPv6 and the one of IPv4. Fixes: 9e50849054a4 ("netfilter: ipv6: move POSTROUTING invocation before fragmentation") Signed-off-by: Aya Levin Reviewed-by: Tariq Toukan Link: https://lore.kernel.org/r/1610027418-30438-1-git-send-email-ayal@nvidia.com Signed-off-by: Jakub Kicinski --- net/ipv6/ip6_output.c | 41 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 40 insertions(+), 1 deletion(-) diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 749ad72386b23..077d43af8226b 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -125,8 +125,43 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff * return -EINVAL; } +static int +ip6_finish_output_gso_slowpath_drop(struct net *net, struct sock *sk, + struct sk_buff *skb, unsigned int mtu) +{ + struct sk_buff *segs, *nskb; + netdev_features_t features; + int ret = 0; + + /* Please see corresponding comment in ip_finish_output_gso + * describing the cases where GSO segment length exceeds the + * egress MTU. + */ + features = netif_skb_features(skb); + segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK); + if (IS_ERR_OR_NULL(segs)) { + kfree_skb(skb); + return -ENOMEM; + } + + consume_skb(skb); + + skb_list_walk_safe(segs, segs, nskb) { + int err; + + skb_mark_not_on_list(segs); + err = ip6_fragment(net, sk, segs, ip6_finish_output2); + if (err && ret == 0) + ret = err; + } + + return ret; +} + static int __ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb) { + unsigned int mtu; + #if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM) /* Policy lookup after SNAT yielded a new policy */ if (skb_dst(skb)->xfrm) { @@ -135,7 +170,11 @@ static int __ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff } #endif - if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) || + mtu = ip6_skb_dst_mtu(skb); + if (skb_is_gso(skb) && !skb_gso_validate_network_len(skb, mtu)) + return ip6_finish_output_gso_slowpath_drop(net, sk, skb, mtu); + + if ((skb->len > mtu && !skb_is_gso(skb)) || dst_allfrag(skb_dst(skb)) || (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size)) return ip6_fragment(net, sk, skb, ip6_finish_output2); -- GitLab From 0ea02c73775277001c651ad4a0e83781a9acf406 Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Wed, 11 Nov 2020 19:52:16 +0800 Subject: [PATCH 0433/2012] riscv: Drop a duplicated PAGE_KERNEL_EXEC commit b91540d52a08 ("RISC-V: Add EFI runtime services") add a duplicated PAGE_KERNEL_EXEC, kill it. Signed-off-by: Kefeng Wang Reviewed-by: Pekka Enberg Reviewed-by: Atish Patra Fixes: b91540d52a08 ("RISC-V: Add EFI runtime services") Cc: stable@vger.kernel.org Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/pgtable.h | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index 41a72861987cc..251e1db088fa2 100644 --- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -99,7 +99,6 @@ | _PAGE_DIRTY) #define PAGE_KERNEL __pgprot(_PAGE_KERNEL) -#define PAGE_KERNEL_EXEC __pgprot(_PAGE_KERNEL | _PAGE_EXEC) #define PAGE_KERNEL_READ __pgprot(_PAGE_KERNEL & ~_PAGE_WRITE) #define PAGE_KERNEL_EXEC __pgprot(_PAGE_KERNEL | _PAGE_EXEC) #define PAGE_KERNEL_READ_EXEC __pgprot((_PAGE_KERNEL & ~_PAGE_WRITE) \ -- GitLab From 3502bd9b5762154ff11665f3f18f6d7dcc6f781c Mon Sep 17 00:00:00 2001 From: Vadim Fedorenko Date: Sat, 9 Jan 2021 00:37:45 +0300 Subject: [PATCH 0434/2012] selftests/tls: fix selftests after adding ChaCha20-Poly1305 TLS selftests where broken because of wrong variable types used. Fix it by changing u16 -> uint16_t Fixes: 4f336e88a870 ("selftests/tls: add CHACHA20-POLY1305 to tls selftests") Reported-by: kernel test robot Signed-off-by: Vadim Fedorenko Link: https://lore.kernel.org/r/1610141865-7142-1-git-send-email-vfedorenko@novek.ru Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/tls.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c index cb0d1890a860f..e0088c2d38a5d 100644 --- a/tools/testing/selftests/net/tls.c +++ b/tools/testing/selftests/net/tls.c @@ -103,8 +103,8 @@ FIXTURE(tls) FIXTURE_VARIANT(tls) { - u16 tls_version; - u16 cipher_type; + uint16_t tls_version; + uint16_t cipher_type; }; FIXTURE_VARIANT_ADD(tls, 12_gcm) -- GitLab From b77413446408fdd256599daf00d5be72b5f3e7c6 Mon Sep 17 00:00:00 2001 From: Hoang Le Date: Fri, 8 Jan 2021 14:13:37 +0700 Subject: [PATCH 0435/2012] tipc: fix NULL deref in tipc_link_xmit() The buffer list can have zero skb as following path: tipc_named_node_up()->tipc_node_xmit()->tipc_link_xmit(), so we need to check the list before casting an &sk_buff. Fault report: [] tipc: Bulk publication failure [] general protection fault, probably for non-canonical [#1] PREEMPT [...] [] KASAN: null-ptr-deref in range [0x00000000000000c8-0x00000000000000cf] [] CPU: 0 PID: 0 Comm: swapper/0 Kdump: loaded Not tainted 5.10.0-rc4+ #2 [] Hardware name: Bochs ..., BIOS Bochs 01/01/2011 [] RIP: 0010:tipc_link_xmit+0xc1/0x2180 [] Code: 24 b8 00 00 00 00 4d 39 ec 4c 0f 44 e8 e8 d7 0a 10 f9 48 [...] [] RSP: 0018:ffffc90000006ea0 EFLAGS: 00010202 [] RAX: dffffc0000000000 RBX: ffff8880224da000 RCX: 1ffff11003d3cc0d [] RDX: 0000000000000019 RSI: ffffffff886007b9 RDI: 00000000000000c8 [] RBP: ffffc90000007018 R08: 0000000000000001 R09: fffff52000000ded [] R10: 0000000000000003 R11: fffff52000000dec R12: ffffc90000007148 [] R13: 0000000000000000 R14: 0000000000000000 R15: ffffc90000007018 [] FS: 0000000000000000(0000) GS:ffff888037400000(0000) knlGS:000[...] [] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [] CR2: 00007fffd2db5000 CR3: 000000002b08f000 CR4: 00000000000006f0 Fixes: af9b028e270fd ("tipc: make media xmit call outside node spinlock context") Acked-by: Jon Maloy Signed-off-by: Hoang Le Link: https://lore.kernel.org/r/20210108071337.3598-1-hoang.h.le@dektech.com.au Signed-off-by: Jakub Kicinski --- net/tipc/link.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/net/tipc/link.c b/net/tipc/link.c index 6ae2140eb4f74..a6a694b789274 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -1030,7 +1030,6 @@ void tipc_link_reset(struct tipc_link *l) int tipc_link_xmit(struct tipc_link *l, struct sk_buff_head *list, struct sk_buff_head *xmitq) { - struct tipc_msg *hdr = buf_msg(skb_peek(list)); struct sk_buff_head *backlogq = &l->backlogq; struct sk_buff_head *transmq = &l->transmq; struct sk_buff *skb, *_skb; @@ -1038,13 +1037,18 @@ int tipc_link_xmit(struct tipc_link *l, struct sk_buff_head *list, u16 ack = l->rcv_nxt - 1; u16 seqno = l->snd_nxt; int pkt_cnt = skb_queue_len(list); - int imp = msg_importance(hdr); unsigned int mss = tipc_link_mss(l); unsigned int cwin = l->window; unsigned int mtu = l->mtu; + struct tipc_msg *hdr; bool new_bundle; int rc = 0; + int imp; + + if (pkt_cnt <= 0) + return 0; + hdr = buf_msg(skb_peek(list)); if (unlikely(msg_size(hdr) > mtu)) { pr_warn("Too large msg, purging xmit list %d %d %d %d %d!\n", skb_queue_len(list), msg_user(hdr), @@ -1053,6 +1057,7 @@ int tipc_link_xmit(struct tipc_link *l, struct sk_buff_head *list, return -EMSGSIZE; } + imp = msg_importance(hdr); /* Allow oversubscription of one data msg per source at congestion */ if (unlikely(l->backlog[imp].len >= l->backlog[imp].limit)) { if (imp == TIPC_SYSTEM_IMPORTANCE) { -- GitLab From 57726ebe2733891c9f59105eff028735f73d05fb Mon Sep 17 00:00:00 2001 From: Vadim Pasternak Date: Fri, 8 Jan 2021 16:52:09 +0200 Subject: [PATCH 0436/2012] mlxsw: core: Add validation of transceiver temperature thresholds Validate thresholds to avoid a single failure due to some transceiver unreliability. Ignore the last readouts in case warning temperature is above alarm temperature, since it can cause unexpected thermal shutdown. Stay with the previous values and refresh threshold within the next iteration. This is a rare scenario, but it was observed at a customer site. Fixes: 6a79507cfe94 ("mlxsw: core: Extend thermal module with per QSFP module thermal zones") Signed-off-by: Vadim Pasternak Reviewed-by: Jiri Pirko Signed-off-by: Ido Schimmel Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlxsw/core_thermal.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c index 8fa286ccdd6bb..250a850496977 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c @@ -176,6 +176,12 @@ mlxsw_thermal_module_trips_update(struct device *dev, struct mlxsw_core *core, if (err) return err; + if (crit_temp > emerg_temp) { + dev_warn(dev, "%s : Critical threshold %d is above emergency threshold %d\n", + tz->tzdev->type, crit_temp, emerg_temp); + return 0; + } + /* According to the system thermal requirements, the thermal zones are * defined with four trip points. The critical and emergency * temperature thresholds, provided by QSFP module are set as "active" @@ -190,11 +196,8 @@ mlxsw_thermal_module_trips_update(struct device *dev, struct mlxsw_core *core, tz->trips[MLXSW_THERMAL_TEMP_TRIP_NORM].temp = crit_temp; tz->trips[MLXSW_THERMAL_TEMP_TRIP_HIGH].temp = crit_temp; tz->trips[MLXSW_THERMAL_TEMP_TRIP_HOT].temp = emerg_temp; - if (emerg_temp > crit_temp) - tz->trips[MLXSW_THERMAL_TEMP_TRIP_CRIT].temp = emerg_temp + + tz->trips[MLXSW_THERMAL_TEMP_TRIP_CRIT].temp = emerg_temp + MLXSW_THERMAL_MODULE_TEMP_SHIFT; - else - tz->trips[MLXSW_THERMAL_TEMP_TRIP_CRIT].temp = emerg_temp; return 0; } -- GitLab From b06ca3d5a43ca2dd806f7688a17e8e7e0619a80a Mon Sep 17 00:00:00 2001 From: Vadim Pasternak Date: Fri, 8 Jan 2021 16:52:10 +0200 Subject: [PATCH 0437/2012] mlxsw: core: Increase critical threshold for ASIC thermal zone Increase critical threshold for ASIC thermal zone from 110C to 140C according to the system hardware requirements. All the supported ASICs (Spectrum-1, Spectrum-2, Spectrum-3) could be still operational with ASIC temperature below 140C. With the old critical threshold value system can perform unjustified shutdown. All the systems equipped with the above ASICs implement thermal protection mechanism at firmware level and firmware could decide to perform system thermal shutdown in case the temperature is below 140C. So with the new threshold system will not meltdown, while thermal operating range will be aligned with hardware abilities. Fixes: 41e760841d26 ("mlxsw: core: Replace thermal temperature trips with defines") Fixes: a50c1e35650b ("mlxsw: core: Implement thermal zone") Signed-off-by: Vadim Pasternak Reviewed-by: Jiri Pirko Signed-off-by: Ido Schimmel Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlxsw/core_thermal.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c index 250a850496977..bf85ce9835d7f 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c @@ -19,7 +19,7 @@ #define MLXSW_THERMAL_ASIC_TEMP_NORM 75000 /* 75C */ #define MLXSW_THERMAL_ASIC_TEMP_HIGH 85000 /* 85C */ #define MLXSW_THERMAL_ASIC_TEMP_HOT 105000 /* 105C */ -#define MLXSW_THERMAL_ASIC_TEMP_CRIT 110000 /* 110C */ +#define MLXSW_THERMAL_ASIC_TEMP_CRIT 140000 /* 140C */ #define MLXSW_THERMAL_HYSTERESIS_TEMP 5000 /* 5C */ #define MLXSW_THERMAL_MODULE_TEMP_SHIFT (MLXSW_THERMAL_HYSTERESIS_TEMP * 2) #define MLXSW_THERMAL_ZONE_MAX_NAME 16 -- GitLab From f97844f9c518172f813b7ece18a9956b1f70c1bb Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 5 Jan 2021 16:15:16 +0100 Subject: [PATCH 0438/2012] dt-bindings: net: renesas,etheravb: RZ/G2H needs tx-internal-delay-ps The merge resolution of the interaction of commits 307eea32b202864c ("dt-bindings: net: renesas,ravb: Add support for r8a774e1 SoC") and d7adf6331189cbe9 ("dt-bindings: net: renesas,etheravb: Convert to json-schema") missed that "tx-internal-delay-ps" should be a required property on RZ/G2H. Fixes: 8b0308fe319b8002 ("Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net") Signed-off-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/20210105151516.1540653-1-geert+renesas@glider.be Signed-off-by: Jakub Kicinski --- Documentation/devicetree/bindings/net/renesas,etheravb.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/net/renesas,etheravb.yaml b/Documentation/devicetree/bindings/net/renesas,etheravb.yaml index 244befb6402aa..de9dd574a2f95 100644 --- a/Documentation/devicetree/bindings/net/renesas,etheravb.yaml +++ b/Documentation/devicetree/bindings/net/renesas,etheravb.yaml @@ -163,6 +163,7 @@ allOf: enum: - renesas,etheravb-r8a774a1 - renesas,etheravb-r8a774b1 + - renesas,etheravb-r8a774e1 - renesas,etheravb-r8a7795 - renesas,etheravb-r8a7796 - renesas,etheravb-r8a77961 -- GitLab From fab336b42441e0b2eb1d81becedb45fbdf99606e Mon Sep 17 00:00:00 2001 From: Chen Yi Date: Tue, 5 Jan 2021 23:31:20 +0800 Subject: [PATCH 0439/2012] selftests: netfilter: Pass family parameter "-f" to conntrack tool Fix nft_conntrack_helper.sh false fail report: 1) Conntrack tool need "-f ipv6" parameter to show out ipv6 traffic items. 2) Sleep 1 second after background nc send packet, to make sure check is after this statement executed. False report: FAIL: ns1-lkjUemYw did not show attached helper ip set via ruleset PASS: ns1-lkjUemYw connection on port 2121 has ftp helper attached ... After fix: PASS: ns1-2hUniwU2 connection on port 2121 has ftp helper attached PASS: ns2-2hUniwU2 connection on port 2121 has ftp helper attached ... Fixes: 619ae8e0697a6 ("selftests: netfilter: add test case for conntrack helper assignment") Signed-off-by: Chen Yi Signed-off-by: Pablo Neira Ayuso --- .../selftests/netfilter/nft_conntrack_helper.sh | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/netfilter/nft_conntrack_helper.sh b/tools/testing/selftests/netfilter/nft_conntrack_helper.sh index edf0a48da6bf8..bf6b9626c7dd2 100755 --- a/tools/testing/selftests/netfilter/nft_conntrack_helper.sh +++ b/tools/testing/selftests/netfilter/nft_conntrack_helper.sh @@ -94,7 +94,13 @@ check_for_helper() local message=$2 local port=$3 - ip netns exec ${netns} conntrack -L -p tcp --dport $port 2> /dev/null |grep -q 'helper=ftp' + if echo $message |grep -q 'ipv6';then + local family="ipv6" + else + local family="ipv4" + fi + + ip netns exec ${netns} conntrack -L -f $family -p tcp --dport $port 2> /dev/null |grep -q 'helper=ftp' if [ $? -ne 0 ] ; then echo "FAIL: ${netns} did not show attached helper $message" 1>&2 ret=1 @@ -111,8 +117,8 @@ test_helper() sleep 3 | ip netns exec ${ns2} nc -w 2 -l -p $port > /dev/null & - sleep 1 sleep 1 | ip netns exec ${ns1} nc -w 2 10.0.1.2 $port > /dev/null & + sleep 1 check_for_helper "$ns1" "ip $msg" $port check_for_helper "$ns2" "ip $msg" $port @@ -128,8 +134,8 @@ test_helper() sleep 3 | ip netns exec ${ns2} nc -w 2 -6 -l -p $port > /dev/null & - sleep 1 sleep 1 | ip netns exec ${ns1} nc -w 2 -6 dead:1::2 $port > /dev/null & + sleep 1 check_for_helper "$ns1" "ipv6 $msg" $port check_for_helper "$ns2" "ipv6 $msg" $port -- GitLab From f6351c3f1c27c80535d76cac2299aec44c36291e Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Fri, 8 Jan 2021 12:44:33 +0100 Subject: [PATCH 0440/2012] netfilter: conntrack: fix reading nf_conntrack_buckets The old way of changing the conntrack hashsize runtime was through changing the module param via file /sys/module/nf_conntrack/parameters/hashsize. This was extended to sysctl change in commit 3183ab8997a4 ("netfilter: conntrack: allow increasing bucket size via sysctl too"). The commit introduced second "user" variable nf_conntrack_htable_size_user which shadow actual variable nf_conntrack_htable_size. When hashsize is changed via module param this "user" variable isn't updated. This results in sysctl net/netfilter/nf_conntrack_buckets shows the wrong value when users update via the old way. This patch fix the issue by always updating "user" variable when reading the proc file. This will take care of changes to the actual variable without sysctl need to be aware. Fixes: 3183ab8997a4 ("netfilter: conntrack: allow increasing bucket size via sysctl too") Reported-by: Yoel Caspersen Signed-off-by: Jesper Dangaard Brouer Acked-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_standalone.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index 46c5557c1fecf..0ee702d374b02 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -523,6 +523,9 @@ nf_conntrack_hash_sysctl(struct ctl_table *table, int write, { int ret; + /* module_param hashsize could have changed value */ + nf_conntrack_htable_size_user = nf_conntrack_htable_size; + ret = proc_dointvec(table, write, buffer, lenp, ppos); if (ret < 0 || !write) return ret; -- GitLab From b8e594fa20d2e33d40c7a8c7c106549a35c38972 Mon Sep 17 00:00:00 2001 From: Suman Anna Date: Fri, 8 Jan 2021 10:29:01 -0600 Subject: [PATCH 0441/2012] irqchip/pruss: Simplify the TI_PRUSS_INTC Kconfig The TI PRUSS INTC irqchip driver handles the local interrupt controller which is a child device of it's parent PRUSS/ICSSG device. The driver was upstreamed in parallel with the PRUSS platform driver, and was configurable independently previously. The PRUSS interrupt controller is an integral part of the overall PRUSS software architecture, and is not useful at all by itself. Simplify the TI_PRUSS_INTC Kconfig dependencies by making it silent and selected automatically when the TI_PRUSS platform driver is enabled. Signed-off-by: Suman Anna Reviewed-by: David Lechner Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210108162901.6003-1-s-anna@ti.com --- drivers/irqchip/Kconfig | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/irqchip/Kconfig b/drivers/irqchip/Kconfig index 94920a51c6286..b147f22a78f48 100644 --- a/drivers/irqchip/Kconfig +++ b/drivers/irqchip/Kconfig @@ -493,8 +493,9 @@ config TI_SCI_INTA_IRQCHIP TI System Controller, say Y here. Otherwise, say N. config TI_PRUSS_INTC - tristate "TI PRU-ICSS Interrupt Controller" - depends on ARCH_DAVINCI || SOC_AM33XX || SOC_AM43XX || SOC_DRA7XX || ARCH_KEYSTONE || ARCH_K3 + tristate + depends on TI_PRUSS + default TI_PRUSS select IRQ_DOMAIN help This enables support for the PRU-ICSS Local Interrupt Controller -- GitLab From 599b3063adf4bf041a87a69244ee36aded0d878f Mon Sep 17 00:00:00 2001 From: Mathias Kresin Date: Thu, 7 Jan 2021 22:36:03 +0100 Subject: [PATCH 0442/2012] irqchip/mips-cpu: Set IPI domain parent chip Since commit 55567976629e ("genirq/irqdomain: Allow partial trimming of irq_data hierarchy") the irq_data chain is valided. The irq_domain_trim_hierarchy() function doesn't consider the irq + ipi domain hierarchy as valid, since the ipi domain has the irq domain set as parent, but the parent domain has no chip set. Hence the boot ends in a kernel panic. Set the chip for the parent domain as it is done in the mips gic irq driver, to have a valid irq_data chain. Fixes: 3838a547fda2 ("irqchip: mips-cpu: Introduce IPI IRQ domain support") Cc: # v5.10+ Signed-off-by: Mathias Kresin Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210107213603.1637781-1-dev@kresin.me --- drivers/irqchip/irq-mips-cpu.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/irqchip/irq-mips-cpu.c b/drivers/irqchip/irq-mips-cpu.c index 95d4fd8f7a968..0bbb0b2d0dd5f 100644 --- a/drivers/irqchip/irq-mips-cpu.c +++ b/drivers/irqchip/irq-mips-cpu.c @@ -197,6 +197,13 @@ static int mips_cpu_ipi_alloc(struct irq_domain *domain, unsigned int virq, if (ret) return ret; + ret = irq_domain_set_hwirq_and_chip(domain->parent, virq + i, hwirq, + &mips_mt_cpu_irq_controller, + NULL); + + if (ret) + return ret; + ret = irq_set_irq_type(virq + i, IRQ_TYPE_LEVEL_HIGH); if (ret) return ret; -- GitLab From 1653e3d470629d25c64cd8a2f84adb20a9348b0c Mon Sep 17 00:00:00 2001 From: Michael Walle Date: Tue, 15 Dec 2020 22:26:22 +0100 Subject: [PATCH 0443/2012] arm64: dts: ls1028a: fix the offset of the reset register The offset of the reset request register is 0, the absolute address is 0x1e60000. Boards without PSCI support will fail to perform a reset: [ 26.734700] reboot: Restarting system [ 27.743259] Unable to restart system [ 27.746845] Reboot failed -- System halted Fixes: 8897f3255c9c ("arm64: dts: Add support for NXP LS1028A SoC") Signed-off-by: Michael Walle Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi index 60ff19fa53b40..6c8a61c2cc740 100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi +++ b/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi @@ -101,7 +101,7 @@ reboot { compatible ="syscon-reboot"; regmap = <&rst>; - offset = <0xb0>; + offset = <0>; mask = <0x02>; }; -- GitLab From a0adc8eabb402cfb9f32d15edd9f65f65e35cdce Mon Sep 17 00:00:00 2001 From: John Stultz Date: Thu, 7 Jan 2021 20:26:16 +0000 Subject: [PATCH 0444/2012] dma-buf: cma_heap: Fix memory leak in CMA heap MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bing Song noticed the CMA heap was leaking memory due to a flub I made in commit a5d2d29e24be ("dma-buf: heaps: Move heap-helper logic into the cma_heap implementation"), and provided this fix which ensures the pagelist is also freed on release. Cc: Bing Song Cc: Sumit Semwal Cc: Liam Mark Cc: Laura Abbott Cc: Brian Starkey Cc: Hridya Valsaraju Cc: Suren Baghdasaryan Cc: Sandeep Patil Cc: Daniel Mentz Cc: Chris Goldsworthy Cc: Ørjan Eide Cc: Robin Murphy Cc: Ezequiel Garcia Cc: Simon Ser Cc: James Jones Cc: linux-media@vger.kernel.org Cc: dri-devel@lists.freedesktop.org Reported-by: Bing Song Fixes: a5d2d29e24be ("dma-buf: heaps: Move heap-helper logic into the cma_heap implementation") Signed-off-by: John Stultz Signed-off-by: Sumit Semwal Link: https://patchwork.freedesktop.org/patch/msgid/20210107202616.75170-1-john.stultz@linaro.org --- drivers/dma-buf/heaps/cma_heap.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/dma-buf/heaps/cma_heap.c b/drivers/dma-buf/heaps/cma_heap.c index 3c4e343011721..364fc2f3e4995 100644 --- a/drivers/dma-buf/heaps/cma_heap.c +++ b/drivers/dma-buf/heaps/cma_heap.c @@ -251,6 +251,9 @@ static void cma_heap_dma_buf_release(struct dma_buf *dmabuf) buffer->vaddr = NULL; } + /* free page list */ + kfree(buffer->pages); + /* release memory */ cma_release(cma_heap->cma, buffer->cma_pages, buffer->pagecount); kfree(buffer); } -- GitLab From c98e9daa59a611ff4e163689815f40380c912415 Mon Sep 17 00:00:00 2001 From: Scott Mayhew Date: Tue, 5 Jan 2021 08:54:32 -0500 Subject: [PATCH 0445/2012] NFS: Adjust fs_context error logging Several existing dprink()/dfprintk() calls were converted to use the new mount API logging macros by commit ce8866f0913f ("NFS: Attach supplementary error information to fs_context"). If the fs_context was not created using fsopen() then it will not have had a log buffer allocated for it, and the new mount API logging macros will wind up calling printk(). This can result in syslog messages being logged where previously there were none... most notably "NFS4: Couldn't follow remote path", which can happen if the client is auto-negotiating a protocol version with an NFS server that doesn't support the higher v4.x versions. Convert the nfs_errorf(), nfs_invalf(), and nfs_warnf() macros to check for the existence of the fs_context's log buffer and call dprintk() if it doesn't exist. Add nfs_ferrorf(), nfs_finvalf(), and nfs_warnf(), which do the same thing but take an NFS debug flag as an argument and call dfprintk(). Finally, modify the "NFS4: Couldn't follow remote path" message to use nfs_ferrorf(). Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=207385 Signed-off-by: Scott Mayhew Reviewed-by: Benjamin Coddington Fixes: ce8866f0913f ("NFS: Attach supplementary error information to fs_context.") Signed-off-by: Trond Myklebust --- fs/nfs/internal.h | 26 +++++++++++++++++++++++--- fs/nfs/nfs4super.c | 4 ++-- 2 files changed, 25 insertions(+), 5 deletions(-) diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index b840d0a91c9d8..6bdee7ab3a6c2 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -136,9 +136,29 @@ struct nfs_fs_context { } clone_data; }; -#define nfs_errorf(fc, fmt, ...) errorf(fc, fmt, ## __VA_ARGS__) -#define nfs_invalf(fc, fmt, ...) invalf(fc, fmt, ## __VA_ARGS__) -#define nfs_warnf(fc, fmt, ...) warnf(fc, fmt, ## __VA_ARGS__) +#define nfs_errorf(fc, fmt, ...) ((fc)->log.log ? \ + errorf(fc, fmt, ## __VA_ARGS__) : \ + ({ dprintk(fmt "\n", ## __VA_ARGS__); })) + +#define nfs_ferrorf(fc, fac, fmt, ...) ((fc)->log.log ? \ + errorf(fc, fmt, ## __VA_ARGS__) : \ + ({ dfprintk(fac, fmt "\n", ## __VA_ARGS__); })) + +#define nfs_invalf(fc, fmt, ...) ((fc)->log.log ? \ + invalf(fc, fmt, ## __VA_ARGS__) : \ + ({ dprintk(fmt "\n", ## __VA_ARGS__); -EINVAL; })) + +#define nfs_finvalf(fc, fac, fmt, ...) ((fc)->log.log ? \ + invalf(fc, fmt, ## __VA_ARGS__) : \ + ({ dfprintk(fac, fmt "\n", ## __VA_ARGS__); -EINVAL; })) + +#define nfs_warnf(fc, fmt, ...) ((fc)->log.log ? \ + warnf(fc, fmt, ## __VA_ARGS__) : \ + ({ dprintk(fmt "\n", ## __VA_ARGS__); })) + +#define nfs_fwarnf(fc, fac, fmt, ...) ((fc)->log.log ? \ + warnf(fc, fmt, ## __VA_ARGS__) : \ + ({ dfprintk(fac, fmt "\n", ## __VA_ARGS__); })) static inline struct nfs_fs_context *nfs_fc2context(const struct fs_context *fc) { diff --git a/fs/nfs/nfs4super.c b/fs/nfs/nfs4super.c index 984cc42ee54d8..d09bcfd7db894 100644 --- a/fs/nfs/nfs4super.c +++ b/fs/nfs/nfs4super.c @@ -227,7 +227,7 @@ int nfs4_try_get_tree(struct fs_context *fc) fc, ctx->nfs_server.hostname, ctx->nfs_server.export_path); if (err) { - nfs_errorf(fc, "NFS4: Couldn't follow remote path"); + nfs_ferrorf(fc, MOUNT, "NFS4: Couldn't follow remote path"); dfprintk(MOUNT, "<-- nfs4_try_get_tree() = %d [error]\n", err); } else { dfprintk(MOUNT, "<-- nfs4_try_get_tree() = 0\n"); @@ -250,7 +250,7 @@ int nfs4_get_referral_tree(struct fs_context *fc) fc, ctx->nfs_server.hostname, ctx->nfs_server.export_path); if (err) { - nfs_errorf(fc, "NFS4: Couldn't follow remote path"); + nfs_ferrorf(fc, MOUNT, "NFS4: Couldn't follow remote path"); dfprintk(MOUNT, "<-- nfs4_get_referral_tree() = %d [error]\n", err); } else { dfprintk(MOUNT, "<-- nfs4_get_referral_tree() = 0\n"); -- GitLab From 86b53fbf08f48d353a86a06aef537e78e82ba721 Mon Sep 17 00:00:00 2001 From: "j.nixdorf@avm.de" Date: Tue, 5 Jan 2021 15:17:01 +0100 Subject: [PATCH 0446/2012] net: sunrpc: interpret the return value of kstrtou32 correctly A return value of 0 means success. This is documented in lib/kstrtox.c. This was found by trying to mount an NFS share from a link-local IPv6 address with the interface specified by its index: mount("[fe80::1%1]:/srv/nfs", "/mnt", "nfs", 0, "nolock,addr=fe80::1%1") Before this commit this failed with EINVAL and also caused the following message in dmesg: [...] NFS: bad IP address specified: addr=fe80::1%1 The syscall using the same address based on the interface name instead of its index succeeds. Credits for this patch go to my colleague Christian Speich, who traced the origin of this bug to this line of code. Signed-off-by: Johannes Nixdorf Fixes: 00cfaa943ec3 ("replace strict_strto calls") Signed-off-by: Trond Myklebust --- net/sunrpc/addr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sunrpc/addr.c b/net/sunrpc/addr.c index 010dcb876f9d7..6e4dbd577a39f 100644 --- a/net/sunrpc/addr.c +++ b/net/sunrpc/addr.c @@ -185,7 +185,7 @@ static int rpc_parse_scope_id(struct net *net, const char *buf, scope_id = dev->ifindex; dev_put(dev); } else { - if (kstrtou32(p, 10, &scope_id) == 0) { + if (kstrtou32(p, 10, &scope_id) != 0) { kfree(p); return 0; } -- GitLab From 67bbceedc9bb8ad48993a8bd6486054756d711f4 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 4 Jan 2021 13:35:46 -0500 Subject: [PATCH 0447/2012] pNFS: Mark layout for return if return-on-close was not sent If the layout return-on-close failed because the layoutreturn was never sent, then we should mark the layout for return again. Fixes: 9c47b18cf722 ("pNFS: Ensure we do clear the return-on-close layout stateid on fatal errors") Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 07f59dc8cb2e7..ccc89fab18020 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1560,12 +1560,18 @@ void pnfs_roc_release(struct nfs4_layoutreturn_args *args, int ret) { struct pnfs_layout_hdr *lo = args->layout; + struct inode *inode = args->inode; const nfs4_stateid *arg_stateid = NULL; const nfs4_stateid *res_stateid = NULL; struct nfs4_xdr_opaque_data *ld_private = args->ld_private; switch (ret) { case -NFS4ERR_NOMATCHING_LAYOUT: + spin_lock(&inode->i_lock); + if (pnfs_layout_is_valid(lo) && + nfs4_stateid_match_other(&args->stateid, &lo->plh_stateid)) + pnfs_set_plh_return_info(lo, args->range.iomode, 0); + spin_unlock(&inode->i_lock); break; case 0: if (res->lrs_present) -- GitLab From 078000d02d57f02dde61de4901f289672e98c8bc Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 4 Jan 2021 13:18:03 -0500 Subject: [PATCH 0448/2012] pNFS: We want return-on-close to complete when evicting the inode If the inode is being evicted, it should be safe to run return-on-close, so we should do it to ensure we don't inadvertently leak layout segments. Fixes: 1c5bd76d17cc ("pNFS: Enable layoutreturn operation for return-on-close") Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 26 ++++++++++---------------- fs/nfs/pnfs.c | 8 +++----- fs/nfs/pnfs.h | 8 +++----- 3 files changed, 16 insertions(+), 26 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 14acd2f791072..2f4679a62712a 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3536,10 +3536,8 @@ static void nfs4_close_done(struct rpc_task *task, void *data) trace_nfs4_close(state, &calldata->arg, &calldata->res, task->tk_status); /* Handle Layoutreturn errors */ - if (pnfs_roc_done(task, calldata->inode, - &calldata->arg.lr_args, - &calldata->res.lr_res, - &calldata->res.lr_ret) == -EAGAIN) + if (pnfs_roc_done(task, &calldata->arg.lr_args, &calldata->res.lr_res, + &calldata->res.lr_ret) == -EAGAIN) goto out_restart; /* hmm. we are done with the inode, and in the process of freeing @@ -6384,10 +6382,8 @@ static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata) trace_nfs4_delegreturn_exit(&data->args, &data->res, task->tk_status); /* Handle Layoutreturn errors */ - if (pnfs_roc_done(task, data->inode, - &data->args.lr_args, - &data->res.lr_res, - &data->res.lr_ret) == -EAGAIN) + if (pnfs_roc_done(task, &data->args.lr_args, &data->res.lr_res, + &data->res.lr_ret) == -EAGAIN) goto out_restart; switch (task->tk_status) { @@ -6441,10 +6437,10 @@ static void nfs4_delegreturn_release(void *calldata) struct nfs4_delegreturndata *data = calldata; struct inode *inode = data->inode; + if (data->lr.roc) + pnfs_roc_release(&data->lr.arg, &data->lr.res, + data->res.lr_ret); if (inode) { - if (data->lr.roc) - pnfs_roc_release(&data->lr.arg, &data->lr.res, - data->res.lr_ret); nfs_post_op_update_inode_force_wcc(inode, &data->fattr); nfs_iput_and_deactive(inode); } @@ -6520,16 +6516,14 @@ static int _nfs4_proc_delegreturn(struct inode *inode, const struct cred *cred, nfs_fattr_init(data->res.fattr); data->timestamp = jiffies; data->rpc_status = 0; - data->lr.roc = pnfs_roc(inode, &data->lr.arg, &data->lr.res, cred); data->inode = nfs_igrab_and_active(inode); - if (data->inode) { + if (data->inode || issync) { + data->lr.roc = pnfs_roc(inode, &data->lr.arg, &data->lr.res, + cred); if (data->lr.roc) { data->args.lr_args = &data->lr.arg; data->res.lr_res = &data->lr.res; } - } else if (data->lr.roc) { - pnfs_roc_release(&data->lr.arg, &data->lr.res, 0); - data->lr.roc = false; } task_setup_data.callback_data = data; diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index ccc89fab18020..a18b1992b2fb3 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1509,10 +1509,8 @@ out_noroc: return false; } -int pnfs_roc_done(struct rpc_task *task, struct inode *inode, - struct nfs4_layoutreturn_args **argpp, - struct nfs4_layoutreturn_res **respp, - int *ret) +int pnfs_roc_done(struct rpc_task *task, struct nfs4_layoutreturn_args **argpp, + struct nfs4_layoutreturn_res **respp, int *ret) { struct nfs4_layoutreturn_args *arg = *argpp; int retval = -EAGAIN; @@ -1545,7 +1543,7 @@ int pnfs_roc_done(struct rpc_task *task, struct inode *inode, return 0; case -NFS4ERR_OLD_STATEID: if (!nfs4_layout_refresh_old_stateid(&arg->stateid, - &arg->range, inode)) + &arg->range, arg->inode)) break; *ret = -NFS4ERR_NOMATCHING_LAYOUT; return -EAGAIN; diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index bbd3de1025f23..d810ae674f4e8 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -297,10 +297,8 @@ bool pnfs_roc(struct inode *ino, struct nfs4_layoutreturn_args *args, struct nfs4_layoutreturn_res *res, const struct cred *cred); -int pnfs_roc_done(struct rpc_task *task, struct inode *inode, - struct nfs4_layoutreturn_args **argpp, - struct nfs4_layoutreturn_res **respp, - int *ret); +int pnfs_roc_done(struct rpc_task *task, struct nfs4_layoutreturn_args **argpp, + struct nfs4_layoutreturn_res **respp, int *ret); void pnfs_roc_release(struct nfs4_layoutreturn_args *args, struct nfs4_layoutreturn_res *res, int ret); @@ -772,7 +770,7 @@ pnfs_roc(struct inode *ino, } static inline int -pnfs_roc_done(struct rpc_task *task, struct inode *inode, +pnfs_roc_done(struct rpc_task *task, struct nfs4_layoutreturn_args **argpp, struct nfs4_layoutreturn_res **respp, int *ret) -- GitLab From c18d1e17ba2f6a1c9257b0b5d2882a6e3f772673 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 4 Jan 2021 15:01:18 -0500 Subject: [PATCH 0449/2012] pNFS: Clean up pnfs_layoutreturn_free_lsegs() Remove the check for whether or not the stateid is NULL, and fix up the callers. Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index a18b1992b2fb3..16a37214aba99 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1152,7 +1152,7 @@ void pnfs_layoutreturn_free_lsegs(struct pnfs_layout_hdr *lo, LIST_HEAD(freeme); spin_lock(&inode->i_lock); - if (!pnfs_layout_is_valid(lo) || !arg_stateid || + if (!pnfs_layout_is_valid(lo) || !nfs4_stateid_match_other(&lo->plh_stateid, arg_stateid)) goto out_unlock; if (stateid) { @@ -1559,7 +1559,6 @@ void pnfs_roc_release(struct nfs4_layoutreturn_args *args, { struct pnfs_layout_hdr *lo = args->layout; struct inode *inode = args->inode; - const nfs4_stateid *arg_stateid = NULL; const nfs4_stateid *res_stateid = NULL; struct nfs4_xdr_opaque_data *ld_private = args->ld_private; @@ -1569,6 +1568,7 @@ void pnfs_roc_release(struct nfs4_layoutreturn_args *args, if (pnfs_layout_is_valid(lo) && nfs4_stateid_match_other(&args->stateid, &lo->plh_stateid)) pnfs_set_plh_return_info(lo, args->range.iomode, 0); + pnfs_clear_layoutreturn_waitbit(lo); spin_unlock(&inode->i_lock); break; case 0: @@ -1576,11 +1576,10 @@ void pnfs_roc_release(struct nfs4_layoutreturn_args *args, res_stateid = &res->stateid; fallthrough; default: - arg_stateid = &args->stateid; + pnfs_layoutreturn_free_lsegs(lo, &args->stateid, &args->range, + res_stateid); } trace_nfs4_layoutreturn_on_close(args->inode, &args->stateid, ret); - pnfs_layoutreturn_free_lsegs(lo, arg_stateid, &args->range, - res_stateid); if (ld_private && ld_private->ops && ld_private->ops->free) ld_private->ops->free(ld_private); pnfs_put_layout_hdr(lo); -- GitLab From 2c8d5fc37fe2384a9bdb6965443ab9224d46f704 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 5 Jan 2021 06:43:45 -0500 Subject: [PATCH 0450/2012] pNFS: Stricter ordering of layoutget and layoutreturn If a layout return is in progress, we should wait for it to complete, in case the layout segment we are picking up gets returned too. Fixes: 30cb3ee299cb ("pNFS: Handle NFS4ERR_OLD_STATEID on layoutreturn by bumping the state seqid") Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.c | 43 +++++++++++++++++++++---------------------- 1 file changed, 21 insertions(+), 22 deletions(-) diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 16a37214aba99..fc13a3c8bc483 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -2018,6 +2018,27 @@ lookup_again: goto lookup_again; } + /* + * Because we free lsegs when sending LAYOUTRETURN, we need to wait + * for LAYOUTRETURN. + */ + if (test_bit(NFS_LAYOUT_RETURN, &lo->plh_flags)) { + spin_unlock(&ino->i_lock); + dprintk("%s wait for layoutreturn\n", __func__); + lseg = ERR_PTR(pnfs_prepare_to_retry_layoutget(lo)); + if (!IS_ERR(lseg)) { + pnfs_put_layout_hdr(lo); + dprintk("%s retrying\n", __func__); + trace_pnfs_update_layout(ino, pos, count, iomode, lo, + lseg, + PNFS_UPDATE_LAYOUT_RETRY); + goto lookup_again; + } + trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg, + PNFS_UPDATE_LAYOUT_RETURN); + goto out_put_layout_hdr; + } + lseg = pnfs_find_lseg(lo, &arg, strict_iomode); if (lseg) { trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg, @@ -2070,28 +2091,6 @@ lookup_again: nfs4_stateid_copy(&stateid, &lo->plh_stateid); } - /* - * Because we free lsegs before sending LAYOUTRETURN, we need to wait - * for LAYOUTRETURN even if first is true. - */ - if (test_bit(NFS_LAYOUT_RETURN, &lo->plh_flags)) { - spin_unlock(&ino->i_lock); - dprintk("%s wait for layoutreturn\n", __func__); - lseg = ERR_PTR(pnfs_prepare_to_retry_layoutget(lo)); - if (!IS_ERR(lseg)) { - if (first) - pnfs_clear_first_layoutget(lo); - pnfs_put_layout_hdr(lo); - dprintk("%s retrying\n", __func__); - trace_pnfs_update_layout(ino, pos, count, iomode, lo, - lseg, PNFS_UPDATE_LAYOUT_RETRY); - goto lookup_again; - } - trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg, - PNFS_UPDATE_LAYOUT_RETURN); - goto out_put_layout_hdr; - } - if (pnfs_layoutgets_blocked(lo)) { trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg, PNFS_UPDATE_LAYOUT_BLOCKED); -- GitLab From 1757655d780d9d29bc4b60e708342e94924f7ef3 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 6 Jan 2021 11:28:30 -0500 Subject: [PATCH 0451/2012] NFS/pNFS: Don't call pnfs_free_bucket_lseg() before removing the request In pnfs_generic_clear_request_commit(), we try calling pnfs_free_bucket_lseg() before we remove the request from the DS bucket. That will always fail, since the point is to test for whether or not that bucket is empty. Fixes: c84bea59449a ("NFS/pNFS: Simplify bucket layout segment reference counting") Signed-off-by: Trond Myklebust --- fs/nfs/pnfs_nfs.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c index 2efcfdd348a11..df20bbe8d15e3 100644 --- a/fs/nfs/pnfs_nfs.c +++ b/fs/nfs/pnfs_nfs.c @@ -78,22 +78,18 @@ void pnfs_generic_clear_request_commit(struct nfs_page *req, struct nfs_commit_info *cinfo) { - struct pnfs_layout_segment *freeme = NULL; + struct pnfs_commit_bucket *bucket = NULL; if (!test_and_clear_bit(PG_COMMIT_TO_DS, &req->wb_flags)) goto out; cinfo->ds->nwritten--; - if (list_is_singular(&req->wb_list)) { - struct pnfs_commit_bucket *bucket; - + if (list_is_singular(&req->wb_list)) bucket = list_first_entry(&req->wb_list, - struct pnfs_commit_bucket, - written); - freeme = pnfs_free_bucket_lseg(bucket); - } + struct pnfs_commit_bucket, written); out: nfs_request_remove_commit_list(req, cinfo); - pnfs_put_lseg(freeme); + if (bucket) + pnfs_put_lseg(pnfs_free_bucket_lseg(bucket)); } EXPORT_SYMBOL_GPL(pnfs_generic_clear_request_commit); -- GitLab From 46c9ea1d4fee4cf1f8cc6001b9c14aae61b3d502 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 6 Jan 2021 11:54:57 -0500 Subject: [PATCH 0452/2012] NFS/pNFS: Don't leak DS commits in pnfs_generic_retry_commit() We must ensure that we pass a layout segment to nfs_retry_commit() when we're cleaning up after pnfs_bucket_alloc_ds_commits(). Otherwise, requests that should be committed to the DS will get committed to the MDS. Do so by ensuring that pnfs_bucket_get_committing() always tries to return a layout segment when it returns a non-empty page list. Fixes: c84bea59449a ("NFS/pNFS: Simplify bucket layout segment reference counting") Signed-off-by: Trond Myklebust --- fs/nfs/pnfs_nfs.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c index df20bbe8d15e3..49d3389bd8130 100644 --- a/fs/nfs/pnfs_nfs.c +++ b/fs/nfs/pnfs_nfs.c @@ -403,12 +403,16 @@ pnfs_bucket_get_committing(struct list_head *head, struct pnfs_commit_bucket *bucket, struct nfs_commit_info *cinfo) { + struct pnfs_layout_segment *lseg; struct list_head *pos; list_for_each(pos, &bucket->committing) cinfo->ds->ncommitting--; list_splice_init(&bucket->committing, head); - return pnfs_free_bucket_lseg(bucket); + lseg = pnfs_free_bucket_lseg(bucket); + if (!lseg) + lseg = pnfs_get_lseg(bucket->lseg); + return lseg; } static struct nfs_commit_data * @@ -420,8 +424,6 @@ pnfs_bucket_fetch_commitdata(struct pnfs_commit_bucket *bucket, if (!data) return NULL; data->lseg = pnfs_bucket_get_committing(&data->pages, bucket, cinfo); - if (!data->lseg) - data->lseg = pnfs_get_lseg(bucket->lseg); return data; } -- GitLab From cb2856c5971723910a86b7d1d0cf623d6919cbc4 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 6 Jan 2021 14:13:22 -0500 Subject: [PATCH 0453/2012] NFS/pNFS: Fix a leak of the layout 'plh_outstanding' counter If we exit _lgopen_prepare_attached() without setting a layout, we will currently leak the plh_outstanding counter. Fixes: 411ae722d10a ("pNFS: Wait for stale layoutget calls to complete in pnfs_update_layout()") Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index fc13a3c8bc483..4f274f21c4abc 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -2244,6 +2244,7 @@ static void _lgopen_prepare_attached(struct nfs4_opendata *data, &rng, GFP_KERNEL); if (!lgp) { pnfs_clear_first_layoutget(lo); + nfs_layoutget_end(lo); pnfs_put_layout_hdr(lo); return; } -- GitLab From 5625dcfbbcf892e40e8d60abbb5f56701a1d031c Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Thu, 7 Jan 2021 17:12:08 +0530 Subject: [PATCH 0454/2012] Documentation: kbuild: Fix section reference Section 3.11 was incorrectly called 3.9, fix it. Signed-off-by: Viresh Kumar Signed-off-by: Masahiro Yamada --- Documentation/kbuild/makefiles.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/kbuild/makefiles.rst b/Documentation/kbuild/makefiles.rst index d36768cf12506..9f6a118819513 100644 --- a/Documentation/kbuild/makefiles.rst +++ b/Documentation/kbuild/makefiles.rst @@ -598,7 +598,7 @@ more details, with real examples. explicitly added to $(targets). Assignments to $(targets) are without $(obj)/ prefix. if_changed may be - used in conjunction with custom rules as defined in "3.9 Custom Rules". + used in conjunction with custom rules as defined in "3.11 Custom Rules". Note: It is a typical mistake to forget the FORCE prerequisite. Another common pitfall is that whitespace is sometimes significant; for -- GitLab From 113aac6d567bda783af36d08f73bfda47d8e9a40 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 10 Jan 2021 15:46:06 -0500 Subject: [PATCH 0455/2012] NFS: nfs_delegation_find_inode_server must first reference the superblock Before referencing the inode, we must ensure that the superblock can be referenced. Otherwise, we can end up with iput() calling superblock operations that are no longer valid or accessible. Fixes: e39d8a186ed0 ("NFSv4: Fix an Oops during delegation callbacks") Signed-off-by: Trond Myklebust --- fs/nfs/delegation.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index 816e1427f17eb..04bf8066980c1 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -1011,22 +1011,24 @@ nfs_delegation_find_inode_server(struct nfs_server *server, const struct nfs_fh *fhandle) { struct nfs_delegation *delegation; - struct inode *freeme, *res = NULL; + struct super_block *freeme = NULL; + struct inode *res = NULL; list_for_each_entry_rcu(delegation, &server->delegations, super_list) { spin_lock(&delegation->lock); if (delegation->inode != NULL && !test_bit(NFS_DELEGATION_REVOKED, &delegation->flags) && nfs_compare_fh(fhandle, &NFS_I(delegation->inode)->fh) == 0) { - freeme = igrab(delegation->inode); - if (freeme && nfs_sb_active(freeme->i_sb)) - res = freeme; + if (nfs_sb_active(server->super)) { + freeme = server->super; + res = igrab(delegation->inode); + } spin_unlock(&delegation->lock); if (res != NULL) return res; if (freeme) { rcu_read_unlock(); - iput(freeme); + nfs_sb_deactive(freeme); rcu_read_lock(); } return ERR_PTR(-EAGAIN); -- GitLab From 896567ee7f17a8a736cda8a28cc987228410a2ac Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 10 Jan 2021 15:58:08 -0500 Subject: [PATCH 0456/2012] NFS: nfs_igrab_and_active must first reference the superblock Before referencing the inode, we must ensure that the superblock can be referenced. Otherwise, we can end up with iput() calling superblock operations that are no longer valid or accessible. Fixes: ea7c38fef0b7 ("NFSv4: Ensure we reference the inode for return-on-close in delegreturn") Signed-off-by: Trond Myklebust --- fs/nfs/internal.h | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 6bdee7ab3a6c2..62d3189745cdc 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -599,12 +599,14 @@ extern void nfs4_test_session_trunk(struct rpc_clnt *clnt, static inline struct inode *nfs_igrab_and_active(struct inode *inode) { - inode = igrab(inode); - if (inode != NULL && !nfs_sb_active(inode->i_sb)) { - iput(inode); - inode = NULL; + struct super_block *sb = inode->i_sb; + + if (sb && nfs_sb_active(sb)) { + if (igrab(inode)) + return inode; + nfs_sb_deactive(sb); } - return inode; + return NULL; } static inline void nfs_iput_and_deactive(struct inode *inode) -- GitLab From 7c53f6b671f4aba70ff15e1b05148b10d58c2837 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 10 Jan 2021 14:34:50 -0800 Subject: [PATCH 0457/2012] Linux 5.11-rc3 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 8b2c3f88ee5ea..9e73f82e0d863 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 5 PATCHLEVEL = 11 SUBLEVEL = 0 -EXTRAVERSION = -rc2 +EXTRAVERSION = -rc3 NAME = Kleptomaniac Octopus # *DOCUMENTATION* -- GitLab From 869f4fdaf4ca7bb6e0d05caf6fa1108dddc346a7 Mon Sep 17 00:00:00 2001 From: Dinghao Liu Date: Sat, 9 Jan 2021 20:01:21 +0800 Subject: [PATCH 0458/2012] netfilter: nf_nat: Fix memleak in nf_nat_init When register_pernet_subsys() fails, nf_nat_bysource should be freed just like when nf_ct_extend_register() fails. Fixes: 1cd472bf036ca ("netfilter: nf_nat: add nat hook register functions to nf_nat") Signed-off-by: Dinghao Liu Acked-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_nat_core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c index ea923f8cf9c42..b7c3c902290f1 100644 --- a/net/netfilter/nf_nat_core.c +++ b/net/netfilter/nf_nat_core.c @@ -1174,6 +1174,7 @@ static int __init nf_nat_init(void) ret = register_pernet_subsys(&nat_net_ops); if (ret < 0) { nf_ct_extend_unregister(&nat_extend); + kvfree(nf_nat_bysource); return ret; } -- GitLab From fd25c883667b61f845a4188b6be110bb45de0bac Mon Sep 17 00:00:00 2001 From: Soeren Moch Date: Tue, 22 Dec 2020 16:59:08 +0100 Subject: [PATCH 0459/2012] ARM: dts: tbs2910: rename MMC node aliases to be consistent with kernel versions up to v5.9 (mmc aliases not used here). usdhc1 is not wired up on this board and therefore cannot be used. Start mmc aliases with usdhc2. Signed-off-by: Soeren Moch Cc: stable@vger.kernel.org # 5.10.x Signed-off-by: Shawn Guo --- arch/arm/boot/dts/imx6q-tbs2910.dts | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/arm/boot/dts/imx6q-tbs2910.dts b/arch/arm/boot/dts/imx6q-tbs2910.dts index 861e05d53157e..343364d3e4f7d 100644 --- a/arch/arm/boot/dts/imx6q-tbs2910.dts +++ b/arch/arm/boot/dts/imx6q-tbs2910.dts @@ -16,6 +16,13 @@ stdout-path = &uart1; }; + aliases { + mmc0 = &usdhc2; + mmc1 = &usdhc3; + mmc2 = &usdhc4; + /delete-property/ mmc3; + }; + memory@10000000 { device_type = "memory"; reg = <0x10000000 0x80000000>; -- GitLab From 097530bf8cd469ef7b3d52ef00cafb64b33bacb1 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 30 Dec 2020 16:17:51 +0100 Subject: [PATCH 0460/2012] ARM: imx: fix imx8m dependencies Selecting ARM_GIC_V3 on non-CP15 processors leads to build failures like arch/arm/include/asm/arch_gicv3.h: In function 'write_ICC_AP1R3_EL1': arch/arm/include/asm/arch_gicv3.h:36:40: error: 'c12' undeclared (first use in this function) 36 | #define __ICC_AP1Rx(x) __ACCESS_CP15(c12, 0, c9, x) | ^~~ Add a dependency to only enable the gic driver when building for at an ARMv7 target, which is the closes approximation to the ARMv8 processor that is actually in this chip. Fixes: fc40200ebf82 ("soc: imx: increase build coverage for imx8m soc driver") Signed-off-by: Arnd Bergmann Signed-off-by: Shawn Guo --- drivers/soc/imx/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/soc/imx/Kconfig b/drivers/soc/imx/Kconfig index a9370f4aacca9..05812f8ae7340 100644 --- a/drivers/soc/imx/Kconfig +++ b/drivers/soc/imx/Kconfig @@ -13,7 +13,7 @@ config SOC_IMX8M depends on ARCH_MXC || COMPILE_TEST default ARCH_MXC && ARM64 select SOC_BUS - select ARM_GIC_V3 if ARCH_MXC + select ARM_GIC_V3 if ARCH_MXC && ARCH_MULTI_V7 help If you say yes here you get support for the NXP i.MX8M family support, it will provide the SoC info like SoC family, -- GitLab From 70b6ff4c549a62b59b286445f66cfec6c5327ac8 Mon Sep 17 00:00:00 2001 From: Marco Felsch Date: Tue, 5 Jan 2021 10:24:07 +0100 Subject: [PATCH 0461/2012] ARM: dts: imx6qdl-kontron-samx6i: fix i2c_lcd/cam default status Fix typo so the gpio i2c busses are really disabled. Fixes: 2125212785c9 ("ARM: dts: imx6qdl-kontron-samx6i: add Kontron SMARC SoM Support") Signed-off-by: Marco Felsch Signed-off-by: Shawn Guo --- arch/arm/boot/dts/imx6qdl-kontron-samx6i.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/imx6qdl-kontron-samx6i.dtsi b/arch/arm/boot/dts/imx6qdl-kontron-samx6i.dtsi index 36c2f0d9ce164..b167b33bd108d 100644 --- a/arch/arm/boot/dts/imx6qdl-kontron-samx6i.dtsi +++ b/arch/arm/boot/dts/imx6qdl-kontron-samx6i.dtsi @@ -167,7 +167,7 @@ i2c-gpio,delay-us = <2>; /* ~100 kHz */ #address-cells = <1>; #size-cells = <0>; - status = "disabld"; + status = "disabled"; }; i2c_cam: i2c-gpio-cam { @@ -179,7 +179,7 @@ i2c-gpio,delay-us = <2>; /* ~100 kHz */ #address-cells = <1>; #size-cells = <0>; - status = "disabld"; + status = "disabled"; }; }; -- GitLab From 5a22747b76ca2384057d8e783265404439d31d7f Mon Sep 17 00:00:00 2001 From: Koen Vandeputte Date: Thu, 7 Jan 2021 10:19:06 +0100 Subject: [PATCH 0462/2012] ARM: dts: imx6qdl-gw52xx: fix duplicate regulator naming 2 regulator descriptions carry identical naming. This leads to following boot warning: [ 0.173138] debugfs: Directory 'vdd1p8' with parent 'regulator' already present! Fix this by renaming the one used for audio. Fixes: 5051bff33102 ("ARM: dts: imx: ventana: add LTC3676 PMIC support") Signed-off-by: Tim Harvey Signed-off-by: Koen Vandeputte Cc: stable@vger.kernel.org # v4.11 Signed-off-by: Shawn Guo --- arch/arm/boot/dts/imx6qdl-gw52xx.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/imx6qdl-gw52xx.dtsi b/arch/arm/boot/dts/imx6qdl-gw52xx.dtsi index 736074f1c3ef9..959d8ac2e393b 100644 --- a/arch/arm/boot/dts/imx6qdl-gw52xx.dtsi +++ b/arch/arm/boot/dts/imx6qdl-gw52xx.dtsi @@ -418,7 +418,7 @@ /* VDD_AUD_1P8: Audio codec */ reg_aud_1p8v: ldo3 { - regulator-name = "vdd1p8"; + regulator-name = "vdd1p8a"; regulator-min-microvolt = <1800000>; regulator-max-microvolt = <1800000>; regulator-boot-on; -- GitLab From 00cb645fd7e29bdd20967cd20fa8f77bcdf422f9 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 18 Nov 2020 13:40:58 +0100 Subject: [PATCH 0463/2012] drm/i915/dsi: Use unconditional msleep for the panel_on_delay when there is no reset-deassert MIPI-sequence MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 25b4620ee822 ("drm/i915/dsi: Skip delays for v3 VBTs in vid-mode") added an intel_dsi_msleep() helper which skips sleeping if the MIPI-sequences have a version of 3 or newer and the panel is in vid-mode; and it moved a bunch of msleep-s over to this new helper. This was based on my reading of the big comment around line 730 which starts with "Panel enable/disable sequences from the VBT spec.", where the "v3 video mode seq" column does not have any wait t# entries. Given that this code has been used on a lot of different devices without issues until now, it seems that my interpretation of the spec here is mostly correct. But now I have encountered one device, an Acer Aspire Switch 10 E SW3-016, where the panel will not light up unless we do actually honor the panel_on_delay after exexuting the MIPI_SEQ_PANEL_ON sequence. What seems to set this model apart is that it is lacking a MIPI_SEQ_DEASSERT_RESET sequence, which is where the power-on delay usually happens. Fix the panel not lighting up on this model by using an unconditional msleep(panel_on_delay) instead of intel_dsi_msleep() when there is no MIPI_SEQ_DEASSERT_RESET sequence. Fixes: 25b4620ee822 ("drm/i915/dsi: Skip delays for v3 VBTs in vid-mode") Signed-off-by: Hans de Goede Reviewed-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20201118124058.26021-1-hdegoede@redhat.com (cherry picked from commit 6fdb335f1c9c0845b50625de1624d8445c4c4a07) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/vlv_dsi.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/display/vlv_dsi.c b/drivers/gpu/drm/i915/display/vlv_dsi.c index d52f9c1779081..f94025ec603a6 100644 --- a/drivers/gpu/drm/i915/display/vlv_dsi.c +++ b/drivers/gpu/drm/i915/display/vlv_dsi.c @@ -812,10 +812,20 @@ static void intel_dsi_pre_enable(struct intel_atomic_state *state, intel_dsi_prepare(encoder, pipe_config); intel_dsi_vbt_exec_sequence(intel_dsi, MIPI_SEQ_POWER_ON); - intel_dsi_msleep(intel_dsi, intel_dsi->panel_on_delay); - /* Deassert reset */ - intel_dsi_vbt_exec_sequence(intel_dsi, MIPI_SEQ_DEASSERT_RESET); + /* + * Give the panel time to power-on and then deassert its reset. + * Depending on the VBT MIPI sequences version the deassert-seq + * may contain the necessary delay, intel_dsi_msleep() will skip + * the delay in that case. If there is no deassert-seq, then an + * unconditional msleep is used to give the panel time to power-on. + */ + if (dev_priv->vbt.dsi.sequence[MIPI_SEQ_DEASSERT_RESET]) { + intel_dsi_msleep(intel_dsi, intel_dsi->panel_on_delay); + intel_dsi_vbt_exec_sequence(intel_dsi, MIPI_SEQ_DEASSERT_RESET); + } else { + msleep(intel_dsi->panel_on_delay); + } if (IS_GEMINILAKE(dev_priv)) { glk_cold_boot = glk_dsi_enable_io(encoder); -- GitLab From 057fe3535eb35696ad5a849d01d61efa930d2182 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 4 Jan 2021 20:39:05 +0000 Subject: [PATCH 0464/2012] drm/i915: Disable RPM wakeref assertions during driver shutdown MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As with the regular suspend paths, also disable the wakeref assertions as we disable the driver during shutdown. Reported-by: Hans de Goede Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/2899 Fixes: fe0f1e3bfdfe ("drm/i915: Shut down displays gracefully on reboot") Signed-off-by: Chris Wilson Cc: Ville Syrjälä Cc: Hans de Goede Tested-by: Hans de Goede Reviewed-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20210104203905.19248-1-chris@chris-wilson.co.uk (cherry picked from commit 19fe4ac6f0e7163daf9375a4d39947389ae465fa) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_drv.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 88ad754962af1..99eb0d7bbc447 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -1047,6 +1047,8 @@ static void intel_shutdown_encoders(struct drm_i915_private *dev_priv) void i915_driver_shutdown(struct drm_i915_private *i915) { + disable_rpm_wakeref_asserts(&i915->runtime_pm); + i915_gem_suspend(i915); drm_kms_helper_poll_disable(&i915->drm); @@ -1060,6 +1062,8 @@ void i915_driver_shutdown(struct drm_i915_private *i915) intel_suspend_encoders(i915); intel_shutdown_encoders(i915); + + enable_rpm_wakeref_asserts(&i915->runtime_pm); } static bool suspend_to_idle(struct drm_i915_private *dev_priv) -- GitLab From bb83d5fb550bb7db75b29e6342417fda2bbb691c Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Fri, 8 Jan 2021 17:28:41 +0200 Subject: [PATCH 0465/2012] drm/i915/backlight: fix CPU mode backlight takeover on LPT MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The pch_get_backlight(), lpt_get_backlight(), and lpt_set_backlight() functions operate directly on the hardware registers. If inverting the value is needed, using intel_panel_compute_brightness(), it should only be done in the interface between hardware registers and panel->backlight.level. The CPU mode takeover code added in commit 5b1ec9ac7ab5 ("drm/i915/backlight: Fix backlight takeover on LPT, v3.") reads the hardware register and converts to panel->backlight.level correctly, however the value written back should remain in the hardware register "domain". This hasn't been an issue, because GM45 machines are the only known users of i915.invert_brightness and the brightness invert quirk, and without one of them no conversion is made. It's likely nobody's ever hit the problem. Fixes: 5b1ec9ac7ab5 ("drm/i915/backlight: Fix backlight takeover on LPT, v3.") Cc: Maarten Lankhorst Cc: Ville Syrjälä Cc: Lyude Paul Cc: # v5.1+ Reviewed-by: Lyude Paul Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20210108152841.6944-1-jani.nikula@intel.com (cherry picked from commit 0d4ced1c5bfe649196877d90442d4fd618e19153) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_panel.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_panel.c b/drivers/gpu/drm/i915/display/intel_panel.c index 9f23bac0d7924..d64fce1a17cbc 100644 --- a/drivers/gpu/drm/i915/display/intel_panel.c +++ b/drivers/gpu/drm/i915/display/intel_panel.c @@ -1650,16 +1650,13 @@ static int lpt_setup_backlight(struct intel_connector *connector, enum pipe unus val = pch_get_backlight(connector); else val = lpt_get_backlight(connector); - val = intel_panel_compute_brightness(connector, val); - panel->backlight.level = clamp(val, panel->backlight.min, - panel->backlight.max); if (cpu_mode) { drm_dbg_kms(&dev_priv->drm, "CPU backlight register was enabled, switching to PCH override\n"); /* Write converted CPU PWM value to PCH override register */ - lpt_set_backlight(connector->base.state, panel->backlight.level); + lpt_set_backlight(connector->base.state, val); intel_de_write(dev_priv, BLC_PWM_PCH_CTL1, pch_ctl1 | BLM_PCH_OVERRIDE_ENABLE); @@ -1667,6 +1664,10 @@ static int lpt_setup_backlight(struct intel_connector *connector, enum pipe unus cpu_ctl2 & ~BLM_PWM_ENABLE); } + val = intel_panel_compute_brightness(connector, val); + panel->backlight.level = clamp(val, panel->backlight.min, + panel->backlight.max); + return 0; } -- GitLab From adc5d8757288a3a5628436d16e78fb696d802e39 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Mon, 7 Dec 2020 01:02:52 +0100 Subject: [PATCH 0466/2012] signal: Add missing __user annotation to copy_siginfo_from_user_any copy_siginfo_from_user_any() takes a userspace pointer as second argument; annotate the parameter type accordingly. Signed-off-by: Jann Horn Link: https://lore.kernel.org/r/20201207000252.138564-1-jannh@google.com Signed-off-by: Christian Brauner --- kernel/signal.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/signal.c b/kernel/signal.c index 5736c55aaa1af..546b860c6514d 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -3701,7 +3701,8 @@ static bool access_pidfd_pidns(struct pid *pid) return true; } -static int copy_siginfo_from_user_any(kernel_siginfo_t *kinfo, siginfo_t *info) +static int copy_siginfo_from_user_any(kernel_siginfo_t *kinfo, + siginfo_t __user *info) { #ifdef CONFIG_COMPAT /* -- GitLab From 96e1e9846c6691f90009ae4d8e486e0ce5c628a7 Mon Sep 17 00:00:00 2001 From: Alexander Guril Date: Sat, 26 Dec 2020 12:40:21 +0100 Subject: [PATCH 0467/2012] Kernel: fork.c: Fix coding style: Do not use {} around single-line statements Fixed two coding style issues in kernel/fork.c Do not use {} around single-line statements. Cc: linux-kernel@vger.kernel.org Acked-by: Christian Brauner Signed-off-by: Alexander Guril Link: https://lore.kernel.org/r/20201226114021.2589-1-alexander.guril02@gmail.com Signed-off-by: Christian Brauner --- kernel/fork.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/kernel/fork.c b/kernel/fork.c index 37720a6d04eaa..d66cd1014211b 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -819,9 +819,8 @@ void __init fork_init(void) init_task.signal->rlim[RLIMIT_SIGPENDING] = init_task.signal->rlim[RLIMIT_NPROC]; - for (i = 0; i < UCOUNT_COUNTS; i++) { + for (i = 0; i < UCOUNT_COUNTS; i++) init_user_ns.ucount_max[i] = max_threads/2; - } #ifdef CONFIG_VMAP_STACK cpuhp_setup_state(CPUHP_BP_PREPARE_DYN, "fork:vm_stack_cache", @@ -1654,9 +1653,8 @@ static inline void init_task_pid_links(struct task_struct *task) { enum pid_type type; - for (type = PIDTYPE_PID; type < PIDTYPE_MAX; ++type) { + for (type = PIDTYPE_PID; type < PIDTYPE_MAX; ++type) INIT_HLIST_NODE(&task->pid_links[type]); - } } static inline void -- GitLab From 06b831588b639ad9d94e4789b0250562228722c2 Mon Sep 17 00:00:00 2001 From: Matthias Reichl Date: Tue, 5 Jan 2021 10:30:23 +0100 Subject: [PATCH 0468/2012] media: rc: fix timeout handling after switch to microsecond durations Commit 528222d853f92 ("media: rc: harmonize infrared durations to microseconds") missed to switch some timeout calculations from nanoseconds to microseconds. This resulted in spurious key_up+key_down events at the last scancode if the rc device uses a long timeout (eg 100ms on nuvoton-cir) as the device timeout wasn't properly accounted for in the keyup timeout calculation. Fix this by applying the proper conversion functions. Cc: stable@vger.kernel.org Fixes: 528222d853f92 ("media: rc: harmonize infrared durations to microseconds") Signed-off-by: Matthias Reichl Signed-off-by: Sean Young Signed-off-by: Mauro Carvalho Chehab --- drivers/media/rc/ir-mce_kbd-decoder.c | 2 +- drivers/media/rc/rc-main.c | 4 ++-- drivers/media/rc/serial_ir.c | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/media/rc/ir-mce_kbd-decoder.c b/drivers/media/rc/ir-mce_kbd-decoder.c index be8f2756a444e..1524dc0fc566e 100644 --- a/drivers/media/rc/ir-mce_kbd-decoder.c +++ b/drivers/media/rc/ir-mce_kbd-decoder.c @@ -320,7 +320,7 @@ again: data->body); spin_lock(&data->keylock); if (scancode) { - delay = nsecs_to_jiffies(dev->timeout) + + delay = usecs_to_jiffies(dev->timeout) + msecs_to_jiffies(100); mod_timer(&data->rx_timeout, jiffies + delay); } else { diff --git a/drivers/media/rc/rc-main.c b/drivers/media/rc/rc-main.c index 29d4d01896ff6..1fd62c1dac768 100644 --- a/drivers/media/rc/rc-main.c +++ b/drivers/media/rc/rc-main.c @@ -737,7 +737,7 @@ static unsigned int repeat_period(int protocol) void rc_repeat(struct rc_dev *dev) { unsigned long flags; - unsigned int timeout = nsecs_to_jiffies(dev->timeout) + + unsigned int timeout = usecs_to_jiffies(dev->timeout) + msecs_to_jiffies(repeat_period(dev->last_protocol)); struct lirc_scancode sc = { .scancode = dev->last_scancode, .rc_proto = dev->last_protocol, @@ -855,7 +855,7 @@ void rc_keydown(struct rc_dev *dev, enum rc_proto protocol, u64 scancode, ir_do_keydown(dev, protocol, scancode, keycode, toggle); if (dev->keypressed) { - dev->keyup_jiffies = jiffies + nsecs_to_jiffies(dev->timeout) + + dev->keyup_jiffies = jiffies + usecs_to_jiffies(dev->timeout) + msecs_to_jiffies(repeat_period(protocol)); mod_timer(&dev->timer_keyup, dev->keyup_jiffies); } diff --git a/drivers/media/rc/serial_ir.c b/drivers/media/rc/serial_ir.c index 8cc28c92d05d6..96ae0294ac102 100644 --- a/drivers/media/rc/serial_ir.c +++ b/drivers/media/rc/serial_ir.c @@ -385,7 +385,7 @@ static irqreturn_t serial_ir_irq_handler(int i, void *blah) } while (!(sinp(UART_IIR) & UART_IIR_NO_INT)); /* still pending ? */ mod_timer(&serial_ir.timeout_timer, - jiffies + nsecs_to_jiffies(serial_ir.rcdev->timeout)); + jiffies + usecs_to_jiffies(serial_ir.rcdev->timeout)); ir_raw_event_handle(serial_ir.rcdev); -- GitLab From 9eb09dc2f4650de8c6ce286d3153511e6f6314c0 Mon Sep 17 00:00:00 2001 From: Stanimir Varbanov Date: Tue, 15 Dec 2020 15:07:44 +0100 Subject: [PATCH 0469/2012] media: venus: core: Fix platform driver shutdown With TZ system reboot cannot finish successfully. To fix that enable core clocks by runtime pm before TZ calls and disable clocks after that. Tested-by: Shawn Guo Signed-off-by: Stanimir Varbanov Signed-off-by: Mauro Carvalho Chehab --- drivers/media/platform/qcom/venus/core.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/media/platform/qcom/venus/core.c b/drivers/media/platform/qcom/venus/core.c index bdd293faaad0f..7233a73117577 100644 --- a/drivers/media/platform/qcom/venus/core.c +++ b/drivers/media/platform/qcom/venus/core.c @@ -349,8 +349,10 @@ static void venus_core_shutdown(struct platform_device *pdev) { struct venus_core *core = platform_get_drvdata(pdev); + pm_runtime_get_sync(core->dev); venus_shutdown(core); venus_firmware_deinit(core); + pm_runtime_put_sync(core->dev); } static __maybe_unused int venus_runtime_suspend(struct device *dev) -- GitLab From cb5021ca622fe83923e0789f99fe7227cbcd3f68 Mon Sep 17 00:00:00 2001 From: Yanfei Xu Date: Mon, 11 Jan 2021 18:48:07 +0800 Subject: [PATCH 0470/2012] kthread: remove comments about old _do_fork() helper The old _do_fork() helper has been removed in favor of kernel_clone(). Here correct some comments which still contain _do_fork() Link: https://lore.kernel.org/r/20210111104807.18022-1-yanfei.xu@windriver.com Cc: christian@brauner.io Cc: linux-kernel@vger.kernel.org Acked-by: Christian Brauner Signed-off-by: Yanfei Xu Signed-off-by: Christian Brauner --- include/trace/events/sched.h | 2 +- kernel/kthread.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h index 5039af667645d..cbe3e152d24c0 100644 --- a/include/trace/events/sched.h +++ b/include/trace/events/sched.h @@ -366,7 +366,7 @@ TRACE_EVENT(sched_process_wait, ); /* - * Tracepoint for do_fork: + * Tracepoint for kernel_clone: */ TRACE_EVENT(sched_process_fork, diff --git a/kernel/kthread.c b/kernel/kthread.c index a5eceecd4513c..fb9c3dcbb68db 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -294,7 +294,7 @@ static int kthread(void *_create) do_exit(ret); } -/* called from do_fork() to get node information for about to be created task */ +/* called from kernel_clone() to get node information for about to be created task */ int tsk_fork_get_node(struct task_struct *tsk) { #ifdef CONFIG_NUMA -- GitLab From d434ab6db524ab1efd0afad4ffa1ee65ca6ac097 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Mon, 11 Jan 2021 04:00:30 +0000 Subject: [PATCH 0471/2012] io_uring: drop mm and files after task_work_run __io_req_task_submit() run by task_work can set mm and files, but io_sq_thread() in some cases, and because __io_sq_thread_acquire_mm() and __io_sq_thread_acquire_files() do a simple current->mm/files check it may end up submitting IO with mm/files of another task. We also need to drop it after in the end to drop potentially grabbed references to them. Cc: stable@vger.kernel.org # 5.9+ Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io_uring.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/io_uring.c b/fs/io_uring.c index 2f305c097bd5b..7af74c1ec9091 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -7056,6 +7056,7 @@ static int io_sq_thread(void *data) if (sqt_spin || !time_after(jiffies, timeout)) { io_run_task_work(); + io_sq_thread_drop_mm_files(); cond_resched(); if (sqt_spin) timeout = jiffies + sqd->sq_thread_idle; @@ -7093,6 +7094,7 @@ static int io_sq_thread(void *data) } io_run_task_work(); + io_sq_thread_drop_mm_files(); if (cur_css) io_sq_thread_unassociate_blkcg(); -- GitLab From 621fadc22365f3cf307bcd9048e3372e9ee9cdcc Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Mon, 11 Jan 2021 04:00:31 +0000 Subject: [PATCH 0472/2012] io_uring: don't take files/mm for a dead task In rare cases a task may be exiting while io_ring_exit_work() trying to cancel/wait its requests. It's ok for __io_sq_thread_acquire_mm() because of SQPOLL check, but is not for __io_sq_thread_acquire_files(). Play safe and fail for both of them. Cc: stable@vger.kernel.org # 5.5+ Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io_uring.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fs/io_uring.c b/fs/io_uring.c index 7af74c1ec9091..b0e6d8e607a33 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -1106,6 +1106,9 @@ static void io_sq_thread_drop_mm_files(void) static int __io_sq_thread_acquire_files(struct io_ring_ctx *ctx) { + if (current->flags & PF_EXITING) + return -EFAULT; + if (!current->files) { struct files_struct *files; struct nsproxy *nsproxy; @@ -1133,6 +1136,8 @@ static int __io_sq_thread_acquire_mm(struct io_ring_ctx *ctx) { struct mm_struct *mm; + if (current->flags & PF_EXITING) + return -EFAULT; if (current->mm) return 0; -- GitLab From ca1219c0a7432272324660fc9f61a9940f90c50b Mon Sep 17 00:00:00 2001 From: Jisheng Zhang Date: Tue, 29 Dec 2020 16:16:25 +0800 Subject: [PATCH 0473/2012] mmc: sdhci-of-dwcmshc: fix rpmb access Commit a44f7cb93732 ("mmc: core: use mrq->sbc when sending CMD23 for RPMB") began to use ACMD23 for RPMB if the host supports ACMD23. In RPMB ACM23 case, we need to set bit 31 to CMD23 argument, otherwise RPMB write operation will return general fail. However, no matter V4 is enabled or not, the dwcmshc's ARGUMENT2 register is 32-bit block count register which doesn't support stuff bits of CMD23 argument. So let's handle this specific ACMD23 case. From another side, this patch also prepare for future v4 enabling for dwcmshc, because from the 4.10 spec, the ARGUMENT2 register is redefined as 32bit block count which doesn't support stuff bits of CMD23 argument. Fixes: a44f7cb93732 ("mmc: core: use mrq->sbc when sending CMD23 for RPMB") Signed-off-by: Jisheng Zhang Acked-by: Adrian Hunter Link: https://lore.kernel.org/r/20201229161625.38255233@xhacker.debian Cc: stable@vger.kernel.org Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci-of-dwcmshc.c | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/drivers/mmc/host/sdhci-of-dwcmshc.c b/drivers/mmc/host/sdhci-of-dwcmshc.c index 4b673792b5a42..d90020ed36227 100644 --- a/drivers/mmc/host/sdhci-of-dwcmshc.c +++ b/drivers/mmc/host/sdhci-of-dwcmshc.c @@ -16,6 +16,8 @@ #include "sdhci-pltfm.h" +#define SDHCI_DWCMSHC_ARG2_STUFF GENMASK(31, 16) + /* DWCMSHC specific Mode Select value */ #define DWCMSHC_CTRL_HS400 0x7 @@ -49,6 +51,29 @@ static void dwcmshc_adma_write_desc(struct sdhci_host *host, void **desc, sdhci_adma_write_desc(host, desc, addr, len, cmd); } +static void dwcmshc_check_auto_cmd23(struct mmc_host *mmc, + struct mmc_request *mrq) +{ + struct sdhci_host *host = mmc_priv(mmc); + + /* + * No matter V4 is enabled or not, ARGUMENT2 register is 32-bit + * block count register which doesn't support stuff bits of + * CMD23 argument on dwcmsch host controller. + */ + if (mrq->sbc && (mrq->sbc->arg & SDHCI_DWCMSHC_ARG2_STUFF)) + host->flags &= ~SDHCI_AUTO_CMD23; + else + host->flags |= SDHCI_AUTO_CMD23; +} + +static void dwcmshc_request(struct mmc_host *mmc, struct mmc_request *mrq) +{ + dwcmshc_check_auto_cmd23(mmc, mrq); + + sdhci_request(mmc, mrq); +} + static void dwcmshc_set_uhs_signaling(struct sdhci_host *host, unsigned int timing) { @@ -133,6 +158,8 @@ static int dwcmshc_probe(struct platform_device *pdev) sdhci_get_of_property(pdev); + host->mmc_host_ops.request = dwcmshc_request; + err = sdhci_add_host(host); if (err) goto err_clk; -- GitLab From 1a3ed0dc3594d99ff341ec63865a40519ea24b8d Mon Sep 17 00:00:00 2001 From: Alex Leibovich Date: Fri, 11 Dec 2020 15:16:56 +0100 Subject: [PATCH 0474/2012] mmc: sdhci-xenon: fix 1.8v regulator stabilization Automatic Clock Gating is a feature used for the power consumption optimisation. It turned out that during early init phase it may prevent the stable voltage switch to 1.8V - due to that on some platforms an endless printout in dmesg can be observed: "mmc1: 1.8V regulator output did not became stable" Fix the problem by disabling the ACG at very beginning of the sdhci_init and let that be enabled later. Fixes: 3a3748dba881 ("mmc: sdhci-xenon: Add Marvell Xenon SDHC core functionality") Signed-off-by: Alex Leibovich Signed-off-by: Marcin Wojtas Cc: stable@vger.kernel.org Acked-by: Adrian Hunter Link: https://lore.kernel.org/r/20201211141656.24915-1-mw@semihalf.com Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci-xenon.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/mmc/host/sdhci-xenon.c b/drivers/mmc/host/sdhci-xenon.c index c67611fdaa8aa..d19eef5f725f8 100644 --- a/drivers/mmc/host/sdhci-xenon.c +++ b/drivers/mmc/host/sdhci-xenon.c @@ -168,7 +168,12 @@ static void xenon_reset_exit(struct sdhci_host *host, /* Disable tuning request and auto-retuning again */ xenon_retune_setup(host); - xenon_set_acg(host, true); + /* + * The ACG should be turned off at the early init time, in order + * to solve a possible issues with the 1.8V regulator stabilization. + * The feature is enabled in later stage. + */ + xenon_set_acg(host, false); xenon_set_sdclk_off_idle(host, sdhc_id, false); -- GitLab From 2af5268180410b874fc06be91a1b2fbb22b1be0c Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Wed, 9 Dec 2020 17:39:52 +0200 Subject: [PATCH 0475/2012] drm/i915/icl: Fix initing the DSI DSC power refcount during HW readout For an enabled DSC during HW readout the corresponding power reference is taken along the CRTC power domain references in get_crtc_power_domains(). Remove the incorrect get ref from the DSI encoder hook. Fixes: 2b68392e638d ("drm/i915/dsi: add support for DSC") Cc: Vandita Kulkarni Cc: Jani Nikula Signed-off-by: Imre Deak Reviewed-by: Anshuman Gupta Link: https://patchwork.freedesktop.org/patch/msgid/20201209153952.3397959-1-imre.deak@intel.com (cherry picked from commit 3a9ec563a4ff770ae647f6ee539810f1866866c9) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/icl_dsi.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/gpu/drm/i915/display/icl_dsi.c b/drivers/gpu/drm/i915/display/icl_dsi.c index a9439b4156037..b3533a32f8ba2 100644 --- a/drivers/gpu/drm/i915/display/icl_dsi.c +++ b/drivers/gpu/drm/i915/display/icl_dsi.c @@ -1616,10 +1616,6 @@ static void gen11_dsi_get_power_domains(struct intel_encoder *encoder, get_dsi_io_power_domains(i915, enc_to_intel_dsi(encoder)); - - if (crtc_state->dsc.compression_enable) - intel_display_power_get(i915, - intel_dsc_power_domain(crtc_state)); } static bool gen11_dsi_get_hw_state(struct intel_encoder *encoder, -- GitLab From 7024f60d655272bd2ca1d3a4c9e0a63319b1eea1 Mon Sep 17 00:00:00 2001 From: "Hyunwook (Wooky) Baek" Date: Sat, 9 Jan 2021 23:11:02 -0800 Subject: [PATCH 0476/2012] x86/sev-es: Handle string port IO to kernel memory properly Don't assume dest/source buffers are userspace addresses when manually copying data for string I/O or MOVS MMIO, as {get,put}_user() will fail if handed a kernel address and ultimately lead to a kernel panic. When invoking INSB/OUTSB instructions in kernel space in a SEV-ES-enabled VM, the kernel crashes with the following message: "SEV-ES: Unsupported exception in #VC instruction emulation - can't continue" Handle that case properly. [ bp: Massage commit message. ] Fixes: f980f9c31a92 ("x86/sev-es: Compile early handler code into kernel image") Signed-off-by: Hyunwook (Wooky) Baek Signed-off-by: Borislav Petkov Acked-by: David Rientjes Link: https://lkml.kernel.org/r/20210110071102.2576186-1-baekhw@google.com --- arch/x86/kernel/sev-es.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/arch/x86/kernel/sev-es.c b/arch/x86/kernel/sev-es.c index 0bd1a0fc587e0..ab31c34ba508c 100644 --- a/arch/x86/kernel/sev-es.c +++ b/arch/x86/kernel/sev-es.c @@ -286,6 +286,12 @@ static enum es_result vc_write_mem(struct es_em_ctxt *ctxt, u16 d2; u8 d1; + /* If instruction ran in kernel mode and the I/O buffer is in kernel space */ + if (!user_mode(ctxt->regs) && !access_ok(target, size)) { + memcpy(dst, buf, size); + return ES_OK; + } + switch (size) { case 1: memcpy(&d1, buf, 1); @@ -335,6 +341,12 @@ static enum es_result vc_read_mem(struct es_em_ctxt *ctxt, u16 d2; u8 d1; + /* If instruction ran in kernel mode and the I/O buffer is in kernel space */ + if (!user_mode(ctxt->regs) && !access_ok(s, size)) { + memcpy(buf, src, size); + return ES_OK; + } + switch (size) { case 1: if (get_user(d1, s)) -- GitLab From a58015d638cd4e4555297b04bec9b49028369075 Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Thu, 7 Jan 2021 23:23:48 -0800 Subject: [PATCH 0477/2012] ACPI: scan: Harden acpi_device_add() against device ID overflows Linux VM on Hyper-V crashes with the latest mainline: [ 4.069624] detected buffer overflow in strcpy [ 4.077733] kernel BUG at lib/string.c:1149! .. [ 4.085819] RIP: 0010:fortify_panic+0xf/0x11 ... [ 4.085819] Call Trace: [ 4.085819] acpi_device_add.cold.15+0xf2/0xfb [ 4.085819] acpi_add_single_object+0x2a6/0x690 [ 4.085819] acpi_bus_check_add+0xc6/0x280 [ 4.085819] acpi_ns_walk_namespace+0xda/0x1aa [ 4.085819] acpi_walk_namespace+0x9a/0xc2 [ 4.085819] acpi_bus_scan+0x78/0x90 [ 4.085819] acpi_scan_init+0xfa/0x248 [ 4.085819] acpi_init+0x2c1/0x321 [ 4.085819] do_one_initcall+0x44/0x1d0 [ 4.085819] kernel_init_freeable+0x1ab/0x1f4 This is because of the recent buffer overflow detection in the commit 6a39e62abbaf ("lib: string.h: detect intra-object overflow in fortified string functions") Here acpi_device_bus_id->bus_id can only hold 14 characters, while the the acpi_device_hid(device) returns a 22-char string "HYPER_V_GEN_COUNTER_V1". Per ACPI Spec v6.2, Section 6.1.5 _HID (Hardware ID), if the ID is a string, it must be of the form AAA#### or NNNN####, i.e. 7 chars or 8 chars. The field bus_id in struct acpi_device_bus_id was originally defined as char bus_id[9], and later was enlarged to char bus_id[15] in 2007 in the commit bb0958544f3c ("ACPI: use more understandable bus_id for ACPI devices") Fix the issue by changing the field bus_id to const char *, and use kstrdup_const() to initialize it. Signed-off-by: Dexuan Cui Tested-By: Jethro Beekman [ rjw: Subject change, whitespace adjustment ] Cc: All applicable Signed-off-by: Rafael J. Wysocki --- drivers/acpi/internal.h | 2 +- drivers/acpi/scan.c | 15 ++++++++++++++- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/drivers/acpi/internal.h b/drivers/acpi/internal.h index cb229e24c5637..e6a5d997241c4 100644 --- a/drivers/acpi/internal.h +++ b/drivers/acpi/internal.h @@ -97,7 +97,7 @@ void acpi_scan_table_handler(u32 event, void *table, void *context); extern struct list_head acpi_bus_id_list; struct acpi_device_bus_id { - char bus_id[15]; + const char *bus_id; unsigned int instance_no; struct list_head node; }; diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index 80b668c80073a..58ff36340cd7c 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -486,6 +486,7 @@ static void acpi_device_del(struct acpi_device *device) acpi_device_bus_id->instance_no--; else { list_del(&acpi_device_bus_id->node); + kfree_const(acpi_device_bus_id->bus_id); kfree(acpi_device_bus_id); } break; @@ -674,7 +675,14 @@ int acpi_device_add(struct acpi_device *device, } if (!found) { acpi_device_bus_id = new_bus_id; - strcpy(acpi_device_bus_id->bus_id, acpi_device_hid(device)); + acpi_device_bus_id->bus_id = + kstrdup_const(acpi_device_hid(device), GFP_KERNEL); + if (!acpi_device_bus_id->bus_id) { + pr_err(PREFIX "Memory allocation error for bus id\n"); + result = -ENOMEM; + goto err_free_new_bus_id; + } + acpi_device_bus_id->instance_no = 0; list_add_tail(&acpi_device_bus_id->node, &acpi_bus_id_list); } @@ -709,6 +717,11 @@ int acpi_device_add(struct acpi_device *device, if (device->parent) list_del(&device->node); list_del(&device->wakeup_list); + + err_free_new_bus_id: + if (!found) + kfree(new_bus_id); + mutex_unlock(&acpi_device_lock); err_detach: -- GitLab From 843010a815e87b45fc6b64848f02e42f6aee3f22 Mon Sep 17 00:00:00 2001 From: Jeremy Cline Date: Mon, 11 Jan 2021 11:40:33 -0500 Subject: [PATCH 0478/2012] drm/ttm: Fix address passed to dma_mapping_error() in ttm_pool_map() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit check_unmap() is producing a warning about a missing map error check. The return value from dma_map_page() should be checked for an error, not the caller-provided dma_addr. Fixes: d099fc8f540a ("drm/ttm: new TT backend allocation pool v3") Signed-off-by: Jeremy Cline Reviewed-by: Christian König Link: https://patchwork.freedesktop.org/patch/413432/ Signed-off-by: Christian König --- drivers/gpu/drm/ttm/ttm_pool.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c index a00b7ab9c14cf..255c457349799 100644 --- a/drivers/gpu/drm/ttm/ttm_pool.c +++ b/drivers/gpu/drm/ttm/ttm_pool.c @@ -190,7 +190,7 @@ static int ttm_pool_map(struct ttm_pool *pool, unsigned int order, size_t size = (1ULL << order) * PAGE_SIZE; addr = dma_map_page(pool->dev, p, 0, size, DMA_BIDIRECTIONAL); - if (dma_mapping_error(pool->dev, **dma_addr)) + if (dma_mapping_error(pool->dev, addr)) return -EFAULT; } -- GitLab From 7bb83f6fc4ee84e95d0ac0d14452c2619fb3fe70 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Fri, 8 Jan 2021 13:19:38 +0900 Subject: [PATCH 0479/2012] tracing/kprobes: Do the notrace functions check without kprobes on ftrace Enable the notrace function check on the architecture which doesn't support kprobes on ftrace but support dynamic ftrace. This notrace function check is not only for the kprobes on ftrace but also sw-breakpoint based kprobes. Thus there is no reason to limit this check for the arch which supports kprobes on ftrace. This also changes the dependency of Kconfig. Because kprobe event uses the function tracer's address list for identifying notrace function, if the CONFIG_DYNAMIC_FTRACE=n, it can not check whether the target function is notrace or not. Link: https://lkml.kernel.org/r/20210105065730.2634785-1-naveen.n.rao@linux.vnet.ibm.com Link: https://lkml.kernel.org/r/161007957862.114704.4512260007555399463.stgit@devnote2 Cc: stable@vger.kernel.org Fixes: 45408c4f92506 ("tracing: kprobes: Prohibit probing on notrace function") Acked-by: Naveen N. Rao Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/Kconfig | 2 +- kernel/trace/trace_kprobe.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index d5a19413d4f8a..c1a62ae7e8128 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -538,7 +538,7 @@ config KPROBE_EVENTS config KPROBE_EVENTS_ON_NOTRACE bool "Do NOT protect notrace function from kprobe events" depends on KPROBE_EVENTS - depends on KPROBES_ON_FTRACE + depends on DYNAMIC_FTRACE default n help This is only for the developers who want to debug ftrace itself diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 9c31f42245e93..e6fba1798771b 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -434,7 +434,7 @@ static int disable_trace_kprobe(struct trace_event_call *call, return 0; } -#if defined(CONFIG_KPROBES_ON_FTRACE) && \ +#if defined(CONFIG_DYNAMIC_FTRACE) && \ !defined(CONFIG_KPROBE_EVENTS_ON_NOTRACE) static bool __within_notrace_func(unsigned long addr) { -- GitLab From a5e92ef3c3fd46320d4e293bdec0cdd4b80a6e0f Mon Sep 17 00:00:00 2001 From: Bas Nieuwenhuizen Date: Sun, 10 Jan 2021 03:11:42 +0100 Subject: [PATCH 0480/2012] drm: Check actual format for legacy pageflip. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With modifiers one can actually have different format_info structs for the same format, which now matters for AMDGPU since we convert implicit modifiers to explicit modifiers with multiple planes. I checked other drivers and it doesn't look like they end up triggering this case so I think this is safe to relax. Signed-off-by: Bas Nieuwenhuizen Reviewed-by: Daniel Vetter Reviewed-by: Zhan Liu Acked-by: Christian König Acked-by: Alex Deucher Fixes: 816853f9dc40 ("drm/amd/display: Set new format info for converted metadata.") Signed-off-by: Alex Deucher Link: https://patchwork.freedesktop.org/patch/msgid/20210110021142.28221-1-bas@basnieuwenhuizen.nl --- drivers/gpu/drm/drm_plane.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/drm_plane.c b/drivers/gpu/drm/drm_plane.c index e6231947f9872..a0cb746bcb0a9 100644 --- a/drivers/gpu/drm/drm_plane.c +++ b/drivers/gpu/drm/drm_plane.c @@ -1163,7 +1163,14 @@ retry: if (ret) goto out; - if (old_fb->format != fb->format) { + /* + * Only check the FOURCC format code, excluding modifiers. This is + * enough for all legacy drivers. Atomic drivers have their own + * checks in their ->atomic_check implementation, which will + * return -EINVAL if any hw or driver constraint is violated due + * to modifier changes. + */ + if (old_fb->format->format != fb->format->format) { DRM_DEBUG_KMS("Page flip is not allowed to change frame buffer format.\n"); ret = -EINVAL; goto out; -- GitLab From 27b7c6e096264cc7b91bb80a4f65f8c0a66f079f Mon Sep 17 00:00:00 2001 From: Mikko Perttunen Date: Mon, 11 Jan 2021 18:08:32 +0200 Subject: [PATCH 0481/2012] i2c: tegra: Wait for config load atomically while in ISR Upon a communication error, the interrupt handler can call tegra_i2c_disable_packet_mode. This causes a sleeping poll to happen unless the current transaction was marked atomic. Fix this by making the poll happen atomically if we are in an IRQ. This matches the behavior prior to the patch mentioned in the Fixes tag. Fixes: ede2299f7101 ("i2c: tegra: Support atomic transfers") Cc: stable@vger.kernel.org Signed-off-by: Mikko Perttunen Reviewed-by: Dmitry Osipenko Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-tegra.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-tegra.c b/drivers/i2c/busses/i2c-tegra.c index 6f08c0c3238d5..0727383f49402 100644 --- a/drivers/i2c/busses/i2c-tegra.c +++ b/drivers/i2c/busses/i2c-tegra.c @@ -533,7 +533,7 @@ static int tegra_i2c_poll_register(struct tegra_i2c_dev *i2c_dev, void __iomem *addr = i2c_dev->base + tegra_i2c_reg_addr(i2c_dev, reg); u32 val; - if (!i2c_dev->atomic_mode) + if (!i2c_dev->atomic_mode && !in_irq()) return readl_relaxed_poll_timeout(addr, val, !(val & mask), delay_us, timeout_us); -- GitLab From 2896c93811e39d63a4d9b63ccf12a8fbc226e5e4 Mon Sep 17 00:00:00 2001 From: David Disseldorp Date: Tue, 3 Nov 2020 02:21:58 +0100 Subject: [PATCH 0482/2012] scsi: target: Fix XCOPY NAA identifier lookup When attempting to match EXTENDED COPY CSCD descriptors with corresponding se_devices, target_xcopy_locate_se_dev_e4() currently iterates over LIO's global devices list which includes all configured backstores. This change ensures that only initiator-accessible backstores are considered during CSCD descriptor lookup, according to the session's se_node_acl LUN list. To avoid LUN removal race conditions, device pinning is changed from being configfs based to instead using the se_node_acl lun_ref. Reference: CVE-2020-28374 Fixes: cbf031f425fd ("target: Add support for EXTENDED_COPY copy offload emulation") Reviewed-by: Lee Duncan Signed-off-by: David Disseldorp Signed-off-by: Mike Christie Signed-off-by: Martin K. Petersen --- drivers/target/target_core_xcopy.c | 119 +++++++++++++++++------------ drivers/target/target_core_xcopy.h | 1 + 2 files changed, 71 insertions(+), 49 deletions(-) diff --git a/drivers/target/target_core_xcopy.c b/drivers/target/target_core_xcopy.c index 44e15d7fb2f09..66d6f1d06f219 100644 --- a/drivers/target/target_core_xcopy.c +++ b/drivers/target/target_core_xcopy.c @@ -46,60 +46,83 @@ static int target_xcopy_gen_naa_ieee(struct se_device *dev, unsigned char *buf) return 0; } -struct xcopy_dev_search_info { - const unsigned char *dev_wwn; - struct se_device *found_dev; -}; - +/** + * target_xcopy_locate_se_dev_e4_iter - compare XCOPY NAA device identifiers + * + * @se_dev: device being considered for match + * @dev_wwn: XCOPY requested NAA dev_wwn + * @return: 1 on match, 0 on no-match + */ static int target_xcopy_locate_se_dev_e4_iter(struct se_device *se_dev, - void *data) + const unsigned char *dev_wwn) { - struct xcopy_dev_search_info *info = data; unsigned char tmp_dev_wwn[XCOPY_NAA_IEEE_REGEX_LEN]; int rc; - if (!se_dev->dev_attrib.emulate_3pc) + if (!se_dev->dev_attrib.emulate_3pc) { + pr_debug("XCOPY: emulate_3pc disabled on se_dev %p\n", se_dev); return 0; + } memset(&tmp_dev_wwn[0], 0, XCOPY_NAA_IEEE_REGEX_LEN); target_xcopy_gen_naa_ieee(se_dev, &tmp_dev_wwn[0]); - rc = memcmp(&tmp_dev_wwn[0], info->dev_wwn, XCOPY_NAA_IEEE_REGEX_LEN); - if (rc != 0) - return 0; - - info->found_dev = se_dev; - pr_debug("XCOPY 0xe4: located se_dev: %p\n", se_dev); - - rc = target_depend_item(&se_dev->dev_group.cg_item); + rc = memcmp(&tmp_dev_wwn[0], dev_wwn, XCOPY_NAA_IEEE_REGEX_LEN); if (rc != 0) { - pr_err("configfs_depend_item attempt failed: %d for se_dev: %p\n", - rc, se_dev); - return rc; + pr_debug("XCOPY: skip non-matching: %*ph\n", + XCOPY_NAA_IEEE_REGEX_LEN, tmp_dev_wwn); + return 0; } + pr_debug("XCOPY 0xe4: located se_dev: %p\n", se_dev); - pr_debug("Called configfs_depend_item for se_dev: %p se_dev->se_dev_group: %p\n", - se_dev, &se_dev->dev_group); return 1; } -static int target_xcopy_locate_se_dev_e4(const unsigned char *dev_wwn, - struct se_device **found_dev) +static int target_xcopy_locate_se_dev_e4(struct se_session *sess, + const unsigned char *dev_wwn, + struct se_device **_found_dev, + struct percpu_ref **_found_lun_ref) { - struct xcopy_dev_search_info info; - int ret; - - memset(&info, 0, sizeof(info)); - info.dev_wwn = dev_wwn; - - ret = target_for_each_device(target_xcopy_locate_se_dev_e4_iter, &info); - if (ret == 1) { - *found_dev = info.found_dev; - return 0; - } else { - pr_debug_ratelimited("Unable to locate 0xe4 descriptor for EXTENDED_COPY\n"); - return -EINVAL; + struct se_dev_entry *deve; + struct se_node_acl *nacl; + struct se_lun *this_lun = NULL; + struct se_device *found_dev = NULL; + + /* cmd with NULL sess indicates no associated $FABRIC_MOD */ + if (!sess) + goto err_out; + + pr_debug("XCOPY 0xe4: searching for: %*ph\n", + XCOPY_NAA_IEEE_REGEX_LEN, dev_wwn); + + nacl = sess->se_node_acl; + rcu_read_lock(); + hlist_for_each_entry_rcu(deve, &nacl->lun_entry_hlist, link) { + struct se_device *this_dev; + int rc; + + this_lun = rcu_dereference(deve->se_lun); + this_dev = rcu_dereference_raw(this_lun->lun_se_dev); + + rc = target_xcopy_locate_se_dev_e4_iter(this_dev, dev_wwn); + if (rc) { + if (percpu_ref_tryget_live(&this_lun->lun_ref)) + found_dev = this_dev; + break; + } } + rcu_read_unlock(); + if (found_dev == NULL) + goto err_out; + + pr_debug("lun_ref held for se_dev: %p se_dev->se_dev_group: %p\n", + found_dev, &found_dev->dev_group); + *_found_dev = found_dev; + *_found_lun_ref = &this_lun->lun_ref; + return 0; +err_out: + pr_debug_ratelimited("Unable to locate 0xe4 descriptor for EXTENDED_COPY\n"); + return -EINVAL; } static int target_xcopy_parse_tiddesc_e4(struct se_cmd *se_cmd, struct xcopy_op *xop, @@ -246,12 +269,16 @@ static int target_xcopy_parse_target_descriptors(struct se_cmd *se_cmd, switch (xop->op_origin) { case XCOL_SOURCE_RECV_OP: - rc = target_xcopy_locate_se_dev_e4(xop->dst_tid_wwn, - &xop->dst_dev); + rc = target_xcopy_locate_se_dev_e4(se_cmd->se_sess, + xop->dst_tid_wwn, + &xop->dst_dev, + &xop->remote_lun_ref); break; case XCOL_DEST_RECV_OP: - rc = target_xcopy_locate_se_dev_e4(xop->src_tid_wwn, - &xop->src_dev); + rc = target_xcopy_locate_se_dev_e4(se_cmd->se_sess, + xop->src_tid_wwn, + &xop->src_dev, + &xop->remote_lun_ref); break; default: pr_err("XCOPY CSCD descriptor IDs not found in CSCD list - " @@ -391,18 +418,12 @@ static int xcopy_pt_get_cmd_state(struct se_cmd *se_cmd) static void xcopy_pt_undepend_remotedev(struct xcopy_op *xop) { - struct se_device *remote_dev; - if (xop->op_origin == XCOL_SOURCE_RECV_OP) - remote_dev = xop->dst_dev; + pr_debug("putting dst lun_ref for %p\n", xop->dst_dev); else - remote_dev = xop->src_dev; - - pr_debug("Calling configfs_undepend_item for" - " remote_dev: %p remote_dev->dev_group: %p\n", - remote_dev, &remote_dev->dev_group.cg_item); + pr_debug("putting src lun_ref for %p\n", xop->src_dev); - target_undepend_item(&remote_dev->dev_group.cg_item); + percpu_ref_put(xop->remote_lun_ref); } static void xcopy_pt_release_cmd(struct se_cmd *se_cmd) diff --git a/drivers/target/target_core_xcopy.h b/drivers/target/target_core_xcopy.h index c56a1bde9417b..e5f20005179a8 100644 --- a/drivers/target/target_core_xcopy.h +++ b/drivers/target/target_core_xcopy.h @@ -27,6 +27,7 @@ struct xcopy_op { struct se_device *dst_dev; unsigned char dst_tid_wwn[XCOPY_NAA_IEEE_REGEX_LEN]; unsigned char local_dev_wwn[XCOPY_NAA_IEEE_REGEX_LEN]; + struct percpu_ref *remote_lun_ref; sector_t src_lba; sector_t dst_lba; -- GitLab From 5541075a348b6ca6ac668653f7d2c423ae8e00b6 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 11 Jan 2021 20:16:50 +0100 Subject: [PATCH 0483/2012] bpf: Prevent double bpf_prog_put call from bpf_tracing_prog_attach MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The bpf_tracing_prog_attach error path calls bpf_prog_put on prog, which causes refcount underflow when it's called from link_create function. link_create prog = bpf_prog_get <-- get ... tracing_bpf_link_attach(prog.. bpf_tracing_prog_attach(prog.. out_put_prog: bpf_prog_put(prog); <-- put if (ret < 0) bpf_prog_put(prog); <-- put Removing bpf_prog_put call from bpf_tracing_prog_attach and making sure its callers call it instead. Fixes: 4a1e7c0c63e0 ("bpf: Support attaching freplace programs to multiple attach points") Signed-off-by: Jiri Olsa Signed-off-by: Daniel Borkmann Acked-by: Toke Høiland-Jørgensen Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20210111191650.1241578-1-jolsa@kernel.org --- kernel/bpf/syscall.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index c3bb03c8371fc..e5999d86c76ea 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -2712,7 +2712,6 @@ out_unlock: out_put_prog: if (tgt_prog_fd && tgt_prog) bpf_prog_put(tgt_prog); - bpf_prog_put(prog); return err; } @@ -2825,7 +2824,10 @@ static int bpf_raw_tracepoint_open(const union bpf_attr *attr) tp_name = prog->aux->attach_func_name; break; } - return bpf_tracing_prog_attach(prog, 0, 0); + err = bpf_tracing_prog_attach(prog, 0, 0); + if (err >= 0) + return err; + goto out_put_prog; case BPF_PROG_TYPE_RAW_TRACEPOINT: case BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE: if (strncpy_from_user(buf, -- GitLab From 938288349ca8a9d4b936bf5d2f6dd4526a598974 Mon Sep 17 00:00:00 2001 From: Seb Laveze Date: Mon, 11 Jan 2021 09:14:07 +0100 Subject: [PATCH 0484/2012] dt-bindings: net: dwmac: fix queue priority documentation The priority field is not the queue priority (queue priority is fixed) but a bitmask of priorities assigned to this queue. In receive, priorities relate to tagged frames priorities. In transmit, priorities relate to PFC frames. Signed-off-by: Seb Laveze Link: https://lore.kernel.org/r/20210111081406.1348622-1-sebastien.laveze@oss.nxp.com Signed-off-by: Jakub Kicinski --- Documentation/devicetree/bindings/net/snps,dwmac.yaml | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/Documentation/devicetree/bindings/net/snps,dwmac.yaml b/Documentation/devicetree/bindings/net/snps,dwmac.yaml index b2f6083f556af..dfbf5fe4547ab 100644 --- a/Documentation/devicetree/bindings/net/snps,dwmac.yaml +++ b/Documentation/devicetree/bindings/net/snps,dwmac.yaml @@ -161,7 +161,8 @@ properties: * snps,route-dcbcp, DCB Control Packets * snps,route-up, Untagged Packets * snps,route-multi-broad, Multicast & Broadcast Packets - * snps,priority, RX queue priority (Range 0x0 to 0xF) + * snps,priority, bitmask of the tagged frames priorities assigned to + the queue snps,mtl-tx-config: $ref: /schemas/types.yaml#/definitions/phandle @@ -188,7 +189,10 @@ properties: * snps,idle_slope, unlock on WoL * snps,high_credit, max write outstanding req. limit * snps,low_credit, max read outstanding req. limit - * snps,priority, TX queue priority (Range 0x0 to 0xF) + * snps,priority, bitmask of the priorities assigned to the queue. + When a PFC frame is received with priorities matching the bitmask, + the queue is blocked from transmitting for the pause time specified + in the PFC frame. snps,reset-gpio: deprecated: true -- GitLab From beb401ec50067bfef39e74f0cf80be3de3313e7d Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Sun, 10 Jan 2021 13:17:52 +0100 Subject: [PATCH 0485/2012] r8169: deprecate support for RTL_GIGA_MAC_VER_27 RTL8168dp is ancient anyway, and I haven't seen any trace of its early version 27 yet. This chip versions needs quite some special handling, therefore it would facilitate driver maintenance if support for it could be dropped. For now just disable detection of this chip version. If nobody complains we can remove support for it in the near future. v2: - extend unknown chip version error message Signed-off-by: Heiner Kallweit Link: https://lore.kernel.org/r/ca98f018-a0e1-8762-e95c-f0ad773a0271@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/realtek/r8169_main.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index f06e130512a83..ddcf4870f0256 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -1981,7 +1981,11 @@ static enum mac_version rtl8169_get_mac_version(u16 xid, bool gmii) { 0x7c8, 0x280, RTL_GIGA_MAC_VER_26 }, /* 8168DP family. */ - { 0x7cf, 0x288, RTL_GIGA_MAC_VER_27 }, + /* It seems this early RTL8168dp version never made it to + * the wild. Let's see whether somebody complains, if not + * we'll remove support for this chip version completely. + * { 0x7cf, 0x288, RTL_GIGA_MAC_VER_27 }, + */ { 0x7cf, 0x28a, RTL_GIGA_MAC_VER_28 }, { 0x7cf, 0x28b, RTL_GIGA_MAC_VER_31 }, @@ -5264,7 +5268,7 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) /* Identify chip attached to board */ chipset = rtl8169_get_mac_version(xid, tp->supports_gmii); if (chipset == RTL_GIGA_MAC_NONE) { - dev_err(&pdev->dev, "unknown chip XID %03x\n", xid); + dev_err(&pdev->dev, "unknown chip XID %03x, contact r8169 maintainers (see MAINTAINERS file)\n", xid); return -ENODEV; } -- GitLab From b7a9e0da2d1c954b7c38217a29e002528b90d174 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Sat, 9 Jan 2021 02:01:46 +0200 Subject: [PATCH 0486/2012] net: switchdev: remove vid_begin -> vid_end range from VLAN objects The call path of a switchdev VLAN addition to the bridge looks something like this today: nbp_vlan_init | __br_vlan_set_default_pvid | | | | | br_afspec | | | | | | | v | | | br_process_vlan_info | | | | | | | v | | | br_vlan_info | | | / \ / | | / \ / | | / \ / | | / \ / v v v v v nbp_vlan_add br_vlan_add ------+ | ^ ^ | | | / | | | | / / / | \ br_vlan_get_master/ / v \ ^ / / br_vlan_add_existing \ | / / | \ | / / / \ | / / / \ | / / / \ | / / / v | | v / __vlan_add / / | / / | / v | / __vlan_vid_add | / \ | / v v v br_switchdev_port_vlan_add The ranges UAPI was introduced to the bridge in commit bdced7ef7838 ("bridge: support for multiple vlans and vlan ranges in setlink and dellink requests") (Jan 10 2015). But the VLAN ranges (parsed in br_afspec) have always been passed one by one, through struct bridge_vlan_info tmp_vinfo, to br_vlan_info. So the range never went too far in depth. Then Scott Feldman introduced the switchdev_port_bridge_setlink function in commit 47f8328bb1a4 ("switchdev: add new switchdev bridge setlink"). That marked the introduction of the SWITCHDEV_OBJ_PORT_VLAN, which made full use of the range. But switchdev_port_bridge_setlink was called like this: br_setlink -> br_afspec -> switchdev_port_bridge_setlink Basically, the switchdev and the bridge code were not tightly integrated. Then commit 41c498b9359e ("bridge: restore br_setlink back to original") came, and switchdev drivers were required to implement .ndo_bridge_setlink = switchdev_port_bridge_setlink for a while. In the meantime, commits such as 0944d6b5a2fa ("bridge: try switchdev op first in __vlan_vid_add/del") finally made switchdev penetrate the br_vlan_info() barrier and start to develop the call path we have today. But remember, br_vlan_info() still receives VLANs one by one. Then Arkadi Sharshevsky refactored the switchdev API in 2017 in commit 29ab586c3d83 ("net: switchdev: Remove bridge bypass support from switchdev") so that drivers would not implement .ndo_bridge_setlink any longer. The switchdev_port_bridge_setlink also got deleted. This refactoring removed the parallel bridge_setlink implementation from switchdev, and left the only switchdev VLAN objects to be the ones offloaded from __vlan_vid_add (basically RX filtering) and __vlan_add (the latter coming from commit 9c86ce2c1ae3 ("net: bridge: Notify about bridge VLANs")). That is to say, today the switchdev VLAN object ranges are not used in the kernel. Refactoring the above call path is a bit complicated, when the bridge VLAN call path is already a bit complicated. Let's go off and finish the job of commit 29ab586c3d83 by deleting the bogus iteration through the VLAN ranges from the drivers. Some aspects of this feature never made too much sense in the first place. For example, what is a range of VLANs all having the BRIDGE_VLAN_INFO_PVID flag supposed to mean, when a port can obviously have a single pvid? This particular configuration _is_ denied as of commit 6623c60dc28e ("bridge: vlan: enforce no pvid flag in vlan ranges"), but from an API perspective, the driver still has to play pretend, and only offload the vlan->vid_end as pvid. And the addition of a switchdev VLAN object can modify the flags of another, completely unrelated, switchdev VLAN object! (a VLAN that is PVID will invalidate the PVID flag from whatever other VLAN had previously been offloaded with switchdev and had that flag. Yet switchdev never notifies about that change, drivers are supposed to guess). Nonetheless, having a VLAN range in the API makes error handling look scarier than it really is - unwinding on errors and all of that. When in reality, no one really calls this API with more than one VLAN. It is all unnecessary complexity. And despite appearing pretentious (two-phase transactional model and all), the switchdev API is really sloppy because the VLAN addition and removal operations are not paired with one another (you can add a VLAN 100 times and delete it just once). The bridge notifies through switchdev of a VLAN addition not only when the flags of an existing VLAN change, but also when nothing changes. There are switchdev drivers out there who don't like adding a VLAN that has already been added, and those checks don't really belong at driver level. But the fact that the API contains ranges is yet another factor that prevents this from being addressed in the future. Of the existing switchdev pieces of hardware, it appears that only Mellanox Spectrum supports offloading more than one VLAN at a time, through mlxsw_sp_port_vlan_set. I have kept that code internal to the driver, because there is some more bookkeeping that makes use of it, but I deleted it from the switchdev API. But since the switchdev support for ranges has already been de facto deleted by a Mellanox employee and nobody noticed for 4 years, I'm going to assume it's not a biggie. Signed-off-by: Vladimir Oltean Reviewed-by: Ido Schimmel # switchdev and mlxsw Reviewed-by: Florian Fainelli Reviewed-by: Kurt Kanzenbach # hellcreek Signed-off-by: Jakub Kicinski --- drivers/net/dsa/b53/b53_common.c | 54 ++++---- drivers/net/dsa/bcm_sf2_cfp.c | 3 +- drivers/net/dsa/dsa_loop.c | 46 +++---- drivers/net/dsa/hirschmann/hellcreek.c | 22 ++-- drivers/net/dsa/lantiq_gswip.c | 61 ++++------ drivers/net/dsa/microchip/ksz8795.c | 62 +++++----- drivers/net/dsa/microchip/ksz9477.c | 66 +++++----- drivers/net/dsa/mt7530.c | 32 ++--- drivers/net/dsa/mv88e6xxx/chip.c | 89 +++++++------- drivers/net/dsa/ocelot/felix.c | 42 ++----- drivers/net/dsa/qca8k.c | 17 +-- drivers/net/dsa/rtl8366.c | 115 ++++++++---------- drivers/net/dsa/sja1105/sja1105_main.c | 33 ++--- .../marvell/prestera/prestera_switchdev.c | 18 +-- .../mellanox/mlxsw/spectrum_switchdev.c | 63 +++------- drivers/net/ethernet/mscc/ocelot_net.c | 41 ++----- drivers/net/ethernet/rocker/rocker_ofdpa.c | 20 +-- drivers/net/ethernet/ti/cpsw_switchdev.c | 33 +---- drivers/staging/fsl-dpaa2/ethsw/ethsw.c | 47 +++---- include/net/switchdev.h | 3 +- net/bridge/br_switchdev.c | 6 +- net/dsa/slave.c | 23 ++-- 22 files changed, 324 insertions(+), 572 deletions(-) diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index 85dddd87bcfcf..bab174c052b3e 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -1393,7 +1393,7 @@ int b53_vlan_prepare(struct dsa_switch *ds, int port, { struct b53_device *dev = ds->priv; - if ((is5325(dev) || is5365(dev)) && vlan->vid_begin == 0) + if ((is5325(dev) || is5365(dev)) && vlan->vid == 0) return -EOPNOTSUPP; /* Port 7 on 7278 connects to the ASP's UniMAC which is not capable of @@ -1404,7 +1404,7 @@ int b53_vlan_prepare(struct dsa_switch *ds, int port, !(vlan->flags & BRIDGE_VLAN_INFO_UNTAGGED)) return -EINVAL; - if (vlan->vid_end > dev->num_vlans) + if (vlan->vid > dev->num_vlans) return -ERANGE; b53_enable_vlan(dev, true, ds->vlan_filtering); @@ -1420,30 +1420,27 @@ void b53_vlan_add(struct dsa_switch *ds, int port, bool untagged = vlan->flags & BRIDGE_VLAN_INFO_UNTAGGED; bool pvid = vlan->flags & BRIDGE_VLAN_INFO_PVID; struct b53_vlan *vl; - u16 vid; - for (vid = vlan->vid_begin; vid <= vlan->vid_end; ++vid) { - vl = &dev->vlans[vid]; + vl = &dev->vlans[vlan->vid]; - b53_get_vlan_entry(dev, vid, vl); + b53_get_vlan_entry(dev, vlan->vid, vl); - if (vid == 0 && vid == b53_default_pvid(dev)) - untagged = true; + if (vlan->vid == 0 && vlan->vid == b53_default_pvid(dev)) + untagged = true; - vl->members |= BIT(port); - if (untagged && !dsa_is_cpu_port(ds, port)) - vl->untag |= BIT(port); - else - vl->untag &= ~BIT(port); + vl->members |= BIT(port); + if (untagged && !dsa_is_cpu_port(ds, port)) + vl->untag |= BIT(port); + else + vl->untag &= ~BIT(port); - b53_set_vlan_entry(dev, vid, vl); - b53_fast_age_vlan(dev, vid); - } + b53_set_vlan_entry(dev, vlan->vid, vl); + b53_fast_age_vlan(dev, vlan->vid); if (pvid && !dsa_is_cpu_port(ds, port)) { b53_write16(dev, B53_VLAN_PAGE, B53_VLAN_PORT_DEF_TAG(port), - vlan->vid_end); - b53_fast_age_vlan(dev, vid); + vlan->vid); + b53_fast_age_vlan(dev, vlan->vid); } } EXPORT_SYMBOL(b53_vlan_add); @@ -1454,27 +1451,24 @@ int b53_vlan_del(struct dsa_switch *ds, int port, struct b53_device *dev = ds->priv; bool untagged = vlan->flags & BRIDGE_VLAN_INFO_UNTAGGED; struct b53_vlan *vl; - u16 vid; u16 pvid; b53_read16(dev, B53_VLAN_PAGE, B53_VLAN_PORT_DEF_TAG(port), &pvid); - for (vid = vlan->vid_begin; vid <= vlan->vid_end; ++vid) { - vl = &dev->vlans[vid]; + vl = &dev->vlans[vlan->vid]; - b53_get_vlan_entry(dev, vid, vl); + b53_get_vlan_entry(dev, vlan->vid, vl); - vl->members &= ~BIT(port); + vl->members &= ~BIT(port); - if (pvid == vid) - pvid = b53_default_pvid(dev); + if (pvid == vlan->vid) + pvid = b53_default_pvid(dev); - if (untagged && !dsa_is_cpu_port(ds, port)) - vl->untag &= ~(BIT(port)); + if (untagged && !dsa_is_cpu_port(ds, port)) + vl->untag &= ~(BIT(port)); - b53_set_vlan_entry(dev, vid, vl); - b53_fast_age_vlan(dev, vid); - } + b53_set_vlan_entry(dev, vlan->vid, vl); + b53_fast_age_vlan(dev, vlan->vid); b53_write16(dev, B53_VLAN_PAGE, B53_VLAN_PORT_DEF_TAG(port), pvid); b53_fast_age_vlan(dev, pvid); diff --git a/drivers/net/dsa/bcm_sf2_cfp.c b/drivers/net/dsa/bcm_sf2_cfp.c index d82cee5d92022..59d799ac1b607 100644 --- a/drivers/net/dsa/bcm_sf2_cfp.c +++ b/drivers/net/dsa/bcm_sf2_cfp.c @@ -885,8 +885,7 @@ static int bcm_sf2_cfp_rule_insert(struct dsa_switch *ds, int port, return -EINVAL; vid = be16_to_cpu(fs->h_ext.vlan_tci) & VLAN_VID_MASK; - vlan.vid_begin = vid; - vlan.vid_end = vid; + vlan.vid = vid; if (cpu_to_be32(fs->h_ext.data[1]) & 1) vlan.flags = BRIDGE_VLAN_INFO_UNTAGGED; else diff --git a/drivers/net/dsa/dsa_loop.c b/drivers/net/dsa/dsa_loop.c index e38906ae8f235..3be9f665d174b 100644 --- a/drivers/net/dsa/dsa_loop.c +++ b/drivers/net/dsa/dsa_loop.c @@ -206,13 +206,12 @@ dsa_loop_port_vlan_prepare(struct dsa_switch *ds, int port, struct dsa_loop_priv *ps = ds->priv; struct mii_bus *bus = ps->bus; - dev_dbg(ds->dev, "%s: port: %d, vlan: %d-%d", - __func__, port, vlan->vid_begin, vlan->vid_end); + dev_dbg(ds->dev, "%s: port: %d, vlan: %d", __func__, port, vlan->vid); /* Just do a sleeping operation to make lockdep checks effective */ mdiobus_read(bus, ps->port_base + port, MII_BMSR); - if (vlan->vid_end > ARRAY_SIZE(ps->vlans)) + if (vlan->vid > ARRAY_SIZE(ps->vlans)) return -ERANGE; return 0; @@ -226,26 +225,23 @@ static void dsa_loop_port_vlan_add(struct dsa_switch *ds, int port, struct dsa_loop_priv *ps = ds->priv; struct mii_bus *bus = ps->bus; struct dsa_loop_vlan *vl; - u16 vid; /* Just do a sleeping operation to make lockdep checks effective */ mdiobus_read(bus, ps->port_base + port, MII_BMSR); - for (vid = vlan->vid_begin; vid <= vlan->vid_end; ++vid) { - vl = &ps->vlans[vid]; + vl = &ps->vlans[vlan->vid]; - vl->members |= BIT(port); - if (untagged) - vl->untagged |= BIT(port); - else - vl->untagged &= ~BIT(port); + vl->members |= BIT(port); + if (untagged) + vl->untagged |= BIT(port); + else + vl->untagged &= ~BIT(port); - dev_dbg(ds->dev, "%s: port: %d vlan: %d, %stagged, pvid: %d\n", - __func__, port, vid, untagged ? "un" : "", pvid); - } + dev_dbg(ds->dev, "%s: port: %d vlan: %d, %stagged, pvid: %d\n", + __func__, port, vlan->vid, untagged ? "un" : "", pvid); if (pvid) - ps->ports[port].pvid = vid; + ps->ports[port].pvid = vlan->vid; } static int dsa_loop_port_vlan_del(struct dsa_switch *ds, int port, @@ -253,26 +249,24 @@ static int dsa_loop_port_vlan_del(struct dsa_switch *ds, int port, { bool untagged = vlan->flags & BRIDGE_VLAN_INFO_UNTAGGED; struct dsa_loop_priv *ps = ds->priv; + u16 pvid = ps->ports[port].pvid; struct mii_bus *bus = ps->bus; struct dsa_loop_vlan *vl; - u16 vid, pvid = ps->ports[port].pvid; /* Just do a sleeping operation to make lockdep checks effective */ mdiobus_read(bus, ps->port_base + port, MII_BMSR); - for (vid = vlan->vid_begin; vid <= vlan->vid_end; ++vid) { - vl = &ps->vlans[vid]; + vl = &ps->vlans[vlan->vid]; - vl->members &= ~BIT(port); - if (untagged) - vl->untagged &= ~BIT(port); + vl->members &= ~BIT(port); + if (untagged) + vl->untagged &= ~BIT(port); - if (pvid == vid) - pvid = 1; + if (pvid == vlan->vid) + pvid = 1; - dev_dbg(ds->dev, "%s: port: %d vlan: %d, %stagged, pvid: %d\n", - __func__, port, vid, untagged ? "un" : "", pvid); - } + dev_dbg(ds->dev, "%s: port: %d vlan: %d, %stagged, pvid: %d\n", + __func__, port, vlan->vid, untagged ? "un" : "", pvid); ps->ports[port].pvid = pvid; return 0; diff --git a/drivers/net/dsa/hirschmann/hellcreek.c b/drivers/net/dsa/hirschmann/hellcreek.c index 6420b76ea37c2..a59cc40170fc4 100644 --- a/drivers/net/dsa/hirschmann/hellcreek.c +++ b/drivers/net/dsa/hirschmann/hellcreek.c @@ -348,14 +348,12 @@ static int hellcreek_vlan_prepare(struct dsa_switch *ds, int port, */ for (i = 0; i < hellcreek->pdata->num_ports; ++i) { const u16 restricted_vid = hellcreek_private_vid(i); - u16 vid; if (!dsa_is_user_port(ds, i)) continue; - for (vid = vlan->vid_begin; vid <= vlan->vid_end; ++vid) - if (vid == restricted_vid) - return -EBUSY; + if (vlan->vid == restricted_vid) + return -EBUSY; } return 0; @@ -446,28 +444,22 @@ static void hellcreek_vlan_add(struct dsa_switch *ds, int port, bool untagged = vlan->flags & BRIDGE_VLAN_INFO_UNTAGGED; bool pvid = vlan->flags & BRIDGE_VLAN_INFO_PVID; struct hellcreek *hellcreek = ds->priv; - u16 vid; - dev_dbg(hellcreek->dev, "Add VLANs (%d -- %d) on port %d, %s, %s\n", - vlan->vid_begin, vlan->vid_end, port, - untagged ? "untagged" : "tagged", + dev_dbg(hellcreek->dev, "Add VLAN %d on port %d, %s, %s\n", + vlan->vid, port, untagged ? "untagged" : "tagged", pvid ? "PVID" : "no PVID"); - for (vid = vlan->vid_begin; vid <= vlan->vid_end; ++vid) - hellcreek_apply_vlan(hellcreek, port, vid, pvid, untagged); + hellcreek_apply_vlan(hellcreek, port, vlan->vid, pvid, untagged); } static int hellcreek_vlan_del(struct dsa_switch *ds, int port, const struct switchdev_obj_port_vlan *vlan) { struct hellcreek *hellcreek = ds->priv; - u16 vid; - dev_dbg(hellcreek->dev, "Remove VLANs (%d -- %d) on port %d\n", - vlan->vid_begin, vlan->vid_end, port); + dev_dbg(hellcreek->dev, "Remove VLAN %d on port %d\n", vlan->vid, port); - for (vid = vlan->vid_begin; vid <= vlan->vid_end; ++vid) - hellcreek_unapply_vlan(hellcreek, port, vid); + hellcreek_unapply_vlan(hellcreek, port, vlan->vid); return 0; } diff --git a/drivers/net/dsa/lantiq_gswip.c b/drivers/net/dsa/lantiq_gswip.c index 662e68a0e7e61..72cadd16056fd 100644 --- a/drivers/net/dsa/lantiq_gswip.c +++ b/drivers/net/dsa/lantiq_gswip.c @@ -1146,43 +1146,38 @@ static int gswip_port_vlan_prepare(struct dsa_switch *ds, int port, struct gswip_priv *priv = ds->priv; struct net_device *bridge = dsa_to_port(ds, port)->bridge_dev; unsigned int max_ports = priv->hw_info->max_ports; - u16 vid; - int i; int pos = max_ports; + int i, idx = -1; /* We only support VLAN filtering on bridges */ if (!dsa_is_cpu_port(ds, port) && !bridge) return -EOPNOTSUPP; - for (vid = vlan->vid_begin; vid <= vlan->vid_end; ++vid) { - int idx = -1; + /* Check if there is already a page for this VLAN */ + for (i = max_ports; i < ARRAY_SIZE(priv->vlans); i++) { + if (priv->vlans[i].bridge == bridge && + priv->vlans[i].vid == vlan->vid) { + idx = i; + break; + } + } - /* Check if there is already a page for this VLAN */ - for (i = max_ports; i < ARRAY_SIZE(priv->vlans); i++) { - if (priv->vlans[i].bridge == bridge && - priv->vlans[i].vid == vid) { - idx = i; + /* If this VLAN is not programmed yet, we have to reserve + * one entry in the VLAN table. Make sure we start at the + * next position round. + */ + if (idx == -1) { + /* Look for a free slot */ + for (; pos < ARRAY_SIZE(priv->vlans); pos++) { + if (!priv->vlans[pos].bridge) { + idx = pos; + pos++; break; } } - /* If this VLAN is not programmed yet, we have to reserve - * one entry in the VLAN table. Make sure we start at the - * next position round. - */ - if (idx == -1) { - /* Look for a free slot */ - for (; pos < ARRAY_SIZE(priv->vlans); pos++) { - if (!priv->vlans[pos].bridge) { - idx = pos; - pos++; - break; - } - } - - if (idx == -1) - return -ENOSPC; - } + if (idx == -1) + return -ENOSPC; } return 0; @@ -1195,7 +1190,6 @@ static void gswip_port_vlan_add(struct dsa_switch *ds, int port, struct net_device *bridge = dsa_to_port(ds, port)->bridge_dev; bool untagged = vlan->flags & BRIDGE_VLAN_INFO_UNTAGGED; bool pvid = vlan->flags & BRIDGE_VLAN_INFO_PVID; - u16 vid; /* We have to receive all packets on the CPU port and should not * do any VLAN filtering here. This is also called with bridge @@ -1205,8 +1199,7 @@ static void gswip_port_vlan_add(struct dsa_switch *ds, int port, if (dsa_is_cpu_port(ds, port)) return; - for (vid = vlan->vid_begin; vid <= vlan->vid_end; ++vid) - gswip_vlan_add_aware(priv, bridge, port, vid, untagged, pvid); + gswip_vlan_add_aware(priv, bridge, port, vlan->vid, untagged, pvid); } static int gswip_port_vlan_del(struct dsa_switch *ds, int port, @@ -1215,8 +1208,6 @@ static int gswip_port_vlan_del(struct dsa_switch *ds, int port, struct gswip_priv *priv = ds->priv; struct net_device *bridge = dsa_to_port(ds, port)->bridge_dev; bool pvid = vlan->flags & BRIDGE_VLAN_INFO_PVID; - u16 vid; - int err; /* We have to receive all packets on the CPU port and should not * do any VLAN filtering here. This is also called with bridge @@ -1226,13 +1217,7 @@ static int gswip_port_vlan_del(struct dsa_switch *ds, int port, if (dsa_is_cpu_port(ds, port)) return 0; - for (vid = vlan->vid_begin; vid <= vlan->vid_end; ++vid) { - err = gswip_vlan_remove(priv, bridge, port, vid, pvid, true); - if (err) - return err; - } - - return 0; + return gswip_vlan_remove(priv, bridge, port, vlan->vid, pvid, true); } static void gswip_port_fast_age(struct dsa_switch *ds, int port) diff --git a/drivers/net/dsa/microchip/ksz8795.c b/drivers/net/dsa/microchip/ksz8795.c index c973db101b729..a4c814f6a4b56 100644 --- a/drivers/net/dsa/microchip/ksz8795.c +++ b/drivers/net/dsa/microchip/ksz8795.c @@ -801,32 +801,32 @@ static void ksz8795_port_vlan_add(struct dsa_switch *ds, int port, { bool untagged = vlan->flags & BRIDGE_VLAN_INFO_UNTAGGED; struct ksz_device *dev = ds->priv; - u16 data, vid, new_pvid = 0; + u16 data, new_pvid = 0; u8 fid, member, valid; ksz_port_cfg(dev, port, P_TAG_CTRL, PORT_REMOVE_TAG, untagged); - for (vid = vlan->vid_begin; vid <= vlan->vid_end; vid++) { - ksz8795_r_vlan_table(dev, vid, &data); - ksz8795_from_vlan(data, &fid, &member, &valid); + ksz8795_r_vlan_table(dev, vlan->vid, &data); + ksz8795_from_vlan(data, &fid, &member, &valid); - /* First time to setup the VLAN entry. */ - if (!valid) { - /* Need to find a way to map VID to FID. */ - fid = 1; - valid = 1; - } - member |= BIT(port); + /* First time to setup the VLAN entry. */ + if (!valid) { + /* Need to find a way to map VID to FID. */ + fid = 1; + valid = 1; + } + member |= BIT(port); - ksz8795_to_vlan(fid, member, valid, &data); - ksz8795_w_vlan_table(dev, vid, data); + ksz8795_to_vlan(fid, member, valid, &data); + ksz8795_w_vlan_table(dev, vlan->vid, data); - /* change PVID */ - if (vlan->flags & BRIDGE_VLAN_INFO_PVID) - new_pvid = vid; - } + /* change PVID */ + if (vlan->flags & BRIDGE_VLAN_INFO_PVID) + new_pvid = vlan->vid; if (new_pvid) { + u16 vid; + ksz_pread16(dev, port, REG_PORT_CTRL_VID, &vid); vid &= 0xfff; vid |= new_pvid; @@ -839,7 +839,7 @@ static int ksz8795_port_vlan_del(struct dsa_switch *ds, int port, { bool untagged = vlan->flags & BRIDGE_VLAN_INFO_UNTAGGED; struct ksz_device *dev = ds->priv; - u16 data, vid, pvid, new_pvid = 0; + u16 data, pvid, new_pvid = 0; u8 fid, member, valid; ksz_pread16(dev, port, REG_PORT_CTRL_VID, &pvid); @@ -847,24 +847,22 @@ static int ksz8795_port_vlan_del(struct dsa_switch *ds, int port, ksz_port_cfg(dev, port, P_TAG_CTRL, PORT_REMOVE_TAG, untagged); - for (vid = vlan->vid_begin; vid <= vlan->vid_end; vid++) { - ksz8795_r_vlan_table(dev, vid, &data); - ksz8795_from_vlan(data, &fid, &member, &valid); + ksz8795_r_vlan_table(dev, vlan->vid, &data); + ksz8795_from_vlan(data, &fid, &member, &valid); - member &= ~BIT(port); + member &= ~BIT(port); - /* Invalidate the entry if no more member. */ - if (!member) { - fid = 0; - valid = 0; - } + /* Invalidate the entry if no more member. */ + if (!member) { + fid = 0; + valid = 0; + } - if (pvid == vid) - new_pvid = 1; + if (pvid == vlan->vid) + new_pvid = 1; - ksz8795_to_vlan(fid, member, valid, &data); - ksz8795_w_vlan_table(dev, vid, data); - } + ksz8795_to_vlan(fid, member, valid, &data); + ksz8795_w_vlan_table(dev, vlan->vid, data); if (new_pvid != pvid) ksz_pwrite16(dev, port, REG_PORT_CTRL_VID, pvid); diff --git a/drivers/net/dsa/microchip/ksz9477.c b/drivers/net/dsa/microchip/ksz9477.c index 42e647b67abd5..5a6ac0749ab0e 100644 --- a/drivers/net/dsa/microchip/ksz9477.c +++ b/drivers/net/dsa/microchip/ksz9477.c @@ -519,33 +519,30 @@ static void ksz9477_port_vlan_add(struct dsa_switch *ds, int port, { struct ksz_device *dev = ds->priv; u32 vlan_table[3]; - u16 vid; bool untagged = vlan->flags & BRIDGE_VLAN_INFO_UNTAGGED; - for (vid = vlan->vid_begin; vid <= vlan->vid_end; vid++) { - if (ksz9477_get_vlan_table(dev, vid, vlan_table)) { - dev_dbg(dev->dev, "Failed to get vlan table\n"); - return; - } - - vlan_table[0] = VLAN_VALID | (vid & VLAN_FID_M); - if (untagged) - vlan_table[1] |= BIT(port); - else - vlan_table[1] &= ~BIT(port); - vlan_table[1] &= ~(BIT(dev->cpu_port)); + if (ksz9477_get_vlan_table(dev, vlan->vid, vlan_table)) { + dev_dbg(dev->dev, "Failed to get vlan table\n"); + return; + } - vlan_table[2] |= BIT(port) | BIT(dev->cpu_port); + vlan_table[0] = VLAN_VALID | (vlan->vid & VLAN_FID_M); + if (untagged) + vlan_table[1] |= BIT(port); + else + vlan_table[1] &= ~BIT(port); + vlan_table[1] &= ~(BIT(dev->cpu_port)); - if (ksz9477_set_vlan_table(dev, vid, vlan_table)) { - dev_dbg(dev->dev, "Failed to set vlan table\n"); - return; - } + vlan_table[2] |= BIT(port) | BIT(dev->cpu_port); - /* change PVID */ - if (vlan->flags & BRIDGE_VLAN_INFO_PVID) - ksz_pwrite16(dev, port, REG_PORT_DEFAULT_VID, vid); + if (ksz9477_set_vlan_table(dev, vlan->vid, vlan_table)) { + dev_dbg(dev->dev, "Failed to set vlan table\n"); + return; } + + /* change PVID */ + if (vlan->flags & BRIDGE_VLAN_INFO_PVID) + ksz_pwrite16(dev, port, REG_PORT_DEFAULT_VID, vlan->vid); } static int ksz9477_port_vlan_del(struct dsa_switch *ds, int port, @@ -554,30 +551,27 @@ static int ksz9477_port_vlan_del(struct dsa_switch *ds, int port, struct ksz_device *dev = ds->priv; bool untagged = vlan->flags & BRIDGE_VLAN_INFO_UNTAGGED; u32 vlan_table[3]; - u16 vid; u16 pvid; ksz_pread16(dev, port, REG_PORT_DEFAULT_VID, &pvid); pvid = pvid & 0xFFF; - for (vid = vlan->vid_begin; vid <= vlan->vid_end; vid++) { - if (ksz9477_get_vlan_table(dev, vid, vlan_table)) { - dev_dbg(dev->dev, "Failed to get vlan table\n"); - return -ETIMEDOUT; - } + if (ksz9477_get_vlan_table(dev, vlan->vid, vlan_table)) { + dev_dbg(dev->dev, "Failed to get vlan table\n"); + return -ETIMEDOUT; + } - vlan_table[2] &= ~BIT(port); + vlan_table[2] &= ~BIT(port); - if (pvid == vid) - pvid = 1; + if (pvid == vlan->vid) + pvid = 1; - if (untagged) - vlan_table[1] &= ~BIT(port); + if (untagged) + vlan_table[1] &= ~BIT(port); - if (ksz9477_set_vlan_table(dev, vid, vlan_table)) { - dev_dbg(dev->dev, "Failed to set vlan table\n"); - return -ETIMEDOUT; - } + if (ksz9477_set_vlan_table(dev, vlan->vid, vlan_table)) { + dev_dbg(dev->dev, "Failed to set vlan table\n"); + return -ETIMEDOUT; } ksz_pwrite16(dev, port, REG_PORT_DEFAULT_VID, pvid); diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c index a67cac15a724a..31d2d23bc8157 100644 --- a/drivers/net/dsa/mt7530.c +++ b/drivers/net/dsa/mt7530.c @@ -1501,20 +1501,16 @@ mt7530_port_vlan_add(struct dsa_switch *ds, int port, bool pvid = vlan->flags & BRIDGE_VLAN_INFO_PVID; struct mt7530_hw_vlan_entry new_entry; struct mt7530_priv *priv = ds->priv; - u16 vid; mutex_lock(&priv->reg_mutex); - for (vid = vlan->vid_begin; vid <= vlan->vid_end; ++vid) { - mt7530_hw_vlan_entry_init(&new_entry, port, untagged); - mt7530_hw_vlan_update(priv, vid, &new_entry, - mt7530_hw_vlan_add); - } + mt7530_hw_vlan_entry_init(&new_entry, port, untagged); + mt7530_hw_vlan_update(priv, vlan->vid, &new_entry, mt7530_hw_vlan_add); if (pvid) { mt7530_rmw(priv, MT7530_PPBV1_P(port), G0_PORT_VID_MASK, - G0_PORT_VID(vlan->vid_end)); - priv->ports[port].pvid = vlan->vid_end; + G0_PORT_VID(vlan->vid)); + priv->ports[port].pvid = vlan->vid; } mutex_unlock(&priv->reg_mutex); @@ -1526,22 +1522,20 @@ mt7530_port_vlan_del(struct dsa_switch *ds, int port, { struct mt7530_hw_vlan_entry target_entry; struct mt7530_priv *priv = ds->priv; - u16 vid, pvid; + u16 pvid; mutex_lock(&priv->reg_mutex); pvid = priv->ports[port].pvid; - for (vid = vlan->vid_begin; vid <= vlan->vid_end; ++vid) { - mt7530_hw_vlan_entry_init(&target_entry, port, 0); - mt7530_hw_vlan_update(priv, vid, &target_entry, - mt7530_hw_vlan_del); + mt7530_hw_vlan_entry_init(&target_entry, port, 0); + mt7530_hw_vlan_update(priv, vlan->vid, &target_entry, + mt7530_hw_vlan_del); - /* PVID is being restored to the default whenever the PVID port - * is being removed from the VLAN. - */ - if (pvid == vid) - pvid = G0_PORT_VID_DEF; - } + /* PVID is being restored to the default whenever the PVID port + * is being removed from the VLAN. + */ + if (pvid == vlan->vid) + pvid = G0_PORT_VID_DEF; mt7530_rmw(priv, MT7530_PPBV1_P(port), G0_PORT_VID_MASK, pvid); priv->ports[port].pvid = pvid; diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index eafe6bedc692e..4834be9e4e864 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -1529,7 +1529,7 @@ static int mv88e6xxx_atu_new(struct mv88e6xxx_chip *chip, u16 *fid) } static int mv88e6xxx_port_check_hw_vlan(struct dsa_switch *ds, int port, - u16 vid_begin, u16 vid_end) + u16 vid) { struct mv88e6xxx_chip *chip = ds->priv; struct mv88e6xxx_vtu_entry vlan; @@ -1539,47 +1539,45 @@ static int mv88e6xxx_port_check_hw_vlan(struct dsa_switch *ds, int port, if (dsa_is_dsa_port(ds, port) || dsa_is_cpu_port(ds, port)) return 0; - if (!vid_begin) + if (!vid) return -EOPNOTSUPP; - vlan.vid = vid_begin - 1; + vlan.vid = vid - 1; vlan.valid = false; - do { - err = mv88e6xxx_vtu_getnext(chip, &vlan); - if (err) - return err; + err = mv88e6xxx_vtu_getnext(chip, &vlan); + if (err) + return err; - if (!vlan.valid) - break; + if (!vlan.valid) + return 0; - if (vlan.vid > vid_end) - break; + if (vlan.vid != vid) + return 0; - for (i = 0; i < mv88e6xxx_num_ports(chip); ++i) { - if (dsa_is_dsa_port(ds, i) || dsa_is_cpu_port(ds, i)) - continue; + for (i = 0; i < mv88e6xxx_num_ports(chip); ++i) { + if (dsa_is_dsa_port(ds, i) || dsa_is_cpu_port(ds, i)) + continue; - if (!dsa_to_port(ds, i)->slave) - continue; + if (!dsa_to_port(ds, i)->slave) + continue; - if (vlan.member[i] == - MV88E6XXX_G1_VTU_DATA_MEMBER_TAG_NON_MEMBER) - continue; + if (vlan.member[i] == + MV88E6XXX_G1_VTU_DATA_MEMBER_TAG_NON_MEMBER) + continue; - if (dsa_to_port(ds, i)->bridge_dev == - dsa_to_port(ds, port)->bridge_dev) - break; /* same bridge, check next VLAN */ + if (dsa_to_port(ds, i)->bridge_dev == + dsa_to_port(ds, port)->bridge_dev) + break; /* same bridge, check next VLAN */ - if (!dsa_to_port(ds, i)->bridge_dev) - continue; + if (!dsa_to_port(ds, i)->bridge_dev) + continue; - dev_err(ds->dev, "p%d: hw VLAN %d already used by port %d in %s\n", - port, vlan.vid, i, - netdev_name(dsa_to_port(ds, i)->bridge_dev)); - return -EOPNOTSUPP; - } - } while (vlan.vid < vid_end); + dev_err(ds->dev, "p%d: hw VLAN %d already used by port %d in %s\n", + port, vlan.vid, i, + netdev_name(dsa_to_port(ds, i)->bridge_dev)); + return -EOPNOTSUPP; + } return 0; } @@ -1617,8 +1615,7 @@ mv88e6xxx_port_vlan_prepare(struct dsa_switch *ds, int port, * members, do not support it (yet) and fallback to software VLAN. */ mv88e6xxx_reg_lock(chip); - err = mv88e6xxx_port_check_hw_vlan(ds, port, vlan->vid_begin, - vlan->vid_end); + err = mv88e6xxx_port_check_hw_vlan(ds, port, vlan->vid); mv88e6xxx_reg_unlock(chip); /* We don't need any dynamic resource from the kernel (yet), @@ -1978,7 +1975,6 @@ static void mv88e6xxx_port_vlan_add(struct dsa_switch *ds, int port, bool pvid = vlan->flags & BRIDGE_VLAN_INFO_PVID; bool warn; u8 member; - u16 vid; if (!mv88e6xxx_max_vid(chip)) return; @@ -1997,14 +1993,13 @@ static void mv88e6xxx_port_vlan_add(struct dsa_switch *ds, int port, mv88e6xxx_reg_lock(chip); - for (vid = vlan->vid_begin; vid <= vlan->vid_end; ++vid) - if (mv88e6xxx_port_vlan_join(chip, port, vid, member, warn)) - dev_err(ds->dev, "p%d: failed to add VLAN %d%c\n", port, - vid, untagged ? 'u' : 't'); + if (mv88e6xxx_port_vlan_join(chip, port, vlan->vid, member, warn)) + dev_err(ds->dev, "p%d: failed to add VLAN %d%c\n", port, + vlan->vid, untagged ? 'u' : 't'); - if (pvid && mv88e6xxx_port_set_pvid(chip, port, vlan->vid_end)) + if (pvid && mv88e6xxx_port_set_pvid(chip, port, vlan->vid)) dev_err(ds->dev, "p%d: failed to set PVID %d\n", port, - vlan->vid_end); + vlan->vid); mv88e6xxx_reg_unlock(chip); } @@ -2055,8 +2050,8 @@ static int mv88e6xxx_port_vlan_del(struct dsa_switch *ds, int port, const struct switchdev_obj_port_vlan *vlan) { struct mv88e6xxx_chip *chip = ds->priv; - u16 pvid, vid; int err = 0; + u16 pvid; if (!mv88e6xxx_max_vid(chip)) return -EOPNOTSUPP; @@ -2067,16 +2062,14 @@ static int mv88e6xxx_port_vlan_del(struct dsa_switch *ds, int port, if (err) goto unlock; - for (vid = vlan->vid_begin; vid <= vlan->vid_end; ++vid) { - err = mv88e6xxx_port_vlan_leave(chip, port, vid); + err = mv88e6xxx_port_vlan_leave(chip, port, vlan->vid); + if (err) + goto unlock; + + if (vlan->vid == pvid) { + err = mv88e6xxx_port_set_pvid(chip, port, 0); if (err) goto unlock; - - if (vid == pvid) { - err = mv88e6xxx_port_set_pvid(chip, port, 0); - if (err) - goto unlock; - } } unlock: diff --git a/drivers/net/dsa/ocelot/felix.c b/drivers/net/dsa/ocelot/felix.c index 90c3c76f21b20..216fcfbc5dafd 100644 --- a/drivers/net/dsa/ocelot/felix.c +++ b/drivers/net/dsa/ocelot/felix.c @@ -116,8 +116,7 @@ static int felix_vlan_prepare(struct dsa_switch *ds, int port, const struct switchdev_obj_port_vlan *vlan) { struct ocelot *ocelot = ds->priv; - u16 vid, flags = vlan->flags; - int err; + u16 flags = vlan->flags; /* Ocelot switches copy frames as-is to the CPU, so the flags: * egress-untagged or not, pvid or not, make no difference. This @@ -130,15 +129,9 @@ static int felix_vlan_prepare(struct dsa_switch *ds, int port, if (port == ocelot->npi) return 0; - for (vid = vlan->vid_begin; vid <= vlan->vid_end; vid++) { - err = ocelot_vlan_prepare(ocelot, port, vid, - flags & BRIDGE_VLAN_INFO_PVID, - flags & BRIDGE_VLAN_INFO_UNTAGGED); - if (err) - return err; - } - - return 0; + return ocelot_vlan_prepare(ocelot, port, vlan->vid, + flags & BRIDGE_VLAN_INFO_PVID, + flags & BRIDGE_VLAN_INFO_UNTAGGED); } static int felix_vlan_filtering(struct dsa_switch *ds, int port, bool enabled, @@ -154,37 +147,18 @@ static void felix_vlan_add(struct dsa_switch *ds, int port, { struct ocelot *ocelot = ds->priv; u16 flags = vlan->flags; - u16 vid; - int err; - for (vid = vlan->vid_begin; vid <= vlan->vid_end; vid++) { - err = ocelot_vlan_add(ocelot, port, vid, - flags & BRIDGE_VLAN_INFO_PVID, - flags & BRIDGE_VLAN_INFO_UNTAGGED); - if (err) { - dev_err(ds->dev, "Failed to add VLAN %d to port %d: %d\n", - vid, port, err); - return; - } - } + ocelot_vlan_add(ocelot, port, vlan->vid, + flags & BRIDGE_VLAN_INFO_PVID, + flags & BRIDGE_VLAN_INFO_UNTAGGED); } static int felix_vlan_del(struct dsa_switch *ds, int port, const struct switchdev_obj_port_vlan *vlan) { struct ocelot *ocelot = ds->priv; - u16 vid; - int err; - for (vid = vlan->vid_begin; vid <= vlan->vid_end; vid++) { - err = ocelot_vlan_del(ocelot, port, vid); - if (err) { - dev_err(ds->dev, "Failed to remove VLAN %d from port %d: %d\n", - vid, port, err); - return err; - } - } - return 0; + return ocelot_vlan_del(ocelot, port, vlan->vid); } static int felix_port_enable(struct dsa_switch *ds, int port, diff --git a/drivers/net/dsa/qca8k.c b/drivers/net/dsa/qca8k.c index 5bdac669a3392..df99c696b6889 100644 --- a/drivers/net/dsa/qca8k.c +++ b/drivers/net/dsa/qca8k.c @@ -1330,11 +1330,8 @@ qca8k_port_vlan_add(struct dsa_switch *ds, int port, bool pvid = vlan->flags & BRIDGE_VLAN_INFO_PVID; struct qca8k_priv *priv = ds->priv; int ret = 0; - u16 vid; - - for (vid = vlan->vid_begin; vid <= vlan->vid_end && !ret; ++vid) - ret = qca8k_vlan_add(priv, port, vid, untagged); + ret = qca8k_vlan_add(priv, port, vlan->vid, untagged); if (ret) dev_err(priv->dev, "Failed to add VLAN to port %d (%d)", port, ret); @@ -1342,11 +1339,10 @@ qca8k_port_vlan_add(struct dsa_switch *ds, int port, int shift = 16 * (port % 2); qca8k_rmw(priv, QCA8K_EGRESS_VLAN(port), - 0xfff << shift, - vlan->vid_end << shift); + 0xfff << shift, vlan->vid << shift); qca8k_write(priv, QCA8K_REG_PORT_VLAN_CTRL0(port), - QCA8K_PORT_VLAN_CVID(vlan->vid_end) | - QCA8K_PORT_VLAN_SVID(vlan->vid_end)); + QCA8K_PORT_VLAN_CVID(vlan->vid) | + QCA8K_PORT_VLAN_SVID(vlan->vid)); } } @@ -1356,11 +1352,8 @@ qca8k_port_vlan_del(struct dsa_switch *ds, int port, { struct qca8k_priv *priv = ds->priv; int ret = 0; - u16 vid; - - for (vid = vlan->vid_begin; vid <= vlan->vid_end && !ret; ++vid) - ret = qca8k_vlan_del(priv, port, vid); + ret = qca8k_vlan_del(priv, port, vlan->vid); if (ret) dev_err(priv->dev, "Failed to delete VLAN from port %d (%d)", port, ret); diff --git a/drivers/net/dsa/rtl8366.c b/drivers/net/dsa/rtl8366.c index 83d481ef92735..1a8f93112bc69 100644 --- a/drivers/net/dsa/rtl8366.c +++ b/drivers/net/dsa/rtl8366.c @@ -383,14 +383,11 @@ int rtl8366_vlan_prepare(struct dsa_switch *ds, int port, const struct switchdev_obj_port_vlan *vlan) { struct realtek_smi *smi = ds->priv; - u16 vid; - for (vid = vlan->vid_begin; vid < vlan->vid_end; vid++) - if (!smi->ops->is_vlan_valid(smi, vid)) - return -EINVAL; + if (!smi->ops->is_vlan_valid(smi, vlan->vid)) + return -EINVAL; - dev_info(smi->dev, "prepare VLANs %04x..%04x\n", - vlan->vid_begin, vlan->vid_end); + dev_info(smi->dev, "prepare VLAN %04x\n", vlan->vid); /* Enable VLAN in the hardware * FIXME: what's with this 4k business? @@ -408,47 +405,38 @@ void rtl8366_vlan_add(struct dsa_switch *ds, int port, struct realtek_smi *smi = ds->priv; u32 member = 0; u32 untag = 0; - u16 vid; int ret; - for (vid = vlan->vid_begin; vid < vlan->vid_end; vid++) - if (!smi->ops->is_vlan_valid(smi, vid)) - return; + if (!smi->ops->is_vlan_valid(smi, vlan->vid)) + return; dev_info(smi->dev, "add VLAN %d on port %d, %s, %s\n", - vlan->vid_begin, - port, - untagged ? "untagged" : "tagged", + vlan->vid, port, untagged ? "untagged" : "tagged", pvid ? " PVID" : "no PVID"); if (dsa_is_dsa_port(ds, port) || dsa_is_cpu_port(ds, port)) dev_err(smi->dev, "port is DSA or CPU port\n"); - for (vid = vlan->vid_begin; vid <= vlan->vid_end; vid++) { - member |= BIT(port); + member |= BIT(port); - if (untagged) - untag |= BIT(port); + if (untagged) + untag |= BIT(port); - ret = rtl8366_set_vlan(smi, vid, member, untag, 0); - if (ret) - dev_err(smi->dev, - "failed to set up VLAN %04x", - vid); + ret = rtl8366_set_vlan(smi, vlan->vid, member, untag, 0); + if (ret) + dev_err(smi->dev, "failed to set up VLAN %04x", vlan->vid); - if (!pvid) - continue; + if (!pvid) + return; - ret = rtl8366_set_pvid(smi, port, vid); - if (ret) - dev_err(smi->dev, - "failed to set PVID on port %d to VLAN %04x", - port, vid); + ret = rtl8366_set_pvid(smi, port, vlan->vid); + if (ret) + dev_err(smi->dev, "failed to set PVID on port %d to VLAN %04x", + port, vlan->vid); - if (!ret) - dev_dbg(smi->dev, "VLAN add: added VLAN %d with PVID on port %d\n", - vid, port); - } + if (!ret) + dev_dbg(smi->dev, "VLAN add: added VLAN %d with PVID on port %d\n", + vlan->vid, port); } EXPORT_SYMBOL_GPL(rtl8366_vlan_add); @@ -456,46 +444,39 @@ int rtl8366_vlan_del(struct dsa_switch *ds, int port, const struct switchdev_obj_port_vlan *vlan) { struct realtek_smi *smi = ds->priv; - u16 vid; - int ret; + int ret, i; - dev_info(smi->dev, "del VLAN on port %d\n", port); + dev_info(smi->dev, "del VLAN %04x on port %d\n", vlan->vid, port); - for (vid = vlan->vid_begin; vid <= vlan->vid_end; ++vid) { - int i; - - dev_info(smi->dev, "del VLAN %04x\n", vid); + for (i = 0; i < smi->num_vlan_mc; i++) { + struct rtl8366_vlan_mc vlanmc; - for (i = 0; i < smi->num_vlan_mc; i++) { - struct rtl8366_vlan_mc vlanmc; + ret = smi->ops->get_vlan_mc(smi, i, &vlanmc); + if (ret) + return ret; - ret = smi->ops->get_vlan_mc(smi, i, &vlanmc); - if (ret) + if (vlan->vid == vlanmc.vid) { + /* Remove this port from the VLAN */ + vlanmc.member &= ~BIT(port); + vlanmc.untag &= ~BIT(port); + /* + * If no ports are members of this VLAN + * anymore then clear the whole member + * config so it can be reused. + */ + if (!vlanmc.member && vlanmc.untag) { + vlanmc.vid = 0; + vlanmc.priority = 0; + vlanmc.fid = 0; + } + ret = smi->ops->set_vlan_mc(smi, i, &vlanmc); + if (ret) { + dev_err(smi->dev, + "failed to remove VLAN %04x\n", + vlan->vid); return ret; - - if (vid == vlanmc.vid) { - /* Remove this port from the VLAN */ - vlanmc.member &= ~BIT(port); - vlanmc.untag &= ~BIT(port); - /* - * If no ports are members of this VLAN - * anymore then clear the whole member - * config so it can be reused. - */ - if (!vlanmc.member && vlanmc.untag) { - vlanmc.vid = 0; - vlanmc.priority = 0; - vlanmc.fid = 0; - } - ret = smi->ops->set_vlan_mc(smi, i, &vlanmc); - if (ret) { - dev_err(smi->dev, - "failed to remove VLAN %04x\n", - vid); - return ret; - } - break; } + break; } } diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c index 59e00d55780bb..807e65ac2518a 100644 --- a/drivers/net/dsa/sja1105/sja1105_main.c +++ b/drivers/net/dsa/sja1105/sja1105_main.c @@ -2611,7 +2611,6 @@ static int sja1105_vlan_prepare(struct dsa_switch *ds, int port, const struct switchdev_obj_port_vlan *vlan) { struct sja1105_private *priv = ds->priv; - u16 vid; if (priv->vlan_state == SJA1105_VLAN_FILTERING_FULL) return 0; @@ -2620,11 +2619,9 @@ static int sja1105_vlan_prepare(struct dsa_switch *ds, int port, * bridge plus tagging), be sure to at least deny alterations to the * configuration done by dsa_8021q. */ - for (vid = vlan->vid_begin; vid <= vlan->vid_end; vid++) { - if (vid_is_dsa_8021q(vid)) { - dev_err(ds->dev, "Range 1024-3071 reserved for dsa_8021q operation\n"); - return -EBUSY; - } + if (vid_is_dsa_8021q(vlan->vid)) { + dev_err(ds->dev, "Range 1024-3071 reserved for dsa_8021q operation\n"); + return -EBUSY; } return 0; @@ -2799,17 +2796,14 @@ static void sja1105_vlan_add(struct dsa_switch *ds, int port, { struct sja1105_private *priv = ds->priv; bool vlan_table_changed = false; - u16 vid; int rc; - for (vid = vlan->vid_begin; vid <= vlan->vid_end; vid++) { - rc = sja1105_vlan_add_one(ds, port, vid, vlan->flags, - &priv->bridge_vlans); - if (rc < 0) - return; - if (rc > 0) - vlan_table_changed = true; - } + rc = sja1105_vlan_add_one(ds, port, vlan->vid, vlan->flags, + &priv->bridge_vlans); + if (rc < 0) + return; + if (rc > 0) + vlan_table_changed = true; if (!vlan_table_changed) return; @@ -2824,14 +2818,11 @@ static int sja1105_vlan_del(struct dsa_switch *ds, int port, { struct sja1105_private *priv = ds->priv; bool vlan_table_changed = false; - u16 vid; int rc; - for (vid = vlan->vid_begin; vid <= vlan->vid_end; vid++) { - rc = sja1105_vlan_del_one(ds, port, vid, &priv->bridge_vlans); - if (rc > 0) - vlan_table_changed = true; - } + rc = sja1105_vlan_del_one(ds, port, vlan->vid, &priv->bridge_vlans); + if (rc > 0) + vlan_table_changed = true; if (!vlan_table_changed) return 0; diff --git a/drivers/net/ethernet/marvell/prestera/prestera_switchdev.c b/drivers/net/ethernet/marvell/prestera/prestera_switchdev.c index 7d83e1f91ef17..c87667c1cca03 100644 --- a/drivers/net/ethernet/marvell/prestera/prestera_switchdev.c +++ b/drivers/net/ethernet/marvell/prestera/prestera_switchdev.c @@ -1045,17 +1045,9 @@ static int prestera_port_vlans_add(struct prestera_port *port, if (!bridge->vlan_enabled) return 0; - for (vid = vlan->vid_begin; vid <= vlan->vid_end; vid++) { - int err; - - err = prestera_bridge_port_vlan_add(port, br_port, - vid, flag_untagged, - flag_pvid, extack); - if (err) - return err; - } - - return 0; + return prestera_bridge_port_vlan_add(port, br_port, + vid, flag_untagged, + flag_pvid, extack); } static int prestera_port_obj_add(struct net_device *dev, @@ -1081,7 +1073,6 @@ static int prestera_port_vlans_del(struct prestera_port *port, struct net_device *dev = vlan->obj.orig_dev; struct prestera_bridge_port *br_port; struct prestera_switch *sw = port->sw; - u16 vid; if (netif_is_bridge_master(dev)) return -EOPNOTSUPP; @@ -1093,8 +1084,7 @@ static int prestera_port_vlans_del(struct prestera_port *port, if (!br_port->bridge->vlan_enabled) return 0; - for (vid = vlan->vid_begin; vid <= vlan->vid_end; vid++) - prestera_bridge_port_vlan_del(port, br_port, vid); + prestera_bridge_port_vlan_del(port, br_port, vlan->vid); return 0; } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c index cea42f6ed89be..7039cff69680e 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c @@ -1211,23 +1211,20 @@ mlxsw_sp_br_ban_rif_pvid_change(struct mlxsw_sp *mlxsw_sp, const struct switchdev_obj_port_vlan *vlan) { u16 pvid; - u16 vid; pvid = mlxsw_sp_rif_vid(mlxsw_sp, br_dev); if (!pvid) return 0; - for (vid = vlan->vid_begin; vid <= vlan->vid_end; ++vid) { - if (vlan->flags & BRIDGE_VLAN_INFO_PVID) { - if (vid != pvid) { - netdev_err(br_dev, "Can't change PVID, it's used by router interface\n"); - return -EBUSY; - } - } else { - if (vid == pvid) { - netdev_err(br_dev, "Can't remove PVID, it's used by router interface\n"); - return -EBUSY; - } + if (vlan->flags & BRIDGE_VLAN_INFO_PVID) { + if (vlan->vid != pvid) { + netdev_err(br_dev, "Can't change PVID, it's used by router interface\n"); + return -EBUSY; + } + } else { + if (vlan->vid == pvid) { + netdev_err(br_dev, "Can't remove PVID, it's used by router interface\n"); + return -EBUSY; } } @@ -1244,7 +1241,6 @@ static int mlxsw_sp_port_vlans_add(struct mlxsw_sp_port *mlxsw_sp_port, struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; struct net_device *orig_dev = vlan->obj.orig_dev; struct mlxsw_sp_bridge_port *bridge_port; - u16 vid; if (netif_is_bridge_master(orig_dev)) { int err = 0; @@ -1269,17 +1265,9 @@ static int mlxsw_sp_port_vlans_add(struct mlxsw_sp_port *mlxsw_sp_port, if (!bridge_port->bridge_device->vlan_enabled) return 0; - for (vid = vlan->vid_begin; vid <= vlan->vid_end; vid++) { - int err; - - err = mlxsw_sp_bridge_port_vlan_add(mlxsw_sp_port, bridge_port, - vid, flag_untagged, - flag_pvid, extack); - if (err) - return err; - } - - return 0; + return mlxsw_sp_bridge_port_vlan_add(mlxsw_sp_port, bridge_port, + vlan->vid, flag_untagged, + flag_pvid, extack); } static enum mlxsw_reg_sfdf_flush_type mlxsw_sp_fdb_flush_type(bool lagged) @@ -1873,7 +1861,6 @@ static int mlxsw_sp_port_vlans_del(struct mlxsw_sp_port *mlxsw_sp_port, struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; struct net_device *orig_dev = vlan->obj.orig_dev; struct mlxsw_sp_bridge_port *bridge_port; - u16 vid; if (netif_is_bridge_master(orig_dev)) return -EOPNOTSUPP; @@ -1885,8 +1872,7 @@ static int mlxsw_sp_port_vlans_del(struct mlxsw_sp_port *mlxsw_sp_port, if (!bridge_port->bridge_device->vlan_enabled) return 0; - for (vid = vlan->vid_begin; vid <= vlan->vid_end; vid++) - mlxsw_sp_bridge_port_vlan_del(mlxsw_sp_port, bridge_port, vid); + mlxsw_sp_bridge_port_vlan_del(mlxsw_sp_port, bridge_port, vlan->vid); return 0; } @@ -3411,7 +3397,6 @@ mlxsw_sp_switchdev_vxlan_vlans_add(struct net_device *vxlan_dev, struct netlink_ext_ack *extack; struct mlxsw_sp *mlxsw_sp; struct net_device *br_dev; - u16 vid; extack = switchdev_notifier_info_to_extack(&port_obj_info->info); br_dev = netdev_master_upper_dev_get(vxlan_dev); @@ -3434,18 +3419,10 @@ mlxsw_sp_switchdev_vxlan_vlans_add(struct net_device *vxlan_dev, if (!bridge_device->vlan_enabled) return 0; - for (vid = vlan->vid_begin; vid <= vlan->vid_end; vid++) { - int err; - - err = mlxsw_sp_switchdev_vxlan_vlan_add(mlxsw_sp, bridge_device, - vxlan_dev, vid, - flag_untagged, - flag_pvid, extack); - if (err) - return err; - } - - return 0; + return mlxsw_sp_switchdev_vxlan_vlan_add(mlxsw_sp, bridge_device, + vxlan_dev, vlan->vid, + flag_untagged, + flag_pvid, extack); } static void @@ -3458,7 +3435,6 @@ mlxsw_sp_switchdev_vxlan_vlans_del(struct net_device *vxlan_dev, struct mlxsw_sp_bridge_device *bridge_device; struct mlxsw_sp *mlxsw_sp; struct net_device *br_dev; - u16 vid; br_dev = netdev_master_upper_dev_get(vxlan_dev); if (!br_dev) @@ -3477,9 +3453,8 @@ mlxsw_sp_switchdev_vxlan_vlans_del(struct net_device *vxlan_dev, if (!bridge_device->vlan_enabled) return; - for (vid = vlan->vid_begin; vid <= vlan->vid_end; vid++) - mlxsw_sp_switchdev_vxlan_vlan_del(mlxsw_sp, bridge_device, - vxlan_dev, vid); + mlxsw_sp_switchdev_vxlan_vlan_del(mlxsw_sp, bridge_device, vxlan_dev, + vlan->vid); } static int diff --git a/drivers/net/ethernet/mscc/ocelot_net.c b/drivers/net/ethernet/mscc/ocelot_net.c index 2bd2840d88bdc..3b8718b143bbc 100644 --- a/drivers/net/ethernet/mscc/ocelot_net.c +++ b/drivers/net/ethernet/mscc/ocelot_net.c @@ -893,39 +893,16 @@ static int ocelot_port_obj_add_vlan(struct net_device *dev, const struct switchdev_obj_port_vlan *vlan, struct switchdev_trans *trans) { + bool untagged = vlan->flags & BRIDGE_VLAN_INFO_UNTAGGED; + bool pvid = vlan->flags & BRIDGE_VLAN_INFO_PVID; int ret; - u16 vid; - - for (vid = vlan->vid_begin; vid <= vlan->vid_end; vid++) { - bool pvid = vlan->flags & BRIDGE_VLAN_INFO_PVID; - bool untagged = vlan->flags & BRIDGE_VLAN_INFO_UNTAGGED; - - if (switchdev_trans_ph_prepare(trans)) - ret = ocelot_vlan_vid_prepare(dev, vid, pvid, - untagged); - else - ret = ocelot_vlan_vid_add(dev, vid, pvid, untagged); - if (ret) - return ret; - } - - return 0; -} - -static int ocelot_port_vlan_del_vlan(struct net_device *dev, - const struct switchdev_obj_port_vlan *vlan) -{ - int ret; - u16 vid; - for (vid = vlan->vid_begin; vid <= vlan->vid_end; vid++) { - ret = ocelot_vlan_vid_del(dev, vid); - - if (ret) - return ret; - } + if (switchdev_trans_ph_prepare(trans)) + ret = ocelot_vlan_vid_prepare(dev, vlan->vid, pvid, untagged); + else + ret = ocelot_vlan_vid_add(dev, vlan->vid, pvid, untagged); - return 0; + return ret; } static int ocelot_port_obj_add_mdb(struct net_device *dev, @@ -985,8 +962,8 @@ static int ocelot_port_obj_del(struct net_device *dev, switch (obj->id) { case SWITCHDEV_OBJ_ID_PORT_VLAN: - ret = ocelot_port_vlan_del_vlan(dev, - SWITCHDEV_OBJ_PORT_VLAN(obj)); + ret = ocelot_vlan_vid_del(dev, + SWITCHDEV_OBJ_PORT_VLAN(obj)->vid); break; case SWITCHDEV_OBJ_ID_PORT_MDB: ret = ocelot_port_obj_del_mdb(dev, SWITCHDEV_OBJ_PORT_MDB(obj)); diff --git a/drivers/net/ethernet/rocker/rocker_ofdpa.c b/drivers/net/ethernet/rocker/rocker_ofdpa.c index 7072b249c8bd6..d9d5188b76277 100644 --- a/drivers/net/ethernet/rocker/rocker_ofdpa.c +++ b/drivers/net/ethernet/rocker/rocker_ofdpa.c @@ -2540,32 +2540,16 @@ static int ofdpa_port_obj_vlan_add(struct rocker_port *rocker_port, const struct switchdev_obj_port_vlan *vlan) { struct ofdpa_port *ofdpa_port = rocker_port->wpriv; - u16 vid; - int err; - for (vid = vlan->vid_begin; vid <= vlan->vid_end; vid++) { - err = ofdpa_port_vlan_add(ofdpa_port, vid, vlan->flags); - if (err) - return err; - } - - return 0; + return ofdpa_port_vlan_add(ofdpa_port, vlan->vid, vlan->flags); } static int ofdpa_port_obj_vlan_del(struct rocker_port *rocker_port, const struct switchdev_obj_port_vlan *vlan) { struct ofdpa_port *ofdpa_port = rocker_port->wpriv; - u16 vid; - int err; - for (vid = vlan->vid_begin; vid <= vlan->vid_end; vid++) { - err = ofdpa_port_vlan_del(ofdpa_port, vid, vlan->flags); - if (err) - return err; - } - - return 0; + return ofdpa_port_vlan_del(ofdpa_port, vlan->vid, vlan->flags); } static int ofdpa_port_obj_fdb_add(struct rocker_port *rocker_port, diff --git a/drivers/net/ethernet/ti/cpsw_switchdev.c b/drivers/net/ethernet/ti/cpsw_switchdev.c index 29747da5c514b..8a36228acc5d3 100644 --- a/drivers/net/ethernet/ti/cpsw_switchdev.c +++ b/drivers/net/ethernet/ti/cpsw_switchdev.c @@ -260,10 +260,9 @@ static int cpsw_port_vlans_add(struct cpsw_priv *priv, struct net_device *orig_dev = vlan->obj.orig_dev; bool cpu_port = netif_is_bridge_master(orig_dev); bool pvid = vlan->flags & BRIDGE_VLAN_INFO_PVID; - u16 vid; dev_dbg(priv->dev, "VID add: %s: vid:%u flags:%X\n", - priv->ndev->name, vlan->vid_begin, vlan->flags); + priv->ndev->name, vlan->vid, vlan->flags); if (cpu_port && !(vlan->flags & BRIDGE_VLAN_INFO_BRENTRY)) return 0; @@ -271,33 +270,7 @@ static int cpsw_port_vlans_add(struct cpsw_priv *priv, if (switchdev_trans_ph_prepare(trans)) return 0; - for (vid = vlan->vid_begin; vid <= vlan->vid_end; vid++) { - int err; - - err = cpsw_port_vlan_add(priv, untag, pvid, vid, orig_dev); - if (err) - return err; - } - - return 0; -} - -static int cpsw_port_vlans_del(struct cpsw_priv *priv, - const struct switchdev_obj_port_vlan *vlan) - -{ - struct net_device *orig_dev = vlan->obj.orig_dev; - u16 vid; - - for (vid = vlan->vid_begin; vid <= vlan->vid_end; vid++) { - int err; - - err = cpsw_port_vlan_del(priv, vid, orig_dev); - if (err) - return err; - } - - return 0; + return cpsw_port_vlan_add(priv, untag, pvid, vlan->vid, orig_dev); } static int cpsw_port_mdb_add(struct cpsw_priv *priv, @@ -392,7 +365,7 @@ static int cpsw_port_obj_del(struct net_device *ndev, switch (obj->id) { case SWITCHDEV_OBJ_ID_PORT_VLAN: - err = cpsw_port_vlans_del(priv, vlan); + err = cpsw_port_vlan_del(priv, vlan->vid, vlan->obj.orig_dev); break; case SWITCHDEV_OBJ_ID_PORT_MDB: case SWITCHDEV_OBJ_ID_HOST_MDB: diff --git a/drivers/staging/fsl-dpaa2/ethsw/ethsw.c b/drivers/staging/fsl-dpaa2/ethsw/ethsw.c index d524e92051a3a..62edb8d01f4ef 100644 --- a/drivers/staging/fsl-dpaa2/ethsw/ethsw.c +++ b/drivers/staging/fsl-dpaa2/ethsw/ethsw.c @@ -981,19 +981,14 @@ static int dpaa2_switch_port_vlans_add(struct net_device *netdev, struct ethsw_port_priv *port_priv = netdev_priv(netdev); struct ethsw_core *ethsw = port_priv->ethsw_data; struct dpsw_attr *attr = ðsw->sw_attr; - int vid, err = 0, new_vlans = 0; + int err = 0; if (switchdev_trans_ph_prepare(trans)) { - for (vid = vlan->vid_begin; vid <= vlan->vid_end; vid++) { - if (!port_priv->ethsw_data->vlans[vid]) - new_vlans++; - - /* Make sure that the VLAN is not already configured - * on the switch port - */ - if (port_priv->vlans[vid] & ETHSW_VLAN_MEMBER) - return -EEXIST; - } + /* Make sure that the VLAN is not already configured + * on the switch port + */ + if (port_priv->vlans[vlan->vid] & ETHSW_VLAN_MEMBER) + return -EEXIST; /* Check if there is space for a new VLAN */ err = dpsw_get_attributes(ethsw->mc_io, 0, ethsw->dpsw_handle, @@ -1002,27 +997,22 @@ static int dpaa2_switch_port_vlans_add(struct net_device *netdev, netdev_err(netdev, "dpsw_get_attributes err %d\n", err); return err; } - if (attr->max_vlans - attr->num_vlans < new_vlans) + if (attr->max_vlans - attr->num_vlans < 1) return -ENOSPC; return 0; } - for (vid = vlan->vid_begin; vid <= vlan->vid_end; vid++) { - if (!port_priv->ethsw_data->vlans[vid]) { - /* this is a new VLAN */ - err = dpaa2_switch_add_vlan(port_priv->ethsw_data, vid); - if (err) - return err; - - port_priv->ethsw_data->vlans[vid] |= ETHSW_VLAN_GLOBAL; - } - err = dpaa2_switch_port_add_vlan(port_priv, vid, vlan->flags); + if (!port_priv->ethsw_data->vlans[vlan->vid]) { + /* this is a new VLAN */ + err = dpaa2_switch_add_vlan(port_priv->ethsw_data, vlan->vid); if (err) - break; + return err; + + port_priv->ethsw_data->vlans[vlan->vid] |= ETHSW_VLAN_GLOBAL; } - return err; + return dpaa2_switch_port_add_vlan(port_priv, vlan->vid, vlan->flags); } static int dpaa2_switch_port_lookup_address(struct net_device *netdev, int is_uc, @@ -1155,18 +1145,11 @@ static int dpaa2_switch_port_vlans_del(struct net_device *netdev, const struct switchdev_obj_port_vlan *vlan) { struct ethsw_port_priv *port_priv = netdev_priv(netdev); - int vid, err = 0; if (netif_is_bridge_master(vlan->obj.orig_dev)) return -EOPNOTSUPP; - for (vid = vlan->vid_begin; vid <= vlan->vid_end; vid++) { - err = dpaa2_switch_port_del_vlan(port_priv, vid); - if (err) - break; - } - - return err; + return dpaa2_switch_port_del_vlan(port_priv, vlan->vid); } static int dpaa2_switch_port_mdb_del(struct net_device *netdev, diff --git a/include/net/switchdev.h b/include/net/switchdev.h index 99cd538d65191..bac7d3ba574f8 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -97,8 +97,7 @@ struct switchdev_obj { struct switchdev_obj_port_vlan { struct switchdev_obj obj; u16 flags; - u16 vid_begin; - u16 vid_end; + u16 vid; }; #define SWITCHDEV_OBJ_PORT_VLAN(OBJ) \ diff --git a/net/bridge/br_switchdev.c b/net/bridge/br_switchdev.c index 015209bf44aa4..a9c23ef834434 100644 --- a/net/bridge/br_switchdev.c +++ b/net/bridge/br_switchdev.c @@ -153,8 +153,7 @@ int br_switchdev_port_vlan_add(struct net_device *dev, u16 vid, u16 flags, .obj.orig_dev = dev, .obj.id = SWITCHDEV_OBJ_ID_PORT_VLAN, .flags = flags, - .vid_begin = vid, - .vid_end = vid, + .vid = vid, }; return switchdev_port_obj_add(dev, &v.obj, extack); @@ -165,8 +164,7 @@ int br_switchdev_port_vlan_del(struct net_device *dev, u16 vid) struct switchdev_obj_port_vlan v = { .obj.orig_dev = dev, .obj.id = SWITCHDEV_OBJ_ID_PORT_VLAN, - .vid_begin = vid, - .vid_end = vid, + .vid = vid, }; return switchdev_port_obj_del(dev, &v.obj); diff --git a/net/dsa/slave.c b/net/dsa/slave.c index f8b6a69b6873c..653d64f4a637f 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -318,7 +318,7 @@ dsa_slave_vlan_check_for_8021q_uppers(struct net_device *slave, continue; vid = vlan_dev_vlan_id(upper_dev); - if (vid >= vlan->vid_begin && vid <= vlan->vid_end) + if (vid == vlan->vid) return -EBUSY; } @@ -332,7 +332,7 @@ static int dsa_slave_vlan_add(struct net_device *dev, struct net_device *master = dsa_slave_to_master(dev); struct dsa_port *dp = dsa_slave_to_port(dev); struct switchdev_obj_port_vlan vlan; - int vid, err; + int err; if (obj->orig_dev != dev) return -EOPNOTSUPP; @@ -367,13 +367,7 @@ static int dsa_slave_vlan_add(struct net_device *dev, if (err) return err; - for (vid = vlan.vid_begin; vid <= vlan.vid_end; vid++) { - err = vlan_vid_add(master, htons(ETH_P_8021Q), vid); - if (err) - return err; - } - - return 0; + return vlan_vid_add(master, htons(ETH_P_8021Q), vlan.vid); } static int dsa_slave_port_obj_add(struct net_device *dev, @@ -419,7 +413,7 @@ static int dsa_slave_vlan_del(struct net_device *dev, struct net_device *master = dsa_slave_to_master(dev); struct dsa_port *dp = dsa_slave_to_port(dev); struct switchdev_obj_port_vlan *vlan; - int vid, err; + int err; if (obj->orig_dev != dev) return -EOPNOTSUPP; @@ -436,8 +430,7 @@ static int dsa_slave_vlan_del(struct net_device *dev, if (err) return err; - for (vid = vlan->vid_begin; vid <= vlan->vid_end; vid++) - vlan_vid_del(master, htons(ETH_P_8021Q), vid); + vlan_vid_del(master, htons(ETH_P_8021Q), vlan->vid); return 0; } @@ -1289,8 +1282,7 @@ static int dsa_slave_vlan_rx_add_vid(struct net_device *dev, __be16 proto, struct dsa_port *dp = dsa_slave_to_port(dev); struct switchdev_obj_port_vlan vlan = { .obj.id = SWITCHDEV_OBJ_ID_PORT_VLAN, - .vid_begin = vid, - .vid_end = vid, + .vid = vid, /* This API only allows programming tagged, non-PVID VIDs */ .flags = 0, }; @@ -1328,8 +1320,7 @@ static int dsa_slave_vlan_rx_kill_vid(struct net_device *dev, __be16 proto, struct net_device *master = dsa_slave_to_master(dev); struct dsa_port *dp = dsa_slave_to_port(dev); struct switchdev_obj_port_vlan vlan = { - .vid_begin = vid, - .vid_end = vid, + .vid = vid, /* This API only allows programming tagged, non-PVID VIDs */ .flags = 0, }; -- GitLab From 3e85f580e3fc553d1ba21ac01e08659cbd0f66cc Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Sat, 9 Jan 2021 02:01:47 +0200 Subject: [PATCH 0487/2012] net: dsa: mv88e6xxx: deny vid 0 on the CPU port and DSA links too mv88e6xxx apparently has a problem offloading VID 0, which the 8021q module tries to install as part of commit ad1afb003939 ("vlan_dev: VLAN 0 should be treated as "no vlan tag" (802.1p packet)"). That mv88e6xxx restriction seems to have been introduced by the "VTU GetNext VID-1 trick to retrieve a single entry" - see commit 2fb5ef09de7c ("net: dsa: mv88e6xxx: extract single VLAN retrieval"). There is one more problem. The mv88e6xxx CPU port and DSA links do not report properly in the prepare phase what are the VLANs that they can offload. They'll say they can offload everything: mv88e6xxx_port_vlan_prepare -> mv88e6xxx_port_check_hw_vlan: /* DSA and CPU ports have to be members of multiple vlans */ if (dsa_is_dsa_port(ds, port) || dsa_is_cpu_port(ds, port)) return 0; Except that if you actually try to commit to it, they'll error out and print this message: [ 32.802438] mv88e6085 d0032004.mdio-mii:12: p9: failed to add VLAN 0t which comes from: mv88e6xxx_port_vlan_add -> mv88e6xxx_port_vlan_join: if (!vid) return -EOPNOTSUPP; What prevents this condition from triggering in real life? The fact that when a DSA_NOTIFIER_VLAN_ADD is emitted, it never targets a DSA link directly. Instead, the notifier will always target either a user port or a CPU port. DSA links just happen to get dragged in by: static bool dsa_switch_vlan_match(struct dsa_switch *ds, int port, struct dsa_notifier_vlan_info *info) { ... if (dsa_is_dsa_port(ds, port)) return true; ... } So for every DSA VLAN notifier, during the prepare phase, it will just so happen that there will be somebody to say "no, don't do that". This will become a problem when the switchdev prepare/commit transactional model goes away. Every port needs to think on its own. DSA links can no longer bluff and rely on the fact that the prepare phase will not go through to the end, because there will be no prepare phase any longer. Fix this issue before it becomes a problem, by having the "vid == 0" check earlier than the check whether we are a CPU port / DSA link or not. Also, the "vid == 0" check becomes unnecessary in the .port_vlan_add callback, so we can remove it. Signed-off-by: Vladimir Oltean Reviewed-by: Florian Fainelli Signed-off-by: Jakub Kicinski --- drivers/net/dsa/mv88e6xxx/chip.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index 4834be9e4e864..fb25cb87156af 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -1535,13 +1535,13 @@ static int mv88e6xxx_port_check_hw_vlan(struct dsa_switch *ds, int port, struct mv88e6xxx_vtu_entry vlan; int i, err; + if (!vid) + return -EOPNOTSUPP; + /* DSA and CPU ports have to be members of multiple vlans */ if (dsa_is_dsa_port(ds, port) || dsa_is_cpu_port(ds, port)) return 0; - if (!vid) - return -EOPNOTSUPP; - vlan.vid = vid - 1; vlan.valid = false; @@ -1920,9 +1920,6 @@ static int mv88e6xxx_port_vlan_join(struct mv88e6xxx_chip *chip, int port, struct mv88e6xxx_vtu_entry vlan; int i, err; - if (!vid) - return -EOPNOTSUPP; - vlan.vid = vid - 1; vlan.valid = false; -- GitLab From ffb68fc58e9640762be891f9aebe4f5aac615ab3 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Sat, 9 Jan 2021 02:01:48 +0200 Subject: [PATCH 0488/2012] net: switchdev: remove the transaction structure from port object notifiers Since the introduction of the switchdev API, port objects were transmitted to drivers for offloading using a two-step transactional model, with a prepare phase that was supposed to catch all errors, and a commit phase that was supposed to never fail. Some classes of failures can never be avoided, like hardware access, or memory allocation. In the latter case, merely attempting to move the memory allocation to the preparation phase makes it impossible to avoid memory leaks, since commit 91cf8eceffc1 ("switchdev: Remove unused transaction item queue") which has removed the unused mechanism of passing on the allocated memory between one phase and another. It is time we admit that separating the preparation from the commit phase is something that is best left for the driver to decide, and not something that should be baked into the API, especially since there are no switchdev callers that depend on this. This patch removes the struct switchdev_trans member from switchdev port object notifier structures, and converts drivers to not look at this member. Where driver conversion is trivial (like in the case of the Marvell Prestera driver, NXP DPAA2 switch, TI CPSW, and Rocker drivers), it is done in this patch. Where driver conversion needs more attention (DSA, Mellanox Spectrum), the conversion is left for subsequent patches and here we only fake the prepare/commit phases at a lower level, just not in the switchdev notifier itself. Where the code has a natural structure that is best left alone as a preparation and a commit phase (as in the case of the Ocelot switch), that structure is left in place, just made to not depend upon the switchdev transactional model. Signed-off-by: Vladimir Oltean Reviewed-by: Florian Fainelli Acked-by: Linus Walleij Acked-by: Jiri Pirko Reviewed-by: Ido Schimmel Signed-off-by: Jakub Kicinski --- .../marvell/prestera/prestera_switchdev.c | 7 +-- .../mellanox/mlxsw/spectrum_switchdev.c | 52 +++++++++------- drivers/net/ethernet/mscc/ocelot_net.c | 25 +++----- drivers/net/ethernet/rocker/rocker_main.c | 15 ++--- drivers/net/ethernet/ti/cpsw_switchdev.c | 17 ++---- drivers/staging/fsl-dpaa2/ethsw/ethsw.c | 59 +++++++++---------- include/net/switchdev.h | 3 - net/dsa/dsa_priv.h | 8 +-- net/dsa/port.c | 8 +-- net/dsa/slave.c | 34 +++-------- net/dsa/switch.c | 36 ++--------- net/switchdev/switchdev.c | 42 ++----------- 12 files changed, 98 insertions(+), 208 deletions(-) diff --git a/drivers/net/ethernet/marvell/prestera/prestera_switchdev.c b/drivers/net/ethernet/marvell/prestera/prestera_switchdev.c index c87667c1cca03..3235458a55017 100644 --- a/drivers/net/ethernet/marvell/prestera/prestera_switchdev.c +++ b/drivers/net/ethernet/marvell/prestera/prestera_switchdev.c @@ -1020,7 +1020,6 @@ prestera_bridge_port_vlan_del(struct prestera_port *port, static int prestera_port_vlans_add(struct prestera_port *port, const struct switchdev_obj_port_vlan *vlan, - struct switchdev_trans *trans, struct netlink_ext_ack *extack) { bool flag_untagged = vlan->flags & BRIDGE_VLAN_INFO_UNTAGGED; @@ -1034,9 +1033,6 @@ static int prestera_port_vlans_add(struct prestera_port *port, if (netif_is_bridge_master(dev)) return 0; - if (switchdev_trans_ph_commit(trans)) - return 0; - br_port = prestera_bridge_port_by_dev(sw->swdev, dev); if (WARN_ON(!br_port)) return -EINVAL; @@ -1052,7 +1048,6 @@ static int prestera_port_vlans_add(struct prestera_port *port, static int prestera_port_obj_add(struct net_device *dev, const struct switchdev_obj *obj, - struct switchdev_trans *trans, struct netlink_ext_ack *extack) { struct prestera_port *port = netdev_priv(dev); @@ -1061,7 +1056,7 @@ static int prestera_port_obj_add(struct net_device *dev, switch (obj->id) { case SWITCHDEV_OBJ_ID_PORT_VLAN: vlan = SWITCHDEV_OBJ_PORT_VLAN(obj); - return prestera_port_vlans_add(port, vlan, trans, extack); + return prestera_port_vlans_add(port, vlan, extack); default: return -EOPNOTSUPP; } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c index 7039cff69680e..eff5920aed063 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c @@ -1704,8 +1704,7 @@ static int mlxsw_sp_port_remove_from_mid(struct mlxsw_sp_port *mlxsw_sp_port, } static int mlxsw_sp_port_mdb_add(struct mlxsw_sp_port *mlxsw_sp_port, - const struct switchdev_obj_port_mdb *mdb, - struct switchdev_trans *trans) + const struct switchdev_obj_port_mdb *mdb) { struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; struct net_device *orig_dev = mdb->obj.orig_dev; @@ -1717,9 +1716,6 @@ static int mlxsw_sp_port_mdb_add(struct mlxsw_sp_port *mlxsw_sp_port, u16 fid_index; int err = 0; - if (switchdev_trans_ph_commit(trans)) - return 0; - bridge_port = mlxsw_sp_bridge_port_find(mlxsw_sp->bridge, orig_dev); if (!bridge_port) return 0; @@ -1801,32 +1797,38 @@ mlxsw_sp_port_mrouter_update_mdb(struct mlxsw_sp_port *mlxsw_sp_port, static int mlxsw_sp_port_obj_add(struct net_device *dev, const struct switchdev_obj *obj, - struct switchdev_trans *trans, struct netlink_ext_ack *extack) { struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); const struct switchdev_obj_port_vlan *vlan; + struct switchdev_trans trans; int err = 0; switch (obj->id) { case SWITCHDEV_OBJ_ID_PORT_VLAN: vlan = SWITCHDEV_OBJ_PORT_VLAN(obj); - err = mlxsw_sp_port_vlans_add(mlxsw_sp_port, vlan, trans, + + trans.ph_prepare = true; + err = mlxsw_sp_port_vlans_add(mlxsw_sp_port, vlan, &trans, extack); - if (switchdev_trans_ph_prepare(trans)) { - /* The event is emitted before the changes are actually - * applied to the bridge. Therefore schedule the respin - * call for later, so that the respin logic sees the - * updated bridge state. - */ - mlxsw_sp_span_respin(mlxsw_sp_port->mlxsw_sp); - } + /* The event is emitted before the changes are actually + * applied to the bridge. Therefore schedule the respin + * call for later, so that the respin logic sees the + * updated bridge state. + */ + mlxsw_sp_span_respin(mlxsw_sp_port->mlxsw_sp); + + if (err) + break; + + trans.ph_prepare = false; + err = mlxsw_sp_port_vlans_add(mlxsw_sp_port, vlan, &trans, + extack); break; case SWITCHDEV_OBJ_ID_PORT_MDB: err = mlxsw_sp_port_mdb_add(mlxsw_sp_port, - SWITCHDEV_OBJ_PORT_MDB(obj), - trans); + SWITCHDEV_OBJ_PORT_MDB(obj)); break; default: err = -EOPNOTSUPP; @@ -3386,13 +3388,13 @@ out: static int mlxsw_sp_switchdev_vxlan_vlans_add(struct net_device *vxlan_dev, struct switchdev_notifier_port_obj_info * - port_obj_info) + port_obj_info, + struct switchdev_trans *trans) { struct switchdev_obj_port_vlan *vlan = SWITCHDEV_OBJ_PORT_VLAN(port_obj_info->obj); bool flag_untagged = vlan->flags & BRIDGE_VLAN_INFO_UNTAGGED; bool flag_pvid = vlan->flags & BRIDGE_VLAN_INFO_PVID; - struct switchdev_trans *trans = port_obj_info->trans; struct mlxsw_sp_bridge_device *bridge_device; struct netlink_ext_ack *extack; struct mlxsw_sp *mlxsw_sp; @@ -3462,12 +3464,22 @@ mlxsw_sp_switchdev_handle_vxlan_obj_add(struct net_device *vxlan_dev, struct switchdev_notifier_port_obj_info * port_obj_info) { + struct switchdev_trans trans; int err = 0; switch (port_obj_info->obj->id) { case SWITCHDEV_OBJ_ID_PORT_VLAN: + trans.ph_prepare = true; + err = mlxsw_sp_switchdev_vxlan_vlans_add(vxlan_dev, + port_obj_info, + &trans); + if (err) + break; + + trans.ph_prepare = false; err = mlxsw_sp_switchdev_vxlan_vlans_add(vxlan_dev, - port_obj_info); + port_obj_info, + &trans); break; default: break; diff --git a/drivers/net/ethernet/mscc/ocelot_net.c b/drivers/net/ethernet/mscc/ocelot_net.c index 3b8718b143bbc..16d958a6e2066 100644 --- a/drivers/net/ethernet/mscc/ocelot_net.c +++ b/drivers/net/ethernet/mscc/ocelot_net.c @@ -890,33 +890,27 @@ static int ocelot_port_attr_set(struct net_device *dev, } static int ocelot_port_obj_add_vlan(struct net_device *dev, - const struct switchdev_obj_port_vlan *vlan, - struct switchdev_trans *trans) + const struct switchdev_obj_port_vlan *vlan) { bool untagged = vlan->flags & BRIDGE_VLAN_INFO_UNTAGGED; bool pvid = vlan->flags & BRIDGE_VLAN_INFO_PVID; int ret; - if (switchdev_trans_ph_prepare(trans)) - ret = ocelot_vlan_vid_prepare(dev, vlan->vid, pvid, untagged); - else - ret = ocelot_vlan_vid_add(dev, vlan->vid, pvid, untagged); + ret = ocelot_vlan_vid_prepare(dev, vlan->vid, pvid, untagged); + if (ret) + return ret; - return ret; + return ocelot_vlan_vid_add(dev, vlan->vid, pvid, untagged); } static int ocelot_port_obj_add_mdb(struct net_device *dev, - const struct switchdev_obj_port_mdb *mdb, - struct switchdev_trans *trans) + const struct switchdev_obj_port_mdb *mdb) { struct ocelot_port_private *priv = netdev_priv(dev); struct ocelot_port *ocelot_port = &priv->port; struct ocelot *ocelot = ocelot_port->ocelot; int port = priv->chip_port; - if (switchdev_trans_ph_prepare(trans)) - return 0; - return ocelot_port_mdb_add(ocelot, port, mdb); } @@ -933,7 +927,6 @@ static int ocelot_port_obj_del_mdb(struct net_device *dev, static int ocelot_port_obj_add(struct net_device *dev, const struct switchdev_obj *obj, - struct switchdev_trans *trans, struct netlink_ext_ack *extack) { int ret = 0; @@ -941,12 +934,10 @@ static int ocelot_port_obj_add(struct net_device *dev, switch (obj->id) { case SWITCHDEV_OBJ_ID_PORT_VLAN: ret = ocelot_port_obj_add_vlan(dev, - SWITCHDEV_OBJ_PORT_VLAN(obj), - trans); + SWITCHDEV_OBJ_PORT_VLAN(obj)); break; case SWITCHDEV_OBJ_ID_PORT_MDB: - ret = ocelot_port_obj_add_mdb(dev, SWITCHDEV_OBJ_PORT_MDB(obj), - trans); + ret = ocelot_port_obj_add_mdb(dev, SWITCHDEV_OBJ_PORT_MDB(obj)); break; default: return -EOPNOTSUPP; diff --git a/drivers/net/ethernet/rocker/rocker_main.c b/drivers/net/ethernet/rocker/rocker_main.c index dd0bc7f0aaeee..1018d37593169 100644 --- a/drivers/net/ethernet/rocker/rocker_main.c +++ b/drivers/net/ethernet/rocker/rocker_main.c @@ -1638,17 +1638,13 @@ rocker_world_port_attr_bridge_ageing_time_set(struct rocker_port *rocker_port, static int rocker_world_port_obj_vlan_add(struct rocker_port *rocker_port, - const struct switchdev_obj_port_vlan *vlan, - struct switchdev_trans *trans) + const struct switchdev_obj_port_vlan *vlan) { struct rocker_world_ops *wops = rocker_port->rocker->wops; if (!wops->port_obj_vlan_add) return -EOPNOTSUPP; - if (switchdev_trans_ph_prepare(trans)) - return 0; - return wops->port_obj_vlan_add(rocker_port, vlan); } @@ -2102,8 +2098,7 @@ static int rocker_port_attr_set(struct net_device *dev, } static int rocker_port_obj_add(struct net_device *dev, - const struct switchdev_obj *obj, - struct switchdev_trans *trans) + const struct switchdev_obj *obj) { struct rocker_port *rocker_port = netdev_priv(dev); int err = 0; @@ -2111,8 +2106,7 @@ static int rocker_port_obj_add(struct net_device *dev, switch (obj->id) { case SWITCHDEV_OBJ_ID_PORT_VLAN: err = rocker_world_port_obj_vlan_add(rocker_port, - SWITCHDEV_OBJ_PORT_VLAN(obj), - trans); + SWITCHDEV_OBJ_PORT_VLAN(obj)); break; default: err = -EOPNOTSUPP; @@ -2847,8 +2841,7 @@ rocker_switchdev_port_obj_event(unsigned long event, struct net_device *netdev, switch (event) { case SWITCHDEV_PORT_OBJ_ADD: - err = rocker_port_obj_add(netdev, port_obj_info->obj, - port_obj_info->trans); + err = rocker_port_obj_add(netdev, port_obj_info->obj); break; case SWITCHDEV_PORT_OBJ_DEL: err = rocker_port_obj_del(netdev, port_obj_info->obj); diff --git a/drivers/net/ethernet/ti/cpsw_switchdev.c b/drivers/net/ethernet/ti/cpsw_switchdev.c index 8a36228acc5d3..3232f483c0686 100644 --- a/drivers/net/ethernet/ti/cpsw_switchdev.c +++ b/drivers/net/ethernet/ti/cpsw_switchdev.c @@ -253,8 +253,7 @@ static int cpsw_port_vlan_del(struct cpsw_priv *priv, u16 vid, } static int cpsw_port_vlans_add(struct cpsw_priv *priv, - const struct switchdev_obj_port_vlan *vlan, - struct switchdev_trans *trans) + const struct switchdev_obj_port_vlan *vlan) { bool untag = vlan->flags & BRIDGE_VLAN_INFO_UNTAGGED; struct net_device *orig_dev = vlan->obj.orig_dev; @@ -267,15 +266,11 @@ static int cpsw_port_vlans_add(struct cpsw_priv *priv, if (cpu_port && !(vlan->flags & BRIDGE_VLAN_INFO_BRENTRY)) return 0; - if (switchdev_trans_ph_prepare(trans)) - return 0; - return cpsw_port_vlan_add(priv, untag, pvid, vlan->vid, orig_dev); } static int cpsw_port_mdb_add(struct cpsw_priv *priv, - struct switchdev_obj_port_mdb *mdb, - struct switchdev_trans *trans) + struct switchdev_obj_port_mdb *mdb) { struct net_device *orig_dev = mdb->obj.orig_dev; @@ -284,9 +279,6 @@ static int cpsw_port_mdb_add(struct cpsw_priv *priv, int port_mask; int err; - if (switchdev_trans_ph_prepare(trans)) - return 0; - if (cpu_port) port_mask = BIT(HOST_PORT_NUM); else @@ -325,7 +317,6 @@ static int cpsw_port_mdb_del(struct cpsw_priv *priv, static int cpsw_port_obj_add(struct net_device *ndev, const struct switchdev_obj *obj, - struct switchdev_trans *trans, struct netlink_ext_ack *extack) { struct switchdev_obj_port_vlan *vlan = SWITCHDEV_OBJ_PORT_VLAN(obj); @@ -338,11 +329,11 @@ static int cpsw_port_obj_add(struct net_device *ndev, switch (obj->id) { case SWITCHDEV_OBJ_ID_PORT_VLAN: - err = cpsw_port_vlans_add(priv, vlan, trans); + err = cpsw_port_vlans_add(priv, vlan); break; case SWITCHDEV_OBJ_ID_PORT_MDB: case SWITCHDEV_OBJ_ID_HOST_MDB: - err = cpsw_port_mdb_add(priv, mdb, trans); + err = cpsw_port_mdb_add(priv, mdb); break; default: err = -EOPNOTSUPP; diff --git a/drivers/staging/fsl-dpaa2/ethsw/ethsw.c b/drivers/staging/fsl-dpaa2/ethsw/ethsw.c index 62edb8d01f4ef..197dea9c3b424 100644 --- a/drivers/staging/fsl-dpaa2/ethsw/ethsw.c +++ b/drivers/staging/fsl-dpaa2/ethsw/ethsw.c @@ -975,33 +975,38 @@ static int dpaa2_switch_port_attr_set(struct net_device *netdev, } static int dpaa2_switch_port_vlans_add(struct net_device *netdev, - const struct switchdev_obj_port_vlan *vlan, - struct switchdev_trans *trans) + const struct switchdev_obj_port_vlan *vlan) { struct ethsw_port_priv *port_priv = netdev_priv(netdev); struct ethsw_core *ethsw = port_priv->ethsw_data; struct dpsw_attr *attr = ðsw->sw_attr; int err = 0; - if (switchdev_trans_ph_prepare(trans)) { - /* Make sure that the VLAN is not already configured - * on the switch port - */ - if (port_priv->vlans[vlan->vid] & ETHSW_VLAN_MEMBER) - return -EEXIST; + /* Make sure that the VLAN is not already configured + * on the switch port + */ + if (port_priv->vlans[vlan->vid] & ETHSW_VLAN_MEMBER) + return -EEXIST; - /* Check if there is space for a new VLAN */ - err = dpsw_get_attributes(ethsw->mc_io, 0, ethsw->dpsw_handle, - ðsw->sw_attr); - if (err) { - netdev_err(netdev, "dpsw_get_attributes err %d\n", err); - return err; - } - if (attr->max_vlans - attr->num_vlans < 1) - return -ENOSPC; + /* Check if there is space for a new VLAN */ + err = dpsw_get_attributes(ethsw->mc_io, 0, ethsw->dpsw_handle, + ðsw->sw_attr); + if (err) { + netdev_err(netdev, "dpsw_get_attributes err %d\n", err); + return err; + } + if (attr->max_vlans - attr->num_vlans < 1) + return -ENOSPC; - return 0; + /* Check if there is space for a new VLAN */ + err = dpsw_get_attributes(ethsw->mc_io, 0, ethsw->dpsw_handle, + ðsw->sw_attr); + if (err) { + netdev_err(netdev, "dpsw_get_attributes err %d\n", err); + return err; } + if (attr->max_vlans - attr->num_vlans < 1) + return -ENOSPC; if (!port_priv->ethsw_data->vlans[vlan->vid]) { /* this is a new VLAN */ @@ -1033,15 +1038,11 @@ static int dpaa2_switch_port_lookup_address(struct net_device *netdev, int is_uc } static int dpaa2_switch_port_mdb_add(struct net_device *netdev, - const struct switchdev_obj_port_mdb *mdb, - struct switchdev_trans *trans) + const struct switchdev_obj_port_mdb *mdb) { struct ethsw_port_priv *port_priv = netdev_priv(netdev); int err; - if (switchdev_trans_ph_prepare(trans)) - return 0; - /* Check if address is already set on this port */ if (dpaa2_switch_port_lookup_address(netdev, 0, mdb->addr)) return -EEXIST; @@ -1060,21 +1061,18 @@ static int dpaa2_switch_port_mdb_add(struct net_device *netdev, } static int dpaa2_switch_port_obj_add(struct net_device *netdev, - const struct switchdev_obj *obj, - struct switchdev_trans *trans) + const struct switchdev_obj *obj) { int err; switch (obj->id) { case SWITCHDEV_OBJ_ID_PORT_VLAN: err = dpaa2_switch_port_vlans_add(netdev, - SWITCHDEV_OBJ_PORT_VLAN(obj), - trans); + SWITCHDEV_OBJ_PORT_VLAN(obj)); break; case SWITCHDEV_OBJ_ID_PORT_MDB: err = dpaa2_switch_port_mdb_add(netdev, - SWITCHDEV_OBJ_PORT_MDB(obj), - trans); + SWITCHDEV_OBJ_PORT_MDB(obj)); break; default: err = -EOPNOTSUPP; @@ -1394,8 +1392,7 @@ static int dpaa2_switch_port_obj_event(unsigned long event, switch (event) { case SWITCHDEV_PORT_OBJ_ADD: - err = dpaa2_switch_port_obj_add(netdev, port_obj_info->obj, - port_obj_info->trans); + err = dpaa2_switch_port_obj_add(netdev, port_obj_info->obj); break; case SWITCHDEV_PORT_OBJ_DEL: err = dpaa2_switch_port_obj_del(netdev, port_obj_info->obj); diff --git a/include/net/switchdev.h b/include/net/switchdev.h index bac7d3ba574f8..cbe6e35d51f54 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -233,7 +233,6 @@ struct switchdev_notifier_fdb_info { struct switchdev_notifier_port_obj_info { struct switchdev_notifier_info info; /* must be first */ const struct switchdev_obj *obj; - struct switchdev_trans *trans; bool handled; }; @@ -288,7 +287,6 @@ int switchdev_handle_port_obj_add(struct net_device *dev, bool (*check_cb)(const struct net_device *dev), int (*add_cb)(struct net_device *dev, const struct switchdev_obj *obj, - struct switchdev_trans *trans, struct netlink_ext_ack *extack)); int switchdev_handle_port_obj_del(struct net_device *dev, struct switchdev_notifier_port_obj_info *port_obj_info, @@ -372,7 +370,6 @@ switchdev_handle_port_obj_add(struct net_device *dev, bool (*check_cb)(const struct net_device *dev), int (*add_cb)(struct net_device *dev, const struct switchdev_obj *obj, - struct switchdev_trans *trans, struct netlink_ext_ack *extack)) { return 0; diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h index 3822520eeeae9..9eaa85cc87eaa 100644 --- a/net/dsa/dsa_priv.h +++ b/net/dsa/dsa_priv.h @@ -52,7 +52,6 @@ struct dsa_notifier_fdb_info { /* DSA_NOTIFIER_MDB_* */ struct dsa_notifier_mdb_info { const struct switchdev_obj_port_mdb *mdb; - struct switchdev_trans *trans; int sw_index; int port; }; @@ -60,7 +59,6 @@ struct dsa_notifier_mdb_info { /* DSA_NOTIFIER_VLAN_* */ struct dsa_notifier_vlan_info { const struct switchdev_obj_port_vlan *vlan; - struct switchdev_trans *trans; int sw_index; int port; }; @@ -151,8 +149,7 @@ int dsa_port_fdb_del(struct dsa_port *dp, const unsigned char *addr, u16 vid); int dsa_port_fdb_dump(struct dsa_port *dp, dsa_fdb_dump_cb_t *cb, void *data); int dsa_port_mdb_add(const struct dsa_port *dp, - const struct switchdev_obj_port_mdb *mdb, - struct switchdev_trans *trans); + const struct switchdev_obj_port_mdb *mdb); int dsa_port_mdb_del(const struct dsa_port *dp, const struct switchdev_obj_port_mdb *mdb); int dsa_port_pre_bridge_flags(const struct dsa_port *dp, unsigned long flags, @@ -162,8 +159,7 @@ int dsa_port_bridge_flags(const struct dsa_port *dp, unsigned long flags, int dsa_port_mrouter(struct dsa_port *dp, bool mrouter, struct switchdev_trans *trans); int dsa_port_vlan_add(struct dsa_port *dp, - const struct switchdev_obj_port_vlan *vlan, - struct switchdev_trans *trans); + const struct switchdev_obj_port_vlan *vlan); int dsa_port_vlan_del(struct dsa_port *dp, const struct switchdev_obj_port_vlan *vlan); int dsa_port_link_register_of(struct dsa_port *dp); diff --git a/net/dsa/port.c b/net/dsa/port.c index 73569c9af3cc0..6668fe188f477 100644 --- a/net/dsa/port.c +++ b/net/dsa/port.c @@ -425,13 +425,11 @@ int dsa_port_fdb_dump(struct dsa_port *dp, dsa_fdb_dump_cb_t *cb, void *data) } int dsa_port_mdb_add(const struct dsa_port *dp, - const struct switchdev_obj_port_mdb *mdb, - struct switchdev_trans *trans) + const struct switchdev_obj_port_mdb *mdb) { struct dsa_notifier_mdb_info info = { .sw_index = dp->ds->index, .port = dp->index, - .trans = trans, .mdb = mdb, }; @@ -451,13 +449,11 @@ int dsa_port_mdb_del(const struct dsa_port *dp, } int dsa_port_vlan_add(struct dsa_port *dp, - const struct switchdev_obj_port_vlan *vlan, - struct switchdev_trans *trans) + const struct switchdev_obj_port_vlan *vlan) { struct dsa_notifier_vlan_info info = { .sw_index = dp->ds->index, .port = dp->index, - .trans = trans, .vlan = vlan, }; diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 653d64f4a637f..0eefb50aae8c6 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -326,8 +326,7 @@ dsa_slave_vlan_check_for_8021q_uppers(struct net_device *slave, } static int dsa_slave_vlan_add(struct net_device *dev, - const struct switchdev_obj *obj, - struct switchdev_trans *trans) + const struct switchdev_obj *obj) { struct net_device *master = dsa_slave_to_master(dev); struct dsa_port *dp = dsa_slave_to_port(dev); @@ -345,7 +344,7 @@ static int dsa_slave_vlan_add(struct net_device *dev, /* Deny adding a bridge VLAN when there is already an 802.1Q upper with * the same VID. */ - if (trans->ph_prepare && br_vlan_enabled(dp->bridge_dev)) { + if (br_vlan_enabled(dp->bridge_dev)) { rcu_read_lock(); err = dsa_slave_vlan_check_for_8021q_uppers(dev, &vlan); rcu_read_unlock(); @@ -353,7 +352,7 @@ static int dsa_slave_vlan_add(struct net_device *dev, return err; } - err = dsa_port_vlan_add(dp, &vlan, trans); + err = dsa_port_vlan_add(dp, &vlan); if (err) return err; @@ -363,7 +362,7 @@ static int dsa_slave_vlan_add(struct net_device *dev, */ vlan.flags &= ~BRIDGE_VLAN_INFO_PVID; - err = dsa_port_vlan_add(dp->cpu_dp, &vlan, trans); + err = dsa_port_vlan_add(dp->cpu_dp, &vlan); if (err) return err; @@ -372,7 +371,6 @@ static int dsa_slave_vlan_add(struct net_device *dev, static int dsa_slave_port_obj_add(struct net_device *dev, const struct switchdev_obj *obj, - struct switchdev_trans *trans, struct netlink_ext_ack *extack) { struct dsa_port *dp = dsa_slave_to_port(dev); @@ -387,17 +385,16 @@ static int dsa_slave_port_obj_add(struct net_device *dev, case SWITCHDEV_OBJ_ID_PORT_MDB: if (obj->orig_dev != dev) return -EOPNOTSUPP; - err = dsa_port_mdb_add(dp, SWITCHDEV_OBJ_PORT_MDB(obj), trans); + err = dsa_port_mdb_add(dp, SWITCHDEV_OBJ_PORT_MDB(obj)); break; case SWITCHDEV_OBJ_ID_HOST_MDB: /* DSA can directly translate this to a normal MDB add, * but on the CPU port. */ - err = dsa_port_mdb_add(dp->cpu_dp, SWITCHDEV_OBJ_PORT_MDB(obj), - trans); + err = dsa_port_mdb_add(dp->cpu_dp, SWITCHDEV_OBJ_PORT_MDB(obj)); break; case SWITCHDEV_OBJ_ID_PORT_VLAN: - err = dsa_slave_vlan_add(dev, obj, trans); + err = dsa_slave_vlan_add(dev, obj); break; default: err = -EOPNOTSUPP; @@ -1286,28 +1283,15 @@ static int dsa_slave_vlan_rx_add_vid(struct net_device *dev, __be16 proto, /* This API only allows programming tagged, non-PVID VIDs */ .flags = 0, }; - struct switchdev_trans trans; int ret; /* User port... */ - trans.ph_prepare = true; - ret = dsa_port_vlan_add(dp, &vlan, &trans); - if (ret) - return ret; - - trans.ph_prepare = false; - ret = dsa_port_vlan_add(dp, &vlan, &trans); + ret = dsa_port_vlan_add(dp, &vlan); if (ret) return ret; /* And CPU port... */ - trans.ph_prepare = true; - ret = dsa_port_vlan_add(dp->cpu_dp, &vlan, &trans); - if (ret) - return ret; - - trans.ph_prepare = false; - ret = dsa_port_vlan_add(dp->cpu_dp, &vlan, &trans); + ret = dsa_port_vlan_add(dp->cpu_dp, &vlan); if (ret) return ret; diff --git a/net/dsa/switch.c b/net/dsa/switch.c index 3fb362b6874e3..5b0bf29e13752 100644 --- a/net/dsa/switch.c +++ b/net/dsa/switch.c @@ -190,8 +190,8 @@ static bool dsa_switch_mdb_match(struct dsa_switch *ds, int port, return false; } -static int dsa_switch_mdb_prepare(struct dsa_switch *ds, - struct dsa_notifier_mdb_info *info) +static int dsa_switch_mdb_add(struct dsa_switch *ds, + struct dsa_notifier_mdb_info *info) { int port, err; @@ -206,20 +206,6 @@ static int dsa_switch_mdb_prepare(struct dsa_switch *ds, } } - return 0; -} - -static int dsa_switch_mdb_add(struct dsa_switch *ds, - struct dsa_notifier_mdb_info *info) -{ - int port; - - if (switchdev_trans_ph_prepare(info->trans)) - return dsa_switch_mdb_prepare(ds, info); - - if (!ds->ops->port_mdb_add) - return 0; - for (port = 0; port < ds->num_ports; port++) if (dsa_switch_mdb_match(ds, port, info)) ds->ops->port_mdb_add(ds, port, info->mdb); @@ -251,8 +237,8 @@ static bool dsa_switch_vlan_match(struct dsa_switch *ds, int port, return false; } -static int dsa_switch_vlan_prepare(struct dsa_switch *ds, - struct dsa_notifier_vlan_info *info) +static int dsa_switch_vlan_add(struct dsa_switch *ds, + struct dsa_notifier_vlan_info *info) { int port, err; @@ -267,20 +253,6 @@ static int dsa_switch_vlan_prepare(struct dsa_switch *ds, } } - return 0; -} - -static int dsa_switch_vlan_add(struct dsa_switch *ds, - struct dsa_notifier_vlan_info *info) -{ - int port; - - if (switchdev_trans_ph_prepare(info->trans)) - return dsa_switch_vlan_prepare(ds, info); - - if (!ds->ops->port_vlan_add) - return 0; - for (port = 0; port < ds->num_ports; port++) if (dsa_switch_vlan_match(ds, port, info)) ds->ops->port_vlan_add(ds, port, info->vlan); diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index 23d8685453627..a575bb33ee6cd 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -221,7 +221,6 @@ static size_t switchdev_obj_size(const struct switchdev_obj *obj) static int switchdev_port_obj_notify(enum switchdev_notifier_type nt, struct net_device *dev, const struct switchdev_obj *obj, - struct switchdev_trans *trans, struct netlink_ext_ack *extack) { int rc; @@ -229,7 +228,6 @@ static int switchdev_port_obj_notify(enum switchdev_notifier_type nt, struct switchdev_notifier_port_obj_info obj_info = { .obj = obj, - .trans = trans, .handled = false, }; @@ -248,35 +246,10 @@ static int switchdev_port_obj_add_now(struct net_device *dev, const struct switchdev_obj *obj, struct netlink_ext_ack *extack) { - struct switchdev_trans trans; - int err; - ASSERT_RTNL(); - /* Phase I: prepare for obj add. Driver/device should fail - * here if there are going to be issues in the commit phase, - * such as lack of resources or support. The driver/device - * should reserve resources needed for the commit phase here, - * but should not commit the obj. - */ - - trans.ph_prepare = true; - err = switchdev_port_obj_notify(SWITCHDEV_PORT_OBJ_ADD, - dev, obj, &trans, extack); - if (err) - return err; - - /* Phase II: commit obj add. This cannot fail as a fault - * of driver/device. If it does, it's a bug in the driver/device - * because the driver said everythings was OK in phase I. - */ - - trans.ph_prepare = false; - err = switchdev_port_obj_notify(SWITCHDEV_PORT_OBJ_ADD, - dev, obj, &trans, extack); - WARN(err, "%s: Commit of object (id=%d) failed.\n", dev->name, obj->id); - - return err; + return switchdev_port_obj_notify(SWITCHDEV_PORT_OBJ_ADD, + dev, obj, extack); } static void switchdev_port_obj_add_deferred(struct net_device *dev, @@ -307,10 +280,6 @@ static int switchdev_port_obj_add_defer(struct net_device *dev, * @obj: object to add * @extack: netlink extended ack * - * Use a 2-phase prepare-commit transaction model to ensure - * system is not left in a partially updated state due to - * failure from driver/device. - * * rtnl_lock must be held and must not be in atomic section, * in case SWITCHDEV_F_DEFER flag is not set. */ @@ -329,7 +298,7 @@ static int switchdev_port_obj_del_now(struct net_device *dev, const struct switchdev_obj *obj) { return switchdev_port_obj_notify(SWITCHDEV_PORT_OBJ_DEL, - dev, obj, NULL, NULL); + dev, obj, NULL); } static void switchdev_port_obj_del_deferred(struct net_device *dev, @@ -449,7 +418,6 @@ static int __switchdev_handle_port_obj_add(struct net_device *dev, bool (*check_cb)(const struct net_device *dev), int (*add_cb)(struct net_device *dev, const struct switchdev_obj *obj, - struct switchdev_trans *trans, struct netlink_ext_ack *extack)) { struct netlink_ext_ack *extack; @@ -462,8 +430,7 @@ static int __switchdev_handle_port_obj_add(struct net_device *dev, if (check_cb(dev)) { /* This flag is only checked if the return value is success. */ port_obj_info->handled = true; - return add_cb(dev, port_obj_info->obj, port_obj_info->trans, - extack); + return add_cb(dev, port_obj_info->obj, extack); } /* Switch ports might be stacked under e.g. a LAG. Ignore the @@ -491,7 +458,6 @@ int switchdev_handle_port_obj_add(struct net_device *dev, bool (*check_cb)(const struct net_device *dev), int (*add_cb)(struct net_device *dev, const struct switchdev_obj *obj, - struct switchdev_trans *trans, struct netlink_ext_ack *extack)) { int err; -- GitLab From cf6def51badebbacaedd3999a4d94761197e18b4 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Sat, 9 Jan 2021 02:01:49 +0200 Subject: [PATCH 0489/2012] net: switchdev: delete switchdev_port_obj_add_now After the removal of the transactional model inside switchdev_port_obj_add_now, it has no added value and we can just call switchdev_port_obj_notify directly, bypassing this function. Let's delete it. Signed-off-by: Vladimir Oltean Reviewed-by: Florian Fainelli Reviewed-by: Ido Schimmel Acked-by: Linus Walleij Acked-by: Jiri Pirko Signed-off-by: Jakub Kicinski --- net/switchdev/switchdev.c | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index a575bb33ee6cd..3509d362056d3 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -242,23 +242,15 @@ static int switchdev_port_obj_notify(enum switchdev_notifier_type nt, return 0; } -static int switchdev_port_obj_add_now(struct net_device *dev, - const struct switchdev_obj *obj, - struct netlink_ext_ack *extack) -{ - ASSERT_RTNL(); - - return switchdev_port_obj_notify(SWITCHDEV_PORT_OBJ_ADD, - dev, obj, extack); -} - static void switchdev_port_obj_add_deferred(struct net_device *dev, const void *data) { const struct switchdev_obj *obj = data; int err; - err = switchdev_port_obj_add_now(dev, obj, NULL); + ASSERT_RTNL(); + err = switchdev_port_obj_notify(SWITCHDEV_PORT_OBJ_ADD, + dev, obj, NULL); if (err && err != -EOPNOTSUPP) netdev_err(dev, "failed (err=%d) to add object (id=%d)\n", err, obj->id); @@ -290,7 +282,8 @@ int switchdev_port_obj_add(struct net_device *dev, if (obj->flags & SWITCHDEV_F_DEFER) return switchdev_port_obj_add_defer(dev, obj); ASSERT_RTNL(); - return switchdev_port_obj_add_now(dev, obj, extack); + return switchdev_port_obj_notify(SWITCHDEV_PORT_OBJ_ADD, + dev, obj, extack); } EXPORT_SYMBOL_GPL(switchdev_port_obj_add); -- GitLab From bae33f2b5afea932176c1b9096851c81dc0983de Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Sat, 9 Jan 2021 02:01:50 +0200 Subject: [PATCH 0490/2012] net: switchdev: remove the transaction structure from port attributes Since the introduction of the switchdev API, port attributes were transmitted to drivers for offloading using a two-step transactional model, with a prepare phase that was supposed to catch all errors, and a commit phase that was supposed to never fail. Some classes of failures can never be avoided, like hardware access, or memory allocation. In the latter case, merely attempting to move the memory allocation to the preparation phase makes it impossible to avoid memory leaks, since commit 91cf8eceffc1 ("switchdev: Remove unused transaction item queue") which has removed the unused mechanism of passing on the allocated memory between one phase and another. It is time we admit that separating the preparation from the commit phase is something that is best left for the driver to decide, and not something that should be baked into the API, especially since there are no switchdev callers that depend on this. This patch removes the struct switchdev_trans member from switchdev port attribute notifier structures, and converts drivers to not look at this member. In part, this patch contains a revert of my previous commit 2e554a7a5d8a ("net: dsa: propagate switchdev vlan_filtering prepare phase to drivers"). For the most part, the conversion was trivial except for: - Rocker's world implementation based on Broadcom OF-DPA had an odd implementation of ofdpa_port_attr_bridge_flags_set. The conversion was done mechanically, by pasting the implementation twice, then only keeping the code that would get executed during prepare phase on top, then only keeping the code that gets executed during the commit phase on bottom, then simplifying the resulting code until this was obtained. - DSA's offloading of STP state, bridge flags, VLAN filtering and multicast router could be converted right away. But the ageing time could not, so a shim was introduced and this was left for a further commit. Signed-off-by: Vladimir Oltean Acked-by: Linus Walleij Acked-by: Jiri Pirko Reviewed-by: Kurt Kanzenbach # hellcreek Reviewed-by: Linus Walleij # RTL8366RB Reviewed-by: Ido Schimmel Reviewed-by: Florian Fainelli Signed-off-by: Jakub Kicinski --- drivers/net/dsa/b53/b53_common.c | 6 +- drivers/net/dsa/b53/b53_priv.h | 3 +- drivers/net/dsa/dsa_loop.c | 3 +- drivers/net/dsa/hirschmann/hellcreek.c | 6 +- drivers/net/dsa/lantiq_gswip.c | 26 +---- drivers/net/dsa/microchip/ksz8795.c | 6 +- drivers/net/dsa/microchip/ksz9477.c | 6 +- drivers/net/dsa/mt7530.c | 6 +- drivers/net/dsa/mv88e6xxx/chip.c | 7 +- drivers/net/dsa/ocelot/felix.c | 5 +- drivers/net/dsa/qca8k.c | 6 +- drivers/net/dsa/realtek-smi-core.h | 3 +- drivers/net/dsa/rtl8366.c | 11 +-- drivers/net/dsa/sja1105/sja1105.h | 3 +- drivers/net/dsa/sja1105/sja1105_devlink.c | 9 +- drivers/net/dsa/sja1105/sja1105_main.c | 17 ++-- .../marvell/prestera/prestera_switchdev.c | 37 ++----- .../mellanox/mlxsw/spectrum_switchdev.c | 63 +++--------- drivers/net/ethernet/mscc/ocelot.c | 32 ++---- drivers/net/ethernet/mscc/ocelot_net.c | 13 +-- drivers/net/ethernet/rocker/rocker.h | 6 +- drivers/net/ethernet/rocker/rocker_main.c | 46 +++------ drivers/net/ethernet/rocker/rocker_ofdpa.c | 23 ++--- drivers/net/ethernet/ti/cpsw_switchdev.c | 20 +--- drivers/staging/fsl-dpaa2/ethsw/ethsw.c | 21 +--- include/net/dsa.h | 3 +- include/net/switchdev.h | 7 +- include/soc/mscc/ocelot.h | 3 +- net/dsa/dsa_priv.h | 18 ++-- net/dsa/port.c | 97 ++++++++----------- net/dsa/slave.c | 17 ++-- net/dsa/switch.c | 11 +-- net/switchdev/switchdev.c | 46 +-------- 33 files changed, 162 insertions(+), 424 deletions(-) diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index bab174c052b3e..f07f547db53e3 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -1374,14 +1374,10 @@ void b53_phylink_mac_link_up(struct dsa_switch *ds, int port, } EXPORT_SYMBOL(b53_phylink_mac_link_up); -int b53_vlan_filtering(struct dsa_switch *ds, int port, bool vlan_filtering, - struct switchdev_trans *trans) +int b53_vlan_filtering(struct dsa_switch *ds, int port, bool vlan_filtering) { struct b53_device *dev = ds->priv; - if (switchdev_trans_ph_prepare(trans)) - return 0; - b53_enable_vlan(dev, dev->vlan_enabled, vlan_filtering); return 0; diff --git a/drivers/net/dsa/b53/b53_priv.h b/drivers/net/dsa/b53/b53_priv.h index 6d0c724763c70..9ab0fb409f78a 100644 --- a/drivers/net/dsa/b53/b53_priv.h +++ b/drivers/net/dsa/b53/b53_priv.h @@ -348,8 +348,7 @@ void b53_phylink_mac_link_up(struct dsa_switch *ds, int port, struct phy_device *phydev, int speed, int duplex, bool tx_pause, bool rx_pause); -int b53_vlan_filtering(struct dsa_switch *ds, int port, bool vlan_filtering, - struct switchdev_trans *trans); +int b53_vlan_filtering(struct dsa_switch *ds, int port, bool vlan_filtering); int b53_vlan_prepare(struct dsa_switch *ds, int port, const struct switchdev_obj_port_vlan *vlan); void b53_vlan_add(struct dsa_switch *ds, int port, diff --git a/drivers/net/dsa/dsa_loop.c b/drivers/net/dsa/dsa_loop.c index 3be9f665d174b..8e3d623f4dbd2 100644 --- a/drivers/net/dsa/dsa_loop.c +++ b/drivers/net/dsa/dsa_loop.c @@ -190,8 +190,7 @@ static void dsa_loop_port_stp_state_set(struct dsa_switch *ds, int port, } static int dsa_loop_port_vlan_filtering(struct dsa_switch *ds, int port, - bool vlan_filtering, - struct switchdev_trans *trans) + bool vlan_filtering) { dev_dbg(ds->dev, "%s: port: %d, vlan_filtering: %d\n", __func__, port, vlan_filtering); diff --git a/drivers/net/dsa/hirschmann/hellcreek.c b/drivers/net/dsa/hirschmann/hellcreek.c index a59cc40170fc4..4e1dbf91720c8 100644 --- a/drivers/net/dsa/hirschmann/hellcreek.c +++ b/drivers/net/dsa/hirschmann/hellcreek.c @@ -858,14 +858,10 @@ static int hellcreek_fdb_dump(struct dsa_switch *ds, int port, } static int hellcreek_vlan_filtering(struct dsa_switch *ds, int port, - bool vlan_filtering, - struct switchdev_trans *trans) + bool vlan_filtering) { struct hellcreek *hellcreek = ds->priv; - if (switchdev_trans_ph_prepare(trans)) - return 0; - dev_dbg(hellcreek->dev, "%s VLAN filtering on port %d\n", vlan_filtering ? "Enable" : "Disable", port); diff --git a/drivers/net/dsa/lantiq_gswip.c b/drivers/net/dsa/lantiq_gswip.c index 72cadd16056fd..1dddca92d17ee 100644 --- a/drivers/net/dsa/lantiq_gswip.c +++ b/drivers/net/dsa/lantiq_gswip.c @@ -727,23 +727,14 @@ static int gswip_pce_load_microcode(struct gswip_priv *priv) } static int gswip_port_vlan_filtering(struct dsa_switch *ds, int port, - bool vlan_filtering, - struct switchdev_trans *trans) + bool vlan_filtering) { + struct net_device *bridge = dsa_to_port(ds, port)->bridge_dev; struct gswip_priv *priv = ds->priv; /* Do not allow changing the VLAN filtering options while in bridge */ - if (switchdev_trans_ph_prepare(trans)) { - struct net_device *bridge = dsa_to_port(ds, port)->bridge_dev; - - if (!bridge) - return 0; - - if (!!(priv->port_vlan_filter & BIT(port)) != vlan_filtering) - return -EIO; - - return 0; - } + if (bridge && !!(priv->port_vlan_filter & BIT(port)) != vlan_filtering) + return -EIO; if (vlan_filtering) { /* Use port based VLAN tag */ @@ -781,15 +772,8 @@ static int gswip_setup(struct dsa_switch *ds) /* disable port fetch/store dma on all ports */ for (i = 0; i < priv->hw_info->max_ports; i++) { - struct switchdev_trans trans; - - /* Skip the prepare phase, this shouldn't return an error - * during setup. - */ - trans.ph_prepare = false; - gswip_port_disable(ds, i); - gswip_port_vlan_filtering(ds, i, false, &trans); + gswip_port_vlan_filtering(ds, i, false); } /* enable Switch */ diff --git a/drivers/net/dsa/microchip/ksz8795.c b/drivers/net/dsa/microchip/ksz8795.c index a4c814f6a4b56..6a9ec8a0f92f8 100644 --- a/drivers/net/dsa/microchip/ksz8795.c +++ b/drivers/net/dsa/microchip/ksz8795.c @@ -783,14 +783,10 @@ static void ksz8795_flush_dyn_mac_table(struct ksz_device *dev, int port) } static int ksz8795_port_vlan_filtering(struct dsa_switch *ds, int port, - bool flag, - struct switchdev_trans *trans) + bool flag) { struct ksz_device *dev = ds->priv; - if (switchdev_trans_ph_prepare(trans)) - return 0; - ksz_cfg(dev, S_MIRROR_CTRL, SW_VLAN_ENABLE, flag); return 0; diff --git a/drivers/net/dsa/microchip/ksz9477.c b/drivers/net/dsa/microchip/ksz9477.c index 5a6ac0749ab0e..446c088ce5c56 100644 --- a/drivers/net/dsa/microchip/ksz9477.c +++ b/drivers/net/dsa/microchip/ksz9477.c @@ -493,14 +493,10 @@ static void ksz9477_flush_dyn_mac_table(struct ksz_device *dev, int port) } static int ksz9477_port_vlan_filtering(struct dsa_switch *ds, int port, - bool flag, - struct switchdev_trans *trans) + bool flag) { struct ksz_device *dev = ds->priv; - if (switchdev_trans_ph_prepare(trans)) - return 0; - if (flag) { ksz_port_cfg(dev, port, REG_PORT_LUE_CTRL, PORT_VLAN_LOOKUP_VID_0, true); diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c index 31d2d23bc8157..fcaddc9c93701 100644 --- a/drivers/net/dsa/mt7530.c +++ b/drivers/net/dsa/mt7530.c @@ -1376,12 +1376,8 @@ mt7530_vlan_cmd(struct mt7530_priv *priv, enum mt7530_vlan_cmd cmd, u16 vid) static int mt7530_port_vlan_filtering(struct dsa_switch *ds, int port, - bool vlan_filtering, - struct switchdev_trans *trans) + bool vlan_filtering) { - if (switchdev_trans_ph_prepare(trans)) - return 0; - if (vlan_filtering) { /* The port is being kept as VLAN-unaware port when bridge is * set up with vlan_filtering not being set, Otherwise, the diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index fb25cb87156af..bbf1a71ce55cf 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -1583,16 +1583,15 @@ static int mv88e6xxx_port_check_hw_vlan(struct dsa_switch *ds, int port, } static int mv88e6xxx_port_vlan_filtering(struct dsa_switch *ds, int port, - bool vlan_filtering, - struct switchdev_trans *trans) + bool vlan_filtering) { struct mv88e6xxx_chip *chip = ds->priv; u16 mode = vlan_filtering ? MV88E6XXX_PORT_CTL2_8021Q_MODE_SECURE : MV88E6XXX_PORT_CTL2_8021Q_MODE_DISABLED; int err; - if (switchdev_trans_ph_prepare(trans)) - return mv88e6xxx_max_vid(chip) ? 0 : -EOPNOTSUPP; + if (!mv88e6xxx_max_vid(chip)) + return -EOPNOTSUPP; mv88e6xxx_reg_lock(chip); err = mv88e6xxx_port_set_8021q_mode(chip, port, mode); diff --git a/drivers/net/dsa/ocelot/felix.c b/drivers/net/dsa/ocelot/felix.c index 216fcfbc5dafd..8c4cd9168dbaf 100644 --- a/drivers/net/dsa/ocelot/felix.c +++ b/drivers/net/dsa/ocelot/felix.c @@ -134,12 +134,11 @@ static int felix_vlan_prepare(struct dsa_switch *ds, int port, flags & BRIDGE_VLAN_INFO_UNTAGGED); } -static int felix_vlan_filtering(struct dsa_switch *ds, int port, bool enabled, - struct switchdev_trans *trans) +static int felix_vlan_filtering(struct dsa_switch *ds, int port, bool enabled) { struct ocelot *ocelot = ds->priv; - return ocelot_port_vlan_filtering(ocelot, port, enabled, trans); + return ocelot_port_vlan_filtering(ocelot, port, enabled); } static void felix_vlan_add(struct dsa_switch *ds, int port, diff --git a/drivers/net/dsa/qca8k.c b/drivers/net/dsa/qca8k.c index df99c696b6889..1de6473b221bd 100644 --- a/drivers/net/dsa/qca8k.c +++ b/drivers/net/dsa/qca8k.c @@ -1294,14 +1294,10 @@ qca8k_port_fdb_dump(struct dsa_switch *ds, int port, } static int -qca8k_port_vlan_filtering(struct dsa_switch *ds, int port, bool vlan_filtering, - struct switchdev_trans *trans) +qca8k_port_vlan_filtering(struct dsa_switch *ds, int port, bool vlan_filtering) { struct qca8k_priv *priv = ds->priv; - if (switchdev_trans_ph_prepare(trans)) - return 0; - if (vlan_filtering) { qca8k_rmw(priv, QCA8K_PORT_LOOKUP_CTRL(port), QCA8K_PORT_LOOKUP_VLAN_MODE, diff --git a/drivers/net/dsa/realtek-smi-core.h b/drivers/net/dsa/realtek-smi-core.h index 6b6a3dec09840..bc7bd47fb0375 100644 --- a/drivers/net/dsa/realtek-smi-core.h +++ b/drivers/net/dsa/realtek-smi-core.h @@ -131,8 +131,7 @@ int rtl8366_enable_vlan(struct realtek_smi *smi, bool enable); int rtl8366_reset_vlan(struct realtek_smi *smi); int rtl8366_init_vlan(struct realtek_smi *smi); int rtl8366_vlan_filtering(struct dsa_switch *ds, int port, - bool vlan_filtering, - struct switchdev_trans *trans); + bool vlan_filtering); int rtl8366_vlan_prepare(struct dsa_switch *ds, int port, const struct switchdev_obj_port_vlan *vlan); void rtl8366_vlan_add(struct dsa_switch *ds, int port, diff --git a/drivers/net/dsa/rtl8366.c b/drivers/net/dsa/rtl8366.c index 1a8f93112bc69..27f429aa89a6a 100644 --- a/drivers/net/dsa/rtl8366.c +++ b/drivers/net/dsa/rtl8366.c @@ -340,20 +340,15 @@ int rtl8366_init_vlan(struct realtek_smi *smi) } EXPORT_SYMBOL_GPL(rtl8366_init_vlan); -int rtl8366_vlan_filtering(struct dsa_switch *ds, int port, bool vlan_filtering, - struct switchdev_trans *trans) +int rtl8366_vlan_filtering(struct dsa_switch *ds, int port, bool vlan_filtering) { struct realtek_smi *smi = ds->priv; struct rtl8366_vlan_4k vlan4k; int ret; /* Use VLAN nr port + 1 since VLAN0 is not valid */ - if (switchdev_trans_ph_prepare(trans)) { - if (!smi->ops->is_vlan_valid(smi, port + 1)) - return -EINVAL; - - return 0; - } + if (!smi->ops->is_vlan_valid(smi, port + 1)) + return -EINVAL; dev_info(smi->dev, "%s filtering on port %d\n", vlan_filtering ? "enable" : "disable", diff --git a/drivers/net/dsa/sja1105/sja1105.h b/drivers/net/dsa/sja1105/sja1105.h index 4ebc4a5a7b355..d582308c24016 100644 --- a/drivers/net/dsa/sja1105/sja1105.h +++ b/drivers/net/dsa/sja1105/sja1105.h @@ -245,8 +245,7 @@ enum sja1105_reset_reason { int sja1105_static_config_reload(struct sja1105_private *priv, enum sja1105_reset_reason reason); -int sja1105_vlan_filtering(struct dsa_switch *ds, int port, bool enabled, - struct switchdev_trans *trans); +int sja1105_vlan_filtering(struct dsa_switch *ds, int port, bool enabled); void sja1105_frame_memory_partitioning(struct sja1105_private *priv); /* From sja1105_devlink.c */ diff --git a/drivers/net/dsa/sja1105/sja1105_devlink.c b/drivers/net/dsa/sja1105/sja1105_devlink.c index 4a2ec395bcb00..b4bf1b10e66ca 100644 --- a/drivers/net/dsa/sja1105/sja1105_devlink.c +++ b/drivers/net/dsa/sja1105/sja1105_devlink.c @@ -135,7 +135,6 @@ static int sja1105_best_effort_vlan_filtering_set(struct sja1105_private *priv, rtnl_lock(); for (port = 0; port < ds->num_ports; port++) { - struct switchdev_trans trans; struct dsa_port *dp; if (!dsa_is_user_port(ds, port)) @@ -144,13 +143,7 @@ static int sja1105_best_effort_vlan_filtering_set(struct sja1105_private *priv, dp = dsa_to_port(ds, port); vlan_filtering = dsa_port_is_vlan_filtering(dp); - trans.ph_prepare = true; - rc = sja1105_vlan_filtering(ds, port, vlan_filtering, &trans); - if (rc) - break; - - trans.ph_prepare = false; - rc = sja1105_vlan_filtering(ds, port, vlan_filtering, &trans); + rc = sja1105_vlan_filtering(ds, port, vlan_filtering); if (rc) break; } diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c index 807e65ac2518a..c1b7f2b0e40cb 100644 --- a/drivers/net/dsa/sja1105/sja1105_main.c +++ b/drivers/net/dsa/sja1105/sja1105_main.c @@ -2631,8 +2631,7 @@ static int sja1105_vlan_prepare(struct dsa_switch *ds, int port, * which can only be partially reconfigured at runtime (and not the TPID). * So a switch reset is required. */ -int sja1105_vlan_filtering(struct dsa_switch *ds, int port, bool enabled, - struct switchdev_trans *trans) +int sja1105_vlan_filtering(struct dsa_switch *ds, int port, bool enabled) { struct sja1105_l2_lookup_params_entry *l2_lookup_params; struct sja1105_general_params_entry *general_params; @@ -2644,16 +2643,12 @@ int sja1105_vlan_filtering(struct dsa_switch *ds, int port, bool enabled, u16 tpid, tpid2; int rc; - if (switchdev_trans_ph_prepare(trans)) { - list_for_each_entry(rule, &priv->flow_block.rules, list) { - if (rule->type == SJA1105_RULE_VL) { - dev_err(ds->dev, - "Cannot change VLAN filtering with active VL rules\n"); - return -EBUSY; - } + list_for_each_entry(rule, &priv->flow_block.rules, list) { + if (rule->type == SJA1105_RULE_VL) { + dev_err(ds->dev, + "Cannot change VLAN filtering with active VL rules\n"); + return -EBUSY; } - - return 0; } if (enabled) { diff --git a/drivers/net/ethernet/marvell/prestera/prestera_switchdev.c b/drivers/net/ethernet/marvell/prestera/prestera_switchdev.c index 3235458a55017..e2374a39e4f8a 100644 --- a/drivers/net/ethernet/marvell/prestera/prestera_switchdev.c +++ b/drivers/net/ethernet/marvell/prestera/prestera_switchdev.c @@ -580,16 +580,12 @@ int prestera_bridge_port_event(struct net_device *dev, unsigned long event, } static int prestera_port_attr_br_flags_set(struct prestera_port *port, - struct switchdev_trans *trans, struct net_device *dev, unsigned long flags) { struct prestera_bridge_port *br_port; int err; - if (switchdev_trans_ph_prepare(trans)) - return 0; - br_port = prestera_bridge_port_by_dev(port->sw->swdev, dev); if (!br_port) return 0; @@ -608,35 +604,26 @@ static int prestera_port_attr_br_flags_set(struct prestera_port *port, } static int prestera_port_attr_br_ageing_set(struct prestera_port *port, - struct switchdev_trans *trans, unsigned long ageing_clock_t) { unsigned long ageing_jiffies = clock_t_to_jiffies(ageing_clock_t); u32 ageing_time_ms = jiffies_to_msecs(ageing_jiffies); struct prestera_switch *sw = port->sw; - if (switchdev_trans_ph_prepare(trans)) { - if (ageing_time_ms < PRESTERA_MIN_AGEING_TIME_MS || - ageing_time_ms > PRESTERA_MAX_AGEING_TIME_MS) - return -ERANGE; - else - return 0; - } + if (ageing_time_ms < PRESTERA_MIN_AGEING_TIME_MS || + ageing_time_ms > PRESTERA_MAX_AGEING_TIME_MS) + return -ERANGE; return prestera_hw_switch_ageing_set(sw, ageing_time_ms); } static int prestera_port_attr_br_vlan_set(struct prestera_port *port, - struct switchdev_trans *trans, struct net_device *dev, bool vlan_enabled) { struct prestera_switch *sw = port->sw; struct prestera_bridge *bridge; - if (!switchdev_trans_ph_prepare(trans)) - return 0; - bridge = prestera_bridge_by_dev(sw->swdev, dev); if (WARN_ON(!bridge)) return -EINVAL; @@ -666,7 +653,6 @@ static int prestera_port_bridge_vlan_stp_set(struct prestera_port *port, } static int presterar_port_attr_stp_state_set(struct prestera_port *port, - struct switchdev_trans *trans, struct net_device *dev, u8 state) { @@ -675,9 +661,6 @@ static int presterar_port_attr_stp_state_set(struct prestera_port *port, int err; u16 vid; - if (switchdev_trans_ph_prepare(trans)) - return 0; - br_port = prestera_bridge_port_by_dev(port->sw->swdev, dev); if (!br_port) return 0; @@ -712,16 +695,14 @@ err_port_stp_set: } static int prestera_port_obj_attr_set(struct net_device *dev, - const struct switchdev_attr *attr, - struct switchdev_trans *trans) + const struct switchdev_attr *attr) { struct prestera_port *port = netdev_priv(dev); int err = 0; switch (attr->id) { case SWITCHDEV_ATTR_ID_PORT_STP_STATE: - err = presterar_port_attr_stp_state_set(port, trans, - attr->orig_dev, + err = presterar_port_attr_stp_state_set(port, attr->orig_dev, attr->u.stp_state); break; case SWITCHDEV_ATTR_ID_PORT_PRE_BRIDGE_FLAGS: @@ -730,17 +711,15 @@ static int prestera_port_obj_attr_set(struct net_device *dev, err = -EINVAL; break; case SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS: - err = prestera_port_attr_br_flags_set(port, trans, - attr->orig_dev, + err = prestera_port_attr_br_flags_set(port, attr->orig_dev, attr->u.brport_flags); break; case SWITCHDEV_ATTR_ID_BRIDGE_AGEING_TIME: - err = prestera_port_attr_br_ageing_set(port, trans, + err = prestera_port_attr_br_ageing_set(port, attr->u.ageing_time); break; case SWITCHDEV_ATTR_ID_BRIDGE_VLAN_FILTERING: - err = prestera_port_attr_br_vlan_set(port, trans, - attr->orig_dev, + err = prestera_port_attr_br_vlan_set(port, attr->orig_dev, attr->u.vlan_filtering); break; default: diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c index eff5920aed063..409c206f813fc 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c @@ -527,7 +527,6 @@ mlxsw_sp_port_bridge_vlan_stp_set(struct mlxsw_sp_port *mlxsw_sp_port, } static int mlxsw_sp_port_attr_stp_state_set(struct mlxsw_sp_port *mlxsw_sp_port, - struct switchdev_trans *trans, struct net_device *orig_dev, u8 state) { @@ -535,9 +534,6 @@ static int mlxsw_sp_port_attr_stp_state_set(struct mlxsw_sp_port *mlxsw_sp_port, struct mlxsw_sp_bridge_vlan *bridge_vlan; int err; - if (switchdev_trans_ph_prepare(trans)) - return 0; - /* It's possible we failed to enslave the port, yet this * operation is executed due to it being deferred. */ @@ -659,7 +655,6 @@ err_port_bridge_vlan_learning_set: static int mlxsw_sp_port_attr_br_pre_flags_set(struct mlxsw_sp_port *mlxsw_sp_port, - struct switchdev_trans *trans, unsigned long brport_flags) { if (brport_flags & ~(BR_LEARNING | BR_FLOOD | BR_MCAST_FLOOD)) @@ -669,16 +664,12 @@ static int mlxsw_sp_port_attr_br_pre_flags_set(struct mlxsw_sp_port } static int mlxsw_sp_port_attr_br_flags_set(struct mlxsw_sp_port *mlxsw_sp_port, - struct switchdev_trans *trans, struct net_device *orig_dev, unsigned long brport_flags) { struct mlxsw_sp_bridge_port *bridge_port; int err; - if (switchdev_trans_ph_prepare(trans)) - return 0; - bridge_port = mlxsw_sp_bridge_port_find(mlxsw_sp_port->mlxsw_sp->bridge, orig_dev); if (!bridge_port) @@ -724,35 +715,26 @@ static int mlxsw_sp_ageing_set(struct mlxsw_sp *mlxsw_sp, u32 ageing_time) } static int mlxsw_sp_port_attr_br_ageing_set(struct mlxsw_sp_port *mlxsw_sp_port, - struct switchdev_trans *trans, unsigned long ageing_clock_t) { struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; unsigned long ageing_jiffies = clock_t_to_jiffies(ageing_clock_t); u32 ageing_time = jiffies_to_msecs(ageing_jiffies) / 1000; - if (switchdev_trans_ph_prepare(trans)) { - if (ageing_time < MLXSW_SP_MIN_AGEING_TIME || - ageing_time > MLXSW_SP_MAX_AGEING_TIME) - return -ERANGE; - else - return 0; - } + if (ageing_time < MLXSW_SP_MIN_AGEING_TIME || + ageing_time > MLXSW_SP_MAX_AGEING_TIME) + return -ERANGE; return mlxsw_sp_ageing_set(mlxsw_sp, ageing_time); } static int mlxsw_sp_port_attr_br_vlan_set(struct mlxsw_sp_port *mlxsw_sp_port, - struct switchdev_trans *trans, struct net_device *orig_dev, bool vlan_enabled) { struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; struct mlxsw_sp_bridge_device *bridge_device; - if (!switchdev_trans_ph_prepare(trans)) - return 0; - bridge_device = mlxsw_sp_bridge_device_find(mlxsw_sp->bridge, orig_dev); if (WARN_ON(!bridge_device)) return -EINVAL; @@ -765,16 +747,12 @@ static int mlxsw_sp_port_attr_br_vlan_set(struct mlxsw_sp_port *mlxsw_sp_port, } static int mlxsw_sp_port_attr_br_vlan_proto_set(struct mlxsw_sp_port *mlxsw_sp_port, - struct switchdev_trans *trans, struct net_device *orig_dev, u16 vlan_proto) { struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; struct mlxsw_sp_bridge_device *bridge_device; - if (!switchdev_trans_ph_prepare(trans)) - return 0; - bridge_device = mlxsw_sp_bridge_device_find(mlxsw_sp->bridge, orig_dev); if (WARN_ON(!bridge_device)) return -EINVAL; @@ -784,16 +762,12 @@ static int mlxsw_sp_port_attr_br_vlan_proto_set(struct mlxsw_sp_port *mlxsw_sp_p } static int mlxsw_sp_port_attr_mrouter_set(struct mlxsw_sp_port *mlxsw_sp_port, - struct switchdev_trans *trans, struct net_device *orig_dev, bool is_port_mrouter) { struct mlxsw_sp_bridge_port *bridge_port; int err; - if (switchdev_trans_ph_prepare(trans)) - return 0; - bridge_port = mlxsw_sp_bridge_port_find(mlxsw_sp_port->mlxsw_sp->bridge, orig_dev); if (!bridge_port) @@ -825,7 +799,6 @@ static bool mlxsw_sp_mc_flood(const struct mlxsw_sp_bridge_port *bridge_port) } static int mlxsw_sp_port_mc_disabled_set(struct mlxsw_sp_port *mlxsw_sp_port, - struct switchdev_trans *trans, struct net_device *orig_dev, bool mc_disabled) { @@ -834,9 +807,6 @@ static int mlxsw_sp_port_mc_disabled_set(struct mlxsw_sp_port *mlxsw_sp_port, struct mlxsw_sp_bridge_port *bridge_port; int err; - if (switchdev_trans_ph_prepare(trans)) - return 0; - /* It's possible we failed to enslave the port, yet this * operation is executed due to it being deferred. */ @@ -896,16 +866,12 @@ mlxsw_sp_bridge_mrouter_update_mdb(struct mlxsw_sp *mlxsw_sp, static int mlxsw_sp_port_attr_br_mrouter_set(struct mlxsw_sp_port *mlxsw_sp_port, - struct switchdev_trans *trans, struct net_device *orig_dev, bool is_mrouter) { struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; struct mlxsw_sp_bridge_device *bridge_device; - if (switchdev_trans_ph_prepare(trans)) - return 0; - /* It's possible we failed to enslave the port, yet this * operation is executed due to it being deferred. */ @@ -921,54 +887,52 @@ mlxsw_sp_port_attr_br_mrouter_set(struct mlxsw_sp_port *mlxsw_sp_port, } static int mlxsw_sp_port_attr_set(struct net_device *dev, - const struct switchdev_attr *attr, - struct switchdev_trans *trans) + const struct switchdev_attr *attr) { struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); int err; switch (attr->id) { case SWITCHDEV_ATTR_ID_PORT_STP_STATE: - err = mlxsw_sp_port_attr_stp_state_set(mlxsw_sp_port, trans, + err = mlxsw_sp_port_attr_stp_state_set(mlxsw_sp_port, attr->orig_dev, attr->u.stp_state); break; case SWITCHDEV_ATTR_ID_PORT_PRE_BRIDGE_FLAGS: err = mlxsw_sp_port_attr_br_pre_flags_set(mlxsw_sp_port, - trans, attr->u.brport_flags); break; case SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS: - err = mlxsw_sp_port_attr_br_flags_set(mlxsw_sp_port, trans, + err = mlxsw_sp_port_attr_br_flags_set(mlxsw_sp_port, attr->orig_dev, attr->u.brport_flags); break; case SWITCHDEV_ATTR_ID_BRIDGE_AGEING_TIME: - err = mlxsw_sp_port_attr_br_ageing_set(mlxsw_sp_port, trans, + err = mlxsw_sp_port_attr_br_ageing_set(mlxsw_sp_port, attr->u.ageing_time); break; case SWITCHDEV_ATTR_ID_BRIDGE_VLAN_FILTERING: - err = mlxsw_sp_port_attr_br_vlan_set(mlxsw_sp_port, trans, + err = mlxsw_sp_port_attr_br_vlan_set(mlxsw_sp_port, attr->orig_dev, attr->u.vlan_filtering); break; case SWITCHDEV_ATTR_ID_BRIDGE_VLAN_PROTOCOL: - err = mlxsw_sp_port_attr_br_vlan_proto_set(mlxsw_sp_port, trans, + err = mlxsw_sp_port_attr_br_vlan_proto_set(mlxsw_sp_port, attr->orig_dev, attr->u.vlan_protocol); break; case SWITCHDEV_ATTR_ID_PORT_MROUTER: - err = mlxsw_sp_port_attr_mrouter_set(mlxsw_sp_port, trans, + err = mlxsw_sp_port_attr_mrouter_set(mlxsw_sp_port, attr->orig_dev, attr->u.mrouter); break; case SWITCHDEV_ATTR_ID_BRIDGE_MC_DISABLED: - err = mlxsw_sp_port_mc_disabled_set(mlxsw_sp_port, trans, + err = mlxsw_sp_port_mc_disabled_set(mlxsw_sp_port, attr->orig_dev, attr->u.mc_disabled); break; case SWITCHDEV_ATTR_ID_BRIDGE_MROUTER: - err = mlxsw_sp_port_attr_br_mrouter_set(mlxsw_sp_port, trans, + err = mlxsw_sp_port_attr_br_mrouter_set(mlxsw_sp_port, attr->orig_dev, attr->u.mrouter); break; @@ -977,8 +941,7 @@ static int mlxsw_sp_port_attr_set(struct net_device *dev, break; } - if (switchdev_trans_ph_commit(trans)) - mlxsw_sp_span_respin(mlxsw_sp_port->mlxsw_sp); + mlxsw_sp_span_respin(mlxsw_sp_port->mlxsw_sp); return err; } diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c index 0b9992bd66262..af620f7ce4690 100644 --- a/drivers/net/ethernet/mscc/ocelot.c +++ b/drivers/net/ethernet/mscc/ocelot.c @@ -208,25 +208,20 @@ static void ocelot_port_set_pvid(struct ocelot *ocelot, int port, } int ocelot_port_vlan_filtering(struct ocelot *ocelot, int port, - bool vlan_aware, struct switchdev_trans *trans) + bool vlan_aware) { + struct ocelot_vcap_block *block = &ocelot->block[VCAP_IS1]; struct ocelot_port *ocelot_port = ocelot->ports[port]; + struct ocelot_vcap_filter *filter; u32 val; - if (switchdev_trans_ph_prepare(trans)) { - struct ocelot_vcap_block *block = &ocelot->block[VCAP_IS1]; - struct ocelot_vcap_filter *filter; - - list_for_each_entry(filter, &block->rules, list) { - if (filter->ingress_port_mask & BIT(port) && - filter->action.vid_replace_ena) { - dev_err(ocelot->dev, - "Cannot change VLAN state with vlan modify rules active\n"); - return -EBUSY; - } + list_for_each_entry(filter, &block->rules, list) { + if (filter->ingress_port_mask & BIT(port) && + filter->action.vid_replace_ena) { + dev_err(ocelot->dev, + "Cannot change VLAN state with vlan modify rules active\n"); + return -EBUSY; } - - return 0; } ocelot_port->vlan_aware = vlan_aware; @@ -1179,7 +1174,6 @@ int ocelot_port_bridge_leave(struct ocelot *ocelot, int port, struct net_device *bridge) { struct ocelot_vlan pvid = {0}, native_vlan = {0}; - struct switchdev_trans trans; int ret; ocelot->bridge_mask &= ~BIT(port); @@ -1187,13 +1181,7 @@ int ocelot_port_bridge_leave(struct ocelot *ocelot, int port, if (!ocelot->bridge_mask) ocelot->hw_bridge_dev = NULL; - trans.ph_prepare = true; - ret = ocelot_port_vlan_filtering(ocelot, port, false, &trans); - if (ret) - return ret; - - trans.ph_prepare = false; - ret = ocelot_port_vlan_filtering(ocelot, port, false, &trans); + ret = ocelot_port_vlan_filtering(ocelot, port, false); if (ret) return ret; diff --git a/drivers/net/ethernet/mscc/ocelot_net.c b/drivers/net/ethernet/mscc/ocelot_net.c index 16d958a6e2066..4fb9095be3eab 100644 --- a/drivers/net/ethernet/mscc/ocelot_net.c +++ b/drivers/net/ethernet/mscc/ocelot_net.c @@ -825,12 +825,8 @@ static const struct ethtool_ops ocelot_ethtool_ops = { }; static void ocelot_port_attr_stp_state_set(struct ocelot *ocelot, int port, - struct switchdev_trans *trans, u8 state) { - if (switchdev_trans_ph_prepare(trans)) - return; - ocelot_bridge_stp_state_set(ocelot, port, state); } @@ -858,8 +854,7 @@ static void ocelot_port_attr_mc_set(struct ocelot *ocelot, int port, bool mc) } static int ocelot_port_attr_set(struct net_device *dev, - const struct switchdev_attr *attr, - struct switchdev_trans *trans) + const struct switchdev_attr *attr) { struct ocelot_port_private *priv = netdev_priv(dev); struct ocelot *ocelot = priv->port.ocelot; @@ -868,15 +863,13 @@ static int ocelot_port_attr_set(struct net_device *dev, switch (attr->id) { case SWITCHDEV_ATTR_ID_PORT_STP_STATE: - ocelot_port_attr_stp_state_set(ocelot, port, trans, - attr->u.stp_state); + ocelot_port_attr_stp_state_set(ocelot, port, attr->u.stp_state); break; case SWITCHDEV_ATTR_ID_BRIDGE_AGEING_TIME: ocelot_port_attr_ageing_set(ocelot, port, attr->u.ageing_time); break; case SWITCHDEV_ATTR_ID_BRIDGE_VLAN_FILTERING: - ocelot_port_vlan_filtering(ocelot, port, - attr->u.vlan_filtering, trans); + ocelot_port_vlan_filtering(ocelot, port, attr->u.vlan_filtering); break; case SWITCHDEV_ATTR_ID_BRIDGE_MC_DISABLED: ocelot_port_attr_mc_set(ocelot, port, !attr->u.mc_disabled); diff --git a/drivers/net/ethernet/rocker/rocker.h b/drivers/net/ethernet/rocker/rocker.h index 6fad25321dc5d..315a6e5c0f595 100644 --- a/drivers/net/ethernet/rocker/rocker.h +++ b/drivers/net/ethernet/rocker/rocker.h @@ -103,15 +103,13 @@ struct rocker_world_ops { int (*port_attr_stp_state_set)(struct rocker_port *rocker_port, u8 state); int (*port_attr_bridge_flags_set)(struct rocker_port *rocker_port, - unsigned long brport_flags, - struct switchdev_trans *trans); + unsigned long brport_flags); int (*port_attr_bridge_flags_support_get)(const struct rocker_port * rocker_port, unsigned long * p_brport_flags); int (*port_attr_bridge_ageing_time_set)(struct rocker_port *rocker_port, - u32 ageing_time, - struct switchdev_trans *trans); + u32 ageing_time); int (*port_obj_vlan_add)(struct rocker_port *rocker_port, const struct switchdev_obj_port_vlan *vlan); int (*port_obj_vlan_del)(struct rocker_port *rocker_port, diff --git a/drivers/net/ethernet/rocker/rocker_main.c b/drivers/net/ethernet/rocker/rocker_main.c index 1018d37593169..740a715c49c6d 100644 --- a/drivers/net/ethernet/rocker/rocker_main.c +++ b/drivers/net/ethernet/rocker/rocker_main.c @@ -1550,17 +1550,13 @@ static void rocker_world_port_stop(struct rocker_port *rocker_port) } static int rocker_world_port_attr_stp_state_set(struct rocker_port *rocker_port, - u8 state, - struct switchdev_trans *trans) + u8 state) { struct rocker_world_ops *wops = rocker_port->rocker->wops; if (!wops->port_attr_stp_state_set) return -EOPNOTSUPP; - if (switchdev_trans_ph_prepare(trans)) - return 0; - return wops->port_attr_stp_state_set(rocker_port, state); } @@ -1580,8 +1576,7 @@ rocker_world_port_attr_bridge_flags_support_get(const struct rocker_port * static int rocker_world_port_attr_pre_bridge_flags_set(struct rocker_port *rocker_port, - unsigned long brport_flags, - struct switchdev_trans *trans) + unsigned long brport_flags) { struct rocker_world_ops *wops = rocker_port->rocker->wops; unsigned long brport_flags_s; @@ -1603,37 +1598,26 @@ rocker_world_port_attr_pre_bridge_flags_set(struct rocker_port *rocker_port, static int rocker_world_port_attr_bridge_flags_set(struct rocker_port *rocker_port, - unsigned long brport_flags, - struct switchdev_trans *trans) + unsigned long brport_flags) { struct rocker_world_ops *wops = rocker_port->rocker->wops; if (!wops->port_attr_bridge_flags_set) return -EOPNOTSUPP; - if (switchdev_trans_ph_prepare(trans)) - return 0; - - return wops->port_attr_bridge_flags_set(rocker_port, brport_flags, - trans); + return wops->port_attr_bridge_flags_set(rocker_port, brport_flags); } static int rocker_world_port_attr_bridge_ageing_time_set(struct rocker_port *rocker_port, - u32 ageing_time, - struct switchdev_trans *trans) - + u32 ageing_time) { struct rocker_world_ops *wops = rocker_port->rocker->wops; if (!wops->port_attr_bridge_ageing_time_set) return -EOPNOTSUPP; - if (switchdev_trans_ph_prepare(trans)) - return 0; - - return wops->port_attr_bridge_ageing_time_set(rocker_port, ageing_time, - trans); + return wops->port_attr_bridge_ageing_time_set(rocker_port, ageing_time); } static int @@ -2062,8 +2046,7 @@ static const struct net_device_ops rocker_port_netdev_ops = { ********************/ static int rocker_port_attr_set(struct net_device *dev, - const struct switchdev_attr *attr, - struct switchdev_trans *trans) + const struct switchdev_attr *attr) { struct rocker_port *rocker_port = netdev_priv(dev); int err = 0; @@ -2071,23 +2054,19 @@ static int rocker_port_attr_set(struct net_device *dev, switch (attr->id) { case SWITCHDEV_ATTR_ID_PORT_STP_STATE: err = rocker_world_port_attr_stp_state_set(rocker_port, - attr->u.stp_state, - trans); + attr->u.stp_state); break; case SWITCHDEV_ATTR_ID_PORT_PRE_BRIDGE_FLAGS: err = rocker_world_port_attr_pre_bridge_flags_set(rocker_port, - attr->u.brport_flags, - trans); + attr->u.brport_flags); break; case SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS: err = rocker_world_port_attr_bridge_flags_set(rocker_port, - attr->u.brport_flags, - trans); + attr->u.brport_flags); break; case SWITCHDEV_ATTR_ID_BRIDGE_AGEING_TIME: err = rocker_world_port_attr_bridge_ageing_time_set(rocker_port, - attr->u.ageing_time, - trans); + attr->u.ageing_time); break; default: err = -EOPNOTSUPP; @@ -2719,8 +2698,7 @@ rocker_switchdev_port_attr_set_event(struct net_device *netdev, { int err; - err = rocker_port_attr_set(netdev, port_attr_info->attr, - port_attr_info->trans); + err = rocker_port_attr_set(netdev, port_attr_info->attr); port_attr_info->handled = true; return notifier_from_errno(err); diff --git a/drivers/net/ethernet/rocker/rocker_ofdpa.c b/drivers/net/ethernet/rocker/rocker_ofdpa.c index d9d5188b76277..d067da1ef0706 100644 --- a/drivers/net/ethernet/rocker/rocker_ofdpa.c +++ b/drivers/net/ethernet/rocker/rocker_ofdpa.c @@ -2488,8 +2488,7 @@ static int ofdpa_port_attr_stp_state_set(struct rocker_port *rocker_port, } static int ofdpa_port_attr_bridge_flags_set(struct rocker_port *rocker_port, - unsigned long brport_flags, - struct switchdev_trans *trans) + unsigned long brport_flags) { struct ofdpa_port *ofdpa_port = rocker_port->wpriv; unsigned long orig_flags; @@ -2497,14 +2496,11 @@ static int ofdpa_port_attr_bridge_flags_set(struct rocker_port *rocker_port, orig_flags = ofdpa_port->brport_flags; ofdpa_port->brport_flags = brport_flags; - if ((orig_flags ^ ofdpa_port->brport_flags) & BR_LEARNING && - !switchdev_trans_ph_prepare(trans)) + + if ((orig_flags ^ ofdpa_port->brport_flags) & BR_LEARNING) err = rocker_port_set_learning(ofdpa_port->rocker_port, !!(ofdpa_port->brport_flags & BR_LEARNING)); - if (switchdev_trans_ph_prepare(trans)) - ofdpa_port->brport_flags = orig_flags; - return err; } @@ -2520,18 +2516,15 @@ ofdpa_port_attr_bridge_flags_support_get(const struct rocker_port * static int ofdpa_port_attr_bridge_ageing_time_set(struct rocker_port *rocker_port, - u32 ageing_time, - struct switchdev_trans *trans) + u32 ageing_time) { struct ofdpa_port *ofdpa_port = rocker_port->wpriv; struct ofdpa *ofdpa = ofdpa_port->ofdpa; - if (!switchdev_trans_ph_prepare(trans)) { - ofdpa_port->ageing_time = clock_t_to_jiffies(ageing_time); - if (ofdpa_port->ageing_time < ofdpa->ageing_time) - ofdpa->ageing_time = ofdpa_port->ageing_time; - mod_timer(&ofdpa_port->ofdpa->fdb_cleanup_timer, jiffies); - } + ofdpa_port->ageing_time = clock_t_to_jiffies(ageing_time); + if (ofdpa_port->ageing_time < ofdpa->ageing_time) + ofdpa->ageing_time = ofdpa_port->ageing_time; + mod_timer(&ofdpa_port->ofdpa->fdb_cleanup_timer, jiffies); return 0; } diff --git a/drivers/net/ethernet/ti/cpsw_switchdev.c b/drivers/net/ethernet/ti/cpsw_switchdev.c index 3232f483c0686..9967cf985728d 100644 --- a/drivers/net/ethernet/ti/cpsw_switchdev.c +++ b/drivers/net/ethernet/ti/cpsw_switchdev.c @@ -24,16 +24,12 @@ struct cpsw_switchdev_event_work { unsigned long event; }; -static int cpsw_port_stp_state_set(struct cpsw_priv *priv, - struct switchdev_trans *trans, u8 state) +static int cpsw_port_stp_state_set(struct cpsw_priv *priv, u8 state) { struct cpsw_common *cpsw = priv->cpsw; u8 cpsw_state; int ret = 0; - if (switchdev_trans_ph_prepare(trans)) - return 0; - switch (state) { case BR_STATE_FORWARDING: cpsw_state = ALE_PORT_STATE_FORWARD; @@ -60,16 +56,12 @@ static int cpsw_port_stp_state_set(struct cpsw_priv *priv, } static int cpsw_port_attr_br_flags_set(struct cpsw_priv *priv, - struct switchdev_trans *trans, struct net_device *orig_dev, unsigned long brport_flags) { struct cpsw_common *cpsw = priv->cpsw; bool unreg_mcast_add = false; - if (switchdev_trans_ph_prepare(trans)) - return 0; - if (brport_flags & BR_MCAST_FLOOD) unreg_mcast_add = true; dev_dbg(priv->dev, "BR_MCAST_FLOOD: %d port %u\n", @@ -82,7 +74,6 @@ static int cpsw_port_attr_br_flags_set(struct cpsw_priv *priv, } static int cpsw_port_attr_br_flags_pre_set(struct net_device *netdev, - struct switchdev_trans *trans, unsigned long flags) { if (flags & ~(BR_LEARNING | BR_MCAST_FLOOD)) @@ -92,8 +83,7 @@ static int cpsw_port_attr_br_flags_pre_set(struct net_device *netdev, } static int cpsw_port_attr_set(struct net_device *ndev, - const struct switchdev_attr *attr, - struct switchdev_trans *trans) + const struct switchdev_attr *attr) { struct cpsw_priv *priv = netdev_priv(ndev); int ret; @@ -102,15 +92,15 @@ static int cpsw_port_attr_set(struct net_device *ndev, switch (attr->id) { case SWITCHDEV_ATTR_ID_PORT_PRE_BRIDGE_FLAGS: - ret = cpsw_port_attr_br_flags_pre_set(ndev, trans, + ret = cpsw_port_attr_br_flags_pre_set(ndev, attr->u.brport_flags); break; case SWITCHDEV_ATTR_ID_PORT_STP_STATE: - ret = cpsw_port_stp_state_set(priv, trans, attr->u.stp_state); + ret = cpsw_port_stp_state_set(priv, attr->u.stp_state); dev_dbg(priv->dev, "stp state: %u\n", attr->u.stp_state); break; case SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS: - ret = cpsw_port_attr_br_flags_set(priv, trans, attr->orig_dev, + ret = cpsw_port_attr_br_flags_set(priv, attr->orig_dev, attr->u.brport_flags); break; default: diff --git a/drivers/staging/fsl-dpaa2/ethsw/ethsw.c b/drivers/staging/fsl-dpaa2/ethsw/ethsw.c index 197dea9c3b424..ca3d07fe7f58e 100644 --- a/drivers/staging/fsl-dpaa2/ethsw/ethsw.c +++ b/drivers/staging/fsl-dpaa2/ethsw/ethsw.c @@ -901,19 +901,14 @@ static void dpaa2_switch_teardown_irqs(struct fsl_mc_device *sw_dev) } static int dpaa2_switch_port_attr_stp_state_set(struct net_device *netdev, - struct switchdev_trans *trans, u8 state) { struct ethsw_port_priv *port_priv = netdev_priv(netdev); - if (switchdev_trans_ph_prepare(trans)) - return 0; - return dpaa2_switch_port_set_stp_state(port_priv, state); } static int dpaa2_switch_port_attr_br_flags_pre_set(struct net_device *netdev, - struct switchdev_trans *trans, unsigned long flags) { if (flags & ~(BR_LEARNING | BR_FLOOD)) @@ -923,15 +918,11 @@ static int dpaa2_switch_port_attr_br_flags_pre_set(struct net_device *netdev, } static int dpaa2_switch_port_attr_br_flags_set(struct net_device *netdev, - struct switchdev_trans *trans, unsigned long flags) { struct ethsw_port_priv *port_priv = netdev_priv(netdev); int err = 0; - if (switchdev_trans_ph_prepare(trans)) - return 0; - /* Learning is enabled per switch */ err = dpaa2_switch_set_learning(port_priv->ethsw_data, !!(flags & BR_LEARNING)); @@ -945,22 +936,21 @@ exit: } static int dpaa2_switch_port_attr_set(struct net_device *netdev, - const struct switchdev_attr *attr, - struct switchdev_trans *trans) + const struct switchdev_attr *attr) { int err = 0; switch (attr->id) { case SWITCHDEV_ATTR_ID_PORT_STP_STATE: - err = dpaa2_switch_port_attr_stp_state_set(netdev, trans, + err = dpaa2_switch_port_attr_stp_state_set(netdev, attr->u.stp_state); break; case SWITCHDEV_ATTR_ID_PORT_PRE_BRIDGE_FLAGS: - err = dpaa2_switch_port_attr_br_flags_pre_set(netdev, trans, + err = dpaa2_switch_port_attr_br_flags_pre_set(netdev, attr->u.brport_flags); break; case SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS: - err = dpaa2_switch_port_attr_br_flags_set(netdev, trans, + err = dpaa2_switch_port_attr_br_flags_set(netdev, attr->u.brport_flags); break; case SWITCHDEV_ATTR_ID_BRIDGE_VLAN_FILTERING: @@ -1197,8 +1187,7 @@ static int dpaa2_switch_port_attr_set_event(struct net_device *netdev, { int err; - err = dpaa2_switch_port_attr_set(netdev, port_attr_info->attr, - port_attr_info->trans); + err = dpaa2_switch_port_attr_set(netdev, port_attr_info->attr); port_attr_info->handled = true; return notifier_from_errno(err); diff --git a/include/net/dsa.h b/include/net/dsa.h index 9a0cab5fb7c44..d0f0430919698 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -565,8 +565,7 @@ struct dsa_switch_ops { * VLAN support */ int (*port_vlan_filtering)(struct dsa_switch *ds, int port, - bool vlan_filtering, - struct switchdev_trans *trans); + bool vlan_filtering); int (*port_vlan_prepare)(struct dsa_switch *ds, int port, const struct switchdev_obj_port_vlan *vlan); void (*port_vlan_add)(struct dsa_switch *ds, int port, diff --git a/include/net/switchdev.h b/include/net/switchdev.h index cbe6e35d51f54..f873e2c5e125a 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -239,7 +239,6 @@ struct switchdev_notifier_port_obj_info { struct switchdev_notifier_port_attr_info { struct switchdev_notifier_info info; /* must be first */ const struct switchdev_attr *attr; - struct switchdev_trans *trans; bool handled; }; @@ -298,8 +297,7 @@ int switchdev_handle_port_attr_set(struct net_device *dev, struct switchdev_notifier_port_attr_info *port_attr_info, bool (*check_cb)(const struct net_device *dev), int (*set_cb)(struct net_device *dev, - const struct switchdev_attr *attr, - struct switchdev_trans *trans)); + const struct switchdev_attr *attr)); #else static inline void switchdev_deferred_process(void) @@ -390,8 +388,7 @@ switchdev_handle_port_attr_set(struct net_device *dev, struct switchdev_notifier_port_attr_info *port_attr_info, bool (*check_cb)(const struct net_device *dev), int (*set_cb)(struct net_device *dev, - const struct switchdev_attr *attr, - struct switchdev_trans *trans)) + const struct switchdev_attr *attr)) { return 0; } diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h index 2f4cd3288bccc..f3db75c0172be 100644 --- a/include/soc/mscc/ocelot.h +++ b/include/soc/mscc/ocelot.h @@ -739,8 +739,7 @@ int ocelot_get_ts_info(struct ocelot *ocelot, int port, void ocelot_set_ageing_time(struct ocelot *ocelot, unsigned int msecs); void ocelot_adjust_link(struct ocelot *ocelot, int port, struct phy_device *phydev); -int ocelot_port_vlan_filtering(struct ocelot *ocelot, int port, bool enabled, - struct switchdev_trans *trans); +int ocelot_port_vlan_filtering(struct ocelot *ocelot, int port, bool enabled); void ocelot_bridge_stp_state_set(struct ocelot *ocelot, int port, u8 state); int ocelot_port_bridge_join(struct ocelot *ocelot, int port, struct net_device *bridge); diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h index 9eaa85cc87eaa..3e6063bf3f176 100644 --- a/net/dsa/dsa_priv.h +++ b/net/dsa/dsa_priv.h @@ -128,19 +128,16 @@ static inline struct net_device *dsa_master_find_slave(struct net_device *dev, } /* port.c */ -int dsa_port_set_state(struct dsa_port *dp, u8 state, - struct switchdev_trans *trans); +int dsa_port_set_state(struct dsa_port *dp, u8 state); int dsa_port_enable_rt(struct dsa_port *dp, struct phy_device *phy); int dsa_port_enable(struct dsa_port *dp, struct phy_device *phy); void dsa_port_disable_rt(struct dsa_port *dp); void dsa_port_disable(struct dsa_port *dp); int dsa_port_bridge_join(struct dsa_port *dp, struct net_device *br); void dsa_port_bridge_leave(struct dsa_port *dp, struct net_device *br); -int dsa_port_vlan_filtering(struct dsa_port *dp, bool vlan_filtering, - struct switchdev_trans *trans); +int dsa_port_vlan_filtering(struct dsa_port *dp, bool vlan_filtering); bool dsa_port_skip_vlan_configuration(struct dsa_port *dp); -int dsa_port_ageing_time(struct dsa_port *dp, clock_t ageing_clock, - struct switchdev_trans *trans); +int dsa_port_ageing_time(struct dsa_port *dp, clock_t ageing_clock); int dsa_port_mtu_change(struct dsa_port *dp, int new_mtu, bool propagate_upstream); int dsa_port_fdb_add(struct dsa_port *dp, const unsigned char *addr, @@ -152,12 +149,9 @@ int dsa_port_mdb_add(const struct dsa_port *dp, const struct switchdev_obj_port_mdb *mdb); int dsa_port_mdb_del(const struct dsa_port *dp, const struct switchdev_obj_port_mdb *mdb); -int dsa_port_pre_bridge_flags(const struct dsa_port *dp, unsigned long flags, - struct switchdev_trans *trans); -int dsa_port_bridge_flags(const struct dsa_port *dp, unsigned long flags, - struct switchdev_trans *trans); -int dsa_port_mrouter(struct dsa_port *dp, bool mrouter, - struct switchdev_trans *trans); +int dsa_port_pre_bridge_flags(const struct dsa_port *dp, unsigned long flags); +int dsa_port_bridge_flags(const struct dsa_port *dp, unsigned long flags); +int dsa_port_mrouter(struct dsa_port *dp, bool mrouter); int dsa_port_vlan_add(struct dsa_port *dp, const struct switchdev_obj_port_vlan *vlan); int dsa_port_vlan_del(struct dsa_port *dp, diff --git a/net/dsa/port.c b/net/dsa/port.c index 6668fe188f477..14bf0053ae011 100644 --- a/net/dsa/port.c +++ b/net/dsa/port.c @@ -40,17 +40,15 @@ static int dsa_port_notify(const struct dsa_port *dp, unsigned long e, void *v) return notifier_to_errno(err); } -int dsa_port_set_state(struct dsa_port *dp, u8 state, - struct switchdev_trans *trans) +int dsa_port_set_state(struct dsa_port *dp, u8 state) { struct dsa_switch *ds = dp->ds; int port = dp->index; - if (switchdev_trans_ph_prepare(trans)) - return ds->ops->port_stp_state_set ? 0 : -EOPNOTSUPP; + if (!ds->ops->port_stp_state_set) + return -EOPNOTSUPP; - if (ds->ops->port_stp_state_set) - ds->ops->port_stp_state_set(ds, port, state); + ds->ops->port_stp_state_set(ds, port, state); if (ds->ops->port_fast_age) { /* Fast age FDB entries or flush appropriate forwarding database @@ -75,7 +73,7 @@ static void dsa_port_set_state_now(struct dsa_port *dp, u8 state) { int err; - err = dsa_port_set_state(dp, state, NULL); + err = dsa_port_set_state(dp, state); if (err) pr_err("DSA: failed to set STP state %u (%d)\n", state, err); } @@ -145,7 +143,7 @@ int dsa_port_bridge_join(struct dsa_port *dp, struct net_device *br) int err; /* Set the flooding mode before joining the port in the switch */ - err = dsa_port_bridge_flags(dp, BR_FLOOD | BR_MCAST_FLOOD, NULL); + err = dsa_port_bridge_flags(dp, BR_FLOOD | BR_MCAST_FLOOD); if (err) return err; @@ -158,7 +156,7 @@ int dsa_port_bridge_join(struct dsa_port *dp, struct net_device *br) /* The bridging is rolled back on error */ if (err) { - dsa_port_bridge_flags(dp, 0, NULL); + dsa_port_bridge_flags(dp, 0); dp->bridge_dev = NULL; } @@ -185,7 +183,7 @@ void dsa_port_bridge_leave(struct dsa_port *dp, struct net_device *br) pr_err("DSA: failed to notify DSA_NOTIFIER_BRIDGE_LEAVE\n"); /* Port is leaving the bridge, disable flooding */ - dsa_port_bridge_flags(dp, 0, NULL); + dsa_port_bridge_flags(dp, 0); /* Port left the bridge, put in BR_STATE_DISABLED by the bridge layer, * so allow it to be in BR_STATE_FORWARDING to be kept functional @@ -259,43 +257,36 @@ static bool dsa_port_can_apply_vlan_filtering(struct dsa_port *dp, return true; } -int dsa_port_vlan_filtering(struct dsa_port *dp, bool vlan_filtering, - struct switchdev_trans *trans) +int dsa_port_vlan_filtering(struct dsa_port *dp, bool vlan_filtering) { struct dsa_switch *ds = dp->ds; + bool apply; int err; - if (switchdev_trans_ph_prepare(trans)) { - bool apply; - - if (!ds->ops->port_vlan_filtering) - return -EOPNOTSUPP; + if (!ds->ops->port_vlan_filtering) + return -EOPNOTSUPP; - /* We are called from dsa_slave_switchdev_blocking_event(), - * which is not under rcu_read_lock(), unlike - * dsa_slave_switchdev_event(). - */ - rcu_read_lock(); - apply = dsa_port_can_apply_vlan_filtering(dp, vlan_filtering); - rcu_read_unlock(); - if (!apply) - return -EINVAL; - } + /* We are called from dsa_slave_switchdev_blocking_event(), + * which is not under rcu_read_lock(), unlike + * dsa_slave_switchdev_event(). + */ + rcu_read_lock(); + apply = dsa_port_can_apply_vlan_filtering(dp, vlan_filtering); + rcu_read_unlock(); + if (!apply) + return -EINVAL; if (dsa_port_is_vlan_filtering(dp) == vlan_filtering) return 0; - err = ds->ops->port_vlan_filtering(ds, dp->index, vlan_filtering, - trans); + err = ds->ops->port_vlan_filtering(ds, dp->index, vlan_filtering); if (err) return err; - if (switchdev_trans_ph_commit(trans)) { - if (ds->vlan_filtering_is_global) - ds->vlan_filtering = vlan_filtering; - else - dp->vlan_filtering = vlan_filtering; - } + if (ds->vlan_filtering_is_global) + ds->vlan_filtering = vlan_filtering; + else + dp->vlan_filtering = vlan_filtering; return 0; } @@ -314,26 +305,29 @@ bool dsa_port_skip_vlan_configuration(struct dsa_port *dp) !br_vlan_enabled(dp->bridge_dev)); } -int dsa_port_ageing_time(struct dsa_port *dp, clock_t ageing_clock, - struct switchdev_trans *trans) +int dsa_port_ageing_time(struct dsa_port *dp, clock_t ageing_clock) { unsigned long ageing_jiffies = clock_t_to_jiffies(ageing_clock); unsigned int ageing_time = jiffies_to_msecs(ageing_jiffies); - struct dsa_notifier_ageing_time_info info = { - .ageing_time = ageing_time, - .trans = trans, - }; + struct dsa_notifier_ageing_time_info info; + struct switchdev_trans trans; + int err; - if (switchdev_trans_ph_prepare(trans)) - return dsa_port_notify(dp, DSA_NOTIFIER_AGEING_TIME, &info); + info.ageing_time = ageing_time; + info.trans = &trans; + + trans.ph_prepare = true; + err = dsa_port_notify(dp, DSA_NOTIFIER_AGEING_TIME, &info); + if (err) + return err; dp->ageing_time = ageing_time; + trans.ph_prepare = false; return dsa_port_notify(dp, DSA_NOTIFIER_AGEING_TIME, &info); } -int dsa_port_pre_bridge_flags(const struct dsa_port *dp, unsigned long flags, - struct switchdev_trans *trans) +int dsa_port_pre_bridge_flags(const struct dsa_port *dp, unsigned long flags) { struct dsa_switch *ds = dp->ds; @@ -344,16 +338,12 @@ int dsa_port_pre_bridge_flags(const struct dsa_port *dp, unsigned long flags, return 0; } -int dsa_port_bridge_flags(const struct dsa_port *dp, unsigned long flags, - struct switchdev_trans *trans) +int dsa_port_bridge_flags(const struct dsa_port *dp, unsigned long flags) { struct dsa_switch *ds = dp->ds; int port = dp->index; int err = 0; - if (switchdev_trans_ph_prepare(trans)) - return 0; - if (ds->ops->port_egress_floods) err = ds->ops->port_egress_floods(ds, port, flags & BR_FLOOD, flags & BR_MCAST_FLOOD); @@ -361,14 +351,13 @@ int dsa_port_bridge_flags(const struct dsa_port *dp, unsigned long flags, return err; } -int dsa_port_mrouter(struct dsa_port *dp, bool mrouter, - struct switchdev_trans *trans) +int dsa_port_mrouter(struct dsa_port *dp, bool mrouter) { struct dsa_switch *ds = dp->ds; int port = dp->index; - if (switchdev_trans_ph_prepare(trans)) - return ds->ops->port_egress_floods ? 0 : -EOPNOTSUPP; + if (!ds->ops->port_egress_floods) + return -EOPNOTSUPP; return ds->ops->port_egress_floods(ds, port, true, mrouter); } diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 0eefb50aae8c6..7ba505825db2f 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -268,32 +268,29 @@ static int dsa_slave_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) } static int dsa_slave_port_attr_set(struct net_device *dev, - const struct switchdev_attr *attr, - struct switchdev_trans *trans) + const struct switchdev_attr *attr) { struct dsa_port *dp = dsa_slave_to_port(dev); int ret; switch (attr->id) { case SWITCHDEV_ATTR_ID_PORT_STP_STATE: - ret = dsa_port_set_state(dp, attr->u.stp_state, trans); + ret = dsa_port_set_state(dp, attr->u.stp_state); break; case SWITCHDEV_ATTR_ID_BRIDGE_VLAN_FILTERING: - ret = dsa_port_vlan_filtering(dp, attr->u.vlan_filtering, - trans); + ret = dsa_port_vlan_filtering(dp, attr->u.vlan_filtering); break; case SWITCHDEV_ATTR_ID_BRIDGE_AGEING_TIME: - ret = dsa_port_ageing_time(dp, attr->u.ageing_time, trans); + ret = dsa_port_ageing_time(dp, attr->u.ageing_time); break; case SWITCHDEV_ATTR_ID_PORT_PRE_BRIDGE_FLAGS: - ret = dsa_port_pre_bridge_flags(dp, attr->u.brport_flags, - trans); + ret = dsa_port_pre_bridge_flags(dp, attr->u.brport_flags); break; case SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS: - ret = dsa_port_bridge_flags(dp, attr->u.brport_flags, trans); + ret = dsa_port_bridge_flags(dp, attr->u.brport_flags); break; case SWITCHDEV_ATTR_ID_BRIDGE_MROUTER: - ret = dsa_port_mrouter(dp->cpu_dp, attr->u.mrouter, trans); + ret = dsa_port_mrouter(dp->cpu_dp, attr->u.mrouter); break; default: ret = -EOPNOTSUPP; diff --git a/net/dsa/switch.c b/net/dsa/switch.c index 5b0bf29e13752..17979956d756e 100644 --- a/net/dsa/switch.c +++ b/net/dsa/switch.c @@ -139,17 +139,8 @@ static int dsa_switch_bridge_leave(struct dsa_switch *ds, } } if (unset_vlan_filtering) { - struct switchdev_trans trans; - - trans.ph_prepare = true; - err = dsa_port_vlan_filtering(dsa_to_port(ds, info->port), - false, &trans); - if (err && err != EOPNOTSUPP) - return err; - - trans.ph_prepare = false; err = dsa_port_vlan_filtering(dsa_to_port(ds, info->port), - false, &trans); + false); if (err && err != EOPNOTSUPP) return err; } diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index 3509d362056d3..855a10feef3de 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -100,15 +100,13 @@ static int switchdev_deferred_enqueue(struct net_device *dev, static int switchdev_port_attr_notify(enum switchdev_notifier_type nt, struct net_device *dev, - const struct switchdev_attr *attr, - struct switchdev_trans *trans) + const struct switchdev_attr *attr) { int err; int rc; struct switchdev_notifier_port_attr_info attr_info = { .attr = attr, - .trans = trans, .handled = false, }; @@ -129,34 +127,7 @@ static int switchdev_port_attr_notify(enum switchdev_notifier_type nt, static int switchdev_port_attr_set_now(struct net_device *dev, const struct switchdev_attr *attr) { - struct switchdev_trans trans; - int err; - - /* Phase I: prepare for attr set. Driver/device should fail - * here if there are going to be issues in the commit phase, - * such as lack of resources or support. The driver/device - * should reserve resources needed for the commit phase here, - * but should not commit the attr. - */ - - trans.ph_prepare = true; - err = switchdev_port_attr_notify(SWITCHDEV_PORT_ATTR_SET, dev, attr, - &trans); - if (err) - return err; - - /* Phase II: commit attr set. This cannot fail as a fault - * of driver/device. If it does, it's a bug in the driver/device - * because the driver said everythings was OK in phase I. - */ - - trans.ph_prepare = false; - err = switchdev_port_attr_notify(SWITCHDEV_PORT_ATTR_SET, dev, attr, - &trans); - WARN(err, "%s: Commit of attribute (id=%d) failed.\n", - dev->name, attr->id); - - return err; + return switchdev_port_attr_notify(SWITCHDEV_PORT_ATTR_SET, dev, attr); } static void switchdev_port_attr_set_deferred(struct net_device *dev, @@ -186,10 +157,6 @@ static int switchdev_port_attr_set_defer(struct net_device *dev, * @dev: port device * @attr: attribute to set * - * Use a 2-phase prepare-commit transaction model to ensure - * system is not left in a partially updated state due to - * failure from driver/device. - * * rtnl_lock must be held and must not be in atomic section, * in case SWITCHDEV_F_DEFER flag is not set. */ @@ -519,8 +486,7 @@ static int __switchdev_handle_port_attr_set(struct net_device *dev, struct switchdev_notifier_port_attr_info *port_attr_info, bool (*check_cb)(const struct net_device *dev), int (*set_cb)(struct net_device *dev, - const struct switchdev_attr *attr, - struct switchdev_trans *trans)) + const struct switchdev_attr *attr)) { struct net_device *lower_dev; struct list_head *iter; @@ -528,8 +494,7 @@ static int __switchdev_handle_port_attr_set(struct net_device *dev, if (check_cb(dev)) { port_attr_info->handled = true; - return set_cb(dev, port_attr_info->attr, - port_attr_info->trans); + return set_cb(dev, port_attr_info->attr); } /* Switch ports might be stacked under e.g. a LAG. Ignore the @@ -556,8 +521,7 @@ int switchdev_handle_port_attr_set(struct net_device *dev, struct switchdev_notifier_port_attr_info *port_attr_info, bool (*check_cb)(const struct net_device *dev), int (*set_cb)(struct net_device *dev, - const struct switchdev_attr *attr, - struct switchdev_trans *trans)) + const struct switchdev_attr *attr)) { int err; -- GitLab From 77b61365ecefb2404326c924e215f1ed5a680285 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Sat, 9 Jan 2021 02:01:51 +0200 Subject: [PATCH 0491/2012] net: dsa: remove the transactional logic from ageing time notifiers Remove the shim introduced in DSA for offloading the bridge ageing time from switchdev, by first checking whether the ageing time is within the range limits requested by the driver. Signed-off-by: Vladimir Oltean Reviewed-by: Florian Fainelli Acked-by: Linus Walleij Acked-by: Jiri Pirko Signed-off-by: Jakub Kicinski --- net/dsa/dsa_priv.h | 1 - net/dsa/port.c | 6 +----- net/dsa/switch.c | 15 ++++++--------- 3 files changed, 7 insertions(+), 15 deletions(-) diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h index 3e6063bf3f176..89143cc049db5 100644 --- a/net/dsa/dsa_priv.h +++ b/net/dsa/dsa_priv.h @@ -29,7 +29,6 @@ enum { /* DSA_NOTIFIER_AGEING_TIME */ struct dsa_notifier_ageing_time_info { - struct switchdev_trans *trans; unsigned int ageing_time; }; diff --git a/net/dsa/port.c b/net/dsa/port.c index 14bf0053ae011..e59bf66c4c0da 100644 --- a/net/dsa/port.c +++ b/net/dsa/port.c @@ -310,21 +310,17 @@ int dsa_port_ageing_time(struct dsa_port *dp, clock_t ageing_clock) unsigned long ageing_jiffies = clock_t_to_jiffies(ageing_clock); unsigned int ageing_time = jiffies_to_msecs(ageing_jiffies); struct dsa_notifier_ageing_time_info info; - struct switchdev_trans trans; int err; info.ageing_time = ageing_time; - info.trans = &trans; - trans.ph_prepare = true; err = dsa_port_notify(dp, DSA_NOTIFIER_AGEING_TIME, &info); if (err) return err; dp->ageing_time = ageing_time; - trans.ph_prepare = false; - return dsa_port_notify(dp, DSA_NOTIFIER_AGEING_TIME, &info); + return 0; } int dsa_port_pre_bridge_flags(const struct dsa_port *dp, unsigned long flags) diff --git a/net/dsa/switch.c b/net/dsa/switch.c index 17979956d756e..c6b3ac93bcc74 100644 --- a/net/dsa/switch.c +++ b/net/dsa/switch.c @@ -33,15 +33,12 @@ static int dsa_switch_ageing_time(struct dsa_switch *ds, struct dsa_notifier_ageing_time_info *info) { unsigned int ageing_time = info->ageing_time; - struct switchdev_trans *trans = info->trans; - - if (switchdev_trans_ph_prepare(trans)) { - if (ds->ageing_time_min && ageing_time < ds->ageing_time_min) - return -ERANGE; - if (ds->ageing_time_max && ageing_time > ds->ageing_time_max) - return -ERANGE; - return 0; - } + + if (ds->ageing_time_min && ageing_time < ds->ageing_time_min) + return -ERANGE; + + if (ds->ageing_time_max && ageing_time > ds->ageing_time_max) + return -ERANGE; /* Program the fastest ageing time in case of multiple bridges */ ageing_time = dsa_switch_fastest_ageing_time(ds, ageing_time); -- GitLab From a52b2da778fc93e01c4e5a744953f2ba1ec01c00 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Sat, 9 Jan 2021 02:01:52 +0200 Subject: [PATCH 0492/2012] net: dsa: remove the transactional logic from MDB entries For many drivers, the .port_mdb_prepare callback was not a good opportunity to avoid any error condition, and they would suppress errors found during the actual commit phase. Where a logical separation between the prepare and the commit phase existed, the function that used to implement the .port_mdb_prepare callback still exists, but now it is called directly from .port_mdb_add, which was modified to return an int code. Signed-off-by: Vladimir Oltean Reviewed-by: Florian Fainelli Acked-by: Linus Walleij Acked-by: Jiri Pirko Reviewed-by: Kurt Kanzenbach # hellcreek Reviewed-by: Linus Wallei # RTL8366 Signed-off-by: Jakub Kicinski --- drivers/net/dsa/b53/b53_common.c | 19 +++---------------- drivers/net/dsa/b53/b53_priv.h | 6 ++---- drivers/net/dsa/bcm_sf2.c | 1 - drivers/net/dsa/lan9303-core.c | 12 ++++++++---- drivers/net/dsa/microchip/ksz8795.c | 1 - drivers/net/dsa/microchip/ksz9477.c | 14 +++++++++----- drivers/net/dsa/microchip/ksz_common.c | 16 +++++----------- drivers/net/dsa/microchip/ksz_common.h | 6 ++---- drivers/net/dsa/mv88e6xxx/chip.c | 24 +++++++----------------- drivers/net/dsa/ocelot/felix.c | 14 +++----------- drivers/net/dsa/sja1105/sja1105_main.c | 14 +++----------- include/net/dsa.h | 4 +--- net/dsa/switch.c | 15 ++++++--------- 13 files changed, 49 insertions(+), 97 deletions(-) diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index f07f547db53e3..d5f2798968edf 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -1741,8 +1741,8 @@ int b53_fdb_dump(struct dsa_switch *ds, int port, } EXPORT_SYMBOL(b53_fdb_dump); -int b53_mdb_prepare(struct dsa_switch *ds, int port, - const struct switchdev_obj_port_mdb *mdb) +int b53_mdb_add(struct dsa_switch *ds, int port, + const struct switchdev_obj_port_mdb *mdb) { struct b53_device *priv = ds->priv; @@ -1752,19 +1752,7 @@ int b53_mdb_prepare(struct dsa_switch *ds, int port, if (is5325(priv) || is5365(priv)) return -EOPNOTSUPP; - return 0; -} -EXPORT_SYMBOL(b53_mdb_prepare); - -void b53_mdb_add(struct dsa_switch *ds, int port, - const struct switchdev_obj_port_mdb *mdb) -{ - struct b53_device *priv = ds->priv; - int ret; - - ret = b53_arl_op(priv, 0, port, mdb->addr, mdb->vid, true); - if (ret) - dev_err(ds->dev, "failed to add MDB entry\n"); + return b53_arl_op(priv, 0, port, mdb->addr, mdb->vid, true); } EXPORT_SYMBOL(b53_mdb_add); @@ -2205,7 +2193,6 @@ static const struct dsa_switch_ops b53_switch_ops = { .port_fdb_del = b53_fdb_del, .port_mirror_add = b53_mirror_add, .port_mirror_del = b53_mirror_del, - .port_mdb_prepare = b53_mdb_prepare, .port_mdb_add = b53_mdb_add, .port_mdb_del = b53_mdb_del, .port_max_mtu = b53_get_max_mtu, diff --git a/drivers/net/dsa/b53/b53_priv.h b/drivers/net/dsa/b53/b53_priv.h index 9ab0fb409f78a..2a4a54df3a312 100644 --- a/drivers/net/dsa/b53/b53_priv.h +++ b/drivers/net/dsa/b53/b53_priv.h @@ -361,10 +361,8 @@ int b53_fdb_del(struct dsa_switch *ds, int port, const unsigned char *addr, u16 vid); int b53_fdb_dump(struct dsa_switch *ds, int port, dsa_fdb_dump_cb_t *cb, void *data); -int b53_mdb_prepare(struct dsa_switch *ds, int port, - const struct switchdev_obj_port_mdb *mdb); -void b53_mdb_add(struct dsa_switch *ds, int port, - const struct switchdev_obj_port_mdb *mdb); +int b53_mdb_add(struct dsa_switch *ds, int port, + const struct switchdev_obj_port_mdb *mdb); int b53_mdb_del(struct dsa_switch *ds, int port, const struct switchdev_obj_port_mdb *mdb); int b53_mirror_add(struct dsa_switch *ds, int port, diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c index 65c8a044f222a..12ff84109f133 100644 --- a/drivers/net/dsa/bcm_sf2.c +++ b/drivers/net/dsa/bcm_sf2.c @@ -1126,7 +1126,6 @@ static const struct dsa_switch_ops bcm_sf2_ops = { .set_rxnfc = bcm_sf2_set_rxnfc, .port_mirror_add = b53_mirror_add, .port_mirror_del = b53_mirror_del, - .port_mdb_prepare = b53_mdb_prepare, .port_mdb_add = b53_mdb_add, .port_mdb_del = b53_mdb_del, }; diff --git a/drivers/net/dsa/lan9303-core.c b/drivers/net/dsa/lan9303-core.c index aa1142d6a9f54..3443740254261 100644 --- a/drivers/net/dsa/lan9303-core.c +++ b/drivers/net/dsa/lan9303-core.c @@ -1232,14 +1232,19 @@ static int lan9303_port_mdb_prepare(struct dsa_switch *ds, int port, return 0; } -static void lan9303_port_mdb_add(struct dsa_switch *ds, int port, - const struct switchdev_obj_port_mdb *mdb) +static int lan9303_port_mdb_add(struct dsa_switch *ds, int port, + const struct switchdev_obj_port_mdb *mdb) { struct lan9303 *chip = ds->priv; + int err; + + err = lan9303_port_mdb_prepare(ds, port, mdb); + if (err) + return err; dev_dbg(chip->dev, "%s(%d, %pM, %d)\n", __func__, port, mdb->addr, mdb->vid); - lan9303_alr_add_port(chip, mdb->addr, port, false); + return lan9303_alr_add_port(chip, mdb->addr, port, false); } static int lan9303_port_mdb_del(struct dsa_switch *ds, int port, @@ -1274,7 +1279,6 @@ static const struct dsa_switch_ops lan9303_switch_ops = { .port_fdb_add = lan9303_port_fdb_add, .port_fdb_del = lan9303_port_fdb_del, .port_fdb_dump = lan9303_port_fdb_dump, - .port_mdb_prepare = lan9303_port_mdb_prepare, .port_mdb_add = lan9303_port_mdb_add, .port_mdb_del = lan9303_port_mdb_del, }; diff --git a/drivers/net/dsa/microchip/ksz8795.c b/drivers/net/dsa/microchip/ksz8795.c index 6a9ec8a0f92f8..89e1c01cf5b88 100644 --- a/drivers/net/dsa/microchip/ksz8795.c +++ b/drivers/net/dsa/microchip/ksz8795.c @@ -1114,7 +1114,6 @@ static const struct dsa_switch_ops ksz8795_switch_ops = { .port_vlan_add = ksz8795_port_vlan_add, .port_vlan_del = ksz8795_port_vlan_del, .port_fdb_dump = ksz_port_fdb_dump, - .port_mdb_prepare = ksz_port_mdb_prepare, .port_mdb_add = ksz_port_mdb_add, .port_mdb_del = ksz_port_mdb_del, .port_mirror_add = ksz8795_port_mirror_add, diff --git a/drivers/net/dsa/microchip/ksz9477.c b/drivers/net/dsa/microchip/ksz9477.c index 446c088ce5c56..08bf54eb9f5f0 100644 --- a/drivers/net/dsa/microchip/ksz9477.c +++ b/drivers/net/dsa/microchip/ksz9477.c @@ -774,14 +774,15 @@ exit: return ret; } -static void ksz9477_port_mdb_add(struct dsa_switch *ds, int port, - const struct switchdev_obj_port_mdb *mdb) +static int ksz9477_port_mdb_add(struct dsa_switch *ds, int port, + const struct switchdev_obj_port_mdb *mdb) { struct ksz_device *dev = ds->priv; u32 static_table[4]; u32 data; int index; u32 mac_hi, mac_lo; + int err = 0; mac_hi = ((mdb->addr[0] << 8) | mdb->addr[1]); mac_lo = ((mdb->addr[2] << 24) | (mdb->addr[3] << 16)); @@ -796,7 +797,8 @@ static void ksz9477_port_mdb_add(struct dsa_switch *ds, int port, ksz_write32(dev, REG_SW_ALU_STAT_CTRL__4, data); /* wait to be finished */ - if (ksz9477_wait_alu_sta_ready(dev)) { + err = ksz9477_wait_alu_sta_ready(dev); + if (err) { dev_dbg(dev->dev, "Failed to read ALU STATIC\n"); goto exit; } @@ -819,8 +821,10 @@ static void ksz9477_port_mdb_add(struct dsa_switch *ds, int port, } /* no available entry */ - if (index == dev->num_statics) + if (index == dev->num_statics) { + err = -ENOSPC; goto exit; + } /* add entry */ static_table[0] = ALU_V_STATIC_VALID; @@ -842,6 +846,7 @@ static void ksz9477_port_mdb_add(struct dsa_switch *ds, int port, exit: mutex_unlock(&dev->alu_mutex); + return err; } static int ksz9477_port_mdb_del(struct dsa_switch *ds, int port, @@ -1395,7 +1400,6 @@ static const struct dsa_switch_ops ksz9477_switch_ops = { .port_fdb_dump = ksz9477_port_fdb_dump, .port_fdb_add = ksz9477_port_fdb_add, .port_fdb_del = ksz9477_port_fdb_del, - .port_mdb_prepare = ksz_port_mdb_prepare, .port_mdb_add = ksz9477_port_mdb_add, .port_mdb_del = ksz9477_port_mdb_del, .port_mirror_add = ksz9477_port_mirror_add, diff --git a/drivers/net/dsa/microchip/ksz_common.c b/drivers/net/dsa/microchip/ksz_common.c index cf743133b0b93..f2c9ff3ea4bef 100644 --- a/drivers/net/dsa/microchip/ksz_common.c +++ b/drivers/net/dsa/microchip/ksz_common.c @@ -253,16 +253,8 @@ int ksz_port_fdb_dump(struct dsa_switch *ds, int port, dsa_fdb_dump_cb_t *cb, } EXPORT_SYMBOL_GPL(ksz_port_fdb_dump); -int ksz_port_mdb_prepare(struct dsa_switch *ds, int port, - const struct switchdev_obj_port_mdb *mdb) -{ - /* nothing to do */ - return 0; -} -EXPORT_SYMBOL_GPL(ksz_port_mdb_prepare); - -void ksz_port_mdb_add(struct dsa_switch *ds, int port, - const struct switchdev_obj_port_mdb *mdb) +int ksz_port_mdb_add(struct dsa_switch *ds, int port, + const struct switchdev_obj_port_mdb *mdb) { struct ksz_device *dev = ds->priv; struct alu_struct alu; @@ -284,7 +276,7 @@ void ksz_port_mdb_add(struct dsa_switch *ds, int port, /* no available entry */ if (index == dev->num_statics && !empty) - return; + return -ENOSPC; /* add entry */ if (index == dev->num_statics) { @@ -301,6 +293,8 @@ void ksz_port_mdb_add(struct dsa_switch *ds, int port, alu.fid = mdb->vid; } dev->dev_ops->w_sta_mac_table(dev, index, &alu); + + return 0; } EXPORT_SYMBOL_GPL(ksz_port_mdb_add); diff --git a/drivers/net/dsa/microchip/ksz_common.h b/drivers/net/dsa/microchip/ksz_common.h index 720f22275c844..a1f0929d45a05 100644 --- a/drivers/net/dsa/microchip/ksz_common.h +++ b/drivers/net/dsa/microchip/ksz_common.h @@ -165,10 +165,8 @@ int ksz_port_vlan_prepare(struct dsa_switch *ds, int port, const struct switchdev_obj_port_vlan *vlan); int ksz_port_fdb_dump(struct dsa_switch *ds, int port, dsa_fdb_dump_cb_t *cb, void *data); -int ksz_port_mdb_prepare(struct dsa_switch *ds, int port, - const struct switchdev_obj_port_mdb *mdb); -void ksz_port_mdb_add(struct dsa_switch *ds, int port, - const struct switchdev_obj_port_mdb *mdb); +int ksz_port_mdb_add(struct dsa_switch *ds, int port, + const struct switchdev_obj_port_mdb *mdb); int ksz_port_mdb_del(struct dsa_switch *ds, int port, const struct switchdev_obj_port_mdb *mdb); int ksz_enable_port(struct dsa_switch *ds, int port, struct phy_device *phy); diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index bbf1a71ce55cf..e9c517c0f89c3 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -5238,27 +5238,18 @@ static enum dsa_tag_protocol mv88e6xxx_get_tag_protocol(struct dsa_switch *ds, return chip->info->tag_protocol; } -static int mv88e6xxx_port_mdb_prepare(struct dsa_switch *ds, int port, - const struct switchdev_obj_port_mdb *mdb) -{ - /* We don't need any dynamic resource from the kernel (yet), - * so skip the prepare phase. - */ - - return 0; -} - -static void mv88e6xxx_port_mdb_add(struct dsa_switch *ds, int port, - const struct switchdev_obj_port_mdb *mdb) +static int mv88e6xxx_port_mdb_add(struct dsa_switch *ds, int port, + const struct switchdev_obj_port_mdb *mdb) { struct mv88e6xxx_chip *chip = ds->priv; + int err; mv88e6xxx_reg_lock(chip); - if (mv88e6xxx_port_db_load_purge(chip, port, mdb->addr, mdb->vid, - MV88E6XXX_G1_ATU_DATA_STATE_MC_STATIC)) - dev_err(ds->dev, "p%d: failed to load multicast MAC address\n", - port); + err = mv88e6xxx_port_db_load_purge(chip, port, mdb->addr, mdb->vid, + MV88E6XXX_G1_ATU_DATA_STATE_MC_STATIC); mv88e6xxx_reg_unlock(chip); + + return err; } static int mv88e6xxx_port_mdb_del(struct dsa_switch *ds, int port, @@ -5403,7 +5394,6 @@ static const struct dsa_switch_ops mv88e6xxx_switch_ops = { .port_fdb_add = mv88e6xxx_port_fdb_add, .port_fdb_del = mv88e6xxx_port_fdb_del, .port_fdb_dump = mv88e6xxx_port_fdb_dump, - .port_mdb_prepare = mv88e6xxx_port_mdb_prepare, .port_mdb_add = mv88e6xxx_port_mdb_add, .port_mdb_del = mv88e6xxx_port_mdb_del, .port_mirror_add = mv88e6xxx_port_mirror_add, diff --git a/drivers/net/dsa/ocelot/felix.c b/drivers/net/dsa/ocelot/felix.c index 8c4cd9168dbaf..2825dd11feeea 100644 --- a/drivers/net/dsa/ocelot/felix.c +++ b/drivers/net/dsa/ocelot/felix.c @@ -65,19 +65,12 @@ static int felix_fdb_del(struct dsa_switch *ds, int port, return ocelot_fdb_del(ocelot, port, addr, vid); } -/* This callback needs to be present */ -static int felix_mdb_prepare(struct dsa_switch *ds, int port, - const struct switchdev_obj_port_mdb *mdb) -{ - return 0; -} - -static void felix_mdb_add(struct dsa_switch *ds, int port, - const struct switchdev_obj_port_mdb *mdb) +static int felix_mdb_add(struct dsa_switch *ds, int port, + const struct switchdev_obj_port_mdb *mdb) { struct ocelot *ocelot = ds->priv; - ocelot_port_mdb_add(ocelot, port, mdb); + return ocelot_port_mdb_add(ocelot, port, mdb); } static int felix_mdb_del(struct dsa_switch *ds, int port, @@ -772,7 +765,6 @@ const struct dsa_switch_ops felix_switch_ops = { .port_fdb_dump = felix_fdb_dump, .port_fdb_add = felix_fdb_add, .port_fdb_del = felix_fdb_del, - .port_mdb_prepare = felix_mdb_prepare, .port_mdb_add = felix_mdb_add, .port_mdb_del = felix_mdb_del, .port_bridge_join = felix_bridge_join, diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c index c1b7f2b0e40cb..be200d4289af7 100644 --- a/drivers/net/dsa/sja1105/sja1105_main.c +++ b/drivers/net/dsa/sja1105/sja1105_main.c @@ -1524,17 +1524,10 @@ static int sja1105_fdb_dump(struct dsa_switch *ds, int port, return 0; } -/* This callback needs to be present */ -static int sja1105_mdb_prepare(struct dsa_switch *ds, int port, - const struct switchdev_obj_port_mdb *mdb) -{ - return 0; -} - -static void sja1105_mdb_add(struct dsa_switch *ds, int port, - const struct switchdev_obj_port_mdb *mdb) +static int sja1105_mdb_add(struct dsa_switch *ds, int port, + const struct switchdev_obj_port_mdb *mdb) { - sja1105_fdb_add(ds, port, mdb->addr, mdb->vid); + return sja1105_fdb_add(ds, port, mdb->addr, mdb->vid); } static int sja1105_mdb_del(struct dsa_switch *ds, int port, @@ -3288,7 +3281,6 @@ static const struct dsa_switch_ops sja1105_switch_ops = { .port_vlan_filtering = sja1105_vlan_filtering, .port_vlan_add = sja1105_vlan_add, .port_vlan_del = sja1105_vlan_del, - .port_mdb_prepare = sja1105_mdb_prepare, .port_mdb_add = sja1105_mdb_add, .port_mdb_del = sja1105_mdb_del, .port_hwtstamp_get = sja1105_hwtstamp_get, diff --git a/include/net/dsa.h b/include/net/dsa.h index d0f0430919698..b8c0550dfa746 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -585,10 +585,8 @@ struct dsa_switch_ops { /* * Multicast database */ - int (*port_mdb_prepare)(struct dsa_switch *ds, int port, + int (*port_mdb_add)(struct dsa_switch *ds, int port, const struct switchdev_obj_port_mdb *mdb); - void (*port_mdb_add)(struct dsa_switch *ds, int port, - const struct switchdev_obj_port_mdb *mdb); int (*port_mdb_del)(struct dsa_switch *ds, int port, const struct switchdev_obj_port_mdb *mdb); /* diff --git a/net/dsa/switch.c b/net/dsa/switch.c index c6b3ac93bcc74..5f5e19c5e43ae 100644 --- a/net/dsa/switch.c +++ b/net/dsa/switch.c @@ -181,24 +181,21 @@ static bool dsa_switch_mdb_match(struct dsa_switch *ds, int port, static int dsa_switch_mdb_add(struct dsa_switch *ds, struct dsa_notifier_mdb_info *info) { - int port, err; + int err = 0; + int port; - if (!ds->ops->port_mdb_prepare || !ds->ops->port_mdb_add) + if (!ds->ops->port_mdb_add) return -EOPNOTSUPP; for (port = 0; port < ds->num_ports; port++) { if (dsa_switch_mdb_match(ds, port, info)) { - err = ds->ops->port_mdb_prepare(ds, port, info->mdb); + err = ds->ops->port_mdb_add(ds, port, info->mdb); if (err) - return err; + break; } } - for (port = 0; port < ds->num_ports; port++) - if (dsa_switch_mdb_match(ds, port, info)) - ds->ops->port_mdb_add(ds, port, info->mdb); - - return 0; + return err; } static int dsa_switch_mdb_del(struct dsa_switch *ds, -- GitLab From 1958d5815c91353960df2198a74f5ca15785ed28 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Sat, 9 Jan 2021 02:01:53 +0200 Subject: [PATCH 0493/2012] net: dsa: remove the transactional logic from VLAN objects It should be the driver's business to logically separate its VLAN offloading into a preparation and a commit phase, and some drivers don't need / can't do this. So remove the transactional shim from DSA and let drivers propagate errors directly from the .port_vlan_add callback. It would appear that the code has worse error handling now than it had before. DSA is the only in-kernel user of switchdev that offloads one switchdev object to more than one port: for every VLAN object offloaded to a user port, that VLAN is also offloaded to the CPU port. So the "prepare for user port -> check for errors -> prepare for CPU port -> check for errors -> commit for user port -> commit for CPU port" sequence appears to make more sense than the one we are using now: "offload to user port -> check for errors -> offload to CPU port -> check for errors", but it is really a compromise. In the new way, we can catch errors from the commit phase that we previously had to ignore. But we have our hands tied and cannot do any rollback now: if we add a VLAN on the CPU port and it fails, we can't do the rollback by simply deleting it from the user port, because the switchdev API is not so nice with us: it could have simply been there already, even with the same flags. So we don't even attempt to rollback anything on addition error, just leave whatever VLANs managed to get offloaded right where they are. This should not be a problem at all in practice. Signed-off-by: Vladimir Oltean Reviewed-by: Florian Fainelli Acked-by: Linus Walleij Acked-by: Jiri Pirko Signed-off-by: Jakub Kicinski --- drivers/net/dsa/b53/b53_common.c | 17 ++++++---- drivers/net/dsa/b53/b53_priv.h | 6 ++-- drivers/net/dsa/bcm_sf2.c | 1 - drivers/net/dsa/bcm_sf2_cfp.c | 7 ++-- drivers/net/dsa/dsa_loop.c | 28 ++++------------ drivers/net/dsa/hirschmann/hellcreek.c | 12 +++++-- drivers/net/dsa/lantiq_gswip.c | 15 ++++++--- drivers/net/dsa/microchip/ksz8795.c | 7 ++-- drivers/net/dsa/microchip/ksz9477.c | 18 +++++++---- drivers/net/dsa/microchip/ksz_common.c | 9 ------ drivers/net/dsa/microchip/ksz_common.h | 2 -- drivers/net/dsa/mt7530.c | 14 ++------ drivers/net/dsa/mv88e6xxx/chip.c | 34 ++++++++++++-------- drivers/net/dsa/ocelot/felix.c | 16 ++++++---- drivers/net/dsa/qca8k.c | 14 +++----- drivers/net/dsa/realtek-smi-core.h | 6 ++-- drivers/net/dsa/rtl8366.c | 44 +++++++++++--------------- drivers/net/dsa/rtl8366rb.c | 1 - drivers/net/dsa/sja1105/sja1105_main.c | 43 +++++++++---------------- include/net/dsa.h | 4 +-- net/dsa/switch.c | 8 ++--- 21 files changed, 133 insertions(+), 173 deletions(-) diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index d5f2798968edf..5d6d9f91d40af 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -1384,8 +1384,8 @@ int b53_vlan_filtering(struct dsa_switch *ds, int port, bool vlan_filtering) } EXPORT_SYMBOL(b53_vlan_filtering); -int b53_vlan_prepare(struct dsa_switch *ds, int port, - const struct switchdev_obj_port_vlan *vlan) +static int b53_vlan_prepare(struct dsa_switch *ds, int port, + const struct switchdev_obj_port_vlan *vlan) { struct b53_device *dev = ds->priv; @@ -1407,15 +1407,19 @@ int b53_vlan_prepare(struct dsa_switch *ds, int port, return 0; } -EXPORT_SYMBOL(b53_vlan_prepare); -void b53_vlan_add(struct dsa_switch *ds, int port, - const struct switchdev_obj_port_vlan *vlan) +int b53_vlan_add(struct dsa_switch *ds, int port, + const struct switchdev_obj_port_vlan *vlan) { struct b53_device *dev = ds->priv; bool untagged = vlan->flags & BRIDGE_VLAN_INFO_UNTAGGED; bool pvid = vlan->flags & BRIDGE_VLAN_INFO_PVID; struct b53_vlan *vl; + int err; + + err = b53_vlan_prepare(ds, port, vlan); + if (err) + return err; vl = &dev->vlans[vlan->vid]; @@ -1438,6 +1442,8 @@ void b53_vlan_add(struct dsa_switch *ds, int port, vlan->vid); b53_fast_age_vlan(dev, vlan->vid); } + + return 0; } EXPORT_SYMBOL(b53_vlan_add); @@ -2185,7 +2191,6 @@ static const struct dsa_switch_ops b53_switch_ops = { .port_fast_age = b53_br_fast_age, .port_egress_floods = b53_br_egress_floods, .port_vlan_filtering = b53_vlan_filtering, - .port_vlan_prepare = b53_vlan_prepare, .port_vlan_add = b53_vlan_add, .port_vlan_del = b53_vlan_del, .port_fdb_dump = b53_fdb_dump, diff --git a/drivers/net/dsa/b53/b53_priv.h b/drivers/net/dsa/b53/b53_priv.h index 2a4a54df3a312..0d2cc0453bef9 100644 --- a/drivers/net/dsa/b53/b53_priv.h +++ b/drivers/net/dsa/b53/b53_priv.h @@ -349,10 +349,8 @@ void b53_phylink_mac_link_up(struct dsa_switch *ds, int port, int speed, int duplex, bool tx_pause, bool rx_pause); int b53_vlan_filtering(struct dsa_switch *ds, int port, bool vlan_filtering); -int b53_vlan_prepare(struct dsa_switch *ds, int port, - const struct switchdev_obj_port_vlan *vlan); -void b53_vlan_add(struct dsa_switch *ds, int port, - const struct switchdev_obj_port_vlan *vlan); +int b53_vlan_add(struct dsa_switch *ds, int port, + const struct switchdev_obj_port_vlan *vlan); int b53_vlan_del(struct dsa_switch *ds, int port, const struct switchdev_obj_port_vlan *vlan); int b53_fdb_add(struct dsa_switch *ds, int port, diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c index 12ff84109f133..d53485c79d779 100644 --- a/drivers/net/dsa/bcm_sf2.c +++ b/drivers/net/dsa/bcm_sf2.c @@ -1116,7 +1116,6 @@ static const struct dsa_switch_ops bcm_sf2_ops = { .port_stp_state_set = b53_br_set_stp_state, .port_fast_age = b53_br_fast_age, .port_vlan_filtering = b53_vlan_filtering, - .port_vlan_prepare = b53_vlan_prepare, .port_vlan_add = b53_vlan_add, .port_vlan_del = b53_vlan_del, .port_fdb_dump = b53_fdb_dump, diff --git a/drivers/net/dsa/bcm_sf2_cfp.c b/drivers/net/dsa/bcm_sf2_cfp.c index 59d799ac1b607..ed45d16250e16 100644 --- a/drivers/net/dsa/bcm_sf2_cfp.c +++ b/drivers/net/dsa/bcm_sf2_cfp.c @@ -891,11 +891,9 @@ static int bcm_sf2_cfp_rule_insert(struct dsa_switch *ds, int port, else vlan.flags = 0; - ret = ds->ops->port_vlan_prepare(ds, port_num, &vlan); + ret = ds->ops->port_vlan_add(ds, port_num, &vlan); if (ret) return ret; - - ds->ops->port_vlan_add(ds, port_num, &vlan); } /* @@ -941,8 +939,7 @@ static int bcm_sf2_cfp_rule_set(struct dsa_switch *ds, int port, return -EINVAL; if ((fs->flow_type & FLOW_EXT) && - !(ds->ops->port_vlan_prepare || ds->ops->port_vlan_add || - ds->ops->port_vlan_del)) + !(ds->ops->port_vlan_add || ds->ops->port_vlan_del)) return -EOPNOTSUPP; if (fs->location != RX_CLS_LOC_ANY && diff --git a/drivers/net/dsa/dsa_loop.c b/drivers/net/dsa/dsa_loop.c index 8e3d623f4dbd2..be61ce93a3778 100644 --- a/drivers/net/dsa/dsa_loop.c +++ b/drivers/net/dsa/dsa_loop.c @@ -198,26 +198,8 @@ static int dsa_loop_port_vlan_filtering(struct dsa_switch *ds, int port, return 0; } -static int -dsa_loop_port_vlan_prepare(struct dsa_switch *ds, int port, - const struct switchdev_obj_port_vlan *vlan) -{ - struct dsa_loop_priv *ps = ds->priv; - struct mii_bus *bus = ps->bus; - - dev_dbg(ds->dev, "%s: port: %d, vlan: %d", __func__, port, vlan->vid); - - /* Just do a sleeping operation to make lockdep checks effective */ - mdiobus_read(bus, ps->port_base + port, MII_BMSR); - - if (vlan->vid > ARRAY_SIZE(ps->vlans)) - return -ERANGE; - - return 0; -} - -static void dsa_loop_port_vlan_add(struct dsa_switch *ds, int port, - const struct switchdev_obj_port_vlan *vlan) +static int dsa_loop_port_vlan_add(struct dsa_switch *ds, int port, + const struct switchdev_obj_port_vlan *vlan) { bool untagged = vlan->flags & BRIDGE_VLAN_INFO_UNTAGGED; bool pvid = vlan->flags & BRIDGE_VLAN_INFO_PVID; @@ -225,6 +207,9 @@ static void dsa_loop_port_vlan_add(struct dsa_switch *ds, int port, struct mii_bus *bus = ps->bus; struct dsa_loop_vlan *vl; + if (vlan->vid > ARRAY_SIZE(ps->vlans)) + return -ERANGE; + /* Just do a sleeping operation to make lockdep checks effective */ mdiobus_read(bus, ps->port_base + port, MII_BMSR); @@ -241,6 +226,8 @@ static void dsa_loop_port_vlan_add(struct dsa_switch *ds, int port, if (pvid) ps->ports[port].pvid = vlan->vid; + + return 0; } static int dsa_loop_port_vlan_del(struct dsa_switch *ds, int port, @@ -300,7 +287,6 @@ static const struct dsa_switch_ops dsa_loop_driver = { .port_bridge_leave = dsa_loop_port_bridge_leave, .port_stp_state_set = dsa_loop_port_stp_state_set, .port_vlan_filtering = dsa_loop_port_vlan_filtering, - .port_vlan_prepare = dsa_loop_port_vlan_prepare, .port_vlan_add = dsa_loop_port_vlan_add, .port_vlan_del = dsa_loop_port_vlan_del, .port_change_mtu = dsa_loop_port_change_mtu, diff --git a/drivers/net/dsa/hirschmann/hellcreek.c b/drivers/net/dsa/hirschmann/hellcreek.c index 4e1dbf91720c8..205249504289d 100644 --- a/drivers/net/dsa/hirschmann/hellcreek.c +++ b/drivers/net/dsa/hirschmann/hellcreek.c @@ -438,18 +438,25 @@ static void hellcreek_unapply_vlan(struct hellcreek *hellcreek, int port, mutex_unlock(&hellcreek->reg_lock); } -static void hellcreek_vlan_add(struct dsa_switch *ds, int port, - const struct switchdev_obj_port_vlan *vlan) +static int hellcreek_vlan_add(struct dsa_switch *ds, int port, + const struct switchdev_obj_port_vlan *vlan) { bool untagged = vlan->flags & BRIDGE_VLAN_INFO_UNTAGGED; bool pvid = vlan->flags & BRIDGE_VLAN_INFO_PVID; struct hellcreek *hellcreek = ds->priv; + int err; + + err = hellcreek_vlan_prepare(ds, port, vlan); + if (err) + return err; dev_dbg(hellcreek->dev, "Add VLAN %d on port %d, %s, %s\n", vlan->vid, port, untagged ? "untagged" : "tagged", pvid ? "PVID" : "no PVID"); hellcreek_apply_vlan(hellcreek, port, vlan->vid, pvid, untagged); + + return 0; } static int hellcreek_vlan_del(struct dsa_switch *ds, int port, @@ -1146,7 +1153,6 @@ static const struct dsa_switch_ops hellcreek_ds_ops = { .port_vlan_add = hellcreek_vlan_add, .port_vlan_del = hellcreek_vlan_del, .port_vlan_filtering = hellcreek_vlan_filtering, - .port_vlan_prepare = hellcreek_vlan_prepare, .setup = hellcreek_setup, }; diff --git a/drivers/net/dsa/lantiq_gswip.c b/drivers/net/dsa/lantiq_gswip.c index 1dddca92d17ee..244729ee3bdbd 100644 --- a/drivers/net/dsa/lantiq_gswip.c +++ b/drivers/net/dsa/lantiq_gswip.c @@ -1167,13 +1167,18 @@ static int gswip_port_vlan_prepare(struct dsa_switch *ds, int port, return 0; } -static void gswip_port_vlan_add(struct dsa_switch *ds, int port, - const struct switchdev_obj_port_vlan *vlan) +static int gswip_port_vlan_add(struct dsa_switch *ds, int port, + const struct switchdev_obj_port_vlan *vlan) { struct gswip_priv *priv = ds->priv; struct net_device *bridge = dsa_to_port(ds, port)->bridge_dev; bool untagged = vlan->flags & BRIDGE_VLAN_INFO_UNTAGGED; bool pvid = vlan->flags & BRIDGE_VLAN_INFO_PVID; + int err; + + err = gswip_port_vlan_prepare(ds, port, vlan); + if (err) + return err; /* We have to receive all packets on the CPU port and should not * do any VLAN filtering here. This is also called with bridge @@ -1181,9 +1186,10 @@ static void gswip_port_vlan_add(struct dsa_switch *ds, int port, * this. */ if (dsa_is_cpu_port(ds, port)) - return; + return 0; - gswip_vlan_add_aware(priv, bridge, port, vlan->vid, untagged, pvid); + return gswip_vlan_add_aware(priv, bridge, port, vlan->vid, + untagged, pvid); } static int gswip_port_vlan_del(struct dsa_switch *ds, int port, @@ -1580,7 +1586,6 @@ static const struct dsa_switch_ops gswip_switch_ops = { .port_bridge_leave = gswip_port_bridge_leave, .port_fast_age = gswip_port_fast_age, .port_vlan_filtering = gswip_port_vlan_filtering, - .port_vlan_prepare = gswip_port_vlan_prepare, .port_vlan_add = gswip_port_vlan_add, .port_vlan_del = gswip_port_vlan_del, .port_stp_state_set = gswip_port_stp_state_set, diff --git a/drivers/net/dsa/microchip/ksz8795.c b/drivers/net/dsa/microchip/ksz8795.c index 89e1c01cf5b88..d639f9476bd93 100644 --- a/drivers/net/dsa/microchip/ksz8795.c +++ b/drivers/net/dsa/microchip/ksz8795.c @@ -792,8 +792,8 @@ static int ksz8795_port_vlan_filtering(struct dsa_switch *ds, int port, return 0; } -static void ksz8795_port_vlan_add(struct dsa_switch *ds, int port, - const struct switchdev_obj_port_vlan *vlan) +static int ksz8795_port_vlan_add(struct dsa_switch *ds, int port, + const struct switchdev_obj_port_vlan *vlan) { bool untagged = vlan->flags & BRIDGE_VLAN_INFO_UNTAGGED; struct ksz_device *dev = ds->priv; @@ -828,6 +828,8 @@ static void ksz8795_port_vlan_add(struct dsa_switch *ds, int port, vid |= new_pvid; ksz_pwrite16(dev, port, REG_PORT_CTRL_VID, vid); } + + return 0; } static int ksz8795_port_vlan_del(struct dsa_switch *ds, int port, @@ -1110,7 +1112,6 @@ static const struct dsa_switch_ops ksz8795_switch_ops = { .port_stp_state_set = ksz8795_port_stp_state_set, .port_fast_age = ksz_port_fast_age, .port_vlan_filtering = ksz8795_port_vlan_filtering, - .port_vlan_prepare = ksz_port_vlan_prepare, .port_vlan_add = ksz8795_port_vlan_add, .port_vlan_del = ksz8795_port_vlan_del, .port_fdb_dump = ksz_port_fdb_dump, diff --git a/drivers/net/dsa/microchip/ksz9477.c b/drivers/net/dsa/microchip/ksz9477.c index 08bf54eb9f5f0..71cf24f20252a 100644 --- a/drivers/net/dsa/microchip/ksz9477.c +++ b/drivers/net/dsa/microchip/ksz9477.c @@ -510,16 +510,18 @@ static int ksz9477_port_vlan_filtering(struct dsa_switch *ds, int port, return 0; } -static void ksz9477_port_vlan_add(struct dsa_switch *ds, int port, - const struct switchdev_obj_port_vlan *vlan) +static int ksz9477_port_vlan_add(struct dsa_switch *ds, int port, + const struct switchdev_obj_port_vlan *vlan) { struct ksz_device *dev = ds->priv; u32 vlan_table[3]; bool untagged = vlan->flags & BRIDGE_VLAN_INFO_UNTAGGED; + int err; - if (ksz9477_get_vlan_table(dev, vlan->vid, vlan_table)) { + err = ksz9477_get_vlan_table(dev, vlan->vid, vlan_table); + if (err) { dev_dbg(dev->dev, "Failed to get vlan table\n"); - return; + return err; } vlan_table[0] = VLAN_VALID | (vlan->vid & VLAN_FID_M); @@ -531,14 +533,17 @@ static void ksz9477_port_vlan_add(struct dsa_switch *ds, int port, vlan_table[2] |= BIT(port) | BIT(dev->cpu_port); - if (ksz9477_set_vlan_table(dev, vlan->vid, vlan_table)) { + err = ksz9477_set_vlan_table(dev, vlan->vid, vlan_table); + if (err) { dev_dbg(dev->dev, "Failed to set vlan table\n"); - return; + return err; } /* change PVID */ if (vlan->flags & BRIDGE_VLAN_INFO_PVID) ksz_pwrite16(dev, port, REG_PORT_DEFAULT_VID, vlan->vid); + + return 0; } static int ksz9477_port_vlan_del(struct dsa_switch *ds, int port, @@ -1394,7 +1399,6 @@ static const struct dsa_switch_ops ksz9477_switch_ops = { .port_stp_state_set = ksz9477_port_stp_state_set, .port_fast_age = ksz_port_fast_age, .port_vlan_filtering = ksz9477_port_vlan_filtering, - .port_vlan_prepare = ksz_port_vlan_prepare, .port_vlan_add = ksz9477_port_vlan_add, .port_vlan_del = ksz9477_port_vlan_del, .port_fdb_dump = ksz9477_port_fdb_dump, diff --git a/drivers/net/dsa/microchip/ksz_common.c b/drivers/net/dsa/microchip/ksz_common.c index f2c9ff3ea4bef..4e0619c665730 100644 --- a/drivers/net/dsa/microchip/ksz_common.c +++ b/drivers/net/dsa/microchip/ksz_common.c @@ -213,15 +213,6 @@ void ksz_port_fast_age(struct dsa_switch *ds, int port) } EXPORT_SYMBOL_GPL(ksz_port_fast_age); -int ksz_port_vlan_prepare(struct dsa_switch *ds, int port, - const struct switchdev_obj_port_vlan *vlan) -{ - /* nothing needed */ - - return 0; -} -EXPORT_SYMBOL_GPL(ksz_port_vlan_prepare); - int ksz_port_fdb_dump(struct dsa_switch *ds, int port, dsa_fdb_dump_cb_t *cb, void *data) { diff --git a/drivers/net/dsa/microchip/ksz_common.h b/drivers/net/dsa/microchip/ksz_common.h index a1f0929d45a05..f212775372cec 100644 --- a/drivers/net/dsa/microchip/ksz_common.h +++ b/drivers/net/dsa/microchip/ksz_common.h @@ -161,8 +161,6 @@ int ksz_port_bridge_join(struct dsa_switch *ds, int port, void ksz_port_bridge_leave(struct dsa_switch *ds, int port, struct net_device *br); void ksz_port_fast_age(struct dsa_switch *ds, int port); -int ksz_port_vlan_prepare(struct dsa_switch *ds, int port, - const struct switchdev_obj_port_vlan *vlan); int ksz_port_fdb_dump(struct dsa_switch *ds, int port, dsa_fdb_dump_cb_t *cb, void *data); int ksz_port_mdb_add(struct dsa_switch *ds, int port, diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c index fcaddc9c93701..199a135125b23 100644 --- a/drivers/net/dsa/mt7530.c +++ b/drivers/net/dsa/mt7530.c @@ -1393,15 +1393,6 @@ mt7530_port_vlan_filtering(struct dsa_switch *ds, int port, return 0; } -static int -mt7530_port_vlan_prepare(struct dsa_switch *ds, int port, - const struct switchdev_obj_port_vlan *vlan) -{ - /* nothing needed */ - - return 0; -} - static void mt7530_hw_vlan_add(struct mt7530_priv *priv, struct mt7530_hw_vlan_entry *entry) @@ -1489,7 +1480,7 @@ mt7530_hw_vlan_update(struct mt7530_priv *priv, u16 vid, mt7530_vlan_cmd(priv, MT7530_VTCR_WR_VID, vid); } -static void +static int mt7530_port_vlan_add(struct dsa_switch *ds, int port, const struct switchdev_obj_port_vlan *vlan) { @@ -1510,6 +1501,8 @@ mt7530_port_vlan_add(struct dsa_switch *ds, int port, } mutex_unlock(&priv->reg_mutex); + + return 0; } static int @@ -2608,7 +2601,6 @@ static const struct dsa_switch_ops mt7530_switch_ops = { .port_fdb_del = mt7530_port_fdb_del, .port_fdb_dump = mt7530_port_fdb_dump, .port_vlan_filtering = mt7530_port_vlan_filtering, - .port_vlan_prepare = mt7530_port_vlan_prepare, .port_vlan_add = mt7530_port_vlan_add, .port_vlan_del = mt7530_port_vlan_del, .port_mirror_add = mt753x_port_mirror_add, diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index e9c517c0f89c3..4aa7d0a8f1977 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -1617,9 +1617,6 @@ mv88e6xxx_port_vlan_prepare(struct dsa_switch *ds, int port, err = mv88e6xxx_port_check_hw_vlan(ds, port, vlan->vid); mv88e6xxx_reg_unlock(chip); - /* We don't need any dynamic resource from the kernel (yet), - * so skip the prepare phase. - */ return err; } @@ -1963,17 +1960,19 @@ static int mv88e6xxx_port_vlan_join(struct mv88e6xxx_chip *chip, int port, return 0; } -static void mv88e6xxx_port_vlan_add(struct dsa_switch *ds, int port, - const struct switchdev_obj_port_vlan *vlan) +static int mv88e6xxx_port_vlan_add(struct dsa_switch *ds, int port, + const struct switchdev_obj_port_vlan *vlan) { struct mv88e6xxx_chip *chip = ds->priv; bool untagged = vlan->flags & BRIDGE_VLAN_INFO_UNTAGGED; bool pvid = vlan->flags & BRIDGE_VLAN_INFO_PVID; bool warn; u8 member; + int err; - if (!mv88e6xxx_max_vid(chip)) - return; + err = mv88e6xxx_port_vlan_prepare(ds, port, vlan); + if (err) + return err; if (dsa_is_dsa_port(ds, port) || dsa_is_cpu_port(ds, port)) member = MV88E6XXX_G1_VTU_DATA_MEMBER_TAG_UNMODIFIED; @@ -1989,15 +1988,25 @@ static void mv88e6xxx_port_vlan_add(struct dsa_switch *ds, int port, mv88e6xxx_reg_lock(chip); - if (mv88e6xxx_port_vlan_join(chip, port, vlan->vid, member, warn)) + err = mv88e6xxx_port_vlan_join(chip, port, vlan->vid, member, warn); + if (err) { dev_err(ds->dev, "p%d: failed to add VLAN %d%c\n", port, vlan->vid, untagged ? 'u' : 't'); + goto out; + } - if (pvid && mv88e6xxx_port_set_pvid(chip, port, vlan->vid)) - dev_err(ds->dev, "p%d: failed to set PVID %d\n", port, - vlan->vid); - + if (pvid) { + err = mv88e6xxx_port_set_pvid(chip, port, vlan->vid); + if (err) { + dev_err(ds->dev, "p%d: failed to set PVID %d\n", + port, vlan->vid); + goto out; + } + } +out: mv88e6xxx_reg_unlock(chip); + + return err; } static int mv88e6xxx_port_vlan_leave(struct mv88e6xxx_chip *chip, @@ -5388,7 +5397,6 @@ static const struct dsa_switch_ops mv88e6xxx_switch_ops = { .port_stp_state_set = mv88e6xxx_port_stp_state_set, .port_fast_age = mv88e6xxx_port_fast_age, .port_vlan_filtering = mv88e6xxx_port_vlan_filtering, - .port_vlan_prepare = mv88e6xxx_port_vlan_prepare, .port_vlan_add = mv88e6xxx_port_vlan_add, .port_vlan_del = mv88e6xxx_port_vlan_del, .port_fdb_add = mv88e6xxx_port_fdb_add, diff --git a/drivers/net/dsa/ocelot/felix.c b/drivers/net/dsa/ocelot/felix.c index 2825dd11feeea..768a74dc462af 100644 --- a/drivers/net/dsa/ocelot/felix.c +++ b/drivers/net/dsa/ocelot/felix.c @@ -134,15 +134,20 @@ static int felix_vlan_filtering(struct dsa_switch *ds, int port, bool enabled) return ocelot_port_vlan_filtering(ocelot, port, enabled); } -static void felix_vlan_add(struct dsa_switch *ds, int port, - const struct switchdev_obj_port_vlan *vlan) +static int felix_vlan_add(struct dsa_switch *ds, int port, + const struct switchdev_obj_port_vlan *vlan) { struct ocelot *ocelot = ds->priv; u16 flags = vlan->flags; + int err; + + err = felix_vlan_prepare(ds, port, vlan); + if (err) + return err; - ocelot_vlan_add(ocelot, port, vlan->vid, - flags & BRIDGE_VLAN_INFO_PVID, - flags & BRIDGE_VLAN_INFO_UNTAGGED); + return ocelot_vlan_add(ocelot, port, vlan->vid, + flags & BRIDGE_VLAN_INFO_PVID, + flags & BRIDGE_VLAN_INFO_UNTAGGED); } static int felix_vlan_del(struct dsa_switch *ds, int port, @@ -770,7 +775,6 @@ const struct dsa_switch_ops felix_switch_ops = { .port_bridge_join = felix_bridge_join, .port_bridge_leave = felix_bridge_leave, .port_stp_state_set = felix_bridge_stp_state_set, - .port_vlan_prepare = felix_vlan_prepare, .port_vlan_filtering = felix_vlan_filtering, .port_vlan_add = felix_vlan_add, .port_vlan_del = felix_vlan_del, diff --git a/drivers/net/dsa/qca8k.c b/drivers/net/dsa/qca8k.c index 1de6473b221bd..f54e8b6c86215 100644 --- a/drivers/net/dsa/qca8k.c +++ b/drivers/net/dsa/qca8k.c @@ -1312,13 +1312,6 @@ qca8k_port_vlan_filtering(struct dsa_switch *ds, int port, bool vlan_filtering) } static int -qca8k_port_vlan_prepare(struct dsa_switch *ds, int port, - const struct switchdev_obj_port_vlan *vlan) -{ - return 0; -} - -static void qca8k_port_vlan_add(struct dsa_switch *ds, int port, const struct switchdev_obj_port_vlan *vlan) { @@ -1328,8 +1321,10 @@ qca8k_port_vlan_add(struct dsa_switch *ds, int port, int ret = 0; ret = qca8k_vlan_add(priv, port, vlan->vid, untagged); - if (ret) + if (ret) { dev_err(priv->dev, "Failed to add VLAN to port %d (%d)", port, ret); + return ret; + } if (pvid) { int shift = 16 * (port % 2); @@ -1340,6 +1335,8 @@ qca8k_port_vlan_add(struct dsa_switch *ds, int port, QCA8K_PORT_VLAN_CVID(vlan->vid) | QCA8K_PORT_VLAN_SVID(vlan->vid)); } + + return 0; } static int @@ -1382,7 +1379,6 @@ static const struct dsa_switch_ops qca8k_switch_ops = { .port_fdb_del = qca8k_port_fdb_del, .port_fdb_dump = qca8k_port_fdb_dump, .port_vlan_filtering = qca8k_port_vlan_filtering, - .port_vlan_prepare = qca8k_port_vlan_prepare, .port_vlan_add = qca8k_port_vlan_add, .port_vlan_del = qca8k_port_vlan_del, .phylink_validate = qca8k_phylink_validate, diff --git a/drivers/net/dsa/realtek-smi-core.h b/drivers/net/dsa/realtek-smi-core.h index bc7bd47fb0375..26376b052594a 100644 --- a/drivers/net/dsa/realtek-smi-core.h +++ b/drivers/net/dsa/realtek-smi-core.h @@ -132,10 +132,8 @@ int rtl8366_reset_vlan(struct realtek_smi *smi); int rtl8366_init_vlan(struct realtek_smi *smi); int rtl8366_vlan_filtering(struct dsa_switch *ds, int port, bool vlan_filtering); -int rtl8366_vlan_prepare(struct dsa_switch *ds, int port, - const struct switchdev_obj_port_vlan *vlan); -void rtl8366_vlan_add(struct dsa_switch *ds, int port, - const struct switchdev_obj_port_vlan *vlan); +int rtl8366_vlan_add(struct dsa_switch *ds, int port, + const struct switchdev_obj_port_vlan *vlan); int rtl8366_vlan_del(struct dsa_switch *ds, int port, const struct switchdev_obj_port_vlan *vlan); void rtl8366_get_strings(struct dsa_switch *ds, int port, u32 stringset, diff --git a/drivers/net/dsa/rtl8366.c b/drivers/net/dsa/rtl8366.c index 27f429aa89a6a..3b24f2e13200a 100644 --- a/drivers/net/dsa/rtl8366.c +++ b/drivers/net/dsa/rtl8366.c @@ -374,36 +374,26 @@ int rtl8366_vlan_filtering(struct dsa_switch *ds, int port, bool vlan_filtering) } EXPORT_SYMBOL_GPL(rtl8366_vlan_filtering); -int rtl8366_vlan_prepare(struct dsa_switch *ds, int port, - const struct switchdev_obj_port_vlan *vlan) +int rtl8366_vlan_add(struct dsa_switch *ds, int port, + const struct switchdev_obj_port_vlan *vlan) { + bool untagged = !!(vlan->flags & BRIDGE_VLAN_INFO_UNTAGGED); + bool pvid = !!(vlan->flags & BRIDGE_VLAN_INFO_PVID); struct realtek_smi *smi = ds->priv; + u32 member = 0; + u32 untag = 0; + int ret; if (!smi->ops->is_vlan_valid(smi, vlan->vid)) return -EINVAL; - dev_info(smi->dev, "prepare VLAN %04x\n", vlan->vid); - /* Enable VLAN in the hardware * FIXME: what's with this 4k business? * Just rtl8366_enable_vlan() seems inconclusive. */ - return rtl8366_enable_vlan4k(smi, true); -} -EXPORT_SYMBOL_GPL(rtl8366_vlan_prepare); - -void rtl8366_vlan_add(struct dsa_switch *ds, int port, - const struct switchdev_obj_port_vlan *vlan) -{ - bool untagged = !!(vlan->flags & BRIDGE_VLAN_INFO_UNTAGGED); - bool pvid = !!(vlan->flags & BRIDGE_VLAN_INFO_PVID); - struct realtek_smi *smi = ds->priv; - u32 member = 0; - u32 untag = 0; - int ret; - - if (!smi->ops->is_vlan_valid(smi, vlan->vid)) - return; + ret = rtl8366_enable_vlan4k(smi, true); + if (ret) + return ret; dev_info(smi->dev, "add VLAN %d on port %d, %s, %s\n", vlan->vid, port, untagged ? "untagged" : "tagged", @@ -418,20 +408,22 @@ void rtl8366_vlan_add(struct dsa_switch *ds, int port, untag |= BIT(port); ret = rtl8366_set_vlan(smi, vlan->vid, member, untag, 0); - if (ret) + if (ret) { dev_err(smi->dev, "failed to set up VLAN %04x", vlan->vid); + return ret; + } if (!pvid) - return; + return 0; ret = rtl8366_set_pvid(smi, port, vlan->vid); - if (ret) + if (ret) { dev_err(smi->dev, "failed to set PVID on port %d to VLAN %04x", port, vlan->vid); + return ret; + } - if (!ret) - dev_dbg(smi->dev, "VLAN add: added VLAN %d with PVID on port %d\n", - vlan->vid, port); + return 0; } EXPORT_SYMBOL_GPL(rtl8366_vlan_add); diff --git a/drivers/net/dsa/rtl8366rb.c b/drivers/net/dsa/rtl8366rb.c index cfe56960f44b9..8969785687167 100644 --- a/drivers/net/dsa/rtl8366rb.c +++ b/drivers/net/dsa/rtl8366rb.c @@ -1504,7 +1504,6 @@ static const struct dsa_switch_ops rtl8366rb_switch_ops = { .get_ethtool_stats = rtl8366_get_ethtool_stats, .get_sset_count = rtl8366_get_sset_count, .port_vlan_filtering = rtl8366_vlan_filtering, - .port_vlan_prepare = rtl8366_vlan_prepare, .port_vlan_add = rtl8366_vlan_add, .port_vlan_del = rtl8366_vlan_del, .port_enable = rtl8366rb_port_enable, diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c index be200d4289af7..050b1260f358b 100644 --- a/drivers/net/dsa/sja1105/sja1105_main.c +++ b/drivers/net/dsa/sja1105/sja1105_main.c @@ -2600,26 +2600,6 @@ out: return rc; } -static int sja1105_vlan_prepare(struct dsa_switch *ds, int port, - const struct switchdev_obj_port_vlan *vlan) -{ - struct sja1105_private *priv = ds->priv; - - if (priv->vlan_state == SJA1105_VLAN_FILTERING_FULL) - return 0; - - /* If the user wants best-effort VLAN filtering (aka vlan_filtering - * bridge plus tagging), be sure to at least deny alterations to the - * configuration done by dsa_8021q. - */ - if (vid_is_dsa_8021q(vlan->vid)) { - dev_err(ds->dev, "Range 1024-3071 reserved for dsa_8021q operation\n"); - return -EBUSY; - } - - return 0; -} - /* The TPID setting belongs to the General Parameters table, * which can only be partially reconfigured at runtime (and not the TPID). * So a switch reset is required. @@ -2779,26 +2759,34 @@ static int sja1105_vlan_del_one(struct dsa_switch *ds, int port, u16 vid, return 0; } -static void sja1105_vlan_add(struct dsa_switch *ds, int port, - const struct switchdev_obj_port_vlan *vlan) +static int sja1105_vlan_add(struct dsa_switch *ds, int port, + const struct switchdev_obj_port_vlan *vlan) { struct sja1105_private *priv = ds->priv; bool vlan_table_changed = false; int rc; + /* If the user wants best-effort VLAN filtering (aka vlan_filtering + * bridge plus tagging), be sure to at least deny alterations to the + * configuration done by dsa_8021q. + */ + if (priv->vlan_state != SJA1105_VLAN_FILTERING_FULL && + vid_is_dsa_8021q(vlan->vid)) { + dev_err(ds->dev, "Range 1024-3071 reserved for dsa_8021q operation\n"); + return -EBUSY; + } + rc = sja1105_vlan_add_one(ds, port, vlan->vid, vlan->flags, &priv->bridge_vlans); if (rc < 0) - return; + return rc; if (rc > 0) vlan_table_changed = true; if (!vlan_table_changed) - return; + return 0; - rc = sja1105_build_vlan_table(priv, true); - if (rc) - dev_err(ds->dev, "Failed to build VLAN table: %d\n", rc); + return sja1105_build_vlan_table(priv, true); } static int sja1105_vlan_del(struct dsa_switch *ds, int port, @@ -3277,7 +3265,6 @@ static const struct dsa_switch_ops sja1105_switch_ops = { .port_bridge_join = sja1105_bridge_join, .port_bridge_leave = sja1105_bridge_leave, .port_stp_state_set = sja1105_bridge_stp_state_set, - .port_vlan_prepare = sja1105_vlan_prepare, .port_vlan_filtering = sja1105_vlan_filtering, .port_vlan_add = sja1105_vlan_add, .port_vlan_del = sja1105_vlan_del, diff --git a/include/net/dsa.h b/include/net/dsa.h index b8c0550dfa746..c9a3dd7588dfe 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -566,10 +566,8 @@ struct dsa_switch_ops { */ int (*port_vlan_filtering)(struct dsa_switch *ds, int port, bool vlan_filtering); - int (*port_vlan_prepare)(struct dsa_switch *ds, int port, + int (*port_vlan_add)(struct dsa_switch *ds, int port, const struct switchdev_obj_port_vlan *vlan); - void (*port_vlan_add)(struct dsa_switch *ds, int port, - const struct switchdev_obj_port_vlan *vlan); int (*port_vlan_del)(struct dsa_switch *ds, int port, const struct switchdev_obj_port_vlan *vlan); /* diff --git a/net/dsa/switch.c b/net/dsa/switch.c index 5f5e19c5e43ae..f92eaacb17cfe 100644 --- a/net/dsa/switch.c +++ b/net/dsa/switch.c @@ -227,21 +227,17 @@ static int dsa_switch_vlan_add(struct dsa_switch *ds, { int port, err; - if (!ds->ops->port_vlan_prepare || !ds->ops->port_vlan_add) + if (!ds->ops->port_vlan_add) return -EOPNOTSUPP; for (port = 0; port < ds->num_ports; port++) { if (dsa_switch_vlan_match(ds, port, info)) { - err = ds->ops->port_vlan_prepare(ds, port, info->vlan); + err = ds->ops->port_vlan_add(ds, port, info->vlan); if (err) return err; } } - for (port = 0; port < ds->num_ports; port++) - if (dsa_switch_vlan_match(ds, port, info)) - ds->ops->port_vlan_add(ds, port, info->vlan); - return 0; } -- GitLab From 417b99bf75c329e3d13555c720e621158c164b71 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Sat, 9 Jan 2021 02:01:54 +0200 Subject: [PATCH 0494/2012] net: dsa: remove obsolete comments about switchdev transactions Now that all port object notifiers were converted to be non-transactional, we can remove the comments that say otherwise. Signed-off-by: Vladimir Oltean Reviewed-by: Florian Fainelli Acked-by: Linus Walleij Acked-by: Jiri Pirko Signed-off-by: Jakub Kicinski --- net/dsa/slave.c | 5 ----- net/dsa/switch.c | 4 ---- 2 files changed, 9 deletions(-) diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 7ba505825db2f..5d7f6cada6a81 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -373,11 +373,6 @@ static int dsa_slave_port_obj_add(struct net_device *dev, struct dsa_port *dp = dsa_slave_to_port(dev); int err; - /* For the prepare phase, ensure the full set of changes is feasable in - * one go in order to signal a failure properly. If an operation is not - * supported, return -EOPNOTSUPP. - */ - switch (obj->id) { case SWITCHDEV_OBJ_ID_PORT_MDB: if (obj->orig_dev != dev) diff --git a/net/dsa/switch.c b/net/dsa/switch.c index f92eaacb17cfe..21d2f842d068f 100644 --- a/net/dsa/switch.c +++ b/net/dsa/switch.c @@ -298,10 +298,6 @@ static int dsa_switch_event(struct notifier_block *nb, break; } - /* Non-switchdev operations cannot be rolled back. If a DSA driver - * returns an error during the chained call, switch chips may be in an - * inconsistent state. - */ if (err) dev_dbg(ds->dev, "breaking chain for DSA event %lu (%d)\n", event, err); -- GitLab From 4b400fea76e13aec5a5752c1463b74df95178ced Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Sat, 9 Jan 2021 02:01:55 +0200 Subject: [PATCH 0495/2012] mlxsw: spectrum_switchdev: remove transactional logic for VLAN objects As of commit 457e20d65924 ("mlxsw: spectrum_switchdev: Avoid returning errors in commit phase"), the mlxsw driver performs the VLAN object offloading during the prepare phase. So conversion just seems to be a matter of removing the code that was running in the commit phase. Ido Schimmel explains that the reason why mlxsw_sp_span_respin is called unconditionally is because the bridge driver will ignore -EOPNOTSUPP and actually add the VLAN on the bridge device - see commit 9c86ce2c1ae3 ("net: bridge: Notify about bridge VLANs") and commit ea4721751977 ("mlxsw: spectrum_switchdev: Ignore bridge VLAN events"). Since the VLAN was successfully added on the bridge device, mlxsw_sp_span_respin_work() should be able to resolve the egress port for a packet that is mirrored to a gre tap and passes through the bridge device. Therefore keep the logic as it is. Signed-off-by: Vladimir Oltean Acked-by: Linus Walleij Acked-by: Jiri Pirko Reviewed-by: Ido Schimmel Reviewed-by: Florian Fainelli Signed-off-by: Jakub Kicinski --- .../mellanox/mlxsw/spectrum_switchdev.c | 37 ++----------------- 1 file changed, 4 insertions(+), 33 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c index 409c206f813fc..20c4f3c2cf23f 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c @@ -1196,7 +1196,6 @@ mlxsw_sp_br_ban_rif_pvid_change(struct mlxsw_sp *mlxsw_sp, static int mlxsw_sp_port_vlans_add(struct mlxsw_sp_port *mlxsw_sp_port, const struct switchdev_obj_port_vlan *vlan, - struct switchdev_trans *trans, struct netlink_ext_ack *extack) { bool flag_untagged = vlan->flags & BRIDGE_VLAN_INFO_UNTAGGED; @@ -1209,8 +1208,7 @@ static int mlxsw_sp_port_vlans_add(struct mlxsw_sp_port *mlxsw_sp_port, int err = 0; if ((vlan->flags & BRIDGE_VLAN_INFO_BRENTRY) && - br_vlan_enabled(orig_dev) && - switchdev_trans_ph_prepare(trans)) + br_vlan_enabled(orig_dev)) err = mlxsw_sp_br_ban_rif_pvid_change(mlxsw_sp, orig_dev, vlan); if (!err) @@ -1218,9 +1216,6 @@ static int mlxsw_sp_port_vlans_add(struct mlxsw_sp_port *mlxsw_sp_port, return err; } - if (switchdev_trans_ph_commit(trans)) - return 0; - bridge_port = mlxsw_sp_bridge_port_find(mlxsw_sp->bridge, orig_dev); if (WARN_ON(!bridge_port)) return -EINVAL; @@ -1764,16 +1759,13 @@ static int mlxsw_sp_port_obj_add(struct net_device *dev, { struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); const struct switchdev_obj_port_vlan *vlan; - struct switchdev_trans trans; int err = 0; switch (obj->id) { case SWITCHDEV_OBJ_ID_PORT_VLAN: vlan = SWITCHDEV_OBJ_PORT_VLAN(obj); - trans.ph_prepare = true; - err = mlxsw_sp_port_vlans_add(mlxsw_sp_port, vlan, &trans, - extack); + err = mlxsw_sp_port_vlans_add(mlxsw_sp_port, vlan, extack); /* The event is emitted before the changes are actually * applied to the bridge. Therefore schedule the respin @@ -1781,13 +1773,6 @@ static int mlxsw_sp_port_obj_add(struct net_device *dev, * updated bridge state. */ mlxsw_sp_span_respin(mlxsw_sp_port->mlxsw_sp); - - if (err) - break; - - trans.ph_prepare = false; - err = mlxsw_sp_port_vlans_add(mlxsw_sp_port, vlan, &trans, - extack); break; case SWITCHDEV_OBJ_ID_PORT_MDB: err = mlxsw_sp_port_mdb_add(mlxsw_sp_port, @@ -3351,8 +3336,7 @@ out: static int mlxsw_sp_switchdev_vxlan_vlans_add(struct net_device *vxlan_dev, struct switchdev_notifier_port_obj_info * - port_obj_info, - struct switchdev_trans *trans) + port_obj_info) { struct switchdev_obj_port_vlan *vlan = SWITCHDEV_OBJ_PORT_VLAN(port_obj_info->obj); @@ -3374,9 +3358,6 @@ mlxsw_sp_switchdev_vxlan_vlans_add(struct net_device *vxlan_dev, port_obj_info->handled = true; - if (switchdev_trans_ph_commit(trans)) - return 0; - bridge_device = mlxsw_sp_bridge_device_find(mlxsw_sp->bridge, br_dev); if (!bridge_device) return -EINVAL; @@ -3427,22 +3408,12 @@ mlxsw_sp_switchdev_handle_vxlan_obj_add(struct net_device *vxlan_dev, struct switchdev_notifier_port_obj_info * port_obj_info) { - struct switchdev_trans trans; int err = 0; switch (port_obj_info->obj->id) { case SWITCHDEV_OBJ_ID_PORT_VLAN: - trans.ph_prepare = true; - err = mlxsw_sp_switchdev_vxlan_vlans_add(vxlan_dev, - port_obj_info, - &trans); - if (err) - break; - - trans.ph_prepare = false; err = mlxsw_sp_switchdev_vxlan_vlans_add(vxlan_dev, - port_obj_info, - &trans); + port_obj_info); break; default: break; -- GitLab From 8f73cc50ba2dd5a5749d3670e453c3864258b892 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Sat, 9 Jan 2021 02:01:56 +0200 Subject: [PATCH 0496/2012] net: switchdev: delete the transaction object Now that all users of struct switchdev_trans have been modified to do without it, we can remove this structure and the two helpers to determine the phase. Signed-off-by: Vladimir Oltean Reviewed-by: Florian Fainelli Reviewed-by: Ido Schimmel Acked-by: Linus Walleij Acked-by: Jiri Pirko Signed-off-by: Jakub Kicinski --- include/net/switchdev.h | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/include/net/switchdev.h b/include/net/switchdev.h index f873e2c5e125a..88fcac1409667 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -16,20 +16,6 @@ #define SWITCHDEV_F_SKIP_EOPNOTSUPP BIT(1) #define SWITCHDEV_F_DEFER BIT(2) -struct switchdev_trans { - bool ph_prepare; -}; - -static inline bool switchdev_trans_ph_prepare(struct switchdev_trans *trans) -{ - return trans && trans->ph_prepare; -} - -static inline bool switchdev_trans_ph_commit(struct switchdev_trans *trans) -{ - return trans && !trans->ph_prepare; -} - enum switchdev_attr_id { SWITCHDEV_ATTR_ID_UNDEFINED, SWITCHDEV_ATTR_ID_PORT_STP_STATE, -- GitLab From 537e2b88224c53b2620a89cd41c3d0d77e8d4030 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Sat, 9 Jan 2021 22:34:15 +0200 Subject: [PATCH 0497/2012] net: dsa: felix: the switch does not support DMA The code that sets the DMA mask to 64 bits is bogus, it is taken from the enetc driver together with the rest of the PCI probing boilerplate. Since this patch is touching the error path to delete err_dma, let's also change the err_alloc_felix label which was incorrect. The kzalloc failure does not need a kfree, but it doesn't hurt either, since kfree works with NULL pointers. Signed-off-by: Vladimir Oltean Reviewed-by: Florian Fainelli Link: https://lore.kernel.org/r/20210109203415.2120142-1-olteanv@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/ocelot/felix_vsc9959.c | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) diff --git a/drivers/net/dsa/ocelot/felix_vsc9959.c b/drivers/net/dsa/ocelot/felix_vsc9959.c index 2e5bbdca5ea47..a87597eef8cf3 100644 --- a/drivers/net/dsa/ocelot/felix_vsc9959.c +++ b/drivers/net/dsa/ocelot/felix_vsc9959.c @@ -1408,17 +1408,6 @@ static int felix_pci_probe(struct pci_dev *pdev, goto err_pci_enable; } - /* set up for high or low dma */ - err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); - if (err) { - err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)); - if (err) { - dev_err(&pdev->dev, - "DMA configuration failed: 0x%x\n", err); - goto err_dma; - } - } - felix = kzalloc(sizeof(struct felix), GFP_KERNEL); if (!felix) { err = -ENOMEM; @@ -1474,9 +1463,8 @@ err_register_ds: kfree(ds); err_alloc_ds: err_alloc_irq: -err_alloc_felix: kfree(felix); -err_dma: +err_alloc_felix: pci_disable_device(pdev); err_pci_enable: return err; -- GitLab From 624407d2cf14ff58e53bf4b2af9595c4f21d606e Mon Sep 17 00:00:00 2001 From: Russell King Date: Sun, 10 Jan 2021 10:58:32 +0000 Subject: [PATCH 0498/2012] net: sfp: cope with SFPs that set both LOS normal and LOS inverted The SFP MSA defines two option bits in byte 65 to indicate how the Rx_LOS signal on SFP pin 8 behaves: bit 2 - Loss of Signal implemented, signal inverted from standard definition in SFP MSA (often called "Signal Detect"). bit 1 - Loss of Signal implemented, signal as defined in SFP MSA (often called "Rx_LOS"). Clearly, setting both bits results in a meaningless situation: it would mean that LOS is implemented in both the normal sense (1 = signal loss) and inverted sense (0 = signal loss). Unfortunately, there are modules out there which set both bits, which will be initially interpret as "inverted" sense, and then, if the LOS signal changes state, we will toggle between LINK_UP and WAIT_LOS states. Change our LOS handling to give well defined behaviour: only interpret these bits as meaningful if exactly one is set, otherwise treat it as if LOS is not implemented. Signed-off-by: Russell King Reviewed-by: Andrew Lunn Link: https://lore.kernel.org/r/E1kyYQa-0004iR-CU@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/phy/sfp.c | 36 ++++++++++++++++++++++-------------- 1 file changed, 22 insertions(+), 14 deletions(-) diff --git a/drivers/net/phy/sfp.c b/drivers/net/phy/sfp.c index 91d74c1a920ab..55e6a2fc5bc69 100644 --- a/drivers/net/phy/sfp.c +++ b/drivers/net/phy/sfp.c @@ -1482,15 +1482,19 @@ static void sfp_sm_link_down(struct sfp *sfp) static void sfp_sm_link_check_los(struct sfp *sfp) { - unsigned int los = sfp->state & SFP_F_LOS; + const __be16 los_inverted = cpu_to_be16(SFP_OPTIONS_LOS_INVERTED); + const __be16 los_normal = cpu_to_be16(SFP_OPTIONS_LOS_NORMAL); + __be16 los_options = sfp->id.ext.options & (los_inverted | los_normal); + bool los = false; /* If neither SFP_OPTIONS_LOS_INVERTED nor SFP_OPTIONS_LOS_NORMAL - * are set, we assume that no LOS signal is available. + * are set, we assume that no LOS signal is available. If both are + * set, we assume LOS is not implemented (and is meaningless.) */ - if (sfp->id.ext.options & cpu_to_be16(SFP_OPTIONS_LOS_INVERTED)) - los ^= SFP_F_LOS; - else if (!(sfp->id.ext.options & cpu_to_be16(SFP_OPTIONS_LOS_NORMAL))) - los = 0; + if (los_options == los_inverted) + los = !(sfp->state & SFP_F_LOS); + else if (los_options == los_normal) + los = !!(sfp->state & SFP_F_LOS); if (los) sfp_sm_next(sfp, SFP_S_WAIT_LOS, 0); @@ -1500,18 +1504,22 @@ static void sfp_sm_link_check_los(struct sfp *sfp) static bool sfp_los_event_active(struct sfp *sfp, unsigned int event) { - return (sfp->id.ext.options & cpu_to_be16(SFP_OPTIONS_LOS_INVERTED) && - event == SFP_E_LOS_LOW) || - (sfp->id.ext.options & cpu_to_be16(SFP_OPTIONS_LOS_NORMAL) && - event == SFP_E_LOS_HIGH); + const __be16 los_inverted = cpu_to_be16(SFP_OPTIONS_LOS_INVERTED); + const __be16 los_normal = cpu_to_be16(SFP_OPTIONS_LOS_NORMAL); + __be16 los_options = sfp->id.ext.options & (los_inverted | los_normal); + + return (los_options == los_inverted && event == SFP_E_LOS_LOW) || + (los_options == los_normal && event == SFP_E_LOS_HIGH); } static bool sfp_los_event_inactive(struct sfp *sfp, unsigned int event) { - return (sfp->id.ext.options & cpu_to_be16(SFP_OPTIONS_LOS_INVERTED) && - event == SFP_E_LOS_HIGH) || - (sfp->id.ext.options & cpu_to_be16(SFP_OPTIONS_LOS_NORMAL) && - event == SFP_E_LOS_LOW); + const __be16 los_inverted = cpu_to_be16(SFP_OPTIONS_LOS_INVERTED); + const __be16 los_normal = cpu_to_be16(SFP_OPTIONS_LOS_NORMAL); + __be16 los_options = sfp->id.ext.options & (los_inverted | los_normal); + + return (los_options == los_inverted && event == SFP_E_LOS_HIGH) || + (los_options == los_normal && event == SFP_E_LOS_LOW); } static void sfp_sm_fault(struct sfp *sfp, unsigned int next_state, bool warn) -- GitLab From a006dbf06e5df64adf08996857a57f84577ddbcc Mon Sep 17 00:00:00 2001 From: Russell King Date: Sun, 10 Jan 2021 10:58:37 +0000 Subject: [PATCH 0499/2012] net: sfp: extend bitrate-derived mode for 2500BASE-X Extend the bitrate-derived support to include 2500BASE-X for modules that report a bitrate of 2500Mbaud. Signed-off-by: Russell King Reviewed-by: Andrew Lunn Link: https://lore.kernel.org/r/E1kyYQf-0004iY-Gh@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/phy/sfp-bus.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/net/phy/sfp-bus.c b/drivers/net/phy/sfp-bus.c index 20b91f5dfc6ed..cdfa0a190962e 100644 --- a/drivers/net/phy/sfp-bus.c +++ b/drivers/net/phy/sfp-bus.c @@ -337,11 +337,16 @@ void sfp_parse_support(struct sfp_bus *bus, const struct sfp_eeprom_id *id, * the bitrate to determine supported modes. Some BiDi modules (eg, * 1310nm/1550nm) are not 1000BASE-BX compliant due to the differing * wavelengths, so do not set any transceiver bits. + * + * Do the same for modules supporting 2500BASE-X. Note that some + * modules use 2500Mbaud rather than 3100 or 3200Mbaud for + * 2500BASE-X, so we allow some slack here. */ - if (bitmap_empty(modes, __ETHTOOL_LINK_MODE_MASK_NBITS)) { - /* If the bit rate allows 1000baseX */ - if (br_nom && br_min <= 1300 && br_max >= 1200) + if (bitmap_empty(modes, __ETHTOOL_LINK_MODE_MASK_NBITS) && br_nom) { + if (br_min <= 1300 && br_max >= 1200) phylink_set(modes, 1000baseX_Full); + if (br_min <= 3200 && br_max >= 2500) + phylink_set(modes, 2500baseX_Full); } if (bus->sfp_quirk) -- GitLab From a45c1c10ebf28b1079d621d069f183157e496c63 Mon Sep 17 00:00:00 2001 From: Russell King Date: Sun, 10 Jan 2021 14:54:36 +0000 Subject: [PATCH 0500/2012] net: phy: at803x: use phy_modify_mmd() Convert at803x_clk_out_config() to use phy_modify_mmd(). Signed-off-by: Russell King Reviewed-by: Andrew Lunn Link: https://lore.kernel.org/r/E1kyc72-0008Pq-1x@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/phy/at803x.c | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/drivers/net/phy/at803x.c b/drivers/net/phy/at803x.c index d0b36fd6c2655..9636edb8d6187 100644 --- a/drivers/net/phy/at803x.c +++ b/drivers/net/phy/at803x.c @@ -529,19 +529,12 @@ static void at803x_remove(struct phy_device *phydev) static int at803x_clk_out_config(struct phy_device *phydev) { struct at803x_priv *priv = phydev->priv; - int val; if (!priv->clk_25m_mask) return 0; - val = phy_read_mmd(phydev, MDIO_MMD_AN, AT803X_MMD7_CLK25M); - if (val < 0) - return val; - - val &= ~priv->clk_25m_mask; - val |= priv->clk_25m_reg; - - return phy_write_mmd(phydev, MDIO_MMD_AN, AT803X_MMD7_CLK25M, val); + return phy_modify_mmd(phydev, MDIO_MMD_AN, AT803X_MMD7_CLK25M, + priv->clk_25m_mask, priv->clk_25m_reg); } static int at8031_pll_config(struct phy_device *phydev) -- GitLab From 22fe6b04b460feaef00ab4bc5b7420762b73f95f Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 8 Jan 2021 21:06:22 -0800 Subject: [PATCH 0501/2012] net: marvell: prestera: Correct typo The function was incorrectly named with a trailing 'r' at the end of prestera. Signed-off-by: Florian Fainelli Link: https://lore.kernel.org/r/20210109050622.8081-1-f.fainelli@gmail.com Signed-off-by: Jakub Kicinski --- .../net/ethernet/marvell/prestera/prestera_switchdev.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/marvell/prestera/prestera_switchdev.c b/drivers/net/ethernet/marvell/prestera/prestera_switchdev.c index e2374a39e4f8a..beb6447fbe407 100644 --- a/drivers/net/ethernet/marvell/prestera/prestera_switchdev.c +++ b/drivers/net/ethernet/marvell/prestera/prestera_switchdev.c @@ -652,9 +652,9 @@ static int prestera_port_bridge_vlan_stp_set(struct prestera_port *port, return 0; } -static int presterar_port_attr_stp_state_set(struct prestera_port *port, - struct net_device *dev, - u8 state) +static int prestera_port_attr_stp_state_set(struct prestera_port *port, + struct net_device *dev, + u8 state) { struct prestera_bridge_port *br_port; struct prestera_bridge_vlan *br_vlan; @@ -702,8 +702,8 @@ static int prestera_port_obj_attr_set(struct net_device *dev, switch (attr->id) { case SWITCHDEV_ATTR_ID_PORT_STP_STATE: - err = presterar_port_attr_stp_state_set(port, attr->orig_dev, - attr->u.stp_state); + err = prestera_port_attr_stp_state_set(port, attr->orig_dev, + attr->u.stp_state); break; case SWITCHDEV_ATTR_ID_PORT_PRE_BRIDGE_FLAGS: if (attr->u.brport_flags & -- GitLab From 2007317e15cd573c248e8deed30b5c0d622129bc Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Sat, 9 Jan 2021 23:00:04 +0100 Subject: [PATCH 0502/2012] r8169: align RTL8168e jumbo pcie read request size with vendor driver Align behavior with r8168 vendor driver and don't reduce max read request size for RTL8168e in jumbo mode. Signed-off-by: Heiner Kallweit Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/realtek/r8169_main.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index ddcf4870f0256..c4c2647c54e73 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -2368,12 +2368,10 @@ static void rtl_jumbo_config(struct rtl8169_private *tp) r8168dp_hw_jumbo_disable(tp); break; case RTL_GIGA_MAC_VER_31 ... RTL_GIGA_MAC_VER_33: - if (jumbo) { - pcie_set_readrq(tp->pci_dev, 512); + if (jumbo) r8168e_hw_jumbo_enable(tp); - } else { + else r8168e_hw_jumbo_disable(tp); - } break; default: break; -- GitLab From 5e00e16cb98935bcf06f51931876d898c226f65c Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Sat, 9 Jan 2021 23:01:18 +0100 Subject: [PATCH 0503/2012] r8169: tweak max read request size for newer chips also in jumbo mtu mode So far we don't increase the max read request size if we switch to jumbo mode before bringing up the interface for the first time. Let's change this. Signed-off-by: Heiner Kallweit Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/realtek/r8169_main.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index c4c2647c54e73..bfcc64db88032 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -2341,13 +2341,14 @@ static void r8168b_1_hw_jumbo_disable(struct rtl8169_private *tp) static void rtl_jumbo_config(struct rtl8169_private *tp) { bool jumbo = tp->dev->mtu > ETH_DATA_LEN; + int readrq = 4096; rtl_unlock_config_regs(tp); switch (tp->mac_version) { case RTL_GIGA_MAC_VER_12: case RTL_GIGA_MAC_VER_17: if (jumbo) { - pcie_set_readrq(tp->pci_dev, 512); + readrq = 512; r8168b_1_hw_jumbo_enable(tp); } else { r8168b_1_hw_jumbo_disable(tp); @@ -2355,7 +2356,7 @@ static void rtl_jumbo_config(struct rtl8169_private *tp) break; case RTL_GIGA_MAC_VER_18 ... RTL_GIGA_MAC_VER_26: if (jumbo) { - pcie_set_readrq(tp->pci_dev, 512); + readrq = 512; r8168c_hw_jumbo_enable(tp); } else { r8168c_hw_jumbo_disable(tp); @@ -2378,8 +2379,8 @@ static void rtl_jumbo_config(struct rtl8169_private *tp) } rtl_lock_config_regs(tp); - if (!jumbo && pci_is_pcie(tp->pci_dev) && tp->supports_gmii) - pcie_set_readrq(tp->pci_dev, 4096); + if (pci_is_pcie(tp->pci_dev) && tp->supports_gmii) + pcie_set_readrq(tp->pci_dev, readrq); } DECLARE_RTL_COND(rtl_chipcmd_cond) -- GitLab From ae1e82c6b74191e6119d39f26aa3248af74d867e Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Sat, 9 Jan 2021 23:53:26 +0100 Subject: [PATCH 0504/2012] r8169: make use of the unaligned access helpers Instead of open-coding unaligned access let's use the predefined unaligned access helpers. Signed-off-by: Heiner Kallweit Link: https://lore.kernel.org/r/cfaf9176-e4f9-c32d-4d4d-e8fb52009f35@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/realtek/r8169_main.c | 35 +++++++++-------------- 1 file changed, 13 insertions(+), 22 deletions(-) diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index bfcc64db88032..dbf0c2909d911 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include "r8169.h" @@ -2104,18 +2105,12 @@ static void rtl8125b_config_eee_mac(struct rtl8169_private *tp) r8168_mac_ocp_modify(tp, 0xe040, 0, BIT(1) | BIT(0)); } -static void rtl_rar_exgmac_set(struct rtl8169_private *tp, u8 *addr) +static void rtl_rar_exgmac_set(struct rtl8169_private *tp, const u8 *addr) { - const u16 w[] = { - addr[0] | (addr[1] << 8), - addr[2] | (addr[3] << 8), - addr[4] | (addr[5] << 8) - }; - - rtl_eri_write(tp, 0xe0, ERIAR_MASK_1111, w[0] | (w[1] << 16)); - rtl_eri_write(tp, 0xe4, ERIAR_MASK_1111, w[2]); - rtl_eri_write(tp, 0xf0, ERIAR_MASK_1111, w[0] << 16); - rtl_eri_write(tp, 0xf4, ERIAR_MASK_1111, w[1] | (w[2] << 16)); + rtl_eri_write(tp, 0xe0, ERIAR_MASK_1111, get_unaligned_le32(addr)); + rtl_eri_write(tp, 0xe4, ERIAR_MASK_1111, get_unaligned_le16(addr + 4)); + rtl_eri_write(tp, 0xf0, ERIAR_MASK_1111, get_unaligned_le16(addr) << 16); + rtl_eri_write(tp, 0xf4, ERIAR_MASK_1111, get_unaligned_le32(addr + 2)); } u16 rtl8168h_2_get_adc_bias_ioffset(struct rtl8169_private *tp) @@ -2165,14 +2160,14 @@ static void rtl8169_init_phy(struct rtl8169_private *tp) genphy_soft_reset(tp->phydev); } -static void rtl_rar_set(struct rtl8169_private *tp, u8 *addr) +static void rtl_rar_set(struct rtl8169_private *tp, const u8 *addr) { rtl_unlock_config_regs(tp); - RTL_W32(tp, MAC4, addr[4] | addr[5] << 8); + RTL_W32(tp, MAC4, get_unaligned_le16(addr + 4)); rtl_pci_commit(tp); - RTL_W32(tp, MAC0, addr[0] | addr[1] << 8 | addr[2] << 16 | addr[3] << 24); + RTL_W32(tp, MAC0, get_unaligned_le32(addr)); rtl_pci_commit(tp); if (tp->mac_version == RTL_GIGA_MAC_VER_34) @@ -4977,16 +4972,12 @@ static void rtl_read_mac_address(struct rtl8169_private *tp, { /* Get MAC address */ if (rtl_is_8168evl_up(tp) && tp->mac_version != RTL_GIGA_MAC_VER_34) { - u32 value = rtl_eri_read(tp, 0xe0); - - mac_addr[0] = (value >> 0) & 0xff; - mac_addr[1] = (value >> 8) & 0xff; - mac_addr[2] = (value >> 16) & 0xff; - mac_addr[3] = (value >> 24) & 0xff; + u32 value; + value = rtl_eri_read(tp, 0xe0); + put_unaligned_le32(value, mac_addr); value = rtl_eri_read(tp, 0xe4); - mac_addr[4] = (value >> 0) & 0xff; - mac_addr[5] = (value >> 8) & 0xff; + put_unaligned_le16(value, mac_addr + 4); } else if (rtl_is_8125(tp)) { rtl_read_mac_from_reg(tp, mac_addr, MAC0_BKP); } -- GitLab From 6f83802a1a06e74eafbdbc9b52c05516d3083d02 Mon Sep 17 00:00:00 2001 From: Stefan Chulski Date: Sun, 10 Jan 2021 21:23:02 +0200 Subject: [PATCH 0505/2012] net: mvpp2: Remove Pause and Asym_Pause support Packet Processor hardware not connected to MAC flow control unit and cannot support TX flow control. This patch disable flow control support. Fixes: 3f518509dedc ("ethernet: Add new driver for Marvell Armada 375 network unit") Signed-off-by: Stefan Chulski Acked-by: Marcin Wojtas Link: https://lore.kernel.org/r/1610306582-16641-1-git-send-email-stefanc@marvell.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c index 4b1808acef581..358119d983582 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c @@ -5882,8 +5882,6 @@ static void mvpp2_phylink_validate(struct phylink_config *config, phylink_set(mask, Autoneg); phylink_set_port_modes(mask); - phylink_set(mask, Pause); - phylink_set(mask, Asym_Pause); switch (state->interface) { case PHY_INTERFACE_MODE_10GBASER: -- GitLab From 9224d97183d9b1e773f11b1c0d8065023ae3c260 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Sun, 10 Jan 2021 20:48:40 +0100 Subject: [PATCH 0506/2012] r8169: enable PLL power-down for chip versions 34, 35, 36, 42 There's no known reason why PLL powerdown on D3 shouldn't be enabled on chip versions 34, 35, 36, and 42. At least the vendor driver doesn't exclude any of these chip versions. Signed-off-by: Heiner Kallweit Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/realtek/r8169_main.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index dbf0c2909d911..9c87fb9f1209c 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -2226,10 +2226,8 @@ static void rtl_pll_power_down(struct rtl8169_private *tp) switch (tp->mac_version) { case RTL_GIGA_MAC_VER_25 ... RTL_GIGA_MAC_VER_26: - case RTL_GIGA_MAC_VER_32 ... RTL_GIGA_MAC_VER_33: - case RTL_GIGA_MAC_VER_37: - case RTL_GIGA_MAC_VER_39 ... RTL_GIGA_MAC_VER_41: - case RTL_GIGA_MAC_VER_43 ... RTL_GIGA_MAC_VER_63: + case RTL_GIGA_MAC_VER_32 ... RTL_GIGA_MAC_VER_37: + case RTL_GIGA_MAC_VER_39 ... RTL_GIGA_MAC_VER_63: RTL_W8(tp, PMCH, RTL_R8(tp, PMCH) & ~0x80); break; default: @@ -2241,13 +2239,12 @@ static void rtl_pll_power_up(struct rtl8169_private *tp) { switch (tp->mac_version) { case RTL_GIGA_MAC_VER_25 ... RTL_GIGA_MAC_VER_26: - case RTL_GIGA_MAC_VER_32 ... RTL_GIGA_MAC_VER_33: - case RTL_GIGA_MAC_VER_37: + case RTL_GIGA_MAC_VER_32 ... RTL_GIGA_MAC_VER_37: case RTL_GIGA_MAC_VER_39: case RTL_GIGA_MAC_VER_43: RTL_W8(tp, PMCH, RTL_R8(tp, PMCH) | 0x80); break; - case RTL_GIGA_MAC_VER_40 ... RTL_GIGA_MAC_VER_41: + case RTL_GIGA_MAC_VER_40 ... RTL_GIGA_MAC_VER_42: case RTL_GIGA_MAC_VER_44 ... RTL_GIGA_MAC_VER_63: RTL_W8(tp, PMCH, RTL_R8(tp, PMCH) | 0xc0); break; -- GitLab From 128735a1530ec92254ddc211cbc796f5fca3b501 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Sun, 10 Jan 2021 20:50:08 +0100 Subject: [PATCH 0507/2012] r8169: improve handling D3 PLL power-down Realtek provided a description of bits 6 and 7 in register PMCH. They configure whether the chip powers down certain PLL in D3hot and D3cold respectively. They do not actually power down the PLL. Reflect this in the code and configure D3 PLL powerdown based on whether WOL is enabled. Signed-off-by: Heiner Kallweit Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/realtek/r8169_main.c | 51 ++++++++++------------- 1 file changed, 22 insertions(+), 29 deletions(-) diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index 9c87fb9f1209c..64fdc168fc693 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -261,6 +261,9 @@ enum rtl8168_8101_registers { #define CSIAR_BYTE_ENABLE 0x0000f000 #define CSIAR_ADDR_MASK 0x00000fff PMCH = 0x6f, +#define D3COLD_NO_PLL_DOWN BIT(7) +#define D3HOT_NO_PLL_DOWN BIT(6) +#define D3_NO_PLL_DOWN (BIT(7) | BIT(6)) EPHYAR = 0x80, #define EPHYAR_FLAG 0x80000000 #define EPHYAR_WRITE_CMD 0x80000000 @@ -1250,6 +1253,22 @@ static bool r8168_check_dash(struct rtl8169_private *tp) } } +static void rtl_set_d3_pll_down(struct rtl8169_private *tp, bool enable) +{ + switch (tp->mac_version) { + case RTL_GIGA_MAC_VER_25 ... RTL_GIGA_MAC_VER_26: + case RTL_GIGA_MAC_VER_32 ... RTL_GIGA_MAC_VER_37: + case RTL_GIGA_MAC_VER_39 ... RTL_GIGA_MAC_VER_63: + if (enable) + RTL_W8(tp, PMCH, RTL_R8(tp, PMCH) & ~D3_NO_PLL_DOWN); + else + RTL_W8(tp, PMCH, RTL_R8(tp, PMCH) | D3_NO_PLL_DOWN); + break; + default: + break; + } +} + static void rtl_reset_packet_filter(struct rtl8169_private *tp) { rtl_eri_clear_bits(tp, 0xdc, BIT(0)); @@ -1416,6 +1435,7 @@ static void __rtl8169_set_wol(struct rtl8169_private *tp, u32 wolopts) rtl_lock_config_regs(tp); device_set_wakeup_enable(tp_to_dev(tp), wolopts); + rtl_set_d3_pll_down(tp, !wolopts); tp->dev->wol_enabled = wolopts ? 1 : 0; } @@ -2221,37 +2241,11 @@ static void rtl_pll_power_down(struct rtl8169_private *tp) if (device_may_wakeup(tp_to_dev(tp))) { phy_speed_down(tp->phydev, false); rtl_wol_suspend_quirk(tp); - return; - } - - switch (tp->mac_version) { - case RTL_GIGA_MAC_VER_25 ... RTL_GIGA_MAC_VER_26: - case RTL_GIGA_MAC_VER_32 ... RTL_GIGA_MAC_VER_37: - case RTL_GIGA_MAC_VER_39 ... RTL_GIGA_MAC_VER_63: - RTL_W8(tp, PMCH, RTL_R8(tp, PMCH) & ~0x80); - break; - default: - break; } } static void rtl_pll_power_up(struct rtl8169_private *tp) { - switch (tp->mac_version) { - case RTL_GIGA_MAC_VER_25 ... RTL_GIGA_MAC_VER_26: - case RTL_GIGA_MAC_VER_32 ... RTL_GIGA_MAC_VER_37: - case RTL_GIGA_MAC_VER_39: - case RTL_GIGA_MAC_VER_43: - RTL_W8(tp, PMCH, RTL_R8(tp, PMCH) | 0x80); - break; - case RTL_GIGA_MAC_VER_40 ... RTL_GIGA_MAC_VER_42: - case RTL_GIGA_MAC_VER_44 ... RTL_GIGA_MAC_VER_63: - RTL_W8(tp, PMCH, RTL_R8(tp, PMCH) | 0xc0); - break; - default: - break; - } - phy_resume(tp->phydev); } @@ -5330,6 +5324,8 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) /* configure chip for default features */ rtl8169_set_features(dev, dev->features); + rtl_set_d3_pll_down(tp, true); + jumbo_max = rtl_jumbo_max(tp); if (jumbo_max) dev->max_mtu = jumbo_max; @@ -5350,9 +5346,6 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) if (rc) return rc; - /* chip gets powered up in rtl_open() */ - rtl_pll_power_down(tp); - rc = register_netdev(dev); if (rc) return rc; -- GitLab From 7257c977c811237fbec8a8a173f44a139c32a506 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Sun, 10 Jan 2021 20:50:57 +0100 Subject: [PATCH 0508/2012] r8169: clean up rtl_pll_power_down/up functions Clean up the remainings of rtl_pll_power_down/up and rename rtl_pll_power_down() to rtl_prepare_power_down(). Signed-off-by: Heiner Kallweit Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/realtek/r8169_main.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index 64fdc168fc693..33336098b1e56 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -2229,7 +2229,7 @@ static void rtl_wol_suspend_quirk(struct rtl8169_private *tp) } } -static void rtl_pll_power_down(struct rtl8169_private *tp) +static void rtl_prepare_power_down(struct rtl8169_private *tp) { if (r8168_check_dash(tp)) return; @@ -2244,11 +2244,6 @@ static void rtl_pll_power_down(struct rtl8169_private *tp) } } -static void rtl_pll_power_up(struct rtl8169_private *tp) -{ - phy_resume(tp->phydev); -} - static void rtl_init_rxcfg(struct rtl8169_private *tp) { switch (tp->mac_version) { @@ -4604,12 +4599,12 @@ static void rtl8169_down(struct rtl8169_private *tp) rtl8169_cleanup(tp, true); - rtl_pll_power_down(tp); + rtl_prepare_power_down(tp); } static void rtl8169_up(struct rtl8169_private *tp) { - rtl_pll_power_up(tp); + phy_resume(tp->phydev); rtl8169_init_phy(tp); napi_enable(&tp->napi); set_bit(RTL_FLAG_TASK_ENABLED, tp->wk.flags); -- GitLab From 1d04ccb916ce81b8313712bae8a2304c62769c1f Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Sun, 10 Jan 2021 21:29:22 -0800 Subject: [PATCH 0509/2012] net: bareudp: simplify error paths calling dellink bareudp_dellink() only needs the device list to hand it to unregister_netdevice_queue(). We can pass NULL in, and unregister_netdevice_queue() will do the unregistering. There is no chance for batching on the error path, anyway. Suggested-by: Cong Wang Reviewed-by: Cong Wang Link: https://lore.kernel.org/r/20210111052922.2145003-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/bareudp.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/drivers/net/bareudp.c b/drivers/net/bareudp.c index 85de5f96c02bb..0965d136def3a 100644 --- a/drivers/net/bareudp.c +++ b/drivers/net/bareudp.c @@ -658,7 +658,6 @@ static int bareudp_newlink(struct net *net, struct net_device *dev, struct netlink_ext_ack *extack) { struct bareudp_conf conf; - LIST_HEAD(list_kill); int err; err = bareudp2info(data, &conf, extack); @@ -676,8 +675,7 @@ static int bareudp_newlink(struct net *net, struct net_device *dev, return 0; err_unconfig: - bareudp_dellink(dev, &list_kill); - unregister_netdevice_many(&list_kill); + bareudp_dellink(dev, NULL); return err; } @@ -729,7 +727,6 @@ struct net_device *bareudp_dev_create(struct net *net, const char *name, { struct nlattr *tb[IFLA_MAX + 1]; struct net_device *dev; - LIST_HEAD(list_kill); int err; memset(tb, 0, sizeof(tb)); @@ -753,8 +750,7 @@ struct net_device *bareudp_dev_create(struct net *net, const char *name, return dev; err: - bareudp_dellink(dev, &list_kill); - unregister_netdevice_many(&list_kill); + bareudp_dellink(dev, NULL); return ERR_PTR(err); } EXPORT_SYMBOL_GPL(bareudp_dev_create); -- GitLab From ad0bfc233ae2e7ee3bcb9a6089e4aa54e2b44fa1 Mon Sep 17 00:00:00 2001 From: Danilo Carvalho Date: Fri, 8 Jan 2021 22:21:04 +0000 Subject: [PATCH 0510/2012] Fix whitespace in uapi/linux/tcp.h. List of things fixed: - Two of the socket options were idented with spaces instead of tabs. - Trailing whitespace in some lines. - Improper spacing around parenthesis caught by checkpatch.pl. - Mix of space and tabs in tcp_word_hdr union. Signed-off-by: Danilo Carvalho Link: https://lore.kernel.org/r/20210108222104.2079472-1-doak@google.com Signed-off-by: Jakub Kicinski --- include/uapi/linux/tcp.h | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h index 13ceeb395eb8f..768e93bd5b511 100644 --- a/include/uapi/linux/tcp.h +++ b/include/uapi/linux/tcp.h @@ -51,7 +51,7 @@ struct tcphdr { fin:1; #else #error "Adjust your defines" -#endif +#endif __be16 window; __sum16 check; __be16 urg_ptr; @@ -62,14 +62,14 @@ struct tcphdr { * (union is compatible to any of its members) * This means this part of the code is -fstrict-aliasing safe now. */ -union tcp_word_hdr { +union tcp_word_hdr { struct tcphdr hdr; - __be32 words[5]; -}; + __be32 words[5]; +}; -#define tcp_flag_word(tp) ( ((union tcp_word_hdr *)(tp))->words [3]) +#define tcp_flag_word(tp) (((union tcp_word_hdr *)(tp))->words[3]) -enum { +enum { TCP_FLAG_CWR = __constant_cpu_to_be32(0x00800000), TCP_FLAG_ECE = __constant_cpu_to_be32(0x00400000), TCP_FLAG_URG = __constant_cpu_to_be32(0x00200000), @@ -80,7 +80,7 @@ enum { TCP_FLAG_FIN = __constant_cpu_to_be32(0x00010000), TCP_RESERVED_BITS = __constant_cpu_to_be32(0x0F000000), TCP_DATA_OFFSET = __constant_cpu_to_be32(0xF0000000) -}; +}; /* * TCP general constants @@ -103,8 +103,8 @@ enum { #define TCP_QUICKACK 12 /* Block/reenable quick acks */ #define TCP_CONGESTION 13 /* Congestion control algorithm */ #define TCP_MD5SIG 14 /* TCP MD5 Signature (RFC2385) */ -#define TCP_THIN_LINEAR_TIMEOUTS 16 /* Use linear timeouts for thin streams*/ -#define TCP_THIN_DUPACK 17 /* Fast retrans. after 1 dupack */ +#define TCP_THIN_LINEAR_TIMEOUTS 16 /* Use linear timeouts for thin streams*/ +#define TCP_THIN_DUPACK 17 /* Fast retrans. after 1 dupack */ #define TCP_USER_TIMEOUT 18 /* How long for loss retry before timeout */ #define TCP_REPAIR 19 /* TCP sock is under repair right now */ #define TCP_REPAIR_QUEUE 20 -- GitLab From c73a45965dd54a10c368191804b9de661eee1007 Mon Sep 17 00:00:00 2001 From: Stefan Chulski Date: Sun, 10 Jan 2021 16:30:59 +0200 Subject: [PATCH 0511/2012] net: mvpp2: prs: improve ipv4 parse flow Patch didn't fix any issue, just improve parse flow and align ipv4 parse flow with ipv6 parse flow. Currently ipv4 kenguru parser first check IP protocol(TCP/UDP) and then destination IP address. Patch introduce reverse ipv4 parse, first destination IP address parsed and only then IP protocol. This would allow extend capability for packet L4 parsing and align ipv4 parsing flow with ipv6. Suggested-by: Liron Himi Signed-off-by: Stefan Chulski Link: https://lore.kernel.org/r/1610289059-14962-1-git-send-email-stefanc@marvell.com Signed-off-by: Jakub Kicinski --- .../net/ethernet/marvell/mvpp2/mvpp2_prs.c | 64 +++++++++++-------- 1 file changed, 39 insertions(+), 25 deletions(-) diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_prs.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_prs.c index a30eb90ba3d28..6ee53c52627f7 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_prs.c +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_prs.c @@ -914,15 +914,15 @@ static int mvpp2_prs_ip4_proto(struct mvpp2 *priv, unsigned short proto, mvpp2_prs_tcam_lu_set(&pe, MVPP2_PRS_LU_IP4); pe.index = tid; - /* Set next lu to IPv4 */ - mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_IP4); - mvpp2_prs_sram_shift_set(&pe, 12, MVPP2_PRS_SRAM_OP_SEL_SHIFT_ADD); + /* Finished: go to flowid generation */ + mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_FLOWS); + mvpp2_prs_sram_bits_set(&pe, MVPP2_PRS_SRAM_LU_GEN_BIT, 1); + /* Set L4 offset */ mvpp2_prs_sram_offset_set(&pe, MVPP2_PRS_SRAM_UDF_TYPE_L4, sizeof(struct iphdr) - 4, MVPP2_PRS_SRAM_OP_SEL_UDF_ADD); - mvpp2_prs_sram_ai_update(&pe, MVPP2_PRS_IPV4_DIP_AI_BIT, - MVPP2_PRS_IPV4_DIP_AI_BIT); + mvpp2_prs_sram_ai_update(&pe, 0, MVPP2_PRS_IPV4_DIP_AI_BIT); mvpp2_prs_sram_ri_update(&pe, ri, ri_mask | MVPP2_PRS_RI_IP_FRAG_MASK); mvpp2_prs_tcam_data_byte_set(&pe, 2, 0x00, @@ -931,7 +931,8 @@ static int mvpp2_prs_ip4_proto(struct mvpp2 *priv, unsigned short proto, MVPP2_PRS_TCAM_PROTO_MASK); mvpp2_prs_tcam_data_byte_set(&pe, 5, proto, MVPP2_PRS_TCAM_PROTO_MASK); - mvpp2_prs_tcam_ai_update(&pe, 0, MVPP2_PRS_IPV4_DIP_AI_BIT); + mvpp2_prs_tcam_ai_update(&pe, MVPP2_PRS_IPV4_DIP_AI_BIT, + MVPP2_PRS_IPV4_DIP_AI_BIT); /* Unmask all ports */ mvpp2_prs_tcam_port_map_set(&pe, MVPP2_PRS_PORT_MASK); @@ -999,12 +1000,17 @@ static int mvpp2_prs_ip4_cast(struct mvpp2 *priv, unsigned short l3_cast) return -EINVAL; } - /* Finished: go to flowid generation */ - mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_FLOWS); - mvpp2_prs_sram_bits_set(&pe, MVPP2_PRS_SRAM_LU_GEN_BIT, 1); + /* Go again to ipv4 */ + mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_IP4); - mvpp2_prs_tcam_ai_update(&pe, MVPP2_PRS_IPV4_DIP_AI_BIT, + mvpp2_prs_sram_ai_update(&pe, MVPP2_PRS_IPV4_DIP_AI_BIT, MVPP2_PRS_IPV4_DIP_AI_BIT); + + /* Shift back to IPv4 proto */ + mvpp2_prs_sram_shift_set(&pe, -12, MVPP2_PRS_SRAM_OP_SEL_SHIFT_ADD); + + mvpp2_prs_tcam_ai_update(&pe, 0, MVPP2_PRS_IPV4_DIP_AI_BIT); + /* Unmask all ports */ mvpp2_prs_tcam_port_map_set(&pe, MVPP2_PRS_PORT_MASK); @@ -1425,8 +1431,9 @@ static int mvpp2_prs_etype_init(struct mvpp2 *priv) mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_IP4); mvpp2_prs_sram_ri_update(&pe, MVPP2_PRS_RI_L3_IP4, MVPP2_PRS_RI_L3_PROTO_MASK); - /* Skip eth_type + 4 bytes of IP header */ - mvpp2_prs_sram_shift_set(&pe, MVPP2_ETH_TYPE_LEN + 4, + /* goto ipv4 dest-address (skip eth_type + IP-header-size - 4) */ + mvpp2_prs_sram_shift_set(&pe, MVPP2_ETH_TYPE_LEN + + sizeof(struct iphdr) - 4, MVPP2_PRS_SRAM_OP_SEL_SHIFT_ADD); /* Set L3 offset */ mvpp2_prs_sram_offset_set(&pe, MVPP2_PRS_SRAM_UDF_TYPE_L3, @@ -1630,8 +1637,9 @@ static int mvpp2_prs_pppoe_init(struct mvpp2 *priv) mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_IP4); mvpp2_prs_sram_ri_update(&pe, MVPP2_PRS_RI_L3_IP4_OPT, MVPP2_PRS_RI_L3_PROTO_MASK); - /* Skip eth_type + 4 bytes of IP header */ - mvpp2_prs_sram_shift_set(&pe, MVPP2_ETH_TYPE_LEN + 4, + /* goto ipv4 dest-address (skip eth_type + IP-header-size - 4) */ + mvpp2_prs_sram_shift_set(&pe, MVPP2_ETH_TYPE_LEN + + sizeof(struct iphdr) - 4, MVPP2_PRS_SRAM_OP_SEL_SHIFT_ADD); /* Set L3 offset */ mvpp2_prs_sram_offset_set(&pe, MVPP2_PRS_SRAM_UDF_TYPE_L3, @@ -1761,19 +1769,20 @@ static int mvpp2_prs_ip4_init(struct mvpp2 *priv) mvpp2_prs_tcam_lu_set(&pe, MVPP2_PRS_LU_IP4); pe.index = MVPP2_PE_IP4_PROTO_UN; - /* Set next lu to IPv4 */ - mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_IP4); - mvpp2_prs_sram_shift_set(&pe, 12, MVPP2_PRS_SRAM_OP_SEL_SHIFT_ADD); + /* Finished: go to flowid generation */ + mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_FLOWS); + mvpp2_prs_sram_bits_set(&pe, MVPP2_PRS_SRAM_LU_GEN_BIT, 1); + /* Set L4 offset */ mvpp2_prs_sram_offset_set(&pe, MVPP2_PRS_SRAM_UDF_TYPE_L4, sizeof(struct iphdr) - 4, MVPP2_PRS_SRAM_OP_SEL_UDF_ADD); - mvpp2_prs_sram_ai_update(&pe, MVPP2_PRS_IPV4_DIP_AI_BIT, - MVPP2_PRS_IPV4_DIP_AI_BIT); + mvpp2_prs_sram_ai_update(&pe, 0, MVPP2_PRS_IPV4_DIP_AI_BIT); mvpp2_prs_sram_ri_update(&pe, MVPP2_PRS_RI_L4_OTHER, MVPP2_PRS_RI_L4_PROTO_MASK); - mvpp2_prs_tcam_ai_update(&pe, 0, MVPP2_PRS_IPV4_DIP_AI_BIT); + mvpp2_prs_tcam_ai_update(&pe, MVPP2_PRS_IPV4_DIP_AI_BIT, + MVPP2_PRS_IPV4_DIP_AI_BIT); /* Unmask all ports */ mvpp2_prs_tcam_port_map_set(&pe, MVPP2_PRS_PORT_MASK); @@ -1786,14 +1795,19 @@ static int mvpp2_prs_ip4_init(struct mvpp2 *priv) mvpp2_prs_tcam_lu_set(&pe, MVPP2_PRS_LU_IP4); pe.index = MVPP2_PE_IP4_ADDR_UN; - /* Finished: go to flowid generation */ - mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_FLOWS); - mvpp2_prs_sram_bits_set(&pe, MVPP2_PRS_SRAM_LU_GEN_BIT, 1); + /* Go again to ipv4 */ + mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_IP4); + + mvpp2_prs_sram_ai_update(&pe, MVPP2_PRS_IPV4_DIP_AI_BIT, + MVPP2_PRS_IPV4_DIP_AI_BIT); + + /* Shift back to IPv4 proto */ + mvpp2_prs_sram_shift_set(&pe, -12, MVPP2_PRS_SRAM_OP_SEL_SHIFT_ADD); + mvpp2_prs_sram_ri_update(&pe, MVPP2_PRS_RI_L3_UCAST, MVPP2_PRS_RI_L3_ADDR_MASK); + mvpp2_prs_tcam_ai_update(&pe, 0, MVPP2_PRS_IPV4_DIP_AI_BIT); - mvpp2_prs_tcam_ai_update(&pe, MVPP2_PRS_IPV4_DIP_AI_BIT, - MVPP2_PRS_IPV4_DIP_AI_BIT); /* Unmask all ports */ mvpp2_prs_tcam_port_map_set(&pe, MVPP2_PRS_PORT_MASK); -- GitLab From e56b3d94d939f52d46209b9e1b6700c5bfff3123 Mon Sep 17 00:00:00 2001 From: Andrey Zhizhikin Date: Fri, 8 Jan 2021 09:58:39 +0000 Subject: [PATCH 0512/2012] rndis_host: set proper input size for OID_GEN_PHYSICAL_MEDIUM request MSFT ActiveSync implementation requires that the size of the response for incoming query is to be provided in the request input length. Failure to set the input size proper results in failed request transfer, where the ActiveSync counterpart reports the NDIS_STATUS_INVALID_LENGTH (0xC0010014L) error. Set the input size for OID_GEN_PHYSICAL_MEDIUM query to the expected size of the response in order for the ActiveSync to properly respond to the request. Fixes: 039ee17d1baa ("rndis_host: Add RNDIS physical medium checking into generic_rndis_bind()") Signed-off-by: Andrey Zhizhikin Link: https://lore.kernel.org/r/20210108095839.3335-1-andrey.zhizhikin@leica-geosystems.com Signed-off-by: Jakub Kicinski --- drivers/net/usb/rndis_host.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/usb/rndis_host.c b/drivers/net/usb/rndis_host.c index 6609d21ef8942..f813ca9dec531 100644 --- a/drivers/net/usb/rndis_host.c +++ b/drivers/net/usb/rndis_host.c @@ -387,7 +387,7 @@ generic_rndis_bind(struct usbnet *dev, struct usb_interface *intf, int flags) reply_len = sizeof *phym; retval = rndis_query(dev, intf, u.buf, RNDIS_OID_GEN_PHYSICAL_MEDIUM, - 0, (void **) &phym, &reply_len); + reply_len, (void **)&phym, &reply_len); if (retval != 0 || !phym) { /* OID is optional so don't fail here. */ phym_unspec = cpu_to_le32(RNDIS_PHYSICAL_MEDIUM_UNSPECIFIED); -- GitLab From 29766bcffad03da66892bef82674883e31f78fec Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Sat, 9 Jan 2021 17:18:32 -0500 Subject: [PATCH 0513/2012] net: support kmap_local forced debugging in skb_frag_foreach Skb frags may be backed by highmem and/or compound pages. Highmem pages need kmap_atomic mappings to access. But kmap_atomic maps a single page, not the entire compound page. skb_foreach_page iterates over an skb frag, in one step in the common case, page by page only if kmap_atomic must be called for each page. The decision logic is captured in skb_frag_must_loop. CONFIG_DEBUG_KMAP_LOCAL_FORCE_MAP extends kmap from highmem to all pages, to increase code coverage. Extend skb_frag_must_loop to this new condition. Link: https://lore.kernel.org/linux-mm/20210106180132.41dc249d@gandalf.local.home/ Fixes: 0e91a0c6984c ("mm/highmem: Provide CONFIG_DEBUG_KMAP_LOCAL_FORCE_MAP") Reported-by: Steven Rostedt (VMware) Signed-off-by: Linus Torvalds Signed-off-by: Willem de Bruijn Tested-by: Steven Rostedt (VMware) Signed-off-by: Jakub Kicinski --- include/linux/skbuff.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 333bcdc39635b..c858adfb5a825 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -366,7 +366,7 @@ static inline void skb_frag_size_sub(skb_frag_t *frag, int delta) static inline bool skb_frag_must_loop(struct page *p) { #if defined(CONFIG_HIGHMEM) - if (PageHighMem(p)) + if (IS_ENABLED(CONFIG_DEBUG_KMAP_LOCAL_FORCE_MAP) || PageHighMem(p)) return true; #endif return false; -- GitLab From 97550f6fa59254435d864b92603de3ca4b5a99f8 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Sat, 9 Jan 2021 17:18:33 -0500 Subject: [PATCH 0514/2012] net: compound page support in skb_seq_read skb_seq_read iterates over an skb, returning pointer and length of the next data range with each call. It relies on kmap_atomic to access highmem pages when needed. An skb frag may be backed by a compound page, but kmap_atomic maps only a single page. There are not enough kmap slots to always map all pages concurrently. Instead, if kmap_atomic is needed, iterate over each page. As this increases the number of calls, avoid this unless needed. The necessary condition is captured in skb_frag_must_loop. I tried to make the change as obvious as possible. It should be easy to verify that nothing changes if skb_frag_must_loop returns false. Tested: On an x86 platform with CONFIG_HIGHMEM=y CONFIG_DEBUG_KMAP_LOCAL_FORCE_MAP=y CONFIG_NETFILTER_XT_MATCH_STRING=y Run ip link set dev lo mtu 1500 iptables -A OUTPUT -m string --string 'badstring' -algo bm -j ACCEPT dd if=/dev/urandom of=in bs=1M count=20 nc -l -p 8000 > /dev/null & nc -w 1 -q 0 localhost 8000 < in Signed-off-by: Willem de Bruijn Signed-off-by: Jakub Kicinski --- include/linux/skbuff.h | 1 + net/core/skbuff.c | 28 +++++++++++++++++++++++----- 2 files changed, 24 insertions(+), 5 deletions(-) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index c858adfb5a825..5f60c9e907c9d 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1203,6 +1203,7 @@ struct skb_seq_state { struct sk_buff *root_skb; struct sk_buff *cur_skb; __u8 *frag_data; + __u32 frag_off; }; void skb_prepare_seq_read(struct sk_buff *skb, unsigned int from, diff --git a/net/core/skbuff.c b/net/core/skbuff.c index b6f2b520a9b74..0da035c1e53f1 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -3442,6 +3442,7 @@ void skb_prepare_seq_read(struct sk_buff *skb, unsigned int from, st->root_skb = st->cur_skb = skb; st->frag_idx = st->stepped_offset = 0; st->frag_data = NULL; + st->frag_off = 0; } EXPORT_SYMBOL(skb_prepare_seq_read); @@ -3496,14 +3497,27 @@ next_skb: st->stepped_offset += skb_headlen(st->cur_skb); while (st->frag_idx < skb_shinfo(st->cur_skb)->nr_frags) { + unsigned int pg_idx, pg_off, pg_sz; + frag = &skb_shinfo(st->cur_skb)->frags[st->frag_idx]; - block_limit = skb_frag_size(frag) + st->stepped_offset; + pg_idx = 0; + pg_off = skb_frag_off(frag); + pg_sz = skb_frag_size(frag); + + if (skb_frag_must_loop(skb_frag_page(frag))) { + pg_idx = (pg_off + st->frag_off) >> PAGE_SHIFT; + pg_off = offset_in_page(pg_off + st->frag_off); + pg_sz = min_t(unsigned int, pg_sz - st->frag_off, + PAGE_SIZE - pg_off); + } + + block_limit = pg_sz + st->stepped_offset; if (abs_offset < block_limit) { if (!st->frag_data) - st->frag_data = kmap_atomic(skb_frag_page(frag)); + st->frag_data = kmap_atomic(skb_frag_page(frag) + pg_idx); - *data = (u8 *) st->frag_data + skb_frag_off(frag) + + *data = (u8 *)st->frag_data + pg_off + (abs_offset - st->stepped_offset); return block_limit - abs_offset; @@ -3514,8 +3528,12 @@ next_skb: st->frag_data = NULL; } - st->frag_idx++; - st->stepped_offset += skb_frag_size(frag); + st->stepped_offset += pg_sz; + st->frag_off += pg_sz; + if (st->frag_off == skb_frag_size(frag)) { + st->frag_off = 0; + st->frag_idx++; + } } if (st->frag_data) { -- GitLab From 9bd6b629c39e3fa9e14243a6d8820492be1a5b2e Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Sat, 9 Jan 2021 17:18:34 -0500 Subject: [PATCH 0515/2012] esp: avoid unneeded kmap_atomic call esp(6)_output_head uses skb_page_frag_refill to allocate a buffer for the esp trailer. It accesses the page with kmap_atomic to handle highmem. But skb_page_frag_refill can return compound pages, of which kmap_atomic only maps the first underlying page. skb_page_frag_refill does not return highmem, because flag __GFP_HIGHMEM is not set. ESP uses it in the same manner as TCP. That also does not call kmap_atomic, but directly uses page_address, in skb_copy_to_page_nocache. Do the same for ESP. This issue has become easier to trigger with recent kmap local debugging feature CONFIG_DEBUG_KMAP_LOCAL_FORCE_MAP. Fixes: cac2661c53f3 ("esp4: Avoid skb_cow_data whenever possible") Fixes: 03e2a30f6a27 ("esp6: Avoid skb_cow_data whenever possible") Signed-off-by: Willem de Bruijn Acked-by: Steffen Klassert Signed-off-by: Jakub Kicinski --- net/ipv4/esp4.c | 7 +------ net/ipv6/esp6.c | 7 +------ 2 files changed, 2 insertions(+), 12 deletions(-) diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index 8b07f3a4f2db2..a3271ec3e1627 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -443,7 +443,6 @@ static int esp_output_encap(struct xfrm_state *x, struct sk_buff *skb, int esp_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info *esp) { u8 *tail; - u8 *vaddr; int nfrags; int esph_offset; struct page *page; @@ -485,14 +484,10 @@ int esp_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info * page = pfrag->page; get_page(page); - vaddr = kmap_atomic(page); - - tail = vaddr + pfrag->offset; + tail = page_address(page) + pfrag->offset; esp_output_fill_trailer(tail, esp->tfclen, esp->plen, esp->proto); - kunmap_atomic(vaddr); - nfrags = skb_shinfo(skb)->nr_frags; __skb_fill_page_desc(skb, nfrags, page, pfrag->offset, diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index 52c2f063529fb..2b804fcebcc65 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -478,7 +478,6 @@ static int esp6_output_encap(struct xfrm_state *x, struct sk_buff *skb, int esp6_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info *esp) { u8 *tail; - u8 *vaddr; int nfrags; int esph_offset; struct page *page; @@ -519,14 +518,10 @@ int esp6_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info page = pfrag->page; get_page(page); - vaddr = kmap_atomic(page); - - tail = vaddr + pfrag->offset; + tail = page_address(page) + pfrag->offset; esp_output_fill_trailer(tail, esp->tfclen, esp->plen, esp->proto); - kunmap_atomic(vaddr); - nfrags = skb_shinfo(skb)->nr_frags; __skb_fill_page_desc(skb, nfrags, page, pfrag->offset, -- GitLab From 2225a8dda263edc35a0e8b858fe2945cf6240fde Mon Sep 17 00:00:00 2001 From: Ariel Marcovitch Date: Sat, 2 Jan 2021 22:11:56 +0200 Subject: [PATCH 0516/2012] powerpc: Fix alignment bug within the init sections This is a bug that causes early crashes in builds with an .exit.text section smaller than a page and an .init.text section that ends in the beginning of a physical page (this is kinda random, which might explain why this wasn't really encountered before). The init sections are ordered like this: .init.text .exit.text .init.data Currently, these sections aren't page aligned. Because the init code might become read-only at runtime and because the .init.text section can potentially reside on the same physical page as .init.data, the beginning of .init.data might be mapped read-only along with .init.text. Then when the kernel tries to modify a variable in .init.data (like kthreadd_done, used in kernel_init()) the kernel panics. To avoid this, make _einittext page aligned and also align .exit.text to make sure .init.data is always seperated from the text segments. Fixes: 060ef9d89d18 ("powerpc32: PAGE_EXEC required for inittext") Signed-off-by: Ariel Marcovitch Reviewed-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210102201156.10805-1-ariel.marcovitch@gmail.com --- arch/powerpc/kernel/vmlinux.lds.S | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index 8e0b1298bf19b..4ab426b8b0e02 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -187,6 +187,12 @@ SECTIONS .init.text : AT(ADDR(.init.text) - LOAD_OFFSET) { _sinittext = .; INIT_TEXT + + /* + *.init.text might be RO so we must ensure this section ends on + * a page boundary. + */ + . = ALIGN(PAGE_SIZE); _einittext = .; #ifdef CONFIG_PPC64 *(.tramp.ftrace.init); @@ -200,6 +206,8 @@ SECTIONS EXIT_TEXT } + . = ALIGN(PAGE_SIZE); + INIT_DATA_SECTION(16) . = ALIGN(8); -- GitLab From 3e096a2112b7b407549020cf095e2a425f00fabb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonathan=20Neusch=C3=A4fer?= Date: Fri, 1 Jan 2021 23:19:42 +0100 Subject: [PATCH 0517/2012] ALSA: doc: Fix reference to mixart.rst MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit MIXART.txt has been converted to ReST and renamed. Fix the reference in alsa-configuration.rst. Fixes: 3d8e81862ce4 ("ALSA: doc: ReSTize MIXART.txt") Signed-off-by: Jonathan Neuschäfer Cc: Link: https://lore.kernel.org/r/20210101221942.1068388-1-j.neuschaefer@gmx.net Signed-off-by: Takashi Iwai --- Documentation/sound/alsa-configuration.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/sound/alsa-configuration.rst b/Documentation/sound/alsa-configuration.rst index fe52c314b7639..b36af65a08edf 100644 --- a/Documentation/sound/alsa-configuration.rst +++ b/Documentation/sound/alsa-configuration.rst @@ -1501,7 +1501,7 @@ Module for Digigram miXart8 sound cards. This module supports multiple cards. Note: One miXart8 board will be represented as 4 alsa cards. -See MIXART.txt for details. +See Documentation/sound/cards/mixart.rst for details. When the driver is compiled as a module and the hotplug firmware is supported, the firmware data is loaded via hotplug automatically. -- GitLab From 76e2fc63ca40977af893b724b00cc2f8e9ce47a4 Mon Sep 17 00:00:00 2001 From: Yazen Ghannam Date: Mon, 11 Jan 2021 11:04:29 +0100 Subject: [PATCH 0518/2012] x86/cpu/amd: Set __max_die_per_package on AMD Set the maximum DIE per package variable on AMD using the NodesPerProcessor topology value. This will be used by RAPL, among others, to determine the maximum number of DIEs on the system in order to do per-DIE manipulations. [ bp: Productize into a proper patch. ] Fixes: 028c221ed190 ("x86/CPU/AMD: Save AMD NodeId as cpu_die_id") Reported-by: Johnathan Smithinovic Reported-by: Rafael Kitover Signed-off-by: Yazen Ghannam Signed-off-by: Borislav Petkov Tested-by: Johnathan Smithinovic Tested-by: Rafael Kitover Link: https://bugzilla.kernel.org/show_bug.cgi?id=210939 Link: https://lkml.kernel.org/r/20210106112106.GE5729@zn.tnic Link: https://lkml.kernel.org/r/20210111101455.1194-1-bp@alien8.de --- arch/x86/kernel/cpu/amd.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index f8ca66f3d8617..347a956f71ca0 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -542,12 +542,12 @@ static void bsp_init_amd(struct cpuinfo_x86 *c) u32 ecx; ecx = cpuid_ecx(0x8000001e); - nodes_per_socket = ((ecx >> 8) & 7) + 1; + __max_die_per_package = nodes_per_socket = ((ecx >> 8) & 7) + 1; } else if (boot_cpu_has(X86_FEATURE_NODEID_MSR)) { u64 value; rdmsrl(MSR_FAM10H_NODE_ID, value); - nodes_per_socket = ((value >> 3) & 7) + 1; + __max_die_per_package = nodes_per_socket = ((value >> 3) & 7) + 1; } if (!boot_cpu_has(X86_FEATURE_AMD_SSBD) && -- GitLab From e1def45b5291278590bc3033cc518bf5c964a18d Mon Sep 17 00:00:00 2001 From: Matthias Reichl Date: Sat, 9 Jan 2021 21:10:55 +0100 Subject: [PATCH 0519/2012] media: rc: ite-cir: fix min_timeout calculation Commit 528222d853f92 ("media: rc: harmonize infrared durations to microseconds") missed to switch the min_timeout calculation from ns to us. This resulted in a minimum timeout of 1.2 seconds instead of 1.2ms, leading to large delays and long key repeats. Fix this by applying proper ns->us conversion. Cc: stable@vger.kernel.org Fixes: 528222d853f92 ("media: rc: harmonize infrared durations to microseconds") Signed-off-by: Matthias Reichl Signed-off-by: Sean Young Signed-off-by: Mauro Carvalho Chehab --- drivers/media/rc/ite-cir.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/rc/ite-cir.c b/drivers/media/rc/ite-cir.c index a905113fef6ea..0c6229592e132 100644 --- a/drivers/media/rc/ite-cir.c +++ b/drivers/media/rc/ite-cir.c @@ -1551,7 +1551,7 @@ static int ite_probe(struct pnp_dev *pdev, const struct pnp_device_id rdev->s_rx_carrier_range = ite_set_rx_carrier_range; /* FIFO threshold is 17 bytes, so 17 * 8 samples minimum */ rdev->min_timeout = 17 * 8 * ITE_BAUDRATE_DIVISOR * - itdev->params.sample_period; + itdev->params.sample_period / 1000; rdev->timeout = IR_DEFAULT_TIMEOUT; rdev->max_timeout = 10 * IR_DEFAULT_TIMEOUT; rdev->rx_resolution = ITE_BAUDRATE_DIVISOR * -- GitLab From a9d4ef643430d638de1910377f50e0d492d85a43 Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Mon, 11 Jan 2021 13:49:38 +0200 Subject: [PATCH 0520/2012] habanalabs: fix dma_addr passed to dma_mmap_coherent When doing dma_alloc_coherent in the driver, we add a certain hard-coded offset to the DMA address before returning to the callee function. This offset is needed when our device use this DMA address to perform outbound transactions to the host. However, if we want to map the DMA'able memory to the user via dma_mmap_coherent(), we need to pass the original dma address, without this offset. Otherwise, we will get erronouos mapping. Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/gaudi/gaudi.c | 3 ++- drivers/misc/habanalabs/goya/goya.c | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c index 8c09e4466af8c..b328ddaa64ee5 100644 --- a/drivers/misc/habanalabs/gaudi/gaudi.c +++ b/drivers/misc/habanalabs/gaudi/gaudi.c @@ -4002,7 +4002,8 @@ static int gaudi_cb_mmap(struct hl_device *hdev, struct vm_area_struct *vma, vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP | VM_DONTCOPY | VM_NORESERVE; - rc = dma_mmap_coherent(hdev->dev, vma, cpu_addr, dma_addr, size); + rc = dma_mmap_coherent(hdev->dev, vma, cpu_addr, + (dma_addr - HOST_PHYS_BASE), size); if (rc) dev_err(hdev->dev, "dma_mmap_coherent error %d", rc); diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c index b8b4aa636b7cb..63679a747d2cd 100644 --- a/drivers/misc/habanalabs/goya/goya.c +++ b/drivers/misc/habanalabs/goya/goya.c @@ -2719,7 +2719,8 @@ static int goya_cb_mmap(struct hl_device *hdev, struct vm_area_struct *vma, vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP | VM_DONTCOPY | VM_NORESERVE; - rc = dma_mmap_coherent(hdev->dev, vma, cpu_addr, dma_addr, size); + rc = dma_mmap_coherent(hdev->dev, vma, cpu_addr, + (dma_addr - HOST_PHYS_BASE), size); if (rc) dev_err(hdev->dev, "dma_mmap_coherent error %d", rc); -- GitLab From aa6df6533b8f9ead98889baa92e2b19793b1c77e Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Mon, 11 Jan 2021 15:00:38 +0200 Subject: [PATCH 0521/2012] habanalabs: fix reset process in case of failures There are some points in the reset process where if the code fails for some reason, and the system admin tries to initiate the reset process again we will get a kernel panic. This is because there aren't any protections in different fini functions that are called during the reset process. The protections that are added in this patch make sure that if the fini functions are called multiple times, without calling init functions between them, there won't be double release of already released resources. Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/device.c | 2 +- drivers/misc/habanalabs/common/mmu_v1.c | 12 ++++++++++-- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c index 1456eabf96010..1ea57d86caa37 100644 --- a/drivers/misc/habanalabs/common/device.c +++ b/drivers/misc/habanalabs/common/device.c @@ -1037,7 +1037,7 @@ kill_processes: if (hard_reset) { /* Release kernel context */ - if (hl_ctx_put(hdev->kernel_ctx) == 1) + if (hdev->kernel_ctx && hl_ctx_put(hdev->kernel_ctx) == 1) hdev->kernel_ctx = NULL; hl_vm_fini(hdev); hl_mmu_fini(hdev); diff --git a/drivers/misc/habanalabs/common/mmu_v1.c b/drivers/misc/habanalabs/common/mmu_v1.c index 2ce6ea89d4fa2..06d8a44dd5d42 100644 --- a/drivers/misc/habanalabs/common/mmu_v1.c +++ b/drivers/misc/habanalabs/common/mmu_v1.c @@ -467,8 +467,16 @@ static void hl_mmu_v1_fini(struct hl_device *hdev) { /* MMU H/W fini was already done in device hw_fini() */ - kvfree(hdev->mmu_priv.dr.mmu_shadow_hop0); - gen_pool_destroy(hdev->mmu_priv.dr.mmu_pgt_pool); + if (!ZERO_OR_NULL_PTR(hdev->mmu_priv.hr.mmu_shadow_hop0)) { + kvfree(hdev->mmu_priv.dr.mmu_shadow_hop0); + gen_pool_destroy(hdev->mmu_priv.dr.mmu_pgt_pool); + } + + /* Make sure that if we arrive here again without init was called we + * won't cause kernel panic. This can happen for example if we fail + * during hard reset code at certain points + */ + hdev->mmu_priv.dr.mmu_shadow_hop0 = NULL; } /** -- GitLab From 9488307a5559255f2fc9a3ab61e1c31e243ca7c6 Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Mon, 11 Jan 2021 17:49:30 +0200 Subject: [PATCH 0522/2012] habanalabs: prevent soft lockup during unmap When using Deep learning framework such as tensorflow or pytorch, there are tens of thousands of host memory mappings. When the user frees all those mappings at the same time, the process of unmapping and unpinning them can take a long time, which may cause a soft lockup bug. To prevent this, we need to free the core to do other things during the unmapping process. For now, we chose to do it every 32K unmappings (each unmap is a single 4K page). Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/habanalabs.h | 1 + drivers/misc/habanalabs/common/memory.c | 10 ++++++++-- drivers/misc/habanalabs/common/mmu.c | 6 +++--- 3 files changed, 12 insertions(+), 5 deletions(-) diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h index e0d7f5fbaa5c3..60e16dc4bcac3 100644 --- a/drivers/misc/habanalabs/common/habanalabs.h +++ b/drivers/misc/habanalabs/common/habanalabs.h @@ -2182,6 +2182,7 @@ void hl_mmu_v1_set_funcs(struct hl_device *hdev, struct hl_mmu_funcs *mmu); int hl_mmu_va_to_pa(struct hl_ctx *ctx, u64 virt_addr, u64 *phys_addr); int hl_mmu_get_tlb_info(struct hl_ctx *ctx, u64 virt_addr, struct hl_mmu_hop_info *hops); +bool hl_is_dram_va(struct hl_device *hdev, u64 virt_addr); int hl_fw_load_fw_to_device(struct hl_device *hdev, const char *fw_name, void __iomem *dst, u32 src_offset, u32 size); diff --git a/drivers/misc/habanalabs/common/memory.c b/drivers/misc/habanalabs/common/memory.c index cbe9da4e0211b..5d4fbdcaefe3f 100644 --- a/drivers/misc/habanalabs/common/memory.c +++ b/drivers/misc/habanalabs/common/memory.c @@ -886,8 +886,10 @@ static void unmap_phys_pg_pack(struct hl_ctx *ctx, u64 vaddr, { struct hl_device *hdev = ctx->hdev; u64 next_vaddr, i; + bool is_host_addr; u32 page_size; + is_host_addr = !hl_is_dram_va(hdev, vaddr); page_size = phys_pg_pack->page_size; next_vaddr = vaddr; @@ -900,9 +902,13 @@ static void unmap_phys_pg_pack(struct hl_ctx *ctx, u64 vaddr, /* * unmapping on Palladium can be really long, so avoid a CPU * soft lockup bug by sleeping a little between unmapping pages + * + * In addition, when unmapping host memory we pass through + * the Linux kernel to unpin the pages and that takes a long + * time. Therefore, sleep every 32K pages to avoid soft lockup */ - if (hdev->pldm) - usleep_range(500, 1000); + if (hdev->pldm || (is_host_addr && (i & 0x7FFF) == 0)) + usleep_range(50, 200); } } diff --git a/drivers/misc/habanalabs/common/mmu.c b/drivers/misc/habanalabs/common/mmu.c index 33ae953d3a368..28a4638741d88 100644 --- a/drivers/misc/habanalabs/common/mmu.c +++ b/drivers/misc/habanalabs/common/mmu.c @@ -9,7 +9,7 @@ #include "habanalabs.h" -static bool is_dram_va(struct hl_device *hdev, u64 virt_addr) +bool hl_is_dram_va(struct hl_device *hdev, u64 virt_addr) { struct asic_fixed_properties *prop = &hdev->asic_prop; @@ -156,7 +156,7 @@ int hl_mmu_unmap_page(struct hl_ctx *ctx, u64 virt_addr, u32 page_size, if (!hdev->mmu_enable) return 0; - is_dram_addr = is_dram_va(hdev, virt_addr); + is_dram_addr = hl_is_dram_va(hdev, virt_addr); if (is_dram_addr) mmu_prop = &prop->dmmu; @@ -236,7 +236,7 @@ int hl_mmu_map_page(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, if (!hdev->mmu_enable) return 0; - is_dram_addr = is_dram_va(hdev, virt_addr); + is_dram_addr = hl_is_dram_va(hdev, virt_addr); if (is_dram_addr) mmu_prop = &prop->dmmu; -- GitLab From bb52cb0dec8d2fecdb22843a805131478a180728 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Mon, 11 Jan 2021 14:44:57 +0100 Subject: [PATCH 0523/2012] drm/ttm: make the pool shrinker lock a mutex MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit set_pages_wb() might sleep and so we can't do this in an atomic context. Signed-off-by: Christian König Reported-by: Mikhail Gavrilov Tested-by: Mikhail Gavrilov Fixes: d099fc8f540a ("drm/ttm: new TT backend allocation pool v3") Reviewed-by: Huang Rui Link: https://patchwork.freedesktop.org/patch/413409/ --- drivers/gpu/drm/ttm/ttm_pool.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c index 255c457349799..8cd776adc592b 100644 --- a/drivers/gpu/drm/ttm/ttm_pool.c +++ b/drivers/gpu/drm/ttm/ttm_pool.c @@ -66,7 +66,7 @@ static struct ttm_pool_type global_uncached[MAX_ORDER]; static struct ttm_pool_type global_dma32_write_combined[MAX_ORDER]; static struct ttm_pool_type global_dma32_uncached[MAX_ORDER]; -static spinlock_t shrinker_lock; +static struct mutex shrinker_lock; static struct list_head shrinker_list; static struct shrinker mm_shrinker; @@ -249,9 +249,9 @@ static void ttm_pool_type_init(struct ttm_pool_type *pt, struct ttm_pool *pool, spin_lock_init(&pt->lock); INIT_LIST_HEAD(&pt->pages); - spin_lock(&shrinker_lock); + mutex_lock(&shrinker_lock); list_add_tail(&pt->shrinker_list, &shrinker_list); - spin_unlock(&shrinker_lock); + mutex_unlock(&shrinker_lock); } /* Remove a pool_type from the global shrinker list and free all pages */ @@ -259,9 +259,9 @@ static void ttm_pool_type_fini(struct ttm_pool_type *pt) { struct page *p, *tmp; - spin_lock(&shrinker_lock); + mutex_lock(&shrinker_lock); list_del(&pt->shrinker_list); - spin_unlock(&shrinker_lock); + mutex_unlock(&shrinker_lock); list_for_each_entry_safe(p, tmp, &pt->pages, lru) ttm_pool_free_page(pt->pool, pt->caching, pt->order, p); @@ -302,7 +302,7 @@ static unsigned int ttm_pool_shrink(void) unsigned int num_freed; struct page *p; - spin_lock(&shrinker_lock); + mutex_lock(&shrinker_lock); pt = list_first_entry(&shrinker_list, typeof(*pt), shrinker_list); p = ttm_pool_type_take(pt); @@ -314,7 +314,7 @@ static unsigned int ttm_pool_shrink(void) } list_move_tail(&pt->shrinker_list, &shrinker_list); - spin_unlock(&shrinker_lock); + mutex_unlock(&shrinker_lock); return num_freed; } @@ -564,7 +564,7 @@ int ttm_pool_debugfs(struct ttm_pool *pool, struct seq_file *m) { unsigned int i; - spin_lock(&shrinker_lock); + mutex_lock(&shrinker_lock); seq_puts(m, "\t "); for (i = 0; i < MAX_ORDER; ++i) @@ -600,7 +600,7 @@ int ttm_pool_debugfs(struct ttm_pool *pool, struct seq_file *m) seq_printf(m, "\ntotal\t: %8lu of %8lu\n", atomic_long_read(&allocated_pages), page_pool_size); - spin_unlock(&shrinker_lock); + mutex_unlock(&shrinker_lock); return 0; } @@ -644,7 +644,7 @@ int ttm_pool_mgr_init(unsigned long num_pages) if (!page_pool_size) page_pool_size = num_pages; - spin_lock_init(&shrinker_lock); + mutex_init(&shrinker_lock); INIT_LIST_HEAD(&shrinker_list); for (i = 0; i < MAX_ORDER; ++i) { -- GitLab From f4eccc7fea203cfb35205891eced1ab51836f362 Mon Sep 17 00:00:00 2001 From: Peter Geis Date: Fri, 8 Jan 2021 13:59:12 +0000 Subject: [PATCH 0524/2012] clk: tegra30: Add hda clock default rates to clock driver Current implementation defaults the hda clocks to clk_m. This causes hda to run too slow to operate correctly. Fix this by defaulting to pll_p and setting the frequency to the correct rate. This matches upstream t124 and downstream t30. Acked-by: Jon Hunter Tested-by: Ion Agorria Acked-by: Sameer Pujar Acked-by: Thierry Reding Signed-off-by: Peter Geis Link: https://lore.kernel.org/r/20210108135913.2421585-2-pgwipeout@gmail.com Signed-off-by: Takashi Iwai --- drivers/clk/tegra/clk-tegra30.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/clk/tegra/clk-tegra30.c b/drivers/clk/tegra/clk-tegra30.c index 37244a7e68c22..9cf249c344d9e 100644 --- a/drivers/clk/tegra/clk-tegra30.c +++ b/drivers/clk/tegra/clk-tegra30.c @@ -1256,6 +1256,8 @@ static struct tegra_clk_init_table init_table[] __initdata = { { TEGRA30_CLK_I2S3_SYNC, TEGRA30_CLK_CLK_MAX, 24000000, 0 }, { TEGRA30_CLK_I2S4_SYNC, TEGRA30_CLK_CLK_MAX, 24000000, 0 }, { TEGRA30_CLK_VIMCLK_SYNC, TEGRA30_CLK_CLK_MAX, 24000000, 0 }, + { TEGRA30_CLK_HDA, TEGRA30_CLK_PLL_P, 102000000, 0 }, + { TEGRA30_CLK_HDA2CODEC_2X, TEGRA30_CLK_PLL_P, 48000000, 0 }, /* must be the last entry */ { TEGRA30_CLK_CLK_MAX, TEGRA30_CLK_CLK_MAX, 0, 0 }, }; -- GitLab From 615d435400435876ac68c1de37e9526a9164eaec Mon Sep 17 00:00:00 2001 From: Peter Geis Date: Fri, 8 Jan 2021 13:59:13 +0000 Subject: [PATCH 0525/2012] ALSA: hda/tegra: fix tegra-hda on tegra30 soc Currently hda on tegra30 fails to open a stream with an input/output error. For example: speaker-test -Dhw:0,3 -c 2 speaker-test 1.2.2 Playback device is hw:0,3 Stream parameters are 48000Hz, S16_LE, 2 channels Using 16 octaves of pink noise Rate set to 48000Hz (requested 48000Hz) Buffer size range from 64 to 16384 Period size range from 32 to 8192 Using max buffer size 16384 Periods = 4 was set period_size = 4096 was set buffer_size = 16384 0 - Front Left Write error: -5,Input/output error xrun_recovery failed: -5,Input/output error Transfer failed: Input/output error The tegra-hda device was introduced in tegra30 but only utilized in tegra124 until recent chips. Tegra210/186 work only due to a hardware change. For this reason it is unknown when this issue first manifested. Discussions with the hardware team show this applies to all current tegra chips. It has been resolved in the tegra234, which does not have hda support at this time. The explanation from the hardware team is this: Below is the striping formula referenced from HD audio spec. { ((num_channels * bits_per_sample) / number of SDOs) >= 8 } The current issue is seen because Tegra HW has a problem with boundary condition (= 8) for striping. The reason why it is not seen on Tegra210/Tegra186 is because it uses max 2SDO lines. Max SDO lines is read from GCAP register. For the given stream (channels = 2, bps = 16); ratio = (channels * bps) / NSDO = 32 / NSDO; On Tegra30, ratio = 32/4 = 8 (FAIL) On Tegra210/186, ratio = 32/2 = 16 (PASS) On Tegra194, ratio = 32/4 = 8 (FAIL) ==> Earlier workaround was applied for it If Tegra210/186 is forced to use 4SDO, it fails there as well. So the behavior is consistent across all these chips. Applying the fix in [1] universally resolves this issue on tegra30-hda. Tested on the Ouya game console and the tf201 tablet. [1] commit 60019d8c650d ("ALSA: hda/tegra: workaround playback failure on Tegra194") Reviewed-by: Jon Hunter Tested-by: Ion Agorria Reviewed-by: Sameer Pujar Acked-by: Thierry Reding Signed-off-by: Peter Geis Link: https://lore.kernel.org/r/20210108135913.2421585-3-pgwipeout@gmail.com Signed-off-by: Takashi Iwai --- sound/pci/hda/hda_tegra.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/pci/hda/hda_tegra.c b/sound/pci/hda/hda_tegra.c index 70164d1428d40..361cf2041911a 100644 --- a/sound/pci/hda/hda_tegra.c +++ b/sound/pci/hda/hda_tegra.c @@ -388,7 +388,7 @@ static int hda_tegra_first_init(struct azx *chip, struct platform_device *pdev) * in powers of 2, next available ratio is 16 which can be * used as a limiting factor here. */ - if (of_device_is_compatible(np, "nvidia,tegra194-hda")) + if (of_device_is_compatible(np, "nvidia,tegra30-hda")) chip->bus.core.sdo_limit = 16; /* codec detection */ -- GitLab From 51b2ee7d006a736a9126e8111d1f24e4fd0afaa6 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 11 Jan 2021 16:01:29 -0500 Subject: [PATCH 0526/2012] nfsd4: readdirplus shouldn't return parent of export If you export a subdirectory of a filesystem, a READDIRPLUS on the root of that export will return the filehandle of the parent with the ".." entry. The filehandle is optional, so let's just not return the filehandle for ".." if we're at the root of an export. Note that once the client learns one filehandle outside of the export, they can trivially access the rest of the export using further lookups. However, it is also not very difficult to guess filehandles outside of the export. So exporting a subdirectory of a filesystem should considered equivalent to providing access to the entire filesystem. To avoid confusion, we recommend only exporting entire filesystems. Reported-by: Youjipeng Signed-off-by: J. Bruce Fields Cc: stable@vger.kernel.org Signed-off-by: Chuck Lever --- fs/nfsd/nfs3xdr.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c index 821db21ba072c..34b880211e5ea 100644 --- a/fs/nfsd/nfs3xdr.c +++ b/fs/nfsd/nfs3xdr.c @@ -865,9 +865,14 @@ compose_entry_fh(struct nfsd3_readdirres *cd, struct svc_fh *fhp, if (isdotent(name, namlen)) { if (namlen == 2) { dchild = dget_parent(dparent); - /* filesystem root - cannot return filehandle for ".." */ + /* + * Don't return filehandle for ".." if we're at + * the filesystem or export root: + */ if (dchild == dparent) goto out; + if (dparent == exp->ex_path.dentry) + goto out; } else dchild = dget(dparent); } else -- GitLab From e7c22eeaff8565d9a8374f320238c251ca31480b Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 11 Jan 2021 14:02:50 +0100 Subject: [PATCH 0527/2012] ALSA: fireface: Fix integer overflow in transmit_midi_msg() As snd_ff.rx_bytes[] is unsigned int, and NSEC_PER_SEC is 1000000000L, the second multiplication in ff->rx_bytes[port] * 8 * NSEC_PER_SEC / 31250 always overflows on 32-bit platforms, truncating the result. Fix this by precalculating "NSEC_PER_SEC / 31250", which is an integer constant. Note that this assumes ff->rx_bytes[port] <= 16777. Fixes: 19174295788de77d ("ALSA: fireface: add transaction support") Reviewed-by: Takashi Sakamoto Signed-off-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/20210111130251.361335-2-geert+renesas@glider.be Signed-off-by: Takashi Iwai --- sound/firewire/fireface/ff-transaction.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/firewire/fireface/ff-transaction.c b/sound/firewire/fireface/ff-transaction.c index 7f82762ccc8c8..ee7122c461d46 100644 --- a/sound/firewire/fireface/ff-transaction.c +++ b/sound/firewire/fireface/ff-transaction.c @@ -88,7 +88,7 @@ static void transmit_midi_msg(struct snd_ff *ff, unsigned int port) /* Set interval to next transaction. */ ff->next_ktime[port] = ktime_add_ns(ktime_get(), - ff->rx_bytes[port] * 8 * NSEC_PER_SEC / 31250); + ff->rx_bytes[port] * 8 * (NSEC_PER_SEC / 31250)); if (quad_count == 1) tcode = TCODE_WRITE_QUADLET_REQUEST; -- GitLab From 9f65df9c589f249435255da37a5dd11f1bc86f4d Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 11 Jan 2021 14:02:51 +0100 Subject: [PATCH 0528/2012] ALSA: firewire-tascam: Fix integer overflow in midi_port_work() As snd_fw_async_midi_port.consume_bytes is unsigned int, and NSEC_PER_SEC is 1000000000L, the second multiplication in port->consume_bytes * 8 * NSEC_PER_SEC / 31250 always overflows on 32-bit platforms, truncating the result. Fix this by precalculating "NSEC_PER_SEC / 31250", which is an integer constant. Note that this assumes port->consume_bytes <= 16777. Fixes: 531f471834227d03 ("ALSA: firewire-lib/firewire-tascam: localize async midi port") Reviewed-by: Takashi Sakamoto Signed-off-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/20210111130251.361335-3-geert+renesas@glider.be Signed-off-by: Takashi Iwai --- sound/firewire/tascam/tascam-transaction.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/firewire/tascam/tascam-transaction.c b/sound/firewire/tascam/tascam-transaction.c index 90288b4b46379..a073cece4a7d5 100644 --- a/sound/firewire/tascam/tascam-transaction.c +++ b/sound/firewire/tascam/tascam-transaction.c @@ -209,7 +209,7 @@ static void midi_port_work(struct work_struct *work) /* Set interval to next transaction. */ port->next_ktime = ktime_add_ns(ktime_get(), - port->consume_bytes * 8 * NSEC_PER_SEC / 31250); + port->consume_bytes * 8 * (NSEC_PER_SEC / 31250)); /* Start this transaction. */ port->idling = false; -- GitLab From 14ff8e1970c03831bf64cf098f56e6ba83349170 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Fri, 8 Jan 2021 17:06:10 +0100 Subject: [PATCH 0529/2012] btrfs: no need to run delayed refs after commit_fs_roots during commit The inode number cache has been removed in this dev cycle, there's one more leftover. We don't need to run the delayed refs again after commit_fs_roots as stated in the comment, because btrfs_save_ino_cache is no more since 5297199a8bca ("btrfs: remove inode number cache feature"). Nothing else between commit_fs_roots and btrfs_qgroup_account_extents could create new delayed refs so the qgroup consistency should be safe. Reviewed-by: Nikolay Borisov Signed-off-by: David Sterba --- fs/btrfs/transaction.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 8e0f7a1029c6c..6af7f2bf92de7 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -2264,14 +2264,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans) */ btrfs_free_log_root_tree(trans, fs_info); - /* - * commit_fs_roots() can call btrfs_save_ino_cache(), which generates - * new delayed refs. Must handle them or qgroup can be wrong. - */ - ret = btrfs_run_delayed_refs(trans, (unsigned long)-1); - if (ret) - goto unlock_tree_log; - /* * Since fs roots are all committed, we can get a quite accurate * new_roots. So let's do quota accounting. -- GitLab From 518837e65068c385dddc0a87b3e577c8be7c13b1 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Mon, 11 Jan 2021 11:41:42 +0000 Subject: [PATCH 0530/2012] btrfs: send: fix invalid clone operations when cloning from the same file and root When an incremental send finds an extent that is shared, it checks which file extent items in the range refer to that extent, and for those it emits clone operations, while for others it emits regular write operations to avoid corruption at the destination (as described and fixed by commit d906d49fc5f4 ("Btrfs: send, fix file corruption due to incorrect cloning operations")). However when the root we are cloning from is the send root, we are cloning from the inode currently being processed and the source file range has several extent items that partially point to the desired extent, with an offset smaller than the offset in the file extent item for the range we want to clone into, it can cause the algorithm to issue a clone operation that starts at the current eof of the file being processed in the receiver side, in which case the receiver will fail, with EINVAL, when attempting to execute the clone operation. Example reproducer: $ cat test-send-clone.sh #!/bin/bash DEV=/dev/sdi MNT=/mnt/sdi mkfs.btrfs -f $DEV >/dev/null mount $DEV $MNT # Create our test file with a single and large extent (1M) and with # different content for different file ranges that will be reflinked # later. xfs_io -f \ -c "pwrite -S 0xab 0 128K" \ -c "pwrite -S 0xcd 128K 128K" \ -c "pwrite -S 0xef 256K 256K" \ -c "pwrite -S 0x1a 512K 512K" \ $MNT/foobar btrfs subvolume snapshot -r $MNT $MNT/snap1 btrfs send -f /tmp/snap1.send $MNT/snap1 # Now do a series of changes to our file such that we end up with # different parts of the extent reflinked into different file offsets # and we overwrite a large part of the extent too, so no file extent # items refer to that part that was overwritten. This used to confuse # the algorithm used by the kernel to figure out which file ranges to # clone, making it attempt to clone from a source range starting at # the current eof of the file, resulting in the receiver to fail since # it is an invalid clone operation. # xfs_io -c "reflink $MNT/foobar 64K 1M 960K" \ -c "reflink $MNT/foobar 0K 512K 256K" \ -c "reflink $MNT/foobar 512K 128K 256K" \ -c "pwrite -S 0x73 384K 640K" \ $MNT/foobar btrfs subvolume snapshot -r $MNT $MNT/snap2 btrfs send -f /tmp/snap2.send -p $MNT/snap1 $MNT/snap2 echo -e "\nFile digest in the original filesystem:" md5sum $MNT/snap2/foobar # Now unmount the filesystem, create a new one, mount it and try to # apply both send streams to recreate both snapshots. umount $DEV mkfs.btrfs -f $DEV >/dev/null mount $DEV $MNT btrfs receive -f /tmp/snap1.send $MNT btrfs receive -f /tmp/snap2.send $MNT # Must match what we got in the original filesystem of course. echo -e "\nFile digest in the new filesystem:" md5sum $MNT/snap2/foobar umount $MNT When running the reproducer, the incremental send operation fails due to an invalid clone operation: $ ./test-send-clone.sh wrote 131072/131072 bytes at offset 0 128 KiB, 32 ops; 0.0015 sec (80.906 MiB/sec and 20711.9741 ops/sec) wrote 131072/131072 bytes at offset 131072 128 KiB, 32 ops; 0.0013 sec (90.514 MiB/sec and 23171.6148 ops/sec) wrote 262144/262144 bytes at offset 262144 256 KiB, 64 ops; 0.0025 sec (98.270 MiB/sec and 25157.2327 ops/sec) wrote 524288/524288 bytes at offset 524288 512 KiB, 128 ops; 0.0052 sec (95.730 MiB/sec and 24506.9883 ops/sec) Create a readonly snapshot of '/mnt/sdi' in '/mnt/sdi/snap1' At subvol /mnt/sdi/snap1 linked 983040/983040 bytes at offset 1048576 960 KiB, 1 ops; 0.0006 sec (1.419 GiB/sec and 1550.3876 ops/sec) linked 262144/262144 bytes at offset 524288 256 KiB, 1 ops; 0.0020 sec (120.192 MiB/sec and 480.7692 ops/sec) linked 262144/262144 bytes at offset 131072 256 KiB, 1 ops; 0.0018 sec (133.833 MiB/sec and 535.3319 ops/sec) wrote 655360/655360 bytes at offset 393216 640 KiB, 160 ops; 0.0093 sec (66.781 MiB/sec and 17095.8436 ops/sec) Create a readonly snapshot of '/mnt/sdi' in '/mnt/sdi/snap2' At subvol /mnt/sdi/snap2 File digest in the original filesystem: 9c13c61cb0b9f5abf45344375cb04dfa /mnt/sdi/snap2/foobar At subvol snap1 At snapshot snap2 ERROR: failed to clone extents to foobar: Invalid argument File digest in the new filesystem: 132f0396da8f48d2e667196bff882cfc /mnt/sdi/snap2/foobar The clone operation is invalid because its source range starts at the current eof of the file in the receiver, causing the receiver to get an EINVAL error from the clone operation when attempting it. For the example above, what happens is the following: 1) When processing the extent at file offset 1M, the algorithm checks that the extent is shared and can be (fully or partially) found at file offset 0. At this point the file has a size (and eof) of 1M at the receiver; 2) It finds that our extent item at file offset 1M has a data offset of 64K and, since the file extent item at file offset 0 has a data offset of 0, it issues a clone operation, from the same file and root, that has a source range offset of 64K, destination offset of 1M and a length of 64K, since the extent item at file offset 0 refers only to the first 128K of the shared extent. After this clone operation, the file size (and eof) at the receiver is increased from 1M to 1088K (1M + 64K); 3) Now there's still 896K (960K - 64K) of data left to clone or write, so it checks for the next file extent item, which starts at file offset 128K. This file extent item has a data offset of 0 and a length of 256K, so a clone operation with a source range offset of 256K, a destination offset of 1088K (1M + 64K) and length of 128K is issued. After this operation the file size (and eof) at the receiver increases from 1088K to 1216K (1088K + 128K); 4) Now there's still 768K (896K - 128K) of data left to clone or write, so it checks for the next file extent item, located at file offset 384K. This file extent item points to a different extent, not the one we want to clone, with a length of 640K. So we issue a write operation into the file range 1216K (1088K + 128K, end of the last clone operation), with a length of 640K and with a data matching the one we can find for that range in send root. After this operation, the file size (and eof) at the receiver increases from 1216K to 1856K (1216K + 640K); 5) Now there's still 128K (768K - 640K) of data left to clone or write, so we look into the file extent item, which is for file offset 1M and it points to the extent we want to clone, with a data offset of 64K and a length of 960K. However this matches the file offset we started with, the start of the range to clone into. So we can't for sure find any file extent item from here onwards with the rest of the data we want to clone, yet we proceed and since the file extent item points to the shared extent, with a data offset of 64K, we issue a clone operation with a source range starting at file offset 1856K, which matches the file extent item's offset, 1M, plus the amount of data cloned and written so far, which is 64K (step 2) + 128K (step 3) + 640K (step 4). This clone operation is invalid since the source range offset matches the current eof of the file in the receiver. We should have stopped looking for extents to clone at this point and instead fallback to write, which would simply the contain the data in the file range from 1856K to 1856K + 128K. So fix this by stopping the loop that looks for file ranges to clone at clone_range() when we reach the current eof of the file being processed, if we are cloning from the same file and using the send root as the clone root. This ensures any data not yet cloned will be sent to the receiver through a write operation. A test case for fstests will follow soon. Reported-by: Massimo B. Link: https://lore.kernel.org/linux-btrfs/6ae34776e85912960a253a8327068a892998e685.camel@gmx.net/ Fixes: 11f2069c113e ("Btrfs: send, allow clone operations within the same file") CC: stable@vger.kernel.org # 5.5+ Reviewed-by: Josef Bacik Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/send.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index ae97f4dbaff30..78a35374d4929 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -5512,6 +5512,21 @@ static int clone_range(struct send_ctx *sctx, break; offset += clone_len; clone_root->offset += clone_len; + + /* + * If we are cloning from the file we are currently processing, + * and using the send root as the clone root, we must stop once + * the current clone offset reaches the current eof of the file + * at the receiver, otherwise we would issue an invalid clone + * operation (source range going beyond eof) and cause the + * receiver to fail. So if we reach the current eof, bail out + * and fallback to a regular write. + */ + if (clone_root->root == sctx->send_root && + clone_root->ino == sctx->cur_ino && + clone_root->offset >= sctx->cur_inode_next_write_offset) + break; + data_offset += clone_len; next: path->slots[0]++; -- GitLab From 95e9295daa849095d8be05fb6e26b2ba9be1594f Mon Sep 17 00:00:00 2001 From: Naushir Patuck Date: Wed, 6 Jan 2021 16:16:57 +0100 Subject: [PATCH 0531/2012] media: Revert "media: videobuf2: Fix length check for single plane dmabuf queueing" The updated length check for dmabuf types broke existing usage in v4l2 userland clients. Fixes: 961d3b27 ("media: videobuf2: Fix length check for single plane dmabuf queueing") Cc: stable@vger.kernel.org Signed-off-by: Naushir Patuck Tested-by: Kieran Bingham Reviewed-by: Kieran Bingham Reviewed-by: Laurent Pinchart Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- drivers/media/common/videobuf2/videobuf2-v4l2.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/media/common/videobuf2/videobuf2-v4l2.c b/drivers/media/common/videobuf2/videobuf2-v4l2.c index 96d3b2b2aa318..3f61f5863bf77 100644 --- a/drivers/media/common/videobuf2/videobuf2-v4l2.c +++ b/drivers/media/common/videobuf2/videobuf2-v4l2.c @@ -118,8 +118,7 @@ static int __verify_length(struct vb2_buffer *vb, const struct v4l2_buffer *b) return -EINVAL; } } else { - length = (b->memory == VB2_MEMORY_USERPTR || - b->memory == VB2_MEMORY_DMABUF) + length = (b->memory == VB2_MEMORY_USERPTR) ? b->length : vb->planes[0].length; if (b->bytesused > length) -- GitLab From 20c7842ed8374e1c3ee750b2fe7ca8cdd071bda6 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 5 Jan 2021 12:52:45 -0500 Subject: [PATCH 0532/2012] ALSA: hda/hdmi - enable runtime pm for CI AMD display audio We are able to power down the GPU and audio via the GPU driver so flag these asics as supporting runtime pm. Reviewed-by: Evan Quan Signed-off-by: Alex Deucher Link: https://lore.kernel.org/r/20210105175245.963451-1-alexander.deucher@amd.com Signed-off-by: Takashi Iwai --- sound/pci/hda/hda_intel.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index 770ad25f1907c..e4dd2ff5e4737 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -2598,7 +2598,8 @@ static const struct pci_device_id azx_ids[] = { .driver_data = AZX_DRIVER_GENERIC | AZX_DCAPS_PRESET_AMD_SB }, /* ATI HDMI */ { PCI_DEVICE(0x1002, 0x0002), - .driver_data = AZX_DRIVER_ATIHDMI_NS | AZX_DCAPS_PRESET_ATI_HDMI_NS }, + .driver_data = AZX_DRIVER_ATIHDMI_NS | AZX_DCAPS_PRESET_ATI_HDMI_NS | + AZX_DCAPS_PM_RUNTIME }, { PCI_DEVICE(0x1002, 0x1308), .driver_data = AZX_DRIVER_ATIHDMI_NS | AZX_DCAPS_PRESET_ATI_HDMI_NS }, { PCI_DEVICE(0x1002, 0x157a), @@ -2660,9 +2661,11 @@ static const struct pci_device_id azx_ids[] = { { PCI_DEVICE(0x1002, 0xaab0), .driver_data = AZX_DRIVER_ATIHDMI_NS | AZX_DCAPS_PRESET_ATI_HDMI_NS }, { PCI_DEVICE(0x1002, 0xaac0), - .driver_data = AZX_DRIVER_ATIHDMI_NS | AZX_DCAPS_PRESET_ATI_HDMI_NS }, + .driver_data = AZX_DRIVER_ATIHDMI_NS | AZX_DCAPS_PRESET_ATI_HDMI_NS | + AZX_DCAPS_PM_RUNTIME }, { PCI_DEVICE(0x1002, 0xaac8), - .driver_data = AZX_DRIVER_ATIHDMI_NS | AZX_DCAPS_PRESET_ATI_HDMI_NS }, + .driver_data = AZX_DRIVER_ATIHDMI_NS | AZX_DCAPS_PRESET_ATI_HDMI_NS | + AZX_DCAPS_PM_RUNTIME }, { PCI_DEVICE(0x1002, 0xaad8), .driver_data = AZX_DRIVER_ATIHDMI_NS | AZX_DCAPS_PRESET_ATI_HDMI_NS | AZX_DCAPS_PM_RUNTIME }, -- GitLab From 1a9c72ad4c26821e215a396167c14959cf24a7f1 Mon Sep 17 00:00:00 2001 From: KP Singh Date: Tue, 12 Jan 2021 07:55:24 +0000 Subject: [PATCH 0533/2012] bpf: Local storage helpers should check nullness of owner ptr passed The verifier allows ARG_PTR_TO_BTF_ID helper arguments to be NULL, so helper implementations need to check this before dereferencing them. This was already fixed for the socket storage helpers but not for task and inode. The issue can be reproduced by attaching an LSM program to inode_rename hook (called when moving files) which tries to get the inode of the new file without checking for its nullness and then trying to move an existing file to a new path: mv existing_file new_file_does_not_exist The report including the sample program and the steps for reproducing the bug: https://lore.kernel.org/bpf/CANaYP3HWkH91SN=wTNO9FL_2ztHfqcXKX38SSE-JJ2voh+vssw@mail.gmail.com Fixes: 4cf1bc1f1045 ("bpf: Implement task local storage") Fixes: 8ea636848aca ("bpf: Implement bpf_local_storage for inodes") Reported-by: Gilad Reti Signed-off-by: KP Singh Signed-off-by: Daniel Borkmann Acked-by: Martin KaFai Lau Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20210112075525.256820-3-kpsingh@kernel.org --- kernel/bpf/bpf_inode_storage.c | 5 ++++- kernel/bpf/bpf_task_storage.c | 5 ++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/kernel/bpf/bpf_inode_storage.c b/kernel/bpf/bpf_inode_storage.c index 6edff97ad594b..dbc1dbdd2cbf0 100644 --- a/kernel/bpf/bpf_inode_storage.c +++ b/kernel/bpf/bpf_inode_storage.c @@ -176,7 +176,7 @@ BPF_CALL_4(bpf_inode_storage_get, struct bpf_map *, map, struct inode *, inode, * bpf_local_storage_update expects the owner to have a * valid storage pointer. */ - if (!inode_storage_ptr(inode)) + if (!inode || !inode_storage_ptr(inode)) return (unsigned long)NULL; sdata = inode_storage_lookup(inode, map, true); @@ -200,6 +200,9 @@ BPF_CALL_4(bpf_inode_storage_get, struct bpf_map *, map, struct inode *, inode, BPF_CALL_2(bpf_inode_storage_delete, struct bpf_map *, map, struct inode *, inode) { + if (!inode) + return -EINVAL; + /* This helper must only called from where the inode is gurranteed * to have a refcount and cannot be freed. */ diff --git a/kernel/bpf/bpf_task_storage.c b/kernel/bpf/bpf_task_storage.c index 4ef1959a78f27..e0da0258b732d 100644 --- a/kernel/bpf/bpf_task_storage.c +++ b/kernel/bpf/bpf_task_storage.c @@ -218,7 +218,7 @@ BPF_CALL_4(bpf_task_storage_get, struct bpf_map *, map, struct task_struct *, * bpf_local_storage_update expects the owner to have a * valid storage pointer. */ - if (!task_storage_ptr(task)) + if (!task || !task_storage_ptr(task)) return (unsigned long)NULL; sdata = task_storage_lookup(task, map, true); @@ -243,6 +243,9 @@ BPF_CALL_4(bpf_task_storage_get, struct bpf_map *, map, struct task_struct *, BPF_CALL_2(bpf_task_storage_delete, struct bpf_map *, map, struct task_struct *, task) { + if (!task) + return -EINVAL; + /* This helper must only be called from places where the lifetime of the task * is guaranteed. Either by being refcounted or by being protected * by an RCU read-side critical section. -- GitLab From 84d571d46c7046a957ff3d1c916a1b9dcc7f1ce8 Mon Sep 17 00:00:00 2001 From: KP Singh Date: Tue, 12 Jan 2021 07:55:25 +0000 Subject: [PATCH 0534/2012] bpf: Fix typo in bpf_inode_storage.c Fix "gurranteed" -> "guaranteed" in bpf_inode_storage.c Suggested-by: Andrii Nakryiko Signed-off-by: KP Singh Signed-off-by: Daniel Borkmann Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20210112075525.256820-4-kpsingh@kernel.org --- kernel/bpf/bpf_inode_storage.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/bpf/bpf_inode_storage.c b/kernel/bpf/bpf_inode_storage.c index dbc1dbdd2cbf0..2f0597320b6d2 100644 --- a/kernel/bpf/bpf_inode_storage.c +++ b/kernel/bpf/bpf_inode_storage.c @@ -183,7 +183,7 @@ BPF_CALL_4(bpf_inode_storage_get, struct bpf_map *, map, struct inode *, inode, if (sdata) return (unsigned long)sdata->data; - /* This helper must only called from where the inode is gurranteed + /* This helper must only called from where the inode is guaranteed * to have a refcount and cannot be freed. */ if (flags & BPF_LOCAL_STORAGE_GET_F_CREATE) { @@ -203,7 +203,7 @@ BPF_CALL_2(bpf_inode_storage_delete, if (!inode) return -EINVAL; - /* This helper must only called from where the inode is gurranteed + /* This helper must only called from where the inode is guaranteed * to have a refcount and cannot be freed. */ return inode_storage_delete(inode, map); -- GitLab From 2f94ac19184665263b7a285ae88abe19dedf9c1b Mon Sep 17 00:00:00 2001 From: KP Singh Date: Tue, 12 Jan 2021 07:55:23 +0000 Subject: [PATCH 0535/2012] bpf: Update local storage test to check handling of null ptrs It was found in [1] that bpf_inode_storage_get helper did not check the nullness of the passed owner ptr which caused an oops when dereferenced. This change incorporates the example suggested in [1] into the local storage selftest. The test is updated to create a temporary directory instead of just using a tempfile. In order to replicate the issue this copied rm binary is renamed tiggering the inode_rename with a null pointer for the new_inode. The logic to verify the setting and deletion of the inode local storage of the old inode is also moved to this LSM hook. The change also removes the copy_rm function and simply shells out to copy files and recursively delete directories and consolidates the logic of setting the initial inode storage to the bprm_committed_creds hook and removes the file_open hook. [1]: https://lore.kernel.org/bpf/CANaYP3HWkH91SN=wTNO9FL_2ztHfqcXKX38SSE-JJ2voh+vssw@mail.gmail.com Suggested-by: Gilad Reti Signed-off-by: KP Singh Signed-off-by: Daniel Borkmann Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20210112075525.256820-2-kpsingh@kernel.org --- .../bpf/prog_tests/test_local_storage.c | 96 +++++-------------- .../selftests/bpf/progs/local_storage.c | 62 ++++++------ 2 files changed, 61 insertions(+), 97 deletions(-) diff --git a/tools/testing/selftests/bpf/prog_tests/test_local_storage.c b/tools/testing/selftests/bpf/prog_tests/test_local_storage.c index c0fe73a17ed12..3bfcf00c0a673 100644 --- a/tools/testing/selftests/bpf/prog_tests/test_local_storage.c +++ b/tools/testing/selftests/bpf/prog_tests/test_local_storage.c @@ -34,61 +34,6 @@ struct storage { struct bpf_spin_lock lock; }; -/* Copies an rm binary to a temp file. dest is a mkstemp template */ -static int copy_rm(char *dest) -{ - int fd_in, fd_out = -1, ret = 0; - struct stat stat; - char *buf = NULL; - - fd_in = open("/bin/rm", O_RDONLY); - if (fd_in < 0) - return -errno; - - fd_out = mkstemp(dest); - if (fd_out < 0) { - ret = -errno; - goto out; - } - - ret = fstat(fd_in, &stat); - if (ret == -1) { - ret = -errno; - goto out; - } - - buf = malloc(stat.st_blksize); - if (!buf) { - ret = -errno; - goto out; - } - - while (ret = read(fd_in, buf, stat.st_blksize), ret > 0) { - ret = write(fd_out, buf, ret); - if (ret < 0) { - ret = -errno; - goto out; - - } - } - if (ret < 0) { - ret = -errno; - goto out; - - } - - /* Set executable permission on the copied file */ - ret = chmod(dest, 0100); - if (ret == -1) - ret = -errno; - -out: - free(buf); - close(fd_in); - close(fd_out); - return ret; -} - /* Fork and exec the provided rm binary and return the exit code of the * forked process and its pid. */ @@ -168,9 +113,11 @@ static bool check_syscall_operations(int map_fd, int obj_fd) void test_test_local_storage(void) { - char tmp_exec_path[PATH_MAX] = "/tmp/copy_of_rmXXXXXX"; + char tmp_dir_path[64] = "/tmp/local_storageXXXXXX"; int err, serv_sk = -1, task_fd = -1, rm_fd = -1; struct local_storage *skel = NULL; + char tmp_exec_path[64]; + char cmd[256]; skel = local_storage__open_and_load(); if (CHECK(!skel, "skel_load", "lsm skeleton failed\n")) @@ -189,18 +136,24 @@ void test_test_local_storage(void) task_fd)) goto close_prog; - err = copy_rm(tmp_exec_path); - if (CHECK(err < 0, "copy_rm", "err %d errno %d\n", err, errno)) + if (CHECK(!mkdtemp(tmp_dir_path), "mkdtemp", + "unable to create tmpdir: %d\n", errno)) goto close_prog; + snprintf(tmp_exec_path, sizeof(tmp_exec_path), "%s/copy_of_rm", + tmp_dir_path); + snprintf(cmd, sizeof(cmd), "cp /bin/rm %s", tmp_exec_path); + if (CHECK_FAIL(system(cmd))) + goto close_prog_rmdir; + rm_fd = open(tmp_exec_path, O_RDONLY); if (CHECK(rm_fd < 0, "open", "failed to open %s err:%d, errno:%d", tmp_exec_path, rm_fd, errno)) - goto close_prog; + goto close_prog_rmdir; if (!check_syscall_operations(bpf_map__fd(skel->maps.inode_storage_map), rm_fd)) - goto close_prog; + goto close_prog_rmdir; /* Sets skel->bss->monitored_pid to the pid of the forked child * forks a child process that executes tmp_exec_path and tries to @@ -209,33 +162,36 @@ void test_test_local_storage(void) */ err = run_self_unlink(&skel->bss->monitored_pid, tmp_exec_path); if (CHECK(err != EPERM, "run_self_unlink", "err %d want EPERM\n", err)) - goto close_prog_unlink; + goto close_prog_rmdir; /* Set the process being monitored to be the current process */ skel->bss->monitored_pid = getpid(); - /* Remove the temporary created executable */ - err = unlink(tmp_exec_path); - if (CHECK(err != 0, "unlink", "unable to unlink %s: %d", tmp_exec_path, - errno)) - goto close_prog_unlink; + /* Move copy_of_rm to a new location so that it triggers the + * inode_rename LSM hook with a new_dentry that has a NULL inode ptr. + */ + snprintf(cmd, sizeof(cmd), "mv %s/copy_of_rm %s/check_null_ptr", + tmp_dir_path, tmp_dir_path); + if (CHECK_FAIL(system(cmd))) + goto close_prog_rmdir; CHECK(skel->data->inode_storage_result != 0, "inode_storage_result", "inode_local_storage not set\n"); serv_sk = start_server(AF_INET6, SOCK_STREAM, NULL, 0, 0); if (CHECK(serv_sk < 0, "start_server", "failed to start server\n")) - goto close_prog; + goto close_prog_rmdir; CHECK(skel->data->sk_storage_result != 0, "sk_storage_result", "sk_local_storage not set\n"); if (!check_syscall_operations(bpf_map__fd(skel->maps.sk_storage_map), serv_sk)) - goto close_prog; + goto close_prog_rmdir; -close_prog_unlink: - unlink(tmp_exec_path); +close_prog_rmdir: + snprintf(cmd, sizeof(cmd), "rm -rf %s", tmp_dir_path); + system(cmd); close_prog: close(serv_sk); close(rm_fd); diff --git a/tools/testing/selftests/bpf/progs/local_storage.c b/tools/testing/selftests/bpf/progs/local_storage.c index 3e3de130f28f3..95868bc7ada98 100644 --- a/tools/testing/selftests/bpf/progs/local_storage.c +++ b/tools/testing/selftests/bpf/progs/local_storage.c @@ -50,7 +50,6 @@ int BPF_PROG(unlink_hook, struct inode *dir, struct dentry *victim) __u32 pid = bpf_get_current_pid_tgid() >> 32; struct local_storage *storage; bool is_self_unlink; - int err; if (pid != monitored_pid) return 0; @@ -66,8 +65,27 @@ int BPF_PROG(unlink_hook, struct inode *dir, struct dentry *victim) return -EPERM; } - storage = bpf_inode_storage_get(&inode_storage_map, victim->d_inode, 0, - BPF_LOCAL_STORAGE_GET_F_CREATE); + return 0; +} + +SEC("lsm/inode_rename") +int BPF_PROG(inode_rename, struct inode *old_dir, struct dentry *old_dentry, + struct inode *new_dir, struct dentry *new_dentry, + unsigned int flags) +{ + __u32 pid = bpf_get_current_pid_tgid() >> 32; + struct local_storage *storage; + int err; + + /* new_dentry->d_inode can be NULL when the inode is renamed to a file + * that did not exist before. The helper should be able to handle this + * NULL pointer. + */ + bpf_inode_storage_get(&inode_storage_map, new_dentry->d_inode, 0, + BPF_LOCAL_STORAGE_GET_F_CREATE); + + storage = bpf_inode_storage_get(&inode_storage_map, old_dentry->d_inode, + 0, 0); if (!storage) return 0; @@ -76,7 +94,7 @@ int BPF_PROG(unlink_hook, struct inode *dir, struct dentry *victim) inode_storage_result = -1; bpf_spin_unlock(&storage->lock); - err = bpf_inode_storage_delete(&inode_storage_map, victim->d_inode); + err = bpf_inode_storage_delete(&inode_storage_map, old_dentry->d_inode); if (!err) inode_storage_result = err; @@ -133,37 +151,18 @@ int BPF_PROG(socket_post_create, struct socket *sock, int family, int type, return 0; } -SEC("lsm/file_open") -int BPF_PROG(file_open, struct file *file) -{ - __u32 pid = bpf_get_current_pid_tgid() >> 32; - struct local_storage *storage; - - if (pid != monitored_pid) - return 0; - - if (!file->f_inode) - return 0; - - storage = bpf_inode_storage_get(&inode_storage_map, file->f_inode, 0, - BPF_LOCAL_STORAGE_GET_F_CREATE); - if (!storage) - return 0; - - bpf_spin_lock(&storage->lock); - storage->value = DUMMY_STORAGE_VALUE; - bpf_spin_unlock(&storage->lock); - return 0; -} - /* This uses the local storage to remember the inode of the binary that a * process was originally executing. */ SEC("lsm/bprm_committed_creds") void BPF_PROG(exec, struct linux_binprm *bprm) { + __u32 pid = bpf_get_current_pid_tgid() >> 32; struct local_storage *storage; + if (pid != monitored_pid) + return; + storage = bpf_task_storage_get(&task_storage_map, bpf_get_current_task_btf(), 0, BPF_LOCAL_STORAGE_GET_F_CREATE); @@ -172,4 +171,13 @@ void BPF_PROG(exec, struct linux_binprm *bprm) storage->exec_inode = bprm->file->f_inode; bpf_spin_unlock(&storage->lock); } + + storage = bpf_inode_storage_get(&inode_storage_map, bprm->file->f_inode, + 0, BPF_LOCAL_STORAGE_GET_F_CREATE); + if (!storage) + return; + + bpf_spin_lock(&storage->lock); + storage->value = DUMMY_STORAGE_VALUE; + bpf_spin_unlock(&storage->lock); } -- GitLab From 2d6ffc63f12417b979955a5b22ad9a76d2af5de9 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Thu, 31 Dec 2020 08:53:20 +0800 Subject: [PATCH 0536/2012] iommu/vt-d: Fix unaligned addresses for intel_flush_svm_range_dev() The VT-d hardware will ignore those Addr bits which have been masked by the AM field in the PASID-based-IOTLB invalidation descriptor. As the result, if the starting address in the descriptor is not aligned with the address mask, some IOTLB caches might not invalidate. Hence people will see below errors. [ 1093.704661] dmar_fault: 29 callbacks suppressed [ 1093.704664] DMAR: DRHD: handling fault status reg 3 [ 1093.712738] DMAR: [DMA Read] Request device [7a:02.0] PASID 2 fault addr 7f81c968d000 [fault reason 113] SM: Present bit in first-level paging entry is clear Fix this by using aligned address for PASID-based-IOTLB invalidation. Fixes: 1c4f88b7f1f9 ("iommu/vt-d: Shared virtual address in scalable mode") Reported-and-tested-by: Guo Kaijie Signed-off-by: Lu Baolu Link: https://lore.kernel.org/r/20201231005323.2178523-2-baolu.lu@linux.intel.com Signed-off-by: Will Deacon --- drivers/iommu/intel/svm.c | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c index 790ef3497e7e3..18a9f05df4079 100644 --- a/drivers/iommu/intel/svm.c +++ b/drivers/iommu/intel/svm.c @@ -118,8 +118,10 @@ void intel_svm_check(struct intel_iommu *iommu) iommu->flags |= VTD_FLAG_SVM_CAPABLE; } -static void intel_flush_svm_range_dev (struct intel_svm *svm, struct intel_svm_dev *sdev, - unsigned long address, unsigned long pages, int ih) +static void __flush_svm_range_dev(struct intel_svm *svm, + struct intel_svm_dev *sdev, + unsigned long address, + unsigned long pages, int ih) { struct qi_desc desc; @@ -170,6 +172,22 @@ static void intel_flush_svm_range_dev (struct intel_svm *svm, struct intel_svm_d } } +static void intel_flush_svm_range_dev(struct intel_svm *svm, + struct intel_svm_dev *sdev, + unsigned long address, + unsigned long pages, int ih) +{ + unsigned long shift = ilog2(__roundup_pow_of_two(pages)); + unsigned long align = (1ULL << (VTD_PAGE_SHIFT + shift)); + unsigned long start = ALIGN_DOWN(address, align); + unsigned long end = ALIGN(address + (pages << VTD_PAGE_SHIFT), align); + + while (start < end) { + __flush_svm_range_dev(svm, sdev, start, align >> VTD_PAGE_SHIFT, ih); + start += align; + } +} + static void intel_flush_svm_range(struct intel_svm *svm, unsigned long address, unsigned long pages, int ih) { -- GitLab From b812834b5329fe78d643c9a61350d227db904361 Mon Sep 17 00:00:00 2001 From: Konrad Dybcio Date: Sat, 9 Jan 2021 17:56:21 +0100 Subject: [PATCH 0537/2012] iommu: arm-smmu-qcom: Add sdm630/msm8998 compatibles for qcom quirks SDM630 and MSM8998 are among the SoCs that use Qualcomm's implementation of SMMUv2 which has already proven to be problematic over the years. Add their compatibles to the lookup list to prevent the platforms from being shut down by the hypervisor at MMU probe. Signed-off-by: Konrad Dybcio Signed-off-by: AngeloGioacchino Del Regno Link: https://lore.kernel.org/r/20210109165622.149777-1-konrad.dybcio@somainline.org Signed-off-by: Will Deacon --- drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c index 1b83d140742f3..bcda17012aee8 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c @@ -325,7 +325,9 @@ static struct arm_smmu_device *qcom_smmu_create(struct arm_smmu_device *smmu, } static const struct of_device_id __maybe_unused qcom_smmu_impl_of_match[] = { + { .compatible = "qcom,msm8998-smmu-v2" }, { .compatible = "qcom,sc7180-smmu-500" }, + { .compatible = "qcom,sdm630-smmu-v2" }, { .compatible = "qcom,sdm845-smmu-500" }, { .compatible = "qcom,sm8150-smmu-500" }, { .compatible = "qcom,sm8250-smmu-500" }, -- GitLab From 796130b1de29575e2e3fc3b0da4bda162b750db7 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Sat, 19 Dec 2020 10:05:45 +0100 Subject: [PATCH 0538/2012] ia64: fix timer cleanup regression A cleanup patch from my legacy timer series broke ia64 and led to RCU stall errors and a fast system clock: [ 909.360108] INFO: task systemd-sysv-ge:200 blocked for more than 127 seconds. [ 909.360108] Not tainted 5.10.0+ #130 [ 909.360108] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. [ 909.360108] task:systemd-sysv-ge state:D stack: 0 pid: 200 ppid: 189 flags:0x00000000 [ 909.364108] [ 909.364108] Call Trace: [ 909.364423] [] __schedule+0x890/0x21e0 [ 909.364423] sp=e0000100487d7b70 bsp=e0000100487d1748 [ 909.368423] [] schedule+0xa0/0x240 [ 909.368423] sp=e0000100487d7b90 bsp=e0000100487d16e0 [ 909.368558] [] io_schedule+0x70/0xa0 [ 909.368558] sp=e0000100487d7b90 bsp=e0000100487d16c0 [ 909.372290] [] bit_wait_io+0x20/0xe0 [ 909.372290] sp=e0000100487d7b90 bsp=e0000100487d1698 [ 909.374168] rcu: INFO: rcu_sched detected stalls on CPUs/tasks: [ 909.376290] [] __wait_on_bit+0xc0/0x1c0 [ 909.376290] sp=e0000100487d7b90 bsp=e0000100487d1648 [ 909.374168] rcu: 3-....: (2 ticks this GP) idle=19e/1/0x4000000000000002 softirq=1581/1581 fqs=2 [ 909.374168] (detected by 0, t=5661 jiffies, g=1089, q=3) [ 909.376290] [] out_of_line_wait_on_bit+0x120/0x140 [ 909.376290] sp=e0000100487d7b90 bsp=e0000100487d1610 [ 909.374168] Task dump for CPU 3: [ 909.374168] task:khungtaskd state:R running task Revert most of my patch to make this work again, including the extra update_process_times()/profile_tick() and the local_irq_enable() in the loop that I expected not to be needed here. I have not found out exactly what goes wrong, and would suggest that someone with hardware access tries to convert this code into a singleshot clockevent driver, which should give better behavior in all cases. Reported-by: John Paul Adrian Glaubitz Fixes: 2b49ddcef297 ("ia64: convert to legacy_timer_tick") Cc: John Stultz Cc: Thomas Gleixner Cc: Stephen Boyd Cc: Frederic Weisbecker Signed-off-by: Arnd Bergmann --- arch/ia64/kernel/time.c | 31 ++++++++++++++++++------------- 1 file changed, 18 insertions(+), 13 deletions(-) diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c index ed9fc3d057a6d..43e8050145bef 100644 --- a/arch/ia64/kernel/time.c +++ b/arch/ia64/kernel/time.c @@ -171,29 +171,34 @@ void vtime_account_hardirq(struct task_struct *tsk) static irqreturn_t timer_interrupt (int irq, void *dev_id) { - unsigned long cur_itm, new_itm, ticks; + unsigned long new_itm; if (cpu_is_offline(smp_processor_id())) { return IRQ_HANDLED; } new_itm = local_cpu_data->itm_next; - cur_itm = ia64_get_itc(); - if (!time_after(cur_itm, new_itm)) { + if (!time_after(ia64_get_itc(), new_itm)) printk(KERN_ERR "Oops: timer tick before it's due (itc=%lx,itm=%lx)\n", - cur_itm, new_itm); - ticks = 1; - } else { - ticks = DIV_ROUND_UP(cur_itm - new_itm, - local_cpu_data->itm_delta); - new_itm += ticks * local_cpu_data->itm_delta; - } + ia64_get_itc(), new_itm); + + while (1) { + new_itm += local_cpu_data->itm_delta; + + legacy_timer_tick(smp_processor_id() == time_keeper_id); - if (smp_processor_id() != time_keeper_id) - ticks = 0; + local_cpu_data->itm_next = new_itm; - legacy_timer_tick(ticks); + if (time_after(new_itm, ia64_get_itc())) + break; + + /* + * Allow IPIs to interrupt the timer loop. + */ + local_irq_enable(); + local_irq_disable(); + } do { /* -- GitLab From 968d7764e35b2fa4aad36481690b297e2c497c99 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 4 Jan 2021 09:52:48 +0100 Subject: [PATCH 0539/2012] ia64: fix xchg() warning The definition if xchg() causes a harmless warning in some files, like: In file included from ../arch/ia64/include/uapi/asm/intrinsics.h:22, from ../arch/ia64/include/asm/intrinsics.h:11, from ../arch/ia64/include/asm/bitops.h:19, from ../include/linux/bitops.h:32, from ../include/linux/kernel.h:11, from ../fs/nfs/read.c:12: ../fs/nfs/read.c: In function 'nfs_read_completion': ../arch/ia64/include/uapi/asm/cmpxchg.h:57:2: warning: value computed is not used [-Wunused-value] 57 | ((__typeof__(*(ptr))) __xchg((unsigned long) (x), (ptr), sizeof(*(ptr)))) | ~^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ../fs/nfs/read.c:196:5: note: in expansion of macro 'xchg' 196 | xchg(&nfs_req_openctx(req)->error, error); | ^~~~ Change it to a compound expression like the other architectures have to get a clean defconfig build. Signed-off-by: Arnd Bergmann --- arch/ia64/include/uapi/asm/cmpxchg.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/ia64/include/uapi/asm/cmpxchg.h b/arch/ia64/include/uapi/asm/cmpxchg.h index d69c979936d41..5d90307fd6e07 100644 --- a/arch/ia64/include/uapi/asm/cmpxchg.h +++ b/arch/ia64/include/uapi/asm/cmpxchg.h @@ -54,7 +54,7 @@ extern void ia64_xchg_called_with_bad_pointer(void); }) #define xchg(ptr, x) \ -((__typeof__(*(ptr))) __xchg((unsigned long) (x), (ptr), sizeof(*(ptr)))) +({(__typeof__(*(ptr))) __xchg((unsigned long) (x), (ptr), sizeof(*(ptr)));}) /* * Atomic compare and exchange. Compare OLD with MEM, if identical, -- GitLab From 96ec72a3425d1515b69b7f9dc34a4a6ce5862a37 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 5 Jan 2021 14:49:27 +0100 Subject: [PATCH 0540/2012] ia64: Mark architecture as orphaned Tony Luck has maintained arch/ia64 for the past 16 years, but mentioned that he no longer has working ia64 machines, nor time to look at patches, so he is stepping down as the maintainer. Fenghua Yu came in as a temporary co-maintainer when Tony was on sabbatical in 2009, but has not worked on it after that either. This leaves the architecture as Orphaned, meaning that patches will have to get routed through other trees from now on. Acked-by: Tony Luck Cc: Fenghua Yu Cc: linux-ia64@vger.kernel.org Link: https://lore.kernel.org/lkml/20210105153603.GA17644@agluck-desk2.amr.corp.intel.com/ Signed-off-by: Arnd Bergmann --- MAINTAINERS | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 546aa66428c9f..472cfcc7c11a9 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -8435,11 +8435,8 @@ F: drivers/i3c/ F: include/linux/i3c/ IA64 (Itanium) PLATFORM -M: Tony Luck -M: Fenghua Yu L: linux-ia64@vger.kernel.org -S: Odd Fixes -T: git git://git.kernel.org/pub/scm/linux/kernel/git/aegl/linux.git +S: Orphan F: Documentation/ia64/ F: arch/ia64/ -- GitLab From 280a9045bb18833db921b316a5527d2b565e9f2e Mon Sep 17 00:00:00 2001 From: Eugene Korenevsky Date: Sun, 10 Jan 2021 20:36:09 +0300 Subject: [PATCH 0541/2012] ehci: fix EHCI host controller initialization sequence According to EHCI spec, EHCI HC clears USBSTS.HCHalted whenever USBCMD.RS=1. However, it is a good practice to wait some time after setting USBCMD.RS (approximately 100ms) until USBSTS.HCHalted become zero. Without this waiting, VirtualBox's EHCI virtual HC accidentally hangs (see BugLink). BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=211095 Acked-by: Alan Stern Signed-off-by: Eugene Korenevsky Cc: stable Link: https://lore.kernel.org/r/20210110173609.GA17313@himera.home Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/ehci-hcd.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/usb/host/ehci-hcd.c b/drivers/usb/host/ehci-hcd.c index e358ae17d51e7..1926b328b6aa7 100644 --- a/drivers/usb/host/ehci-hcd.c +++ b/drivers/usb/host/ehci-hcd.c @@ -574,6 +574,7 @@ static int ehci_run (struct usb_hcd *hcd) struct ehci_hcd *ehci = hcd_to_ehci (hcd); u32 temp; u32 hcc_params; + int rc; hcd->uses_new_polling = 1; @@ -629,9 +630,20 @@ static int ehci_run (struct usb_hcd *hcd) down_write(&ehci_cf_port_reset_rwsem); ehci->rh_state = EHCI_RH_RUNNING; ehci_writel(ehci, FLAG_CF, &ehci->regs->configured_flag); + + /* Wait until HC become operational */ ehci_readl(ehci, &ehci->regs->command); /* unblock posted writes */ msleep(5); + rc = ehci_handshake(ehci, &ehci->regs->status, STS_HALT, 0, 100 * 1000); + up_write(&ehci_cf_port_reset_rwsem); + + if (rc) { + ehci_err(ehci, "USB %x.%x, controller refused to start: %d\n", + ((ehci->sbrn & 0xf0)>>4), (ehci->sbrn & 0x0f), rc); + return rc; + } + ehci->last_periodic_enable = ktime_get_real(); temp = HC_VERSION(ehci, ehci_readl(ehci, &ehci->caps->hc_capbase)); -- GitLab From 643a4df7fe3f6831d14536fd692be85f92670a52 Mon Sep 17 00:00:00 2001 From: Longfang Liu Date: Tue, 12 Jan 2021 09:57:27 +0800 Subject: [PATCH 0542/2012] USB: ehci: fix an interrupt calltrace error The system that use Synopsys USB host controllers goes to suspend when using USB audio player. This causes the USB host controller continuous send interrupt signal to system, When the number of interrupts exceeds 100000, the system will forcibly close the interrupts and output a calltrace error. When the system goes to suspend, the last interrupt is reported to the driver. At this time, the system has set the state to suspend. This causes the last interrupt to not be processed by the system and not clear the interrupt flag. This uncleared interrupt flag constantly triggers new interrupt event. This causing the driver to receive more than 100,000 interrupts, which causes the system to forcibly close the interrupt report and report the calltrace error. so, when the driver goes to sleep and changes the system state to suspend, the interrupt flag needs to be cleared. Signed-off-by: Longfang Liu Acked-by: Alan Stern Link: https://lore.kernel.org/r/1610416647-45774-1-git-send-email-liulongfang@huawei.com Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/ehci-hub.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/host/ehci-hub.c b/drivers/usb/host/ehci-hub.c index 087402aec5cbe..9f9ab5ccea889 100644 --- a/drivers/usb/host/ehci-hub.c +++ b/drivers/usb/host/ehci-hub.c @@ -345,6 +345,9 @@ static int ehci_bus_suspend (struct usb_hcd *hcd) unlink_empty_async_suspended(ehci); + /* Some Synopsys controllers mistakenly leave IAA turned on */ + ehci_writel(ehci, STS_IAA, &ehci->regs->status); + /* Any IAA cycle that started before the suspend is now invalid */ end_iaa_cycle(ehci); ehci_handle_start_intr_unlinks(ehci); -- GitLab From 4e0dcf62ab4cf917d0cbe751b8bf229a065248d4 Mon Sep 17 00:00:00 2001 From: Ryan Chen Date: Fri, 8 Jan 2021 16:12:38 +0800 Subject: [PATCH 0543/2012] usb: gadget: aspeed: fix stop dma register setting. The vhub engine has two dma mode, one is descriptor list, another is single stage DMA. Each mode has different stop register setting. Descriptor list operation (bit2) : 0 disable reset, 1: enable reset Single mode operation (bit0) : 0 : disable, 1: enable Fixes: 7ecca2a4080c ("usb/gadget: Add driver for Aspeed SoC virtual hub") Cc: stable Acked-by: Felipe Balbi Acked-by: Joel Stanley Signed-off-by: Ryan Chen Link: https://lore.kernel.org/r/20210108081238.10199-2-ryan_chen@aspeedtech.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/udc/aspeed-vhub/epn.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/usb/gadget/udc/aspeed-vhub/epn.c b/drivers/usb/gadget/udc/aspeed-vhub/epn.c index 0bd6b20435b8a..02d8bfae58fb1 100644 --- a/drivers/usb/gadget/udc/aspeed-vhub/epn.c +++ b/drivers/usb/gadget/udc/aspeed-vhub/epn.c @@ -420,7 +420,10 @@ static void ast_vhub_stop_active_req(struct ast_vhub_ep *ep, u32 state, reg, loops; /* Stop DMA activity */ - writel(0, ep->epn.regs + AST_VHUB_EP_DMA_CTLSTAT); + if (ep->epn.desc_mode) + writel(VHUB_EP_DMA_CTRL_RESET, ep->epn.regs + AST_VHUB_EP_DMA_CTLSTAT); + else + writel(0, ep->epn.regs + AST_VHUB_EP_DMA_CTLSTAT); /* Wait for it to complete */ for (loops = 0; loops < 1000; loops++) { -- GitLab From 694a1c0adebee9152a9ba0320468f7921aca647d Mon Sep 17 00:00:00 2001 From: Tian Tao Date: Mon, 28 Dec 2020 09:26:14 +0800 Subject: [PATCH 0544/2012] iommu/vt-d: Fix duplicate included linux/dma-map-ops.h linux/dma-map-ops.h is included more than once, Remove the one that isn't necessary. Signed-off-by: Tian Tao Acked-by: Lu Baolu Link: https://lore.kernel.org/r/1609118774-10083-1-git-send-email-tiantao6@hisilicon.com Signed-off-by: Will Deacon --- drivers/iommu/intel/iommu.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 65cf06d70bf4e..f665322a09919 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -38,7 +38,6 @@ #include #include #include -#include #include #include #include -- GitLab From ffaf97899c4a58b9fefb11534f730785443611a8 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 11 Jan 2021 22:52:18 +0000 Subject: [PATCH 0545/2012] drm/i915/gt: Limit VFE threads based on GT MEDIA_STATE_VFE only accepts the 'maximum number of threads' in the range [0, n-1] where n is #EU * (#threads/EU) with the number of threads based on plaform and the number of EU based on the number of slices and subslices. This is a fixed number per platform/gt, so appropriately limit the number of threads we spawn to match the device. v2: Oversaturate the system with tasks to force execution on every HW thread; if the thread idles it is returned to the pool and may be reused again before an unused thread. v3: Fix more state commands, which was causing Baytrail to barf. v4: STATE_CACHE_INVALIDATE requires a stall on Ivybridge Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/2024 Fixes: 47f8253d2b89 ("drm/i915/gen7: Clear all EU/L3 residual contexts") Signed-off-by: Chris Wilson Cc: Mika Kuoppala Cc: Prathap Kumar Valsan Cc: Akeem G Abodunrin Cc: Jon Bloomfield Cc: Rodrigo Vivi Cc: Randy Wright Cc: stable@vger.kernel.org # v5.7+ Reviewed-by: Akeem G Abodunrin Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20210111225220.3483-1-chris@chris-wilson.co.uk (cherry picked from commit eebfb32e26851662d24ea86dd381fd0f83cd4b47) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/gt/gen7_renderclear.c | 157 ++++++++++++--------- 1 file changed, 94 insertions(+), 63 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/gen7_renderclear.c b/drivers/gpu/drm/i915/gt/gen7_renderclear.c index d93d85cd30270..94465374ca2fe 100644 --- a/drivers/gpu/drm/i915/gt/gen7_renderclear.c +++ b/drivers/gpu/drm/i915/gt/gen7_renderclear.c @@ -7,8 +7,6 @@ #include "i915_drv.h" #include "intel_gpu_commands.h" -#define MAX_URB_ENTRIES 64 -#define STATE_SIZE (4 * 1024) #define GT3_INLINE_DATA_DELAYS 0x1E00 #define batch_advance(Y, CS) GEM_BUG_ON((Y)->end != (CS)) @@ -34,38 +32,59 @@ struct batch_chunk { }; struct batch_vals { - u32 max_primitives; - u32 max_urb_entries; - u32 cmd_size; - u32 state_size; + u32 max_threads; u32 state_start; - u32 batch_size; + u32 surface_start; u32 surface_height; u32 surface_width; - u32 scratch_size; - u32 max_size; + u32 size; }; +static inline int num_primitives(const struct batch_vals *bv) +{ + /* + * We need to saturate the GPU with work in order to dispatch + * a shader on every HW thread, and clear the thread-local registers. + * In short, we have to dispatch work faster than the shaders can + * run in order to fill the EU and occupy each HW thread. + */ + return bv->max_threads; +} + static void batch_get_defaults(struct drm_i915_private *i915, struct batch_vals *bv) { if (IS_HASWELL(i915)) { - bv->max_primitives = 280; - bv->max_urb_entries = MAX_URB_ENTRIES; + switch (INTEL_INFO(i915)->gt) { + default: + case 1: + bv->max_threads = 70; + break; + case 2: + bv->max_threads = 140; + break; + case 3: + bv->max_threads = 280; + break; + } bv->surface_height = 16 * 16; bv->surface_width = 32 * 2 * 16; } else { - bv->max_primitives = 128; - bv->max_urb_entries = MAX_URB_ENTRIES / 2; + switch (INTEL_INFO(i915)->gt) { + default: + case 1: /* including vlv */ + bv->max_threads = 36; + break; + case 2: + bv->max_threads = 128; + break; + } bv->surface_height = 16 * 8; bv->surface_width = 32 * 16; } - bv->cmd_size = bv->max_primitives * 4096; - bv->state_size = STATE_SIZE; - bv->state_start = bv->cmd_size; - bv->batch_size = bv->cmd_size + bv->state_size; - bv->scratch_size = bv->surface_height * bv->surface_width; - bv->max_size = bv->batch_size + bv->scratch_size; + bv->state_start = round_up(SZ_1K + num_primitives(bv) * 64, SZ_4K); + bv->surface_start = bv->state_start + SZ_4K; + bv->size = bv->surface_start + bv->surface_height * bv->surface_width; } static void batch_init(struct batch_chunk *bc, @@ -155,7 +174,8 @@ static u32 gen7_fill_binding_table(struct batch_chunk *state, const struct batch_vals *bv) { - u32 surface_start = gen7_fill_surface_state(state, bv->batch_size, bv); + u32 surface_start = + gen7_fill_surface_state(state, bv->surface_start, bv); u32 *cs = batch_alloc_items(state, 32, 8); u32 offset = batch_offset(state, cs); @@ -214,9 +234,9 @@ static void gen7_emit_state_base_address(struct batch_chunk *batch, u32 surface_state_base) { - u32 *cs = batch_alloc_items(batch, 0, 12); + u32 *cs = batch_alloc_items(batch, 0, 10); - *cs++ = STATE_BASE_ADDRESS | (12 - 2); + *cs++ = STATE_BASE_ADDRESS | (10 - 2); /* general */ *cs++ = batch_addr(batch) | BASE_ADDRESS_MODIFY; /* surface */ @@ -233,8 +253,6 @@ gen7_emit_state_base_address(struct batch_chunk *batch, *cs++ = BASE_ADDRESS_MODIFY; *cs++ = 0; *cs++ = BASE_ADDRESS_MODIFY; - *cs++ = 0; - *cs++ = 0; batch_advance(batch, cs); } @@ -244,8 +262,7 @@ gen7_emit_vfe_state(struct batch_chunk *batch, u32 urb_size, u32 curbe_size, u32 mode) { - u32 urb_entries = bv->max_urb_entries; - u32 threads = bv->max_primitives - 1; + u32 threads = bv->max_threads - 1; u32 *cs = batch_alloc_items(batch, 32, 8); *cs++ = MEDIA_VFE_STATE | (8 - 2); @@ -254,7 +271,7 @@ gen7_emit_vfe_state(struct batch_chunk *batch, *cs++ = 0; /* number of threads & urb entries for GPGPU vs Media Mode */ - *cs++ = threads << 16 | urb_entries << 8 | mode << 2; + *cs++ = threads << 16 | 1 << 8 | mode << 2; *cs++ = 0; @@ -293,17 +310,12 @@ gen7_emit_media_object(struct batch_chunk *batch, { unsigned int x_offset = (media_object_index % 16) * 64; unsigned int y_offset = (media_object_index / 16) * 16; - unsigned int inline_data_size; - unsigned int media_batch_size; - unsigned int i; + unsigned int pkt = 6 + 3; u32 *cs; - inline_data_size = 112 * 8; - media_batch_size = inline_data_size + 6; - - cs = batch_alloc_items(batch, 8, media_batch_size); + cs = batch_alloc_items(batch, 8, pkt); - *cs++ = MEDIA_OBJECT | (media_batch_size - 2); + *cs++ = MEDIA_OBJECT | (pkt - 2); /* interface descriptor offset */ *cs++ = 0; @@ -317,25 +329,44 @@ gen7_emit_media_object(struct batch_chunk *batch, *cs++ = 0; /* inline */ - *cs++ = (y_offset << 16) | (x_offset); + *cs++ = y_offset << 16 | x_offset; *cs++ = 0; *cs++ = GT3_INLINE_DATA_DELAYS; - for (i = 3; i < inline_data_size; i++) - *cs++ = 0; batch_advance(batch, cs); } static void gen7_emit_pipeline_flush(struct batch_chunk *batch) { - u32 *cs = batch_alloc_items(batch, 0, 5); + u32 *cs = batch_alloc_items(batch, 0, 4); - *cs++ = GFX_OP_PIPE_CONTROL(5); - *cs++ = PIPE_CONTROL_STATE_CACHE_INVALIDATE | - PIPE_CONTROL_GLOBAL_GTT_IVB; + *cs++ = GFX_OP_PIPE_CONTROL(4); + *cs++ = PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH | + PIPE_CONTROL_DEPTH_CACHE_FLUSH | + PIPE_CONTROL_DC_FLUSH_ENABLE | + PIPE_CONTROL_CS_STALL; *cs++ = 0; *cs++ = 0; + + batch_advance(batch, cs); +} + +static void gen7_emit_pipeline_invalidate(struct batch_chunk *batch) +{ + u32 *cs = batch_alloc_items(batch, 0, 8); + + /* ivb: Stall before STATE_CACHE_INVALIDATE */ + *cs++ = GFX_OP_PIPE_CONTROL(4); + *cs++ = PIPE_CONTROL_STALL_AT_SCOREBOARD | + PIPE_CONTROL_CS_STALL; + *cs++ = 0; + *cs++ = 0; + + *cs++ = GFX_OP_PIPE_CONTROL(4); + *cs++ = PIPE_CONTROL_STATE_CACHE_INVALIDATE; *cs++ = 0; + *cs++ = 0; + batch_advance(batch, cs); } @@ -344,34 +375,34 @@ static void emit_batch(struct i915_vma * const vma, const struct batch_vals *bv) { struct drm_i915_private *i915 = vma->vm->i915; - unsigned int desc_count = 64; - const u32 urb_size = 112; + const unsigned int desc_count = 1; + const unsigned int urb_size = 1; struct batch_chunk cmds, state; - u32 interface_descriptor; + u32 descriptors; unsigned int i; - batch_init(&cmds, vma, start, 0, bv->cmd_size); - batch_init(&state, vma, start, bv->state_start, bv->state_size); + batch_init(&cmds, vma, start, 0, bv->state_start); + batch_init(&state, vma, start, bv->state_start, SZ_4K); - interface_descriptor = - gen7_fill_interface_descriptor(&state, bv, - IS_HASWELL(i915) ? - &cb_kernel_hsw : - &cb_kernel_ivb, - desc_count); - gen7_emit_pipeline_flush(&cmds); + descriptors = gen7_fill_interface_descriptor(&state, bv, + IS_HASWELL(i915) ? + &cb_kernel_hsw : + &cb_kernel_ivb, + desc_count); + + gen7_emit_pipeline_invalidate(&cmds); batch_add(&cmds, PIPELINE_SELECT | PIPELINE_SELECT_MEDIA); batch_add(&cmds, MI_NOOP); - gen7_emit_state_base_address(&cmds, interface_descriptor); + gen7_emit_pipeline_invalidate(&cmds); + gen7_emit_pipeline_flush(&cmds); + gen7_emit_state_base_address(&cmds, descriptors); + gen7_emit_pipeline_invalidate(&cmds); gen7_emit_vfe_state(&cmds, bv, urb_size - 1, 0, 0); + gen7_emit_interface_descriptor_load(&cmds, descriptors, desc_count); - gen7_emit_interface_descriptor_load(&cmds, - interface_descriptor, - desc_count); - - for (i = 0; i < bv->max_primitives; i++) + for (i = 0; i < num_primitives(bv); i++) gen7_emit_media_object(&cmds, i); batch_add(&cmds, MI_BATCH_BUFFER_END); @@ -385,15 +416,15 @@ int gen7_setup_clear_gpr_bb(struct intel_engine_cs * const engine, batch_get_defaults(engine->i915, &bv); if (!vma) - return bv.max_size; + return bv.size; - GEM_BUG_ON(vma->obj->base.size < bv.max_size); + GEM_BUG_ON(vma->obj->base.size < bv.size); batch = i915_gem_object_pin_map(vma->obj, I915_MAP_WC); if (IS_ERR(batch)) return PTR_ERR(batch); - emit_batch(vma, memset(batch, 0, bv.max_size), &bv); + emit_batch(vma, memset(batch, 0, bv.size), &bv); i915_gem_object_flush_map(vma->obj); __i915_gem_object_release_map(vma->obj); -- GitLab From 09aa9e45863e9e25dfbf350bae89fc3c2964482c Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 11 Jan 2021 22:52:19 +0000 Subject: [PATCH 0546/2012] drm/i915/gt: Restore clear-residual mitigations for Ivybridge, Baytrail The mitigation is required for all gen7 platforms, now that it does not cause GPU hangs, restore it for Ivybridge and Baytrail. Fixes: 47f8253d2b89 ("drm/i915/gen7: Clear all EU/L3 residual contexts") Signed-off-by: Chris Wilson Cc: Mika Kuoppala Cc: Prathap Kumar Valsan Cc: Akeem G Abodunrin Cc: Bloomfield Jon Reviewed-by: Akeem G Abodunrin Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20210111225220.3483-2-chris@chris-wilson.co.uk (cherry picked from commit 008ead6ef8f588a8c832adfe9db201d9be5fd410) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/gt/intel_ring_submission.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c index a41b43f445b8a..d809789f9cfb7 100644 --- a/drivers/gpu/drm/i915/gt/intel_ring_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c @@ -1290,7 +1290,7 @@ int intel_ring_submission_setup(struct intel_engine_cs *engine) GEM_BUG_ON(timeline->hwsp_ggtt != engine->status_page.vma); - if (IS_HASWELL(engine->i915) && engine->class == RENDER_CLASS) { + if (IS_GEN(engine->i915, 7) && engine->class == RENDER_CLASS) { err = gen7_ctx_switch_bb_init(engine); if (err) goto err_ring_unpin; -- GitLab From 984cadea032b103c5824a5f29d0a36b3e9df6333 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 11 Jan 2021 22:52:20 +0000 Subject: [PATCH 0547/2012] drm/i915: Allow the sysadmin to override security mitigations The clear-residuals mitigation is a relatively heavy hammer and under some circumstances the user may wish to forgo the context isolation in order to meet some performance requirement. Introduce a generic module parameter to allow selectively enabling/disabling different mitigations. To disable just the clear-residuals mitigation (on Ivybridge, Baytrail, or Haswell) use the module parameter: i915.mitigations=auto,!residuals Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/1858 Fixes: 47f8253d2b89 ("drm/i915/gen7: Clear all EU/L3 residual contexts") Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Cc: Jon Bloomfield Cc: Rodrigo Vivi Cc: stable@vger.kernel.org # v5.7 Reviewed-by: Jon Bloomfield Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20210111225220.3483-3-chris@chris-wilson.co.uk (cherry picked from commit f7452c7cbd5b5dfb9a6c84cb20bea04c89be50cd) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/Makefile | 1 + .../gpu/drm/i915/gt/intel_ring_submission.c | 4 +- drivers/gpu/drm/i915/i915_mitigations.c | 146 ++++++++++++++++++ drivers/gpu/drm/i915/i915_mitigations.h | 13 ++ 4 files changed, 163 insertions(+), 1 deletion(-) create mode 100644 drivers/gpu/drm/i915/i915_mitigations.c create mode 100644 drivers/gpu/drm/i915/i915_mitigations.h diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index e5574e506a5cc..6d9e81ea67f4b 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -38,6 +38,7 @@ i915-y += i915_drv.o \ i915_config.o \ i915_irq.o \ i915_getparam.o \ + i915_mitigations.o \ i915_params.o \ i915_pci.o \ i915_scatterlist.o \ diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c index d809789f9cfb7..ecf3a6118a6d7 100644 --- a/drivers/gpu/drm/i915/gt/intel_ring_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c @@ -32,6 +32,7 @@ #include "gen6_ppgtt.h" #include "gen7_renderclear.h" #include "i915_drv.h" +#include "i915_mitigations.h" #include "intel_breadcrumbs.h" #include "intel_context.h" #include "intel_gt.h" @@ -886,7 +887,8 @@ static int switch_context(struct i915_request *rq) GEM_BUG_ON(HAS_EXECLISTS(engine->i915)); if (engine->wa_ctx.vma && ce != engine->kernel_context) { - if (engine->wa_ctx.vma->private != ce) { + if (engine->wa_ctx.vma->private != ce && + i915_mitigate_clear_residuals()) { ret = clear_residuals(rq); if (ret) return ret; diff --git a/drivers/gpu/drm/i915/i915_mitigations.c b/drivers/gpu/drm/i915/i915_mitigations.c new file mode 100644 index 0000000000000..84f12598d1458 --- /dev/null +++ b/drivers/gpu/drm/i915/i915_mitigations.c @@ -0,0 +1,146 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2021 Intel Corporation + */ + +#include +#include +#include +#include + +#include "i915_drv.h" +#include "i915_mitigations.h" + +static unsigned long mitigations __read_mostly = ~0UL; + +enum { + CLEAR_RESIDUALS = 0, +}; + +static const char * const names[] = { + [CLEAR_RESIDUALS] = "residuals", +}; + +bool i915_mitigate_clear_residuals(void) +{ + return READ_ONCE(mitigations) & BIT(CLEAR_RESIDUALS); +} + +static int mitigations_set(const char *val, const struct kernel_param *kp) +{ + unsigned long new = ~0UL; + char *str, *sep, *tok; + bool first = true; + int err = 0; + + BUILD_BUG_ON(ARRAY_SIZE(names) >= BITS_PER_TYPE(mitigations)); + + str = kstrdup(val, GFP_KERNEL); + if (!str) + return -ENOMEM; + + for (sep = str; (tok = strsep(&sep, ","));) { + bool enable = true; + int i; + + /* Be tolerant of leading/trailing whitespace */ + tok = strim(tok); + + if (first) { + first = false; + + if (!strcmp(tok, "auto")) + continue; + + new = 0; + if (!strcmp(tok, "off")) + continue; + } + + if (*tok == '!') { + enable = !enable; + tok++; + } + + if (!strncmp(tok, "no", 2)) { + enable = !enable; + tok += 2; + } + + if (*tok == '\0') + continue; + + for (i = 0; i < ARRAY_SIZE(names); i++) { + if (!strcmp(tok, names[i])) { + if (enable) + new |= BIT(i); + else + new &= ~BIT(i); + break; + } + } + if (i == ARRAY_SIZE(names)) { + pr_err("Bad \"%s.mitigations=%s\", '%s' is unknown\n", + DRIVER_NAME, val, tok); + err = -EINVAL; + break; + } + } + kfree(str); + if (err) + return err; + + WRITE_ONCE(mitigations, new); + return 0; +} + +static int mitigations_get(char *buffer, const struct kernel_param *kp) +{ + unsigned long local = READ_ONCE(mitigations); + int count, i; + bool enable; + + if (!local) + return scnprintf(buffer, PAGE_SIZE, "%s\n", "off"); + + if (local & BIT(BITS_PER_LONG - 1)) { + count = scnprintf(buffer, PAGE_SIZE, "%s,", "auto"); + enable = false; + } else { + enable = true; + count = 0; + } + + for (i = 0; i < ARRAY_SIZE(names); i++) { + if ((local & BIT(i)) != enable) + continue; + + count += scnprintf(buffer + count, PAGE_SIZE - count, + "%s%s,", enable ? "" : "!", names[i]); + } + + buffer[count - 1] = '\n'; + return count; +} + +static const struct kernel_param_ops ops = { + .set = mitigations_set, + .get = mitigations_get, +}; + +module_param_cb_unsafe(mitigations, &ops, NULL, 0600); +MODULE_PARM_DESC(mitigations, +"Selectively enable security mitigations for all Intel® GPUs in the system.\n" +"\n" +" auto -- enables all mitigations required for the platform [default]\n" +" off -- disables all mitigations\n" +"\n" +"Individual mitigations can be enabled by passing a comma-separated string,\n" +"e.g. mitigations=residuals to enable only clearing residuals or\n" +"mitigations=auto,noresiduals to disable only the clear residual mitigation.\n" +"Either '!' or 'no' may be used to switch from enabling the mitigation to\n" +"disabling it.\n" +"\n" +"Active mitigations for Ivybridge, Baytrail, Haswell:\n" +" residuals -- clear all thread-local registers between contexts" +); diff --git a/drivers/gpu/drm/i915/i915_mitigations.h b/drivers/gpu/drm/i915/i915_mitigations.h new file mode 100644 index 0000000000000..1359d8135287a --- /dev/null +++ b/drivers/gpu/drm/i915/i915_mitigations.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2021 Intel Corporation + */ + +#ifndef __I915_MITIGATIONS_H__ +#define __I915_MITIGATIONS_H__ + +#include + +bool i915_mitigate_clear_residuals(void); + +#endif /* __I915_MITIGATIONS_H__ */ -- GitLab From d78050ee35440d7879ed94011c52994b8932e96e Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Thu, 7 Jan 2021 14:40:08 +0000 Subject: [PATCH 0548/2012] arm64: Remove arm64_dma32_phys_limit and its uses With the introduction of a dynamic ZONE_DMA range based on DT or IORT information, there's no need for CMA allocations from the wider ZONE_DMA32 since on most platforms ZONE_DMA will cover the 32-bit addressable range. Remove the arm64_dma32_phys_limit and set arm64_dma_phys_limit to cover the smallest DMA range required on the platform. CMA allocation and crashkernel reservation now go in the dynamically sized ZONE_DMA, allowing correct functionality on RPi4. Signed-off-by: Catalin Marinas Cc: Chen Zhou Reviewed-by: Nicolas Saenz Julienne Tested-by: Nicolas Saenz Julienne # On RPi4B --- arch/arm64/include/asm/processor.h | 3 +-- arch/arm64/mm/init.c | 33 ++++++++++++++++-------------- 2 files changed, 19 insertions(+), 17 deletions(-) diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index 69ad25fbeae4a..ca2cd75d32863 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -94,8 +94,7 @@ #endif /* CONFIG_ARM64_FORCE_52BIT */ extern phys_addr_t arm64_dma_phys_limit; -extern phys_addr_t arm64_dma32_phys_limit; -#define ARCH_LOW_ADDRESS_LIMIT ((arm64_dma_phys_limit ? : arm64_dma32_phys_limit) - 1) +#define ARCH_LOW_ADDRESS_LIMIT (arm64_dma_phys_limit - 1) struct debug_info { #ifdef CONFIG_HAVE_HW_BREAKPOINT diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 7deddf56f7c3e..709d98fea90cc 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -53,13 +53,13 @@ s64 memstart_addr __ro_after_init = -1; EXPORT_SYMBOL(memstart_addr); /* - * We create both ZONE_DMA and ZONE_DMA32. ZONE_DMA covers the first 1G of - * memory as some devices, namely the Raspberry Pi 4, have peripherals with - * this limited view of the memory. ZONE_DMA32 will cover the rest of the 32 - * bit addressable memory area. + * If the corresponding config options are enabled, we create both ZONE_DMA + * and ZONE_DMA32. By default ZONE_DMA covers the 32-bit addressable memory + * unless restricted on specific platforms (e.g. 30-bit on Raspberry Pi 4). + * In such case, ZONE_DMA32 covers the rest of the 32-bit addressable memory, + * otherwise it is empty. */ phys_addr_t arm64_dma_phys_limit __ro_after_init; -phys_addr_t arm64_dma32_phys_limit __ro_after_init; #ifdef CONFIG_KEXEC_CORE /* @@ -84,7 +84,7 @@ static void __init reserve_crashkernel(void) if (crash_base == 0) { /* Current arm64 boot protocol requires 2MB alignment */ - crash_base = memblock_find_in_range(0, arm64_dma32_phys_limit, + crash_base = memblock_find_in_range(0, arm64_dma_phys_limit, crash_size, SZ_2M); if (crash_base == 0) { pr_warn("cannot allocate crashkernel (size:0x%llx)\n", @@ -196,6 +196,7 @@ static void __init zone_sizes_init(unsigned long min, unsigned long max) unsigned long max_zone_pfns[MAX_NR_ZONES] = {0}; unsigned int __maybe_unused acpi_zone_dma_bits; unsigned int __maybe_unused dt_zone_dma_bits; + phys_addr_t __maybe_unused dma32_phys_limit = max_zone_phys(32); #ifdef CONFIG_ZONE_DMA acpi_zone_dma_bits = fls64(acpi_iort_dma_get_max_cpu_address()); @@ -205,8 +206,12 @@ static void __init zone_sizes_init(unsigned long min, unsigned long max) max_zone_pfns[ZONE_DMA] = PFN_DOWN(arm64_dma_phys_limit); #endif #ifdef CONFIG_ZONE_DMA32 - max_zone_pfns[ZONE_DMA32] = PFN_DOWN(arm64_dma32_phys_limit); + max_zone_pfns[ZONE_DMA32] = PFN_DOWN(dma32_phys_limit); + if (!arm64_dma_phys_limit) + arm64_dma_phys_limit = dma32_phys_limit; #endif + if (!arm64_dma_phys_limit) + arm64_dma_phys_limit = PHYS_MASK + 1; max_zone_pfns[ZONE_NORMAL] = max; free_area_init(max_zone_pfns); @@ -394,16 +399,9 @@ void __init arm64_memblock_init(void) early_init_fdt_scan_reserved_mem(); - if (IS_ENABLED(CONFIG_ZONE_DMA32)) - arm64_dma32_phys_limit = max_zone_phys(32); - else - arm64_dma32_phys_limit = PHYS_MASK + 1; - reserve_elfcorehdr(); high_memory = __va(memblock_end_of_DRAM() - 1) + 1; - - dma_contiguous_reserve(arm64_dma32_phys_limit); } void __init bootmem_init(void) @@ -438,6 +436,11 @@ void __init bootmem_init(void) sparse_init(); zone_sizes_init(min, max); + /* + * Reserve the CMA area after arm64_dma_phys_limit was initialised. + */ + dma_contiguous_reserve(arm64_dma_phys_limit); + /* * request_standard_resources() depends on crashkernel's memory being * reserved, so do it here. @@ -455,7 +458,7 @@ void __init bootmem_init(void) void __init mem_init(void) { if (swiotlb_force == SWIOTLB_FORCE || - max_pfn > PFN_DOWN(arm64_dma_phys_limit ? : arm64_dma32_phys_limit)) + max_pfn > PFN_DOWN(arm64_dma_phys_limit)) swiotlb_init(1); else swiotlb_force = SWIOTLB_NO_FORCE; -- GitLab From 895bee270863588fe3d46dca86cd15d461f47a7a Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 12 Jan 2021 19:02:29 +0100 Subject: [PATCH 0549/2012] Revert "driver core: Reorder devices on successful probe" This reverts commit 5b6164d3465fcc13b5679c860c452963443172a7. Stephan reports problems with this commit, so revert it for now. Fixes: 5b6164d3465f ("driver core: Reorder devices on successful probe") Link: https://lore.kernel.org/r/X/ycQpu7NIGI969v@gerhold.net Reported-by: Stephan Gerhold Cc: Jonathan Hunter Cc: Rafael. J. Wysocki Cc: Thierry Reding Signed-off-by: Greg Kroah-Hartman --- drivers/base/dd.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/drivers/base/dd.c b/drivers/base/dd.c index 0b76b54237781..9179825ff646f 100644 --- a/drivers/base/dd.c +++ b/drivers/base/dd.c @@ -370,13 +370,6 @@ static void driver_bound(struct device *dev) device_pm_check_callbacks(dev); - /* - * Reorder successfully probed devices to the end of the device list. - * This ensures that suspend/resume order matches probe order, which - * is usually what drivers rely on. - */ - device_pm_move_to_tail(dev); - /* * Make sure the device is no longer in one of the deferred lists and * kick off retrying all pending devices -- GitLab From 8e14f610159d524cd7aac37982826d3ef75c09e8 Mon Sep 17 00:00:00 2001 From: Ignat Korchagin Date: Sat, 9 Jan 2021 15:17:06 +0000 Subject: [PATCH 0550/2012] dm crypt: do not call bio_endio() from the dm-crypt tasklet Sometimes, when dm-crypt executes decryption in a tasklet, we may get "BUG: KASAN: use-after-free in tasklet_action_common.constprop..." with a kasan-enabled kernel. When the decryption fully completes in the tasklet, dm-crypt will call bio_endio(), which in turn will call clone_endio() from dm.c core code. That function frees the resources associated with the bio, including per bio private structures. For dm-crypt it will free the current struct dm_crypt_io, which contains our tasklet object, causing use-after-free, when the tasklet is being dequeued by the kernel. To avoid this, do not call bio_endio() from the current tasklet context, but delay its execution to the dm-crypt IO workqueue. Fixes: 39d42fa96ba1 ("dm crypt: add flags to optionally bypass kcryptd workqueues") Cc: # v5.9+ Signed-off-by: Ignat Korchagin Signed-off-by: Mike Snitzer --- drivers/md/dm-crypt.c | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index d2d6d3000f5bd..b4d7418b50368 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -1730,6 +1730,12 @@ static void crypt_inc_pending(struct dm_crypt_io *io) atomic_inc(&io->io_pending); } +static void kcryptd_io_bio_endio(struct work_struct *work) +{ + struct dm_crypt_io *io = container_of(work, struct dm_crypt_io, work); + bio_endio(io->base_bio); +} + /* * One of the bios was finished. Check for completion of * the whole request and correctly clean up the buffer. @@ -1752,7 +1758,23 @@ static void crypt_dec_pending(struct dm_crypt_io *io) kfree(io->integrity_metadata); base_bio->bi_status = error; - bio_endio(base_bio); + + /* + * If we are running this function from our tasklet, + * we can't call bio_endio() here, because it will call + * clone_endio() from dm.c, which in turn will + * free the current struct dm_crypt_io structure with + * our tasklet. In this case we need to delay bio_endio() + * execution to after the tasklet is done and dequeued. + */ + if (tasklet_trylock(&io->tasklet)) { + tasklet_unlock(&io->tasklet); + bio_endio(base_bio); + return; + } + + INIT_WORK(&io->work, kcryptd_io_bio_endio); + queue_work(cc->io_queue, &io->work); } /* -- GitLab From 98829137a6a04785c8812670a7fa16d7dd59f05a Mon Sep 17 00:00:00 2001 From: Taniya Das Date: Mon, 21 Dec 2020 23:33:36 +0530 Subject: [PATCH 0551/2012] clk: qcom: gcc-sc7180: Mark the camera abh clock always ON The camera clock controller requires the AHB clock, the driver when moved to use the pm_runtime_get() API, the camera ahb clock failed turn on before access, thus mark it as always ON. Reported-by: Stephen Boyd Fixes: 8d4025943e13 ("clk: qcom: camcc-sc7180: Use runtime PM ops instead of clk ones") Signed-off-by: Taniya Das Link: https://lore.kernel.org/r/1608573816-1465-1-git-send-email-tdas@codeaurora.org Signed-off-by: Stephen Boyd --- drivers/clk/qcom/gcc-sc7180.c | 21 +++------------------ 1 file changed, 3 insertions(+), 18 deletions(-) diff --git a/drivers/clk/qcom/gcc-sc7180.c b/drivers/clk/qcom/gcc-sc7180.c index d82d725ac2319..b05901b249172 100644 --- a/drivers/clk/qcom/gcc-sc7180.c +++ b/drivers/clk/qcom/gcc-sc7180.c @@ -891,21 +891,6 @@ static struct clk_branch gcc_boot_rom_ahb_clk = { }, }; -static struct clk_branch gcc_camera_ahb_clk = { - .halt_reg = 0xb008, - .halt_check = BRANCH_HALT, - .hwcg_reg = 0xb008, - .hwcg_bit = 1, - .clkr = { - .enable_reg = 0xb008, - .enable_mask = BIT(0), - .hw.init = &(struct clk_init_data){ - .name = "gcc_camera_ahb_clk", - .ops = &clk_branch2_ops, - }, - }, -}; - static struct clk_branch gcc_camera_hf_axi_clk = { .halt_reg = 0xb020, .halt_check = BRANCH_HALT, @@ -2317,7 +2302,6 @@ static struct clk_regmap *gcc_sc7180_clocks[] = { [GCC_AGGRE_UFS_PHY_AXI_CLK] = &gcc_aggre_ufs_phy_axi_clk.clkr, [GCC_AGGRE_USB3_PRIM_AXI_CLK] = &gcc_aggre_usb3_prim_axi_clk.clkr, [GCC_BOOT_ROM_AHB_CLK] = &gcc_boot_rom_ahb_clk.clkr, - [GCC_CAMERA_AHB_CLK] = &gcc_camera_ahb_clk.clkr, [GCC_CAMERA_HF_AXI_CLK] = &gcc_camera_hf_axi_clk.clkr, [GCC_CAMERA_THROTTLE_HF_AXI_CLK] = &gcc_camera_throttle_hf_axi_clk.clkr, [GCC_CAMERA_XO_CLK] = &gcc_camera_xo_clk.clkr, @@ -2519,11 +2503,12 @@ static int gcc_sc7180_probe(struct platform_device *pdev) /* * Keep the clocks always-ON - * GCC_CPUSS_GNOC_CLK, GCC_VIDEO_AHB_CLK, GCC_DISP_AHB_CLK - * GCC_GPU_CFG_AHB_CLK + * GCC_CPUSS_GNOC_CLK, GCC_VIDEO_AHB_CLK, GCC_CAMERA_AHB_CLK, + * GCC_DISP_AHB_CLK, GCC_GPU_CFG_AHB_CLK */ regmap_update_bits(regmap, 0x48004, BIT(0), BIT(0)); regmap_update_bits(regmap, 0x0b004, BIT(0), BIT(0)); + regmap_update_bits(regmap, 0x0b008, BIT(0), BIT(0)); regmap_update_bits(regmap, 0x0b00c, BIT(0), BIT(0)); regmap_update_bits(regmap, 0x71004, BIT(0), BIT(0)); -- GitLab From 73f6b7ed9835ad9f953aebd60dd720aabc487b81 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 30 Dec 2020 16:52:25 +0100 Subject: [PATCH 0552/2012] clk: imx: fix Kconfig warning for i.MX SCU clk A previous patch introduced a harmless randconfig warning: WARNING: unmet direct dependencies detected for MXC_CLK_SCU Depends on [n]: COMMON_CLK [=y] && ARCH_MXC [=n] && IMX_SCU [=y] && HAVE_ARM_SMCCC [=y] Selected by [m]: - CLK_IMX8QXP [=m] && COMMON_CLK [=y] && (ARCH_MXC [=n] && ARM64 [=y] || COMPILE_TEST [=y]) && IMX_SCU [=y] && HAVE_ARM_SMCCC [=y] Since the symbol is now hidden and only selected by other symbols, just remove the dependencies and require the other drivers to get it right. Fixes: 6247e31b7530 ("clk: imx: scu: fix MXC_CLK_SCU module build break") Reported-by: Anders Roxell Signed-off-by: Arnd Bergmann Link: https://lore.kernel.org/r/20201230155244.981757-1-arnd@kernel.org Signed-off-by: Stephen Boyd --- drivers/clk/imx/Kconfig | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/clk/imx/Kconfig b/drivers/clk/imx/Kconfig index 3061896503f30..47d9ec3abd2f7 100644 --- a/drivers/clk/imx/Kconfig +++ b/drivers/clk/imx/Kconfig @@ -6,8 +6,6 @@ config MXC_CLK config MXC_CLK_SCU tristate - depends on ARCH_MXC - depends on IMX_SCU && HAVE_ARM_SMCCC config CLK_IMX1 def_bool SOC_IMX1 -- GitLab From 4be34f3d0731b38a1b24566b37fbb39500aaf3a2 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Tue, 12 Jan 2021 08:28:29 -0800 Subject: [PATCH 0553/2012] bpf: Don't leak memory in bpf getsockopt when optlen == 0 optlen == 0 indicates that the kernel should ignore BPF buffer and use the original one from the user. We, however, forget to free the temporary buffer that we've allocated for BPF. Fixes: d8fe449a9c51 ("bpf: Don't return EINVAL from {get,set}sockopt when optlen > PAGE_SIZE") Reported-by: Martin KaFai Lau Signed-off-by: Stanislav Fomichev Signed-off-by: Daniel Borkmann Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20210112162829.775079-1-sdf@google.com --- kernel/bpf/cgroup.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c index 6ec088a96302f..96555a8a2c545 100644 --- a/kernel/bpf/cgroup.c +++ b/kernel/bpf/cgroup.c @@ -1391,12 +1391,13 @@ int __cgroup_bpf_run_filter_setsockopt(struct sock *sk, int *level, if (ctx.optlen != 0) { *optlen = ctx.optlen; *kernel_optval = ctx.optval; + /* export and don't free sockopt buf */ + return 0; } } out: - if (ret) - sockopt_free_buf(&ctx); + sockopt_free_buf(&ctx); return ret; } -- GitLab From fd2383093593b23f8814a879093b746e502fe3cf Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Sat, 9 Jan 2021 04:33:14 +0300 Subject: [PATCH 0554/2012] clk: qcom: gcc-sm250: Use floor ops for sdcc clks Followup to the commits 5e4b7e82d497 ("clk: qcom: gcc-sdm845: Use floor ops for sdcc clks") and 6d37a8d19283 ("clk: qcom: gcc-sc7180: Use floor ops for sdcc clks"). Use floor ops for sdcc clocks on sm8250. Signed-off-by: Dmitry Baryshkov Fixes: 3e5770921a88 ("clk: qcom: gcc: Add global clock controller driver for SM8250") Link: https://lore.kernel.org/r/20210109013314.3443134-1-dmitry.baryshkov@linaro.org Signed-off-by: Stephen Boyd --- drivers/clk/qcom/gcc-sm8250.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/clk/qcom/gcc-sm8250.c b/drivers/clk/qcom/gcc-sm8250.c index 6cb6617b8d88c..ab594a0f0c408 100644 --- a/drivers/clk/qcom/gcc-sm8250.c +++ b/drivers/clk/qcom/gcc-sm8250.c @@ -722,7 +722,7 @@ static struct clk_rcg2 gcc_sdcc2_apps_clk_src = { .name = "gcc_sdcc2_apps_clk_src", .parent_data = gcc_parent_data_4, .num_parents = 5, - .ops = &clk_rcg2_ops, + .ops = &clk_rcg2_floor_ops, }, }; @@ -745,7 +745,7 @@ static struct clk_rcg2 gcc_sdcc4_apps_clk_src = { .name = "gcc_sdcc4_apps_clk_src", .parent_data = gcc_parent_data_0, .num_parents = 3, - .ops = &clk_rcg2_ops, + .ops = &clk_rcg2_floor_ops, }, }; -- GitLab From c361c5a6c559d1e0a2717abe9162a71aa602954f Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Sun, 3 Jan 2021 14:54:53 +0100 Subject: [PATCH 0555/2012] clk: mmp2: fix build without CONFIG_PM pm_clk_suspend()/pm_clk_resume() are defined as NULL pointers rather than empty inline stubs without CONFIG_PM: drivers/clk/mmp/clk-audio.c:402:16: error: called object type 'void *' is not a function or function pointer pm_clk_suspend(dev); drivers/clk/mmp/clk-audio.c:411:15: error: called object type 'void *' is not a function or function pointer pm_clk_resume(dev); I tried redefining the helper functions, but that caused additional problems. This is the simple solution of replacing the __maybe_unused trick with an #ifdef. Fixes: 725262d29139 ("clk: mmp2: Add audio clock controller driver") Signed-off-by: Arnd Bergmann Link: https://lore.kernel.org/r/20210103135503.3668784-1-arnd@kernel.org Signed-off-by: Stephen Boyd --- drivers/clk/mmp/clk-audio.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/clk/mmp/clk-audio.c b/drivers/clk/mmp/clk-audio.c index eea69d498bd27..7aa7f4a9564fd 100644 --- a/drivers/clk/mmp/clk-audio.c +++ b/drivers/clk/mmp/clk-audio.c @@ -392,7 +392,8 @@ static int mmp2_audio_clk_remove(struct platform_device *pdev) return 0; } -static int __maybe_unused mmp2_audio_clk_suspend(struct device *dev) +#ifdef CONFIG_PM +static int mmp2_audio_clk_suspend(struct device *dev) { struct mmp2_audio_clk *priv = dev_get_drvdata(dev); @@ -404,7 +405,7 @@ static int __maybe_unused mmp2_audio_clk_suspend(struct device *dev) return 0; } -static int __maybe_unused mmp2_audio_clk_resume(struct device *dev) +static int mmp2_audio_clk_resume(struct device *dev) { struct mmp2_audio_clk *priv = dev_get_drvdata(dev); @@ -415,6 +416,7 @@ static int __maybe_unused mmp2_audio_clk_resume(struct device *dev) return 0; } +#endif static const struct dev_pm_ops mmp2_audio_clk_pm_ops = { SET_RUNTIME_PM_OPS(mmp2_audio_clk_suspend, mmp2_audio_clk_resume, NULL) -- GitLab From 9caa7ff509add50959a793b811cc7c9339e281cd Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 6 Jan 2021 15:36:20 +0100 Subject: [PATCH 0556/2012] x86/entry: Fix noinstr fail vmlinux.o: warning: objtool: __do_fast_syscall_32()+0x47: call to syscall_enter_from_user_mode_work() leaves .noinstr.text section Fixes: 4facb95b7ada ("x86/entry: Unbreak 32bit fast syscall") Reported-by: Randy Dunlap Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20210106144017.472696632@infradead.org --- arch/x86/entry/common.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c index 18d8f17f755c4..0904f5676e4d8 100644 --- a/arch/x86/entry/common.c +++ b/arch/x86/entry/common.c @@ -73,10 +73,8 @@ static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs, unsigned int nr) { if (likely(nr < IA32_NR_syscalls)) { - instrumentation_begin(); nr = array_index_nospec(nr, IA32_NR_syscalls); regs->ax = ia32_sys_call_table[nr](regs); - instrumentation_end(); } } @@ -91,8 +89,11 @@ __visible noinstr void do_int80_syscall_32(struct pt_regs *regs) * or may not be necessary, but it matches the old asm behavior. */ nr = (unsigned int)syscall_enter_from_user_mode(regs, nr); + instrumentation_begin(); do_syscall_32_irqs_on(regs, nr); + + instrumentation_end(); syscall_exit_to_user_mode(regs); } @@ -121,11 +122,12 @@ static noinstr bool __do_fast_syscall_32(struct pt_regs *regs) res = get_user(*(u32 *)®s->bp, (u32 __user __force *)(unsigned long)(u32)regs->sp); } - instrumentation_end(); if (res) { /* User code screwed up. */ regs->ax = -EFAULT; + + instrumentation_end(); syscall_exit_to_user_mode(regs); return false; } @@ -135,6 +137,8 @@ static noinstr bool __do_fast_syscall_32(struct pt_regs *regs) /* Now this is just like a normal syscall. */ do_syscall_32_irqs_on(regs, nr); + + instrumentation_end(); syscall_exit_to_user_mode(regs); return true; } -- GitLab From a1d5c98aac33a5a0004ecf88905dcc261c52f988 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 6 Jan 2021 15:36:21 +0100 Subject: [PATCH 0557/2012] x86/sev: Fix nonistr violation When the compiler fails to inline, it violates nonisntr: vmlinux.o: warning: objtool: __sev_es_nmi_complete()+0xc7: call to sev_es_wr_ghcb_msr() leaves .noinstr.text section Fixes: 4ca68e023b11 ("x86/sev-es: Handle NMI State") Reported-by: Randy Dunlap Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20210106144017.532902065@infradead.org --- arch/x86/kernel/sev-es.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/sev-es.c b/arch/x86/kernel/sev-es.c index ab31c34ba508c..84c1821819afb 100644 --- a/arch/x86/kernel/sev-es.c +++ b/arch/x86/kernel/sev-es.c @@ -225,7 +225,7 @@ static inline u64 sev_es_rd_ghcb_msr(void) return __rdmsr(MSR_AMD64_SEV_ES_GHCB); } -static inline void sev_es_wr_ghcb_msr(u64 val) +static __always_inline void sev_es_wr_ghcb_msr(u64 val) { u32 low, high; -- GitLab From 0afda3a888dccf12557b41ef42eee942327d122b Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 6 Jan 2021 15:36:22 +0100 Subject: [PATCH 0558/2012] locking/lockdep: Cure noinstr fail When the compiler doesn't feel like inlining, it causes a noinstr fail: vmlinux.o: warning: objtool: lock_is_held_type()+0xb: call to lockdep_enabled() leaves .noinstr.text section Fixes: 4d004099a668 ("lockdep: Fix lockdep recursion") Reported-by: Randy Dunlap Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20210106144017.592595176@infradead.org --- kernel/locking/lockdep.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index c1418b47f625a..02bc5b8f1eb27 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -79,7 +79,7 @@ module_param(lock_stat, int, 0644); DEFINE_PER_CPU(unsigned int, lockdep_recursion); EXPORT_PER_CPU_SYMBOL_GPL(lockdep_recursion); -static inline bool lockdep_enabled(void) +static __always_inline bool lockdep_enabled(void) { if (!debug_locks) return false; -- GitLab From 77ca93a6b1223e210e58e1000c09d8d420403c94 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 6 Jan 2021 15:36:23 +0100 Subject: [PATCH 0559/2012] locking/lockdep: Avoid noinstr warning for DEBUG_LOCKDEP vmlinux.o: warning: objtool: lock_is_held_type()+0x60: call to check_flags.part.0() leaves .noinstr.text section Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20210106144017.652218215@infradead.org --- kernel/locking/lockdep.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index 02bc5b8f1eb27..bdaf4829098c0 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -5271,12 +5271,15 @@ static void __lock_unpin_lock(struct lockdep_map *lock, struct pin_cookie cookie /* * Check whether we follow the irq-flags state precisely: */ -static void check_flags(unsigned long flags) +static noinstr void check_flags(unsigned long flags) { #if defined(CONFIG_PROVE_LOCKING) && defined(CONFIG_DEBUG_LOCKDEP) if (!debug_locks) return; + /* Get the warning out.. */ + instrumentation_begin(); + if (irqs_disabled_flags(flags)) { if (DEBUG_LOCKS_WARN_ON(lockdep_hardirqs_enabled())) { printk("possible reason: unannotated irqs-off.\n"); @@ -5304,6 +5307,8 @@ static void check_flags(unsigned long flags) if (!debug_locks) print_irqtrace_events(current); + + instrumentation_end(); #endif } -- GitLab From 737495361d4469477ffe45d51e6fc56f44f3cc6a Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 6 Jan 2021 15:36:24 +0100 Subject: [PATCH 0560/2012] x86/mce: Remove explicit/superfluous tracing There's some explicit tracing left in exc_machine_check_kernel(), remove it, as it's already implied by irqentry_nmi_enter(). Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20210106144017.719310466@infradead.org --- arch/x86/kernel/cpu/mce/core.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c index 13d3f1cbda176..e133ce1e562b3 100644 --- a/arch/x86/kernel/cpu/mce/core.c +++ b/arch/x86/kernel/cpu/mce/core.c @@ -1992,10 +1992,9 @@ static __always_inline void exc_machine_check_kernel(struct pt_regs *regs) * that out because it's an indirect call. Annotate it. */ instrumentation_begin(); - trace_hardirqs_off_finish(); + machine_check_vector(regs); - if (regs->flags & X86_EFLAGS_IF) - trace_hardirqs_on_prepare(); + instrumentation_end(); irqentry_nmi_exit(regs, irq_state); } @@ -2004,7 +2003,9 @@ static __always_inline void exc_machine_check_user(struct pt_regs *regs) { irqentry_enter_from_user_mode(regs); instrumentation_begin(); + machine_check_vector(regs); + instrumentation_end(); irqentry_exit_to_user_mode(regs); } -- GitLab From 66a425011c61e71560c234492d204e83cfb73d1d Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 7 Jan 2021 11:14:25 +0100 Subject: [PATCH 0561/2012] x86: __always_inline __{rd,wr}msr() When the compiler choses to not inline the trivial MSR helpers: vmlinux.o: warning: objtool: __sev_es_nmi_complete()+0xce: call to __wrmsr.constprop.14() leaves .noinstr.text section Reported-by: Randy Dunlap Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Thomas Gleixner Acked-by: Randy Dunlap # build-tested Link: https://lore.kernel.org/r/X/bf3gV+BW7kGEsB@hirez.programming.kicks-ass.net --- arch/x86/include/asm/msr.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h index 0b4920a7238e3..e16cccdd04207 100644 --- a/arch/x86/include/asm/msr.h +++ b/arch/x86/include/asm/msr.h @@ -86,7 +86,7 @@ static inline void do_trace_rdpmc(unsigned int msr, u64 val, int failed) {} * think of extending them - you will be slapped with a stinking trout or a frozen * shark will reach you, wherever you are! You've been warned. */ -static inline unsigned long long notrace __rdmsr(unsigned int msr) +static __always_inline unsigned long long __rdmsr(unsigned int msr) { DECLARE_ARGS(val, low, high); @@ -98,7 +98,7 @@ static inline unsigned long long notrace __rdmsr(unsigned int msr) return EAX_EDX_VAL(val, low, high); } -static inline void notrace __wrmsr(unsigned int msr, u32 low, u32 high) +static __always_inline void __wrmsr(unsigned int msr, u32 low, u32 high) { asm volatile("1: wrmsr\n" "2:\n" -- GitLab From bcc5e6162d66d44f7929f30fce032f95855fc8b4 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Sat, 9 Jan 2021 23:03:40 -0800 Subject: [PATCH 0562/2012] bpf: Allow empty module BTFs Some modules don't declare any new types and end up with an empty BTF, containing only valid BTF header and no types or strings sections. This currently causes BTF validation error. There is nothing wrong with such BTF, so fix the issue by allowing module BTFs with no types or strings. Fixes: 36e68442d1af ("bpf: Load and verify kernel module BTFs") Reported-by: Christopher William Snowhill Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20210110070341.1380086-1-andrii@kernel.org --- kernel/bpf/btf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 8d6bdb4f4d618..84a36ee4a4c20 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -4172,7 +4172,7 @@ static int btf_parse_hdr(struct btf_verifier_env *env) return -ENOTSUPP; } - if (btf_data_size == hdr->hdr_len) { + if (!btf->base_btf && btf_data_size == hdr->hdr_len) { btf_verifier_log(env, "No data"); return -EINVAL; } -- GitLab From b8d52264df85ec12f370c0a8b28d0ac59a05877a Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Sat, 9 Jan 2021 23:03:41 -0800 Subject: [PATCH 0563/2012] libbpf: Allow loading empty BTFs Empty BTFs do come up (e.g., simple kernel modules with no new types and strings, compared to the vmlinux BTF) and there is nothing technically wrong with them. So remove unnecessary check preventing loading empty BTFs. Fixes: d8123624506c ("libbpf: Fix BTF data layout checks and allow empty BTF") Reported-by: Christopher William Snowhill Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20210110070341.1380086-2-andrii@kernel.org --- tools/lib/bpf/btf.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index 3c3f2bc6c6528..9970a288dda53 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -240,11 +240,6 @@ static int btf_parse_hdr(struct btf *btf) } meta_left = btf->raw_size - sizeof(*hdr); - if (!meta_left) { - pr_debug("BTF has no data\n"); - return -EINVAL; - } - if (meta_left < hdr->str_off + hdr->str_len) { pr_debug("Invalid BTF total size:%u\n", btf->raw_size); return -EINVAL; -- GitLab From aba428a0c612bb259891307da12e22efd0fab14c Mon Sep 17 00:00:00 2001 From: Chunguang Xu Date: Tue, 1 Dec 2020 17:52:31 +0800 Subject: [PATCH 0564/2012] timekeeping: Remove unused get_seconds() The get_seconds() cleanup seems to have been completed, now it is time to delete the legacy interface to avoid misuse later. Signed-off-by: Chunguang Xu Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/1606816351-26900-1-git-send-email-brookxu@tencent.com --- include/linux/ktime.h | 1 - include/linux/timekeeping32.h | 14 -------------- kernel/time/timekeeping.c | 3 +-- 3 files changed, 1 insertion(+), 17 deletions(-) delete mode 100644 include/linux/timekeeping32.h diff --git a/include/linux/ktime.h b/include/linux/ktime.h index a12b5523cc18e..73f20deb497d5 100644 --- a/include/linux/ktime.h +++ b/include/linux/ktime.h @@ -230,6 +230,5 @@ static inline ktime_t ms_to_ktime(u64 ms) } # include -# include #endif diff --git a/include/linux/timekeeping32.h b/include/linux/timekeeping32.h deleted file mode 100644 index 266017fc9ee9c..0000000000000 --- a/include/linux/timekeeping32.h +++ /dev/null @@ -1,14 +0,0 @@ -#ifndef _LINUX_TIMEKEEPING32_H -#define _LINUX_TIMEKEEPING32_H -/* - * These interfaces are all based on the old timespec type - * and should get replaced with the timespec64 based versions - * over time so we can remove the file here. - */ - -static inline unsigned long get_seconds(void) -{ - return ktime_get_real_seconds(); -} - -#endif diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index a45cedda93a7c..6aee5768c86ff 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -991,8 +991,7 @@ EXPORT_SYMBOL_GPL(ktime_get_seconds); /** * ktime_get_real_seconds - Get the seconds portion of CLOCK_REALTIME * - * Returns the wall clock seconds since 1970. This replaces the - * get_seconds() interface which is not y2038 safe on 32bit systems. + * Returns the wall clock seconds since 1970. * * For 64bit systems the fast access to tk->xtime_sec is preserved. On * 32bit systems the access must be protected with the sequence -- GitLab From e3fab2f3de081e98c50b7b4ace1b040161d95310 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 11 Jan 2021 11:39:56 +0100 Subject: [PATCH 0565/2012] ntp: Fix RTC synchronization on 32-bit platforms Due to an integer overflow, RTC synchronization now happens every 2s instead of the intended 11 minutes. Fix this by forcing 64-bit arithmetic for the sync period calculation. Annotate the other place which multiplies seconds for consistency as well. Fixes: c9e6189fb03123a7 ("ntp: Make the RTC synchronization more reliable") Signed-off-by: Geert Uytterhoeven Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20210111103956.290378-1-geert+renesas@glider.be --- kernel/time/ntp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c index 7404d38315276..87389b9e21aba 100644 --- a/kernel/time/ntp.c +++ b/kernel/time/ntp.c @@ -498,7 +498,7 @@ out: static void sync_hw_clock(struct work_struct *work); static DECLARE_WORK(sync_work, sync_hw_clock); static struct hrtimer sync_hrtimer; -#define SYNC_PERIOD_NS (11UL * 60 * NSEC_PER_SEC) +#define SYNC_PERIOD_NS (11ULL * 60 * NSEC_PER_SEC) static enum hrtimer_restart sync_timer_callback(struct hrtimer *timer) { @@ -512,7 +512,7 @@ static void sched_sync_hw_clock(unsigned long offset_nsec, bool retry) ktime_t exp = ktime_set(ktime_get_real_seconds(), 0); if (retry) - exp = ktime_add_ns(exp, 2 * NSEC_PER_SEC - offset_nsec); + exp = ktime_add_ns(exp, 2ULL * NSEC_PER_SEC - offset_nsec); else exp = ktime_add_ns(exp, SYNC_PERIOD_NS - offset_nsec); -- GitLab From c6458e72f6fd6ac7e390da0d9abe8446084886e5 Mon Sep 17 00:00:00 2001 From: Brendan Jackman Date: Tue, 12 Jan 2021 12:34:22 +0000 Subject: [PATCH 0566/2012] bpf: Clarify return value of probe str helpers When the buffer is too small to contain the input string, these helpers return the length of the buffer, not the length of the original string. This tries to make the docs totally clear about that, since "the length of the [copied ]string" could also refer to the length of the input. Signed-off-by: Brendan Jackman Signed-off-by: Daniel Borkmann Acked-by: KP Singh Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20210112123422.2011234-1-jackmanb@google.com --- include/uapi/linux/bpf.h | 10 +++++----- tools/include/uapi/linux/bpf.h | 10 +++++----- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 77d7c1bb29233..a1ad32456f89a 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -2993,10 +2993,10 @@ union bpf_attr { * string length is larger than *size*, just *size*-1 bytes are * copied and the last byte is set to NUL. * - * On success, the length of the copied string is returned. This - * makes this helper useful in tracing programs for reading - * strings, and more importantly to get its length at runtime. See - * the following snippet: + * On success, returns the number of bytes that were written, + * including the terminal NUL. This makes this helper useful in + * tracing programs for reading strings, and more importantly to + * get its length at runtime. See the following snippet: * * :: * @@ -3024,7 +3024,7 @@ union bpf_attr { * **->mm->env_start**: using this helper and the return value, * one can quickly iterate at the right offset of the memory area. * Return - * On success, the strictly positive length of the string, + * On success, the strictly positive length of the output string, * including the trailing NUL character. On error, a negative * value. * diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 77d7c1bb29233..a1ad32456f89a 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -2993,10 +2993,10 @@ union bpf_attr { * string length is larger than *size*, just *size*-1 bytes are * copied and the last byte is set to NUL. * - * On success, the length of the copied string is returned. This - * makes this helper useful in tracing programs for reading - * strings, and more importantly to get its length at runtime. See - * the following snippet: + * On success, returns the number of bytes that were written, + * including the terminal NUL. This makes this helper useful in + * tracing programs for reading strings, and more importantly to + * get its length at runtime. See the following snippet: * * :: * @@ -3024,7 +3024,7 @@ union bpf_attr { * **->mm->env_start**: using this helper and the return value, * one can quickly iterate at the right offset of the memory area. * Return - * On success, the strictly positive length of the string, + * On success, the strictly positive length of the output string, * including the trailing NUL character. On error, a negative * value. * -- GitLab From 28a8add64181059034b7f281491132112cd95bb4 Mon Sep 17 00:00:00 2001 From: Brendan Jackman Date: Tue, 12 Jan 2021 12:39:13 +0000 Subject: [PATCH 0567/2012] bpf: Fix a verifier message for alloc size helper arg The error message here is misleading, the argument will be rejected unless it is a known constant. Signed-off-by: Brendan Jackman Signed-off-by: Daniel Borkmann Acked-by: Yonghong Song Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20210112123913.2016804-1-jackmanb@google.com --- kernel/bpf/verifier.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 17270b8404f17..5534e667bdb1b 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -4319,7 +4319,7 @@ skip_type_check: err = mark_chain_precision(env, regno); } else if (arg_type_is_alloc_size(arg_type)) { if (!tnum_is_const(reg->var_off)) { - verbose(env, "R%d unbounded size, use 'var &= const' or 'if (var < const)'\n", + verbose(env, "R%d is not a known constant'\n", regno); return -EACCES; } -- GitLab From 17ffc193cdc6dc7a613d00d8ad47fc1f801b9bf0 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Tue, 12 Jan 2021 14:54:47 -0500 Subject: [PATCH 0568/2012] dm integrity: fix the maximum number of arguments Advance the maximum number of arguments from 9 to 15 to account for all potential feature flags that may be supplied. Linux 4.19 added "meta_device" (356d9d52e1221ba0c9f10b8b38652f78a5298329) and "recalculate" (a3fcf7253139609bf9ff901fbf955fba047e75dd) flags. Commit 468dfca38b1a6fbdccd195d875599cb7c8875cd9 added "sectors_per_bit" and "bitmap_flush_interval". Commit 84597a44a9d86ac949900441cea7da0af0f2f473 added "allow_discards". And the commit d537858ac8aaf4311b51240893add2fc62003b97 added "fix_padding". Signed-off-by: Mikulas Patocka Cc: stable@vger.kernel.org # v4.19+ Signed-off-by: Mike Snitzer --- drivers/md/dm-integrity.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index 11c7c538f7a98..81df019ab284a 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -3792,7 +3792,7 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv) unsigned extra_args; struct dm_arg_set as; static const struct dm_arg _args[] = { - {0, 9, "Invalid number of feature args"}, + {0, 15, "Invalid number of feature args"}, }; unsigned journal_sectors, interleave_sectors, buffer_sectors, journal_watermark, sync_msec; bool should_write_sb; -- GitLab From bcd6f4a8bedabac6949d05e418e73a0a1b309e84 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 12 Jan 2021 00:09:39 +0100 Subject: [PATCH 0569/2012] bpf: Allow to retrieve sol_socket opts from sock_addr progs The _bpf_setsockopt() is able to set some of the SOL_SOCKET level options, however, _bpf_getsockopt() has little support to actually retrieve them. This small patch adds few misc options such as SO_MARK, SO_PRIORITY and SO_BINDTOIFINDEX. For the latter getter and setter are added. The mark and priority in particular allow to retrieve the options from BPF cgroup hooks to then implement custom behavior / settings on the syscall hooks compared to other sockets that stick to the defaults, for example. Signed-off-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/cba44439b801e5ddc1170e5be787f4dc93a2d7f9.1610406333.git.daniel@iogearbox.net --- net/core/filter.c | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/net/core/filter.c b/net/core/filter.c index 255aeee724026..9ab94e90d6605 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -4770,6 +4770,10 @@ static int _bpf_setsockopt(struct sock *sk, int level, int optname, ifindex = dev->ifindex; dev_put(dev); } + fallthrough; + case SO_BINDTOIFINDEX: + if (optname == SO_BINDTOIFINDEX) + ifindex = val; ret = sock_bindtoindex(sk, ifindex, false); break; case SO_KEEPALIVE: @@ -4932,8 +4936,25 @@ static int _bpf_getsockopt(struct sock *sk, int level, int optname, sock_owned_by_me(sk); + if (level == SOL_SOCKET) { + if (optlen != sizeof(int)) + goto err_clear; + + switch (optname) { + case SO_MARK: + *((int *)optval) = sk->sk_mark; + break; + case SO_PRIORITY: + *((int *)optval) = sk->sk_priority; + break; + case SO_BINDTOIFINDEX: + *((int *)optval) = sk->sk_bound_dev_if; + break; + default: + goto err_clear; + } #ifdef CONFIG_INET - if (level == SOL_TCP && sk->sk_prot->getsockopt == tcp_getsockopt) { + } else if (level == SOL_TCP && sk->sk_prot->getsockopt == tcp_getsockopt) { struct inet_connection_sock *icsk; struct tcp_sock *tp; @@ -4986,12 +5007,12 @@ static int _bpf_getsockopt(struct sock *sk, int level, int optname, default: goto err_clear; } +#endif #endif } else { goto err_clear; } return 0; -#endif err_clear: memset(optval, 0, optlen); return -EINVAL; -- GitLab From 3218231dbb16cd85d94831759b6c78e6feb54b58 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 12 Jan 2021 00:09:40 +0100 Subject: [PATCH 0570/2012] bpf: Extend bind v4/v6 selftests for mark/prio/bindtoifindex Extend existing cgroup bind4/bind6 tests to add coverage for setting and retrieving SO_MARK, SO_PRIORITY and SO_BINDTOIFINDEX at the bind hook. Signed-off-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/384fdc90e5fa83f8335a37aa90fa2f5f3661929c.1610406333.git.daniel@iogearbox.net --- .../testing/selftests/bpf/progs/bind4_prog.c | 42 +++++++++++++++++-- .../testing/selftests/bpf/progs/bind6_prog.c | 42 +++++++++++++++++-- 2 files changed, 76 insertions(+), 8 deletions(-) diff --git a/tools/testing/selftests/bpf/progs/bind4_prog.c b/tools/testing/selftests/bpf/progs/bind4_prog.c index c6520f21f5f56..115a3b0ad984a 100644 --- a/tools/testing/selftests/bpf/progs/bind4_prog.c +++ b/tools/testing/selftests/bpf/progs/bind4_prog.c @@ -29,18 +29,48 @@ static __inline int bind_to_device(struct bpf_sock_addr *ctx) char veth2[IFNAMSIZ] = "test_sock_addr2"; char missing[IFNAMSIZ] = "nonexistent_dev"; char del_bind[IFNAMSIZ] = ""; + int veth1_idx, veth2_idx; if (bpf_setsockopt(ctx, SOL_SOCKET, SO_BINDTODEVICE, - &veth1, sizeof(veth1))) + &veth1, sizeof(veth1))) + return 1; + if (bpf_getsockopt(ctx, SOL_SOCKET, SO_BINDTOIFINDEX, + &veth1_idx, sizeof(veth1_idx)) || !veth1_idx) return 1; if (bpf_setsockopt(ctx, SOL_SOCKET, SO_BINDTODEVICE, - &veth2, sizeof(veth2))) + &veth2, sizeof(veth2))) + return 1; + if (bpf_getsockopt(ctx, SOL_SOCKET, SO_BINDTOIFINDEX, + &veth2_idx, sizeof(veth2_idx)) || !veth2_idx || + veth1_idx == veth2_idx) return 1; if (bpf_setsockopt(ctx, SOL_SOCKET, SO_BINDTODEVICE, - &missing, sizeof(missing)) != -ENODEV) + &missing, sizeof(missing)) != -ENODEV) + return 1; + if (bpf_setsockopt(ctx, SOL_SOCKET, SO_BINDTOIFINDEX, + &veth1_idx, sizeof(veth1_idx))) return 1; if (bpf_setsockopt(ctx, SOL_SOCKET, SO_BINDTODEVICE, - &del_bind, sizeof(del_bind))) + &del_bind, sizeof(del_bind))) + return 1; + + return 0; +} + +static __inline int misc_opts(struct bpf_sock_addr *ctx, int opt) +{ + int old, tmp, new = 0xeb9f; + + /* Socket in test case has guarantee that old never equals to new. */ + if (bpf_getsockopt(ctx, SOL_SOCKET, opt, &old, sizeof(old)) || + old == new) + return 1; + if (bpf_setsockopt(ctx, SOL_SOCKET, opt, &new, sizeof(new))) + return 1; + if (bpf_getsockopt(ctx, SOL_SOCKET, opt, &tmp, sizeof(tmp)) || + tmp != new) + return 1; + if (bpf_setsockopt(ctx, SOL_SOCKET, opt, &old, sizeof(old))) return 1; return 0; @@ -93,6 +123,10 @@ int bind_v4_prog(struct bpf_sock_addr *ctx) if (bind_to_device(ctx)) return 0; + /* Test for misc socket options. */ + if (misc_opts(ctx, SO_MARK) || misc_opts(ctx, SO_PRIORITY)) + return 0; + ctx->user_ip4 = bpf_htonl(SERV4_REWRITE_IP); ctx->user_port = bpf_htons(SERV4_REWRITE_PORT); diff --git a/tools/testing/selftests/bpf/progs/bind6_prog.c b/tools/testing/selftests/bpf/progs/bind6_prog.c index 4358e44dcf472..4c0d348034b93 100644 --- a/tools/testing/selftests/bpf/progs/bind6_prog.c +++ b/tools/testing/selftests/bpf/progs/bind6_prog.c @@ -35,18 +35,48 @@ static __inline int bind_to_device(struct bpf_sock_addr *ctx) char veth2[IFNAMSIZ] = "test_sock_addr2"; char missing[IFNAMSIZ] = "nonexistent_dev"; char del_bind[IFNAMSIZ] = ""; + int veth1_idx, veth2_idx; if (bpf_setsockopt(ctx, SOL_SOCKET, SO_BINDTODEVICE, - &veth1, sizeof(veth1))) + &veth1, sizeof(veth1))) + return 1; + if (bpf_getsockopt(ctx, SOL_SOCKET, SO_BINDTOIFINDEX, + &veth1_idx, sizeof(veth1_idx)) || !veth1_idx) return 1; if (bpf_setsockopt(ctx, SOL_SOCKET, SO_BINDTODEVICE, - &veth2, sizeof(veth2))) + &veth2, sizeof(veth2))) + return 1; + if (bpf_getsockopt(ctx, SOL_SOCKET, SO_BINDTOIFINDEX, + &veth2_idx, sizeof(veth2_idx)) || !veth2_idx || + veth1_idx == veth2_idx) return 1; if (bpf_setsockopt(ctx, SOL_SOCKET, SO_BINDTODEVICE, - &missing, sizeof(missing)) != -ENODEV) + &missing, sizeof(missing)) != -ENODEV) + return 1; + if (bpf_setsockopt(ctx, SOL_SOCKET, SO_BINDTOIFINDEX, + &veth1_idx, sizeof(veth1_idx))) return 1; if (bpf_setsockopt(ctx, SOL_SOCKET, SO_BINDTODEVICE, - &del_bind, sizeof(del_bind))) + &del_bind, sizeof(del_bind))) + return 1; + + return 0; +} + +static __inline int misc_opts(struct bpf_sock_addr *ctx, int opt) +{ + int old, tmp, new = 0xeb9f; + + /* Socket in test case has guarantee that old never equals to new. */ + if (bpf_getsockopt(ctx, SOL_SOCKET, opt, &old, sizeof(old)) || + old == new) + return 1; + if (bpf_setsockopt(ctx, SOL_SOCKET, opt, &new, sizeof(new))) + return 1; + if (bpf_getsockopt(ctx, SOL_SOCKET, opt, &tmp, sizeof(tmp)) || + tmp != new) + return 1; + if (bpf_setsockopt(ctx, SOL_SOCKET, opt, &old, sizeof(old))) return 1; return 0; @@ -107,6 +137,10 @@ int bind_v6_prog(struct bpf_sock_addr *ctx) if (bind_to_device(ctx)) return 0; + /* Test for misc socket options. */ + if (misc_opts(ctx, SO_MARK) || misc_opts(ctx, SO_PRIORITY)) + return 0; + ctx->user_ip6[0] = bpf_htonl(SERV6_REWRITE_IP_0); ctx->user_ip6[1] = bpf_htonl(SERV6_REWRITE_IP_1); ctx->user_ip6[2] = bpf_htonl(SERV6_REWRITE_IP_2); -- GitLab From a643bff752dcf72a07e1b2ab2f8587e4f51118be Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Mon, 11 Jan 2021 23:55:14 -0800 Subject: [PATCH 0571/2012] bpf: Add bpf_patch_call_args prototype to include/linux/bpf.h Add bpf_patch_call_args() prototype. This function is called from BPF verifier and only if CONFIG_BPF_JIT_ALWAYS_ON is not defined. This fixes compiler warning about missing prototype in some kernel configurations. Fixes: 1ea47e01ad6e ("bpf: add support for bpf_call to interpreter") Reported-by: kernel test robot Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20210112075520.4103414-2-andrii@kernel.org --- include/linux/bpf.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 07cb5d15e7439..ef9309604b3e5 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1403,7 +1403,10 @@ static inline void bpf_long_memcpy(void *dst, const void *src, u32 size) /* verify correctness of eBPF program */ int bpf_check(struct bpf_prog **fp, union bpf_attr *attr, union bpf_attr __user *uattr); + +#ifndef CONFIG_BPF_JIT_ALWAYS_ON void bpf_patch_call_args(struct bpf_insn *insn, u32 stack_depth); +#endif struct btf *bpf_get_btf_vmlinux(void); -- GitLab From 6943c2b05bf09fd5c5729f7d7d803bf3f126cb9a Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Mon, 11 Jan 2021 23:55:15 -0800 Subject: [PATCH 0572/2012] bpf: Avoid warning when re-casting __bpf_call_base into __bpf_call_base_args BPF interpreter uses extra input argument, so re-casts __bpf_call_base into __bpf_call_base_args. Avoid compiler warning about incompatible function prototypes by casting to void * first. Fixes: 1ea47e01ad6e ("bpf: add support for bpf_call to interpreter") Reported-by: kernel test robot Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20210112075520.4103414-3-andrii@kernel.org --- include/linux/filter.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/filter.h b/include/linux/filter.h index 29c27656165b2..5edf2b6608812 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -886,7 +886,7 @@ void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp); u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5); #define __bpf_call_base_args \ ((u64 (*)(u64, u64, u64, u64, u64, const struct bpf_insn *)) \ - __bpf_call_base) + (void *)__bpf_call_base) struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog); void bpf_jit_compile(struct bpf_prog *prog); -- GitLab From 936f8946bdb48239f4292812d4d2e26c6d328c95 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Mon, 11 Jan 2021 23:55:16 -0800 Subject: [PATCH 0573/2012] bpf: Declare __bpf_free_used_maps() unconditionally __bpf_free_used_maps() is always defined in kernel/bpf/core.c, while include/linux/bpf.h is guarding it behind CONFIG_BPF_SYSCALL. Move it out of that guard region and fix compiler warning. Fixes: a2ea07465c8d ("bpf: Fix missing prog untrack in release_maps") Reported-by: kernel test robot Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20210112075520.4103414-4-andrii@kernel.org --- include/linux/bpf.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index ef9309604b3e5..6e585dbc10df3 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1206,8 +1206,6 @@ void bpf_prog_sub(struct bpf_prog *prog, int i); void bpf_prog_inc(struct bpf_prog *prog); struct bpf_prog * __must_check bpf_prog_inc_not_zero(struct bpf_prog *prog); void bpf_prog_put(struct bpf_prog *prog); -void __bpf_free_used_maps(struct bpf_prog_aux *aux, - struct bpf_map **used_maps, u32 len); void bpf_prog_free_id(struct bpf_prog *prog, bool do_idr_lock); void bpf_map_free_id(struct bpf_map *map, bool do_idr_lock); @@ -1676,6 +1674,9 @@ static inline struct bpf_prog *bpf_prog_get_type(u32 ufd, return bpf_prog_get_type_dev(ufd, type, false); } +void __bpf_free_used_maps(struct bpf_prog_aux *aux, + struct bpf_map **used_maps, u32 len); + bool bpf_prog_get_ok(struct bpf_prog *, enum bpf_prog_type *, bool); int bpf_prog_offload_compile(struct bpf_prog *prog); -- GitLab From df85bc140a4d6cbaa78d8e9c35154e1a2f0622c7 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Mon, 11 Jan 2021 18:07:07 +0100 Subject: [PATCH 0574/2012] net: dcb: Accept RTM_GETDCB messages carrying set-like DCB commands In commit 826f328e2b7e ("net: dcb: Validate netlink message in DCB handler"), Linux started rejecting RTM_GETDCB netlink messages if they contained a set-like DCB_CMD_ command. The reason was that privileges were only verified for RTM_SETDCB messages, but the value that determined the action to be taken is the command, not the message type. And validation of message type against the DCB command was the obvious missing piece. Unfortunately it turns out that mlnx_qos, a somewhat widely deployed tool for configuration of DCB, accesses the DCB set-like APIs through RTM_GETDCB. Therefore do not bounce the discrepancy between message type and command. Instead, in addition to validating privileges based on the actual message type, validate them also based on the expected message type. This closes the loophole of allowing DCB configuration on non-admin accounts, while maintaining backward compatibility. Fixes: 2f90b8657ec9 ("ixgbe: this patch adds support for DCB to the kernel and ixgbe driver") Fixes: 826f328e2b7e ("net: dcb: Validate netlink message in DCB handler") Signed-off-by: Petr Machata Link: https://lore.kernel.org/r/a3edcfda0825f2aa2591801c5232f2bbf2d8a554.1610384801.git.me@pmachata.org Signed-off-by: Jakub Kicinski --- net/dcb/dcbnl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c index 7d49b6fd6cef9..653e3bc9c87b9 100644 --- a/net/dcb/dcbnl.c +++ b/net/dcb/dcbnl.c @@ -1765,7 +1765,7 @@ static int dcb_doit(struct sk_buff *skb, struct nlmsghdr *nlh, fn = &reply_funcs[dcb->cmd]; if (!fn->cb) return -EOPNOTSUPP; - if (fn->type != nlh->nlmsg_type) + if (fn->type == RTM_SETDCB && !netlink_capable(skb, CAP_NET_ADMIN)) return -EPERM; if (!tb[DCB_ATTR_IFNAME]) -- GitLab From 635599bace259a2c42741c3ea61bfa7be6f15556 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Mon, 11 Jan 2021 23:55:17 -0800 Subject: [PATCH 0575/2012] selftests/bpf: Sync RCU before unloading bpf_testmod If some of the subtests use module BTFs through ksyms, they will cause bpf_prog to take a refcount on bpf_testmod module, which will prevent it from successfully unloading. Module's refcnt is decremented when bpf_prog is freed, which generally happens in RCU callback. So we need to trigger syncronize_rcu() in the kernel, which can be achieved nicely with membarrier(MEMBARRIER_CMD_SHARED) or membarrier(MEMBARRIER_CMD_GLOBAL) syscall. So do that in kernel_sync_rcu() and make it available to other test inside the test_progs. This synchronize_rcu() is called before attempting to unload bpf_testmod. Fixes: 9f7fa225894c ("selftests/bpf: Add bpf_testmod kernel module for testing") Suggested-by: Alexei Starovoitov Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Acked-by: Yonghong Song Acked-by: Hao Luo Link: https://lore.kernel.org/bpf/20210112075520.4103414-5-andrii@kernel.org --- .../selftests/bpf/prog_tests/btf_map_in_map.c | 33 ------------------- tools/testing/selftests/bpf/test_progs.c | 11 +++++++ tools/testing/selftests/bpf/test_progs.h | 1 + 3 files changed, 12 insertions(+), 33 deletions(-) diff --git a/tools/testing/selftests/bpf/prog_tests/btf_map_in_map.c b/tools/testing/selftests/bpf/prog_tests/btf_map_in_map.c index 76ebe4c250f11..eb90a6b8850d2 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf_map_in_map.c +++ b/tools/testing/selftests/bpf/prog_tests/btf_map_in_map.c @@ -20,39 +20,6 @@ static __u32 bpf_map_id(struct bpf_map *map) return info.id; } -/* - * Trigger synchronize_rcu() in kernel. - * - * ARRAY_OF_MAPS/HASH_OF_MAPS lookup/update operations trigger synchronize_rcu() - * if looking up an existing non-NULL element or updating the map with a valid - * inner map FD. Use this fact to trigger synchronize_rcu(): create map-in-map, - * create a trivial ARRAY map, update map-in-map with ARRAY inner map. Then - * cleanup. At the end, at least one synchronize_rcu() would be called. - */ -static int kern_sync_rcu(void) -{ - int inner_map_fd, outer_map_fd, err, zero = 0; - - inner_map_fd = bpf_create_map(BPF_MAP_TYPE_ARRAY, 4, 4, 1, 0); - if (CHECK(inner_map_fd < 0, "inner_map_create", "failed %d\n", -errno)) - return -1; - - outer_map_fd = bpf_create_map_in_map(BPF_MAP_TYPE_ARRAY_OF_MAPS, NULL, - sizeof(int), inner_map_fd, 1, 0); - if (CHECK(outer_map_fd < 0, "outer_map_create", "failed %d\n", -errno)) { - close(inner_map_fd); - return -1; - } - - err = bpf_map_update_elem(outer_map_fd, &zero, &inner_map_fd, 0); - if (err) - err = -errno; - CHECK(err, "outer_map_update", "failed %d\n", err); - close(inner_map_fd); - close(outer_map_fd); - return err; -} - static void test_lookup_update(void) { int map1_fd, map2_fd, map3_fd, map4_fd, map5_fd, map1_id, map2_id; diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c index 7d077d48cadd0..213628ee721c1 100644 --- a/tools/testing/selftests/bpf/test_progs.c +++ b/tools/testing/selftests/bpf/test_progs.c @@ -11,6 +11,7 @@ #include #include #include /* backtrace */ +#include #define EXIT_NO_TEST 2 #define EXIT_ERR_SETUP_INFRA 3 @@ -370,8 +371,18 @@ static int delete_module(const char *name, int flags) return syscall(__NR_delete_module, name, flags); } +/* + * Trigger synchronize_rcu() in kernel. + */ +int kern_sync_rcu(void) +{ + return syscall(__NR_membarrier, MEMBARRIER_CMD_SHARED, 0, 0); +} + static void unload_bpf_testmod(void) { + if (kern_sync_rcu()) + fprintf(env.stderr, "Failed to trigger kernel-side RCU sync!\n"); if (delete_module("bpf_testmod", 0)) { if (errno == ENOENT) { if (env.verbosity > VERBOSE_NONE) diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h index 115953243f623..e49e2fdde9425 100644 --- a/tools/testing/selftests/bpf/test_progs.h +++ b/tools/testing/selftests/bpf/test_progs.h @@ -219,6 +219,7 @@ int bpf_find_map(const char *test, struct bpf_object *obj, const char *name); int compare_map_keys(int map1_fd, int map2_fd); int compare_stack_ips(int smap_fd, int amap_fd, int stack_trace_len); int extract_build_id(char *build_id, size_t size); +int kern_sync_rcu(void); #ifdef __x86_64__ #define SYS_NANOSLEEP_KPROBE_NAME "__x64_sys_nanosleep" -- GitLab From 541c3bad8dc51b253ba8686d0cd7628e6b9b5f4c Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Mon, 11 Jan 2021 23:55:18 -0800 Subject: [PATCH 0576/2012] bpf: Support BPF ksym variables in kernel modules Add support for directly accessing kernel module variables from BPF programs using special ldimm64 instructions. This functionality builds upon vmlinux ksym support, but extends ldimm64 with src_reg=BPF_PSEUDO_BTF_ID to allow specifying kernel module BTF's FD in insn[1].imm field. During BPF program load time, verifier will resolve FD to BTF object and will take reference on BTF object itself and, for module BTFs, corresponding module as well, to make sure it won't be unloaded from under running BPF program. The mechanism used is similar to how bpf_prog keeps track of used bpf_maps. One interesting change is also in how per-CPU variable is determined. The logic is to find .data..percpu data section in provided BTF, but both vmlinux and module each have their own .data..percpu entries in BTF. So for module's case, the search for DATASEC record needs to look at only module's added BTF types. This is implemented with custom search function. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Acked-by: Yonghong Song Acked-by: Hao Luo Link: https://lore.kernel.org/bpf/20210112075520.4103414-6-andrii@kernel.org --- include/linux/bpf.h | 10 +++ include/linux/bpf_verifier.h | 3 + include/linux/btf.h | 3 + kernel/bpf/btf.c | 31 ++++++- kernel/bpf/core.c | 23 ++++++ kernel/bpf/verifier.c | 154 ++++++++++++++++++++++++++++------- 6 files changed, 194 insertions(+), 30 deletions(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 6e585dbc10df3..1aac2af12fed2 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -761,9 +761,15 @@ struct bpf_ctx_arg_aux { u32 btf_id; }; +struct btf_mod_pair { + struct btf *btf; + struct module *module; +}; + struct bpf_prog_aux { atomic64_t refcnt; u32 used_map_cnt; + u32 used_btf_cnt; u32 max_ctx_offset; u32 max_pkt_offset; u32 max_tp_access; @@ -802,6 +808,7 @@ struct bpf_prog_aux { const struct bpf_prog_ops *ops; struct bpf_map **used_maps; struct mutex used_maps_mutex; /* mutex for used_maps and used_map_cnt */ + struct btf_mod_pair *used_btfs; struct bpf_prog *prog; struct user_struct *user; u64 load_time; /* ns since boottime */ @@ -1668,6 +1675,9 @@ bpf_base_func_proto(enum bpf_func_id func_id) } #endif /* CONFIG_BPF_SYSCALL */ +void __bpf_free_used_btfs(struct bpf_prog_aux *aux, + struct btf_mod_pair *used_btfs, u32 len); + static inline struct bpf_prog *bpf_prog_get_type(u32 ufd, enum bpf_prog_type type) { diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index e941fe1484e57..dfe6f85d97dd6 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -340,6 +340,7 @@ struct bpf_insn_aux_data { }; #define MAX_USED_MAPS 64 /* max number of maps accessed by one eBPF program */ +#define MAX_USED_BTFS 64 /* max number of BTFs accessed by one BPF program */ #define BPF_VERIFIER_TMP_LOG_SIZE 1024 @@ -398,7 +399,9 @@ struct bpf_verifier_env { struct bpf_verifier_state_list **explored_states; /* search pruning optimization */ struct bpf_verifier_state_list *free_list; struct bpf_map *used_maps[MAX_USED_MAPS]; /* array of map's used by eBPF program */ + struct btf_mod_pair used_btfs[MAX_USED_BTFS]; /* array of BTF's used by BPF program */ u32 used_map_cnt; /* number of used maps */ + u32 used_btf_cnt; /* number of used BTF objects */ u32 id_gen; /* used to generate unique reg IDs */ bool allow_ptr_leaks; bool allow_ptr_to_map_access; diff --git a/include/linux/btf.h b/include/linux/btf.h index 4c200f5d242be..7fabf14280933 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -91,6 +91,9 @@ int btf_type_snprintf_show(const struct btf *btf, u32 type_id, void *obj, int btf_get_fd_by_id(u32 id); u32 btf_obj_id(const struct btf *btf); bool btf_is_kernel(const struct btf *btf); +bool btf_is_module(const struct btf *btf); +struct module *btf_try_get_module(const struct btf *btf); +u32 btf_nr_types(const struct btf *btf); bool btf_member_is_reg_int(const struct btf *btf, const struct btf_type *s, const struct btf_member *m, u32 expected_offset, u32 expected_size); diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 8d6bdb4f4d618..7ccc0133723a0 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -458,7 +458,7 @@ static bool btf_type_is_datasec(const struct btf_type *t) return BTF_INFO_KIND(t->info) == BTF_KIND_DATASEC; } -static u32 btf_nr_types_total(const struct btf *btf) +u32 btf_nr_types(const struct btf *btf) { u32 total = 0; @@ -476,7 +476,7 @@ s32 btf_find_by_name_kind(const struct btf *btf, const char *name, u8 kind) const char *tname; u32 i, total; - total = btf_nr_types_total(btf); + total = btf_nr_types(btf); for (i = 1; i < total; i++) { t = btf_type_by_id(btf, i); if (BTF_INFO_KIND(t->info) != kind) @@ -5743,6 +5743,11 @@ bool btf_is_kernel(const struct btf *btf) return btf->kernel_btf; } +bool btf_is_module(const struct btf *btf) +{ + return btf->kernel_btf && strcmp(btf->name, "vmlinux") != 0; +} + static int btf_id_cmp_func(const void *a, const void *b) { const int *pa = a, *pb = b; @@ -5877,3 +5882,25 @@ static int __init btf_module_init(void) fs_initcall(btf_module_init); #endif /* CONFIG_DEBUG_INFO_BTF_MODULES */ + +struct module *btf_try_get_module(const struct btf *btf) +{ + struct module *res = NULL; +#ifdef CONFIG_DEBUG_INFO_BTF_MODULES + struct btf_module *btf_mod, *tmp; + + mutex_lock(&btf_module_mutex); + list_for_each_entry_safe(btf_mod, tmp, &btf_modules, list) { + if (btf_mod->btf != btf) + continue; + + if (try_module_get(btf_mod->module)) + res = btf_mod->module; + + break; + } + mutex_unlock(&btf_module_mutex); +#endif + + return res; +} diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 261f8692d0d2a..69c3c308de5e0 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -2119,6 +2119,28 @@ static void bpf_free_used_maps(struct bpf_prog_aux *aux) kfree(aux->used_maps); } +void __bpf_free_used_btfs(struct bpf_prog_aux *aux, + struct btf_mod_pair *used_btfs, u32 len) +{ +#ifdef CONFIG_BPF_SYSCALL + struct btf_mod_pair *btf_mod; + u32 i; + + for (i = 0; i < len; i++) { + btf_mod = &used_btfs[i]; + if (btf_mod->module) + module_put(btf_mod->module); + btf_put(btf_mod->btf); + } +#endif +} + +static void bpf_free_used_btfs(struct bpf_prog_aux *aux) +{ + __bpf_free_used_btfs(aux, aux->used_btfs, aux->used_btf_cnt); + kfree(aux->used_btfs); +} + static void bpf_prog_free_deferred(struct work_struct *work) { struct bpf_prog_aux *aux; @@ -2126,6 +2148,7 @@ static void bpf_prog_free_deferred(struct work_struct *work) aux = container_of(work, struct bpf_prog_aux, work); bpf_free_used_maps(aux); + bpf_free_used_btfs(aux); if (bpf_prog_is_dev_bound(aux)) bpf_prog_offload_destroy(aux->prog); #ifdef CONFIG_PERF_EVENTS diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 5534e667bdb1b..ae2aee48cf821 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -9703,6 +9703,36 @@ process_bpf_exit: return 0; } +static int find_btf_percpu_datasec(struct btf *btf) +{ + const struct btf_type *t; + const char *tname; + int i, n; + + /* + * Both vmlinux and module each have their own ".data..percpu" + * DATASECs in BTF. So for module's case, we need to skip vmlinux BTF + * types to look at only module's own BTF types. + */ + n = btf_nr_types(btf); + if (btf_is_module(btf)) + i = btf_nr_types(btf_vmlinux); + else + i = 1; + + for(; i < n; i++) { + t = btf_type_by_id(btf, i); + if (BTF_INFO_KIND(t->info) != BTF_KIND_DATASEC) + continue; + + tname = btf_name_by_offset(btf, t->name_off); + if (!strcmp(tname, ".data..percpu")) + return i; + } + + return -ENOENT; +} + /* replace pseudo btf_id with kernel symbol address */ static int check_pseudo_btf_id(struct bpf_verifier_env *env, struct bpf_insn *insn, @@ -9710,48 +9740,57 @@ static int check_pseudo_btf_id(struct bpf_verifier_env *env, { const struct btf_var_secinfo *vsi; const struct btf_type *datasec; + struct btf_mod_pair *btf_mod; const struct btf_type *t; const char *sym_name; bool percpu = false; u32 type, id = insn->imm; + struct btf *btf; s32 datasec_id; u64 addr; - int i; - - if (!btf_vmlinux) { - verbose(env, "kernel is missing BTF, make sure CONFIG_DEBUG_INFO_BTF=y is specified in Kconfig.\n"); - return -EINVAL; - } + int i, btf_fd, err; - if (insn[1].imm != 0) { - verbose(env, "reserved field (insn[1].imm) is used in pseudo_btf_id ldimm64 insn.\n"); - return -EINVAL; + btf_fd = insn[1].imm; + if (btf_fd) { + btf = btf_get_by_fd(btf_fd); + if (IS_ERR(btf)) { + verbose(env, "invalid module BTF object FD specified.\n"); + return -EINVAL; + } + } else { + if (!btf_vmlinux) { + verbose(env, "kernel is missing BTF, make sure CONFIG_DEBUG_INFO_BTF=y is specified in Kconfig.\n"); + return -EINVAL; + } + btf = btf_vmlinux; + btf_get(btf); } - t = btf_type_by_id(btf_vmlinux, id); + t = btf_type_by_id(btf, id); if (!t) { verbose(env, "ldimm64 insn specifies invalid btf_id %d.\n", id); - return -ENOENT; + err = -ENOENT; + goto err_put; } if (!btf_type_is_var(t)) { - verbose(env, "pseudo btf_id %d in ldimm64 isn't KIND_VAR.\n", - id); - return -EINVAL; + verbose(env, "pseudo btf_id %d in ldimm64 isn't KIND_VAR.\n", id); + err = -EINVAL; + goto err_put; } - sym_name = btf_name_by_offset(btf_vmlinux, t->name_off); + sym_name = btf_name_by_offset(btf, t->name_off); addr = kallsyms_lookup_name(sym_name); if (!addr) { verbose(env, "ldimm64 failed to find the address for kernel symbol '%s'.\n", sym_name); - return -ENOENT; + err = -ENOENT; + goto err_put; } - datasec_id = btf_find_by_name_kind(btf_vmlinux, ".data..percpu", - BTF_KIND_DATASEC); + datasec_id = find_btf_percpu_datasec(btf); if (datasec_id > 0) { - datasec = btf_type_by_id(btf_vmlinux, datasec_id); + datasec = btf_type_by_id(btf, datasec_id); for_each_vsi(i, datasec, vsi) { if (vsi->type == id) { percpu = true; @@ -9764,10 +9803,10 @@ static int check_pseudo_btf_id(struct bpf_verifier_env *env, insn[1].imm = addr >> 32; type = t->type; - t = btf_type_skip_modifiers(btf_vmlinux, type, NULL); + t = btf_type_skip_modifiers(btf, type, NULL); if (percpu) { aux->btf_var.reg_type = PTR_TO_PERCPU_BTF_ID; - aux->btf_var.btf = btf_vmlinux; + aux->btf_var.btf = btf; aux->btf_var.btf_id = type; } else if (!btf_type_is_struct(t)) { const struct btf_type *ret; @@ -9775,21 +9814,54 @@ static int check_pseudo_btf_id(struct bpf_verifier_env *env, u32 tsize; /* resolve the type size of ksym. */ - ret = btf_resolve_size(btf_vmlinux, t, &tsize); + ret = btf_resolve_size(btf, t, &tsize); if (IS_ERR(ret)) { - tname = btf_name_by_offset(btf_vmlinux, t->name_off); + tname = btf_name_by_offset(btf, t->name_off); verbose(env, "ldimm64 unable to resolve the size of type '%s': %ld\n", tname, PTR_ERR(ret)); - return -EINVAL; + err = -EINVAL; + goto err_put; } aux->btf_var.reg_type = PTR_TO_MEM; aux->btf_var.mem_size = tsize; } else { aux->btf_var.reg_type = PTR_TO_BTF_ID; - aux->btf_var.btf = btf_vmlinux; + aux->btf_var.btf = btf; aux->btf_var.btf_id = type; } + + /* check whether we recorded this BTF (and maybe module) already */ + for (i = 0; i < env->used_btf_cnt; i++) { + if (env->used_btfs[i].btf == btf) { + btf_put(btf); + return 0; + } + } + + if (env->used_btf_cnt >= MAX_USED_BTFS) { + err = -E2BIG; + goto err_put; + } + + btf_mod = &env->used_btfs[env->used_btf_cnt]; + btf_mod->btf = btf; + btf_mod->module = NULL; + + /* if we reference variables from kernel module, bump its refcount */ + if (btf_is_module(btf)) { + btf_mod->module = btf_try_get_module(btf); + if (!btf_mod->module) { + err = -ENXIO; + goto err_put; + } + } + + env->used_btf_cnt++; + return 0; +err_put: + btf_put(btf); + return err; } static int check_map_prealloc(struct bpf_map *map) @@ -10086,6 +10158,13 @@ static void release_maps(struct bpf_verifier_env *env) env->used_map_cnt); } +/* drop refcnt of maps used by the rejected program */ +static void release_btfs(struct bpf_verifier_env *env) +{ + __bpf_free_used_btfs(env->prog->aux, env->used_btfs, + env->used_btf_cnt); +} + /* convert pseudo BPF_LD_IMM64 into generic BPF_LD_IMM64 */ static void convert_pseudo_ld_imm64(struct bpf_verifier_env *env) { @@ -12098,7 +12177,10 @@ skip_full_check: goto err_release_maps; } - if (ret == 0 && env->used_map_cnt) { + if (ret) + goto err_release_maps; + + if (env->used_map_cnt) { /* if program passed verifier, update used_maps in bpf_prog_info */ env->prog->aux->used_maps = kmalloc_array(env->used_map_cnt, sizeof(env->used_maps[0]), @@ -12112,15 +12194,29 @@ skip_full_check: memcpy(env->prog->aux->used_maps, env->used_maps, sizeof(env->used_maps[0]) * env->used_map_cnt); env->prog->aux->used_map_cnt = env->used_map_cnt; + } + if (env->used_btf_cnt) { + /* if program passed verifier, update used_btfs in bpf_prog_aux */ + env->prog->aux->used_btfs = kmalloc_array(env->used_btf_cnt, + sizeof(env->used_btfs[0]), + GFP_KERNEL); + if (!env->prog->aux->used_btfs) { + ret = -ENOMEM; + goto err_release_maps; + } + memcpy(env->prog->aux->used_btfs, env->used_btfs, + sizeof(env->used_btfs[0]) * env->used_btf_cnt); + env->prog->aux->used_btf_cnt = env->used_btf_cnt; + } + if (env->used_map_cnt || env->used_btf_cnt) { /* program is valid. Convert pseudo bpf_ld_imm64 into generic * bpf_ld_imm64 instructions */ convert_pseudo_ld_imm64(env); } - if (ret == 0) - adjust_btf_func(env); + adjust_btf_func(env); err_release_maps: if (!env->prog->aux->used_maps) @@ -12128,6 +12224,8 @@ err_release_maps: * them now. Otherwise free_used_maps() will release them. */ release_maps(env); + if (!env->prog->aux->used_btfs) + release_btfs(env); /* extension progs temporarily inherit the attach_type of their targets for verification purposes, so set it back to zero before returning -- GitLab From 284d2587ea8a96f97ca519a3de683ff226e9e2b3 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Mon, 11 Jan 2021 23:55:19 -0800 Subject: [PATCH 0577/2012] libbpf: Support kernel module ksym externs Add support for searching for ksym externs not just in vmlinux BTF, but across all module BTFs, similarly to how it's done for CO-RE relocations. Kernels that expose module BTFs through sysfs are assumed to support new ldimm64 instruction extension with BTF FD provided in insn[1].imm field, so no extra feature detection is performed. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Acked-by: Yonghong Song Acked-by: Hao Luo Link: https://lore.kernel.org/bpf/20210112075520.4103414-7-andrii@kernel.org --- tools/lib/bpf/libbpf.c | 50 +++++++++++++++++++++++++++--------------- 1 file changed, 32 insertions(+), 18 deletions(-) diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 6ae748f6ea118..2abbc38005684 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -395,7 +395,8 @@ struct extern_desc { unsigned long long addr; /* target btf_id of the corresponding kernel var. */ - int vmlinux_btf_id; + int kernel_btf_obj_fd; + int kernel_btf_id; /* local btf_id of the ksym extern's type. */ __u32 type_id; @@ -6162,7 +6163,8 @@ bpf_object__relocate_data(struct bpf_object *obj, struct bpf_program *prog) } else /* EXT_KSYM */ { if (ext->ksym.type_id) { /* typed ksyms */ insn[0].src_reg = BPF_PSEUDO_BTF_ID; - insn[0].imm = ext->ksym.vmlinux_btf_id; + insn[0].imm = ext->ksym.kernel_btf_id; + insn[1].imm = ext->ksym.kernel_btf_obj_fd; } else { /* typeless ksyms */ insn[0].imm = (__u32)ext->ksym.addr; insn[1].imm = ext->ksym.addr >> 32; @@ -7319,7 +7321,8 @@ out: static int bpf_object__resolve_ksyms_btf_id(struct bpf_object *obj) { struct extern_desc *ext; - int i, id; + struct btf *btf; + int i, j, id, btf_fd, err; for (i = 0; i < obj->nr_extern; i++) { const struct btf_type *targ_var, *targ_type; @@ -7331,10 +7334,25 @@ static int bpf_object__resolve_ksyms_btf_id(struct bpf_object *obj) if (ext->type != EXT_KSYM || !ext->ksym.type_id) continue; - id = btf__find_by_name_kind(obj->btf_vmlinux, ext->name, - BTF_KIND_VAR); + btf = obj->btf_vmlinux; + btf_fd = 0; + id = btf__find_by_name_kind(btf, ext->name, BTF_KIND_VAR); + if (id == -ENOENT) { + err = load_module_btfs(obj); + if (err) + return err; + + for (j = 0; j < obj->btf_module_cnt; j++) { + btf = obj->btf_modules[j].btf; + /* we assume module BTF FD is always >0 */ + btf_fd = obj->btf_modules[j].fd; + id = btf__find_by_name_kind(btf, ext->name, BTF_KIND_VAR); + if (id != -ENOENT) + break; + } + } if (id <= 0) { - pr_warn("extern (ksym) '%s': failed to find BTF ID in vmlinux BTF.\n", + pr_warn("extern (ksym) '%s': failed to find BTF ID in kernel BTF(s).\n", ext->name); return -ESRCH; } @@ -7343,24 +7361,19 @@ static int bpf_object__resolve_ksyms_btf_id(struct bpf_object *obj) local_type_id = ext->ksym.type_id; /* find target type_id */ - targ_var = btf__type_by_id(obj->btf_vmlinux, id); - targ_var_name = btf__name_by_offset(obj->btf_vmlinux, - targ_var->name_off); - targ_type = skip_mods_and_typedefs(obj->btf_vmlinux, - targ_var->type, - &targ_type_id); + targ_var = btf__type_by_id(btf, id); + targ_var_name = btf__name_by_offset(btf, targ_var->name_off); + targ_type = skip_mods_and_typedefs(btf, targ_var->type, &targ_type_id); ret = bpf_core_types_are_compat(obj->btf, local_type_id, - obj->btf_vmlinux, targ_type_id); + btf, targ_type_id); if (ret <= 0) { const struct btf_type *local_type; const char *targ_name, *local_name; local_type = btf__type_by_id(obj->btf, local_type_id); - local_name = btf__name_by_offset(obj->btf, - local_type->name_off); - targ_name = btf__name_by_offset(obj->btf_vmlinux, - targ_type->name_off); + local_name = btf__name_by_offset(obj->btf, local_type->name_off); + targ_name = btf__name_by_offset(btf, targ_type->name_off); pr_warn("extern (ksym) '%s': incompatible types, expected [%d] %s %s, but kernel has [%d] %s %s\n", ext->name, local_type_id, @@ -7370,7 +7383,8 @@ static int bpf_object__resolve_ksyms_btf_id(struct bpf_object *obj) } ext->is_set = true; - ext->ksym.vmlinux_btf_id = id; + ext->ksym.kernel_btf_obj_fd = btf_fd; + ext->ksym.kernel_btf_id = id; pr_debug("extern (ksym) '%s': resolved to [%d] %s %s\n", ext->name, id, btf_kind_str(targ_var), targ_var_name); } -- GitLab From 430d97a8a7bf1500c081ce756ff2ead73d2303c5 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Mon, 11 Jan 2021 23:55:20 -0800 Subject: [PATCH 0578/2012] selftests/bpf: Test kernel module ksym externs Add per-CPU variable to bpf_testmod.ko and use those from new selftest to validate it works end-to-end. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Acked-by: Yonghong Song Acked-by: Hao Luo Link: https://lore.kernel.org/bpf/20210112075520.4103414-8-andrii@kernel.org --- .../selftests/bpf/bpf_testmod/bpf_testmod.c | 3 ++ .../selftests/bpf/prog_tests/ksyms_module.c | 31 +++++++++++++++++++ .../selftests/bpf/progs/test_ksyms_module.c | 26 ++++++++++++++++ 3 files changed, 60 insertions(+) create mode 100644 tools/testing/selftests/bpf/prog_tests/ksyms_module.c create mode 100644 tools/testing/selftests/bpf/progs/test_ksyms_module.c diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c index 2df19d73ca496..0b991e115d1fc 100644 --- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c +++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include "bpf_testmod.h" @@ -10,6 +11,8 @@ #define CREATE_TRACE_POINTS #include "bpf_testmod-events.h" +DEFINE_PER_CPU(int, bpf_testmod_ksym_percpu) = 123; + noinline ssize_t bpf_testmod_test_read(struct file *file, struct kobject *kobj, struct bin_attribute *bin_attr, diff --git a/tools/testing/selftests/bpf/prog_tests/ksyms_module.c b/tools/testing/selftests/bpf/prog_tests/ksyms_module.c new file mode 100644 index 0000000000000..4c232b456479d --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/ksyms_module.c @@ -0,0 +1,31 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2021 Facebook */ + +#include +#include +#include +#include "test_ksyms_module.skel.h" + +static int duration; + +void test_ksyms_module(void) +{ + struct test_ksyms_module* skel; + int err; + + skel = test_ksyms_module__open_and_load(); + if (CHECK(!skel, "skel_open", "failed to open skeleton\n")) + return; + + err = test_ksyms_module__attach(skel); + if (CHECK(err, "skel_attach", "skeleton attach failed: %d\n", err)) + goto cleanup; + + usleep(1); + + ASSERT_EQ(skel->bss->triggered, true, "triggered"); + ASSERT_EQ(skel->bss->out_mod_ksym_global, 123, "global_ksym_val"); + +cleanup: + test_ksyms_module__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/progs/test_ksyms_module.c b/tools/testing/selftests/bpf/progs/test_ksyms_module.c new file mode 100644 index 0000000000000..d6a0b3086b906 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_ksyms_module.c @@ -0,0 +1,26 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2021 Facebook */ + +#include "vmlinux.h" + +#include + +extern const int bpf_testmod_ksym_percpu __ksym; + +int out_mod_ksym_global = 0; +bool triggered = false; + +SEC("raw_tp/sys_enter") +int handler(const void *ctx) +{ + int *val; + __u32 cpu; + + val = (int *)bpf_this_cpu_ptr(&bpf_testmod_ksym_percpu); + out_mod_ksym_global = *val; + triggered = true; + + return 0; +} + +char LICENSE[] SEC("license") = "GPL"; -- GitLab From 8ff60eb052eeba95cfb3efe16b08c9199f8121cf Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Tue, 12 Jan 2021 15:49:04 -0800 Subject: [PATCH 0579/2012] mm, slub: consider rest of partial list if acquire_slab() fails acquire_slab() fails if there is contention on the freelist of the page (probably because some other CPU is concurrently freeing an object from the page). In that case, it might make sense to look for a different page (since there might be more remote frees to the page from other CPUs, and we don't want contention on struct page). However, the current code accidentally stops looking at the partial list completely in that case. Especially on kernels without CONFIG_NUMA set, this means that get_partial() fails and new_slab_objects() falls back to new_slab(), allocating new pages. This could lead to an unnecessary increase in memory fragmentation. Link: https://lkml.kernel.org/r/20201228130853.1871516-1-jannh@google.com Fixes: 7ced37197196 ("slub: Acquire_slab() avoid loop") Signed-off-by: Jann Horn Acked-by: David Rientjes Acked-by: Joonsoo Kim Cc: Christoph Lameter Cc: Pekka Enberg Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/slub.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/slub.c b/mm/slub.c index dc5b42e700b85..d9e4e10683cc1 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -1973,7 +1973,7 @@ static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n, t = acquire_slab(s, n, page, object == NULL, &objects); if (!t) - break; + continue; /* cmpxchg raced */ available += objects; if (!object) { -- GitLab From ce8f86ee94fabcc98537ddccd7e82cfd360a4dc5 Mon Sep 17 00:00:00 2001 From: Hailong liu Date: Tue, 12 Jan 2021 15:49:08 -0800 Subject: [PATCH 0580/2012] mm/page_alloc: add a missing mm_page_alloc_zone_locked() tracepoint The trace point *trace_mm_page_alloc_zone_locked()* in __rmqueue() does not currently cover all branches. Add the missing tracepoint and check the page before do that. [akpm@linux-foundation.org: use IS_ENABLED() to suppress warning] Link: https://lkml.kernel.org/r/20201228132901.41523-1-carver4lio@163.com Signed-off-by: Hailong liu Reviewed-by: Andrew Morton Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/page_alloc.c | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index bdbec4c981738..027f6481ba59b 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -2862,20 +2862,20 @@ __rmqueue(struct zone *zone, unsigned int order, int migratetype, { struct page *page; -#ifdef CONFIG_CMA - /* - * Balance movable allocations between regular and CMA areas by - * allocating from CMA when over half of the zone's free memory - * is in the CMA area. - */ - if (alloc_flags & ALLOC_CMA && - zone_page_state(zone, NR_FREE_CMA_PAGES) > - zone_page_state(zone, NR_FREE_PAGES) / 2) { - page = __rmqueue_cma_fallback(zone, order); - if (page) - return page; + if (IS_ENABLED(CONFIG_CMA)) { + /* + * Balance movable allocations between regular and CMA areas by + * allocating from CMA when over half of the zone's free memory + * is in the CMA area. + */ + if (alloc_flags & ALLOC_CMA && + zone_page_state(zone, NR_FREE_CMA_PAGES) > + zone_page_state(zone, NR_FREE_PAGES) / 2) { + page = __rmqueue_cma_fallback(zone, order); + if (page) + goto out; + } } -#endif retry: page = __rmqueue_smallest(zone, order, migratetype); if (unlikely(!page)) { @@ -2886,8 +2886,9 @@ retry: alloc_flags)) goto retry; } - - trace_mm_page_alloc_zone_locked(page, order, migratetype); +out: + if (page) + trace_mm_page_alloc_zone_locked(page, order, migratetype); return page; } -- GitLab From 7ea510b92c7c9b4eb5ff72e6b4bbad4b0407a914 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Tue, 12 Jan 2021 15:49:11 -0800 Subject: [PATCH 0581/2012] mm/memcontrol: fix warning in mem_cgroup_page_lruvec() Boot a CONFIG_MEMCG=y kernel with "cgroup_disabled=memory" and you are met by a series of warnings from the VM_WARN_ON_ONCE_PAGE(!memcg, page) recently added to the inline mem_cgroup_page_lruvec(). An earlier attempt to place that warning, in mem_cgroup_lruvec(), had been careful to do so after weeding out the mem_cgroup_disabled() case; but was itself invalid because of the mem_cgroup_lruvec(NULL, pgdat) in clear_pgdat_congested() and age_active_anon(). Warning in mem_cgroup_page_lruvec() was once useful in detecting a KSM charge bug, so may be worth keeping: but skip if mem_cgroup_disabled(). Link: https://lkml.kernel.org/r/alpine.LSU.2.11.2101032056260.1093@eggly.anvils Fixes: 9a1ac2288cf1 ("mm/memcontrol:rewrite mem_cgroup_page_lruvec()") Signed-off-by: Hugh Dickins Reviewed-by: Alex Shi Acked-by: Roman Gushchin Acked-by: Chris Down Reviewed-by: Baoquan He Acked-by: Vlastimil Babka Cc: Hui Su Cc: Lorenzo Stoakes Cc: Michal Hocko Cc: Johannes Weiner Cc: Shakeel Butt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index d827bd7f3bfe3..eeb0b52203e92 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -665,7 +665,7 @@ static inline struct lruvec *mem_cgroup_page_lruvec(struct page *page, { struct mem_cgroup *memcg = page_memcg(page); - VM_WARN_ON_ONCE_PAGE(!memcg, page); + VM_WARN_ON_ONCE_PAGE(!memcg && !mem_cgroup_disabled(), page); return mem_cgroup_lruvec(memcg, pgdat); } -- GitLab From 29970dc24faf0078beb4efab5455b4f504d2198d Mon Sep 17 00:00:00 2001 From: Hailong Liu Date: Tue, 12 Jan 2021 15:49:14 -0800 Subject: [PATCH 0582/2012] arm/kasan: fix the array size of kasan_early_shadow_pte[] MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The size of kasan_early_shadow_pte[] now is PTRS_PER_PTE which defined to 512 for arm. This means that it only covers the prev Linux pte entries, but not the HWTABLE pte entries for arm. The reason it currently works is that the symbol kasan_early_shadow_page immediately following kasan_early_shadow_pte in memory is page aligned, which makes kasan_early_shadow_pte look like a 4KB size array. But we can't ensure the order is always right with different compiler/linker, or if more bss symbols are introduced. We had a test with QEMU + vexpress:put a 512KB-size symbol with attribute __section(".bss..page_aligned") after kasan_early_shadow_pte, and poisoned it after kasan_early_init(). Then enabled CONFIG_KASAN, it failed to boot up. Link: https://lkml.kernel.org/r/20210109044622.8312-1-hailongliiu@yeah.net Signed-off-by: Hailong Liu Signed-off-by: Ziliang Guo Reviewed-by: Linus Walleij Cc: Andrey Ryabinin Cc: Russell King Cc: Alexander Potapenko Cc: Dmitry Vyukov Cc: Ard Biesheuvel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kasan.h | 6 +++++- mm/kasan/init.c | 3 ++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/include/linux/kasan.h b/include/linux/kasan.h index 5e0655fb2a6f7..fe1ae73ff8b57 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -35,8 +35,12 @@ struct kunit_kasan_expectation { #define KASAN_SHADOW_INIT 0 #endif +#ifndef PTE_HWTABLE_PTRS +#define PTE_HWTABLE_PTRS 0 +#endif + extern unsigned char kasan_early_shadow_page[PAGE_SIZE]; -extern pte_t kasan_early_shadow_pte[PTRS_PER_PTE]; +extern pte_t kasan_early_shadow_pte[PTRS_PER_PTE + PTE_HWTABLE_PTRS]; extern pmd_t kasan_early_shadow_pmd[PTRS_PER_PMD]; extern pud_t kasan_early_shadow_pud[PTRS_PER_PUD]; extern p4d_t kasan_early_shadow_p4d[MAX_PTRS_PER_P4D]; diff --git a/mm/kasan/init.c b/mm/kasan/init.c index bc0ad208b3a7a..7ca0b92d5886d 100644 --- a/mm/kasan/init.c +++ b/mm/kasan/init.c @@ -64,7 +64,8 @@ static inline bool kasan_pmd_table(pud_t pud) return false; } #endif -pte_t kasan_early_shadow_pte[PTRS_PER_PTE] __page_aligned_bss; +pte_t kasan_early_shadow_pte[PTRS_PER_PTE + PTE_HWTABLE_PTRS] + __page_aligned_bss; static inline bool kasan_pte_table(pmd_t pmd) { -- GitLab From c22ee5284cf58017fa8c6d21d8f8c68159b6faab Mon Sep 17 00:00:00 2001 From: Miaohe Lin Date: Tue, 12 Jan 2021 15:49:18 -0800 Subject: [PATCH 0583/2012] mm/vmalloc.c: fix potential memory leak In VM_MAP_PUT_PAGES case, we should put pages and free array in vfree. But we missed to set area->nr_pages in vmap(). So we would fail to put pages in __vunmap() because area->nr_pages = 0. Link: https://lkml.kernel.org/r/20210107123541.39206-1-linmiaohe@huawei.com Fixes: b944afc9d64d ("mm: add a VM_MAP_PUT_PAGES flag for vmap") Signed-off-by: Shijie Luo Signed-off-by: Miaohe Lin Reviewed-by: Uladzislau Rezki (Sony) Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/vmalloc.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 4d88fe5a277ac..e6f352bf04982 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -2420,8 +2420,10 @@ void *vmap(struct page **pages, unsigned int count, return NULL; } - if (flags & VM_MAP_PUT_PAGES) + if (flags & VM_MAP_PUT_PAGES) { area->pages = pages; + area->nr_pages = count; + } return area->addr; } EXPORT_SYMBOL(vmap); -- GitLab From f555befd185dc097ede887eb7b308c2e1c1369d4 Mon Sep 17 00:00:00 2001 From: Jan Stancek Date: Tue, 12 Jan 2021 15:49:21 -0800 Subject: [PATCH 0584/2012] mm: migrate: initialize err in do_migrate_pages After commit 236c32eb1096 ("mm: migrate: clean up migrate_prep{_local}")', do_migrate_pages can return uninitialized variable 'err' (which is propagated to user-space as error) when 'from' and 'to' nodesets are identical. This can be reproduced with LTP migrate_pages01, which calls migrate_pages() with same set for both old/new_nodes. Add 'err' initialization back. Link: https://lkml.kernel.org/r/456a021c7ef3636d7668cec9dcb4a446a4244812.1609855564.git.jstancek@redhat.com Fixes: 236c32eb1096 ("mm: migrate: clean up migrate_prep{_local}") Signed-off-by: Jan Stancek Acked-by: Michal Hocko Acked-by: Yang Shi Cc: Zi Yan Cc: Jan Kara Cc: Matthew Wilcox Cc: Mel Gorman Cc: Song Liu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/mempolicy.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 8cf96bd21341c..2c3a865020534 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -1111,7 +1111,7 @@ int do_migrate_pages(struct mm_struct *mm, const nodemask_t *from, const nodemask_t *to, int flags) { int busy = 0; - int err; + int err = 0; nodemask_t tmp; migrate_prep(); -- GitLab From 0eb98f1588c2cc7a79816d84ab18a55d254f481c Mon Sep 17 00:00:00 2001 From: Miaohe Lin Date: Tue, 12 Jan 2021 15:49:24 -0800 Subject: [PATCH 0585/2012] mm/hugetlb: fix potential missing huge page size info The huge page size is encoded for VM_FAULT_HWPOISON errors only. So if we return VM_FAULT_HWPOISON, huge page size would just be ignored. Link: https://lkml.kernel.org/r/20210107123449.38481-1-linmiaohe@huawei.com Fixes: aa50d3a7aa81 ("Encode huge page size for VM_FAULT_HWPOISON errors") Signed-off-by: Miaohe Lin Reviewed-by: Mike Kravetz Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/hugetlb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index a2602969873dc..18f6ee3179002 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -4371,7 +4371,7 @@ retry: * So we need to block hugepage fault by PG_hwpoison bit check. */ if (unlikely(PageHWPoison(page))) { - ret = VM_FAULT_HWPOISON | + ret = VM_FAULT_HWPOISON_LARGE | VM_FAULT_SET_HINDEX(hstate_index(h)); goto backout_unlocked; } -- GitLab From 7e5f1126b54a29c078c07a5fe245e269f3c05500 Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Tue, 12 Jan 2021 15:49:27 -0800 Subject: [PATCH 0586/2012] MAINTAINERS: add Vlastimil as slab allocators maintainer I would like to help with slab allocators maintenance, from the perspective of being responsible for SLAB and more recently also SLUB in an enterprise distro kernel and supporting its users. Recently I've been focusing on improving SLUB's debugging features, and patch review in the area, including the kmemcg accounting rewrite last year. Link: https://lkml.kernel.org/r/20210108110353.19971-1-vbabka@suse.cz Signed-off-by: Vlastimil Babka Acked-by: Christoph Lameter Acked-by: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index cc1e6a5ee6e67..28cbe0ec66105 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -16319,6 +16319,7 @@ M: Pekka Enberg M: David Rientjes M: Joonsoo Kim M: Andrew Morton +M: Vlastimil Babka L: linux-mm@kvack.org S: Maintained F: include/linux/sl?b*.h -- GitLab From 6696d2a6f38c0beedf03c381edfc392ecf7631b4 Mon Sep 17 00:00:00 2001 From: Oscar Salvador Date: Tue, 12 Jan 2021 15:49:30 -0800 Subject: [PATCH 0587/2012] mm,hwpoison: fix printing of page flags Format %pG expects a lower case 'p' in order to print the flags. Fix it. Link: https://lkml.kernel.org/r/20210108085202.4506-1-osalvador@suse.de Fixes: 8295d535e2aa ("mm,hwpoison: refactor get_any_page") Signed-off-by: Oscar Salvador Reported-by: Dan Carpenter Reviewed-by: Anshuman Khandual Acked-by: Naoya Horiguchi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memory-failure.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 5a38e9eade946..04d9f154a130d 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -1940,7 +1940,7 @@ retry: goto retry; } } else if (ret == -EIO) { - pr_info("%s: %#lx: unknown page type: %lx (%pGP)\n", + pr_info("%s: %#lx: unknown page type: %lx (%pGp)\n", __func__, pfn, page->flags, &page->flags); } -- GitLab From eb351d75ce1e75b4f793d609efac08426ca50acd Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Tue, 12 Jan 2021 15:49:33 -0800 Subject: [PATCH 0588/2012] mm/process_vm_access.c: include compat.h Fix the build error: mm/process_vm_access.c:277:5: error: implicit declaration of function 'in_compat_syscall'; did you mean 'in_ia32_syscall'? [-Werror=implicit-function-declaration] Fixes: 38dc5079da7081e "Fix compat regression in process_vm_rw()" Reported-by: syzbot+5b0d0de84d6c65b8dd2b@syzkaller.appspotmail.com Cc: Kyle Huey Cc: Jens Axboe Cc: Al Viro Cc: Christoph Hellwig Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/process_vm_access.c | 1 + 1 file changed, 1 insertion(+) diff --git a/mm/process_vm_access.c b/mm/process_vm_access.c index 4bcc119580890..f5fee9cf90f8b 100644 --- a/mm/process_vm_access.c +++ b/mm/process_vm_access.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include -- GitLab From a18caa97b1bda0a3d126a7be165ddcfc56c2dde6 Mon Sep 17 00:00:00 2001 From: Marco Felsch Date: Mon, 11 Jan 2021 09:59:32 +0100 Subject: [PATCH 0589/2012] net: phy: smsc: fix clk error handling Commit bedd8d78aba3 ("net: phy: smsc: LAN8710/20: add phy refclk in support") added the phy clk support. The commit already checks if clk_get_optional() throw an error but instead of returning the error it ignores it. Fixes: bedd8d78aba3 ("net: phy: smsc: LAN8710/20: add phy refclk in support") Suggested-by: Jakub Kicinski Signed-off-by: Marco Felsch Reviewed-by: Andrew Lunn Link: https://lore.kernel.org/r/20210111085932.28680-1-m.felsch@pengutronix.de Signed-off-by: Jakub Kicinski --- drivers/net/phy/smsc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/phy/smsc.c b/drivers/net/phy/smsc.c index 33372756a451f..ddb78fb4d6dc3 100644 --- a/drivers/net/phy/smsc.c +++ b/drivers/net/phy/smsc.c @@ -317,7 +317,8 @@ static int smsc_phy_probe(struct phy_device *phydev) /* Make clk optional to keep DTB backward compatibility. */ priv->refclk = clk_get_optional(dev, NULL); if (IS_ERR(priv->refclk)) - dev_err_probe(dev, PTR_ERR(priv->refclk), "Failed to request clock\n"); + return dev_err_probe(dev, PTR_ERR(priv->refclk), + "Failed to request clock\n"); ret = clk_prepare_enable(priv->refclk); if (ret) -- GitLab From f0791b92d2b6e1bf36aed0eae93a0ef85eaa3a62 Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Mon, 11 Jan 2021 13:50:46 +0100 Subject: [PATCH 0590/2012] net: ks8851: Select PHYLIB and MICREL_PHY in Kconfig The PHYLIB must be selected to provide mdiobus_*() functions, and the MICREL_PHY is necessary too, as that is the only possible PHY attached to the KS8851 (it is the internal PHY). Fixes: ef3631220d2b ("net: ks8851: Register MDIO bus and the internal PHY") Signed-off-by: Marek Vasut Cc: Heiner Kallweit Cc: Lukas Wunner Reviewed-by: Andrew Lunn Link: https://lore.kernel.org/r/20210111125046.36326-1-marex@denx.de Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/micrel/Kconfig | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/micrel/Kconfig b/drivers/net/ethernet/micrel/Kconfig index 42bc014136fe3..93df3049cdc05 100644 --- a/drivers/net/ethernet/micrel/Kconfig +++ b/drivers/net/ethernet/micrel/Kconfig @@ -31,6 +31,8 @@ config KS8851 select MII select CRC32 select EEPROM_93CX6 + select PHYLIB + select MICREL_PHY help SPI driver for Micrel KS8851 SPI attached network chip. @@ -40,6 +42,8 @@ config KS8851_MLL select MII select CRC32 select EEPROM_93CX6 + select PHYLIB + select MICREL_PHY help This platform driver is for Micrel KS8851 Address/data bus multiplexed network chip. -- GitLab From 07b90056cb15ff9877dca0d8f1b6583d1051f724 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Tue, 12 Jan 2021 01:09:43 +0200 Subject: [PATCH 0591/2012] net: dsa: unbind all switches from tree when DSA master unbinds Currently the following happens when a DSA master driver unbinds while there are DSA switches attached to it: $ echo 0000:00:00.5 > /sys/bus/pci/drivers/mscc_felix/unbind ------------[ cut here ]------------ WARNING: CPU: 0 PID: 392 at net/core/dev.c:9507 Call trace: rollback_registered_many+0x5fc/0x688 unregister_netdevice_queue+0x98/0x120 dsa_slave_destroy+0x4c/0x88 dsa_port_teardown.part.16+0x78/0xb0 dsa_tree_teardown_switches+0x58/0xc0 dsa_unregister_switch+0x104/0x1b8 felix_pci_remove+0x24/0x48 pci_device_remove+0x48/0xf0 device_release_driver_internal+0x118/0x1e8 device_driver_detach+0x28/0x38 unbind_store+0xd0/0x100 Located at the above location is this WARN_ON: /* Notifier chain MUST detach us all upper devices. */ WARN_ON(netdev_has_any_upper_dev(dev)); Other stacked interfaces, like VLAN, do indeed listen for NETDEV_UNREGISTER on the real_dev and also unregister themselves at that time, which is clearly the behavior that rollback_registered_many expects. But DSA interfaces are not VLAN. They have backing hardware (platform devices, PCI devices, MDIO, SPI etc) which have a life cycle of their own and we can't just trigger an unregister from the DSA framework when we receive a netdev notifier that the master unregisters. Luckily, there is something we can do, and that is to inform the driver core that we have a runtime dependency to the DSA master interface's device, and create a device link where that is the supplier and we are the consumer. Having this device link will make the DSA switch unbind before the DSA master unbinds, which is enough to avoid the WARN_ON from rollback_registered_many. Note that even before the blamed commit, DSA did nothing intelligent when the master interface got unregistered either. See the discussion here: https://lore.kernel.org/netdev/20200505210253.20311-1-f.fainelli@gmail.com/ But this time, at least the WARN_ON is loud enough that the upper_dev_link commit can be blamed. The advantage with this approach vs dev_hold(master) in the attached link is that the latter is not meant for long term reference counting. With dev_hold, the only thing that will happen is that when the user attempts an unbind of the DSA master, netdev_wait_allrefs will keep waiting and waiting, due to DSA keeping the refcount forever. DSA would not access freed memory corresponding to the master interface, but the unbind would still result in a freeze. Whereas with device links, graceful teardown is ensured. It even works with cascaded DSA trees. $ echo 0000:00:00.2 > /sys/bus/pci/drivers/fsl_enetc/unbind [ 1818.797546] device swp0 left promiscuous mode [ 1819.301112] sja1105 spi2.0: Link is Down [ 1819.307981] DSA: tree 1 torn down [ 1819.312408] device eno2 left promiscuous mode [ 1819.656803] mscc_felix 0000:00:00.5: Link is Down [ 1819.667194] DSA: tree 0 torn down [ 1819.711557] fsl_enetc 0000:00:00.2 eno2: Link is Down This approach allows us to keep the DSA framework absolutely unchanged, and the driver core will just know to unbind us first when the master goes away - as opposed to the large (and probably impossible) rework required if attempting to listen for NETDEV_UNREGISTER. As per the documentation at Documentation/driver-api/device_link.rst, specifying the DL_FLAG_AUTOREMOVE_CONSUMER flag causes the device link to be automatically purged when the consumer fails to probe or later unbinds. So we don't need to keep the consumer_link variable in struct dsa_switch. Fixes: 2f1e8ea726e9 ("net: dsa: link interfaces with the DSA master to get rid of lockdep warnings") Signed-off-by: Vladimir Oltean Reviewed-by: Florian Fainelli Tested-by: Florian Fainelli Link: https://lore.kernel.org/r/20210111230943.3701806-1-olteanv@gmail.com Signed-off-by: Jakub Kicinski --- net/dsa/master.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/net/dsa/master.c b/net/dsa/master.c index 5a0f6fec4271d..cb3a5cf99b258 100644 --- a/net/dsa/master.c +++ b/net/dsa/master.c @@ -309,8 +309,18 @@ static struct lock_class_key dsa_master_addr_list_lock_key; int dsa_master_setup(struct net_device *dev, struct dsa_port *cpu_dp) { int mtu = ETH_DATA_LEN + cpu_dp->tag_ops->overhead; + struct dsa_switch *ds = cpu_dp->ds; + struct device_link *consumer_link; int ret; + /* The DSA master must use SET_NETDEV_DEV for this to work. */ + consumer_link = device_link_add(ds->dev, dev->dev.parent, + DL_FLAG_AUTOREMOVE_CONSUMER); + if (!consumer_link) + netdev_err(dev, + "Failed to create a device link to DSA switch %s\n", + dev_name(ds->dev)); + rtnl_lock(); ret = dev_set_mtu(dev, mtu); rtnl_unlock(); -- GitLab From 91158e1680b164c8d101144ca916a3dca10c3e17 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Tue, 12 Jan 2021 02:48:31 +0200 Subject: [PATCH 0592/2012] net: dsa: clear devlink port type before unregistering slave netdevs Florian reported a use-after-free bug in devlink_nl_port_fill found with KASAN: (devlink_nl_port_fill) (devlink_port_notify) (devlink_port_unregister) (dsa_switch_teardown.part.3) (dsa_tree_teardown_switches) (dsa_unregister_switch) (bcm_sf2_sw_remove) (platform_remove) (device_release_driver_internal) (device_links_unbind_consumers) (device_release_driver_internal) (device_driver_detach) (unbind_store) Allocated by task 31: alloc_netdev_mqs+0x5c/0x50c dsa_slave_create+0x110/0x9c8 dsa_register_switch+0xdb0/0x13a4 b53_switch_register+0x47c/0x6dc bcm_sf2_sw_probe+0xaa4/0xc98 platform_probe+0x90/0xf4 really_probe+0x184/0x728 driver_probe_device+0xa4/0x278 __device_attach_driver+0xe8/0x148 bus_for_each_drv+0x108/0x158 Freed by task 249: free_netdev+0x170/0x194 dsa_slave_destroy+0xac/0xb0 dsa_port_teardown.part.2+0xa0/0xb4 dsa_tree_teardown_switches+0x50/0xc4 dsa_unregister_switch+0x124/0x250 bcm_sf2_sw_remove+0x98/0x13c platform_remove+0x44/0x5c device_release_driver_internal+0x150/0x254 device_links_unbind_consumers+0xf8/0x12c device_release_driver_internal+0x84/0x254 device_driver_detach+0x30/0x34 unbind_store+0x90/0x134 What happens is that devlink_port_unregister emits a netlink DEVLINK_CMD_PORT_DEL message which associates the devlink port that is getting unregistered with the ifindex of its corresponding net_device. Only trouble is, the net_device has already been unregistered. It looks like we can stub out the search for a corresponding net_device if we clear the devlink_port's type. This looks like a bit of a hack, but also seems to be the reason why the devlink_port_type_clear function exists in the first place. Fixes: 3122433eb533 ("net: dsa: Register devlink ports before calling DSA driver setup()") Signed-off-by: Vladimir Oltean Reviewed-by: Florian Fainelli Tested-by: Florian fainelli Reported-by: Florian Fainelli Link: https://lore.kernel.org/r/20210112004831.3778323-1-olteanv@gmail.com Signed-off-by: Jakub Kicinski --- net/dsa/dsa2.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index 183003e45762a..a47e0f9b20d0a 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -353,9 +353,13 @@ static int dsa_port_devlink_setup(struct dsa_port *dp) static void dsa_port_teardown(struct dsa_port *dp) { + struct devlink_port *dlp = &dp->devlink_port; + if (!dp->setup) return; + devlink_port_type_clear(dlp); + switch (dp->type) { case DSA_PORT_TYPE_UNUSED: break; -- GitLab From 1e8636b366be9deca4492e82c54242f9f5e5b731 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Tue, 12 Jan 2021 09:28:41 +0100 Subject: [PATCH 0593/2012] r8169: align rtl_wol_suspend_quirk with vendor driver and rename it At least from chip version 25 the vendor driver sets these rx flags for all chip versions if WOL is enabled. Therefore I wouldn't consider it a quirk, so let's rename the function. Signed-off-by: Heiner Kallweit Reviewed-by: Saeed Mahameed Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/realtek/r8169_main.c | 20 ++++---------------- 1 file changed, 4 insertions(+), 16 deletions(-) diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index 33336098b1e56..84f488d1c8849 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -2210,23 +2210,11 @@ static int rtl_set_mac_address(struct net_device *dev, void *p) return 0; } -static void rtl_wol_suspend_quirk(struct rtl8169_private *tp) +static void rtl_wol_enable_rx(struct rtl8169_private *tp) { - switch (tp->mac_version) { - case RTL_GIGA_MAC_VER_25: - case RTL_GIGA_MAC_VER_26: - case RTL_GIGA_MAC_VER_29: - case RTL_GIGA_MAC_VER_30: - case RTL_GIGA_MAC_VER_32: - case RTL_GIGA_MAC_VER_33: - case RTL_GIGA_MAC_VER_34: - case RTL_GIGA_MAC_VER_37 ... RTL_GIGA_MAC_VER_63: + if (tp->mac_version >= RTL_GIGA_MAC_VER_25) RTL_W32(tp, RxConfig, RTL_R32(tp, RxConfig) | AcceptBroadcast | AcceptMulticast | AcceptMyPhys); - break; - default: - break; - } } static void rtl_prepare_power_down(struct rtl8169_private *tp) @@ -2240,7 +2228,7 @@ static void rtl_prepare_power_down(struct rtl8169_private *tp) if (device_may_wakeup(tp_to_dev(tp))) { phy_speed_down(tp->phydev, false); - rtl_wol_suspend_quirk(tp); + rtl_wol_enable_rx(tp); } } @@ -4872,7 +4860,7 @@ static void rtl_shutdown(struct pci_dev *pdev) if (system_state == SYSTEM_POWER_OFF) { if (tp->saved_wolopts) { - rtl_wol_suspend_quirk(tp); + rtl_wol_enable_rx(tp); rtl_wol_shutdown_quirk(tp); } -- GitLab From 206a75e003e17aad4fd60047deee2252fdc3df38 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Tue, 12 Jan 2021 09:29:45 +0100 Subject: [PATCH 0594/2012] r8169: improve rtl8169_rx_csum Extend the mask to include the checksum failure bits. This allows to simplify the condition in rtl8169_rx_csum(). Signed-off-by: Heiner Kallweit Reviewed-by: Saeed Mahameed Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/realtek/r8169_main.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index 84f488d1c8849..b4c080cc6a28f 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -533,6 +533,9 @@ enum rtl_rx_desc_bit { IPFail = (1 << 16), /* IP checksum failed */ UDPFail = (1 << 15), /* UDP/IP checksum failed */ TCPFail = (1 << 14), /* TCP/IP checksum failed */ + +#define RxCSFailMask (IPFail | UDPFail | TCPFail) + RxVlanTag = (1 << 16), /* VLAN tag available */ }; @@ -4377,10 +4380,9 @@ static inline int rtl8169_fragmented_frame(u32 status) static inline void rtl8169_rx_csum(struct sk_buff *skb, u32 opts1) { - u32 status = opts1 & RxProtoMask; + u32 status = opts1 & (RxProtoMask | RxCSFailMask); - if (((status == RxProtoTCP) && !(opts1 & TCPFail)) || - ((status == RxProtoUDP) && !(opts1 & UDPFail))) + if (status == RxProtoTCP || status == RxProtoUDP) skb->ip_summed = CHECKSUM_UNNECESSARY; else skb_checksum_none_assert(skb); -- GitLab From e0d38b5880758432f74fe17fea8281691d1eb3c0 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Tue, 12 Jan 2021 09:31:20 +0100 Subject: [PATCH 0595/2012] r8169: improve DASH support Instead of doing the full DASH check each time r8168_check_dash() is called, let's do it once in probe and store DASH capabilities in a new rtl8169_private member. Signed-off-by: Heiner Kallweit Reviewed-by: Saeed Mahameed Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/realtek/r8169_main.c | 53 ++++++++++------------- 1 file changed, 22 insertions(+), 31 deletions(-) diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index b4c080cc6a28f..fb67d8f797ec5 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -591,6 +591,12 @@ enum rtl_flag { RTL_FLAG_MAX }; +enum rtl_dash_type { + RTL_DASH_NONE, + RTL_DASH_DP, + RTL_DASH_EP, +}; + struct rtl8169_private { void __iomem *mmio_addr; /* memory map physical address */ struct pci_dev *pci_dev; @@ -598,6 +604,7 @@ struct rtl8169_private { struct phy_device *phydev; struct napi_struct napi; enum mac_version mac_version; + enum rtl_dash_type dash_type; u32 cur_rx; /* Index into the Rx descriptor buffer of next Rx pkt. */ u32 cur_tx; /* Index into the Tx descriptor buffer of next Rx pkt. */ u32 dirty_tx; @@ -1184,19 +1191,10 @@ static void rtl8168ep_driver_start(struct rtl8169_private *tp) static void rtl8168_driver_start(struct rtl8169_private *tp) { - switch (tp->mac_version) { - case RTL_GIGA_MAC_VER_27: - case RTL_GIGA_MAC_VER_28: - case RTL_GIGA_MAC_VER_31: + if (tp->dash_type == RTL_DASH_DP) rtl8168dp_driver_start(tp); - break; - case RTL_GIGA_MAC_VER_49 ... RTL_GIGA_MAC_VER_52: + else rtl8168ep_driver_start(tp); - break; - default: - BUG(); - break; - } } static void rtl8168dp_driver_stop(struct rtl8169_private *tp) @@ -1215,44 +1213,35 @@ static void rtl8168ep_driver_stop(struct rtl8169_private *tp) static void rtl8168_driver_stop(struct rtl8169_private *tp) { - switch (tp->mac_version) { - case RTL_GIGA_MAC_VER_27: - case RTL_GIGA_MAC_VER_28: - case RTL_GIGA_MAC_VER_31: + if (tp->dash_type == RTL_DASH_DP) rtl8168dp_driver_stop(tp); - break; - case RTL_GIGA_MAC_VER_49 ... RTL_GIGA_MAC_VER_52: + else rtl8168ep_driver_stop(tp); - break; - default: - BUG(); - break; - } } static bool r8168dp_check_dash(struct rtl8169_private *tp) { u16 reg = rtl8168_get_ocp_reg(tp); - return !!(r8168dp_ocp_read(tp, reg) & 0x00008000); + return r8168dp_ocp_read(tp, reg) & BIT(15); } static bool r8168ep_check_dash(struct rtl8169_private *tp) { - return r8168ep_ocp_read(tp, 0x128) & 0x00000001; + return r8168ep_ocp_read(tp, 0x128) & BIT(0); } -static bool r8168_check_dash(struct rtl8169_private *tp) +static enum rtl_dash_type rtl_check_dash(struct rtl8169_private *tp) { switch (tp->mac_version) { case RTL_GIGA_MAC_VER_27: case RTL_GIGA_MAC_VER_28: case RTL_GIGA_MAC_VER_31: - return r8168dp_check_dash(tp); + return r8168dp_check_dash(tp) ? RTL_DASH_DP : RTL_DASH_NONE; case RTL_GIGA_MAC_VER_49 ... RTL_GIGA_MAC_VER_52: - return r8168ep_check_dash(tp); + return r8168ep_check_dash(tp) ? RTL_DASH_EP : RTL_DASH_NONE; default: - return false; + return RTL_DASH_NONE; } } @@ -2222,7 +2211,7 @@ static void rtl_wol_enable_rx(struct rtl8169_private *tp) static void rtl_prepare_power_down(struct rtl8169_private *tp) { - if (r8168_check_dash(tp)) + if (tp->dash_type != RTL_DASH_NONE) return; if (tp->mac_version == RTL_GIGA_MAC_VER_32 || @@ -4880,7 +4869,7 @@ static void rtl_remove_one(struct pci_dev *pdev) unregister_netdev(tp->dev); - if (r8168_check_dash(tp)) + if (tp->dash_type != RTL_DASH_NONE) rtl8168_driver_stop(tp); rtl_release_firmware(tp); @@ -5240,6 +5229,8 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) tp->mac_version = chipset; + tp->dash_type = rtl_check_dash(tp); + tp->cp_cmd = RTL_R16(tp, CPlusCmd) & CPCMD_MASK; if (sizeof(dma_addr_t) > 4 && tp->mac_version >= RTL_GIGA_MAC_VER_18 && @@ -5344,7 +5335,7 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) jumbo_max, tp->mac_version <= RTL_GIGA_MAC_VER_06 ? "ok" : "ko"); - if (r8168_check_dash(tp)) { + if (tp->dash_type != RTL_DASH_NONE) { netdev_info(dev, "DASH enabled\n"); rtl8168_driver_start(tp); } -- GitLab From 2c82b7fe219a4ee3024ce77334247d60194cc41d Mon Sep 17 00:00:00 2001 From: Bhaskar Chowdhury Date: Tue, 12 Jan 2021 16:01:52 +0530 Subject: [PATCH 0596/2012] net: marvell: Fixed two spellings,controling to controlling and oen to one s/oen/one/ s/controling/controlling/ Signed-off-by: Bhaskar Chowdhury Acked-by: Randy Dunlap Link: https://lore.kernel.org/r/20210112103152.13222-1-unixbhaskar@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.h b/drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.h index 8867f25afab40..663157dc8062b 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.h +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.h @@ -143,7 +143,7 @@ struct mvpp2_cls_c2_entry { /* Number of per-port dedicated entries in the C2 TCAM */ #define MVPP22_CLS_C2_PORT_N_FLOWS MVPP2_N_RFS_ENTRIES_PER_FLOW -/* Each port has oen range per flow type + one entry controling the global RSS +/* Each port has one range per flow type + one entry controlling the global RSS * setting and the default rx queue */ #define MVPP22_CLS_C2_PORT_RANGE (MVPP22_CLS_C2_PORT_N_FLOWS + 1) -- GitLab From f2cb4b2397ca9e6e972d6551e5461d1f1d81c23f Mon Sep 17 00:00:00 2001 From: Lukas Bulwahn Date: Mon, 11 Jan 2021 11:22:12 +0100 Subject: [PATCH 0597/2012] scsi: docs: ABI: sysfs-driver-ufs: Rectify table formatting Commit 0b2894cd0fdf ("scsi: docs: ABI: sysfs-driver-ufs: Add DeepSleep power mode") adds new entries in tables of sysfs-driver-ufs ABI documentation, but formatted the table incorrectly. Hence, make htmldocs warns: ./Documentation/ABI/testing/sysfs-driver-ufs:{915,956}: WARNING: Malformed table. Text in column margin in table line 15. Rectify table formatting for DeepSleep power mode. Link: https://lore.kernel.org/r/20210111102212.19377-1-lukas.bulwahn@gmail.com Acked-by: Adrian Hunter Signed-off-by: Lukas Bulwahn Signed-off-by: Martin K. Petersen --- Documentation/ABI/testing/sysfs-driver-ufs | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/Documentation/ABI/testing/sysfs-driver-ufs b/Documentation/ABI/testing/sysfs-driver-ufs index e77fa784d6d81..75ccc5c62b3c4 100644 --- a/Documentation/ABI/testing/sysfs-driver-ufs +++ b/Documentation/ABI/testing/sysfs-driver-ufs @@ -932,8 +932,9 @@ Description: This entry could be used to set or show the UFS device 5 UFS device will be powered off, UIC link will be powered off 6 UFS device will be moved to deep sleep, UIC link - will be powered off. Note, deep sleep might not be - supported in which case this value will not be accepted + will be powered off. Note, deep sleep might not be + supported in which case this value will not be + accepted == ==================================================== What: /sys/bus/platform/drivers/ufshcd/*/rpm_target_dev_state @@ -973,8 +974,9 @@ Description: This entry could be used to set or show the UFS device 5 UFS device will be powered off, UIC link will be powered off 6 UFS device will be moved to deep sleep, UIC link - will be powered off. Note, deep sleep might not be - supported in which case this value will not be accepted + will be powered off. Note, deep sleep might not be + supported in which case this value will not be + accepted == ==================================================== What: /sys/bus/platform/drivers/ufshcd/*/spm_target_dev_state -- GitLab From 72eeb7c7151302ef007f1acd018cbf6f30e50321 Mon Sep 17 00:00:00 2001 From: Martin Wilck Date: Mon, 11 Jan 2021 15:25:41 +0100 Subject: [PATCH 0598/2012] scsi: scsi_transport_srp: Don't block target in failfast state If the port is in SRP_RPORT_FAIL_FAST state when srp_reconnect_rport() is entered, a transition to SDEV_BLOCK would be illegal, and a kernel WARNING would be triggered. Skip scsi_target_block() in this case. Link: https://lore.kernel.org/r/20210111142541.21534-1-mwilck@suse.com Reviewed-by: Bart Van Assche Signed-off-by: Martin Wilck Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_transport_srp.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/scsi_transport_srp.c b/drivers/scsi/scsi_transport_srp.c index cba1cf6a1c12d..1e939a2a387f3 100644 --- a/drivers/scsi/scsi_transport_srp.c +++ b/drivers/scsi/scsi_transport_srp.c @@ -541,7 +541,14 @@ int srp_reconnect_rport(struct srp_rport *rport) res = mutex_lock_interruptible(&rport->mutex); if (res) goto out; - scsi_target_block(&shost->shost_gendev); + if (rport->state != SRP_RPORT_FAIL_FAST) + /* + * sdev state must be SDEV_TRANSPORT_OFFLINE, transition + * to SDEV_BLOCK is illegal. Calling scsi_target_unblock() + * later is ok though, scsi_internal_device_unblock_nowait() + * treats SDEV_TRANSPORT_OFFLINE like SDEV_BLOCK. + */ + scsi_target_block(&shost->shost_gendev); res = rport->state != SRP_RPORT_LOST ? i->f->reconnect(rport) : -ENODEV; pr_debug("%s (state %d): transport.reconnect() returned %d\n", dev_name(&shost->shost_gendev), rport->state, res); -- GitLab From 69d25a6cf4cadf756460a1bf82996ea240539999 Mon Sep 17 00:00:00 2001 From: Long Li Date: Fri, 8 Jan 2021 16:53:41 -0800 Subject: [PATCH 0599/2012] hv_netvsc: Check VF datapath when sending traffic to VF The driver needs to check if the datapath has been switched to VF before sending traffic to VF. Signed-off-by: Long Li Reviewed-by: Haiyang Zhang Signed-off-by: Jakub Kicinski --- drivers/net/hyperv/netvsc_drv.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index f32f28311d573..5dd4f37afa3da 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -539,7 +539,8 @@ static int netvsc_xmit(struct sk_buff *skb, struct net_device *net, bool xdp_tx) */ vf_netdev = rcu_dereference_bh(net_device_ctx->vf_netdev); if (vf_netdev && netif_running(vf_netdev) && - netif_carrier_ok(vf_netdev) && !netpoll_tx_running(net)) + netif_carrier_ok(vf_netdev) && !netpoll_tx_running(net) && + net_device_ctx->data_path_is_vf) return netvsc_vf_xmit(net, vf_netdev, skb); /* We will atmost need two pages to describe the rndis -- GitLab From 8b31f8c982b738e4130539e47f03967c599d8e22 Mon Sep 17 00:00:00 2001 From: Long Li Date: Fri, 8 Jan 2021 16:53:42 -0800 Subject: [PATCH 0600/2012] hv_netvsc: Wait for completion on request SWITCH_DATA_PATH The completion indicates if NVSP_MSG4_TYPE_SWITCH_DATA_PATH has been processed by the VSP. The traffic is steered to VF or synthetic after we receive this completion. Signed-off-by: Long Li Reported-by: kernel test robot Signed-off-by: Jakub Kicinski --- drivers/net/hyperv/netvsc.c | 37 ++++++++++++++++++++++++++++++--- drivers/net/hyperv/netvsc_drv.c | 1 - 2 files changed, 34 insertions(+), 4 deletions(-) diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index 2350342b961ff..3a3db2f0134d8 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -37,6 +37,10 @@ void netvsc_switch_datapath(struct net_device *ndev, bool vf) struct netvsc_device *nv_dev = rtnl_dereference(net_device_ctx->nvdev); struct nvsp_message *init_pkt = &nv_dev->channel_init_pkt; + /* Block sending traffic to VF if it's about to be gone */ + if (!vf) + net_device_ctx->data_path_is_vf = vf; + memset(init_pkt, 0, sizeof(struct nvsp_message)); init_pkt->hdr.msg_type = NVSP_MSG4_TYPE_SWITCH_DATA_PATH; if (vf) @@ -50,8 +54,11 @@ void netvsc_switch_datapath(struct net_device *ndev, bool vf) vmbus_sendpacket(dev->channel, init_pkt, sizeof(struct nvsp_message), - VMBUS_RQST_ID_NO_RESPONSE, - VM_PKT_DATA_INBAND, 0); + (unsigned long)init_pkt, + VM_PKT_DATA_INBAND, + VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED); + wait_for_completion(&nv_dev->channel_init_wait); + net_device_ctx->data_path_is_vf = vf; } /* Worker to setup sub channels on initial setup @@ -754,8 +761,31 @@ static void netvsc_send_completion(struct net_device *ndev, const struct vmpacket_descriptor *desc, int budget) { - const struct nvsp_message *nvsp_packet = hv_pkt_data(desc); + const struct nvsp_message *nvsp_packet; u32 msglen = hv_pkt_datalen(desc); + struct nvsp_message *pkt_rqst; + u64 cmd_rqst; + + /* First check if this is a VMBUS completion without data payload */ + if (!msglen) { + cmd_rqst = vmbus_request_addr(&incoming_channel->requestor, + (u64)desc->trans_id); + if (cmd_rqst == VMBUS_RQST_ERROR) { + netdev_err(ndev, "Invalid transaction id\n"); + return; + } + + pkt_rqst = (struct nvsp_message *)(uintptr_t)cmd_rqst; + switch (pkt_rqst->hdr.msg_type) { + case NVSP_MSG4_TYPE_SWITCH_DATA_PATH: + complete(&net_device->channel_init_wait); + break; + + default: + netdev_err(ndev, "Unexpected VMBUS completion!!\n"); + } + return; + } /* Ensure packet is big enough to read header fields */ if (msglen < sizeof(struct nvsp_message_header)) { @@ -763,6 +793,7 @@ static void netvsc_send_completion(struct net_device *ndev, return; } + nvsp_packet = hv_pkt_data(desc); switch (nvsp_packet->hdr.msg_type) { case NVSP_MSG_TYPE_INIT_COMPLETE: if (msglen < sizeof(struct nvsp_message_header) + diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index 5dd4f37afa3da..64ae5f4e974ed 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -2400,7 +2400,6 @@ static int netvsc_vf_changed(struct net_device *vf_netdev) if (net_device_ctx->data_path_is_vf == vf_is_up) return NOTIFY_OK; - net_device_ctx->data_path_is_vf = vf_is_up; netvsc_switch_datapath(ndev, vf_is_up); netdev_info(ndev, "Data path switched %s VF: %s\n", -- GitLab From 34b06a2eee44d469f2e2c013a83e6dac3aff6411 Mon Sep 17 00:00:00 2001 From: Long Li Date: Fri, 8 Jan 2021 16:53:43 -0800 Subject: [PATCH 0601/2012] hv_netvsc: Process NETDEV_GOING_DOWN on VF hot remove On VF hot remove, NETDEV_GOING_DOWN is sent to notify the VF is about to go down. At this time, the VF is still sending/receiving traffic and we request the VSP to switch datapath. On completion, the datapath is switched to synthetic and we can proceed with VF hot remove. Signed-off-by: Long Li Reviewed-by: Haiyang Zhang Signed-off-by: Jakub Kicinski --- drivers/net/hyperv/netvsc_drv.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index 64ae5f4e974ed..75b4d6703cf1e 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -2382,12 +2382,15 @@ static int netvsc_register_vf(struct net_device *vf_netdev) * During hibernation, if a VF NIC driver (e.g. mlx5) preserves the network * interface, there is only the CHANGE event and no UP or DOWN event. */ -static int netvsc_vf_changed(struct net_device *vf_netdev) +static int netvsc_vf_changed(struct net_device *vf_netdev, unsigned long event) { struct net_device_context *net_device_ctx; struct netvsc_device *netvsc_dev; struct net_device *ndev; - bool vf_is_up = netif_running(vf_netdev); + bool vf_is_up = false; + + if (event != NETDEV_GOING_DOWN) + vf_is_up = netif_running(vf_netdev); ndev = get_netvsc_byref(vf_netdev); if (!ndev) @@ -2716,7 +2719,8 @@ static int netvsc_netdev_event(struct notifier_block *this, case NETDEV_UP: case NETDEV_DOWN: case NETDEV_CHANGE: - return netvsc_vf_changed(event_dev); + case NETDEV_GOING_DOWN: + return netvsc_vf_changed(event_dev, event); default: return NOTIFY_DONE; } -- GitLab From cb82a54904a99df9e8f9e9d282046055dae5a730 Mon Sep 17 00:00:00 2001 From: Leon Schuermann Date: Mon, 11 Jan 2021 20:03:13 +0100 Subject: [PATCH 0602/2012] r8152: Add Lenovo Powered USB-C Travel Hub This USB-C Hub (17ef:721e) based on the Realtek RTL8153B chip used to use the cdc_ether driver. However, using this driver, with the system suspended the device constantly sends pause-frames as soon as the receive buffer fills up. This causes issues with other devices, where some Ethernet switches stop forwarding packets altogether. Using the Realtek driver (r8152) fixes this issue. Pause frames are no longer sent while the host system is suspended. Signed-off-by: Leon Schuermann Tested-by: Leon Schuermann Link: https://lore.kernel.org/r/20210111190312.12589-2-leon@is.currently.online Signed-off-by: Jakub Kicinski --- drivers/net/usb/cdc_ether.c | 7 +++++++ drivers/net/usb/r8152.c | 1 + 2 files changed, 8 insertions(+) diff --git a/drivers/net/usb/cdc_ether.c b/drivers/net/usb/cdc_ether.c index 8c1d61c2cbacb..6aaa0675c28a3 100644 --- a/drivers/net/usb/cdc_ether.c +++ b/drivers/net/usb/cdc_ether.c @@ -793,6 +793,13 @@ static const struct usb_device_id products[] = { .driver_info = 0, }, +/* Lenovo Powered USB-C Travel Hub (4X90S92381, based on Realtek RTL8153) */ +{ + USB_DEVICE_AND_INTERFACE_INFO(LENOVO_VENDOR_ID, 0x721e, USB_CLASS_COMM, + USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), + .driver_info = 0, +}, + /* ThinkPad USB-C Dock Gen 2 (based on Realtek RTL8153) */ { USB_DEVICE_AND_INTERFACE_INFO(LENOVO_VENDOR_ID, 0xa387, USB_CLASS_COMM, diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index c448d60898216..67cd6986634fb 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -6877,6 +6877,7 @@ static const struct usb_device_id rtl8152_table[] = { {REALTEK_USB_DEVICE(VENDOR_ID_LENOVO, 0x7205)}, {REALTEK_USB_DEVICE(VENDOR_ID_LENOVO, 0x720c)}, {REALTEK_USB_DEVICE(VENDOR_ID_LENOVO, 0x7214)}, + {REALTEK_USB_DEVICE(VENDOR_ID_LENOVO, 0x721e)}, {REALTEK_USB_DEVICE(VENDOR_ID_LENOVO, 0xa387)}, {REALTEK_USB_DEVICE(VENDOR_ID_LINKSYS, 0x0041)}, {REALTEK_USB_DEVICE(VENDOR_ID_NVIDIA, 0x09ff)}, -- GitLab From 2284bbd0cf3981462dc6d729c89851c66b05a66a Mon Sep 17 00:00:00 2001 From: Leon Schuermann Date: Mon, 11 Jan 2021 20:03:15 +0100 Subject: [PATCH 0603/2012] r8153_ecm: Add Lenovo Powered USB-C Hub as a fallback of r8152 This commit enables the use of the r8153_ecm driver, introduced with commit c1aedf015ebdd0 ("net/usb/r8153_ecm: support ECM mode for RTL8153") for the Lenovo Powered USB-C Hub (17ef:721e) based on the Realtek RTL8153B chip. This results in the following driver preference: - if r8152 is available, use the r8152 driver - if r8152 is not available, use the r8153_ecm driver This is done to prevent the NIC from constantly sending pause frames when the host system enters standby (fixed by using the r8152 driver in "r8152: Add Lenovo Powered USB-C Travel Hub"), while still allowing the device to work with the r8153_ecm driver as a fallback. Signed-off-by: Leon Schuermann Tested-by: Leon Schuermann Link: https://lore.kernel.org/r/20210111190312.12589-3-leon@is.currently.online Signed-off-by: Jakub Kicinski --- drivers/net/usb/r8153_ecm.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/net/usb/r8153_ecm.c b/drivers/net/usb/r8153_ecm.c index 2c3fabd38b163..20b2df8d74ae1 100644 --- a/drivers/net/usb/r8153_ecm.c +++ b/drivers/net/usb/r8153_ecm.c @@ -122,12 +122,20 @@ static const struct driver_info r8153_info = { }; static const struct usb_device_id products[] = { +/* Realtek RTL8153 Based USB 3.0 Ethernet Adapters */ { USB_DEVICE_AND_INTERFACE_INFO(VENDOR_ID_REALTEK, 0x8153, USB_CLASS_COMM, USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), .driver_info = (unsigned long)&r8153_info, }, +/* Lenovo Powered USB-C Travel Hub (4X90S92381, based on Realtek RTL8153) */ +{ + USB_DEVICE_AND_INTERFACE_INFO(VENDOR_ID_LENOVO, 0x721e, USB_CLASS_COMM, + USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), + .driver_info = (unsigned long)&r8153_info, +}, + { }, /* END */ }; MODULE_DEVICE_TABLE(usb, products); -- GitLab From 9cc8976c69eb626a7a5100239eb7c69b1a1a609f Mon Sep 17 00:00:00 2001 From: Russell King Date: Sun, 10 Jan 2021 10:59:38 +0000 Subject: [PATCH 0604/2012] net: sfp: add debugfs support Add debugfs support to SFP so that the internal state of the SFP state machines and hardware signal state can be viewed from userspace, rather than having to compile a debug kernel to view state transitions in the kernel log. The 'state' output looks like: Module state: empty Module probe attempts: 0 0 Device state: up Main state: down Fault recovery remaining retries: 5 PHY probe remaining retries: 12 moddef0: 0 rx_los: 1 tx_fault: 1 tx_disable: 1 Reviewed-by: Andrew Lunn Signed-off-by: Russell King Link: https://lore.kernel.org/r/E1kyYRe-0004kN-3F@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/phy/sfp.c | 55 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) diff --git a/drivers/net/phy/sfp.c b/drivers/net/phy/sfp.c index 55e6a2fc5bc69..b2a5ed6915fa3 100644 --- a/drivers/net/phy/sfp.c +++ b/drivers/net/phy/sfp.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include #include +#include #include #include #include @@ -258,6 +259,9 @@ struct sfp { char *hwmon_name; #endif +#if IS_ENABLED(CONFIG_DEBUG_FS) + struct dentry *debugfs_dir; +#endif }; static bool sff_module_supported(const struct sfp_eeprom_id *id) @@ -1390,6 +1394,54 @@ static void sfp_module_tx_enable(struct sfp *sfp) sfp_set_state(sfp, sfp->state); } +#if IS_ENABLED(CONFIG_DEBUG_FS) +static int sfp_debug_state_show(struct seq_file *s, void *data) +{ + struct sfp *sfp = s->private; + + seq_printf(s, "Module state: %s\n", + mod_state_to_str(sfp->sm_mod_state)); + seq_printf(s, "Module probe attempts: %d %d\n", + R_PROBE_RETRY_INIT - sfp->sm_mod_tries_init, + R_PROBE_RETRY_SLOW - sfp->sm_mod_tries); + seq_printf(s, "Device state: %s\n", + dev_state_to_str(sfp->sm_dev_state)); + seq_printf(s, "Main state: %s\n", + sm_state_to_str(sfp->sm_state)); + seq_printf(s, "Fault recovery remaining retries: %d\n", + sfp->sm_fault_retries); + seq_printf(s, "PHY probe remaining retries: %d\n", + sfp->sm_phy_retries); + seq_printf(s, "moddef0: %d\n", !!(sfp->state & SFP_F_PRESENT)); + seq_printf(s, "rx_los: %d\n", !!(sfp->state & SFP_F_LOS)); + seq_printf(s, "tx_fault: %d\n", !!(sfp->state & SFP_F_TX_FAULT)); + seq_printf(s, "tx_disable: %d\n", !!(sfp->state & SFP_F_TX_DISABLE)); + return 0; +} +DEFINE_SHOW_ATTRIBUTE(sfp_debug_state); + +static void sfp_debugfs_init(struct sfp *sfp) +{ + sfp->debugfs_dir = debugfs_create_dir(dev_name(sfp->dev), NULL); + + debugfs_create_file("state", 0600, sfp->debugfs_dir, sfp, + &sfp_debug_state_fops); +} + +static void sfp_debugfs_exit(struct sfp *sfp) +{ + debugfs_remove_recursive(sfp->debugfs_dir); +} +#else +static void sfp_debugfs_init(struct sfp *sfp) +{ +} + +static void sfp_debugfs_exit(struct sfp *sfp) +{ +} +#endif + static void sfp_module_tx_fault_reset(struct sfp *sfp) { unsigned int state = sfp->state; @@ -2491,6 +2543,8 @@ static int sfp_probe(struct platform_device *pdev) if (!sfp->sfp_bus) return -ENOMEM; + sfp_debugfs_init(sfp); + return 0; } @@ -2498,6 +2552,7 @@ static int sfp_remove(struct platform_device *pdev) { struct sfp *sfp = platform_get_drvdata(pdev); + sfp_debugfs_exit(sfp); sfp_unregister_socket(sfp->sfp_bus); rtnl_lock(); -- GitLab From 5bc8f5ab3b752306d6a1e0e4fa6c4473c15a2924 Mon Sep 17 00:00:00 2001 From: Maxim Kochetkov Date: Mon, 11 Jan 2021 07:39:03 +0300 Subject: [PATCH 0605/2012] fsl/fman: Add MII mode support. Set proper value to IF_MODE register for MII mode. Signed-off-by: Maxim Kochetkov Link: https://lore.kernel.org/r/20210111043903.8044-1-fido_max@inbox.ru Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/freescale/fman/fman_memac.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/freescale/fman/fman_memac.c b/drivers/net/ethernet/freescale/fman/fman_memac.c index bb9887f988411..62f42921933d6 100644 --- a/drivers/net/ethernet/freescale/fman/fman_memac.c +++ b/drivers/net/ethernet/freescale/fman/fman_memac.c @@ -111,6 +111,7 @@ do { \ #define IF_MODE_MASK 0x00000003 /* 30-31 Mask on i/f mode bits */ #define IF_MODE_10G 0x00000000 /* 30-31 10G interface */ +#define IF_MODE_MII 0x00000001 /* 30-31 MII interface */ #define IF_MODE_GMII 0x00000002 /* 30-31 GMII (1G) interface */ #define IF_MODE_RGMII 0x00000004 #define IF_MODE_RGMII_AUTO 0x00008000 @@ -442,6 +443,9 @@ static int init(struct memac_regs __iomem *regs, struct memac_cfg *cfg, case PHY_INTERFACE_MODE_XGMII: tmp |= IF_MODE_10G; break; + case PHY_INTERFACE_MODE_MII: + tmp |= IF_MODE_MII; + break; default: tmp |= IF_MODE_GMII; if (phy_if == PHY_INTERFACE_MODE_RGMII || -- GitLab From 869c4d5eb1e6fbda66aa790c48bdb946d71494a0 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Mon, 11 Jan 2021 04:26:39 -0500 Subject: [PATCH 0606/2012] bnxt_en: Improve stats context resource accounting with RDMA driver loaded. The function bnxt_get_ulp_stat_ctxs() does not count the stats contexts used by the RDMA driver correctly when the RDMA driver is freeing the MSIX vectors. It assumes that if the RDMA driver is registered, the additional stats contexts will be needed. This is not true when the RDMA driver is about to unregister and frees the MSIX vectors. This slight error leads to over accouting of the stats contexts needed after the RDMA driver has unloaded. This will cause some firmware warning and error messages in dmesg during subsequent config. changes or ifdown/ifup. Fix it by properly accouting for extra stats contexts only if the RDMA driver is registered and MSIX vectors have been successfully requested. Fixes: c027c6b4e91f ("bnxt_en: get rid of num_stat_ctxs variable") Reviewed-by: Yongping Zhang Reviewed-by: Pavan Chebbi Signed-off-by: Michael Chan Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c index 8c8368c2f335c..64dbbb04b0434 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c @@ -222,8 +222,12 @@ int bnxt_get_ulp_msix_base(struct bnxt *bp) int bnxt_get_ulp_stat_ctxs(struct bnxt *bp) { - if (bnxt_ulp_registered(bp->edev, BNXT_ROCE_ULP)) - return BNXT_MIN_ROCE_STAT_CTXS; + if (bnxt_ulp_registered(bp->edev, BNXT_ROCE_ULP)) { + struct bnxt_en_dev *edev = bp->edev; + + if (edev->ulp_tbl[BNXT_ROCE_ULP].msix_requested) + return BNXT_MIN_ROCE_STAT_CTXS; + } return 0; } -- GitLab From 687487751814a493fba953efb9b1542b2f90614c Mon Sep 17 00:00:00 2001 From: Pavan Chebbi Date: Mon, 11 Jan 2021 04:26:40 -0500 Subject: [PATCH 0607/2012] bnxt_en: Clear DEFRAG flag in firmware message when retry flashing. When the FW tells the driver to retry the INSTALL_UPDATE command after it has cleared the NVM area, the driver is not clearing the previously used ALLOWED_TO_DEFRAG flag. As a result the FW tries to defrag the NVM area a second time in a loop and can fail the request. Fixes: 1432c3f6a6ca ("bnxt_en: Retry installing FW package under NO_SPACE error condition.") Signed-off-by: Pavan Chebbi Signed-off-by: Michael Chan Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index 9ff79d5d14c4c..2f8b193a772da 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@ -2532,7 +2532,7 @@ int bnxt_flash_package_from_fw_obj(struct net_device *dev, const struct firmware if (rc && ((struct hwrm_err_output *)&resp)->cmd_err == NVM_INSTALL_UPDATE_CMD_ERR_CODE_FRAG_ERR) { - install.flags |= + install.flags = cpu_to_le16(NVM_INSTALL_UPDATE_REQ_FLAGS_ALLOWED_TO_DEFRAG); rc = _hwrm_send_message_silent(bp, &install, @@ -2546,6 +2546,7 @@ int bnxt_flash_package_from_fw_obj(struct net_device *dev, const struct firmware * UPDATE directory and try the flash again */ defrag_attempted = true; + install.flags = 0; rc = __bnxt_flash_nvram(bp->dev, BNX_DIR_TYPE_UPDATE, BNX_DIR_ORDINAL_FIRST, -- GitLab From b2b0f16fa65e910a3ec8771206bb49ee87a54ac5 Mon Sep 17 00:00:00 2001 From: Javed Hasan Date: Tue, 15 Dec 2020 11:47:31 -0800 Subject: [PATCH 0608/2012] scsi: libfc: Avoid invoking response handler twice if ep is already completed MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A race condition exists between the response handler getting called because of exchange_mgr_reset() (which clears out all the active XIDs) and the response we get via an interrupt. Sequence of events: rport ba0200: Port timeout, state PLOGI rport ba0200: Port entered PLOGI state from PLOGI state xid 1052: Exchange timer armed : 20000 msecs  xid timer armed here rport ba0200: Received LOGO request while in state PLOGI rport ba0200: Delete port rport ba0200: work event 3 rport ba0200: lld callback ev 3 bnx2fc: rport_event_hdlr: event = 3, port_id = 0xba0200 bnx2fc: ba0200 - rport not created Yet!! /* Here we reset any outstanding exchanges before freeing rport using the exch_mgr_reset() */ xid 1052: Exchange timer canceled /* Here we got two responses for one xid */ xid 1052: invoking resp(), esb 20000000 state 3 xid 1052: invoking resp(), esb 20000000 state 3 xid 1052: fc_rport_plogi_resp() : ep->resp_active 2 xid 1052: fc_rport_plogi_resp() : ep->resp_active 2 Skip the response if the exchange is already completed. Link: https://lore.kernel.org/r/20201215194731.2326-1-jhasan@marvell.com Signed-off-by: Javed Hasan Signed-off-by: Martin K. Petersen --- drivers/scsi/libfc/fc_exch.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/libfc/fc_exch.c b/drivers/scsi/libfc/fc_exch.c index d71afae6191cb..841000445b9a1 100644 --- a/drivers/scsi/libfc/fc_exch.c +++ b/drivers/scsi/libfc/fc_exch.c @@ -1623,8 +1623,13 @@ static void fc_exch_recv_seq_resp(struct fc_exch_mgr *mp, struct fc_frame *fp) rc = fc_exch_done_locked(ep); WARN_ON(fc_seq_exch(sp) != ep); spin_unlock_bh(&ep->ex_lock); - if (!rc) + if (!rc) { fc_exch_delete(ep); + } else { + FC_EXCH_DBG(ep, "ep is completed already," + "hence skip calling the resp\n"); + goto skip_resp; + } } /* @@ -1643,6 +1648,7 @@ static void fc_exch_recv_seq_resp(struct fc_exch_mgr *mp, struct fc_frame *fp) if (!fc_invoke_resp(ep, sp, fp)) fc_frame_free(fp); +skip_resp: fc_exch_release(ep); return; rel: @@ -1899,10 +1905,16 @@ static void fc_exch_reset(struct fc_exch *ep) fc_exch_hold(ep); - if (!rc) + if (!rc) { fc_exch_delete(ep); + } else { + FC_EXCH_DBG(ep, "ep is completed already," + "hence skip calling the resp\n"); + goto skip_resp; + } fc_invoke_resp(ep, sp, ERR_PTR(-FC_EX_CLOSED)); +skip_resp: fc_seq_set_resp(sp, NULL, ep->arg); fc_exch_release(ep); } -- GitLab From 20bc80b6f582ad1151c52ca09ab66b472768c9c8 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Tue, 12 Jan 2021 18:25:23 +0100 Subject: [PATCH 0609/2012] mptcp: more strict state checking for acks Syzkaller found a way to trigger division by zero in mptcp_subflow_cleanup_rbuf(). The current checks implemented into tcp_can_send_ack() are too week, let's be more accurate. Reported-by: Christoph Paasch Fixes: ea4ca586b16f ("mptcp: refine MPTCP-level ack scheduling") Fixes: fd8976790a6c ("mptcp: be careful on MPTCP-level ack.") Signed-off-by: Paolo Abeni Reviewed-by: Mat Martineau Signed-off-by: Jakub Kicinski --- net/mptcp/protocol.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 6628d8d742030..2ff8c7caf74fc 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -427,7 +427,7 @@ static bool mptcp_subflow_active(struct mptcp_subflow_context *subflow) static bool tcp_can_send_ack(const struct sock *ssk) { return !((1 << inet_sk_state_load(ssk)) & - (TCPF_SYN_SENT | TCPF_SYN_RECV | TCPF_TIME_WAIT | TCPF_CLOSE)); + (TCPF_SYN_SENT | TCPF_SYN_RECV | TCPF_TIME_WAIT | TCPF_CLOSE | TCPF_LISTEN)); } static void mptcp_send_ack(struct mptcp_sock *msk) -- GitLab From 76e2a55d16259b51116767b28b19d759bff43f72 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Tue, 12 Jan 2021 18:25:24 +0100 Subject: [PATCH 0610/2012] mptcp: better msk-level shutdown. Instead of re-implementing most of inet_shutdown, re-use such helper, and implement the MPTCP-specific bits at the 'proto' level. The msk-level disconnect() can now be invoked, lets provide a suitable implementation. As a side effect, this fixes bad state management for listener sockets. The latter could lead to division by 0 oops since commit ea4ca586b16f ("mptcp: refine MPTCP-level ack scheduling"). Fixes: 43b54c6ee382 ("mptcp: Use full MPTCP-level disconnect state machine") Fixes: ea4ca586b16f ("mptcp: refine MPTCP-level ack scheduling") Signed-off-by: Paolo Abeni Reviewed-by: Mat Martineau Signed-off-by: Jakub Kicinski --- net/mptcp/protocol.c | 62 ++++++++++++-------------------------------- 1 file changed, 17 insertions(+), 45 deletions(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 2ff8c7caf74fc..81faeff8f3bb7 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -2642,11 +2642,12 @@ static void mptcp_copy_inaddrs(struct sock *msk, const struct sock *ssk) static int mptcp_disconnect(struct sock *sk, int flags) { - /* Should never be called. - * inet_stream_connect() calls ->disconnect, but that - * refers to the subflow socket, not the mptcp one. - */ - WARN_ON_ONCE(1); + struct mptcp_subflow_context *subflow; + struct mptcp_sock *msk = mptcp_sk(sk); + + __mptcp_flush_join_list(msk); + mptcp_for_each_subflow(msk, subflow) + tcp_disconnect(mptcp_subflow_tcp_sock(subflow), flags); return 0; } @@ -3089,6 +3090,14 @@ bool mptcp_finish_join(struct sock *ssk) return true; } +static void mptcp_shutdown(struct sock *sk, int how) +{ + pr_debug("sk=%p, how=%d", sk, how); + + if ((how & SEND_SHUTDOWN) && mptcp_close_state(sk)) + __mptcp_wr_shutdown(sk); +} + static struct proto mptcp_prot = { .name = "MPTCP", .owner = THIS_MODULE, @@ -3098,7 +3107,7 @@ static struct proto mptcp_prot = { .accept = mptcp_accept, .setsockopt = mptcp_setsockopt, .getsockopt = mptcp_getsockopt, - .shutdown = tcp_shutdown, + .shutdown = mptcp_shutdown, .destroy = mptcp_destroy, .sendmsg = mptcp_sendmsg, .recvmsg = mptcp_recvmsg, @@ -3344,43 +3353,6 @@ static __poll_t mptcp_poll(struct file *file, struct socket *sock, return mask; } -static int mptcp_shutdown(struct socket *sock, int how) -{ - struct mptcp_sock *msk = mptcp_sk(sock->sk); - struct sock *sk = sock->sk; - int ret = 0; - - pr_debug("sk=%p, how=%d", msk, how); - - lock_sock(sk); - - how++; - if ((how & ~SHUTDOWN_MASK) || !how) { - ret = -EINVAL; - goto out_unlock; - } - - if (sock->state == SS_CONNECTING) { - if ((1 << sk->sk_state) & - (TCPF_SYN_SENT | TCPF_SYN_RECV | TCPF_CLOSE)) - sock->state = SS_DISCONNECTING; - else - sock->state = SS_CONNECTED; - } - - sk->sk_shutdown |= how; - if ((how & SEND_SHUTDOWN) && mptcp_close_state(sk)) - __mptcp_wr_shutdown(sk); - - /* Wake up anyone sleeping in poll. */ - sk->sk_state_change(sk); - -out_unlock: - release_sock(sk); - - return ret; -} - static const struct proto_ops mptcp_stream_ops = { .family = PF_INET, .owner = THIS_MODULE, @@ -3394,7 +3366,7 @@ static const struct proto_ops mptcp_stream_ops = { .ioctl = inet_ioctl, .gettstamp = sock_gettstamp, .listen = mptcp_listen, - .shutdown = mptcp_shutdown, + .shutdown = inet_shutdown, .setsockopt = sock_common_setsockopt, .getsockopt = sock_common_getsockopt, .sendmsg = inet_sendmsg, @@ -3444,7 +3416,7 @@ static const struct proto_ops mptcp_v6_stream_ops = { .ioctl = inet6_ioctl, .gettstamp = sock_gettstamp, .listen = mptcp_listen, - .shutdown = mptcp_shutdown, + .shutdown = inet_shutdown, .setsockopt = sock_common_setsockopt, .getsockopt = sock_common_getsockopt, .sendmsg = inet6_sendmsg, -- GitLab From 46e05e1df628ab054486eafcc06a3c6512b338a1 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Tue, 12 Jan 2021 13:21:34 -0600 Subject: [PATCH 0611/2012] net: ipa: add config dependency on QCOM_SMEM The IPA driver depends on some SMEM functionality (qcom_smem_init(), qcom_smem_alloc(), and qcom_smem_virt_to_phys()), but this is not reflected in the configuration dependencies. Add a dependency on QCOM_SMEM to avoid attempts to build the IPA driver without SMEM. This avoids a link error for certain configurations. Reported-by: Randy Dunlap Fixes: 38a4066f593c5 ("net: ipa: support COMPILE_TEST") Signed-off-by: Alex Elder Acked-by: Randy Dunlap # build-tested Reviewed-by: Bjorn Andersson Link: https://lore.kernel.org/r/20210112192134.493-1-elder@linaro.org Signed-off-by: Jakub Kicinski --- drivers/net/ipa/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ipa/Kconfig b/drivers/net/ipa/Kconfig index 10a0e041ee775..b68f1289b89ef 100644 --- a/drivers/net/ipa/Kconfig +++ b/drivers/net/ipa/Kconfig @@ -1,6 +1,6 @@ config QCOM_IPA tristate "Qualcomm IPA support" - depends on 64BIT && NET + depends on 64BIT && NET && QCOM_SMEM depends on ARCH_QCOM || COMPILE_TEST depends on QCOM_RPROC_COMMON || (QCOM_RPROC_COMMON=n && COMPILE_TEST) select QCOM_MDT_LOADER if ARCH_QCOM -- GitLab From 7cd1af107a92eb63b93a96dc07406dcbc5269436 Mon Sep 17 00:00:00 2001 From: Atish Patra Date: Fri, 18 Dec 2020 16:20:51 -0800 Subject: [PATCH 0612/2012] riscv: Trace irq on only interrupt is enabled We should call irq trace only if interrupt is going to be enabled during excecption handling. Otherwise, it results in following warning during boot with lock debugging enabled. [ 0.000000] ------------[ cut here ]------------ [ 0.000000] DEBUG_LOCKS_WARN_ON(early_boot_irqs_disabled) [ 0.000000] WARNING: CPU: 0 PID: 0 at kernel/locking/lockdep.c:4085 lockdep_hardirqs_on_prepare+0x22a/0x22e [ 0.000000] Modules linked in: [ 0.000000] CPU: 0 PID: 0 Comm: swapper Not tainted 5.10.0-00022-ge20097fb37e2-dirty #548 [ 0.000000] epc: c005d5d4 ra : c005d5d4 sp : c1c01e80 [ 0.000000] gp : c1d456e0 tp : c1c0a980 t0 : 00000000 [ 0.000000] t1 : ffffffff t2 : 00000000 s0 : c1c01ea0 [ 0.000000] s1 : c100f360 a0 : 0000002d a1 : c00666ee [ 0.000000] a2 : 00000000 a3 : 00000000 a4 : 00000000 [ 0.000000] a5 : 00000000 a6 : c1c6b390 a7 : 3ffff00e [ 0.000000] s2 : c2384fe8 s3 : 00000000 s4 : 00000001 [ 0.000000] s5 : c1c0a980 s6 : c1d48000 s7 : c1613b4c [ 0.000000] s8 : 00000fff s9 : 80000200 s10: c1613b40 [ 0.000000] s11: 00000000 t3 : 00000000 t4 : 00000000 [ 0.000000] t5 : 00000001 t6 : 00000000 Fixes: 3c4697982982 ("riscv:Enable LOCKDEP_SUPPORT & fixup TRACE_IRQFLAGS_SUPPORT") Signed-off-by: Atish Patra Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/entry.S | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S index 48de316c68c18..744f3209c48d0 100644 --- a/arch/riscv/kernel/entry.S +++ b/arch/riscv/kernel/entry.S @@ -124,15 +124,15 @@ skip_context_tracking: REG_L a1, (a1) jr a1 1: -#ifdef CONFIG_TRACE_IRQFLAGS - call trace_hardirqs_on -#endif /* * Exceptions run with interrupts enabled or disabled depending on the * state of SR_PIE in m/sstatus. */ andi t0, s1, SR_PIE beqz t0, 1f +#ifdef CONFIG_TRACE_IRQFLAGS + call trace_hardirqs_on +#endif csrs CSR_STATUS, SR_IE 1: -- GitLab From c2ec5f2ecf6aeef60c63d1af9ddcacdcbf829d68 Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Mon, 11 Jan 2021 11:46:57 +0100 Subject: [PATCH 0613/2012] net: dsa: add optional stats64 support Allow DSA drivers to export stats64 Signed-off-by: Oleksij Rempel Reviewed-by: Vladimir Oltean Signed-off-by: Jakub Kicinski --- include/net/dsa.h | 4 +++- net/dsa/slave.c | 14 +++++++++++++- 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/include/net/dsa.h b/include/net/dsa.h index c9a3dd7588dfe..c3485ba6c3125 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -482,7 +482,7 @@ struct dsa_switch_ops { void (*phylink_fixed_state)(struct dsa_switch *ds, int port, struct phylink_link_state *state); /* - * ethtool hardware statistics. + * Port statistics counters. */ void (*get_strings)(struct dsa_switch *ds, int port, u32 stringset, uint8_t *data); @@ -491,6 +491,8 @@ struct dsa_switch_ops { int (*get_sset_count)(struct dsa_switch *ds, int port, int sset); void (*get_ethtool_phy_stats)(struct dsa_switch *ds, int port, uint64_t *data); + void (*get_stats64)(struct dsa_switch *ds, int port, + struct rtnl_link_stats64 *s); /* * ethtool Wake-on-LAN diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 5d7f6cada6a81..5a1769602e653 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -1569,6 +1569,18 @@ static struct devlink_port *dsa_slave_get_devlink_port(struct net_device *dev) return dp->ds->devlink ? &dp->devlink_port : NULL; } +static void dsa_slave_get_stats64(struct net_device *dev, + struct rtnl_link_stats64 *s) +{ + struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_switch *ds = dp->ds; + + if (ds->ops->get_stats64) + ds->ops->get_stats64(ds, dp->index, s); + else + dev_get_tstats64(dev, s); +} + static const struct net_device_ops dsa_slave_netdev_ops = { .ndo_open = dsa_slave_open, .ndo_stop = dsa_slave_close, @@ -1588,7 +1600,7 @@ static const struct net_device_ops dsa_slave_netdev_ops = { #endif .ndo_get_phys_port_name = dsa_slave_get_phys_port_name, .ndo_setup_tc = dsa_slave_setup_tc, - .ndo_get_stats64 = dev_get_tstats64, + .ndo_get_stats64 = dsa_slave_get_stats64, .ndo_get_port_parent_id = dsa_slave_get_port_parent_id, .ndo_vlan_rx_add_vid = dsa_slave_vlan_rx_add_vid, .ndo_vlan_rx_kill_vid = dsa_slave_vlan_rx_kill_vid, -- GitLab From bf9ce385932b61584684d31e2e5f8f485791e409 Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Mon, 11 Jan 2021 11:46:58 +0100 Subject: [PATCH 0614/2012] net: dsa: qca: ar9331: export stats64 Add stats support for the ar9331 switch. Signed-off-by: Oleksij Rempel Signed-off-by: Jakub Kicinski --- drivers/net/dsa/qca/ar9331.c | 163 ++++++++++++++++++++++++++++++++++- 1 file changed, 162 insertions(+), 1 deletion(-) diff --git a/drivers/net/dsa/qca/ar9331.c b/drivers/net/dsa/qca/ar9331.c index 4d49c5f2b7905..1e3706054ae1f 100644 --- a/drivers/net/dsa/qca/ar9331.c +++ b/drivers/net/dsa/qca/ar9331.c @@ -101,6 +101,9 @@ AR9331_SW_PORT_STATUS_RX_FLOW_EN | AR9331_SW_PORT_STATUS_TX_FLOW_EN | \ AR9331_SW_PORT_STATUS_SPEED_M) +/* MIB registers */ +#define AR9331_MIB_COUNTER(x) (0x20000 + ((x) * 0x100)) + /* Phy bypass mode * ------------------------------------------------------------------------ * Bit: | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 |10 |11 |12 |13 |14 |15 | @@ -154,6 +157,66 @@ #define AR9331_SW_MDIO_POLL_SLEEP_US 1 #define AR9331_SW_MDIO_POLL_TIMEOUT_US 20 +/* The interval should be small enough to avoid overflow of 32bit MIBs */ +/* + * FIXME: until we can read MIBs from stats64 call directly (i.e. sleep + * there), we have to poll stats more frequently then it is actually needed. + * For overflow protection, normally, 100 sec interval should have been OK. + */ +#define STATS_INTERVAL_JIFFIES (3 * HZ) + +struct ar9331_sw_stats_raw { + u32 rxbroad; /* 0x00 */ + u32 rxpause; /* 0x04 */ + u32 rxmulti; /* 0x08 */ + u32 rxfcserr; /* 0x0c */ + u32 rxalignerr; /* 0x10 */ + u32 rxrunt; /* 0x14 */ + u32 rxfragment; /* 0x18 */ + u32 rx64byte; /* 0x1c */ + u32 rx128byte; /* 0x20 */ + u32 rx256byte; /* 0x24 */ + u32 rx512byte; /* 0x28 */ + u32 rx1024byte; /* 0x2c */ + u32 rx1518byte; /* 0x30 */ + u32 rxmaxbyte; /* 0x34 */ + u32 rxtoolong; /* 0x38 */ + u32 rxgoodbyte; /* 0x3c */ + u32 rxgoodbyte_hi; + u32 rxbadbyte; /* 0x44 */ + u32 rxbadbyte_hi; + u32 rxoverflow; /* 0x4c */ + u32 filtered; /* 0x50 */ + u32 txbroad; /* 0x54 */ + u32 txpause; /* 0x58 */ + u32 txmulti; /* 0x5c */ + u32 txunderrun; /* 0x60 */ + u32 tx64byte; /* 0x64 */ + u32 tx128byte; /* 0x68 */ + u32 tx256byte; /* 0x6c */ + u32 tx512byte; /* 0x70 */ + u32 tx1024byte; /* 0x74 */ + u32 tx1518byte; /* 0x78 */ + u32 txmaxbyte; /* 0x7c */ + u32 txoversize; /* 0x80 */ + u32 txbyte; /* 0x84 */ + u32 txbyte_hi; + u32 txcollision; /* 0x8c */ + u32 txabortcol; /* 0x90 */ + u32 txmulticol; /* 0x94 */ + u32 txsinglecol; /* 0x98 */ + u32 txexcdefer; /* 0x9c */ + u32 txdefer; /* 0xa0 */ + u32 txlatecol; /* 0xa4 */ +}; + +struct ar9331_sw_port { + int idx; + struct delayed_work mib_read; + struct rtnl_link_stats64 stats; + struct spinlock stats_lock; +}; + struct ar9331_sw_priv { struct device *dev; struct dsa_switch ds; @@ -165,8 +228,17 @@ struct ar9331_sw_priv { struct mii_bus *sbus; /* mdio slave */ struct regmap *regmap; struct reset_control *sw_reset; + struct ar9331_sw_port port[AR9331_SW_PORTS]; }; +static struct ar9331_sw_priv *ar9331_sw_port_to_priv(struct ar9331_sw_port *port) +{ + struct ar9331_sw_port *p = port - port->idx; + + return (struct ar9331_sw_priv *)((void *)p - + offsetof(struct ar9331_sw_priv, port)); +} + /* Warning: switch reset will reset last AR9331_SW_MDIO_PHY_MODE_PAGE request * If some kind of optimization is used, the request should be repeated. */ @@ -424,6 +496,7 @@ static void ar9331_sw_phylink_mac_link_down(struct dsa_switch *ds, int port, phy_interface_t interface) { struct ar9331_sw_priv *priv = (struct ar9331_sw_priv *)ds->priv; + struct ar9331_sw_port *p = &priv->port[port]; struct regmap *regmap = priv->regmap; int ret; @@ -431,6 +504,8 @@ static void ar9331_sw_phylink_mac_link_down(struct dsa_switch *ds, int port, AR9331_SW_PORT_STATUS_MAC_MASK, 0); if (ret) dev_err_ratelimited(priv->dev, "%s: %i\n", __func__, ret); + + cancel_delayed_work_sync(&p->mib_read); } static void ar9331_sw_phylink_mac_link_up(struct dsa_switch *ds, int port, @@ -441,10 +516,13 @@ static void ar9331_sw_phylink_mac_link_up(struct dsa_switch *ds, int port, bool tx_pause, bool rx_pause) { struct ar9331_sw_priv *priv = (struct ar9331_sw_priv *)ds->priv; + struct ar9331_sw_port *p = &priv->port[port]; struct regmap *regmap = priv->regmap; u32 val; int ret; + schedule_delayed_work(&p->mib_read, 0); + val = AR9331_SW_PORT_STATUS_MAC_MASK; switch (speed) { case SPEED_1000: @@ -477,6 +555,73 @@ static void ar9331_sw_phylink_mac_link_up(struct dsa_switch *ds, int port, dev_err_ratelimited(priv->dev, "%s: %i\n", __func__, ret); } +static void ar9331_read_stats(struct ar9331_sw_port *port) +{ + struct ar9331_sw_priv *priv = ar9331_sw_port_to_priv(port); + struct rtnl_link_stats64 *stats = &port->stats; + struct ar9331_sw_stats_raw raw; + int ret; + + /* Do the slowest part first, to avoid needless locking for long time */ + ret = regmap_bulk_read(priv->regmap, AR9331_MIB_COUNTER(port->idx), + &raw, sizeof(raw) / sizeof(u32)); + if (ret) { + dev_err_ratelimited(priv->dev, "%s: %i\n", __func__, ret); + return; + } + /* All MIB counters are cleared automatically on read */ + + spin_lock(&port->stats_lock); + + stats->rx_bytes += raw.rxgoodbyte; + stats->tx_bytes += raw.txbyte; + + stats->rx_packets += raw.rx64byte + raw.rx128byte + raw.rx256byte + + raw.rx512byte + raw.rx1024byte + raw.rx1518byte + raw.rxmaxbyte; + stats->tx_packets += raw.tx64byte + raw.tx128byte + raw.tx256byte + + raw.tx512byte + raw.tx1024byte + raw.tx1518byte + raw.txmaxbyte; + + stats->rx_length_errors += raw.rxrunt + raw.rxfragment + raw.rxtoolong; + stats->rx_crc_errors += raw.rxfcserr; + stats->rx_frame_errors += raw.rxalignerr; + stats->rx_missed_errors += raw.rxoverflow; + stats->rx_dropped += raw.filtered; + stats->rx_errors += raw.rxfcserr + raw.rxalignerr + raw.rxrunt + + raw.rxfragment + raw.rxoverflow + raw.rxtoolong; + + stats->tx_window_errors += raw.txlatecol; + stats->tx_fifo_errors += raw.txunderrun; + stats->tx_aborted_errors += raw.txabortcol; + stats->tx_errors += raw.txoversize + raw.txabortcol + raw.txunderrun + + raw.txlatecol; + + stats->multicast += raw.rxmulti; + stats->collisions += raw.txcollision; + + spin_unlock(&port->stats_lock); +} + +static void ar9331_do_stats_poll(struct work_struct *work) +{ + struct ar9331_sw_port *port = container_of(work, struct ar9331_sw_port, + mib_read.work); + + ar9331_read_stats(port); + + schedule_delayed_work(&port->mib_read, STATS_INTERVAL_JIFFIES); +} + +static void ar9331_get_stats64(struct dsa_switch *ds, int port, + struct rtnl_link_stats64 *s) +{ + struct ar9331_sw_priv *priv = (struct ar9331_sw_priv *)ds->priv; + struct ar9331_sw_port *p = &priv->port[port]; + + spin_lock(&p->stats_lock); + memcpy(s, &p->stats, sizeof(*s)); + spin_unlock(&p->stats_lock); +} + static const struct dsa_switch_ops ar9331_sw_ops = { .get_tag_protocol = ar9331_sw_get_tag_protocol, .setup = ar9331_sw_setup, @@ -485,6 +630,7 @@ static const struct dsa_switch_ops ar9331_sw_ops = { .phylink_mac_config = ar9331_sw_phylink_mac_config, .phylink_mac_link_down = ar9331_sw_phylink_mac_link_down, .phylink_mac_link_up = ar9331_sw_phylink_mac_link_up, + .get_stats64 = ar9331_get_stats64, }; static irqreturn_t ar9331_sw_irq(int irq, void *data) @@ -796,7 +942,7 @@ static int ar9331_sw_probe(struct mdio_device *mdiodev) { struct ar9331_sw_priv *priv; struct dsa_switch *ds; - int ret; + int ret, i; priv = devm_kzalloc(&mdiodev->dev, sizeof(*priv), GFP_KERNEL); if (!priv) @@ -831,6 +977,14 @@ static int ar9331_sw_probe(struct mdio_device *mdiodev) ds->ops = &priv->ops; dev_set_drvdata(&mdiodev->dev, priv); + for (i = 0; i < ARRAY_SIZE(priv->port); i++) { + struct ar9331_sw_port *port = &priv->port[i]; + + port->idx = i; + spin_lock_init(&port->stats_lock); + INIT_DELAYED_WORK(&port->mib_read, ar9331_do_stats_poll); + } + ret = dsa_register_switch(ds); if (ret) goto err_remove_irq; @@ -846,6 +1000,13 @@ err_remove_irq: static void ar9331_sw_remove(struct mdio_device *mdiodev) { struct ar9331_sw_priv *priv = dev_get_drvdata(&mdiodev->dev); + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(priv->port); i++) { + struct ar9331_sw_port *port = &priv->port[i]; + + cancel_delayed_work_sync(&port->mib_read); + } irq_domain_remove(priv->irqdomain); mdiobus_unregister(priv->mbus); -- GitLab From f50e2f9f791647aa4e5b19d0064f5cabf630bf6e Mon Sep 17 00:00:00 2001 From: YANG LI Date: Mon, 11 Jan 2021 17:24:23 +0800 Subject: [PATCH 0615/2012] hci: llc_shdlc: style: Simplify bool comparison Fix the following coccicheck warning: ./net/nfc/hci/llc_shdlc.c:239:5-21: WARNING: Comparison to bool Signed-off-by: YANG LI Reported-by: Abaci Robot Link: https://lore.kernel.org/r/1610357063-57705-1-git-send-email-abaci-bugfix@linux.alibaba.com Signed-off-by: Jakub Kicinski --- net/nfc/hci/llc_shdlc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/nfc/hci/llc_shdlc.c b/net/nfc/hci/llc_shdlc.c index 0eb4ddc056e78..c0c8fea3a1864 100644 --- a/net/nfc/hci/llc_shdlc.c +++ b/net/nfc/hci/llc_shdlc.c @@ -236,7 +236,7 @@ static void llc_shdlc_rcv_i_frame(struct llc_shdlc *shdlc, goto exit; } - if (shdlc->t1_active == false) { + if (!shdlc->t1_active) { shdlc->t1_active = true; mod_timer(&shdlc->t1_timer, jiffies + msecs_to_jiffies(SHDLC_T1_VALUE_MS(shdlc->w))); -- GitLab From 25fe2c9c4cd2e97c5f5b69f3aefe69aad3057936 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 12 Jan 2021 17:21:21 +0100 Subject: [PATCH 0616/2012] smc: fix out of bound access in smc_nl_get_sys_info() smc_clc_get_hostname() sets the host pointer to a buffer which is not NULL-terminated (see smc_clc_init()). Reported-by: syzbot+f4708c391121cfc58396@syzkaller.appspotmail.com Fixes: 099b990bd11a ("net/smc: Add support for obtaining system information") Signed-off-by: Karsten Graul Signed-off-by: Jakub Kicinski --- net/smc/smc_core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index 59342b519e347..8d866b4ed8f61 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -246,7 +246,8 @@ int smc_nl_get_sys_info(struct sk_buff *skb, struct netlink_callback *cb) goto errattr; smc_clc_get_hostname(&host); if (host) { - snprintf(hostname, sizeof(hostname), "%s", host); + memcpy(hostname, host, SMC_MAX_HOSTNAME_LEN); + hostname[SMC_MAX_HOSTNAME_LEN] = 0; if (nla_put_string(skb, SMC_NLA_SYS_LOCAL_HOST, hostname)) goto errattr; } -- GitLab From 8a4465368964b4fbaf084760c94c7aabf61059fb Mon Sep 17 00:00:00 2001 From: Guvenc Gulce Date: Tue, 12 Jan 2021 17:21:22 +0100 Subject: [PATCH 0617/2012] net/smc: use memcpy instead of snprintf to avoid out of bounds read Using snprintf() to convert not null-terminated strings to null terminated strings may cause out of bounds read in the source string. Therefore use memcpy() and terminate the target string with a null afterwards. Fixes: a3db10efcc4c ("net/smc: Add support for obtaining SMCR device list") Signed-off-by: Guvenc Gulce Signed-off-by: Karsten Graul Signed-off-by: Jakub Kicinski --- net/smc/smc_core.c | 17 +++++++++++------ net/smc/smc_ib.c | 6 +++--- net/smc/smc_ism.c | 3 ++- 3 files changed, 16 insertions(+), 10 deletions(-) diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index 8d866b4ed8f61..0df85a12651e9 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -258,7 +258,8 @@ int smc_nl_get_sys_info(struct sk_buff *skb, struct netlink_callback *cb) smc_ism_get_system_eid(smcd_dev, &seid); mutex_unlock(&smcd_dev_list.mutex); if (seid && smc_ism_is_v2_capable()) { - snprintf(smc_seid, sizeof(smc_seid), "%s", seid); + memcpy(smc_seid, seid, SMC_MAX_EID_LEN); + smc_seid[SMC_MAX_EID_LEN] = 0; if (nla_put_string(skb, SMC_NLA_SYS_SEID, smc_seid)) goto errattr; } @@ -296,7 +297,8 @@ static int smc_nl_fill_lgr(struct smc_link_group *lgr, goto errattr; if (nla_put_u8(skb, SMC_NLA_LGR_R_VLAN_ID, lgr->vlan_id)) goto errattr; - snprintf(smc_target, sizeof(smc_target), "%s", lgr->pnet_id); + memcpy(smc_target, lgr->pnet_id, SMC_MAX_PNETID_LEN); + smc_target[SMC_MAX_PNETID_LEN] = 0; if (nla_put_string(skb, SMC_NLA_LGR_R_PNETID, smc_target)) goto errattr; @@ -313,7 +315,7 @@ static int smc_nl_fill_lgr_link(struct smc_link_group *lgr, struct sk_buff *skb, struct netlink_callback *cb) { - char smc_ibname[IB_DEVICE_NAME_MAX + 1]; + char smc_ibname[IB_DEVICE_NAME_MAX]; u8 smc_gid_target[41]; struct nlattr *attrs; u32 link_uid = 0; @@ -462,7 +464,8 @@ static int smc_nl_fill_smcd_lgr(struct smc_link_group *lgr, goto errattr; if (nla_put_u32(skb, SMC_NLA_LGR_D_CHID, smc_ism_get_chid(lgr->smcd))) goto errattr; - snprintf(smc_pnet, sizeof(smc_pnet), "%s", lgr->smcd->pnetid); + memcpy(smc_pnet, lgr->smcd->pnetid, SMC_MAX_PNETID_LEN); + smc_pnet[SMC_MAX_PNETID_LEN] = 0; if (nla_put_string(skb, SMC_NLA_LGR_D_PNETID, smc_pnet)) goto errattr; @@ -475,10 +478,12 @@ static int smc_nl_fill_smcd_lgr(struct smc_link_group *lgr, goto errv2attr; if (nla_put_u8(skb, SMC_NLA_LGR_V2_OS, lgr->peer_os)) goto errv2attr; - snprintf(smc_host, sizeof(smc_host), "%s", lgr->peer_hostname); + memcpy(smc_host, lgr->peer_hostname, SMC_MAX_HOSTNAME_LEN); + smc_host[SMC_MAX_HOSTNAME_LEN] = 0; if (nla_put_string(skb, SMC_NLA_LGR_V2_PEER_HOST, smc_host)) goto errv2attr; - snprintf(smc_eid, sizeof(smc_eid), "%s", lgr->negotiated_eid); + memcpy(smc_eid, lgr->negotiated_eid, SMC_MAX_EID_LEN); + smc_eid[SMC_MAX_EID_LEN] = 0; if (nla_put_string(skb, SMC_NLA_LGR_V2_NEG_EID, smc_eid)) goto errv2attr; diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c index ddd7fac98b1d6..7d7ba0320d5ae 100644 --- a/net/smc/smc_ib.c +++ b/net/smc/smc_ib.c @@ -371,8 +371,8 @@ static int smc_nl_handle_dev_port(struct sk_buff *skb, if (nla_put_u8(skb, SMC_NLA_DEV_PORT_PNET_USR, smcibdev->pnetid_by_user[port])) goto errattr; - snprintf(smc_pnet, sizeof(smc_pnet), "%s", - (char *)&smcibdev->pnetid[port]); + memcpy(smc_pnet, &smcibdev->pnetid[port], SMC_MAX_PNETID_LEN); + smc_pnet[SMC_MAX_PNETID_LEN] = 0; if (nla_put_string(skb, SMC_NLA_DEV_PORT_PNETID, smc_pnet)) goto errattr; if (nla_put_u32(skb, SMC_NLA_DEV_PORT_NETDEV, @@ -414,7 +414,7 @@ static int smc_nl_handle_smcr_dev(struct smc_ib_device *smcibdev, struct sk_buff *skb, struct netlink_callback *cb) { - char smc_ibname[IB_DEVICE_NAME_MAX + 1]; + char smc_ibname[IB_DEVICE_NAME_MAX]; struct smc_pci_dev smc_pci_dev; struct pci_dev *pci_dev; unsigned char is_crit; diff --git a/net/smc/smc_ism.c b/net/smc/smc_ism.c index 524ef64a191a5..9c6e95882553e 100644 --- a/net/smc/smc_ism.c +++ b/net/smc/smc_ism.c @@ -250,7 +250,8 @@ static int smc_nl_handle_smcd_dev(struct smcd_dev *smcd, goto errattr; if (nla_put_u8(skb, SMC_NLA_DEV_PORT_PNET_USR, smcd->pnetid_by_user)) goto errportattr; - snprintf(smc_pnet, sizeof(smc_pnet), "%s", smcd->pnetid); + memcpy(smc_pnet, smcd->pnetid, SMC_MAX_PNETID_LEN); + smc_pnet[SMC_MAX_PNETID_LEN] = 0; if (nla_put_string(skb, SMC_NLA_DEV_PORT_PNETID, smc_pnet)) goto errportattr; -- GitLab From 80709af7325d179b433817f421c85449f2454046 Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Wed, 23 Dec 2020 00:01:52 +0800 Subject: [PATCH 0618/2012] riscv: cacheinfo: Fix using smp_processor_id() in preemptible Use raw_smp_processor_id instead of smp_processor_id() to fix warning, BUG: using smp_processor_id() in preemptible [00000000] code: init/1 caller is debug_smp_processor_id+0x1c/0x26 CPU: 0 PID: 1 Comm: init Not tainted 5.10.0-rc4 #211 Call Trace: walk_stackframe+0x0/0xaa show_stack+0x32/0x3e dump_stack+0x76/0x90 check_preemption_disabled+0xaa/0xac debug_smp_processor_id+0x1c/0x26 get_cache_size+0x18/0x68 load_elf_binary+0x868/0xece bprm_execve+0x224/0x498 kernel_execve+0xdc/0x142 run_init_process+0x90/0x9e try_to_run_init_process+0x12/0x3c kernel_init+0xb4/0xf8 ret_from_exception+0x0/0xc The issue is found when CONFIG_DEBUG_PREEMPT enabled. Reviewed-by: Atish Patra Tested-by: Atish Patra Signed-off-by: Kefeng Wang [Palmer: Added a comment.] Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/cacheinfo.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/arch/riscv/kernel/cacheinfo.c b/arch/riscv/kernel/cacheinfo.c index de59dd457b415..d867813570442 100644 --- a/arch/riscv/kernel/cacheinfo.c +++ b/arch/riscv/kernel/cacheinfo.c @@ -26,7 +26,16 @@ cache_get_priv_group(struct cacheinfo *this_leaf) static struct cacheinfo *get_cacheinfo(u32 level, enum cache_type type) { - struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(smp_processor_id()); + /* + * Using raw_smp_processor_id() elides a preemptability check, but this + * is really indicative of a larger problem: the cacheinfo UABI assumes + * that cores have a homonogenous view of the cache hierarchy. That + * happens to be the case for the current set of RISC-V systems, but + * likely won't be true in general. Since there's no way to provide + * correct information for these systems via the current UABI we're + * just eliding the check for now. + */ + struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(raw_smp_processor_id()); struct cacheinfo *this_leaf; int index; -- GitLab From d6e3ae76728ccde49271d9f5acfebbea0c5625a3 Mon Sep 17 00:00:00 2001 From: Dinghao Liu Date: Fri, 25 Dec 2020 16:35:20 +0800 Subject: [PATCH 0619/2012] scsi: fnic: Fix memleak in vnic_dev_init_devcmd2 When ioread32() returns 0xFFFFFFFF, we should execute cleanup functions like other error handling paths before returning. Link: https://lore.kernel.org/r/20201225083520.22015-1-dinghao.liu@zju.edu.cn Acked-by: Karan Tilak Kumar Signed-off-by: Dinghao Liu Signed-off-by: Martin K. Petersen --- drivers/scsi/fnic/vnic_dev.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/fnic/vnic_dev.c b/drivers/scsi/fnic/vnic_dev.c index a2beee6e09f06..5988c300cc82e 100644 --- a/drivers/scsi/fnic/vnic_dev.c +++ b/drivers/scsi/fnic/vnic_dev.c @@ -444,7 +444,8 @@ static int vnic_dev_init_devcmd2(struct vnic_dev *vdev) fetch_index = ioread32(&vdev->devcmd2->wq.ctrl->fetch_index); if (fetch_index == 0xFFFFFFFF) { /* check for hardware gone */ pr_err("error in devcmd2 init"); - return -ENODEV; + err = -ENODEV; + goto err_free_wq; } /* @@ -460,7 +461,7 @@ static int vnic_dev_init_devcmd2(struct vnic_dev *vdev) err = vnic_dev_alloc_desc_ring(vdev, &vdev->devcmd2->results_ring, DEVCMD2_RING_SIZE, DEVCMD2_DESC_SIZE); if (err) - goto err_free_wq; + goto err_disable_wq; vdev->devcmd2->result = (struct devcmd2_result *) vdev->devcmd2->results_ring.descs; @@ -481,8 +482,9 @@ static int vnic_dev_init_devcmd2(struct vnic_dev *vdev) err_free_desc_ring: vnic_dev_free_desc_ring(vdev, &vdev->devcmd2->results_ring); -err_free_wq: +err_disable_wq: vnic_wq_disable(&vdev->devcmd2->wq); +err_free_wq: vnic_wq_free(&vdev->devcmd2->wq); err_free_devcmd2: kfree(vdev->devcmd2); -- GitLab From 0aa2ec8a475fb505fd98d93bbcf4e03beeeebcb6 Mon Sep 17 00:00:00 2001 From: Guo Ren Date: Sat, 2 Jan 2021 13:24:34 +0000 Subject: [PATCH 0620/2012] riscv: Fixup CONFIG_GENERIC_TIME_VSYSCALL The patch fix commit: ad5d112 ("riscv: use vDSO common flow to reduce the latency of the time-related functions"). The GENERIC_TIME_VSYSCALL should be CONFIG_GENERIC_TIME_VSYSCALL or vgettimeofday won't work. Signed-off-by: Guo Ren Reviewed-by: Pekka Enberg Fixes: ad5d1122b82f ("riscv: use vDSO common flow to reduce the latency of the time-related functions") Cc: stable@vger.kernel.org Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/vdso.h | 2 +- arch/riscv/kernel/vdso.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/riscv/include/asm/vdso.h b/arch/riscv/include/asm/vdso.h index 8454f746bbfd0..1453a2f563bcc 100644 --- a/arch/riscv/include/asm/vdso.h +++ b/arch/riscv/include/asm/vdso.h @@ -10,7 +10,7 @@ #include -#ifndef GENERIC_TIME_VSYSCALL +#ifndef CONFIG_GENERIC_TIME_VSYSCALL struct vdso_data { }; #endif diff --git a/arch/riscv/kernel/vdso.c b/arch/riscv/kernel/vdso.c index 678204231700c..3f1d35e7c98a6 100644 --- a/arch/riscv/kernel/vdso.c +++ b/arch/riscv/kernel/vdso.c @@ -12,7 +12,7 @@ #include #include #include -#ifdef GENERIC_TIME_VSYSCALL +#ifdef CONFIG_GENERIC_TIME_VSYSCALL #include #else #include -- GitLab From 6d051154d4331f416e436a6f3c0edcb3f6c13e39 Mon Sep 17 00:00:00 2001 From: Lukas Bulwahn Date: Fri, 8 Jan 2021 08:39:32 +0100 Subject: [PATCH 0621/2012] MAINTAINERS: adjust entry to tcan4x5x file split Commit 7813887ea972 ("can: tcan4x5x: rename tcan4x5x.c -> tcan4x5x-core.c") and commit 67def4ef8bb9 ("can: tcan4x5x: move regmap code into seperate file") split the file tcan4x5x.c into two files, but missed to adjust the TI TCAN4X5X DEVICE DRIVER section in MAINTAINERS. Hence, ./scripts/get_maintainer.pl --self-test=patterns complains: warning: no file matches F: drivers/net/can/m_can/tcan4x5x.c Adjust the file entry in MAINTAINERS to the tcan4x5x file splitting. Signed-off-by: Lukas Bulwahn Fixes: 67def4ef8bb9 ("can: tcan4x5x: move regmap code into seperate file") Fixes: 7813887ea972 ("can: tcan4x5x: rename tcan4x5x.c -> tcan4x5x-core.c") Link: https://lore.kernel.org/r/20210108073932.20804-1-lukas.bulwahn@gmail.com Signed-off-by: Marc Kleine-Budde --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index c2cb791982882..54fcd5fe572d3 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -17839,7 +17839,7 @@ M: Dan Murphy L: linux-can@vger.kernel.org S: Maintained F: Documentation/devicetree/bindings/net/can/tcan4x5x.txt -F: drivers/net/can/m_can/tcan4x5x.c +F: drivers/net/can/m_can/tcan4x5x* TI TRF7970A NFC DRIVER M: Mark Greer -- GitLab From 9f16f4e0a8a08e39c40cc9b7e847a96e8ef9063f Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Mon, 11 Jan 2021 15:19:16 +0100 Subject: [PATCH 0622/2012] MAINTAINERS: CAN network layer: add missing header file can-ml.h This patch add the can-ml.h to the list of maintained files of the CAN network layer. Fixes: ffd956eef69b ("can: introduce CAN midlayer private and allocate it automatically") Reviewed-by: Vincent Mailhol Link: https://lore.kernel.org/r/20210111141930.693847-2-mkl@pengutronix.de Signed-off-by: Marc Kleine-Budde --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 54fcd5fe572d3..c3091a91ebbfa 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3960,6 +3960,7 @@ W: https://github.com/linux-can T: git git://git.kernel.org/pub/scm/linux/kernel/git/mkl/linux-can.git T: git git://git.kernel.org/pub/scm/linux/kernel/git/mkl/linux-can-next.git F: Documentation/networking/can.rst +F: include/linux/can/can-ml.h F: include/linux/can/core.h F: include/linux/can/skb.h F: include/net/netns/can.h -- GitLab From 3e77f70e734584e0ad1038e459ed3fd2400f873a Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Mon, 11 Jan 2021 15:19:17 +0100 Subject: [PATCH 0623/2012] can: dev: move driver related infrastructure into separate subdir This patch moves the CAN driver related infrastructure into a separate subdir. It will be split into more files in the coming patches. Reviewed-by: Vincent Mailhol Link: https://lore.kernel.org/r/20210111141930.693847-3-mkl@pengutronix.de Signed-off-by: Marc Kleine-Budde --- drivers/net/can/Makefile | 7 +------ drivers/net/can/dev/Makefile | 7 +++++++ drivers/net/can/{ => dev}/dev.c | 0 drivers/net/can/{ => dev}/rx-offload.c | 0 4 files changed, 8 insertions(+), 6 deletions(-) create mode 100644 drivers/net/can/dev/Makefile rename drivers/net/can/{ => dev}/dev.c (100%) rename drivers/net/can/{ => dev}/rx-offload.c (100%) diff --git a/drivers/net/can/Makefile b/drivers/net/can/Makefile index 22164300122d5..a2b4463d84802 100644 --- a/drivers/net/can/Makefile +++ b/drivers/net/can/Makefile @@ -7,12 +7,7 @@ obj-$(CONFIG_CAN_VCAN) += vcan.o obj-$(CONFIG_CAN_VXCAN) += vxcan.o obj-$(CONFIG_CAN_SLCAN) += slcan.o -obj-$(CONFIG_CAN_DEV) += can-dev.o -can-dev-y += dev.o -can-dev-y += rx-offload.o - -can-dev-$(CONFIG_CAN_LEDS) += led.o - +obj-y += dev/ obj-y += rcar/ obj-y += spi/ obj-y += usb/ diff --git a/drivers/net/can/dev/Makefile b/drivers/net/can/dev/Makefile new file mode 100644 index 0000000000000..cba92e6bcf6f5 --- /dev/null +++ b/drivers/net/can/dev/Makefile @@ -0,0 +1,7 @@ +# SPDX-License-Identifier: GPL-2.0 + +obj-$(CONFIG_CAN_DEV) += can-dev.o +can-dev-y += dev.o +can-dev-y += rx-offload.o + +can-dev-$(CONFIG_CAN_LEDS) += led.o diff --git a/drivers/net/can/dev.c b/drivers/net/can/dev/dev.c similarity index 100% rename from drivers/net/can/dev.c rename to drivers/net/can/dev/dev.c diff --git a/drivers/net/can/rx-offload.c b/drivers/net/can/dev/rx-offload.c similarity index 100% rename from drivers/net/can/rx-offload.c rename to drivers/net/can/dev/rx-offload.c -- GitLab From 5a9d5ecd69ed65fc2d49cdecfc1c1ab1e4d7af34 Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Mon, 11 Jan 2021 15:19:18 +0100 Subject: [PATCH 0624/2012] can: dev: move bittiming related code into seperate file This patch moves the bittiming related code of the CAN device infrastructure into a separate file. Reviewed-by: Vincent Mailhol Link: https://lore.kernel.org/r/20210111141930.693847-4-mkl@pengutronix.de Signed-off-by: Marc Kleine-Budde --- MAINTAINERS | 1 + drivers/net/can/dev/Makefile | 1 + drivers/net/can/dev/bittiming.c | 261 ++++++++++++++++++++++++++++++++ drivers/net/can/dev/dev.c | 261 -------------------------------- include/linux/can/bittiming.h | 44 ++++++ include/linux/can/dev.h | 16 +- 6 files changed, 308 insertions(+), 276 deletions(-) create mode 100644 drivers/net/can/dev/bittiming.c create mode 100644 include/linux/can/bittiming.h diff --git a/MAINTAINERS b/MAINTAINERS index c3091a91ebbfa..d17662df1cd78 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3943,6 +3943,7 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/mkl/linux-can.git T: git git://git.kernel.org/pub/scm/linux/kernel/git/mkl/linux-can-next.git F: Documentation/devicetree/bindings/net/can/ F: drivers/net/can/ +F: include/linux/can/bittiming.h F: include/linux/can/dev.h F: include/linux/can/led.h F: include/linux/can/platform/ diff --git a/drivers/net/can/dev/Makefile b/drivers/net/can/dev/Makefile index cba92e6bcf6f5..b5c6bb848d9df 100644 --- a/drivers/net/can/dev/Makefile +++ b/drivers/net/can/dev/Makefile @@ -1,6 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 obj-$(CONFIG_CAN_DEV) += can-dev.o +can-dev-y += bittiming.o can-dev-y += dev.o can-dev-y += rx-offload.o diff --git a/drivers/net/can/dev/bittiming.c b/drivers/net/can/dev/bittiming.c new file mode 100644 index 0000000000000..f7fe226bb3952 --- /dev/null +++ b/drivers/net/can/dev/bittiming.c @@ -0,0 +1,261 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (C) 2005 Marc Kleine-Budde, Pengutronix + * Copyright (C) 2006 Andrey Volkov, Varma Electronics + * Copyright (C) 2008-2009 Wolfgang Grandegger + */ + +#include + +#ifdef CONFIG_CAN_CALC_BITTIMING +#define CAN_CALC_MAX_ERROR 50 /* in one-tenth of a percent */ + +/* Bit-timing calculation derived from: + * + * Code based on LinCAN sources and H8S2638 project + * Copyright 2004-2006 Pavel Pisa - DCE FELK CVUT cz + * Copyright 2005 Stanislav Marek + * email: pisa@cmp.felk.cvut.cz + * + * Calculates proper bit-timing parameters for a specified bit-rate + * and sample-point, which can then be used to set the bit-timing + * registers of the CAN controller. You can find more information + * in the header file linux/can/netlink.h. + */ +static int +can_update_sample_point(const struct can_bittiming_const *btc, + unsigned int sample_point_nominal, unsigned int tseg, + unsigned int *tseg1_ptr, unsigned int *tseg2_ptr, + unsigned int *sample_point_error_ptr) +{ + unsigned int sample_point_error, best_sample_point_error = UINT_MAX; + unsigned int sample_point, best_sample_point = 0; + unsigned int tseg1, tseg2; + int i; + + for (i = 0; i <= 1; i++) { + tseg2 = tseg + CAN_SYNC_SEG - + (sample_point_nominal * (tseg + CAN_SYNC_SEG)) / + 1000 - i; + tseg2 = clamp(tseg2, btc->tseg2_min, btc->tseg2_max); + tseg1 = tseg - tseg2; + if (tseg1 > btc->tseg1_max) { + tseg1 = btc->tseg1_max; + tseg2 = tseg - tseg1; + } + + sample_point = 1000 * (tseg + CAN_SYNC_SEG - tseg2) / + (tseg + CAN_SYNC_SEG); + sample_point_error = abs(sample_point_nominal - sample_point); + + if (sample_point <= sample_point_nominal && + sample_point_error < best_sample_point_error) { + best_sample_point = sample_point; + best_sample_point_error = sample_point_error; + *tseg1_ptr = tseg1; + *tseg2_ptr = tseg2; + } + } + + if (sample_point_error_ptr) + *sample_point_error_ptr = best_sample_point_error; + + return best_sample_point; +} + +int can_calc_bittiming(struct net_device *dev, struct can_bittiming *bt, + const struct can_bittiming_const *btc) +{ + struct can_priv *priv = netdev_priv(dev); + unsigned int bitrate; /* current bitrate */ + unsigned int bitrate_error; /* difference between current and nominal value */ + unsigned int best_bitrate_error = UINT_MAX; + unsigned int sample_point_error; /* difference between current and nominal value */ + unsigned int best_sample_point_error = UINT_MAX; + unsigned int sample_point_nominal; /* nominal sample point */ + unsigned int best_tseg = 0; /* current best value for tseg */ + unsigned int best_brp = 0; /* current best value for brp */ + unsigned int brp, tsegall, tseg, tseg1 = 0, tseg2 = 0; + u64 v64; + + /* Use CiA recommended sample points */ + if (bt->sample_point) { + sample_point_nominal = bt->sample_point; + } else { + if (bt->bitrate > 800000) + sample_point_nominal = 750; + else if (bt->bitrate > 500000) + sample_point_nominal = 800; + else + sample_point_nominal = 875; + } + + /* tseg even = round down, odd = round up */ + for (tseg = (btc->tseg1_max + btc->tseg2_max) * 2 + 1; + tseg >= (btc->tseg1_min + btc->tseg2_min) * 2; tseg--) { + tsegall = CAN_SYNC_SEG + tseg / 2; + + /* Compute all possible tseg choices (tseg=tseg1+tseg2) */ + brp = priv->clock.freq / (tsegall * bt->bitrate) + tseg % 2; + + /* choose brp step which is possible in system */ + brp = (brp / btc->brp_inc) * btc->brp_inc; + if (brp < btc->brp_min || brp > btc->brp_max) + continue; + + bitrate = priv->clock.freq / (brp * tsegall); + bitrate_error = abs(bt->bitrate - bitrate); + + /* tseg brp biterror */ + if (bitrate_error > best_bitrate_error) + continue; + + /* reset sample point error if we have a better bitrate */ + if (bitrate_error < best_bitrate_error) + best_sample_point_error = UINT_MAX; + + can_update_sample_point(btc, sample_point_nominal, tseg / 2, + &tseg1, &tseg2, &sample_point_error); + if (sample_point_error > best_sample_point_error) + continue; + + best_sample_point_error = sample_point_error; + best_bitrate_error = bitrate_error; + best_tseg = tseg / 2; + best_brp = brp; + + if (bitrate_error == 0 && sample_point_error == 0) + break; + } + + if (best_bitrate_error) { + /* Error in one-tenth of a percent */ + v64 = (u64)best_bitrate_error * 1000; + do_div(v64, bt->bitrate); + bitrate_error = (u32)v64; + if (bitrate_error > CAN_CALC_MAX_ERROR) { + netdev_err(dev, + "bitrate error %d.%d%% too high\n", + bitrate_error / 10, bitrate_error % 10); + return -EDOM; + } + netdev_warn(dev, "bitrate error %d.%d%%\n", + bitrate_error / 10, bitrate_error % 10); + } + + /* real sample point */ + bt->sample_point = can_update_sample_point(btc, sample_point_nominal, + best_tseg, &tseg1, &tseg2, + NULL); + + v64 = (u64)best_brp * 1000 * 1000 * 1000; + do_div(v64, priv->clock.freq); + bt->tq = (u32)v64; + bt->prop_seg = tseg1 / 2; + bt->phase_seg1 = tseg1 - bt->prop_seg; + bt->phase_seg2 = tseg2; + + /* check for sjw user settings */ + if (!bt->sjw || !btc->sjw_max) { + bt->sjw = 1; + } else { + /* bt->sjw is at least 1 -> sanitize upper bound to sjw_max */ + if (bt->sjw > btc->sjw_max) + bt->sjw = btc->sjw_max; + /* bt->sjw must not be higher than tseg2 */ + if (tseg2 < bt->sjw) + bt->sjw = tseg2; + } + + bt->brp = best_brp; + + /* real bitrate */ + bt->bitrate = priv->clock.freq / + (bt->brp * (CAN_SYNC_SEG + tseg1 + tseg2)); + + return 0; +} +#endif /* CONFIG_CAN_CALC_BITTIMING */ + +/* Checks the validity of the specified bit-timing parameters prop_seg, + * phase_seg1, phase_seg2 and sjw and tries to determine the bitrate + * prescaler value brp. You can find more information in the header + * file linux/can/netlink.h. + */ +static int can_fixup_bittiming(struct net_device *dev, struct can_bittiming *bt, + const struct can_bittiming_const *btc) +{ + struct can_priv *priv = netdev_priv(dev); + int tseg1, alltseg; + u64 brp64; + + tseg1 = bt->prop_seg + bt->phase_seg1; + if (!bt->sjw) + bt->sjw = 1; + if (bt->sjw > btc->sjw_max || + tseg1 < btc->tseg1_min || tseg1 > btc->tseg1_max || + bt->phase_seg2 < btc->tseg2_min || bt->phase_seg2 > btc->tseg2_max) + return -ERANGE; + + brp64 = (u64)priv->clock.freq * (u64)bt->tq; + if (btc->brp_inc > 1) + do_div(brp64, btc->brp_inc); + brp64 += 500000000UL - 1; + do_div(brp64, 1000000000UL); /* the practicable BRP */ + if (btc->brp_inc > 1) + brp64 *= btc->brp_inc; + bt->brp = (u32)brp64; + + if (bt->brp < btc->brp_min || bt->brp > btc->brp_max) + return -EINVAL; + + alltseg = bt->prop_seg + bt->phase_seg1 + bt->phase_seg2 + 1; + bt->bitrate = priv->clock.freq / (bt->brp * alltseg); + bt->sample_point = ((tseg1 + 1) * 1000) / alltseg; + + return 0; +} + +/* Checks the validity of predefined bitrate settings */ +static int +can_validate_bitrate(struct net_device *dev, struct can_bittiming *bt, + const u32 *bitrate_const, + const unsigned int bitrate_const_cnt) +{ + struct can_priv *priv = netdev_priv(dev); + unsigned int i; + + for (i = 0; i < bitrate_const_cnt; i++) { + if (bt->bitrate == bitrate_const[i]) + break; + } + + if (i >= priv->bitrate_const_cnt) + return -EINVAL; + + return 0; +} + +int can_get_bittiming(struct net_device *dev, struct can_bittiming *bt, + const struct can_bittiming_const *btc, + const u32 *bitrate_const, + const unsigned int bitrate_const_cnt) +{ + int err; + + /* Depending on the given can_bittiming parameter structure the CAN + * timing parameters are calculated based on the provided bitrate OR + * alternatively the CAN timing parameters (tq, prop_seg, etc.) are + * provided directly which are then checked and fixed up. + */ + if (!bt->tq && bt->bitrate && btc) + err = can_calc_bittiming(dev, bt, btc); + else if (bt->tq && !bt->bitrate && btc) + err = can_fixup_bittiming(dev, bt, btc); + else if (!bt->tq && bt->bitrate && bitrate_const) + err = can_validate_bitrate(dev, bt, bitrate_const, + bitrate_const_cnt); + else + err = -EINVAL; + + return err; +} diff --git a/drivers/net/can/dev/dev.c b/drivers/net/can/dev/dev.c index 3486704c8a957..1b3ab95b3fd1a 100644 --- a/drivers/net/can/dev/dev.c +++ b/drivers/net/can/dev/dev.c @@ -58,267 +58,6 @@ u8 can_fd_len2dlc(u8 len) } EXPORT_SYMBOL_GPL(can_fd_len2dlc); -#ifdef CONFIG_CAN_CALC_BITTIMING -#define CAN_CALC_MAX_ERROR 50 /* in one-tenth of a percent */ - -/* Bit-timing calculation derived from: - * - * Code based on LinCAN sources and H8S2638 project - * Copyright 2004-2006 Pavel Pisa - DCE FELK CVUT cz - * Copyright 2005 Stanislav Marek - * email: pisa@cmp.felk.cvut.cz - * - * Calculates proper bit-timing parameters for a specified bit-rate - * and sample-point, which can then be used to set the bit-timing - * registers of the CAN controller. You can find more information - * in the header file linux/can/netlink.h. - */ -static int -can_update_sample_point(const struct can_bittiming_const *btc, - unsigned int sample_point_nominal, unsigned int tseg, - unsigned int *tseg1_ptr, unsigned int *tseg2_ptr, - unsigned int *sample_point_error_ptr) -{ - unsigned int sample_point_error, best_sample_point_error = UINT_MAX; - unsigned int sample_point, best_sample_point = 0; - unsigned int tseg1, tseg2; - int i; - - for (i = 0; i <= 1; i++) { - tseg2 = tseg + CAN_SYNC_SEG - - (sample_point_nominal * (tseg + CAN_SYNC_SEG)) / - 1000 - i; - tseg2 = clamp(tseg2, btc->tseg2_min, btc->tseg2_max); - tseg1 = tseg - tseg2; - if (tseg1 > btc->tseg1_max) { - tseg1 = btc->tseg1_max; - tseg2 = tseg - tseg1; - } - - sample_point = 1000 * (tseg + CAN_SYNC_SEG - tseg2) / - (tseg + CAN_SYNC_SEG); - sample_point_error = abs(sample_point_nominal - sample_point); - - if (sample_point <= sample_point_nominal && - sample_point_error < best_sample_point_error) { - best_sample_point = sample_point; - best_sample_point_error = sample_point_error; - *tseg1_ptr = tseg1; - *tseg2_ptr = tseg2; - } - } - - if (sample_point_error_ptr) - *sample_point_error_ptr = best_sample_point_error; - - return best_sample_point; -} - -static int can_calc_bittiming(struct net_device *dev, struct can_bittiming *bt, - const struct can_bittiming_const *btc) -{ - struct can_priv *priv = netdev_priv(dev); - unsigned int bitrate; /* current bitrate */ - unsigned int bitrate_error; /* difference between current and nominal value */ - unsigned int best_bitrate_error = UINT_MAX; - unsigned int sample_point_error; /* difference between current and nominal value */ - unsigned int best_sample_point_error = UINT_MAX; - unsigned int sample_point_nominal; /* nominal sample point */ - unsigned int best_tseg = 0; /* current best value for tseg */ - unsigned int best_brp = 0; /* current best value for brp */ - unsigned int brp, tsegall, tseg, tseg1 = 0, tseg2 = 0; - u64 v64; - - /* Use CiA recommended sample points */ - if (bt->sample_point) { - sample_point_nominal = bt->sample_point; - } else { - if (bt->bitrate > 800000) - sample_point_nominal = 750; - else if (bt->bitrate > 500000) - sample_point_nominal = 800; - else - sample_point_nominal = 875; - } - - /* tseg even = round down, odd = round up */ - for (tseg = (btc->tseg1_max + btc->tseg2_max) * 2 + 1; - tseg >= (btc->tseg1_min + btc->tseg2_min) * 2; tseg--) { - tsegall = CAN_SYNC_SEG + tseg / 2; - - /* Compute all possible tseg choices (tseg=tseg1+tseg2) */ - brp = priv->clock.freq / (tsegall * bt->bitrate) + tseg % 2; - - /* choose brp step which is possible in system */ - brp = (brp / btc->brp_inc) * btc->brp_inc; - if (brp < btc->brp_min || brp > btc->brp_max) - continue; - - bitrate = priv->clock.freq / (brp * tsegall); - bitrate_error = abs(bt->bitrate - bitrate); - - /* tseg brp biterror */ - if (bitrate_error > best_bitrate_error) - continue; - - /* reset sample point error if we have a better bitrate */ - if (bitrate_error < best_bitrate_error) - best_sample_point_error = UINT_MAX; - - can_update_sample_point(btc, sample_point_nominal, tseg / 2, - &tseg1, &tseg2, &sample_point_error); - if (sample_point_error > best_sample_point_error) - continue; - - best_sample_point_error = sample_point_error; - best_bitrate_error = bitrate_error; - best_tseg = tseg / 2; - best_brp = brp; - - if (bitrate_error == 0 && sample_point_error == 0) - break; - } - - if (best_bitrate_error) { - /* Error in one-tenth of a percent */ - v64 = (u64)best_bitrate_error * 1000; - do_div(v64, bt->bitrate); - bitrate_error = (u32)v64; - if (bitrate_error > CAN_CALC_MAX_ERROR) { - netdev_err(dev, - "bitrate error %d.%d%% too high\n", - bitrate_error / 10, bitrate_error % 10); - return -EDOM; - } - netdev_warn(dev, "bitrate error %d.%d%%\n", - bitrate_error / 10, bitrate_error % 10); - } - - /* real sample point */ - bt->sample_point = can_update_sample_point(btc, sample_point_nominal, - best_tseg, &tseg1, &tseg2, - NULL); - - v64 = (u64)best_brp * 1000 * 1000 * 1000; - do_div(v64, priv->clock.freq); - bt->tq = (u32)v64; - bt->prop_seg = tseg1 / 2; - bt->phase_seg1 = tseg1 - bt->prop_seg; - bt->phase_seg2 = tseg2; - - /* check for sjw user settings */ - if (!bt->sjw || !btc->sjw_max) { - bt->sjw = 1; - } else { - /* bt->sjw is at least 1 -> sanitize upper bound to sjw_max */ - if (bt->sjw > btc->sjw_max) - bt->sjw = btc->sjw_max; - /* bt->sjw must not be higher than tseg2 */ - if (tseg2 < bt->sjw) - bt->sjw = tseg2; - } - - bt->brp = best_brp; - - /* real bitrate */ - bt->bitrate = priv->clock.freq / - (bt->brp * (CAN_SYNC_SEG + tseg1 + tseg2)); - - return 0; -} -#else /* !CONFIG_CAN_CALC_BITTIMING */ -static int can_calc_bittiming(struct net_device *dev, struct can_bittiming *bt, - const struct can_bittiming_const *btc) -{ - netdev_err(dev, "bit-timing calculation not available\n"); - return -EINVAL; -} -#endif /* CONFIG_CAN_CALC_BITTIMING */ - -/* Checks the validity of the specified bit-timing parameters prop_seg, - * phase_seg1, phase_seg2 and sjw and tries to determine the bitrate - * prescaler value brp. You can find more information in the header - * file linux/can/netlink.h. - */ -static int can_fixup_bittiming(struct net_device *dev, struct can_bittiming *bt, - const struct can_bittiming_const *btc) -{ - struct can_priv *priv = netdev_priv(dev); - int tseg1, alltseg; - u64 brp64; - - tseg1 = bt->prop_seg + bt->phase_seg1; - if (!bt->sjw) - bt->sjw = 1; - if (bt->sjw > btc->sjw_max || - tseg1 < btc->tseg1_min || tseg1 > btc->tseg1_max || - bt->phase_seg2 < btc->tseg2_min || bt->phase_seg2 > btc->tseg2_max) - return -ERANGE; - - brp64 = (u64)priv->clock.freq * (u64)bt->tq; - if (btc->brp_inc > 1) - do_div(brp64, btc->brp_inc); - brp64 += 500000000UL - 1; - do_div(brp64, 1000000000UL); /* the practicable BRP */ - if (btc->brp_inc > 1) - brp64 *= btc->brp_inc; - bt->brp = (u32)brp64; - - if (bt->brp < btc->brp_min || bt->brp > btc->brp_max) - return -EINVAL; - - alltseg = bt->prop_seg + bt->phase_seg1 + bt->phase_seg2 + 1; - bt->bitrate = priv->clock.freq / (bt->brp * alltseg); - bt->sample_point = ((tseg1 + 1) * 1000) / alltseg; - - return 0; -} - -/* Checks the validity of predefined bitrate settings */ -static int -can_validate_bitrate(struct net_device *dev, struct can_bittiming *bt, - const u32 *bitrate_const, - const unsigned int bitrate_const_cnt) -{ - struct can_priv *priv = netdev_priv(dev); - unsigned int i; - - for (i = 0; i < bitrate_const_cnt; i++) { - if (bt->bitrate == bitrate_const[i]) - break; - } - - if (i >= priv->bitrate_const_cnt) - return -EINVAL; - - return 0; -} - -static int can_get_bittiming(struct net_device *dev, struct can_bittiming *bt, - const struct can_bittiming_const *btc, - const u32 *bitrate_const, - const unsigned int bitrate_const_cnt) -{ - int err; - - /* Depending on the given can_bittiming parameter structure the CAN - * timing parameters are calculated based on the provided bitrate OR - * alternatively the CAN timing parameters (tq, prop_seg, etc.) are - * provided directly which are then checked and fixed up. - */ - if (!bt->tq && bt->bitrate && btc) - err = can_calc_bittiming(dev, bt, btc); - else if (bt->tq && !bt->bitrate && btc) - err = can_fixup_bittiming(dev, bt, btc); - else if (!bt->tq && bt->bitrate && bitrate_const) - err = can_validate_bitrate(dev, bt, bitrate_const, - bitrate_const_cnt); - else - err = -EINVAL; - - return err; -} - static void can_update_state_error_stats(struct net_device *dev, enum can_state new_state) { diff --git a/include/linux/can/bittiming.h b/include/linux/can/bittiming.h new file mode 100644 index 0000000000000..707575c668f40 --- /dev/null +++ b/include/linux/can/bittiming.h @@ -0,0 +1,44 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Copyright (c) 2020 Pengutronix, Marc Kleine-Budde + */ + +#ifndef _CAN_BITTIMING_H +#define _CAN_BITTIMING_H + +#include +#include + +#define CAN_SYNC_SEG 1 + +#ifdef CONFIG_CAN_CALC_BITTIMING +int can_calc_bittiming(struct net_device *dev, struct can_bittiming *bt, + const struct can_bittiming_const *btc); +#else /* !CONFIG_CAN_CALC_BITTIMING */ +static inline int +can_calc_bittiming(struct net_device *dev, struct can_bittiming *bt, + const struct can_bittiming_const *btc) +{ + netdev_err(dev, "bit-timing calculation not available\n"); + return -EINVAL; +} +#endif /* CONFIG_CAN_CALC_BITTIMING */ + +int can_get_bittiming(struct net_device *dev, struct can_bittiming *bt, + const struct can_bittiming_const *btc, + const u32 *bitrate_const, + const unsigned int bitrate_const_cnt); + +/* + * can_bit_time() - Duration of one bit + * + * Please refer to ISO 11898-1:2015, section 11.3.1.1 "Bit time" for + * additional information. + * + * Return: the number of time quanta in one bit. + */ +static inline unsigned int can_bit_time(const struct can_bittiming *bt) +{ + return CAN_SYNC_SEG + bt->prop_seg + bt->phase_seg1 + bt->phase_seg2; +} + +#endif /* !_CAN_BITTIMING_H */ diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h index 197a79535cc22..054c3bed190b2 100644 --- a/include/linux/can/dev.h +++ b/include/linux/can/dev.h @@ -15,6 +15,7 @@ #define _CAN_DEV_H #include +#include #include #include #include @@ -82,21 +83,6 @@ struct can_priv { #endif }; -#define CAN_SYNC_SEG 1 - -/* - * can_bit_time() - Duration of one bit - * - * Please refer to ISO 11898-1:2015, section 11.3.1.1 "Bit time" for - * additional information. - * - * Return: the number of time quanta in one bit. - */ -static inline unsigned int can_bit_time(const struct can_bittiming *bt) -{ - return CAN_SYNC_SEG + bt->prop_seg + bt->phase_seg1 + bt->phase_seg2; -} - /* * can_cc_dlc2len(value) - convert a given data length code (dlc) of a * Classical CAN frame into a valid data length of max. 8 bytes. -- GitLab From bdd2e413192dd5f2153d166cd907b048cce872e8 Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Mon, 11 Jan 2021 15:19:19 +0100 Subject: [PATCH 0625/2012] can: dev: move length related code into seperate file This patch moves all CAN frame length related code of the CAN device infrastructure into a separate file. Reviewed-by: Vincent Mailhol Link: https://lore.kernel.org/r/20210111141930.693847-5-mkl@pengutronix.de Signed-off-by: Marc Kleine-Budde --- MAINTAINERS | 1 + drivers/net/can/dev/Makefile | 1 + drivers/net/can/dev/dev.c | 33 ------------------------- drivers/net/can/dev/length.c | 38 ++++++++++++++++++++++++++++ include/linux/can/dev.h | 41 +----------------------------- include/linux/can/length.h | 48 ++++++++++++++++++++++++++++++++++++ 6 files changed, 89 insertions(+), 73 deletions(-) create mode 100644 drivers/net/can/dev/length.c create mode 100644 include/linux/can/length.h diff --git a/MAINTAINERS b/MAINTAINERS index d17662df1cd78..21780c4e2e714 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3946,6 +3946,7 @@ F: drivers/net/can/ F: include/linux/can/bittiming.h F: include/linux/can/dev.h F: include/linux/can/led.h +F: include/linux/can/length.h F: include/linux/can/platform/ F: include/linux/can/rx-offload.h F: include/uapi/linux/can/error.h diff --git a/drivers/net/can/dev/Makefile b/drivers/net/can/dev/Makefile index b5c6bb848d9df..5c647951e06d5 100644 --- a/drivers/net/can/dev/Makefile +++ b/drivers/net/can/dev/Makefile @@ -3,6 +3,7 @@ obj-$(CONFIG_CAN_DEV) += can-dev.o can-dev-y += bittiming.o can-dev-y += dev.o +can-dev-y += length.o can-dev-y += rx-offload.o can-dev-$(CONFIG_CAN_LEDS) += led.o diff --git a/drivers/net/can/dev/dev.c b/drivers/net/can/dev/dev.c index 1b3ab95b3fd1a..3372e99d5db77 100644 --- a/drivers/net/can/dev/dev.c +++ b/drivers/net/can/dev/dev.c @@ -25,39 +25,6 @@ MODULE_DESCRIPTION(MOD_DESC); MODULE_LICENSE("GPL v2"); MODULE_AUTHOR("Wolfgang Grandegger "); -/* CAN DLC to real data length conversion helpers */ - -static const u8 dlc2len[] = {0, 1, 2, 3, 4, 5, 6, 7, - 8, 12, 16, 20, 24, 32, 48, 64}; - -/* get data length from raw data length code (DLC) */ -u8 can_fd_dlc2len(u8 dlc) -{ - return dlc2len[dlc & 0x0F]; -} -EXPORT_SYMBOL_GPL(can_fd_dlc2len); - -static const u8 len2dlc[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, /* 0 - 8 */ - 9, 9, 9, 9, /* 9 - 12 */ - 10, 10, 10, 10, /* 13 - 16 */ - 11, 11, 11, 11, /* 17 - 20 */ - 12, 12, 12, 12, /* 21 - 24 */ - 13, 13, 13, 13, 13, 13, 13, 13, /* 25 - 32 */ - 14, 14, 14, 14, 14, 14, 14, 14, /* 33 - 40 */ - 14, 14, 14, 14, 14, 14, 14, 14, /* 41 - 48 */ - 15, 15, 15, 15, 15, 15, 15, 15, /* 49 - 56 */ - 15, 15, 15, 15, 15, 15, 15, 15}; /* 57 - 64 */ - -/* map the sanitized data length to an appropriate data length code */ -u8 can_fd_len2dlc(u8 len) -{ - if (unlikely(len > 64)) - return 0xF; - - return len2dlc[len]; -} -EXPORT_SYMBOL_GPL(can_fd_len2dlc); - static void can_update_state_error_stats(struct net_device *dev, enum can_state new_state) { diff --git a/drivers/net/can/dev/length.c b/drivers/net/can/dev/length.c new file mode 100644 index 0000000000000..6fe18aa23ec9a --- /dev/null +++ b/drivers/net/can/dev/length.c @@ -0,0 +1,38 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (C) 2012, 2020 Oliver Hartkopp + */ + +#include + +/* CAN DLC to real data length conversion helpers */ + +static const u8 dlc2len[] = {0, 1, 2, 3, 4, 5, 6, 7, + 8, 12, 16, 20, 24, 32, 48, 64}; + +/* get data length from raw data length code (DLC) */ +u8 can_fd_dlc2len(u8 dlc) +{ + return dlc2len[dlc & 0x0F]; +} +EXPORT_SYMBOL_GPL(can_fd_dlc2len); + +static const u8 len2dlc[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, /* 0 - 8 */ + 9, 9, 9, 9, /* 9 - 12 */ + 10, 10, 10, 10, /* 13 - 16 */ + 11, 11, 11, 11, /* 17 - 20 */ + 12, 12, 12, 12, /* 21 - 24 */ + 13, 13, 13, 13, 13, 13, 13, 13, /* 25 - 32 */ + 14, 14, 14, 14, 14, 14, 14, 14, /* 33 - 40 */ + 14, 14, 14, 14, 14, 14, 14, 14, /* 41 - 48 */ + 15, 15, 15, 15, 15, 15, 15, 15, /* 49 - 56 */ + 15, 15, 15, 15, 15, 15, 15, 15}; /* 57 - 64 */ + +/* map the sanitized data length to an appropriate data length code */ +u8 can_fd_len2dlc(u8 len) +{ + if (unlikely(len > 64)) + return 0xF; + + return len2dlc[len]; +} +EXPORT_SYMBOL_GPL(can_fd_len2dlc); diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h index 054c3bed190b2..d75fba1d030a4 100644 --- a/include/linux/can/dev.h +++ b/include/linux/can/dev.h @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -83,15 +84,6 @@ struct can_priv { #endif }; -/* - * can_cc_dlc2len(value) - convert a given data length code (dlc) of a - * Classical CAN frame into a valid data length of max. 8 bytes. - * - * To be used in the CAN netdriver receive path to ensure conformance with - * ISO 11898-1 Chapter 8.4.2.3 (DLC field) - */ -#define can_cc_dlc2len(dlc) (min_t(u8, (dlc), CAN_MAX_DLEN)) - /* Check for outgoing skbs that have not been created by the CAN subsystem */ static inline bool can_skb_headroom_valid(struct net_device *dev, struct sk_buff *skb) @@ -156,31 +148,6 @@ static inline bool can_is_canfd_skb(const struct sk_buff *skb) return skb->len == CANFD_MTU; } -/* helper to get the data length code (DLC) for Classical CAN raw DLC access */ -static inline u8 can_get_cc_dlc(const struct can_frame *cf, const u32 ctrlmode) -{ - /* return len8_dlc as dlc value only if all conditions apply */ - if ((ctrlmode & CAN_CTRLMODE_CC_LEN8_DLC) && - (cf->len == CAN_MAX_DLEN) && - (cf->len8_dlc > CAN_MAX_DLEN && cf->len8_dlc <= CAN_MAX_RAW_DLC)) - return cf->len8_dlc; - - /* return the payload length as dlc value */ - return cf->len; -} - -/* helper to set len and len8_dlc value for Classical CAN raw DLC access */ -static inline void can_frame_set_cc_len(struct can_frame *cf, const u8 dlc, - const u32 ctrlmode) -{ - /* the caller already ensured that dlc is a value from 0 .. 15 */ - if (ctrlmode & CAN_CTRLMODE_CC_LEN8_DLC && dlc > CAN_MAX_DLEN) - cf->len8_dlc = dlc; - - /* limit the payload length 'len' to CAN_MAX_DLEN */ - cf->len = can_cc_dlc2len(dlc); -} - /* helper to define static CAN controller features at device creation time */ static inline void can_set_static_ctrlmode(struct net_device *dev, u32 static_mode) @@ -196,12 +163,6 @@ static inline void can_set_static_ctrlmode(struct net_device *dev, dev->mtu = CANFD_MTU; } -/* get data length from raw data length code (DLC) */ -u8 can_fd_dlc2len(u8 dlc); - -/* map the sanitized data length to an appropriate data length code */ -u8 can_fd_len2dlc(u8 len); - struct net_device *alloc_candev_mqs(int sizeof_priv, unsigned int echo_skb_max, unsigned int txqs, unsigned int rxqs); #define alloc_candev(sizeof_priv, echo_skb_max) \ diff --git a/include/linux/can/length.h b/include/linux/can/length.h new file mode 100644 index 0000000000000..156b9d17969a7 --- /dev/null +++ b/include/linux/can/length.h @@ -0,0 +1,48 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (C) 2020 Oliver Hartkopp + */ + +#ifndef _CAN_LENGTH_H +#define _CAN_LENGTH_H + +/* + * can_cc_dlc2len(value) - convert a given data length code (dlc) of a + * Classical CAN frame into a valid data length of max. 8 bytes. + * + * To be used in the CAN netdriver receive path to ensure conformance with + * ISO 11898-1 Chapter 8.4.2.3 (DLC field) + */ +#define can_cc_dlc2len(dlc) (min_t(u8, (dlc), CAN_MAX_DLEN)) + +/* helper to get the data length code (DLC) for Classical CAN raw DLC access */ +static inline u8 can_get_cc_dlc(const struct can_frame *cf, const u32 ctrlmode) +{ + /* return len8_dlc as dlc value only if all conditions apply */ + if ((ctrlmode & CAN_CTRLMODE_CC_LEN8_DLC) && + (cf->len == CAN_MAX_DLEN) && + (cf->len8_dlc > CAN_MAX_DLEN && cf->len8_dlc <= CAN_MAX_RAW_DLC)) + return cf->len8_dlc; + + /* return the payload length as dlc value */ + return cf->len; +} + +/* helper to set len and len8_dlc value for Classical CAN raw DLC access */ +static inline void can_frame_set_cc_len(struct can_frame *cf, const u8 dlc, + const u32 ctrlmode) +{ + /* the caller already ensured that dlc is a value from 0 .. 15 */ + if (ctrlmode & CAN_CTRLMODE_CC_LEN8_DLC && dlc > CAN_MAX_DLEN) + cf->len8_dlc = dlc; + + /* limit the payload length 'len' to CAN_MAX_DLEN */ + cf->len = can_cc_dlc2len(dlc); +} + +/* get data length from raw data length code (DLC) */ +u8 can_fd_dlc2len(u8 dlc); + +/* map the sanitized data length to an appropriate data length code */ +u8 can_fd_len2dlc(u8 len); + +#endif /* !_CAN_LENGTH_H */ -- GitLab From 18f2dbfd2232212f53af9d249682f13a8335d54f Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Mon, 11 Jan 2021 15:19:20 +0100 Subject: [PATCH 0626/2012] can: dev: move skb related into seperate file This patch moves the skb related code of the CAN device infrastructure into a separate file. Reviewed-by: Vincent Mailhol Link: https://lore.kernel.org/r/20210111141930.693847-6-mkl@pengutronix.de Signed-off-by: Marc Kleine-Budde --- drivers/net/can/dev/Makefile | 1 + drivers/net/can/dev/dev.c | 213 --------------------------------- drivers/net/can/dev/skb.c | 220 +++++++++++++++++++++++++++++++++++ include/linux/can/dev.h | 76 ------------ include/linux/can/skb.h | 77 ++++++++++++ 5 files changed, 298 insertions(+), 289 deletions(-) create mode 100644 drivers/net/can/dev/skb.c diff --git a/drivers/net/can/dev/Makefile b/drivers/net/can/dev/Makefile index 5c647951e06d5..188bd53b113c5 100644 --- a/drivers/net/can/dev/Makefile +++ b/drivers/net/can/dev/Makefile @@ -5,5 +5,6 @@ can-dev-y += bittiming.o can-dev-y += dev.o can-dev-y += length.o can-dev-y += rx-offload.o +can-dev-y += skb.o can-dev-$(CONFIG_CAN_LEDS) += led.o diff --git a/drivers/net/can/dev/dev.c b/drivers/net/can/dev/dev.c index 3372e99d5db77..fe71ff9ef8c93 100644 --- a/drivers/net/can/dev/dev.c +++ b/drivers/net/can/dev/dev.c @@ -132,149 +132,6 @@ void can_change_state(struct net_device *dev, struct can_frame *cf, } EXPORT_SYMBOL_GPL(can_change_state); -/* Local echo of CAN messages - * - * CAN network devices *should* support a local echo functionality - * (see Documentation/networking/can.rst). To test the handling of CAN - * interfaces that do not support the local echo both driver types are - * implemented. In the case that the driver does not support the echo - * the IFF_ECHO remains clear in dev->flags. This causes the PF_CAN core - * to perform the echo as a fallback solution. - */ -static void can_flush_echo_skb(struct net_device *dev) -{ - struct can_priv *priv = netdev_priv(dev); - struct net_device_stats *stats = &dev->stats; - int i; - - for (i = 0; i < priv->echo_skb_max; i++) { - if (priv->echo_skb[i]) { - kfree_skb(priv->echo_skb[i]); - priv->echo_skb[i] = NULL; - stats->tx_dropped++; - stats->tx_aborted_errors++; - } - } -} - -/* Put the skb on the stack to be looped backed locally lateron - * - * The function is typically called in the start_xmit function - * of the device driver. The driver must protect access to - * priv->echo_skb, if necessary. - */ -int can_put_echo_skb(struct sk_buff *skb, struct net_device *dev, - unsigned int idx) -{ - struct can_priv *priv = netdev_priv(dev); - - BUG_ON(idx >= priv->echo_skb_max); - - /* check flag whether this packet has to be looped back */ - if (!(dev->flags & IFF_ECHO) || skb->pkt_type != PACKET_LOOPBACK || - (skb->protocol != htons(ETH_P_CAN) && - skb->protocol != htons(ETH_P_CANFD))) { - kfree_skb(skb); - return 0; - } - - if (!priv->echo_skb[idx]) { - skb = can_create_echo_skb(skb); - if (!skb) - return -ENOMEM; - - /* make settings for echo to reduce code in irq context */ - skb->pkt_type = PACKET_BROADCAST; - skb->ip_summed = CHECKSUM_UNNECESSARY; - skb->dev = dev; - - /* save this skb for tx interrupt echo handling */ - priv->echo_skb[idx] = skb; - } else { - /* locking problem with netif_stop_queue() ?? */ - netdev_err(dev, "%s: BUG! echo_skb %d is occupied!\n", __func__, idx); - kfree_skb(skb); - return -EBUSY; - } - - return 0; -} -EXPORT_SYMBOL_GPL(can_put_echo_skb); - -struct sk_buff * -__can_get_echo_skb(struct net_device *dev, unsigned int idx, u8 *len_ptr) -{ - struct can_priv *priv = netdev_priv(dev); - - if (idx >= priv->echo_skb_max) { - netdev_err(dev, "%s: BUG! Trying to access can_priv::echo_skb out of bounds (%u/max %u)\n", - __func__, idx, priv->echo_skb_max); - return NULL; - } - - if (priv->echo_skb[idx]) { - /* Using "struct canfd_frame::len" for the frame - * length is supported on both CAN and CANFD frames. - */ - struct sk_buff *skb = priv->echo_skb[idx]; - struct canfd_frame *cf = (struct canfd_frame *)skb->data; - - /* get the real payload length for netdev statistics */ - if (cf->can_id & CAN_RTR_FLAG) - *len_ptr = 0; - else - *len_ptr = cf->len; - - priv->echo_skb[idx] = NULL; - - return skb; - } - - return NULL; -} - -/* Get the skb from the stack and loop it back locally - * - * The function is typically called when the TX done interrupt - * is handled in the device driver. The driver must protect - * access to priv->echo_skb, if necessary. - */ -unsigned int can_get_echo_skb(struct net_device *dev, unsigned int idx) -{ - struct sk_buff *skb; - u8 len; - - skb = __can_get_echo_skb(dev, idx, &len); - if (!skb) - return 0; - - skb_get(skb); - if (netif_rx(skb) == NET_RX_SUCCESS) - dev_consume_skb_any(skb); - else - dev_kfree_skb_any(skb); - - return len; -} -EXPORT_SYMBOL_GPL(can_get_echo_skb); - -/* Remove the skb from the stack and free it. - * - * The function is typically called when TX failed. - */ -void can_free_echo_skb(struct net_device *dev, unsigned int idx) -{ - struct can_priv *priv = netdev_priv(dev); - - BUG_ON(idx >= priv->echo_skb_max); - - if (priv->echo_skb[idx]) { - dev_kfree_skb_any(priv->echo_skb[idx]); - priv->echo_skb[idx] = NULL; - } -} -EXPORT_SYMBOL_GPL(can_free_echo_skb); - /* CAN device restart for bus-off recovery */ static void can_restart(struct net_device *dev) { @@ -379,76 +236,6 @@ static void can_setup(struct net_device *dev) dev->features = NETIF_F_HW_CSUM; } -struct sk_buff *alloc_can_skb(struct net_device *dev, struct can_frame **cf) -{ - struct sk_buff *skb; - - skb = netdev_alloc_skb(dev, sizeof(struct can_skb_priv) + - sizeof(struct can_frame)); - if (unlikely(!skb)) - return NULL; - - skb->protocol = htons(ETH_P_CAN); - skb->pkt_type = PACKET_BROADCAST; - skb->ip_summed = CHECKSUM_UNNECESSARY; - - skb_reset_mac_header(skb); - skb_reset_network_header(skb); - skb_reset_transport_header(skb); - - can_skb_reserve(skb); - can_skb_prv(skb)->ifindex = dev->ifindex; - can_skb_prv(skb)->skbcnt = 0; - - *cf = skb_put_zero(skb, sizeof(struct can_frame)); - - return skb; -} -EXPORT_SYMBOL_GPL(alloc_can_skb); - -struct sk_buff *alloc_canfd_skb(struct net_device *dev, - struct canfd_frame **cfd) -{ - struct sk_buff *skb; - - skb = netdev_alloc_skb(dev, sizeof(struct can_skb_priv) + - sizeof(struct canfd_frame)); - if (unlikely(!skb)) - return NULL; - - skb->protocol = htons(ETH_P_CANFD); - skb->pkt_type = PACKET_BROADCAST; - skb->ip_summed = CHECKSUM_UNNECESSARY; - - skb_reset_mac_header(skb); - skb_reset_network_header(skb); - skb_reset_transport_header(skb); - - can_skb_reserve(skb); - can_skb_prv(skb)->ifindex = dev->ifindex; - can_skb_prv(skb)->skbcnt = 0; - - *cfd = skb_put_zero(skb, sizeof(struct canfd_frame)); - - return skb; -} -EXPORT_SYMBOL_GPL(alloc_canfd_skb); - -struct sk_buff *alloc_can_err_skb(struct net_device *dev, struct can_frame **cf) -{ - struct sk_buff *skb; - - skb = alloc_can_skb(dev, cf); - if (unlikely(!skb)) - return NULL; - - (*cf)->can_id = CAN_ERR_FLAG; - (*cf)->len = CAN_ERR_DLC; - - return skb; -} -EXPORT_SYMBOL_GPL(alloc_can_err_skb); - /* Allocate and setup space for the CAN network device */ struct net_device *alloc_candev_mqs(int sizeof_priv, unsigned int echo_skb_max, unsigned int txqs, unsigned int rxqs) diff --git a/drivers/net/can/dev/skb.c b/drivers/net/can/dev/skb.c new file mode 100644 index 0000000000000..26cd597ff7714 --- /dev/null +++ b/drivers/net/can/dev/skb.c @@ -0,0 +1,220 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (C) 2005 Marc Kleine-Budde, Pengutronix + * Copyright (C) 2006 Andrey Volkov, Varma Electronics + * Copyright (C) 2008-2009 Wolfgang Grandegger + */ + +#include + +/* Local echo of CAN messages + * + * CAN network devices *should* support a local echo functionality + * (see Documentation/networking/can.rst). To test the handling of CAN + * interfaces that do not support the local echo both driver types are + * implemented. In the case that the driver does not support the echo + * the IFF_ECHO remains clear in dev->flags. This causes the PF_CAN core + * to perform the echo as a fallback solution. + */ +void can_flush_echo_skb(struct net_device *dev) +{ + struct can_priv *priv = netdev_priv(dev); + struct net_device_stats *stats = &dev->stats; + int i; + + for (i = 0; i < priv->echo_skb_max; i++) { + if (priv->echo_skb[i]) { + kfree_skb(priv->echo_skb[i]); + priv->echo_skb[i] = NULL; + stats->tx_dropped++; + stats->tx_aborted_errors++; + } + } +} + +/* Put the skb on the stack to be looped backed locally lateron + * + * The function is typically called in the start_xmit function + * of the device driver. The driver must protect access to + * priv->echo_skb, if necessary. + */ +int can_put_echo_skb(struct sk_buff *skb, struct net_device *dev, + unsigned int idx) +{ + struct can_priv *priv = netdev_priv(dev); + + BUG_ON(idx >= priv->echo_skb_max); + + /* check flag whether this packet has to be looped back */ + if (!(dev->flags & IFF_ECHO) || skb->pkt_type != PACKET_LOOPBACK || + (skb->protocol != htons(ETH_P_CAN) && + skb->protocol != htons(ETH_P_CANFD))) { + kfree_skb(skb); + return 0; + } + + if (!priv->echo_skb[idx]) { + skb = can_create_echo_skb(skb); + if (!skb) + return -ENOMEM; + + /* make settings for echo to reduce code in irq context */ + skb->pkt_type = PACKET_BROADCAST; + skb->ip_summed = CHECKSUM_UNNECESSARY; + skb->dev = dev; + + /* save this skb for tx interrupt echo handling */ + priv->echo_skb[idx] = skb; + } else { + /* locking problem with netif_stop_queue() ?? */ + netdev_err(dev, "%s: BUG! echo_skb %d is occupied!\n", __func__, idx); + kfree_skb(skb); + return -EBUSY; + } + + return 0; +} +EXPORT_SYMBOL_GPL(can_put_echo_skb); + +struct sk_buff * +__can_get_echo_skb(struct net_device *dev, unsigned int idx, u8 *len_ptr) +{ + struct can_priv *priv = netdev_priv(dev); + + if (idx >= priv->echo_skb_max) { + netdev_err(dev, "%s: BUG! Trying to access can_priv::echo_skb out of bounds (%u/max %u)\n", + __func__, idx, priv->echo_skb_max); + return NULL; + } + + if (priv->echo_skb[idx]) { + /* Using "struct canfd_frame::len" for the frame + * length is supported on both CAN and CANFD frames. + */ + struct sk_buff *skb = priv->echo_skb[idx]; + struct canfd_frame *cf = (struct canfd_frame *)skb->data; + + /* get the real payload length for netdev statistics */ + if (cf->can_id & CAN_RTR_FLAG) + *len_ptr = 0; + else + *len_ptr = cf->len; + + priv->echo_skb[idx] = NULL; + + return skb; + } + + return NULL; +} + +/* Get the skb from the stack and loop it back locally + * + * The function is typically called when the TX done interrupt + * is handled in the device driver. The driver must protect + * access to priv->echo_skb, if necessary. + */ +unsigned int can_get_echo_skb(struct net_device *dev, unsigned int idx) +{ + struct sk_buff *skb; + u8 len; + + skb = __can_get_echo_skb(dev, idx, &len); + if (!skb) + return 0; + + skb_get(skb); + if (netif_rx(skb) == NET_RX_SUCCESS) + dev_consume_skb_any(skb); + else + dev_kfree_skb_any(skb); + + return len; +} +EXPORT_SYMBOL_GPL(can_get_echo_skb); + +/* Remove the skb from the stack and free it. + * + * The function is typically called when TX failed. + */ +void can_free_echo_skb(struct net_device *dev, unsigned int idx) +{ + struct can_priv *priv = netdev_priv(dev); + + BUG_ON(idx >= priv->echo_skb_max); + + if (priv->echo_skb[idx]) { + dev_kfree_skb_any(priv->echo_skb[idx]); + priv->echo_skb[idx] = NULL; + } +} +EXPORT_SYMBOL_GPL(can_free_echo_skb); + +struct sk_buff *alloc_can_skb(struct net_device *dev, struct can_frame **cf) +{ + struct sk_buff *skb; + + skb = netdev_alloc_skb(dev, sizeof(struct can_skb_priv) + + sizeof(struct can_frame)); + if (unlikely(!skb)) + return NULL; + + skb->protocol = htons(ETH_P_CAN); + skb->pkt_type = PACKET_BROADCAST; + skb->ip_summed = CHECKSUM_UNNECESSARY; + + skb_reset_mac_header(skb); + skb_reset_network_header(skb); + skb_reset_transport_header(skb); + + can_skb_reserve(skb); + can_skb_prv(skb)->ifindex = dev->ifindex; + can_skb_prv(skb)->skbcnt = 0; + + *cf = skb_put_zero(skb, sizeof(struct can_frame)); + + return skb; +} +EXPORT_SYMBOL_GPL(alloc_can_skb); + +struct sk_buff *alloc_canfd_skb(struct net_device *dev, + struct canfd_frame **cfd) +{ + struct sk_buff *skb; + + skb = netdev_alloc_skb(dev, sizeof(struct can_skb_priv) + + sizeof(struct canfd_frame)); + if (unlikely(!skb)) + return NULL; + + skb->protocol = htons(ETH_P_CANFD); + skb->pkt_type = PACKET_BROADCAST; + skb->ip_summed = CHECKSUM_UNNECESSARY; + + skb_reset_mac_header(skb); + skb_reset_network_header(skb); + skb_reset_transport_header(skb); + + can_skb_reserve(skb); + can_skb_prv(skb)->ifindex = dev->ifindex; + can_skb_prv(skb)->skbcnt = 0; + + *cfd = skb_put_zero(skb, sizeof(struct canfd_frame)); + + return skb; +} +EXPORT_SYMBOL_GPL(alloc_canfd_skb); + +struct sk_buff *alloc_can_err_skb(struct net_device *dev, struct can_frame **cf) +{ + struct sk_buff *skb; + + skb = alloc_can_skb(dev, cf); + if (unlikely(!skb)) + return NULL; + + (*cf)->can_id = CAN_ERR_FLAG; + (*cf)->len = CAN_ERR_DLC; + + return skb; +} +EXPORT_SYMBOL_GPL(alloc_can_err_skb); diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h index d75fba1d030a4..4a26e128af7f2 100644 --- a/include/linux/can/dev.h +++ b/include/linux/can/dev.h @@ -84,69 +84,6 @@ struct can_priv { #endif }; -/* Check for outgoing skbs that have not been created by the CAN subsystem */ -static inline bool can_skb_headroom_valid(struct net_device *dev, - struct sk_buff *skb) -{ - /* af_packet creates a headroom of HH_DATA_MOD bytes which is fine */ - if (WARN_ON_ONCE(skb_headroom(skb) < sizeof(struct can_skb_priv))) - return false; - - /* af_packet does not apply CAN skb specific settings */ - if (skb->ip_summed == CHECKSUM_NONE) { - /* init headroom */ - can_skb_prv(skb)->ifindex = dev->ifindex; - can_skb_prv(skb)->skbcnt = 0; - - skb->ip_summed = CHECKSUM_UNNECESSARY; - - /* perform proper loopback on capable devices */ - if (dev->flags & IFF_ECHO) - skb->pkt_type = PACKET_LOOPBACK; - else - skb->pkt_type = PACKET_HOST; - - skb_reset_mac_header(skb); - skb_reset_network_header(skb); - skb_reset_transport_header(skb); - } - - return true; -} - -/* Drop a given socketbuffer if it does not contain a valid CAN frame. */ -static inline bool can_dropped_invalid_skb(struct net_device *dev, - struct sk_buff *skb) -{ - const struct canfd_frame *cfd = (struct canfd_frame *)skb->data; - - if (skb->protocol == htons(ETH_P_CAN)) { - if (unlikely(skb->len != CAN_MTU || - cfd->len > CAN_MAX_DLEN)) - goto inval_skb; - } else if (skb->protocol == htons(ETH_P_CANFD)) { - if (unlikely(skb->len != CANFD_MTU || - cfd->len > CANFD_MAX_DLEN)) - goto inval_skb; - } else - goto inval_skb; - - if (!can_skb_headroom_valid(dev, skb)) - goto inval_skb; - - return false; - -inval_skb: - kfree_skb(skb); - dev->stats.tx_dropped++; - return true; -} - -static inline bool can_is_canfd_skb(const struct sk_buff *skb) -{ - /* the CAN specific type of skb is identified by its data length */ - return skb->len == CANFD_MTU; -} /* helper to define static CAN controller features at device creation time */ static inline void can_set_static_ctrlmode(struct net_device *dev, @@ -187,23 +124,10 @@ void can_bus_off(struct net_device *dev); void can_change_state(struct net_device *dev, struct can_frame *cf, enum can_state tx_state, enum can_state rx_state); -int can_put_echo_skb(struct sk_buff *skb, struct net_device *dev, - unsigned int idx); -struct sk_buff *__can_get_echo_skb(struct net_device *dev, unsigned int idx, - u8 *len_ptr); -unsigned int can_get_echo_skb(struct net_device *dev, unsigned int idx); -void can_free_echo_skb(struct net_device *dev, unsigned int idx); - #ifdef CONFIG_OF void of_can_transceiver(struct net_device *dev); #else static inline void of_can_transceiver(struct net_device *dev) { } #endif -struct sk_buff *alloc_can_skb(struct net_device *dev, struct can_frame **cf); -struct sk_buff *alloc_canfd_skb(struct net_device *dev, - struct canfd_frame **cfd); -struct sk_buff *alloc_can_err_skb(struct net_device *dev, - struct can_frame **cf); - #endif /* !_CAN_DEV_H */ diff --git a/include/linux/can/skb.h b/include/linux/can/skb.h index fc61cf4eff1c9..c90ebbd3008c5 100644 --- a/include/linux/can/skb.h +++ b/include/linux/can/skb.h @@ -16,6 +16,19 @@ #include #include +void can_flush_echo_skb(struct net_device *dev); +int can_put_echo_skb(struct sk_buff *skb, struct net_device *dev, + unsigned int idx); +struct sk_buff *__can_get_echo_skb(struct net_device *dev, unsigned int idx, + u8 *len_ptr); +unsigned int can_get_echo_skb(struct net_device *dev, unsigned int idx); +void can_free_echo_skb(struct net_device *dev, unsigned int idx); +struct sk_buff *alloc_can_skb(struct net_device *dev, struct can_frame **cf); +struct sk_buff *alloc_canfd_skb(struct net_device *dev, + struct canfd_frame **cfd); +struct sk_buff *alloc_can_err_skb(struct net_device *dev, + struct can_frame **cf); + /* * The struct can_skb_priv is used to transport additional information along * with the stored struct can(fd)_frame that can not be contained in existing @@ -74,4 +87,68 @@ static inline struct sk_buff *can_create_echo_skb(struct sk_buff *skb) return nskb; } +/* Check for outgoing skbs that have not been created by the CAN subsystem */ +static inline bool can_skb_headroom_valid(struct net_device *dev, + struct sk_buff *skb) +{ + /* af_packet creates a headroom of HH_DATA_MOD bytes which is fine */ + if (WARN_ON_ONCE(skb_headroom(skb) < sizeof(struct can_skb_priv))) + return false; + + /* af_packet does not apply CAN skb specific settings */ + if (skb->ip_summed == CHECKSUM_NONE) { + /* init headroom */ + can_skb_prv(skb)->ifindex = dev->ifindex; + can_skb_prv(skb)->skbcnt = 0; + + skb->ip_summed = CHECKSUM_UNNECESSARY; + + /* perform proper loopback on capable devices */ + if (dev->flags & IFF_ECHO) + skb->pkt_type = PACKET_LOOPBACK; + else + skb->pkt_type = PACKET_HOST; + + skb_reset_mac_header(skb); + skb_reset_network_header(skb); + skb_reset_transport_header(skb); + } + + return true; +} + +/* Drop a given socketbuffer if it does not contain a valid CAN frame. */ +static inline bool can_dropped_invalid_skb(struct net_device *dev, + struct sk_buff *skb) +{ + const struct canfd_frame *cfd = (struct canfd_frame *)skb->data; + + if (skb->protocol == htons(ETH_P_CAN)) { + if (unlikely(skb->len != CAN_MTU || + cfd->len > CAN_MAX_DLEN)) + goto inval_skb; + } else if (skb->protocol == htons(ETH_P_CANFD)) { + if (unlikely(skb->len != CANFD_MTU || + cfd->len > CANFD_MAX_DLEN)) + goto inval_skb; + } else + goto inval_skb; + + if (!can_skb_headroom_valid(dev, skb)) + goto inval_skb; + + return false; + +inval_skb: + kfree_skb(skb); + dev->stats.tx_dropped++; + return true; +} + +static inline bool can_is_canfd_skb(const struct sk_buff *skb) +{ + /* the CAN specific type of skb is identified by its data length */ + return skb->len == CANFD_MTU; +} + #endif /* !_CAN_SKB_H */ -- GitLab From 0a042c6ec991e4d33062ee52e9e23f615bc659f9 Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Mon, 11 Jan 2021 15:19:21 +0100 Subject: [PATCH 0627/2012] can: dev: move netlink related code into seperate file This patch moves the netlink related code of the CAN device infrastructure into a separate file. Reviewed-by: Vincent Mailhol Link: https://lore.kernel.org/r/20210111141930.693847-7-mkl@pengutronix.de Signed-off-by: Marc Kleine-Budde --- drivers/net/can/dev/Makefile | 1 + drivers/net/can/dev/dev.c | 370 +-------------------------------- drivers/net/can/dev/netlink.c | 379 ++++++++++++++++++++++++++++++++++ include/linux/can/dev.h | 6 + 4 files changed, 389 insertions(+), 367 deletions(-) create mode 100644 drivers/net/can/dev/netlink.c diff --git a/drivers/net/can/dev/Makefile b/drivers/net/can/dev/Makefile index 188bd53b113c5..3e2e207861fc9 100644 --- a/drivers/net/can/dev/Makefile +++ b/drivers/net/can/dev/Makefile @@ -4,6 +4,7 @@ obj-$(CONFIG_CAN_DEV) += can-dev.o can-dev-y += bittiming.o can-dev-y += dev.o can-dev-y += length.o +can-dev-y += netlink.o can-dev-y += rx-offload.o can-dev-y += skb.o diff --git a/drivers/net/can/dev/dev.c b/drivers/net/can/dev/dev.c index fe71ff9ef8c93..f580f0ac64975 100644 --- a/drivers/net/can/dev/dev.c +++ b/drivers/net/can/dev/dev.c @@ -14,10 +14,8 @@ #include #include #include -#include #include #include -#include #define MOD_DESC "CAN device driver interface" @@ -223,7 +221,7 @@ void can_bus_off(struct net_device *dev) } EXPORT_SYMBOL_GPL(can_bus_off); -static void can_setup(struct net_device *dev) +void can_setup(struct net_device *dev) { dev->type = ARPHRD_CAN; dev->mtu = CAN_MTU; @@ -399,368 +397,6 @@ void close_candev(struct net_device *dev) } EXPORT_SYMBOL_GPL(close_candev); -/* CAN netlink interface */ -static const struct nla_policy can_policy[IFLA_CAN_MAX + 1] = { - [IFLA_CAN_STATE] = { .type = NLA_U32 }, - [IFLA_CAN_CTRLMODE] = { .len = sizeof(struct can_ctrlmode) }, - [IFLA_CAN_RESTART_MS] = { .type = NLA_U32 }, - [IFLA_CAN_RESTART] = { .type = NLA_U32 }, - [IFLA_CAN_BITTIMING] = { .len = sizeof(struct can_bittiming) }, - [IFLA_CAN_BITTIMING_CONST] - = { .len = sizeof(struct can_bittiming_const) }, - [IFLA_CAN_CLOCK] = { .len = sizeof(struct can_clock) }, - [IFLA_CAN_BERR_COUNTER] = { .len = sizeof(struct can_berr_counter) }, - [IFLA_CAN_DATA_BITTIMING] - = { .len = sizeof(struct can_bittiming) }, - [IFLA_CAN_DATA_BITTIMING_CONST] - = { .len = sizeof(struct can_bittiming_const) }, - [IFLA_CAN_TERMINATION] = { .type = NLA_U16 }, -}; - -static int can_validate(struct nlattr *tb[], struct nlattr *data[], - struct netlink_ext_ack *extack) -{ - bool is_can_fd = false; - - /* Make sure that valid CAN FD configurations always consist of - * - nominal/arbitration bittiming - * - data bittiming - * - control mode with CAN_CTRLMODE_FD set - */ - - if (!data) - return 0; - - if (data[IFLA_CAN_CTRLMODE]) { - struct can_ctrlmode *cm = nla_data(data[IFLA_CAN_CTRLMODE]); - - is_can_fd = cm->flags & cm->mask & CAN_CTRLMODE_FD; - } - - if (is_can_fd) { - if (!data[IFLA_CAN_BITTIMING] || !data[IFLA_CAN_DATA_BITTIMING]) - return -EOPNOTSUPP; - } - - if (data[IFLA_CAN_DATA_BITTIMING]) { - if (!is_can_fd || !data[IFLA_CAN_BITTIMING]) - return -EOPNOTSUPP; - } - - return 0; -} - -static int can_changelink(struct net_device *dev, struct nlattr *tb[], - struct nlattr *data[], - struct netlink_ext_ack *extack) -{ - struct can_priv *priv = netdev_priv(dev); - int err; - - /* We need synchronization with dev->stop() */ - ASSERT_RTNL(); - - if (data[IFLA_CAN_BITTIMING]) { - struct can_bittiming bt; - - /* Do not allow changing bittiming while running */ - if (dev->flags & IFF_UP) - return -EBUSY; - - /* Calculate bittiming parameters based on - * bittiming_const if set, otherwise pass bitrate - * directly via do_set_bitrate(). Bail out if neither - * is given. - */ - if (!priv->bittiming_const && !priv->do_set_bittiming) - return -EOPNOTSUPP; - - memcpy(&bt, nla_data(data[IFLA_CAN_BITTIMING]), sizeof(bt)); - err = can_get_bittiming(dev, &bt, - priv->bittiming_const, - priv->bitrate_const, - priv->bitrate_const_cnt); - if (err) - return err; - - if (priv->bitrate_max && bt.bitrate > priv->bitrate_max) { - netdev_err(dev, "arbitration bitrate surpasses transceiver capabilities of %d bps\n", - priv->bitrate_max); - return -EINVAL; - } - - memcpy(&priv->bittiming, &bt, sizeof(bt)); - - if (priv->do_set_bittiming) { - /* Finally, set the bit-timing registers */ - err = priv->do_set_bittiming(dev); - if (err) - return err; - } - } - - if (data[IFLA_CAN_CTRLMODE]) { - struct can_ctrlmode *cm; - u32 ctrlstatic; - u32 maskedflags; - - /* Do not allow changing controller mode while running */ - if (dev->flags & IFF_UP) - return -EBUSY; - cm = nla_data(data[IFLA_CAN_CTRLMODE]); - ctrlstatic = priv->ctrlmode_static; - maskedflags = cm->flags & cm->mask; - - /* check whether provided bits are allowed to be passed */ - if (cm->mask & ~(priv->ctrlmode_supported | ctrlstatic)) - return -EOPNOTSUPP; - - /* do not check for static fd-non-iso if 'fd' is disabled */ - if (!(maskedflags & CAN_CTRLMODE_FD)) - ctrlstatic &= ~CAN_CTRLMODE_FD_NON_ISO; - - /* make sure static options are provided by configuration */ - if ((maskedflags & ctrlstatic) != ctrlstatic) - return -EOPNOTSUPP; - - /* clear bits to be modified and copy the flag values */ - priv->ctrlmode &= ~cm->mask; - priv->ctrlmode |= maskedflags; - - /* CAN_CTRLMODE_FD can only be set when driver supports FD */ - if (priv->ctrlmode & CAN_CTRLMODE_FD) - dev->mtu = CANFD_MTU; - else - dev->mtu = CAN_MTU; - } - - if (data[IFLA_CAN_RESTART_MS]) { - /* Do not allow changing restart delay while running */ - if (dev->flags & IFF_UP) - return -EBUSY; - priv->restart_ms = nla_get_u32(data[IFLA_CAN_RESTART_MS]); - } - - if (data[IFLA_CAN_RESTART]) { - /* Do not allow a restart while not running */ - if (!(dev->flags & IFF_UP)) - return -EINVAL; - err = can_restart_now(dev); - if (err) - return err; - } - - if (data[IFLA_CAN_DATA_BITTIMING]) { - struct can_bittiming dbt; - - /* Do not allow changing bittiming while running */ - if (dev->flags & IFF_UP) - return -EBUSY; - - /* Calculate bittiming parameters based on - * data_bittiming_const if set, otherwise pass bitrate - * directly via do_set_bitrate(). Bail out if neither - * is given. - */ - if (!priv->data_bittiming_const && !priv->do_set_data_bittiming) - return -EOPNOTSUPP; - - memcpy(&dbt, nla_data(data[IFLA_CAN_DATA_BITTIMING]), - sizeof(dbt)); - err = can_get_bittiming(dev, &dbt, - priv->data_bittiming_const, - priv->data_bitrate_const, - priv->data_bitrate_const_cnt); - if (err) - return err; - - if (priv->bitrate_max && dbt.bitrate > priv->bitrate_max) { - netdev_err(dev, "canfd data bitrate surpasses transceiver capabilities of %d bps\n", - priv->bitrate_max); - return -EINVAL; - } - - memcpy(&priv->data_bittiming, &dbt, sizeof(dbt)); - - if (priv->do_set_data_bittiming) { - /* Finally, set the bit-timing registers */ - err = priv->do_set_data_bittiming(dev); - if (err) - return err; - } - } - - if (data[IFLA_CAN_TERMINATION]) { - const u16 termval = nla_get_u16(data[IFLA_CAN_TERMINATION]); - const unsigned int num_term = priv->termination_const_cnt; - unsigned int i; - - if (!priv->do_set_termination) - return -EOPNOTSUPP; - - /* check whether given value is supported by the interface */ - for (i = 0; i < num_term; i++) { - if (termval == priv->termination_const[i]) - break; - } - if (i >= num_term) - return -EINVAL; - - /* Finally, set the termination value */ - err = priv->do_set_termination(dev, termval); - if (err) - return err; - - priv->termination = termval; - } - - return 0; -} - -static size_t can_get_size(const struct net_device *dev) -{ - struct can_priv *priv = netdev_priv(dev); - size_t size = 0; - - if (priv->bittiming.bitrate) /* IFLA_CAN_BITTIMING */ - size += nla_total_size(sizeof(struct can_bittiming)); - if (priv->bittiming_const) /* IFLA_CAN_BITTIMING_CONST */ - size += nla_total_size(sizeof(struct can_bittiming_const)); - size += nla_total_size(sizeof(struct can_clock)); /* IFLA_CAN_CLOCK */ - size += nla_total_size(sizeof(u32)); /* IFLA_CAN_STATE */ - size += nla_total_size(sizeof(struct can_ctrlmode)); /* IFLA_CAN_CTRLMODE */ - size += nla_total_size(sizeof(u32)); /* IFLA_CAN_RESTART_MS */ - if (priv->do_get_berr_counter) /* IFLA_CAN_BERR_COUNTER */ - size += nla_total_size(sizeof(struct can_berr_counter)); - if (priv->data_bittiming.bitrate) /* IFLA_CAN_DATA_BITTIMING */ - size += nla_total_size(sizeof(struct can_bittiming)); - if (priv->data_bittiming_const) /* IFLA_CAN_DATA_BITTIMING_CONST */ - size += nla_total_size(sizeof(struct can_bittiming_const)); - if (priv->termination_const) { - size += nla_total_size(sizeof(priv->termination)); /* IFLA_CAN_TERMINATION */ - size += nla_total_size(sizeof(*priv->termination_const) * /* IFLA_CAN_TERMINATION_CONST */ - priv->termination_const_cnt); - } - if (priv->bitrate_const) /* IFLA_CAN_BITRATE_CONST */ - size += nla_total_size(sizeof(*priv->bitrate_const) * - priv->bitrate_const_cnt); - if (priv->data_bitrate_const) /* IFLA_CAN_DATA_BITRATE_CONST */ - size += nla_total_size(sizeof(*priv->data_bitrate_const) * - priv->data_bitrate_const_cnt); - size += sizeof(priv->bitrate_max); /* IFLA_CAN_BITRATE_MAX */ - - return size; -} - -static int can_fill_info(struct sk_buff *skb, const struct net_device *dev) -{ - struct can_priv *priv = netdev_priv(dev); - struct can_ctrlmode cm = {.flags = priv->ctrlmode}; - struct can_berr_counter bec; - enum can_state state = priv->state; - - if (priv->do_get_state) - priv->do_get_state(dev, &state); - - if ((priv->bittiming.bitrate && - nla_put(skb, IFLA_CAN_BITTIMING, - sizeof(priv->bittiming), &priv->bittiming)) || - - (priv->bittiming_const && - nla_put(skb, IFLA_CAN_BITTIMING_CONST, - sizeof(*priv->bittiming_const), priv->bittiming_const)) || - - nla_put(skb, IFLA_CAN_CLOCK, sizeof(priv->clock), &priv->clock) || - nla_put_u32(skb, IFLA_CAN_STATE, state) || - nla_put(skb, IFLA_CAN_CTRLMODE, sizeof(cm), &cm) || - nla_put_u32(skb, IFLA_CAN_RESTART_MS, priv->restart_ms) || - - (priv->do_get_berr_counter && - !priv->do_get_berr_counter(dev, &bec) && - nla_put(skb, IFLA_CAN_BERR_COUNTER, sizeof(bec), &bec)) || - - (priv->data_bittiming.bitrate && - nla_put(skb, IFLA_CAN_DATA_BITTIMING, - sizeof(priv->data_bittiming), &priv->data_bittiming)) || - - (priv->data_bittiming_const && - nla_put(skb, IFLA_CAN_DATA_BITTIMING_CONST, - sizeof(*priv->data_bittiming_const), - priv->data_bittiming_const)) || - - (priv->termination_const && - (nla_put_u16(skb, IFLA_CAN_TERMINATION, priv->termination) || - nla_put(skb, IFLA_CAN_TERMINATION_CONST, - sizeof(*priv->termination_const) * - priv->termination_const_cnt, - priv->termination_const))) || - - (priv->bitrate_const && - nla_put(skb, IFLA_CAN_BITRATE_CONST, - sizeof(*priv->bitrate_const) * - priv->bitrate_const_cnt, - priv->bitrate_const)) || - - (priv->data_bitrate_const && - nla_put(skb, IFLA_CAN_DATA_BITRATE_CONST, - sizeof(*priv->data_bitrate_const) * - priv->data_bitrate_const_cnt, - priv->data_bitrate_const)) || - - (nla_put(skb, IFLA_CAN_BITRATE_MAX, - sizeof(priv->bitrate_max), - &priv->bitrate_max)) - ) - - return -EMSGSIZE; - - return 0; -} - -static size_t can_get_xstats_size(const struct net_device *dev) -{ - return sizeof(struct can_device_stats); -} - -static int can_fill_xstats(struct sk_buff *skb, const struct net_device *dev) -{ - struct can_priv *priv = netdev_priv(dev); - - if (nla_put(skb, IFLA_INFO_XSTATS, - sizeof(priv->can_stats), &priv->can_stats)) - goto nla_put_failure; - return 0; - -nla_put_failure: - return -EMSGSIZE; -} - -static int can_newlink(struct net *src_net, struct net_device *dev, - struct nlattr *tb[], struct nlattr *data[], - struct netlink_ext_ack *extack) -{ - return -EOPNOTSUPP; -} - -static void can_dellink(struct net_device *dev, struct list_head *head) -{ -} - -static struct rtnl_link_ops can_link_ops __read_mostly = { - .kind = "can", - .maxtype = IFLA_CAN_MAX, - .policy = can_policy, - .setup = can_setup, - .validate = can_validate, - .newlink = can_newlink, - .changelink = can_changelink, - .dellink = can_dellink, - .get_size = can_get_size, - .fill_info = can_fill_info, - .get_xstats_size = can_get_xstats_size, - .fill_xstats = can_fill_xstats, -}; - /* Register the CAN network device */ int register_candev(struct net_device *dev) { @@ -812,7 +448,7 @@ static __init int can_dev_init(void) can_led_notifier_init(); - err = rtnl_link_register(&can_link_ops); + err = can_netlink_register(); if (!err) pr_info(MOD_DESC "\n"); @@ -822,7 +458,7 @@ module_init(can_dev_init); static __exit void can_dev_exit(void) { - rtnl_link_unregister(&can_link_ops); + can_netlink_unregister(); can_led_notifier_exit(); } diff --git a/drivers/net/can/dev/netlink.c b/drivers/net/can/dev/netlink.c new file mode 100644 index 0000000000000..3ae884cdf677d --- /dev/null +++ b/drivers/net/can/dev/netlink.c @@ -0,0 +1,379 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (C) 2005 Marc Kleine-Budde, Pengutronix + * Copyright (C) 2006 Andrey Volkov, Varma Electronics + * Copyright (C) 2008-2009 Wolfgang Grandegger + */ + +#include +#include + +static const struct nla_policy can_policy[IFLA_CAN_MAX + 1] = { + [IFLA_CAN_STATE] = { .type = NLA_U32 }, + [IFLA_CAN_CTRLMODE] = { .len = sizeof(struct can_ctrlmode) }, + [IFLA_CAN_RESTART_MS] = { .type = NLA_U32 }, + [IFLA_CAN_RESTART] = { .type = NLA_U32 }, + [IFLA_CAN_BITTIMING] = { .len = sizeof(struct can_bittiming) }, + [IFLA_CAN_BITTIMING_CONST] + = { .len = sizeof(struct can_bittiming_const) }, + [IFLA_CAN_CLOCK] = { .len = sizeof(struct can_clock) }, + [IFLA_CAN_BERR_COUNTER] = { .len = sizeof(struct can_berr_counter) }, + [IFLA_CAN_DATA_BITTIMING] + = { .len = sizeof(struct can_bittiming) }, + [IFLA_CAN_DATA_BITTIMING_CONST] + = { .len = sizeof(struct can_bittiming_const) }, + [IFLA_CAN_TERMINATION] = { .type = NLA_U16 }, +}; + +static int can_validate(struct nlattr *tb[], struct nlattr *data[], + struct netlink_ext_ack *extack) +{ + bool is_can_fd = false; + + /* Make sure that valid CAN FD configurations always consist of + * - nominal/arbitration bittiming + * - data bittiming + * - control mode with CAN_CTRLMODE_FD set + */ + + if (!data) + return 0; + + if (data[IFLA_CAN_CTRLMODE]) { + struct can_ctrlmode *cm = nla_data(data[IFLA_CAN_CTRLMODE]); + + is_can_fd = cm->flags & cm->mask & CAN_CTRLMODE_FD; + } + + if (is_can_fd) { + if (!data[IFLA_CAN_BITTIMING] || !data[IFLA_CAN_DATA_BITTIMING]) + return -EOPNOTSUPP; + } + + if (data[IFLA_CAN_DATA_BITTIMING]) { + if (!is_can_fd || !data[IFLA_CAN_BITTIMING]) + return -EOPNOTSUPP; + } + + return 0; +} + +static int can_changelink(struct net_device *dev, struct nlattr *tb[], + struct nlattr *data[], + struct netlink_ext_ack *extack) +{ + struct can_priv *priv = netdev_priv(dev); + int err; + + /* We need synchronization with dev->stop() */ + ASSERT_RTNL(); + + if (data[IFLA_CAN_BITTIMING]) { + struct can_bittiming bt; + + /* Do not allow changing bittiming while running */ + if (dev->flags & IFF_UP) + return -EBUSY; + + /* Calculate bittiming parameters based on + * bittiming_const if set, otherwise pass bitrate + * directly via do_set_bitrate(). Bail out if neither + * is given. + */ + if (!priv->bittiming_const && !priv->do_set_bittiming) + return -EOPNOTSUPP; + + memcpy(&bt, nla_data(data[IFLA_CAN_BITTIMING]), sizeof(bt)); + err = can_get_bittiming(dev, &bt, + priv->bittiming_const, + priv->bitrate_const, + priv->bitrate_const_cnt); + if (err) + return err; + + if (priv->bitrate_max && bt.bitrate > priv->bitrate_max) { + netdev_err(dev, "arbitration bitrate surpasses transceiver capabilities of %d bps\n", + priv->bitrate_max); + return -EINVAL; + } + + memcpy(&priv->bittiming, &bt, sizeof(bt)); + + if (priv->do_set_bittiming) { + /* Finally, set the bit-timing registers */ + err = priv->do_set_bittiming(dev); + if (err) + return err; + } + } + + if (data[IFLA_CAN_CTRLMODE]) { + struct can_ctrlmode *cm; + u32 ctrlstatic; + u32 maskedflags; + + /* Do not allow changing controller mode while running */ + if (dev->flags & IFF_UP) + return -EBUSY; + cm = nla_data(data[IFLA_CAN_CTRLMODE]); + ctrlstatic = priv->ctrlmode_static; + maskedflags = cm->flags & cm->mask; + + /* check whether provided bits are allowed to be passed */ + if (cm->mask & ~(priv->ctrlmode_supported | ctrlstatic)) + return -EOPNOTSUPP; + + /* do not check for static fd-non-iso if 'fd' is disabled */ + if (!(maskedflags & CAN_CTRLMODE_FD)) + ctrlstatic &= ~CAN_CTRLMODE_FD_NON_ISO; + + /* make sure static options are provided by configuration */ + if ((maskedflags & ctrlstatic) != ctrlstatic) + return -EOPNOTSUPP; + + /* clear bits to be modified and copy the flag values */ + priv->ctrlmode &= ~cm->mask; + priv->ctrlmode |= maskedflags; + + /* CAN_CTRLMODE_FD can only be set when driver supports FD */ + if (priv->ctrlmode & CAN_CTRLMODE_FD) + dev->mtu = CANFD_MTU; + else + dev->mtu = CAN_MTU; + } + + if (data[IFLA_CAN_RESTART_MS]) { + /* Do not allow changing restart delay while running */ + if (dev->flags & IFF_UP) + return -EBUSY; + priv->restart_ms = nla_get_u32(data[IFLA_CAN_RESTART_MS]); + } + + if (data[IFLA_CAN_RESTART]) { + /* Do not allow a restart while not running */ + if (!(dev->flags & IFF_UP)) + return -EINVAL; + err = can_restart_now(dev); + if (err) + return err; + } + + if (data[IFLA_CAN_DATA_BITTIMING]) { + struct can_bittiming dbt; + + /* Do not allow changing bittiming while running */ + if (dev->flags & IFF_UP) + return -EBUSY; + + /* Calculate bittiming parameters based on + * data_bittiming_const if set, otherwise pass bitrate + * directly via do_set_bitrate(). Bail out if neither + * is given. + */ + if (!priv->data_bittiming_const && !priv->do_set_data_bittiming) + return -EOPNOTSUPP; + + memcpy(&dbt, nla_data(data[IFLA_CAN_DATA_BITTIMING]), + sizeof(dbt)); + err = can_get_bittiming(dev, &dbt, + priv->data_bittiming_const, + priv->data_bitrate_const, + priv->data_bitrate_const_cnt); + if (err) + return err; + + if (priv->bitrate_max && dbt.bitrate > priv->bitrate_max) { + netdev_err(dev, "canfd data bitrate surpasses transceiver capabilities of %d bps\n", + priv->bitrate_max); + return -EINVAL; + } + + memcpy(&priv->data_bittiming, &dbt, sizeof(dbt)); + + if (priv->do_set_data_bittiming) { + /* Finally, set the bit-timing registers */ + err = priv->do_set_data_bittiming(dev); + if (err) + return err; + } + } + + if (data[IFLA_CAN_TERMINATION]) { + const u16 termval = nla_get_u16(data[IFLA_CAN_TERMINATION]); + const unsigned int num_term = priv->termination_const_cnt; + unsigned int i; + + if (!priv->do_set_termination) + return -EOPNOTSUPP; + + /* check whether given value is supported by the interface */ + for (i = 0; i < num_term; i++) { + if (termval == priv->termination_const[i]) + break; + } + if (i >= num_term) + return -EINVAL; + + /* Finally, set the termination value */ + err = priv->do_set_termination(dev, termval); + if (err) + return err; + + priv->termination = termval; + } + + return 0; +} + +static size_t can_get_size(const struct net_device *dev) +{ + struct can_priv *priv = netdev_priv(dev); + size_t size = 0; + + if (priv->bittiming.bitrate) /* IFLA_CAN_BITTIMING */ + size += nla_total_size(sizeof(struct can_bittiming)); + if (priv->bittiming_const) /* IFLA_CAN_BITTIMING_CONST */ + size += nla_total_size(sizeof(struct can_bittiming_const)); + size += nla_total_size(sizeof(struct can_clock)); /* IFLA_CAN_CLOCK */ + size += nla_total_size(sizeof(u32)); /* IFLA_CAN_STATE */ + size += nla_total_size(sizeof(struct can_ctrlmode)); /* IFLA_CAN_CTRLMODE */ + size += nla_total_size(sizeof(u32)); /* IFLA_CAN_RESTART_MS */ + if (priv->do_get_berr_counter) /* IFLA_CAN_BERR_COUNTER */ + size += nla_total_size(sizeof(struct can_berr_counter)); + if (priv->data_bittiming.bitrate) /* IFLA_CAN_DATA_BITTIMING */ + size += nla_total_size(sizeof(struct can_bittiming)); + if (priv->data_bittiming_const) /* IFLA_CAN_DATA_BITTIMING_CONST */ + size += nla_total_size(sizeof(struct can_bittiming_const)); + if (priv->termination_const) { + size += nla_total_size(sizeof(priv->termination)); /* IFLA_CAN_TERMINATION */ + size += nla_total_size(sizeof(*priv->termination_const) * /* IFLA_CAN_TERMINATION_CONST */ + priv->termination_const_cnt); + } + if (priv->bitrate_const) /* IFLA_CAN_BITRATE_CONST */ + size += nla_total_size(sizeof(*priv->bitrate_const) * + priv->bitrate_const_cnt); + if (priv->data_bitrate_const) /* IFLA_CAN_DATA_BITRATE_CONST */ + size += nla_total_size(sizeof(*priv->data_bitrate_const) * + priv->data_bitrate_const_cnt); + size += sizeof(priv->bitrate_max); /* IFLA_CAN_BITRATE_MAX */ + + return size; +} + +static int can_fill_info(struct sk_buff *skb, const struct net_device *dev) +{ + struct can_priv *priv = netdev_priv(dev); + struct can_ctrlmode cm = {.flags = priv->ctrlmode}; + struct can_berr_counter bec; + enum can_state state = priv->state; + + if (priv->do_get_state) + priv->do_get_state(dev, &state); + + if ((priv->bittiming.bitrate && + nla_put(skb, IFLA_CAN_BITTIMING, + sizeof(priv->bittiming), &priv->bittiming)) || + + (priv->bittiming_const && + nla_put(skb, IFLA_CAN_BITTIMING_CONST, + sizeof(*priv->bittiming_const), priv->bittiming_const)) || + + nla_put(skb, IFLA_CAN_CLOCK, sizeof(priv->clock), &priv->clock) || + nla_put_u32(skb, IFLA_CAN_STATE, state) || + nla_put(skb, IFLA_CAN_CTRLMODE, sizeof(cm), &cm) || + nla_put_u32(skb, IFLA_CAN_RESTART_MS, priv->restart_ms) || + + (priv->do_get_berr_counter && + !priv->do_get_berr_counter(dev, &bec) && + nla_put(skb, IFLA_CAN_BERR_COUNTER, sizeof(bec), &bec)) || + + (priv->data_bittiming.bitrate && + nla_put(skb, IFLA_CAN_DATA_BITTIMING, + sizeof(priv->data_bittiming), &priv->data_bittiming)) || + + (priv->data_bittiming_const && + nla_put(skb, IFLA_CAN_DATA_BITTIMING_CONST, + sizeof(*priv->data_bittiming_const), + priv->data_bittiming_const)) || + + (priv->termination_const && + (nla_put_u16(skb, IFLA_CAN_TERMINATION, priv->termination) || + nla_put(skb, IFLA_CAN_TERMINATION_CONST, + sizeof(*priv->termination_const) * + priv->termination_const_cnt, + priv->termination_const))) || + + (priv->bitrate_const && + nla_put(skb, IFLA_CAN_BITRATE_CONST, + sizeof(*priv->bitrate_const) * + priv->bitrate_const_cnt, + priv->bitrate_const)) || + + (priv->data_bitrate_const && + nla_put(skb, IFLA_CAN_DATA_BITRATE_CONST, + sizeof(*priv->data_bitrate_const) * + priv->data_bitrate_const_cnt, + priv->data_bitrate_const)) || + + (nla_put(skb, IFLA_CAN_BITRATE_MAX, + sizeof(priv->bitrate_max), + &priv->bitrate_max)) + ) + + return -EMSGSIZE; + + return 0; +} + +static size_t can_get_xstats_size(const struct net_device *dev) +{ + return sizeof(struct can_device_stats); +} + +static int can_fill_xstats(struct sk_buff *skb, const struct net_device *dev) +{ + struct can_priv *priv = netdev_priv(dev); + + if (nla_put(skb, IFLA_INFO_XSTATS, + sizeof(priv->can_stats), &priv->can_stats)) + goto nla_put_failure; + return 0; + +nla_put_failure: + return -EMSGSIZE; +} + +static int can_newlink(struct net *src_net, struct net_device *dev, + struct nlattr *tb[], struct nlattr *data[], + struct netlink_ext_ack *extack) +{ + return -EOPNOTSUPP; +} + +static void can_dellink(struct net_device *dev, struct list_head *head) +{ +} + +struct rtnl_link_ops can_link_ops __read_mostly = { + .kind = "can", + .maxtype = IFLA_CAN_MAX, + .policy = can_policy, + .setup = can_setup, + .validate = can_validate, + .newlink = can_newlink, + .changelink = can_changelink, + .dellink = can_dellink, + .get_size = can_get_size, + .fill_info = can_fill_info, + .get_xstats_size = can_get_xstats_size, + .fill_xstats = can_fill_xstats, +}; + +int can_netlink_register(void) +{ + return rtnl_link_register(&can_link_ops); +} + +void can_netlink_unregister(void) +{ + rtnl_link_unregister(&can_link_ops); +} diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h index 4a26e128af7f2..7faf6a37d5b2b 100644 --- a/include/linux/can/dev.h +++ b/include/linux/can/dev.h @@ -100,6 +100,8 @@ static inline void can_set_static_ctrlmode(struct net_device *dev, dev->mtu = CANFD_MTU; } +void can_setup(struct net_device *dev); + struct net_device *alloc_candev_mqs(int sizeof_priv, unsigned int echo_skb_max, unsigned int txqs, unsigned int rxqs); #define alloc_candev(sizeof_priv, echo_skb_max) \ @@ -130,4 +132,8 @@ void of_can_transceiver(struct net_device *dev); static inline void of_can_transceiver(struct net_device *dev) { } #endif +extern struct rtnl_link_ops can_link_ops; +int can_netlink_register(void); +void can_netlink_unregister(void); + #endif /* !_CAN_DEV_H */ -- GitLab From 1ea0a522896d0edcac9a1e071658cceaa94d00ef Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Mon, 11 Jan 2021 15:19:22 +0100 Subject: [PATCH 0628/2012] can: length: convert to kernel coding style This patch converts the file into the kernel coding style. Reviewed-by: Vincent Mailhol Link: https://lore.kernel.org/r/20210111141930.693847-8-mkl@pengutronix.de Signed-off-by: Marc Kleine-Budde --- drivers/net/can/dev/length.c | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/drivers/net/can/dev/length.c b/drivers/net/can/dev/length.c index 6fe18aa23ec9a..5e7d481717ea6 100644 --- a/drivers/net/can/dev/length.c +++ b/drivers/net/can/dev/length.c @@ -6,8 +6,10 @@ /* CAN DLC to real data length conversion helpers */ -static const u8 dlc2len[] = {0, 1, 2, 3, 4, 5, 6, 7, - 8, 12, 16, 20, 24, 32, 48, 64}; +static const u8 dlc2len[] = { + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 12, 16, 20, 24, 32, 48, 64 +}; /* get data length from raw data length code (DLC) */ u8 can_fd_dlc2len(u8 dlc) @@ -16,16 +18,18 @@ u8 can_fd_dlc2len(u8 dlc) } EXPORT_SYMBOL_GPL(can_fd_dlc2len); -static const u8 len2dlc[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, /* 0 - 8 */ - 9, 9, 9, 9, /* 9 - 12 */ - 10, 10, 10, 10, /* 13 - 16 */ - 11, 11, 11, 11, /* 17 - 20 */ - 12, 12, 12, 12, /* 21 - 24 */ - 13, 13, 13, 13, 13, 13, 13, 13, /* 25 - 32 */ - 14, 14, 14, 14, 14, 14, 14, 14, /* 33 - 40 */ - 14, 14, 14, 14, 14, 14, 14, 14, /* 41 - 48 */ - 15, 15, 15, 15, 15, 15, 15, 15, /* 49 - 56 */ - 15, 15, 15, 15, 15, 15, 15, 15}; /* 57 - 64 */ +static const u8 len2dlc[] = { + 0, 1, 2, 3, 4, 5, 6, 7, 8, /* 0 - 8 */ + 9, 9, 9, 9, /* 9 - 12 */ + 10, 10, 10, 10, /* 13 - 16 */ + 11, 11, 11, 11, /* 17 - 20 */ + 12, 12, 12, 12, /* 21 - 24 */ + 13, 13, 13, 13, 13, 13, 13, 13, /* 25 - 32 */ + 14, 14, 14, 14, 14, 14, 14, 14, /* 33 - 40 */ + 14, 14, 14, 14, 14, 14, 14, 14, /* 41 - 48 */ + 15, 15, 15, 15, 15, 15, 15, 15, /* 49 - 56 */ + 15, 15, 15, 15, 15, 15, 15, 15 /* 57 - 64 */ +}; /* map the sanitized data length to an appropriate data length code */ u8 can_fd_len2dlc(u8 len) -- GitLab From 69e976831cd53f9ba304fd20305b2025ecc78eab Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Sun, 10 Jan 2021 14:21:05 +0000 Subject: [PATCH 0629/2012] MIPS: relocatable: fix possible boot hangup with KASLR enabled LLVM-built Linux triggered a boot hangup with KASLR enabled. arch/mips/kernel/relocate.c:get_random_boot() uses linux_banner, which is a string constant, as a random seed, but accesses it as an array of unsigned long (in rotate_xor()). When the address of linux_banner is not aligned to sizeof(long), such access emits unaligned access exception and hangs the kernel. Use PTR_ALIGN() to align input address to sizeof(long) and also align down the input length to prevent possible access-beyond-end. Fixes: 405bc8fd12f5 ("MIPS: Kernel: Implement KASLR using CONFIG_RELOCATABLE") Cc: stable@vger.kernel.org # 4.7+ Signed-off-by: Alexander Lobakin Tested-by: Nathan Chancellor Reviewed-by: Kees Cook Signed-off-by: Thomas Bogendoerfer --- arch/mips/kernel/relocate.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/arch/mips/kernel/relocate.c b/arch/mips/kernel/relocate.c index 47aeb3350a760..0e365b7c742d9 100644 --- a/arch/mips/kernel/relocate.c +++ b/arch/mips/kernel/relocate.c @@ -187,8 +187,14 @@ static int __init relocate_exception_table(long offset) static inline __init unsigned long rotate_xor(unsigned long hash, const void *area, size_t size) { - size_t i; - unsigned long *ptr = (unsigned long *)area; + const typeof(hash) *ptr = PTR_ALIGN(area, sizeof(hash)); + size_t diff, i; + + diff = (void *)ptr - area; + if (unlikely(size < diff + sizeof(hash))) + return hash; + + size = ALIGN_DOWN(size - diff, sizeof(hash)); for (i = 0; i < size / sizeof(hash); i++) { /* Rotate by odd number of bits and XOR. */ -- GitLab From 7b490a8ab0f2d3ab8d838a4ff22ae86edafd34a1 Mon Sep 17 00:00:00 2001 From: Menglong Dong Date: Mon, 11 Jan 2021 05:27:25 -0800 Subject: [PATCH 0630/2012] MIPS: OCTEON: fix unreachable code in octeon_irq_init_ciu The type of 'r' in octeon_irq_init_ciu is 'unsigned int', so 'r < 0' can't be true. Fix this by change the type of 'r' and 'i' from 'unsigned int' to 'int'. As 'i' won't be negative, this change works. Fixes: 99fbc70f8547 ("MIPS: Octeon: irq: Alloc desc before configuring IRQ") Signed-off-by: Menglong Dong Reviewed-by: Alexander Sverdlin Signed-off-by: Thomas Bogendoerfer --- arch/mips/cavium-octeon/octeon-irq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/cavium-octeon/octeon-irq.c b/arch/mips/cavium-octeon/octeon-irq.c index bd47e15d02c73..be5d4afcd30f9 100644 --- a/arch/mips/cavium-octeon/octeon-irq.c +++ b/arch/mips/cavium-octeon/octeon-irq.c @@ -1444,7 +1444,7 @@ static void octeon_irq_setup_secondary_ciu2(void) static int __init octeon_irq_init_ciu( struct device_node *ciu_node, struct device_node *parent) { - unsigned int i, r; + int i, r; struct irq_chip *chip; struct irq_chip *chip_edge; struct irq_chip *chip_mbox; -- GitLab From f419e5940f1d9892ea6f45acdaca572b9e73ff39 Mon Sep 17 00:00:00 2001 From: Jiaxun Yang Date: Thu, 7 Jan 2021 22:44:38 +0800 Subject: [PATCH 0631/2012] platform/x86: ideapad-laptop: Disable touchpad_switch for ELAN0634 Newer ideapads (e.g.: Yoga 14s, 720S 14) come with ELAN0634 touchpad do not use EC to switch touchpad. Reading VPCCMD_R_TOUCHPAD will return zero thus touchpad may be blocked unexpectedly. Writing VPCCMD_W_TOUCHPAD may cause a spurious key press. Add has_touchpad_switch to workaround these machines. Signed-off-by: Jiaxun Yang Cc: stable@vger.kernel.org # 5.4+ -- v2: Specify touchpad to ELAN0634 v3: Stupid missing ! in v2 v4: Correct acpi_dev_present usage (Hans) Link: https://lore.kernel.org/r/20210107144438.12605-1-jiaxun.yang@flygoat.com Signed-off-by: Hans de Goede --- drivers/platform/x86/ideapad-laptop.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/drivers/platform/x86/ideapad-laptop.c b/drivers/platform/x86/ideapad-laptop.c index 7598cd46cf606..5b81bafa5c165 100644 --- a/drivers/platform/x86/ideapad-laptop.c +++ b/drivers/platform/x86/ideapad-laptop.c @@ -92,6 +92,7 @@ struct ideapad_private { struct dentry *debug; unsigned long cfg; bool has_hw_rfkill_switch; + bool has_touchpad_switch; const char *fnesc_guid; }; @@ -535,7 +536,9 @@ static umode_t ideapad_is_visible(struct kobject *kobj, } else if (attr == &dev_attr_fn_lock.attr) { supported = acpi_has_method(priv->adev->handle, "HALS") && acpi_has_method(priv->adev->handle, "SALS"); - } else + } else if (attr == &dev_attr_touchpad.attr) + supported = priv->has_touchpad_switch; + else supported = true; return supported ? attr->mode : 0; @@ -867,6 +870,9 @@ static void ideapad_sync_touchpad_state(struct ideapad_private *priv) { unsigned long value; + if (!priv->has_touchpad_switch) + return; + /* Without reading from EC touchpad LED doesn't switch state */ if (!read_ec_data(priv->adev->handle, VPCCMD_R_TOUCHPAD, &value)) { /* Some IdeaPads don't really turn off touchpad - they only @@ -989,6 +995,9 @@ static int ideapad_acpi_add(struct platform_device *pdev) priv->platform_device = pdev; priv->has_hw_rfkill_switch = dmi_check_system(hw_rfkill_list); + /* Most ideapads with ELAN0634 touchpad don't use EC touchpad switch */ + priv->has_touchpad_switch = !acpi_dev_present("ELAN0634", NULL, -1); + ret = ideapad_sysfs_init(priv); if (ret) return ret; @@ -1006,6 +1015,10 @@ static int ideapad_acpi_add(struct platform_device *pdev) if (!priv->has_hw_rfkill_switch) write_ec_cmd(priv->adev->handle, VPCCMD_W_RF, 1); + /* The same for Touchpad */ + if (!priv->has_touchpad_switch) + write_ec_cmd(priv->adev->handle, VPCCMD_W_TOUCHPAD, 1); + for (i = 0; i < IDEAPAD_RFKILL_DEV_NUM; i++) if (test_bit(ideapad_rfk_data[i].cfgbit, &priv->cfg)) ideapad_register_rfkill(priv, i); -- GitLab From 5b191dcba719319148eeecf6ed409949fac55b39 Mon Sep 17 00:00:00 2001 From: Al Cooper Date: Thu, 7 Jan 2021 17:15:09 -0500 Subject: [PATCH 0632/2012] mmc: sdhci-brcmstb: Fix mmc timeout errors on S5 suspend Commit e7b5d63a82fe ("mmc: sdhci-brcmstb: Add shutdown callback") that added a shutdown callback to the diver, is causing "mmc timeout" errors on S5 suspend. The problem was that the "remove" was queuing additional MMC commands after the "shutdown" and these caused timeouts as the MMC queues were cleaned up for "remove". The shutdown callback will be changed to calling sdhci-pltfm_suspend which should get better power savings because the clocks will be shutdown. Fixes: e7b5d63a82fe ("mmc: sdhci-brcmstb: Add shutdown callback") Signed-off-by: Al Cooper Acked-by: Florian Fainelli Acked-by: Adrian Hunter Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20210107221509.6597-1-alcooperx@gmail.com Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci-brcmstb.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/mmc/host/sdhci-brcmstb.c b/drivers/mmc/host/sdhci-brcmstb.c index bbf3496f44955..f9780c65ebe98 100644 --- a/drivers/mmc/host/sdhci-brcmstb.c +++ b/drivers/mmc/host/sdhci-brcmstb.c @@ -314,11 +314,7 @@ err_clk: static void sdhci_brcmstb_shutdown(struct platform_device *pdev) { - int ret; - - ret = sdhci_pltfm_unregister(pdev); - if (ret) - dev_err(&pdev->dev, "failed to shutdown\n"); + sdhci_pltfm_suspend(&pdev->dev); } MODULE_DEVICE_TABLE(of, sdhci_brcm_of_match); -- GitLab From ef3a575baf53571dc405ee4028e26f50856898e7 Mon Sep 17 00:00:00 2001 From: Roger Pau Monne Date: Tue, 12 Jan 2021 12:53:58 +0100 Subject: [PATCH 0633/2012] xen/privcmd: allow fetching resource sizes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Allow issuing an IOCTL_PRIVCMD_MMAP_RESOURCE ioctl with num = 0 and addr = 0 in order to fetch the size of a specific resource. Add a shortcut to the default map resource path, since fetching the size requires no address to be passed in, and thus no VMA to setup. This is missing from the initial implementation, and causes issues when mapping resources that don't have fixed or known sizes. Signed-off-by: Roger Pau Monné Reviewed-by: Juergen Gross Tested-by: Andrew Cooper Cc: stable@vger.kernel.org # >= 4.18 Link: https://lore.kernel.org/r/20210112115358.23346-1-roger.pau@citrix.com Signed-off-by: Juergen Gross --- drivers/xen/privcmd.c | 25 +++++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c index b0c73c58f9874..720a7b7abd46d 100644 --- a/drivers/xen/privcmd.c +++ b/drivers/xen/privcmd.c @@ -717,14 +717,15 @@ static long privcmd_ioctl_restrict(struct file *file, void __user *udata) return 0; } -static long privcmd_ioctl_mmap_resource(struct file *file, void __user *udata) +static long privcmd_ioctl_mmap_resource(struct file *file, + struct privcmd_mmap_resource __user *udata) { struct privcmd_data *data = file->private_data; struct mm_struct *mm = current->mm; struct vm_area_struct *vma; struct privcmd_mmap_resource kdata; xen_pfn_t *pfns = NULL; - struct xen_mem_acquire_resource xdata; + struct xen_mem_acquire_resource xdata = { }; int rc; if (copy_from_user(&kdata, udata, sizeof(kdata))) @@ -734,6 +735,22 @@ static long privcmd_ioctl_mmap_resource(struct file *file, void __user *udata) if (data->domid != DOMID_INVALID && data->domid != kdata.dom) return -EPERM; + /* Both fields must be set or unset */ + if (!!kdata.addr != !!kdata.num) + return -EINVAL; + + xdata.domid = kdata.dom; + xdata.type = kdata.type; + xdata.id = kdata.id; + + if (!kdata.addr && !kdata.num) { + /* Query the size of the resource. */ + rc = HYPERVISOR_memory_op(XENMEM_acquire_resource, &xdata); + if (rc) + return rc; + return __put_user(xdata.nr_frames, &udata->num); + } + mmap_write_lock(mm); vma = find_vma(mm, kdata.addr); @@ -768,10 +785,6 @@ static long privcmd_ioctl_mmap_resource(struct file *file, void __user *udata) } else vma->vm_private_data = PRIV_VMA_LOCKED; - memset(&xdata, 0, sizeof(xdata)); - xdata.domid = kdata.dom; - xdata.type = kdata.type; - xdata.id = kdata.id; xdata.frame = kdata.idx; xdata.nr_frames = kdata.num; set_xen_guest_handle(xdata.frame_list, pfns); -- GitLab From 0d38fd8d252446d39050578ea32ed89b9adeb202 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Wed, 13 Jan 2021 12:15:43 +0100 Subject: [PATCH 0634/2012] MAINTAINERS: update references to stm32 audio bindings Changeset 81437cc3b0d9 ("Merge series "dt-bindings: stm32: convert audio dfsdm to json-schema" from Olivier Moysan :") removed bindings/sound/st,stm32-adfsdm.txt, as stm32-* audio bindings are now under: bindings/iio/adc/st,stm32-*.yaml. Update cross-references to them accordingly. Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/03950bbd5cf7bac10eaaff3725e283d3ec2538c5.1610536535.git.mchehab+huawei@kernel.org Signed-off-by: Mark Brown --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index a2359903fcc8c..7d3270b3fd842 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -16964,7 +16964,7 @@ M: Olivier Moysan M: Arnaud Pouliquen L: alsa-devel@alsa-project.org (moderated for non-subscribers) S: Maintained -F: Documentation/devicetree/bindings/sound/st,stm32-*.txt +F: Documentation/devicetree/bindings/iio/adc/st,stm32-*.yaml F: sound/soc/stm/ STM32 TIMER/LPTIMER DRIVERS -- GitLab From df06824767cc9a32fbdb0e3d3b7e169292a5b5fe Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 7 Jan 2021 14:53:10 +0000 Subject: [PATCH 0635/2012] arm64: entry: remove redundant IRQ flag tracing All EL0 returns go via ret_to_user(), which masks IRQs and notifies lockdep and tracing before calling into do_notify_resume(). Therefore, there's no need for do_notify_resume() to call trace_hardirqs_off(), and the comment is stale. The call is simply redundant. In ret_to_user() we call exit_to_user_mode(), which notifies lockdep and tracing the IRQs will be enabled in userspace, so there's no need for el0_svc_common() to call trace_hardirqs_on() before returning. Further, at the start of ret_to_user() we call trace_hardirqs_off(), so not only is this redundant, but it is immediately undone. In addition to being redundant, the trace_hardirqs_on() in el0_svc_common() leaves lockdep inconsistent with the hardware state, and is liable to cause issues for any C code or instrumentation between this and the call to trace_hardirqs_off() which undoes it in ret_to_user(). This patch removes the redundant tracing calls and associated stale comments. Fixes: 23529049c684 ("arm64: entry: fix non-NMI user<->kernel transitions") Signed-off-by: Mark Rutland Acked-by: Will Deacon Cc: James Morse Cc: Will Deacon Link: https://lore.kernel.org/r/20210107145310.44616-1-mark.rutland@arm.com Signed-off-by: Catalin Marinas --- arch/arm64/kernel/signal.c | 7 ------- arch/arm64/kernel/syscall.c | 9 +-------- 2 files changed, 1 insertion(+), 15 deletions(-) diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c index f71d6ce4673f5..6237486ff6bb7 100644 --- a/arch/arm64/kernel/signal.c +++ b/arch/arm64/kernel/signal.c @@ -914,13 +914,6 @@ static void do_signal(struct pt_regs *regs) asmlinkage void do_notify_resume(struct pt_regs *regs, unsigned long thread_flags) { - /* - * The assembly code enters us with IRQs off, but it hasn't - * informed the tracing code of that for efficiency reasons. - * Update the trace code with the current status. - */ - trace_hardirqs_off(); - do { if (thread_flags & _TIF_NEED_RESCHED) { /* Unmask Debug and SError for the next task */ diff --git a/arch/arm64/kernel/syscall.c b/arch/arm64/kernel/syscall.c index f61e9d8cc55a1..0bfac95fe4643 100644 --- a/arch/arm64/kernel/syscall.c +++ b/arch/arm64/kernel/syscall.c @@ -165,15 +165,8 @@ static void el0_svc_common(struct pt_regs *regs, int scno, int sc_nr, if (!has_syscall_work(flags) && !IS_ENABLED(CONFIG_DEBUG_RSEQ)) { local_daif_mask(); flags = current_thread_info()->flags; - if (!has_syscall_work(flags) && !(flags & _TIF_SINGLESTEP)) { - /* - * We're off to userspace, where interrupts are - * always enabled after we restore the flags from - * the SPSR. - */ - trace_hardirqs_on(); + if (!has_syscall_work(flags) && !(flags & _TIF_SINGLESTEP)) return; - } local_daif_restore(DAIF_PROCCTX); } -- GitLab From 5f39d2713bd80e8a3e6d9299930aec8844872c0e Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sun, 3 Jan 2021 14:39:27 -0500 Subject: [PATCH 0636/2012] SUNRPC: Move the svc_xdr_recvfrom tracepoint again Commit 156708adf2d9 ("SUNRPC: Move the svc_xdr_recvfrom() tracepoint") tried to capture the correct XID in the trace record, but this line in svc_recv: rqstp->rq_xid = svc_getu32(&rqstp->rq_arg.head[0]); alters the size of rq_arg.head[0].iov_len. The tracepoint records the correct XID but an incorrect value for the length of the xdr_buf's head. To keep the trace callsites simple, I've created two trace classes. One assumes the xdr_buf contains a full RPC message, and the XID can be extracted from it. The other assumes the contents of the xdr_buf are arbitrary, and the xid will be provided by the caller. Currently there is only one user of each class, but I expect we will need a few more tracepoints using each class as time goes on. Signed-off-by: Chuck Lever --- include/trace/events/sunrpc.h | 59 +++++++++++++++++++++++++++++++---- net/sunrpc/svc_xprt.c | 4 +-- 2 files changed, 55 insertions(+), 8 deletions(-) diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index 58994e0130221..6f89c27265f58 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -1424,13 +1424,61 @@ TRACE_EVENT(rpcb_unregister, ) ); +/* Record an xdr_buf containing a fully-formed RPC message */ +DECLARE_EVENT_CLASS(svc_xdr_msg_class, + TP_PROTO( + const struct xdr_buf *xdr + ), + + TP_ARGS(xdr), + + TP_STRUCT__entry( + __field(u32, xid) + __field(const void *, head_base) + __field(size_t, head_len) + __field(const void *, tail_base) + __field(size_t, tail_len) + __field(unsigned int, page_len) + __field(unsigned int, msg_len) + ), + + TP_fast_assign( + __be32 *p = (__be32 *)xdr->head[0].iov_base; + + __entry->xid = be32_to_cpu(*p); + __entry->head_base = p; + __entry->head_len = xdr->head[0].iov_len; + __entry->tail_base = xdr->tail[0].iov_base; + __entry->tail_len = xdr->tail[0].iov_len; + __entry->page_len = xdr->page_len; + __entry->msg_len = xdr->len; + ), + + TP_printk("xid=0x%08x head=[%p,%zu] page=%u tail=[%p,%zu] len=%u", + __entry->xid, + __entry->head_base, __entry->head_len, __entry->page_len, + __entry->tail_base, __entry->tail_len, __entry->msg_len + ) +); + +#define DEFINE_SVCXDRMSG_EVENT(name) \ + DEFINE_EVENT(svc_xdr_msg_class, \ + svc_xdr_##name, \ + TP_PROTO( \ + const struct xdr_buf *xdr \ + ), \ + TP_ARGS(xdr)) + +DEFINE_SVCXDRMSG_EVENT(recvfrom); + +/* Record an xdr_buf containing arbitrary data, tagged with an XID */ DECLARE_EVENT_CLASS(svc_xdr_buf_class, TP_PROTO( - const struct svc_rqst *rqst, + __be32 xid, const struct xdr_buf *xdr ), - TP_ARGS(rqst, xdr), + TP_ARGS(xid, xdr), TP_STRUCT__entry( __field(u32, xid) @@ -1443,7 +1491,7 @@ DECLARE_EVENT_CLASS(svc_xdr_buf_class, ), TP_fast_assign( - __entry->xid = be32_to_cpu(rqst->rq_xid); + __entry->xid = be32_to_cpu(xid); __entry->head_base = xdr->head[0].iov_base; __entry->head_len = xdr->head[0].iov_len; __entry->tail_base = xdr->tail[0].iov_base; @@ -1463,12 +1511,11 @@ DECLARE_EVENT_CLASS(svc_xdr_buf_class, DEFINE_EVENT(svc_xdr_buf_class, \ svc_xdr_##name, \ TP_PROTO( \ - const struct svc_rqst *rqst, \ + __be32 xid, \ const struct xdr_buf *xdr \ ), \ - TP_ARGS(rqst, xdr)) + TP_ARGS(xid, xdr)) -DEFINE_SVCXDRBUF_EVENT(recvfrom); DEFINE_SVCXDRBUF_EVENT(sendto); /* diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 5fb9164aa6905..dcc50ae545506 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -857,6 +857,7 @@ int svc_recv(struct svc_rqst *rqstp, long timeout) err = -EAGAIN; if (len <= 0) goto out_release; + trace_svc_xdr_recvfrom(&rqstp->rq_arg); clear_bit(XPT_OLD, &xprt->xpt_flags); @@ -866,7 +867,6 @@ int svc_recv(struct svc_rqst *rqstp, long timeout) if (serv->sv_stats) serv->sv_stats->netcnt++; - trace_svc_xdr_recvfrom(rqstp, &rqstp->rq_arg); return len; out_release: rqstp->rq_res.len = 0; @@ -904,7 +904,7 @@ int svc_send(struct svc_rqst *rqstp) xb->len = xb->head[0].iov_len + xb->page_len + xb->tail[0].iov_len; - trace_svc_xdr_sendto(rqstp, xb); + trace_svc_xdr_sendto(rqstp->rq_xid, xb); trace_svc_stats_latency(rqstp); len = xprt->xpt_ops->xpo_sendto(rqstp); -- GitLab From ce09ccc50208c04a1b03abfd530b5d6314258fd0 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 13 Jan 2021 15:43:18 +0100 Subject: [PATCH 0637/2012] genirq: Export irq_check_status_bit() One of the users can be built modular: ERROR: modpost: "irq_check_status_bit" [drivers/perf/arm_spe_pmu.ko] undefined! Fixes: fdd029630434 ("genirq: Move status flag checks to core") Reported-by: Guenter Roeck Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20201227192049.GA195845@roeck-us.net --- kernel/irq/manage.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index ab8567f32501f..dec3f73e8db92 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -2859,3 +2859,4 @@ bool irq_check_status_bit(unsigned int irq, unsigned int bitmask) rcu_read_unlock(); return res; } +EXPORT_SYMBOL_GPL(irq_check_status_bit); -- GitLab From b90d72a6bfdb5e5c62cd223a8cdf4045bfbcb94d Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 12 Jan 2021 22:18:55 +0000 Subject: [PATCH 0638/2012] Revert "arm64: Enable perf events based hard lockup detector" This reverts commit 367c820ef08082e68df8a3bc12e62393af21e4b5. lockup_detector_init() makes heavy use of per-cpu variables and must be called with preemption disabled. Usually, it's handled early during boot in kernel_init_freeable(), before SMP has been initialised. Since we do not know whether or not our PMU interrupt can be signalled as an NMI until considerably later in the boot process, the Arm PMU driver attempts to re-initialise the lockup detector off the back of a device_initcall(). Unfortunately, this is called from preemptible context and results in the following splat: | BUG: using smp_processor_id() in preemptible [00000000] code: swapper/0/1 | caller is debug_smp_processor_id+0x20/0x2c | CPU: 2 PID: 1 Comm: swapper/0 Not tainted 5.10.0+ #276 | Hardware name: linux,dummy-virt (DT) | Call trace: | dump_backtrace+0x0/0x3c0 | show_stack+0x20/0x6c | dump_stack+0x2f0/0x42c | check_preemption_disabled+0x1cc/0x1dc | debug_smp_processor_id+0x20/0x2c | hardlockup_detector_event_create+0x34/0x18c | hardlockup_detector_perf_init+0x2c/0x134 | watchdog_nmi_probe+0x18/0x24 | lockup_detector_init+0x44/0xa8 | armv8_pmu_driver_init+0x54/0x78 | do_one_initcall+0x184/0x43c | kernel_init_freeable+0x368/0x380 | kernel_init+0x1c/0x1cc | ret_from_fork+0x10/0x30 Rather than bodge this with raw_smp_processor_id() or randomly disabling preemption, simply revert the culprit for now until we figure out how to do this properly. Reported-by: Lecopzer Chen Signed-off-by: Will Deacon Acked-by: Mark Rutland Cc: Sumit Garg Cc: Alexandru Elisei Link: https://lore.kernel.org/r/20201221162249.3119-1-lecopzer.chen@mediatek.com Link: https://lore.kernel.org/r/20210112221855.10666-1-will@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/Kconfig | 2 -- arch/arm64/kernel/perf_event.c | 41 ++-------------------------------- drivers/perf/arm_pmu.c | 5 ----- include/linux/perf/arm_pmu.h | 2 -- 4 files changed, 2 insertions(+), 48 deletions(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 05e17351e4f33..f39568b28ec1c 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -174,8 +174,6 @@ config ARM64 select HAVE_NMI select HAVE_PATA_PLATFORM select HAVE_PERF_EVENTS - select HAVE_PERF_EVENTS_NMI if ARM64_PSEUDO_NMI && HW_PERF_EVENTS - select HAVE_HARDLOCKUP_DETECTOR_PERF if PERF_EVENTS && HAVE_PERF_EVENTS_NMI select HAVE_PERF_REGS select HAVE_PERF_USER_STACK_DUMP select HAVE_REGS_AND_STACK_ACCESS_API diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index 38bb07eff8720..3605f77ad4df1 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -23,8 +23,6 @@ #include #include #include -#include -#include /* ARMv8 Cortex-A53 specific event types. */ #define ARMV8_A53_PERFCTR_PREF_LINEFILL 0xC2 @@ -1250,21 +1248,10 @@ static struct platform_driver armv8_pmu_driver = { static int __init armv8_pmu_driver_init(void) { - int ret; - if (acpi_disabled) - ret = platform_driver_register(&armv8_pmu_driver); + return platform_driver_register(&armv8_pmu_driver); else - ret = arm_pmu_acpi_probe(armv8_pmuv3_init); - - /* - * Try to re-initialize lockup detector after PMU init in - * case PMU events are triggered via NMIs. - */ - if (ret == 0 && arm_pmu_irq_is_nmi()) - lockup_detector_init(); - - return ret; + return arm_pmu_acpi_probe(armv8_pmuv3_init); } device_initcall(armv8_pmu_driver_init) @@ -1322,27 +1309,3 @@ void arch_perf_update_userpage(struct perf_event *event, userpg->cap_user_time_zero = 1; userpg->cap_user_time_short = 1; } - -#ifdef CONFIG_HARDLOCKUP_DETECTOR_PERF -/* - * Safe maximum CPU frequency in case a particular platform doesn't implement - * cpufreq driver. Although, architecture doesn't put any restrictions on - * maximum frequency but 5 GHz seems to be safe maximum given the available - * Arm CPUs in the market which are clocked much less than 5 GHz. On the other - * hand, we can't make it much higher as it would lead to a large hard-lockup - * detection timeout on parts which are running slower (eg. 1GHz on - * Developerbox) and doesn't possess a cpufreq driver. - */ -#define SAFE_MAX_CPU_FREQ 5000000000UL // 5 GHz -u64 hw_nmi_get_sample_period(int watchdog_thresh) -{ - unsigned int cpu = smp_processor_id(); - unsigned long max_cpu_freq; - - max_cpu_freq = cpufreq_get_hw_max_freq(cpu) * 1000UL; - if (!max_cpu_freq) - max_cpu_freq = SAFE_MAX_CPU_FREQ; - - return (u64)max_cpu_freq * watchdog_thresh; -} -#endif diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c index 794a37d508537..cb2f55f450e4a 100644 --- a/drivers/perf/arm_pmu.c +++ b/drivers/perf/arm_pmu.c @@ -726,11 +726,6 @@ static int armpmu_get_cpu_irq(struct arm_pmu *pmu, int cpu) return per_cpu(hw_events->irq, cpu); } -bool arm_pmu_irq_is_nmi(void) -{ - return has_nmi; -} - /* * PMU hardware loses all context when a CPU goes offline. * When a CPU is hotplugged back in, since some hardware registers are diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h index bf7966776c557..505480217cf1a 100644 --- a/include/linux/perf/arm_pmu.h +++ b/include/linux/perf/arm_pmu.h @@ -163,8 +163,6 @@ int arm_pmu_acpi_probe(armpmu_init_fn init_fn); static inline int arm_pmu_acpi_probe(armpmu_init_fn init_fn) { return 0; } #endif -bool arm_pmu_irq_is_nmi(void); - /* Internal functions only for core arm_pmu code */ struct arm_pmu *armpmu_alloc(void); struct arm_pmu *armpmu_alloc_atomic(void); -- GitLab From 71e70184f1d1314ad56e834d1befc07daa2af8e6 Mon Sep 17 00:00:00 2001 From: Jianlin Lv Date: Tue, 12 Jan 2021 09:58:13 +0800 Subject: [PATCH 0639/2012] arm64: rename S_FRAME_SIZE to PT_REGS_SIZE S_FRAME_SIZE is the size of the pt_regs structure, no longer the size of the kernel stack frame, the name is misleading. In keeping with arm32, rename S_FRAME_SIZE to PT_REGS_SIZE. Signed-off-by: Jianlin Lv Acked-by: Mark Rutland Link: https://lore.kernel.org/r/20210112015813.2340969-1-Jianlin.Lv@arm.com Signed-off-by: Catalin Marinas --- arch/arm64/kernel/asm-offsets.c | 2 +- arch/arm64/kernel/entry-ftrace.S | 12 ++++++------ arch/arm64/kernel/entry.S | 14 +++++++------- arch/arm64/kernel/probes/kprobes_trampoline.S | 6 +++--- 4 files changed, 17 insertions(+), 17 deletions(-) diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c index f42fd9e339815..3017844635874 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c @@ -75,7 +75,7 @@ int main(void) DEFINE(S_SDEI_TTBR1, offsetof(struct pt_regs, sdei_ttbr1)); DEFINE(S_PMR_SAVE, offsetof(struct pt_regs, pmr_save)); DEFINE(S_STACKFRAME, offsetof(struct pt_regs, stackframe)); - DEFINE(S_FRAME_SIZE, sizeof(struct pt_regs)); + DEFINE(PT_REGS_SIZE, sizeof(struct pt_regs)); BLANK(); #ifdef CONFIG_COMPAT DEFINE(COMPAT_SIGFRAME_REGS_OFFSET, offsetof(struct compat_sigframe, uc.uc_mcontext.arm_r0)); diff --git a/arch/arm64/kernel/entry-ftrace.S b/arch/arm64/kernel/entry-ftrace.S index a338f40e64d39..b3e4f9a088b1a 100644 --- a/arch/arm64/kernel/entry-ftrace.S +++ b/arch/arm64/kernel/entry-ftrace.S @@ -35,7 +35,7 @@ */ .macro ftrace_regs_entry, allregs=0 /* Make room for pt_regs, plus a callee frame */ - sub sp, sp, #(S_FRAME_SIZE + 16) + sub sp, sp, #(PT_REGS_SIZE + 16) /* Save function arguments (and x9 for simplicity) */ stp x0, x1, [sp, #S_X0] @@ -61,15 +61,15 @@ .endif /* Save the callsite's SP and LR */ - add x10, sp, #(S_FRAME_SIZE + 16) + add x10, sp, #(PT_REGS_SIZE + 16) stp x9, x10, [sp, #S_LR] /* Save the PC after the ftrace callsite */ str x30, [sp, #S_PC] /* Create a frame record for the callsite above pt_regs */ - stp x29, x9, [sp, #S_FRAME_SIZE] - add x29, sp, #S_FRAME_SIZE + stp x29, x9, [sp, #PT_REGS_SIZE] + add x29, sp, #PT_REGS_SIZE /* Create our frame record within pt_regs. */ stp x29, x30, [sp, #S_STACKFRAME] @@ -120,7 +120,7 @@ ftrace_common_return: ldr x9, [sp, #S_PC] /* Restore the callsite's SP */ - add sp, sp, #S_FRAME_SIZE + 16 + add sp, sp, #PT_REGS_SIZE + 16 ret x9 SYM_CODE_END(ftrace_common) @@ -130,7 +130,7 @@ SYM_CODE_START(ftrace_graph_caller) ldr x0, [sp, #S_PC] sub x0, x0, #AARCH64_INSN_SIZE // ip (callsite's BL insn) add x1, sp, #S_LR // parent_ip (callsite's LR) - ldr x2, [sp, #S_FRAME_SIZE] // parent fp (callsite's FP) + ldr x2, [sp, #PT_REGS_SIZE] // parent fp (callsite's FP) bl prepare_ftrace_return b ftrace_common_return SYM_CODE_END(ftrace_graph_caller) diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index a8c3e7aaca749..c9bae73f2621a 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -75,7 +75,7 @@ alternative_else_nop_endif .endif #endif - sub sp, sp, #S_FRAME_SIZE + sub sp, sp, #PT_REGS_SIZE #ifdef CONFIG_VMAP_STACK /* * Test whether the SP has overflowed, without corrupting a GPR. @@ -96,7 +96,7 @@ alternative_else_nop_endif * userspace, and can clobber EL0 registers to free up GPRs. */ - /* Stash the original SP (minus S_FRAME_SIZE) in tpidr_el0. */ + /* Stash the original SP (minus PT_REGS_SIZE) in tpidr_el0. */ msr tpidr_el0, x0 /* Recover the original x0 value and stash it in tpidrro_el0 */ @@ -253,7 +253,7 @@ alternative_else_nop_endif scs_load tsk, x20 .else - add x21, sp, #S_FRAME_SIZE + add x21, sp, #PT_REGS_SIZE get_current_task tsk .endif /* \el == 0 */ mrs x22, elr_el1 @@ -377,7 +377,7 @@ alternative_else_nop_endif ldp x26, x27, [sp, #16 * 13] ldp x28, x29, [sp, #16 * 14] ldr lr, [sp, #S_LR] - add sp, sp, #S_FRAME_SIZE // restore sp + add sp, sp, #PT_REGS_SIZE // restore sp .if \el == 0 alternative_insn eret, nop, ARM64_UNMAP_KERNEL_AT_EL0 @@ -580,12 +580,12 @@ __bad_stack: /* * Store the original GPRs to the new stack. The orginal SP (minus - * S_FRAME_SIZE) was stashed in tpidr_el0 by kernel_ventry. + * PT_REGS_SIZE) was stashed in tpidr_el0 by kernel_ventry. */ - sub sp, sp, #S_FRAME_SIZE + sub sp, sp, #PT_REGS_SIZE kernel_entry 1 mrs x0, tpidr_el0 - add x0, x0, #S_FRAME_SIZE + add x0, x0, #PT_REGS_SIZE str x0, [sp, #S_SP] /* Stash the regs for handle_bad_stack */ diff --git a/arch/arm64/kernel/probes/kprobes_trampoline.S b/arch/arm64/kernel/probes/kprobes_trampoline.S index 890ca72c5a514..288a84e253ccb 100644 --- a/arch/arm64/kernel/probes/kprobes_trampoline.S +++ b/arch/arm64/kernel/probes/kprobes_trampoline.S @@ -25,7 +25,7 @@ stp x24, x25, [sp, #S_X24] stp x26, x27, [sp, #S_X26] stp x28, x29, [sp, #S_X28] - add x0, sp, #S_FRAME_SIZE + add x0, sp, #PT_REGS_SIZE stp lr, x0, [sp, #S_LR] /* * Construct a useful saved PSTATE @@ -62,7 +62,7 @@ .endm SYM_CODE_START(kretprobe_trampoline) - sub sp, sp, #S_FRAME_SIZE + sub sp, sp, #PT_REGS_SIZE save_all_base_regs @@ -76,7 +76,7 @@ SYM_CODE_START(kretprobe_trampoline) restore_all_base_regs - add sp, sp, #S_FRAME_SIZE + add sp, sp, #PT_REGS_SIZE ret SYM_CODE_END(kretprobe_trampoline) -- GitLab From c35a824c31834d947fb99b0c608c1b9f922b4ba0 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 8 Jan 2021 10:19:56 +0100 Subject: [PATCH 0640/2012] arm64: make atomic helpers __always_inline With UBSAN enabled and building with clang, there are occasionally warnings like WARNING: modpost: vmlinux.o(.text+0xc533ec): Section mismatch in reference from the function arch_atomic64_or() to the variable .init.data:numa_nodes_parsed The function arch_atomic64_or() references the variable __initdata numa_nodes_parsed. This is often because arch_atomic64_or lacks a __initdata annotation or the annotation of numa_nodes_parsed is wrong. for functions that end up not being inlined as intended but operating on __initdata variables. Mark these as __always_inline, along with the corresponding asm-generic wrappers. Signed-off-by: Arnd Bergmann Acked-by: Will Deacon Link: https://lore.kernel.org/r/20210108092024.4034860-1-arnd@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/atomic.h | 10 +++++----- include/asm-generic/bitops/atomic.h | 6 +++--- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/arch/arm64/include/asm/atomic.h b/arch/arm64/include/asm/atomic.h index 015ddffaf6caa..b56a4b2bc2486 100644 --- a/arch/arm64/include/asm/atomic.h +++ b/arch/arm64/include/asm/atomic.h @@ -17,7 +17,7 @@ #include #define ATOMIC_OP(op) \ -static inline void arch_##op(int i, atomic_t *v) \ +static __always_inline void arch_##op(int i, atomic_t *v) \ { \ __lse_ll_sc_body(op, i, v); \ } @@ -32,7 +32,7 @@ ATOMIC_OP(atomic_sub) #undef ATOMIC_OP #define ATOMIC_FETCH_OP(name, op) \ -static inline int arch_##op##name(int i, atomic_t *v) \ +static __always_inline int arch_##op##name(int i, atomic_t *v) \ { \ return __lse_ll_sc_body(op##name, i, v); \ } @@ -56,7 +56,7 @@ ATOMIC_FETCH_OPS(atomic_sub_return) #undef ATOMIC_FETCH_OPS #define ATOMIC64_OP(op) \ -static inline void arch_##op(long i, atomic64_t *v) \ +static __always_inline void arch_##op(long i, atomic64_t *v) \ { \ __lse_ll_sc_body(op, i, v); \ } @@ -71,7 +71,7 @@ ATOMIC64_OP(atomic64_sub) #undef ATOMIC64_OP #define ATOMIC64_FETCH_OP(name, op) \ -static inline long arch_##op##name(long i, atomic64_t *v) \ +static __always_inline long arch_##op##name(long i, atomic64_t *v) \ { \ return __lse_ll_sc_body(op##name, i, v); \ } @@ -94,7 +94,7 @@ ATOMIC64_FETCH_OPS(atomic64_sub_return) #undef ATOMIC64_FETCH_OP #undef ATOMIC64_FETCH_OPS -static inline long arch_atomic64_dec_if_positive(atomic64_t *v) +static __always_inline long arch_atomic64_dec_if_positive(atomic64_t *v) { return __lse_ll_sc_body(atomic64_dec_if_positive, v); } diff --git a/include/asm-generic/bitops/atomic.h b/include/asm-generic/bitops/atomic.h index dd90c9792909d..0e7316a86240b 100644 --- a/include/asm-generic/bitops/atomic.h +++ b/include/asm-generic/bitops/atomic.h @@ -11,19 +11,19 @@ * See Documentation/atomic_bitops.txt for details. */ -static inline void set_bit(unsigned int nr, volatile unsigned long *p) +static __always_inline void set_bit(unsigned int nr, volatile unsigned long *p) { p += BIT_WORD(nr); atomic_long_or(BIT_MASK(nr), (atomic_long_t *)p); } -static inline void clear_bit(unsigned int nr, volatile unsigned long *p) +static __always_inline void clear_bit(unsigned int nr, volatile unsigned long *p) { p += BIT_WORD(nr); atomic_long_andnot(BIT_MASK(nr), (atomic_long_t *)p); } -static inline void change_bit(unsigned int nr, volatile unsigned long *p) +static __always_inline void change_bit(unsigned int nr, volatile unsigned long *p) { p += BIT_WORD(nr); atomic_long_xor(BIT_MASK(nr), (atomic_long_t *)p); -- GitLab From 3499ba8198cad47b731792e5e56b9ec2a78a83a2 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Wed, 13 Jan 2021 13:26:02 +0000 Subject: [PATCH 0641/2012] xen: Fix event channel callback via INTX/GSI For a while, event channel notification via the PCI platform device has been broken, because we attempt to communicate with xenstore before we even have notifications working, with the xs_reset_watches() call in xs_init(). We tend to get away with this on Xen versions below 4.0 because we avoid calling xs_reset_watches() anyway, because xenstore might not cope with reading a non-existent key. And newer Xen *does* have the vector callback support, so we rarely fall back to INTX/GSI delivery. To fix it, clean up a bit of the mess of xs_init() and xenbus_probe() startup. Call xs_init() directly from xenbus_init() only in the !XS_HVM case, deferring it to be called from xenbus_probe() in the XS_HVM case instead. Then fix up the invocation of xenbus_probe() to happen either from its device_initcall if the callback is available early enough, or when the callback is finally set up. This means that the hack of calling xenbus_probe() from a workqueue after the first interrupt, or directly from the PCI platform device setup, is no longer needed. Signed-off-by: David Woodhouse Reviewed-by: Boris Ostrovsky Link: https://lore.kernel.org/r/20210113132606.422794-2-dwmw2@infradead.org Signed-off-by: Juergen Gross --- arch/arm/xen/enlighten.c | 2 +- drivers/xen/events/events_base.c | 10 ---- drivers/xen/platform-pci.c | 1 - drivers/xen/xenbus/xenbus.h | 1 + drivers/xen/xenbus/xenbus_comms.c | 8 --- drivers/xen/xenbus/xenbus_probe.c | 81 +++++++++++++++++++++++++------ include/xen/xenbus.h | 2 +- 7 files changed, 70 insertions(+), 35 deletions(-) diff --git a/arch/arm/xen/enlighten.c b/arch/arm/xen/enlighten.c index 60e901cd0de6a..5a957a9a09843 100644 --- a/arch/arm/xen/enlighten.c +++ b/arch/arm/xen/enlighten.c @@ -371,7 +371,7 @@ static int __init xen_guest_init(void) } gnttab_init(); if (!xen_initial_domain()) - xenbus_probe(NULL); + xenbus_probe(); /* * Making sure board specific code will not set up ops for diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c index 6038c4c35db5a..bbebe248b7264 100644 --- a/drivers/xen/events/events_base.c +++ b/drivers/xen/events/events_base.c @@ -2010,16 +2010,6 @@ static struct irq_chip xen_percpu_chip __read_mostly = { .irq_ack = ack_dynirq, }; -int xen_set_callback_via(uint64_t via) -{ - struct xen_hvm_param a; - a.domid = DOMID_SELF; - a.index = HVM_PARAM_CALLBACK_IRQ; - a.value = via; - return HYPERVISOR_hvm_op(HVMOP_set_param, &a); -} -EXPORT_SYMBOL_GPL(xen_set_callback_via); - #ifdef CONFIG_XEN_PVHVM /* Vector callbacks are better than PCI interrupts to receive event * channel notifications because we can receive vector callbacks on any diff --git a/drivers/xen/platform-pci.c b/drivers/xen/platform-pci.c index dd911e1ff782c..9db557b76511b 100644 --- a/drivers/xen/platform-pci.c +++ b/drivers/xen/platform-pci.c @@ -149,7 +149,6 @@ static int platform_pci_probe(struct pci_dev *pdev, ret = gnttab_init(); if (ret) goto grant_out; - xenbus_probe(NULL); return 0; grant_out: gnttab_free_auto_xlat_frames(); diff --git a/drivers/xen/xenbus/xenbus.h b/drivers/xen/xenbus/xenbus.h index 2a93b7c9c1599..dc15373354144 100644 --- a/drivers/xen/xenbus/xenbus.h +++ b/drivers/xen/xenbus/xenbus.h @@ -115,6 +115,7 @@ int xenbus_probe_node(struct xen_bus_type *bus, const char *type, const char *nodename); int xenbus_probe_devices(struct xen_bus_type *bus); +void xenbus_probe(void); void xenbus_dev_changed(const char *node, struct xen_bus_type *bus); diff --git a/drivers/xen/xenbus/xenbus_comms.c b/drivers/xen/xenbus/xenbus_comms.c index eb5151fc8efab..e5fda0256feb3 100644 --- a/drivers/xen/xenbus/xenbus_comms.c +++ b/drivers/xen/xenbus/xenbus_comms.c @@ -57,16 +57,8 @@ DEFINE_MUTEX(xs_response_mutex); static int xenbus_irq; static struct task_struct *xenbus_task; -static DECLARE_WORK(probe_work, xenbus_probe); - - static irqreturn_t wake_waiting(int irq, void *unused) { - if (unlikely(xenstored_ready == 0)) { - xenstored_ready = 1; - schedule_work(&probe_work); - } - wake_up(&xb_waitq); return IRQ_HANDLED; } diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c index 44634d970a5ca..c8f0282bb6497 100644 --- a/drivers/xen/xenbus/xenbus_probe.c +++ b/drivers/xen/xenbus/xenbus_probe.c @@ -683,29 +683,76 @@ void unregister_xenstore_notifier(struct notifier_block *nb) } EXPORT_SYMBOL_GPL(unregister_xenstore_notifier); -void xenbus_probe(struct work_struct *unused) +void xenbus_probe(void) { xenstored_ready = 1; + /* + * In the HVM case, xenbus_init() deferred its call to + * xs_init() in case callbacks were not operational yet. + * So do it now. + */ + if (xen_store_domain_type == XS_HVM) + xs_init(); + /* Notify others that xenstore is up */ blocking_notifier_call_chain(&xenstore_chain, 0, NULL); } -EXPORT_SYMBOL_GPL(xenbus_probe); -static int __init xenbus_probe_initcall(void) +/* + * Returns true when XenStore init must be deferred in order to + * allow the PCI platform device to be initialised, before we + * can actually have event channel interrupts working. + */ +static bool xs_hvm_defer_init_for_callback(void) { - if (!xen_domain()) - return -ENODEV; +#ifdef CONFIG_XEN_PVHVM + return xen_store_domain_type == XS_HVM && + !xen_have_vector_callback; +#else + return false; +#endif +} - if (xen_initial_domain() || xen_hvm_domain()) - return 0; +static int __init xenbus_probe_initcall(void) +{ + /* + * Probe XenBus here in the XS_PV case, and also XS_HVM unless we + * need to wait for the platform PCI device to come up. + */ + if (xen_store_domain_type == XS_PV || + (xen_store_domain_type == XS_HVM && + !xs_hvm_defer_init_for_callback())) + xenbus_probe(); - xenbus_probe(NULL); return 0; } - device_initcall(xenbus_probe_initcall); +int xen_set_callback_via(uint64_t via) +{ + struct xen_hvm_param a; + int ret; + + a.domid = DOMID_SELF; + a.index = HVM_PARAM_CALLBACK_IRQ; + a.value = via; + + ret = HYPERVISOR_hvm_op(HVMOP_set_param, &a); + if (ret) + return ret; + + /* + * If xenbus_probe_initcall() deferred the xenbus_probe() + * due to the callback not functioning yet, we can do it now. + */ + if (!xenstored_ready && xs_hvm_defer_init_for_callback()) + xenbus_probe(); + + return ret; +} +EXPORT_SYMBOL_GPL(xen_set_callback_via); + /* Set up event channel for xenstored which is run as a local process * (this is normally used only in dom0) */ @@ -818,11 +865,17 @@ static int __init xenbus_init(void) break; } - /* Initialize the interface to xenstore. */ - err = xs_init(); - if (err) { - pr_warn("Error initializing xenstore comms: %i\n", err); - goto out_error; + /* + * HVM domains may not have a functional callback yet. In that + * case let xs_init() be called from xenbus_probe(), which will + * get invoked at an appropriate time. + */ + if (xen_store_domain_type != XS_HVM) { + err = xs_init(); + if (err) { + pr_warn("Error initializing xenstore comms: %i\n", err); + goto out_error; + } } if ((xen_store_domain_type != XS_LOCAL) && diff --git a/include/xen/xenbus.h b/include/xen/xenbus.h index 00c7235ae93e7..2c43b0ef1e4d5 100644 --- a/include/xen/xenbus.h +++ b/include/xen/xenbus.h @@ -192,7 +192,7 @@ void xs_suspend_cancel(void); struct work_struct; -void xenbus_probe(struct work_struct *); +void xenbus_probe(void); #define XENBUS_IS_ERR_READ(str) ({ \ if (!IS_ERR(str) && strlen(str) == 0) { \ -- GitLab From 8f4fd86aa5d6aa122619623910065d236592e37c Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Wed, 6 Jan 2021 15:39:55 +0000 Subject: [PATCH 0642/2012] xen: Set platform PCI device INTX affinity to CPU0 With INTX or GSI delivery, Xen uses the event channel structures of CPU0. If the interrupt gets handled by Linux on a different CPU, then no events are seen as pending. Rather than introducing locking to allow other CPUs to process CPU0's events, just ensure that the PCI interrupts happens only on CPU0. Signed-off-by: David Woodhouse Reviewed-by: Boris Ostrovsky Link: https://lore.kernel.org/r/20210106153958.584169-3-dwmw2@infradead.org Signed-off-by: Juergen Gross --- drivers/xen/platform-pci.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/xen/platform-pci.c b/drivers/xen/platform-pci.c index 9db557b76511b..18f0ed8b1f93b 100644 --- a/drivers/xen/platform-pci.c +++ b/drivers/xen/platform-pci.c @@ -132,6 +132,13 @@ static int platform_pci_probe(struct pci_dev *pdev, dev_warn(&pdev->dev, "request_irq failed err=%d\n", ret); goto out; } + /* + * It doesn't strictly *have* to run on CPU0 but it sure + * as hell better process the event channel ports delivered + * to CPU0. + */ + irq_set_affinity(pdev->irq, cpumask_of(0)); + callback_via = get_callback_via(pdev); ret = xen_set_callback_via(callback_via); if (ret) { -- GitLab From b36b0fe96af13460278bf9b173beced1bd15f85d Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Wed, 6 Jan 2021 15:39:56 +0000 Subject: [PATCH 0643/2012] x86/xen: Add xen_no_vector_callback option to test PCI INTX delivery It's useful to be able to test non-vector event channel delivery, to make sure Linux will work properly on older Xen which doesn't have it. It's also useful for those working on Xen and Xen-compatible hypervisors, because there are guest kernels still in active use which use PCI INTX even when vector delivery is available. Signed-off-by: David Woodhouse Reviewed-by: Boris Ostrovsky Link: https://lore.kernel.org/r/20210106153958.584169-4-dwmw2@infradead.org Signed-off-by: Juergen Gross --- Documentation/admin-guide/kernel-parameters.txt | 4 ++++ arch/x86/xen/enlighten_hvm.c | 11 ++++++++++- 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 44fde25bb221e..035d27b6272cd 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -5964,6 +5964,10 @@ This option is obsoleted by the "nopv" option, which has equivalent effect for XEN platform. + xen_no_vector_callback + [KNL,X86,XEN] Disable the vector callback for Xen + event channel interrupts. + xen_scrub_pages= [XEN] Boolean option to control scrubbing pages before giving them back to Xen, for use by other domains. Can be also changed at runtime diff --git a/arch/x86/xen/enlighten_hvm.c b/arch/x86/xen/enlighten_hvm.c index 9e87ab010c82b..ec50b7423a4c8 100644 --- a/arch/x86/xen/enlighten_hvm.c +++ b/arch/x86/xen/enlighten_hvm.c @@ -188,6 +188,8 @@ static int xen_cpu_dead_hvm(unsigned int cpu) return 0; } +static bool no_vector_callback __initdata; + static void __init xen_hvm_guest_init(void) { if (xen_pv_domain()) @@ -207,7 +209,7 @@ static void __init xen_hvm_guest_init(void) xen_panic_handler_init(); - if (xen_feature(XENFEAT_hvm_callback_vector)) + if (!no_vector_callback && xen_feature(XENFEAT_hvm_callback_vector)) xen_have_vector_callback = 1; xen_hvm_smp_init(); @@ -233,6 +235,13 @@ static __init int xen_parse_nopv(char *arg) } early_param("xen_nopv", xen_parse_nopv); +static __init int xen_parse_no_vector_callback(char *arg) +{ + no_vector_callback = true; + return 0; +} +early_param("xen_no_vector_callback", xen_parse_no_vector_callback); + bool __init xen_hvm_need_lapic(void) { if (xen_pv_domain()) -- GitLab From 4621dc6a5bf1235249e92231db30c96dfd1a18b9 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Wed, 6 Jan 2021 15:39:57 +0000 Subject: [PATCH 0644/2012] x86/xen: Don't register Xen IPIs when they aren't going to be used In the case where xen_have_vector_callback is false, we still register the IPI vectors in xen_smp_intr_init() for the secondary CPUs even though they aren't going to be used. Stop doing that. Signed-off-by: David Woodhouse Reviewed-by: Boris Ostrovsky Link: https://lore.kernel.org/r/20210106153958.584169-5-dwmw2@infradead.org Signed-off-by: Juergen Gross --- arch/x86/xen/enlighten_hvm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/xen/enlighten_hvm.c b/arch/x86/xen/enlighten_hvm.c index ec50b7423a4c8..e68ea5f4ad1ce 100644 --- a/arch/x86/xen/enlighten_hvm.c +++ b/arch/x86/xen/enlighten_hvm.c @@ -164,10 +164,10 @@ static int xen_cpu_up_prepare_hvm(unsigned int cpu) else per_cpu(xen_vcpu_id, cpu) = cpu; rc = xen_vcpu_setup(cpu); - if (rc) + if (rc || !xen_have_vector_callback) return rc; - if (xen_have_vector_callback && xen_feature(XENFEAT_hvm_safe_pvclock)) + if (xen_feature(XENFEAT_hvm_safe_pvclock)) xen_setup_timer(cpu); rc = xen_smp_intr_init(cpu); -- GitLab From 3d7746bea92530e8695258a3cf3ddec7a135edd6 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Wed, 6 Jan 2021 15:39:58 +0000 Subject: [PATCH 0645/2012] x86/xen: Fix xen_hvm_smp_init() when vector callback not available MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Only the IPI-related functions in the smp_ops should be conditional on the vector callback being available. The rest should still happen: • xen_hvm_smp_prepare_boot_cpu() This function does two things, both of which should still happen if there is no vector callback support. The call to xen_vcpu_setup() for vCPU0 should still happen as it just sets up the vcpu_info for CPU0. That does happen for the secondary vCPUs too, from xen_cpu_up_prepare_hvm(). The second thing it does is call xen_init_spinlocks(), which perhaps counter-intuitively should *also* still be happening in the case without vector callbacks, so that it can clear its local xen_pvspin flag and disable the virt_spin_lock_key accordingly. Checking xen_have_vector_callback in xen_init_spinlocks() itself would affect PV guests, so set the global nopvspin flag in xen_hvm_smp_init() instead, when vector callbacks aren't available. • xen_hvm_smp_prepare_cpus() This does some IPI-related setup by calling xen_smp_intr_init() and xen_init_lock_cpu(), which can be made conditional. And it sets the xen_vcpu_id to XEN_VCPU_ID_INVALID for all possible CPUS, which does need to happen. • xen_smp_cpus_done() This offlines any vCPUs which doesn't fit in the global shared_info page, if separate vcpu_info placement isn't available. That part also needs to happen regardless of vector callback support. • xen_hvm_cpu_die() This doesn't actually do anything other than commin_cpu_die() right right now in the !vector_callback case; all three teardown functions it calls should be no-ops. But to guard against future regressions it's useful to call it anyway, and for it to explicitly check for xen_have_vector_callback before calling those additional functions. Signed-off-by: David Woodhouse Reviewed-by: Boris Ostrovsky Link: https://lore.kernel.org/r/20210106153958.584169-6-dwmw2@infradead.org Signed-off-by: Juergen Gross --- arch/x86/xen/smp_hvm.c | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/arch/x86/xen/smp_hvm.c b/arch/x86/xen/smp_hvm.c index f5e7db4f82abb..056430a1080bb 100644 --- a/arch/x86/xen/smp_hvm.c +++ b/arch/x86/xen/smp_hvm.c @@ -33,9 +33,11 @@ static void __init xen_hvm_smp_prepare_cpus(unsigned int max_cpus) int cpu; native_smp_prepare_cpus(max_cpus); - WARN_ON(xen_smp_intr_init(0)); - xen_init_lock_cpu(0); + if (xen_have_vector_callback) { + WARN_ON(xen_smp_intr_init(0)); + xen_init_lock_cpu(0); + } for_each_possible_cpu(cpu) { if (cpu == 0) @@ -50,9 +52,11 @@ static void __init xen_hvm_smp_prepare_cpus(unsigned int max_cpus) static void xen_hvm_cpu_die(unsigned int cpu) { if (common_cpu_die(cpu) == 0) { - xen_smp_intr_free(cpu); - xen_uninit_lock_cpu(cpu); - xen_teardown_timer(cpu); + if (xen_have_vector_callback) { + xen_smp_intr_free(cpu); + xen_uninit_lock_cpu(cpu); + xen_teardown_timer(cpu); + } } } #else @@ -64,14 +68,17 @@ static void xen_hvm_cpu_die(unsigned int cpu) void __init xen_hvm_smp_init(void) { - if (!xen_have_vector_callback) + smp_ops.smp_prepare_boot_cpu = xen_hvm_smp_prepare_boot_cpu; + smp_ops.smp_prepare_cpus = xen_hvm_smp_prepare_cpus; + smp_ops.smp_cpus_done = xen_smp_cpus_done; + smp_ops.cpu_die = xen_hvm_cpu_die; + + if (!xen_have_vector_callback) { + nopvspin = true; return; + } - smp_ops.smp_prepare_cpus = xen_hvm_smp_prepare_cpus; smp_ops.smp_send_reschedule = xen_smp_send_reschedule; - smp_ops.cpu_die = xen_hvm_cpu_die; smp_ops.send_call_func_ipi = xen_smp_send_call_function_ipi; smp_ops.send_call_func_single_ipi = xen_smp_send_call_function_single_ipi; - smp_ops.smp_prepare_boot_cpu = xen_hvm_smp_prepare_boot_cpu; - smp_ops.smp_cpus_done = xen_smp_cpus_done; } -- GitLab From bcd7059abc19e6ec5b2260dff6a008fb99c4eef9 Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Wed, 13 Jan 2021 02:11:23 +0800 Subject: [PATCH 0646/2012] ASoC: SOF: Intel: hda: Resume codec to do jack detection Instead of queueing jackpoll_work, runtime resume the codec to let it use different jack detection methods based on jackpoll_interval. This partially matches SOF driver's behavior with commit a6e7d0a4bdb0 ("ALSA: hda: fix jack detection with Realtek codecs when in D3"), the difference is SOF unconditionally resumes the codec. Signed-off-by: Kai-Heng Feng Link: https://lore.kernel.org/r/20210112181128.1229827-1-kai.heng.feng@canonical.com Signed-off-by: Mark Brown --- sound/soc/sof/intel/hda-codec.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sound/soc/sof/intel/hda-codec.c b/sound/soc/sof/intel/hda-codec.c index 6875fa570c2c5..df59c79cfdfc3 100644 --- a/sound/soc/sof/intel/hda-codec.c +++ b/sound/soc/sof/intel/hda-codec.c @@ -93,8 +93,7 @@ void hda_codec_jack_check(struct snd_sof_dev *sdev) * has been recorded in STATESTS */ if (codec->jacktbl.used) - schedule_delayed_work(&codec->jackpoll_work, - codec->jackpoll_interval); + pm_request_resume(&codec->core.dev); } #else void hda_codec_jack_wake_enable(struct snd_sof_dev *sdev) {} -- GitLab From 31ba0c0776027896553bd8477baff7c8b5d95699 Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Wed, 13 Jan 2021 02:11:24 +0800 Subject: [PATCH 0647/2012] ASoC: SOF: Intel: hda: Modify existing helper to disable WAKEEN Modify hda_codec_jack_wake_enable() to also support disable WAKEEN. In addition, this patch also moves the WAKEEN disablement call out of hda_codec_jack_check() into hda_codec_jack_wake_enable(). This is a preparation for next patch. No functional change intended. Signed-off-by: Kai-Heng Feng Link: https://lore.kernel.org/r/20210112181128.1229827-2-kai.heng.feng@canonical.com Signed-off-by: Mark Brown --- sound/soc/sof/intel/hda-codec.c | 16 +++++++--------- sound/soc/sof/intel/hda-dsp.c | 6 ++++-- sound/soc/sof/intel/hda.h | 2 +- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/sound/soc/sof/intel/hda-codec.c b/sound/soc/sof/intel/hda-codec.c index df59c79cfdfc3..b7e9931ead57a 100644 --- a/sound/soc/sof/intel/hda-codec.c +++ b/sound/soc/sof/intel/hda-codec.c @@ -63,16 +63,18 @@ static int hda_codec_load_module(struct hda_codec *codec) } /* enable controller wake up event for all codecs with jack connectors */ -void hda_codec_jack_wake_enable(struct snd_sof_dev *sdev) +void hda_codec_jack_wake_enable(struct snd_sof_dev *sdev, bool enable) { struct hda_bus *hbus = sof_to_hbus(sdev); struct hdac_bus *bus = sof_to_bus(sdev); struct hda_codec *codec; unsigned int mask = 0; - list_for_each_codec(codec, hbus) - if (codec->jacktbl.used) - mask |= BIT(codec->core.addr); + if (enable) { + list_for_each_codec(codec, hbus) + if (codec->jacktbl.used) + mask |= BIT(codec->core.addr); + } snd_hdac_chip_updatew(bus, WAKEEN, STATESTS_INT_MASK, mask); } @@ -81,12 +83,8 @@ void hda_codec_jack_wake_enable(struct snd_sof_dev *sdev) void hda_codec_jack_check(struct snd_sof_dev *sdev) { struct hda_bus *hbus = sof_to_hbus(sdev); - struct hdac_bus *bus = sof_to_bus(sdev); struct hda_codec *codec; - /* disable controller Wake Up event*/ - snd_hdac_chip_updatew(bus, WAKEEN, STATESTS_INT_MASK, 0); - list_for_each_codec(codec, hbus) /* * Wake up all jack-detecting codecs regardless whether an event @@ -96,7 +94,7 @@ void hda_codec_jack_check(struct snd_sof_dev *sdev) pm_request_resume(&codec->core.dev); } #else -void hda_codec_jack_wake_enable(struct snd_sof_dev *sdev) {} +void hda_codec_jack_wake_enable(struct snd_sof_dev *sdev, bool enable) {} void hda_codec_jack_check(struct snd_sof_dev *sdev) {} #endif /* CONFIG_SND_SOC_SOF_HDA_AUDIO_CODEC */ EXPORT_SYMBOL_NS(hda_codec_jack_wake_enable, SND_SOC_SOF_HDA_AUDIO_CODEC); diff --git a/sound/soc/sof/intel/hda-dsp.c b/sound/soc/sof/intel/hda-dsp.c index 2b001151fe376..7d00107cf3b27 100644 --- a/sound/soc/sof/intel/hda-dsp.c +++ b/sound/soc/sof/intel/hda-dsp.c @@ -617,7 +617,7 @@ static int hda_suspend(struct snd_sof_dev *sdev, bool runtime_suspend) #if IS_ENABLED(CONFIG_SND_SOC_SOF_HDA) if (runtime_suspend) - hda_codec_jack_wake_enable(sdev); + hda_codec_jack_wake_enable(sdev, true); /* power down all hda link */ snd_hdac_ext_bus_link_power_down_all(bus); @@ -683,8 +683,10 @@ static int hda_resume(struct snd_sof_dev *sdev, bool runtime_resume) #if IS_ENABLED(CONFIG_SND_SOC_SOF_HDA) /* check jack status */ - if (runtime_resume) + if (runtime_resume) { + hda_codec_jack_wake_enable(sdev, false); hda_codec_jack_check(sdev); + } /* turn off the links that were off before suspend */ list_for_each_entry(hlink, &bus->hlink_list, list) { diff --git a/sound/soc/sof/intel/hda.h b/sound/soc/sof/intel/hda.h index 9ec8ae0fd6495..a3b6f3e9121c4 100644 --- a/sound/soc/sof/intel/hda.h +++ b/sound/soc/sof/intel/hda.h @@ -650,7 +650,7 @@ void sof_hda_bus_init(struct hdac_bus *bus, struct device *dev); */ void hda_codec_probe_bus(struct snd_sof_dev *sdev, bool hda_codec_use_common_hdmi); -void hda_codec_jack_wake_enable(struct snd_sof_dev *sdev); +void hda_codec_jack_wake_enable(struct snd_sof_dev *sdev, bool enable); void hda_codec_jack_check(struct snd_sof_dev *sdev); #endif /* CONFIG_SND_SOC_SOF_HDA */ -- GitLab From ef4d764c99f792b725d4754a3628830f094f5c58 Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Wed, 13 Jan 2021 02:11:25 +0800 Subject: [PATCH 0648/2012] ASoC: SOF: Intel: hda: Avoid checking jack on system suspend System takes a very long time to suspend after commit 215a22ed31a1 ("ALSA: hda: Refactor codec PM to use direct-complete optimization"): [ 90.065964] PM: suspend entry (s2idle) [ 90.067337] Filesystems sync: 0.001 seconds [ 90.185758] Freezing user space processes ... (elapsed 0.002 seconds) done. [ 90.188713] OOM killer disabled. [ 90.188714] Freezing remaining freezable tasks ... (elapsed 0.001 seconds) done. [ 90.190024] printk: Suspending console(s) (use no_console_suspend to debug) [ 90.904912] intel_pch_thermal 0000:00:12.0: CPU-PCH is cool [49C], continue to suspend [ 321.262505] snd_hda_codec_realtek ehdaudio0D0: Unable to sync register 0x2b8000. -5 [ 328.426919] snd_hda_codec_realtek ehdaudio0D0: Unable to sync register 0x2b8000. -5 [ 329.490933] ACPI: EC: interrupt blocked That commit keeps the codec suspended during the system suspend. However, mute/micmute LED will clear codec's direct-complete flag by dpm_clear_superiors_direct_complete(). This doesn't play well with SOF driver. When its runtime resume is called for system suspend, hda_codec_jack_check() schedules jackpoll_work which uses snd_hdac_is_power_on() to check whether codec is suspended. Because the direct-complete path isn't taken, pm_runtime_disable() isn't called so snd_hdac_is_power_on() returns false and jackpoll continues to run, and snd_hda_power_up_pm() cannot power up an already suspended codec in multiple attempts, causes the long delay on system suspend: if (dev->power.direct_complete) { if (pm_runtime_status_suspended(dev)) { pm_runtime_disable(dev); if (pm_runtime_status_suspended(dev)) { pm_dev_dbg(dev, state, "direct-complete "); goto Complete; } pm_runtime_enable(dev); } dev->power.direct_complete = false; } When direct-complete path is taken, snd_hdac_is_power_on() returns true and hda_jackpoll_work() is skipped by accident. So this is still not correct. If we were to use snd_hdac_is_power_on() in system PM path, pm_runtime_status_suspended() should be used instead of pm_runtime_suspended(), otherwise pm_runtime_{enable,disable}() may change the outcome of snd_hdac_is_power_on(). Because devices suspend in reverse order (i.e. child first), it doesn't make much sense to resume an already suspended codec from audio controller. So avoid the issue by making sure jackpoll isn't used in system PM process. Fixes: 215a22ed31a1 ("ALSA: hda: Refactor codec PM to use direct-complete optimization") Signed-off-by: Kai-Heng Feng Reviewed-by: Kai Vehmanen Link: https://lore.kernel.org/r/20210112181128.1229827-3-kai.heng.feng@canonical.com Signed-off-by: Mark Brown --- sound/soc/sof/intel/hda-dsp.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sound/soc/sof/intel/hda-dsp.c b/sound/soc/sof/intel/hda-dsp.c index 7d00107cf3b27..1c5e05b88a90d 100644 --- a/sound/soc/sof/intel/hda-dsp.c +++ b/sound/soc/sof/intel/hda-dsp.c @@ -685,7 +685,8 @@ static int hda_resume(struct snd_sof_dev *sdev, bool runtime_resume) /* check jack status */ if (runtime_resume) { hda_codec_jack_wake_enable(sdev, false); - hda_codec_jack_check(sdev); + if (sdev->system_suspend_target == SOF_SUSPEND_NONE) + hda_codec_jack_check(sdev); } /* turn off the links that were off before suspend */ -- GitLab From b4411616c26f26c4017b8fa4d3538b1a02028733 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Wed, 13 Jan 2021 12:42:24 +0000 Subject: [PATCH 0649/2012] io_uring: fix null-deref in io_disable_sqo_submit general protection fault, probably for non-canonical address 0xdffffc0000000022: 0000 [#1] KASAN: null-ptr-deref in range [0x0000000000000110-0x0000000000000117] RIP: 0010:io_ring_set_wakeup_flag fs/io_uring.c:6929 [inline] RIP: 0010:io_disable_sqo_submit+0xdb/0x130 fs/io_uring.c:8891 Call Trace: io_uring_create fs/io_uring.c:9711 [inline] io_uring_setup+0x12b1/0x38e0 fs/io_uring.c:9739 do_syscall_64+0x2d/0x70 arch/x86/entry/common.c:46 entry_SYSCALL_64_after_hwframe+0x44/0xa9 io_disable_sqo_submit() might be called before user rings were allocated, don't do io_ring_set_wakeup_flag() in those cases. Reported-by: syzbot+ab412638aeb652ded540@syzkaller.appspotmail.com Fixes: d9d05217cb69 ("io_uring: stop SQPOLL submit on creator's death") Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io_uring.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index b0e6d8e607a33..66db2c46ab824 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -8895,7 +8895,8 @@ static void io_disable_sqo_submit(struct io_ring_ctx *ctx) mutex_unlock(&ctx->uring_lock); /* make sure callers enter the ring to get error */ - io_ring_set_wakeup_flag(ctx); + if (ctx->rings) + io_ring_set_wakeup_flag(ctx); } /* -- GitLab From 06585c497b55045ec21aa8128e340f6a6587351c Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Wed, 13 Jan 2021 12:42:25 +0000 Subject: [PATCH 0650/2012] io_uring: do sqo disable on install_fd error WARNING: CPU: 0 PID: 8494 at fs/io_uring.c:8717 io_ring_ctx_wait_and_kill+0x4f2/0x600 fs/io_uring.c:8717 Call Trace: io_uring_release+0x3e/0x50 fs/io_uring.c:8759 __fput+0x283/0x920 fs/file_table.c:280 task_work_run+0xdd/0x190 kernel/task_work.c:140 tracehook_notify_resume include/linux/tracehook.h:189 [inline] exit_to_user_mode_loop kernel/entry/common.c:174 [inline] exit_to_user_mode_prepare+0x249/0x250 kernel/entry/common.c:201 __syscall_exit_to_user_mode_work kernel/entry/common.c:291 [inline] syscall_exit_to_user_mode+0x19/0x50 kernel/entry/common.c:302 entry_SYSCALL_64_after_hwframe+0x44/0xa9 failed io_uring_install_fd() is a special case, we don't do io_ring_ctx_wait_and_kill() directly but defer it to fput, though still need to io_disable_sqo_submit() before. note: it doesn't fix any real problem, just a warning. That's because sqring won't be available to the userspace in this case and so SQPOLL won't submit anything. Reported-by: syzbot+9c9c35374c0ecac06516@syzkaller.appspotmail.com Fixes: d9d05217cb69 ("io_uring: stop SQPOLL submit on creator's death") Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io_uring.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/io_uring.c b/fs/io_uring.c index 66db2c46ab824..372be9caf3402 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -9708,6 +9708,7 @@ static int io_uring_create(unsigned entries, struct io_uring_params *p, */ ret = io_uring_install_fd(ctx, file); if (ret < 0) { + io_disable_sqo_submit(ctx); /* fput will clean it up */ fput(file); return ret; -- GitLab From 5e941fc033e411118fb3a7d9e0b97f8cf702cd39 Mon Sep 17 00:00:00 2001 From: Kai Vehmanen Date: Wed, 13 Jan 2021 17:56:29 +0200 Subject: [PATCH 0651/2012] ALSA: hda: Add AlderLake-P PCI ID and HDMI codec vid Add HD Audio PCI ID and HDMI codec vendor ID for Intel AlderLake-P. Signed-off-by: Kai Vehmanen Reviewed-by: Ranjani Sridharan Reviewed-by: Pierre-Louis Bossart Reviewed-by: Guennadi Liakhovetski Link: https://lore.kernel.org/r/20210113155629.4097057-1-kai.vehmanen@linux.intel.com Signed-off-by: Takashi Iwai --- sound/pci/hda/hda_intel.c | 3 +++ sound/pci/hda/patch_hdmi.c | 1 + 2 files changed, 4 insertions(+) diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index e4dd2ff5e4737..8d568277088a1 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -2507,6 +2507,9 @@ static const struct pci_device_id azx_ids[] = { /* Alderlake-S */ { PCI_DEVICE(0x8086, 0x7ad0), .driver_data = AZX_DRIVER_SKL | AZX_DCAPS_INTEL_SKYLAKE}, + /* Alderlake-P */ + { PCI_DEVICE(0x8086, 0x51c8), + .driver_data = AZX_DRIVER_SKL | AZX_DCAPS_INTEL_SKYLAKE}, /* Elkhart Lake */ { PCI_DEVICE(0x8086, 0x4b55), .driver_data = AZX_DRIVER_SKL | AZX_DCAPS_INTEL_SKYLAKE}, diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c index 74d246a0dc6de..97adff0cbcab4 100644 --- a/sound/pci/hda/patch_hdmi.c +++ b/sound/pci/hda/patch_hdmi.c @@ -4346,6 +4346,7 @@ HDA_CODEC_ENTRY(0x8086280f, "Icelake HDMI", patch_i915_icl_hdmi), HDA_CODEC_ENTRY(0x80862812, "Tigerlake HDMI", patch_i915_tgl_hdmi), HDA_CODEC_ENTRY(0x80862814, "DG1 HDMI", patch_i915_tgl_hdmi), HDA_CODEC_ENTRY(0x80862815, "Alderlake HDMI", patch_i915_tgl_hdmi), +HDA_CODEC_ENTRY(0x8086281c, "Alderlake-P HDMI", patch_i915_tgl_hdmi), HDA_CODEC_ENTRY(0x80862816, "Rocketlake HDMI", patch_i915_tgl_hdmi), HDA_CODEC_ENTRY(0x8086281a, "Jasperlake HDMI", patch_i915_icl_hdmi), HDA_CODEC_ENTRY(0x8086281b, "Elkhartlake HDMI", patch_i915_icl_hdmi), -- GitLab From 1e249cb5b7fc09ff216aa5a12f6c302e434e88f9 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 12 Jan 2021 11:02:43 -0800 Subject: [PATCH 0652/2012] fs: fix lazytime expiration handling in __writeback_single_inode() When lazytime is enabled and an inode is being written due to its in-memory updated timestamps having expired, either due to a sync() or syncfs() system call or due to dirtytime_expire_interval having elapsed, the VFS needs to inform the filesystem so that the filesystem can copy the inode's timestamps out to the on-disk data structures. This is done by __writeback_single_inode() calling mark_inode_dirty_sync(), which then calls ->dirty_inode(I_DIRTY_SYNC). However, this occurs after __writeback_single_inode() has already cleared the dirty flags from ->i_state. This causes two bugs: - mark_inode_dirty_sync() redirties the inode, causing it to remain dirty. This wastefully causes the inode to be written twice. But more importantly, it breaks cases where sync_filesystem() is expected to clean dirty inodes. This includes the FS_IOC_REMOVE_ENCRYPTION_KEY ioctl (as reported at https://lore.kernel.org/r/20200306004555.GB225345@gmail.com), as well as possibly filesystem freezing (freeze_super()). - Since ->i_state doesn't contain I_DIRTY_TIME when ->dirty_inode() is called from __writeback_single_inode() for lazytime expiration, xfs_fs_dirty_inode() ignores the notification. (XFS only cares about lazytime expirations, and it assumes that i_state will contain I_DIRTY_TIME during those.) Therefore, lazy timestamps aren't persisted by sync(), syncfs(), or dirtytime_expire_interval on XFS. Fix this by moving the call to mark_inode_dirty_sync() to earlier in __writeback_single_inode(), before the dirty flags are cleared from i_state. This makes filesystems be properly notified of the timestamp expiration, and it avoids incorrectly redirtying the inode. This fixes xfstest generic/580 (which tests FS_IOC_REMOVE_ENCRYPTION_KEY) when run on ext4 or f2fs with lazytime enabled. It also fixes the new lazytime xfstest I've proposed, which reproduces the above-mentioned XFS bug (https://lore.kernel.org/r/20210105005818.92978-1-ebiggers@kernel.org). Alternatively, we could call ->dirty_inode(I_DIRTY_SYNC) directly. But due to the introduction of I_SYNC_QUEUED, mark_inode_dirty_sync() is the right thing to do because mark_inode_dirty_sync() now knows not to move the inode to a writeback list if it is currently queued for sync. Fixes: 0ae45f63d4ef ("vfs: add support for a lazytime mount option") Cc: stable@vger.kernel.org Depends-on: 5afced3bf281 ("writeback: Avoid skipping inode writeback") Link: https://lore.kernel.org/r/20210112190253.64307-2-ebiggers@kernel.org Suggested-by: Jan Kara Reviewed-by: Christoph Hellwig Reviewed-by: Jan Kara Signed-off-by: Eric Biggers Signed-off-by: Jan Kara --- fs/fs-writeback.c | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index acfb55834af23..c41cb887eb7d3 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -1474,21 +1474,25 @@ __writeback_single_inode(struct inode *inode, struct writeback_control *wbc) } /* - * Some filesystems may redirty the inode during the writeback - * due to delalloc, clear dirty metadata flags right before - * write_inode() + * If the inode has dirty timestamps and we need to write them, call + * mark_inode_dirty_sync() to notify the filesystem about it and to + * change I_DIRTY_TIME into I_DIRTY_SYNC. */ - spin_lock(&inode->i_lock); - - dirty = inode->i_state & I_DIRTY; if ((inode->i_state & I_DIRTY_TIME) && - ((dirty & I_DIRTY_INODE) || - wbc->sync_mode == WB_SYNC_ALL || wbc->for_sync || + (wbc->sync_mode == WB_SYNC_ALL || wbc->for_sync || time_after(jiffies, inode->dirtied_time_when + dirtytime_expire_interval * HZ))) { - dirty |= I_DIRTY_TIME; trace_writeback_lazytime(inode); + mark_inode_dirty_sync(inode); } + + /* + * Some filesystems may redirty the inode during the writeback + * due to delalloc, clear dirty metadata flags right before + * write_inode() + */ + spin_lock(&inode->i_lock); + dirty = inode->i_state & I_DIRTY; inode->i_state &= ~dirty; /* @@ -1509,8 +1513,6 @@ __writeback_single_inode(struct inode *inode, struct writeback_control *wbc) spin_unlock(&inode->i_lock); - if (dirty & I_DIRTY_TIME) - mark_inode_dirty_sync(inode); /* Don't write the inode if only I_DIRTY_PAGES was set */ if (dirty & ~I_DIRTY_PAGES) { int err = write_inode(inode, wbc); -- GitLab From 9c25af250214e45f6d1c21ff6239a1ffeeedf20e Mon Sep 17 00:00:00 2001 From: Kai Vehmanen Date: Wed, 13 Jan 2021 17:07:15 +0200 Subject: [PATCH 0653/2012] ASoC: SOF: Intel: fix page fault at probe if i915 init fails The earlier commit to fix runtime PM in case i915 init fails, introduces a possibility to hit a page fault. snd_hdac_ext_bus_device_exit() is designed to be called from dev.release(). Calling it outside device reference counting, is not safe and may lead to calling the device_exit() function twice. Additionally, as part of ext_bus_device_init(), the device is also registered with snd_hdac_device_register(). Thus before calling device_exit(), the device must be removed from device hierarchy first. Fix the issue by rolling back init actions by calling hdac_device_unregister() and then releasing device with put_device(). This matches with existing code in hdac-ext module. To complete the fix, add handling for the case where hda_codec_load_module() returns -ENODEV, and clean up the hdac_ext resources also in this case. In future work, hdac-ext interface should be extended to allow clients more flexibility to handle the life-cycle of individual devices, beyond just the current snd_hdac_ext_bus_device_remove(), which removes all devices. BugLink: https://github.com/thesofproject/linux/issues/2646 Reported-by: Jaroslav Kysela Fixes: 6c63c954e1c5 ("ASoC: SOF: fix a runtime pm issue in SOF when HDMI codec doesn't work") Signed-off-by: Kai Vehmanen Reviewed-by: Rander Wang Reviewed-by: Libin Yang Reviewed-by: Bard Liao Link: https://lore.kernel.org/r/20210113150715.3992635-1-kai.vehmanen@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/sof/intel/hda-codec.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/sound/soc/sof/intel/hda-codec.c b/sound/soc/sof/intel/hda-codec.c index b7e9931ead57a..6744318de612e 100644 --- a/sound/soc/sof/intel/hda-codec.c +++ b/sound/soc/sof/intel/hda-codec.c @@ -153,7 +153,8 @@ static int hda_codec_probe(struct snd_sof_dev *sdev, int address, if (!hdev->bus->audio_component) { dev_dbg(sdev->dev, "iDisp hw present but no driver\n"); - goto error; + ret = -ENOENT; + goto out; } hda_priv->need_display_power = true; } @@ -170,24 +171,23 @@ static int hda_codec_probe(struct snd_sof_dev *sdev, int address, * other return codes without modification */ if (ret == 0) - goto error; + ret = -ENOENT; } - return ret; - -error: - snd_hdac_ext_bus_device_exit(hdev); - return -ENOENT; - +out: + if (ret < 0) { + snd_hdac_device_unregister(hdev); + put_device(&hdev->dev); + } #else hdev = devm_kzalloc(sdev->dev, sizeof(*hdev), GFP_KERNEL); if (!hdev) return -ENOMEM; ret = snd_hdac_ext_bus_device_init(&hbus->core, address, hdev, HDA_DEV_ASOC); +#endif return ret; -#endif } /* Codec initialization */ -- GitLab From d52e419ac8b50c8bef41b398ed13528e75d7ad48 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 12 Jan 2021 15:23:51 +0000 Subject: [PATCH 0654/2012] rxrpc: Fix handling of an unsupported token type in rxrpc_read() Clang static analysis reports the following: net/rxrpc/key.c:657:11: warning: Assigned value is garbage or undefined toksize = toksizes[tok++]; ^ ~~~~~~~~~~~~~~~ rxrpc_read() contains two consecutive loops. The first loop calculates the token sizes and stores the results in toksizes[] and the second one uses the array. When there is an error in identifying the token in the first loop, the token is skipped, no change is made to the toksizes[] array. When the same error happens in the second loop, the token is not skipped. This will cause the toksizes[] array to be out of step and will overrun past the calculated sizes. Fix this by making both loops log a message and return an error in this case. This should only happen if a new token type is incompletely implemented, so it should normally be impossible to trigger this. Fixes: 9a059cd5ca7d ("rxrpc: Downgrade the BUG() for unsupported token type in rxrpc_read()") Reported-by: Tom Rix Signed-off-by: David Howells Reviewed-by: Tom Rix Link: https://lore.kernel.org/r/161046503122.2445787.16714129930607546635.stgit@warthog.procyon.org.uk Signed-off-by: Jakub Kicinski --- net/rxrpc/key.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/rxrpc/key.c b/net/rxrpc/key.c index 9631aa8543b51..8d2073e0e3da5 100644 --- a/net/rxrpc/key.c +++ b/net/rxrpc/key.c @@ -598,7 +598,7 @@ static long rxrpc_read(const struct key *key, default: /* we have a ticket we can't encode */ pr_err("Unsupported key token type (%u)\n", token->security_index); - continue; + return -ENOPKG; } _debug("token[%u]: toksize=%u", ntoks, toksize); @@ -674,7 +674,9 @@ static long rxrpc_read(const struct key *key, break; default: - break; + pr_err("Unsupported key token type (%u)\n", + token->security_index); + return -ENOPKG; } ASSERTCMP((unsigned long)xdr - (unsigned long)oldxdr, ==, -- GitLab From a95d25dd7b94a5ba18246da09b4218f132fed60e Mon Sep 17 00:00:00 2001 From: Baptiste Lepers Date: Tue, 12 Jan 2021 15:59:15 +0000 Subject: [PATCH 0655/2012] rxrpc: Call state should be read with READ_ONCE() under some circumstances The call state may be changed at any time by the data-ready routine in response to received packets, so if the call state is to be read and acted upon several times in a function, READ_ONCE() must be used unless the call state lock is held. As it happens, we used READ_ONCE() to read the state a few lines above the unmarked read in rxrpc_input_data(), so use that value rather than re-reading it. Fixes: a158bdd3247b ("rxrpc: Fix call timeouts") Signed-off-by: Baptiste Lepers Signed-off-by: David Howells Link: https://lore.kernel.org/r/161046715522.2450566.488819910256264150.stgit@warthog.procyon.org.uk Signed-off-by: Jakub Kicinski --- net/rxrpc/input.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 667c44aa5a63c..dc201363f2c48 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -430,7 +430,7 @@ static void rxrpc_input_data(struct rxrpc_call *call, struct sk_buff *skb) return; } - if (call->state == RXRPC_CALL_SERVER_RECV_REQUEST) { + if (state == RXRPC_CALL_SERVER_RECV_REQUEST) { unsigned long timo = READ_ONCE(call->next_req_timo); unsigned long now, expect_req_by; -- GitLab From 77b6ec01c29aade01701aa30bf1469acc7f2be76 Mon Sep 17 00:00:00 2001 From: Tom Rix Date: Tue, 5 Jan 2021 12:21:26 -0800 Subject: [PATCH 0656/2012] cifs: check pointer before freeing clang static analysis reports this problem dfs_cache.c:591:2: warning: Argument to kfree() is a constant address (18446744073709551614), which is not memory allocated by malloc() kfree(vi); ^~~~~~~~~ In dfs_cache_del_vol() the volume info pointer 'vi' being freed is the return of a call to find_vol(). The large constant address is find_vol() returning an error. Add an error check to dfs_cache_del_vol() similar to the one done in dfs_cache_update_vol(). Fixes: 54be1f6c1c37 ("cifs: Add DFS cache routines") Signed-off-by: Tom Rix Reviewed-by: Nathan Chancellor CC: # v5.0+ Signed-off-by: Steve French --- fs/cifs/dfs_cache.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/cifs/dfs_cache.c b/fs/cifs/dfs_cache.c index 6ad6ba5f6ebee..0fdb0de7ff861 100644 --- a/fs/cifs/dfs_cache.c +++ b/fs/cifs/dfs_cache.c @@ -1260,7 +1260,8 @@ void dfs_cache_del_vol(const char *fullpath) vi = find_vol(fullpath); spin_unlock(&vol_list_lock); - kref_put(&vi->refcnt, vol_release); + if (!IS_ERR(vi)) + kref_put(&vi->refcnt, vol_release); } /** -- GitLab From 2659d3bff3e1b000f49907d0839178b101a89887 Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Wed, 13 Jan 2021 14:16:16 -0300 Subject: [PATCH 0657/2012] cifs: fix interrupted close commands Retry close command if it gets interrupted to not leak open handles on the server. Signed-off-by: Paulo Alcantara (SUSE) Reported-by: Duncan Findlay Suggested-by: Pavel Shilovsky Fixes: 6988a619f5b7 ("cifs: allow syscalls to be restarted in __smb_send_rqst()") Cc: stable@vger.kernel.org Reviewd-by: Pavel Shilovsky Signed-off-by: Steve French --- fs/cifs/smb2pdu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 067eb44c7baa8..794fc3b68b4f9 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -3248,7 +3248,7 @@ close_exit: free_rsp_buf(resp_buftype, rsp); /* retry close in a worker thread if this one is interrupted */ - if (rc == -EINTR) { + if (is_interrupt_error(rc)) { int tmp_rc; tmp_rc = smb2_handle_cancelled_close(tcon, persistent_fid, -- GitLab From c13e7af042270724b42a466edc48a70a43f571f2 Mon Sep 17 00:00:00 2001 From: Menglong Dong Date: Tue, 12 Jan 2021 01:13:40 -0800 Subject: [PATCH 0658/2012] fs: cifs: remove unneeded variable in smb3_fs_context_dup 'rc' in smb3_fs_context_dup is not used and can be removed. Signed-off-by: Menglong Dong Reviewed-by: Aurelien Aptel Signed-off-by: Steve French --- fs/cifs/fs_context.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/fs/cifs/fs_context.c b/fs/cifs/fs_context.c index 0afccbbed2e65..076bcadc756a7 100644 --- a/fs/cifs/fs_context.c +++ b/fs/cifs/fs_context.c @@ -303,8 +303,6 @@ do { \ int smb3_fs_context_dup(struct smb3_fs_context *new_ctx, struct smb3_fs_context *ctx) { - int rc = 0; - memcpy(new_ctx, ctx, sizeof(*ctx)); new_ctx->prepath = NULL; new_ctx->mount_options = NULL; @@ -327,7 +325,7 @@ smb3_fs_context_dup(struct smb3_fs_context *new_ctx, struct smb3_fs_context *ctx DUP_CTX_STR(nodename); DUP_CTX_STR(iocharset); - return rc; + return 0; } static int -- GitLab From ed6b1920f84bc5c3d666dc383ff3bbc60f0f62a5 Mon Sep 17 00:00:00 2001 From: YANG LI Date: Mon, 11 Jan 2021 17:15:28 +0800 Subject: [PATCH 0659/2012] cifs: connect: style: Simplify bool comparison Fix the following coccicheck warning: ./fs/cifs/connect.c:3740:6-21: WARNING: Comparison of 0/1 to bool variable Signed-off-by: YANG LI Reported-by: Abaci Robot Signed-off-by: Steve French --- fs/cifs/connect.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index b9df85506938d..5d39129406ea6 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -3740,7 +3740,7 @@ cifs_setup_session(const unsigned int xid, struct cifs_ses *ses, if (!ses->binding) { ses->capabilities = server->capabilities; - if (linuxExtEnabled == 0) + if (!linuxExtEnabled) ses->capabilities &= (~server->vals->cap_unix); if (ses->auth_key.response) { -- GitLab From e54fd0716c3db20c0cba73fee2c3a4274b08c24e Mon Sep 17 00:00:00 2001 From: YANG LI Date: Wed, 30 Dec 2020 14:35:45 +0800 Subject: [PATCH 0660/2012] cifs: style: replace one-element array with flexible-array There is a regular need in the kernel to provide a way to declare having a dynamically sized set of trailing elements in a structure. Kernel code should always use "flexible array members"[1] for these cases. The older style of one-element or zero-length arrays should no longer be used[2]. [1] https://en.wikipedia.org/wiki/Flexible_array_member [2] https://www.kernel.org/doc/html/v5.9/process/ deprecated.html#zero-length-and-one-element-arrays Signed-off-by: YANG LI Reported-by: Abaci Signed-off-by: Steve French --- fs/cifs/smb2pdu.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h index 204a622b89ed3..d85edf5d14294 100644 --- a/fs/cifs/smb2pdu.h +++ b/fs/cifs/smb2pdu.h @@ -424,7 +424,7 @@ struct smb2_rdma_transform_capabilities_context { __le16 TransformCount; __u16 Reserved1; __u32 Reserved2; - __le16 RDMATransformIds[1]; + __le16 RDMATransformIds[]; } __packed; /* Signing algorithms */ -- GitLab From b42b3a2744b3e8f427de79896720c72823af91ad Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Tue, 12 Jan 2021 10:16:43 +0100 Subject: [PATCH 0661/2012] can: isotp: isotp_getname(): fix kernel information leak Initialize the sockaddr_can structure to prevent a data leak to user space. Suggested-by: Cong Wang Reported-by: syzbot+057884e2f453e8afebc8@syzkaller.appspotmail.com Fixes: e057dd3fc20f ("can: add ISO 15765-2:2016 transport protocol") Signed-off-by: Oliver Hartkopp Link: https://lore.kernel.org/r/20210112091643.11789-1-socketcan@hartkopp.net Signed-off-by: Marc Kleine-Budde --- net/can/isotp.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/can/isotp.c b/net/can/isotp.c index 7839c3b9e5bea..3ef7f78e553bc 100644 --- a/net/can/isotp.c +++ b/net/can/isotp.c @@ -1155,6 +1155,7 @@ static int isotp_getname(struct socket *sock, struct sockaddr *uaddr, int peer) if (peer) return -EOPNOTSUPP; + memset(addr, 0, sizeof(*addr)); addr->can_family = AF_CAN; addr->can_ifindex = so->ifindex; addr->can_addr.tp.rx_id = so->rxid; -- GitLab From ca4c6ebeeb50112f5178f14bfb6d9e8ddf148545 Mon Sep 17 00:00:00 2001 From: Qinglang Miao Date: Wed, 13 Jan 2021 15:31:00 +0800 Subject: [PATCH 0662/2012] can: mcp251xfd: mcp251xfd_handle_rxif_one(): fix wrong NULL pointer check If alloc_canfd_skb() returns NULL, 'cfg' is an uninitialized variable, so we should check 'skb' rather than 'cfd' after calling alloc_canfd_skb(priv->ndev, &cfd). Fixes: 55e5b97f003e ("can: mcp25xxfd: add driver for Microchip MCP25xxFD SPI CAN") Signed-off-by: Qinglang Miao Reviewed-by: Manivannan Sadhasivam Link: https://lore.kernel.org/r/20210113073100.79552-1-miaoqinglang@huawei.com Signed-off-by: Marc Kleine-Budde --- drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c index 36235afb0bc66..f07e8b737d31e 100644 --- a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c +++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c @@ -1491,7 +1491,7 @@ mcp251xfd_handle_rxif_one(struct mcp251xfd_priv *priv, else skb = alloc_can_skb(priv->ndev, (struct can_frame **)&cfd); - if (!cfd) { + if (!skb) { stats->rx_dropped++; return 0; } -- GitLab From e4ea77f8e53f9accb9371fba34c189d0447ecce0 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 11 Jan 2021 09:16:11 +0100 Subject: [PATCH 0663/2012] ALSA: usb-audio: Always apply the hw constraints for implicit fb sync Since the commit 5a6c3e11c9c9 ("ALSA: usb-audio: Add hw constraint for implicit fb sync"), we apply the hw constraints for the implicit feedback sync to make the secondary open aligned with the already opened stream setup. This change assumed that the secondary open is performed after the first stream has been already set up, and adds the hw constraints to sync with the first stream's parameters only when the EP setup for the first stream was confirmed at the open time. However, most of applications handling the full-duplex operations do open both playback and capture streams at first, then set up both streams. This results in skipping the additional hw constraints since the counter-part stream hasn't been set up yet at the open of the second stream, and it eventually leads to "incompatible EP" error in the end. This patch corrects the behavior by always applying the hw constraints for the implicit fb sync. The hw constraint rules are defined so that they check the sync EP dynamically at each invocation, instead. This covers the concurrent stream setups better and lets the hw refine calls resolving to the right configuration. Also this patch corrects a minor error that has existed in the debug print that isn't built as default. Fixes: 5a6c3e11c9c9 ("ALSA: usb-audio: Add hw constraint for implicit fb sync") Link: https://lore.kernel.org/r/20210111081611.12790-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/usb/pcm.c | 171 ++++++++++++++++++++++++++++++------------------ 1 file changed, 108 insertions(+), 63 deletions(-) diff --git a/sound/usb/pcm.c b/sound/usb/pcm.c index 56079901769fd..f71965bf815fd 100644 --- a/sound/usb/pcm.c +++ b/sound/usb/pcm.c @@ -663,7 +663,7 @@ static int hw_check_valid_format(struct snd_usb_substream *subs, check_fmts.bits[1] = (u32)(fp->formats >> 32); snd_mask_intersect(&check_fmts, fmts); if (snd_mask_empty(&check_fmts)) { - hwc_debug(" > check: no supported format %d\n", fp->format); + hwc_debug(" > check: no supported format 0x%llx\n", fp->formats); return 0; } /* check the channels */ @@ -775,24 +775,11 @@ static int hw_rule_channels(struct snd_pcm_hw_params *params, return apply_hw_params_minmax(it, rmin, rmax); } -static int hw_rule_format(struct snd_pcm_hw_params *params, - struct snd_pcm_hw_rule *rule) +static int apply_hw_params_format_bits(struct snd_mask *fmt, u64 fbits) { - struct snd_usb_substream *subs = rule->private; - const struct audioformat *fp; - struct snd_mask *fmt = hw_param_mask(params, SNDRV_PCM_HW_PARAM_FORMAT); - u64 fbits; u32 oldbits[2]; int changed; - hwc_debug("hw_rule_format: %x:%x\n", fmt->bits[0], fmt->bits[1]); - fbits = 0; - list_for_each_entry(fp, &subs->fmt_list, list) { - if (!hw_check_valid_format(subs, params, fp)) - continue; - fbits |= fp->formats; - } - oldbits[0] = fmt->bits[0]; oldbits[1] = fmt->bits[1]; fmt->bits[0] &= (u32)fbits; @@ -806,6 +793,24 @@ static int hw_rule_format(struct snd_pcm_hw_params *params, return changed; } +static int hw_rule_format(struct snd_pcm_hw_params *params, + struct snd_pcm_hw_rule *rule) +{ + struct snd_usb_substream *subs = rule->private; + const struct audioformat *fp; + struct snd_mask *fmt = hw_param_mask(params, SNDRV_PCM_HW_PARAM_FORMAT); + u64 fbits; + + hwc_debug("hw_rule_format: %x:%x\n", fmt->bits[0], fmt->bits[1]); + fbits = 0; + list_for_each_entry(fp, &subs->fmt_list, list) { + if (!hw_check_valid_format(subs, params, fp)) + continue; + fbits |= fp->formats; + } + return apply_hw_params_format_bits(fmt, fbits); +} + static int hw_rule_period_time(struct snd_pcm_hw_params *params, struct snd_pcm_hw_rule *rule) { @@ -833,64 +838,92 @@ static int hw_rule_period_time(struct snd_pcm_hw_params *params, return apply_hw_params_minmax(it, pmin, UINT_MAX); } -/* apply PCM hw constraints from the concurrent sync EP */ -static int apply_hw_constraint_from_sync(struct snd_pcm_runtime *runtime, - struct snd_usb_substream *subs) +/* get the EP or the sync EP for implicit fb when it's already set up */ +static const struct snd_usb_endpoint * +get_sync_ep_from_substream(struct snd_usb_substream *subs) { struct snd_usb_audio *chip = subs->stream->chip; - struct snd_usb_endpoint *ep; const struct audioformat *fp; - int err; + const struct snd_usb_endpoint *ep; list_for_each_entry(fp, &subs->fmt_list, list) { ep = snd_usb_get_endpoint(chip, fp->endpoint); if (ep && ep->cur_rate) - goto found; + return ep; if (!fp->implicit_fb) continue; /* for the implicit fb, check the sync ep as well */ ep = snd_usb_get_endpoint(chip, fp->sync_ep); if (ep && ep->cur_rate) - goto found; + return ep; } - return 0; + return NULL; +} + +/* additional hw constraints for implicit feedback mode */ +static int hw_rule_format_implicit_fb(struct snd_pcm_hw_params *params, + struct snd_pcm_hw_rule *rule) +{ + struct snd_usb_substream *subs = rule->private; + const struct snd_usb_endpoint *ep; + struct snd_mask *fmt = hw_param_mask(params, SNDRV_PCM_HW_PARAM_FORMAT); - found: - if (!find_format(&subs->fmt_list, ep->cur_format, ep->cur_rate, - ep->cur_channels, false, NULL)) { - usb_audio_dbg(chip, "EP 0x%x being used, but not applicable\n", - ep->ep_num); + ep = get_sync_ep_from_substream(subs); + if (!ep) return 0; - } - usb_audio_dbg(chip, "EP 0x%x being used, using fixed params:\n", - ep->ep_num); - usb_audio_dbg(chip, "rate=%d, period_size=%d, periods=%d\n", - ep->cur_rate, ep->cur_period_frames, - ep->cur_buffer_periods); + hwc_debug("applying %s\n", __func__); + return apply_hw_params_format_bits(fmt, pcm_format_to_bits(ep->cur_format)); +} - runtime->hw.formats = subs->formats; - runtime->hw.rate_min = runtime->hw.rate_max = ep->cur_rate; - runtime->hw.rates = SNDRV_PCM_RATE_KNOT; - runtime->hw.periods_min = runtime->hw.periods_max = - ep->cur_buffer_periods; +static int hw_rule_rate_implicit_fb(struct snd_pcm_hw_params *params, + struct snd_pcm_hw_rule *rule) +{ + struct snd_usb_substream *subs = rule->private; + const struct snd_usb_endpoint *ep; + struct snd_interval *it; - err = snd_pcm_hw_rule_add(runtime, 0, SNDRV_PCM_HW_PARAM_CHANNELS, - hw_rule_channels, subs, - SNDRV_PCM_HW_PARAM_FORMAT, - SNDRV_PCM_HW_PARAM_RATE, - -1); - if (err < 0) - return err; + ep = get_sync_ep_from_substream(subs); + if (!ep) + return 0; - err = snd_pcm_hw_constraint_minmax(runtime, - SNDRV_PCM_HW_PARAM_PERIOD_SIZE, - ep->cur_period_frames, - ep->cur_period_frames); - if (err < 0) - return err; + hwc_debug("applying %s\n", __func__); + it = hw_param_interval(params, SNDRV_PCM_HW_PARAM_RATE); + return apply_hw_params_minmax(it, ep->cur_rate, ep->cur_rate); +} + +static int hw_rule_period_size_implicit_fb(struct snd_pcm_hw_params *params, + struct snd_pcm_hw_rule *rule) +{ + struct snd_usb_substream *subs = rule->private; + const struct snd_usb_endpoint *ep; + struct snd_interval *it; - return 1; /* notify the finding */ + ep = get_sync_ep_from_substream(subs); + if (!ep) + return 0; + + hwc_debug("applying %s\n", __func__); + it = hw_param_interval(params, SNDRV_PCM_HW_PARAM_PERIOD_SIZE); + return apply_hw_params_minmax(it, ep->cur_period_frames, + ep->cur_period_frames); +} + +static int hw_rule_periods_implicit_fb(struct snd_pcm_hw_params *params, + struct snd_pcm_hw_rule *rule) +{ + struct snd_usb_substream *subs = rule->private; + const struct snd_usb_endpoint *ep; + struct snd_interval *it; + + ep = get_sync_ep_from_substream(subs); + if (!ep) + return 0; + + hwc_debug("applying %s\n", __func__); + it = hw_param_interval(params, SNDRV_PCM_HW_PARAM_PERIODS); + return apply_hw_params_minmax(it, ep->cur_buffer_periods, + ep->cur_buffer_periods); } /* @@ -899,20 +932,11 @@ static int apply_hw_constraint_from_sync(struct snd_pcm_runtime *runtime, static int setup_hw_info(struct snd_pcm_runtime *runtime, struct snd_usb_substream *subs) { - struct snd_usb_audio *chip = subs->stream->chip; const struct audioformat *fp; unsigned int pt, ptmin; int param_period_time_if_needed = -1; int err; - mutex_lock(&chip->mutex); - err = apply_hw_constraint_from_sync(runtime, subs); - mutex_unlock(&chip->mutex); - if (err < 0) - return err; - if (err > 0) /* found the matching? */ - goto add_extra_rules; - runtime->hw.formats = subs->formats; runtime->hw.rate_min = 0x7fffffff; @@ -964,7 +988,6 @@ static int setup_hw_info(struct snd_pcm_runtime *runtime, struct snd_usb_substre if (err < 0) return err; -add_extra_rules: err = snd_pcm_hw_rule_add(runtime, 0, SNDRV_PCM_HW_PARAM_CHANNELS, hw_rule_channels, subs, SNDRV_PCM_HW_PARAM_FORMAT, @@ -993,6 +1016,28 @@ add_extra_rules: return err; } + /* additional hw constraints for implicit fb */ + err = snd_pcm_hw_rule_add(runtime, 0, SNDRV_PCM_HW_PARAM_FORMAT, + hw_rule_format_implicit_fb, subs, + SNDRV_PCM_HW_PARAM_FORMAT, -1); + if (err < 0) + return err; + err = snd_pcm_hw_rule_add(runtime, 0, SNDRV_PCM_HW_PARAM_RATE, + hw_rule_rate_implicit_fb, subs, + SNDRV_PCM_HW_PARAM_RATE, -1); + if (err < 0) + return err; + err = snd_pcm_hw_rule_add(runtime, 0, SNDRV_PCM_HW_PARAM_PERIOD_SIZE, + hw_rule_period_size_implicit_fb, subs, + SNDRV_PCM_HW_PARAM_PERIOD_SIZE, -1); + if (err < 0) + return err; + err = snd_pcm_hw_rule_add(runtime, 0, SNDRV_PCM_HW_PARAM_PERIODS, + hw_rule_periods_implicit_fb, subs, + SNDRV_PCM_HW_PARAM_PERIODS, -1); + if (err < 0) + return err; + return 0; } -- GitLab From a2e38dffcd93541914aba52b30c6a52acca35201 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Tue, 5 Jan 2021 18:04:20 -0600 Subject: [PATCH 0664/2012] objtool: Don't add empty symbols to the rbtree Building with the Clang assembler shows the following warning: arch/x86/kernel/ftrace_64.o: warning: objtool: missing symbol for insn at offset 0x16 The Clang assembler strips section symbols. That ends up giving objtool's find_func_containing() much more test coverage than normal. Turns out, find_func_containing() doesn't work so well for overlapping symbols: 2: 000000000000000e 0 NOTYPE LOCAL DEFAULT 2 fgraph_trace 3: 000000000000000f 0 NOTYPE LOCAL DEFAULT 2 trace 4: 0000000000000000 165 FUNC GLOBAL DEFAULT 2 __fentry__ 5: 000000000000000e 0 NOTYPE GLOBAL DEFAULT 2 ftrace_stub The zero-length NOTYPE symbols are inside __fentry__(), confusing the rbtree search for any __fentry__() offset coming after a NOTYPE. Try to avoid this problem by not adding zero-length symbols to the rbtree. They're rare and aren't needed in the rbtree anyway. One caveat, this actually might not end up being the right fix. Non-empty overlapping symbols, if they exist, could have the same problem. But that would need bigger changes, let's see if we can get away with the easy fix for now. Reported-by: Arnd Bergmann Acked-by: Peter Zijlstra (Intel) Signed-off-by: Josh Poimboeuf --- tools/objtool/elf.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c index be89c741ba9a0..f9682db33ccab 100644 --- a/tools/objtool/elf.c +++ b/tools/objtool/elf.c @@ -448,6 +448,13 @@ static int read_symbols(struct elf *elf) list_add(&sym->list, entry); elf_hash_add(elf->symbol_hash, &sym->hash, sym->idx); elf_hash_add(elf->symbol_name_hash, &sym->name_hash, str_hash(sym->name)); + + /* + * Don't store empty STT_NOTYPE symbols in the rbtree. They + * can exist within a function, confusing the sorting. + */ + if (!sym->len) + rb_erase(&sym->node, &sym->sec->symbol_tree); } if (stats) -- GitLab From 838b00a2260ab6ec104a2a5696e619c19e8cd9be Mon Sep 17 00:00:00 2001 From: Paul Blakey Date: Mon, 11 Jan 2021 23:05:24 -0800 Subject: [PATCH 0665/2012] net/mlx5: Add HW definition of reg_c_preserve Add capability bit to test whether reg_c value is preserved on recirculation. Signed-off-by: Paul Blakey Signed-off-by: Maor Dickman Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed Signed-off-by: Jakub Kicinski --- include/linux/mlx5/mlx5_ifc.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 442c0160caab5..823411e288c01 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1278,7 +1278,9 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 reserved_at_a0[0x3]; u8 ece_support[0x1]; - u8 reserved_at_a4[0x7]; + u8 reserved_at_a4[0x5]; + u8 reg_c_preserve[0x1]; + u8 reserved_at_aa[0x1]; u8 log_max_srq[0x5]; u8 reserved_at_b0[0x1]; u8 uplink_follow[0x1]; -- GitLab From 21bad8da1e7728eb69bb4f4558058872cbf6b06e Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Mon, 11 Jan 2021 23:05:25 -0800 Subject: [PATCH 0666/2012] net/mlx5e: Simplify condition on esw_vport_enable_qos() esw->qos.enabled will only be true if both MLX5_CAP_GEN(dev, qos) and MLX5_CAP_QOS(dev, esw_scheduling) are true. Therefore, remove them from the condition in and rely only on esw->qos.enabled. Signed-off-by: Eli Cohen Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index da901e3646564..876e6449edb33 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -1042,8 +1042,7 @@ static int esw_vport_enable_qos(struct mlx5_eswitch *esw, void *vport_elem; int err = 0; - if (!esw->qos.enabled || !MLX5_CAP_GEN(dev, qos) || - !MLX5_CAP_QOS(dev, esw_scheduling)) + if (!esw->qos.enabled) return 0; if (vport->qos.enabled) -- GitLab From af4c2fab3ff4318b85e7e0ffe86e76d425cc7533 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Mon, 11 Jan 2021 23:05:26 -0800 Subject: [PATCH 0667/2012] net/mlx5: E-Switch, use new cap as condition for mpls over udp Use tunnel_stateless_mpls_over_udp instead of MLX5_FLEX_PROTO_CW_MPLS_UDP since new devices have native support for mpls over udp and do not rely on flex parser. Signed-off-by: Eli Cohen Reviewed-by: Roi Dayan Reviewed-by: Mark Bloch Signed-off-by: Saeed Mahameed Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_mplsoudp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_mplsoudp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_mplsoudp.c index 1f95262442221..3479672e84cf4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_mplsoudp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_mplsoudp.c @@ -81,8 +81,8 @@ static int parse_tunnel(struct mlx5e_priv *priv, if (!enc_keyid.mask->keyid) return 0; - if (!(MLX5_CAP_GEN(priv->mdev, flex_parser_protocols) & - MLX5_FLEX_PROTO_CW_MPLS_UDP)) + if (!MLX5_CAP_ETH(priv->mdev, tunnel_stateless_mpls_over_udp) && + !(MLX5_CAP_GEN(priv->mdev, flex_parser_protocols) & MLX5_FLEX_PROTO_CW_MPLS_UDP)) return -EOPNOTSUPP; flow_rule_match_mpls(rule, &match); -- GitLab From 9a99c8f1253a1a5854b008aa72d5a839bac2cebe Mon Sep 17 00:00:00 2001 From: Jianbo Liu Date: Mon, 11 Jan 2021 23:05:27 -0800 Subject: [PATCH 0668/2012] net/mlx5e: E-Switch, Offload all chain 0 priorities when modify header and forward action is not supported Miss path handling of tc multi chain filters (i.e. filters that are defined on chain > 0) requires the hardware to communicate to the driver the last chain that was processed. This is possible only when the hardware is capable of performing the combination of modify header and forward to table actions. Currently, if the hardware is missing this capability then the driver only offloads rules that are defined on tc chain 0 prio 1. However, this restriction can be relaxed because packets that miss from chain 0 are processed through all the priorities by tc software. Allow the offload of all the supported priorities for chain 0 even when the hardware is not capable to perform modify header and goto table actions. Signed-off-by: Jianbo Liu Reviewed-by: Oz Shlomo Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 6 ------ drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c | 3 --- 2 files changed, 9 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 4cdf834fa74af..56aa39ac1a1cd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -1317,12 +1317,6 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, int err = 0; int out_index; - if (!mlx5_chains_prios_supported(esw_chains(esw)) && attr->prio != 1) { - NL_SET_ERR_MSG_MOD(extack, - "E-switch priorities unsupported, upgrade FW"); - return -EOPNOTSUPP; - } - /* We check chain range only for tc flows. * For ft flows, we checked attr->chain was originally 0 and set it to * FDB_FT_CHAIN which is outside tc range. diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c index 947f346bdc2d6..fa61d305990ce 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c @@ -141,9 +141,6 @@ u32 mlx5_chains_get_nf_ft_chain(struct mlx5_fs_chains *chains) u32 mlx5_chains_get_prio_range(struct mlx5_fs_chains *chains) { - if (!mlx5_chains_prios_supported(chains)) - return 1; - if (mlx5_chains_ignore_flow_level_supported(chains)) return UINT_MAX; -- GitLab From f822cf86af0003b40292624db91c401682cf7d65 Mon Sep 17 00:00:00 2001 From: Roi Dayan Date: Mon, 11 Jan 2021 23:05:28 -0800 Subject: [PATCH 0669/2012] net/mlx5e: CT: Pass null instead of zero spec No need to pass zero spec to mlx5_add_flow_rules() as the function can handle null spec. Signed-off-by: Roi Dayan Reviewed-by: Oz Shlomo Reviewed-by: Paul Blakey Signed-off-by: Saeed Mahameed Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c index 072363e73f1ce..97bfc42e39131 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c @@ -1241,9 +1241,8 @@ static int tc_ct_pre_ct_add_rules(struct mlx5_ct_ft *ct_ft, pre_ct->flow_rule = rule; /* add miss rule */ - memset(spec, 0, sizeof(*spec)); dest.ft = nat ? ct_priv->ct_nat : ct_priv->ct; - rule = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1); + rule = mlx5_add_flow_rules(ft, NULL, &flow_act, &dest, 1); if (IS_ERR(rule)) { err = PTR_ERR(rule); ct_dbg("Failed to add pre ct miss rule zone %d", zone); -- GitLab From 8d2c5e7557182b45afce07ac1806998c03f0fc21 Mon Sep 17 00:00:00 2001 From: Roi Dayan Date: Mon, 11 Jan 2021 23:05:29 -0800 Subject: [PATCH 0670/2012] net/mlx5e: Remove redundant initialization to null miss_rule and prio_s args are not being referenced before assigned so there is no need to init them. Signed-off-by: Roi Dayan Reviewed-by: Oz Shlomo Reviewed-by: Paul Blakey Signed-off-by: Saeed Mahameed Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c index fa61d305990ce..381325b4a863e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c @@ -538,13 +538,13 @@ mlx5_chains_create_prio(struct mlx5_fs_chains *chains, u32 chain, u32 prio, u32 level) { int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); - struct mlx5_flow_handle *miss_rule = NULL; + struct mlx5_flow_handle *miss_rule; struct mlx5_flow_group *miss_group; struct mlx5_flow_table *next_ft; struct mlx5_flow_table *ft; - struct prio *prio_s = NULL; struct fs_chain *chain_s; struct list_head *pos; + struct prio *prio_s; u32 *flow_group_in; int err; -- GitLab From 763e1e547f866c46831b709ec9d1222235a275b4 Mon Sep 17 00:00:00 2001 From: Roi Dayan Date: Mon, 11 Jan 2021 23:05:30 -0800 Subject: [PATCH 0671/2012] net/mlx5e: CT: Remove redundant usage of zone mask The zone member is of type u16 so there is no reason to apply the zone mask on it. This is also matching the call to set a match in other places which don't need and don't apply the mask. Signed-off-by: Roi Dayan Reviewed-by: Paul Blakey Signed-off-by: Saeed Mahameed Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c index 97bfc42e39131..e20c1da95a33f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c @@ -705,9 +705,7 @@ mlx5_tc_ct_entry_add_rule(struct mlx5_tc_ct_priv *ct_priv, attr->flags |= MLX5_ESW_ATTR_FLAG_NO_IN_PORT; mlx5_tc_ct_set_tuple_match(netdev_priv(ct_priv->netdev), spec, flow_rule); - mlx5e_tc_match_to_reg_match(spec, ZONE_TO_REG, - entry->tuple.zone & MLX5_CT_ZONE_MASK, - MLX5_CT_ZONE_MASK); + mlx5e_tc_match_to_reg_match(spec, ZONE_TO_REG, entry->tuple.zone, MLX5_CT_ZONE_MASK); zone_rule->rule = mlx5_tc_rule_insert(priv, spec, attr); if (IS_ERR(zone_rule->rule)) { -- GitLab From 3a28eda94c8c5da02e708d4dd141595883aeb2c0 Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Mon, 11 Jan 2021 23:05:31 -0800 Subject: [PATCH 0672/2012] net/mlx5e: IPsec, Enclose csum logic under ipsec config All IPsec logic should be wrapped under the compile flag, including its checksum logic. Introduce an inline function in ipsec datapath header, with a corresponding stub. Signed-off-by: Tariq Toukan Reviewed-by: Raed Salem Reviewed-by: Huy Nguyen Signed-off-by: Saeed Mahameed Signed-off-by: Jakub Kicinski --- .../ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h | 10 ++++++++++ drivers/net/ethernet/mellanox/mlx5/core/en_tx.c | 3 +-- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h index 9df9b9a8e09ba..fabf4b6b2b848 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h @@ -87,6 +87,11 @@ static inline bool mlx5e_ipsec_is_tx_flow(struct mlx5e_accel_tx_ipsec_state *ips return ipsec_st->x; } +static inline bool mlx5e_ipsec_eseg_meta(struct mlx5_wqe_eth_seg *eseg) +{ + return eseg->flow_table_metadata & cpu_to_be32(MLX5_ETH_WQE_FT_META_IPSEC); +} + void mlx5e_ipsec_tx_build_eseg(struct mlx5e_priv *priv, struct sk_buff *skb, struct mlx5_wqe_eth_seg *eseg); #else @@ -96,6 +101,11 @@ void mlx5e_ipsec_offload_handle_rx_skb(struct net_device *netdev, struct mlx5_cqe64 *cqe) {} +static inline bool mlx5e_ipsec_eseg_meta(struct mlx5_wqe_eth_seg *eseg) +{ + return false; +} + static inline bool mlx5_ipsec_is_rx_flow(struct mlx5_cqe64 *cqe) { return false; } #endif /* CONFIG_MLX5_EN_IPSEC */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c index 61ed671fe741b..74f233eece549 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c @@ -241,9 +241,8 @@ mlx5e_txwqe_build_eseg_csum(struct mlx5e_txqsq *sq, struct sk_buff *skb, eseg->cs_flags = MLX5_ETH_WQE_L3_CSUM | MLX5_ETH_WQE_L4_CSUM; sq->stats->csum_partial++; #endif - } else if (unlikely(eseg->flow_table_metadata & cpu_to_be32(MLX5_ETH_WQE_FT_META_IPSEC))) { + } else if (unlikely(mlx5e_ipsec_eseg_meta(eseg))) { ipsec_txwqe_build_eseg_csum(sq, skb, eseg); - } else sq->stats->csum_none++; } -- GitLab From f3bea940b12c75f8c5330b8c0ef6751b79caab5f Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Mon, 11 Jan 2021 23:05:32 -0800 Subject: [PATCH 0673/2012] net/mlx5e: IPsec, Avoid unreachable return Simple code improvement, move default return operation under the #else block. Signed-off-by: Tariq Toukan Reviewed-by: Huy Nguyen Signed-off-by: Saeed Mahameed Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h index 1fae7fab8297e..6488098d27006 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h @@ -144,9 +144,9 @@ static inline bool mlx5e_accel_tx_is_ipsec_flow(struct mlx5e_accel_tx_state *sta { #ifdef CONFIG_MLX5_EN_IPSEC return mlx5e_ipsec_is_tx_flow(&state->ipsec); -#endif - +#else return false; +#endif } static inline unsigned int mlx5e_accel_tx_ids_len(struct mlx5e_txqsq *sq, -- GitLab From a29adad5860e91c57450aed0b351b65d99e149e6 Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Mon, 11 Jan 2021 23:05:33 -0800 Subject: [PATCH 0674/2012] net/mlx5e: IPsec, Inline feature_check fast-path function Feature check functions are in the TX fast-path of all SKBs, not only IPsec traffic. Move the IPsec feature check function into a header and turn it inline. Use a stub and clean the config flag condition in Eth main driver file. Signed-off-by: Tariq Toukan Reviewed-by: Raed Salem Reviewed-by: Huy Nguyen Signed-off-by: Saeed Mahameed Signed-off-by: Jakub Kicinski --- .../mellanox/mlx5/core/en_accel/ipsec_rxtx.c | 14 -------------- .../mellanox/mlx5/core/en_accel/ipsec_rxtx.h | 19 +++++++++++++++++-- .../net/ethernet/mellanox/mlx5/core/en_main.c | 2 -- 3 files changed, 17 insertions(+), 18 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c index a9b45606dbdb7..a97e8d205094d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c @@ -497,20 +497,6 @@ void mlx5e_ipsec_offload_handle_rx_skb(struct net_device *netdev, } } -bool mlx5e_ipsec_feature_check(struct sk_buff *skb, struct net_device *netdev, - netdev_features_t features) -{ - struct sec_path *sp = skb_sec_path(skb); - struct xfrm_state *x; - - if (sp && sp->len) { - x = sp->xvec[0]; - if (x && x->xso.offload_handle) - return true; - } - return false; -} - void mlx5e_ipsec_build_inverse_table(void) { u16 mss_inv; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h index fabf4b6b2b848..3e80742a3caf5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h @@ -57,8 +57,6 @@ struct sk_buff *mlx5e_ipsec_handle_rx_skb(struct net_device *netdev, struct sk_buff *skb, u32 *cqe_bcnt); void mlx5e_ipsec_inverse_table_init(void); -bool mlx5e_ipsec_feature_check(struct sk_buff *skb, struct net_device *netdev, - netdev_features_t features); void mlx5e_ipsec_set_iv_esn(struct sk_buff *skb, struct xfrm_state *x, struct xfrm_offload *xo); void mlx5e_ipsec_set_iv(struct sk_buff *skb, struct xfrm_state *x, @@ -94,6 +92,21 @@ static inline bool mlx5e_ipsec_eseg_meta(struct mlx5_wqe_eth_seg *eseg) void mlx5e_ipsec_tx_build_eseg(struct mlx5e_priv *priv, struct sk_buff *skb, struct mlx5_wqe_eth_seg *eseg); + +static inline bool mlx5e_ipsec_feature_check(struct sk_buff *skb, struct net_device *netdev, + netdev_features_t features) +{ + struct sec_path *sp = skb_sec_path(skb); + + if (sp && sp->len) { + struct xfrm_state *x = sp->xvec[0]; + + if (x && x->xso.offload_handle) + return true; + } + return false; +} + #else static inline void mlx5e_ipsec_offload_handle_rx_skb(struct net_device *netdev, @@ -107,6 +120,8 @@ static inline bool mlx5e_ipsec_eseg_meta(struct mlx5_wqe_eth_seg *eseg) } static inline bool mlx5_ipsec_is_rx_flow(struct mlx5_cqe64 *cqe) { return false; } +static inline bool mlx5e_ipsec_feature_check(struct sk_buff *skb, struct net_device *netdev, + netdev_features_t features) { return false; } #endif /* CONFIG_MLX5_EN_IPSEC */ #endif /* __MLX5E_IPSEC_RXTX_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 1f66756f66f99..ee86b2b57f4dd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -4376,10 +4376,8 @@ netdev_features_t mlx5e_features_check(struct sk_buff *skb, features = vlan_features_check(skb, features); features = vxlan_features_check(skb, features); -#ifdef CONFIG_MLX5_EN_IPSEC if (mlx5e_ipsec_feature_check(skb, netdev, features)) return features; -#endif /* Validate if the tunneled packet is being offloaded by HW */ if (skb->encapsulation && -- GitLab From 224169d2a32b2fbb5fde366c6bc17b66c06fc005 Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Mon, 11 Jan 2021 23:05:34 -0800 Subject: [PATCH 0675/2012] net/mlx5e: IPsec, Remove unnecessary config flag usage MLX5_IPSEC_DEV() is always defined, no need to protect it under config flag CONFIG_MLX5_EN_IPSEC, especially in slow path. Signed-off-by: Tariq Toukan Reviewed-by: Raed Salem Reviewed-by: Huy Nguyen Signed-off-by: Saeed Mahameed Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 2 -- drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 2 -- 2 files changed, 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index ee86b2b57f4dd..f33c38629886a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -2068,10 +2068,8 @@ static void mlx5e_build_rq_frags_info(struct mlx5_core_dev *mdev, u32 buf_size = 0; int i; -#ifdef CONFIG_MLX5_EN_IPSEC if (MLX5_IPSEC_DEV(mdev)) byte_count += MLX5E_METADATA_ETHER_LEN; -#endif if (mlx5e_rx_is_linear_skb(params, xsk)) { int frag_stride; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 7f5851c612181..cb8e3d2b47504 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -1786,12 +1786,10 @@ int mlx5e_rq_set_handlers(struct mlx5e_rq *rq, struct mlx5e_params *params, bool rq->dealloc_wqe = mlx5e_dealloc_rx_mpwqe; rq->handle_rx_cqe = priv->profile->rx_handlers->handle_rx_cqe_mpwqe; -#ifdef CONFIG_MLX5_EN_IPSEC if (MLX5_IPSEC_DEV(mdev)) { netdev_err(netdev, "MPWQE RQ with IPSec offload not supported\n"); return -EINVAL; } -#endif if (!rq->handle_rx_cqe) { netdev_err(netdev, "RX handler of MPWQE RQ is not set\n"); return -EINVAL; -- GitLab From 1f02efd1bb35bee95feed6aab46d1217f29d555b Mon Sep 17 00:00:00 2001 From: Seb Laveze Date: Tue, 12 Jan 2021 15:01:22 +0100 Subject: [PATCH 0676/2012] net: stmmac: use __napi_schedule() for PREEMPT_RT Use of __napi_schedule_irqoff() is not safe with PREEMPT_RT in which hard interrupts are not disabled while running the threaded interrupt. Using __napi_schedule() works for both PREEMPT_RT and mainline Linux, just at the cost of an additional check if interrupts are disabled for mainline (since they are already disabled). Similar to the fix done for enetc commit 215602a8d212 ("enetc: use napi_schedule to be compatible with PREEMPT_RT") Signed-off-by: Seb Laveze Link: https://lore.kernel.org/r/20210112140121.1487619-1-sebastien.laveze@oss.nxp.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 5b1c12ff98c05..2d90d6856ec58 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -2184,7 +2184,7 @@ static int stmmac_napi_check(struct stmmac_priv *priv, u32 chan) spin_lock_irqsave(&ch->lock, flags); stmmac_disable_dma_irq(priv, priv->ioaddr, chan, 1, 0); spin_unlock_irqrestore(&ch->lock, flags); - __napi_schedule_irqoff(&ch->rx_napi); + __napi_schedule(&ch->rx_napi); } } @@ -2193,7 +2193,7 @@ static int stmmac_napi_check(struct stmmac_priv *priv, u32 chan) spin_lock_irqsave(&ch->lock, flags); stmmac_disable_dma_irq(priv, priv->ioaddr, chan, 0, 1); spin_unlock_irqrestore(&ch->lock, flags); - __napi_schedule_irqoff(&ch->tx_napi); + __napi_schedule(&ch->tx_napi); } } -- GitLab From e6dc077b7dffdc01d9c45a5a1f4caf1e51c756a3 Mon Sep 17 00:00:00 2001 From: Stafford Horne Date: Tue, 12 Jan 2021 21:29:24 +0900 Subject: [PATCH 0677/2012] soc: litex: Fix compile warning when device tree is not configured The test robot reported: drivers/soc/litex/litex_soc_ctrl.c:143:34: warning: unused variable 'litex_soc_ctrl_of_match' [-Wunused-const-variable] static const struct of_device_id litex_soc_ctrl_of_match[] = { ^ 1 warning generated. As per the random config device tree is not configured causing the litex_soc_ctrl_of_match match list to not be used. This would usually mean that we cannot even use this driver as it depends on device tree, but as we also have COMPILE_TEST configured we allow it. Fix the warning by surrounding the unused variable with an ifdef CONFIG_OF. Reported-by: kernel test robot Signed-off-by: Stafford Horne --- drivers/soc/litex/litex_soc_ctrl.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/soc/litex/litex_soc_ctrl.c b/drivers/soc/litex/litex_soc_ctrl.c index 1217cafdfd4d1..9b07663845708 100644 --- a/drivers/soc/litex/litex_soc_ctrl.c +++ b/drivers/soc/litex/litex_soc_ctrl.c @@ -140,12 +140,13 @@ struct litex_soc_ctrl_device { void __iomem *base; }; +#ifdef CONFIG_OF static const struct of_device_id litex_soc_ctrl_of_match[] = { {.compatible = "litex,soc-controller"}, {}, }; - MODULE_DEVICE_TABLE(of, litex_soc_ctrl_of_match); +#endif /* CONFIG_OF */ static int litex_soc_ctrl_probe(struct platform_device *pdev) { -- GitLab From de11ae4f56fdc53474c08b03142860428e6c5655 Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Wed, 13 Jan 2021 17:33:16 +0100 Subject: [PATCH 0678/2012] selftests/bpf: Enable cross-building Build bpftool and resolve_btfids using the host toolchain when cross-compiling, since they are executed during build to generate the selftests. Add a host build directory in order to build both host and target version of libbpf. Build host tools using $(HOSTCC) defined in Makefile.include. Signed-off-by: Jean-Philippe Brucker Signed-off-by: Andrii Nakryiko Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20210113163319.1516382-2-jean-philippe@linaro.org --- tools/testing/selftests/bpf/Makefile | 46 +++++++++++++++++++++------- 1 file changed, 35 insertions(+), 11 deletions(-) diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index c51df6b91befe..95ce81513648d 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -1,6 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 include ../../../../scripts/Kbuild.include include ../../../scripts/Makefile.arch +include ../../../scripts/Makefile.include CXX ?= $(CROSS_COMPILE)g++ @@ -113,7 +114,15 @@ SCRATCH_DIR := $(OUTPUT)/tools BUILD_DIR := $(SCRATCH_DIR)/build INCLUDE_DIR := $(SCRATCH_DIR)/include BPFOBJ := $(BUILD_DIR)/libbpf/libbpf.a -RESOLVE_BTFIDS := $(BUILD_DIR)/resolve_btfids/resolve_btfids +ifneq ($(CROSS_COMPILE),) +HOST_BUILD_DIR := $(BUILD_DIR)/host +HOST_SCRATCH_DIR := $(OUTPUT)/host-tools +else +HOST_BUILD_DIR := $(BUILD_DIR) +HOST_SCRATCH_DIR := $(SCRATCH_DIR) +endif +HOST_BPFOBJ := $(HOST_BUILD_DIR)/libbpf/libbpf.a +RESOLVE_BTFIDS := $(HOST_BUILD_DIR)/resolve_btfids/resolve_btfids VMLINUX_BTF_PATHS ?= $(if $(O),$(O)/vmlinux) \ $(if $(KBUILD_OUTPUT),$(KBUILD_OUTPUT)/vmlinux) \ @@ -135,6 +144,14 @@ $(notdir $(TEST_GEN_PROGS) \ $(TEST_GEN_PROGS_EXTENDED) \ $(TEST_CUSTOM_PROGS)): %: $(OUTPUT)/% ; +# sort removes libbpf duplicates when not cross-building +MAKE_DIRS := $(sort $(BUILD_DIR)/libbpf $(HOST_BUILD_DIR)/libbpf \ + $(HOST_BUILD_DIR)/bpftool $(HOST_BUILD_DIR)/resolve_btfids \ + $(INCLUDE_DIR)) +$(MAKE_DIRS): + $(call msg,MKDIR,,$@) + $(Q)mkdir -p $@ + $(OUTPUT)/%.o: %.c $(call msg,CC,,$@) $(Q)$(CC) $(CFLAGS) -c $(filter %.c,$^) $(LDLIBS) -o $@ @@ -157,7 +174,7 @@ $(OUTPUT)/test_stub.o: test_stub.c $(BPFOBJ) $(call msg,CC,,$@) $(Q)$(CC) -c $(CFLAGS) -o $@ $< -DEFAULT_BPFTOOL := $(SCRATCH_DIR)/sbin/bpftool +DEFAULT_BPFTOOL := $(HOST_SCRATCH_DIR)/sbin/bpftool $(OUTPUT)/runqslower: $(BPFOBJ) | $(DEFAULT_BPFTOOL) $(Q)$(MAKE) $(submake_extras) -C $(TOOLSDIR)/bpf/runqslower \ @@ -182,10 +199,11 @@ $(OUTPUT)/test_sysctl: cgroup_helpers.c BPFTOOL ?= $(DEFAULT_BPFTOOL) $(DEFAULT_BPFTOOL): $(wildcard $(BPFTOOLDIR)/*.[ch] $(BPFTOOLDIR)/Makefile) \ - $(BPFOBJ) | $(BUILD_DIR)/bpftool + $(HOST_BPFOBJ) | $(HOST_BUILD_DIR)/bpftool $(Q)$(MAKE) $(submake_extras) -C $(BPFTOOLDIR) \ - OUTPUT=$(BUILD_DIR)/bpftool/ \ - prefix= DESTDIR=$(SCRATCH_DIR)/ install + CC=$(HOSTCC) LD=$(HOSTLD) \ + OUTPUT=$(HOST_BUILD_DIR)/bpftool/ \ + prefix= DESTDIR=$(HOST_SCRATCH_DIR)/ install $(Q)mkdir -p $(BUILD_DIR)/bpftool/Documentation $(Q)RST2MAN_OPTS="--exit-status=1" $(MAKE) $(submake_extras) \ -C $(BPFTOOLDIR)/Documentation \ @@ -198,9 +216,14 @@ $(BPFOBJ): $(wildcard $(BPFDIR)/*.[ch] $(BPFDIR)/Makefile) \ $(Q)$(MAKE) $(submake_extras) -C $(BPFDIR) OUTPUT=$(BUILD_DIR)/libbpf/ \ DESTDIR=$(SCRATCH_DIR) prefix= all install_headers -$(BUILD_DIR)/libbpf $(BUILD_DIR)/bpftool $(BUILD_DIR)/resolve_btfids $(INCLUDE_DIR): - $(call msg,MKDIR,,$@) - $(Q)mkdir -p $@ +ifneq ($(BPFOBJ),$(HOST_BPFOBJ)) +$(HOST_BPFOBJ): $(wildcard $(BPFDIR)/*.[ch] $(BPFDIR)/Makefile) \ + ../../../include/uapi/linux/bpf.h \ + | $(INCLUDE_DIR) $(HOST_BUILD_DIR)/libbpf + $(Q)$(MAKE) $(submake_extras) -C $(BPFDIR) \ + OUTPUT=$(HOST_BUILD_DIR)/libbpf/ CC=$(HOSTCC) LD=$(HOSTLD) \ + DESTDIR=$(HOST_SCRATCH_DIR)/ prefix= all install_headers +endif $(INCLUDE_DIR)/vmlinux.h: $(VMLINUX_BTF) | $(BPFTOOL) $(INCLUDE_DIR) ifeq ($(VMLINUX_H),) @@ -211,7 +234,7 @@ else $(Q)cp "$(VMLINUX_H)" $@ endif -$(RESOLVE_BTFIDS): $(BPFOBJ) | $(BUILD_DIR)/resolve_btfids \ +$(RESOLVE_BTFIDS): $(HOST_BPFOBJ) | $(HOST_BUILD_DIR)/resolve_btfids \ $(TOOLSDIR)/bpf/resolve_btfids/main.c \ $(TOOLSDIR)/lib/rbtree.c \ $(TOOLSDIR)/lib/zalloc.c \ @@ -219,7 +242,8 @@ $(RESOLVE_BTFIDS): $(BPFOBJ) | $(BUILD_DIR)/resolve_btfids \ $(TOOLSDIR)/lib/ctype.c \ $(TOOLSDIR)/lib/str_error_r.c $(Q)$(MAKE) $(submake_extras) -C $(TOOLSDIR)/bpf/resolve_btfids \ - OUTPUT=$(BUILD_DIR)/resolve_btfids/ BPFOBJ=$(BPFOBJ) + CC=$(HOSTCC) LD=$(HOSTLD) AR=$(HOSTAR) \ + OUTPUT=$(HOST_BUILD_DIR)/resolve_btfids/ BPFOBJ=$(HOST_BPFOBJ) # Get Clang's default includes on this system, as opposed to those seen by # '-target bpf'. This fixes "missing" files on some architectures/distros, @@ -450,7 +474,7 @@ $(OUTPUT)/bench: $(OUTPUT)/bench.o $(OUTPUT)/testing_helpers.o \ $(call msg,BINARY,,$@) $(Q)$(CC) $(LDFLAGS) -o $@ $(filter %.a %.o,$^) $(LDLIBS) -EXTRA_CLEAN := $(TEST_CUSTOM_PROGS) $(SCRATCH_DIR) \ +EXTRA_CLEAN := $(TEST_CUSTOM_PROGS) $(SCRATCH_DIR) $(HOST_SCRATCH_DIR) \ prog_tests/tests.h map_tests/tests.h verifier/tests.h \ feature \ $(addprefix $(OUTPUT)/,*.o *.skel.h no_alu32 bpf_gcc bpf_testmod.ko) -- GitLab From 5837cedef6f33e01d1c4ad90ee4e966bcc6f9c30 Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Wed, 13 Jan 2021 17:33:17 +0100 Subject: [PATCH 0679/2012] selftests/bpf: Fix out-of-tree build When building out-of-tree, the .skel.h files are generated into the $(OUTPUT) directory, rather than $(CURDIR). Add $(OUTPUT) to the include paths. Signed-off-by: Jean-Philippe Brucker Signed-off-by: Andrii Nakryiko Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20210113163319.1516382-3-jean-philippe@linaro.org --- tools/testing/selftests/bpf/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 95ce81513648d..92888eed89f30 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -25,7 +25,7 @@ BPF_GCC ?= $(shell command -v bpf-gcc;) SAN_CFLAGS ?= CFLAGS += -g -rdynamic -Wall -O2 $(GENFLAGS) $(SAN_CFLAGS) \ -I$(CURDIR) -I$(INCLUDE_DIR) -I$(GENDIR) -I$(LIBDIR) \ - -I$(TOOLSINCDIR) -I$(APIDIR) \ + -I$(TOOLSINCDIR) -I$(APIDIR) -I$(OUTPUT) \ -Dbpf_prog_load=bpf_prog_test_load \ -Dbpf_load_program=bpf_test_load_program LDLIBS += -lcap -lelf -lz -lrt -lpthread -- GitLab From d6ac8cad50f0c0fed5cfeb61301bb19e8e88985c Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Wed, 13 Jan 2021 17:33:18 +0100 Subject: [PATCH 0680/2012] selftests/bpf: Move generated test files to $(TEST_GEN_FILES) During an out-of-tree build, attempting to install the $(TEST_FILES) into the $(OUTPUT) directory fails, because the objects were already generated into $(OUTPUT): rsync: [sender] link_stat "tools/testing/selftests/bpf/test_lwt_ip_encap.o" failed: No such file or directory (2) rsync: [sender] link_stat "tools/testing/selftests/bpf/test_tc_edt.o" failed: No such file or directory (2) Use $(TEST_GEN_FILES) instead. Signed-off-by: Jean-Philippe Brucker Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20210113163319.1516382-4-jean-philippe@linaro.org --- tools/testing/selftests/bpf/Makefile | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 92888eed89f30..67cdf858f01fe 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -44,10 +44,9 @@ ifneq ($(BPF_GCC),) TEST_GEN_PROGS += test_progs-bpf_gcc endif -TEST_GEN_FILES = -TEST_FILES = test_lwt_ip_encap.o \ - test_tc_edt.o \ - xsk_prereqs.sh +TEST_GEN_FILES = test_lwt_ip_encap.o \ + test_tc_edt.o +TEST_FILES = xsk_prereqs.sh # Order correspond to 'make run_tests' order TEST_PROGS := test_kmod.sh \ -- GitLab From ca1e846711a8f49382005eb5feb82973fa88a849 Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Wed, 13 Jan 2021 17:33:19 +0100 Subject: [PATCH 0681/2012] selftests/bpf: Fix installation of urandom_read For out-of-tree builds, $(TEST_CUSTOM_PROGS) require the $(OUTPUT) prefix, otherwise the kselftest lib doesn't know how to install them: rsync: [sender] link_stat "tools/testing/selftests/bpf/urandom_read" failed: No such file or directory (2) Signed-off-by: Jean-Philippe Brucker Signed-off-by: Andrii Nakryiko Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20210113163319.1516382-5-jean-philippe@linaro.org --- tools/testing/selftests/bpf/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 67cdf858f01fe..0fafdc022ac36 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -82,7 +82,7 @@ TEST_GEN_PROGS_EXTENDED = test_sock_addr test_skb_cgroup_id_user \ test_lirc_mode2_user xdping test_cpp runqslower bench bpf_testmod.ko \ xdpxceiver -TEST_CUSTOM_PROGS = urandom_read +TEST_CUSTOM_PROGS = $(OUTPUT)/urandom_read # Emit succinct information message describing current building step # $1 - generic step name (e.g., CC, LINK, etc); -- GitLab From b8d1cbef2ea4415edcdb5a825972d93b63fcf63c Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Wed, 13 Jan 2021 17:33:20 +0100 Subject: [PATCH 0682/2012] selftests/bpf: Install btf_dump test cases The btf_dump test cannot access the original source files for comparison when running the selftests out of tree, causing several failures: awk: btf_dump_test_case_syntax.c: No such file or directory ... Add those files to $(TEST_FILES) to have "make install" pick them up. Signed-off-by: Jean-Philippe Brucker Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20210113163319.1516382-6-jean-philippe@linaro.org --- tools/testing/selftests/bpf/Makefile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 0fafdc022ac36..7f8667ad113e5 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -46,7 +46,8 @@ endif TEST_GEN_FILES = test_lwt_ip_encap.o \ test_tc_edt.o -TEST_FILES = xsk_prereqs.sh +TEST_FILES = xsk_prereqs.sh \ + $(wildcard progs/btf_dump_test_case_*.c) # Order correspond to 'make run_tests' order TEST_PROGS := test_kmod.sh \ -- GitLab From 324cefaf1c723625e93f703d6e6d78e28996b315 Mon Sep 17 00:00:00 2001 From: Menglong Dong Date: Mon, 11 Jan 2021 02:42:21 -0800 Subject: [PATCH 0683/2012] net: core: use eth_type_vlan in __netif_receive_skb_core Replace the check for ETH_P_8021Q and ETH_P_8021AD in __netif_receive_skb_core with eth_type_vlan. Signed-off-by: Menglong Dong Link: https://lore.kernel.org/r/20210111104221.3451-1-dong.menglong@zte.com.cn Signed-off-by: Jakub Kicinski --- net/core/dev.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/net/core/dev.c b/net/core/dev.c index e4d77c8abe761..267c4a8daa550 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5151,8 +5151,7 @@ another_round: skb_reset_mac_len(skb); } - if (skb->protocol == cpu_to_be16(ETH_P_8021Q) || - skb->protocol == cpu_to_be16(ETH_P_8021AD)) { + if (eth_type_vlan(skb->protocol)) { skb = skb_vlan_untag(skb); if (unlikely(!skb)) goto out; @@ -5236,8 +5235,7 @@ check_vlan_id: * find vlan device. */ skb->pkt_type = PACKET_OTHERHOST; - } else if (skb->protocol == cpu_to_be16(ETH_P_8021Q) || - skb->protocol == cpu_to_be16(ETH_P_8021AD)) { + } else if (eth_type_vlan(skb->protocol)) { /* Outer header is 802.1P with vlan 0, inner header is * 802.1Q or 802.1AD and vlan_do_receive() above could * not find vlan dev for vlan id 0. -- GitLab From ce5a518e9de53446f10d46fac98640f7ac026100 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Wed, 13 Jan 2021 14:36:08 -0800 Subject: [PATCH 0684/2012] bpf, libbpf: Avoid unused function warning on bpf_tail_call_static Add inline to __always_inline making it match the linux/compiler.h. Adding this avoids an unused function warning on bpf_tail_call_static when compining with -Wall. Signed-off-by: Ian Rogers Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20210113223609.3358812-1-irogers@google.com --- tools/lib/bpf/bpf_helpers.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/lib/bpf/bpf_helpers.h b/tools/lib/bpf/bpf_helpers.h index 72b251110c4d7..ae6c975e0b87b 100644 --- a/tools/lib/bpf/bpf_helpers.h +++ b/tools/lib/bpf/bpf_helpers.h @@ -30,7 +30,7 @@ #define SEC(NAME) __attribute__((section(NAME), used)) #ifndef __always_inline -#define __always_inline __attribute__((always_inline)) +#define __always_inline inline __attribute__((always_inline)) #endif #ifndef __noinline #define __noinline __attribute__((noinline)) -- GitLab From bade5c554f1ac70a50cefe96517957629dbc0d8f Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Wed, 13 Jan 2021 14:36:09 -0800 Subject: [PATCH 0685/2012] tools/bpftool: Add -Wall when building BPF programs No additional warnings are generated by enabling this, but having it enabled will help avoid regressions. Signed-off-by: Ian Rogers Signed-off-by: Andrii Nakryiko Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20210113223609.3358812-2-irogers@google.com --- tools/bpf/bpftool/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile index f897cb5fb12d0..45ac2f9e0aa91 100644 --- a/tools/bpf/bpftool/Makefile +++ b/tools/bpf/bpftool/Makefile @@ -166,7 +166,7 @@ $(OUTPUT)%.bpf.o: skeleton/%.bpf.c $(OUTPUT)vmlinux.h $(LIBBPF) -I$(srctree)/tools/include/uapi/ \ -I$(LIBBPF_PATH) \ -I$(srctree)/tools/lib \ - -g -O2 -target bpf -c $< -o $@ && $(LLVM_STRIP) -g $@ + -g -O2 -Wall -target bpf -c $< -o $@ && $(LLVM_STRIP) -g $@ $(OUTPUT)%.skel.h: $(OUTPUT)%.bpf.o $(BPFTOOL_BOOTSTRAP) $(QUIET_GEN)$(BPFTOOL_BOOTSTRAP) gen skeleton $< > $@ -- GitLab From b7cf966126eb16c96bcd13e5ba63657b7e2baef2 Mon Sep 17 00:00:00 2001 From: Naveen Mamindlapalli Date: Mon, 11 Jan 2021 16:55:37 +0530 Subject: [PATCH 0686/2012] octeontx2-pf: Add flow classification using IP next level protocol This patch adds support to install flow rules using ipv4 proto or ipv6 next header field to distinguish between tcp/udp/sctp/esp/ah flows when user doesn't specify the other match creteria related to the flow such as tcp/udp/sctp source port and destination port, ah/esp spi value. This is achieved by matching the layer type extracted by NPC HW into the search key. Modified the driver to use Ethertype as match criteria when user doesn't specify source IP address and dest IP address. The esp/ah flow rule matching using security parameter index (spi) is not supported as of now since the field is not extracted into MCAM search key. Modified npc_check_field function to return bool. Signed-off-by: Naveen Mamindlapalli Signed-off-by: Sunil Goutham Link: https://lore.kernel.org/r/20210111112537.3277-1-naveenm@marvell.com Signed-off-by: Jakub Kicinski --- .../net/ethernet/marvell/octeontx2/af/npc.h | 5 + .../marvell/octeontx2/af/rvu_debugfs.c | 1 + .../marvell/octeontx2/af/rvu_npc_fs.c | 52 ++++++-- .../marvell/octeontx2/nic/otx2_flows.c | 116 ++++++++++++++++-- 4 files changed, 153 insertions(+), 21 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/npc.h b/drivers/net/ethernet/marvell/octeontx2/af/npc.h index a1f79445db713..3c640f6aba922 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/npc.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/npc.h @@ -162,6 +162,11 @@ enum key_fields { NPC_DIP_IPV4, NPC_SIP_IPV6, NPC_DIP_IPV6, + NPC_IPPROTO_TCP, + NPC_IPPROTO_UDP, + NPC_IPPROTO_SCTP, + NPC_IPPROTO_AH, + NPC_IPPROTO_ESP, NPC_SPORT_TCP, NPC_DPORT_TCP, NPC_SPORT_UDP, diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c index d27543c1a166a..0c6882e84d425 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c @@ -1757,6 +1757,7 @@ static void rvu_dbg_npc_mcam_show_flows(struct seq_file *s, seq_printf(s, "mask 0x%x\n", ntohs(rule->mask.dport)); break; default: + seq_puts(s, "\n"); break; } } diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c index 14832b66d1fec..7572321cea79e 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c @@ -26,6 +26,11 @@ static const char * const npc_flow_names[] = { [NPC_DIP_IPV4] = "ipv4 destination ip", [NPC_SIP_IPV6] = "ipv6 source ip", [NPC_DIP_IPV6] = "ipv6 destination ip", + [NPC_IPPROTO_TCP] = "ip proto tcp", + [NPC_IPPROTO_UDP] = "ip proto udp", + [NPC_IPPROTO_SCTP] = "ip proto sctp", + [NPC_IPPROTO_AH] = "ip proto AH", + [NPC_IPPROTO_ESP] = "ip proto ESP", [NPC_SPORT_TCP] = "tcp source port", [NPC_DPORT_TCP] = "tcp destination port", [NPC_SPORT_UDP] = "udp source port", @@ -212,13 +217,13 @@ static bool npc_check_overlap(struct rvu *rvu, int blkaddr, return false; } -static int npc_check_field(struct rvu *rvu, int blkaddr, enum key_fields type, - u8 intf) +static bool npc_check_field(struct rvu *rvu, int blkaddr, enum key_fields type, + u8 intf) { if (!npc_is_field_present(rvu, type, intf) || npc_check_overlap(rvu, blkaddr, type, 0, intf)) - return -EOPNOTSUPP; - return 0; + return false; + return true; } static void npc_scan_parse_result(struct npc_mcam *mcam, u8 bit_number, @@ -448,14 +453,13 @@ static void npc_set_features(struct rvu *rvu, int blkaddr, u8 intf) struct npc_mcam *mcam = &rvu->hw->mcam; u64 *features = &mcam->rx_features; u64 tcp_udp_sctp; - int err, hdr; + int hdr; if (is_npc_intf_tx(intf)) features = &mcam->tx_features; for (hdr = NPC_DMAC; hdr < NPC_HEADER_FIELDS_MAX; hdr++) { - err = npc_check_field(rvu, blkaddr, hdr, intf); - if (!err) + if (npc_check_field(rvu, blkaddr, hdr, intf)) *features |= BIT_ULL(hdr); } @@ -464,13 +468,26 @@ static void npc_set_features(struct rvu *rvu, int blkaddr, u8 intf) BIT_ULL(NPC_SPORT_SCTP) | BIT_ULL(NPC_DPORT_SCTP); /* for tcp/udp/sctp corresponding layer type should be in the key */ - if (*features & tcp_udp_sctp) - if (npc_check_field(rvu, blkaddr, NPC_LD, intf)) + if (*features & tcp_udp_sctp) { + if (!npc_check_field(rvu, blkaddr, NPC_LD, intf)) *features &= ~tcp_udp_sctp; + else + *features |= BIT_ULL(NPC_IPPROTO_TCP) | + BIT_ULL(NPC_IPPROTO_UDP) | + BIT_ULL(NPC_IPPROTO_SCTP); + } + + /* for AH, check if corresponding layer type is present in the key */ + if (npc_check_field(rvu, blkaddr, NPC_LD, intf)) + *features |= BIT_ULL(NPC_IPPROTO_AH); + + /* for ESP, check if corresponding layer type is present in the key */ + if (npc_check_field(rvu, blkaddr, NPC_LE, intf)) + *features |= BIT_ULL(NPC_IPPROTO_ESP); /* for vlan corresponding layer type should be in the key */ if (*features & BIT_ULL(NPC_OUTER_VID)) - if (npc_check_field(rvu, blkaddr, NPC_LB, intf)) + if (!npc_check_field(rvu, blkaddr, NPC_LB, intf)) *features &= ~BIT_ULL(NPC_OUTER_VID); } @@ -743,13 +760,13 @@ static void npc_update_flow(struct rvu *rvu, struct mcam_entry *entry, return; /* For tcp/udp/sctp LTYPE should be present in entry */ - if (features & (BIT_ULL(NPC_SPORT_TCP) | BIT_ULL(NPC_DPORT_TCP))) + if (features & BIT_ULL(NPC_IPPROTO_TCP)) npc_update_entry(rvu, NPC_LD, entry, NPC_LT_LD_TCP, 0, ~0ULL, 0, intf); - if (features & (BIT_ULL(NPC_SPORT_UDP) | BIT_ULL(NPC_DPORT_UDP))) + if (features & BIT_ULL(NPC_IPPROTO_UDP)) npc_update_entry(rvu, NPC_LD, entry, NPC_LT_LD_UDP, 0, ~0ULL, 0, intf); - if (features & (BIT_ULL(NPC_SPORT_SCTP) | BIT_ULL(NPC_DPORT_SCTP))) + if (features & BIT_ULL(NPC_IPPROTO_SCTP)) npc_update_entry(rvu, NPC_LD, entry, NPC_LT_LD_SCTP, 0, ~0ULL, 0, intf); @@ -758,6 +775,15 @@ static void npc_update_flow(struct rvu *rvu, struct mcam_entry *entry, NPC_LT_LB_STAG_QINQ | NPC_LT_LB_CTAG, 0, NPC_LT_LB_STAG_QINQ & NPC_LT_LB_CTAG, 0, intf); + /* For AH, LTYPE should be present in entry */ + if (features & BIT_ULL(NPC_IPPROTO_AH)) + npc_update_entry(rvu, NPC_LD, entry, NPC_LT_LD_AH, + 0, ~0ULL, 0, intf); + /* For ESP, LTYPE should be present in entry */ + if (features & BIT_ULL(NPC_IPPROTO_ESP)) + npc_update_entry(rvu, NPC_LE, entry, NPC_LT_LE_ESP, + 0, ~0ULL, 0, intf); + #define NPC_WRITE_FLOW(field, member, val_lo, val_hi, mask_lo, mask_hi) \ do { \ if (features & BIT_ULL((field))) { \ diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_flows.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_flows.c index 6dd442d88d0e5..d6b5bf247e31b 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_flows.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_flows.c @@ -272,14 +272,16 @@ int otx2_get_all_flows(struct otx2_nic *pfvf, struct ethtool_rxnfc *nfc, return err; } -static void otx2_prepare_ipv4_flow(struct ethtool_rx_flow_spec *fsp, - struct npc_install_flow_req *req, - u32 flow_type) +static int otx2_prepare_ipv4_flow(struct ethtool_rx_flow_spec *fsp, + struct npc_install_flow_req *req, + u32 flow_type) { struct ethtool_usrip4_spec *ipv4_usr_mask = &fsp->m_u.usr_ip4_spec; struct ethtool_usrip4_spec *ipv4_usr_hdr = &fsp->h_u.usr_ip4_spec; struct ethtool_tcpip4_spec *ipv4_l4_mask = &fsp->m_u.tcp_ip4_spec; struct ethtool_tcpip4_spec *ipv4_l4_hdr = &fsp->h_u.tcp_ip4_spec; + struct ethtool_ah_espip4_spec *ah_esp_hdr = &fsp->h_u.ah_ip4_spec; + struct ethtool_ah_espip4_spec *ah_esp_mask = &fsp->m_u.ah_ip4_spec; struct flow_msg *pmask = &req->mask; struct flow_msg *pkt = &req->packet; @@ -299,10 +301,16 @@ static void otx2_prepare_ipv4_flow(struct ethtool_rx_flow_spec *fsp, sizeof(pmask->ip4dst)); req->features |= BIT_ULL(NPC_DIP_IPV4); } + pkt->etype = cpu_to_be16(ETH_P_IP); + pmask->etype = cpu_to_be16(0xFFFF); + req->features |= BIT_ULL(NPC_ETYPE); break; case TCP_V4_FLOW: case UDP_V4_FLOW: case SCTP_V4_FLOW: + pkt->etype = cpu_to_be16(ETH_P_IP); + pmask->etype = cpu_to_be16(0xFFFF); + req->features |= BIT_ULL(NPC_ETYPE); if (ipv4_l4_mask->ip4src) { memcpy(&pkt->ip4src, &ipv4_l4_hdr->ip4src, sizeof(pkt->ip4src)); @@ -341,20 +349,60 @@ static void otx2_prepare_ipv4_flow(struct ethtool_rx_flow_spec *fsp, else req->features |= BIT_ULL(NPC_DPORT_SCTP); } + if (flow_type == UDP_V4_FLOW) + req->features |= BIT_ULL(NPC_IPPROTO_UDP); + else if (flow_type == TCP_V4_FLOW) + req->features |= BIT_ULL(NPC_IPPROTO_TCP); + else + req->features |= BIT_ULL(NPC_IPPROTO_SCTP); + break; + case AH_V4_FLOW: + case ESP_V4_FLOW: + pkt->etype = cpu_to_be16(ETH_P_IP); + pmask->etype = cpu_to_be16(0xFFFF); + req->features |= BIT_ULL(NPC_ETYPE); + if (ah_esp_mask->ip4src) { + memcpy(&pkt->ip4src, &ah_esp_hdr->ip4src, + sizeof(pkt->ip4src)); + memcpy(&pmask->ip4src, &ah_esp_mask->ip4src, + sizeof(pmask->ip4src)); + req->features |= BIT_ULL(NPC_SIP_IPV4); + } + if (ah_esp_mask->ip4dst) { + memcpy(&pkt->ip4dst, &ah_esp_hdr->ip4dst, + sizeof(pkt->ip4dst)); + memcpy(&pmask->ip4dst, &ah_esp_mask->ip4dst, + sizeof(pmask->ip4dst)); + req->features |= BIT_ULL(NPC_DIP_IPV4); + } + + /* NPC profile doesn't extract AH/ESP header fields */ + if ((ah_esp_mask->spi & ah_esp_hdr->spi) || + (ah_esp_mask->tos & ah_esp_mask->tos)) + return -EOPNOTSUPP; + + if (flow_type == AH_V4_FLOW) + req->features |= BIT_ULL(NPC_IPPROTO_AH); + else + req->features |= BIT_ULL(NPC_IPPROTO_ESP); break; default: break; } + + return 0; } -static void otx2_prepare_ipv6_flow(struct ethtool_rx_flow_spec *fsp, - struct npc_install_flow_req *req, - u32 flow_type) +static int otx2_prepare_ipv6_flow(struct ethtool_rx_flow_spec *fsp, + struct npc_install_flow_req *req, + u32 flow_type) { struct ethtool_usrip6_spec *ipv6_usr_mask = &fsp->m_u.usr_ip6_spec; struct ethtool_usrip6_spec *ipv6_usr_hdr = &fsp->h_u.usr_ip6_spec; struct ethtool_tcpip6_spec *ipv6_l4_mask = &fsp->m_u.tcp_ip6_spec; struct ethtool_tcpip6_spec *ipv6_l4_hdr = &fsp->h_u.tcp_ip6_spec; + struct ethtool_ah_espip6_spec *ah_esp_hdr = &fsp->h_u.ah_ip6_spec; + struct ethtool_ah_espip6_spec *ah_esp_mask = &fsp->m_u.ah_ip6_spec; struct flow_msg *pmask = &req->mask; struct flow_msg *pkt = &req->packet; @@ -374,10 +422,16 @@ static void otx2_prepare_ipv6_flow(struct ethtool_rx_flow_spec *fsp, sizeof(pmask->ip6dst)); req->features |= BIT_ULL(NPC_DIP_IPV6); } + pkt->etype = cpu_to_be16(ETH_P_IPV6); + pmask->etype = cpu_to_be16(0xFFFF); + req->features |= BIT_ULL(NPC_ETYPE); break; case TCP_V6_FLOW: case UDP_V6_FLOW: case SCTP_V6_FLOW: + pkt->etype = cpu_to_be16(ETH_P_IPV6); + pmask->etype = cpu_to_be16(0xFFFF); + req->features |= BIT_ULL(NPC_ETYPE); if (!ipv6_addr_any((struct in6_addr *)ipv6_l4_mask->ip6src)) { memcpy(&pkt->ip6src, &ipv6_l4_hdr->ip6src, sizeof(pkt->ip6src)); @@ -416,10 +470,47 @@ static void otx2_prepare_ipv6_flow(struct ethtool_rx_flow_spec *fsp, else req->features |= BIT_ULL(NPC_DPORT_SCTP); } + if (flow_type == UDP_V6_FLOW) + req->features |= BIT_ULL(NPC_IPPROTO_UDP); + else if (flow_type == TCP_V6_FLOW) + req->features |= BIT_ULL(NPC_IPPROTO_TCP); + else + req->features |= BIT_ULL(NPC_IPPROTO_SCTP); break; + case AH_V6_FLOW: + case ESP_V6_FLOW: + pkt->etype = cpu_to_be16(ETH_P_IPV6); + pmask->etype = cpu_to_be16(0xFFFF); + req->features |= BIT_ULL(NPC_ETYPE); + if (!ipv6_addr_any((struct in6_addr *)ah_esp_hdr->ip6src)) { + memcpy(&pkt->ip6src, &ah_esp_hdr->ip6src, + sizeof(pkt->ip6src)); + memcpy(&pmask->ip6src, &ah_esp_mask->ip6src, + sizeof(pmask->ip6src)); + req->features |= BIT_ULL(NPC_SIP_IPV6); + } + if (!ipv6_addr_any((struct in6_addr *)ah_esp_hdr->ip6dst)) { + memcpy(&pkt->ip6dst, &ah_esp_hdr->ip6dst, + sizeof(pkt->ip6dst)); + memcpy(&pmask->ip6dst, &ah_esp_mask->ip6dst, + sizeof(pmask->ip6dst)); + req->features |= BIT_ULL(NPC_DIP_IPV6); + } + + /* NPC profile doesn't extract AH/ESP header fields */ + if ((ah_esp_mask->spi & ah_esp_hdr->spi) || + (ah_esp_mask->tclass & ah_esp_mask->tclass)) + return -EOPNOTSUPP; + + if (flow_type == AH_V6_FLOW) + req->features |= BIT_ULL(NPC_IPPROTO_AH); + else + req->features |= BIT_ULL(NPC_IPPROTO_ESP); default: break; } + + return 0; } int otx2_prepare_flow_request(struct ethtool_rx_flow_spec *fsp, @@ -430,6 +521,7 @@ int otx2_prepare_flow_request(struct ethtool_rx_flow_spec *fsp, struct flow_msg *pmask = &req->mask; struct flow_msg *pkt = &req->packet; u32 flow_type; + int ret; flow_type = fsp->flow_type & ~(FLOW_EXT | FLOW_MAC_EXT | FLOW_RSS); switch (flow_type) { @@ -457,13 +549,21 @@ int otx2_prepare_flow_request(struct ethtool_rx_flow_spec *fsp, case TCP_V4_FLOW: case UDP_V4_FLOW: case SCTP_V4_FLOW: - otx2_prepare_ipv4_flow(fsp, req, flow_type); + case AH_V4_FLOW: + case ESP_V4_FLOW: + ret = otx2_prepare_ipv4_flow(fsp, req, flow_type); + if (ret) + return ret; break; case IPV6_USER_FLOW: case TCP_V6_FLOW: case UDP_V6_FLOW: case SCTP_V6_FLOW: - otx2_prepare_ipv6_flow(fsp, req, flow_type); + case AH_V6_FLOW: + case ESP_V6_FLOW: + ret = otx2_prepare_ipv6_flow(fsp, req, flow_type); + if (ret) + return ret; break; default: return -EOPNOTSUPP; -- GitLab From 70b32d8276feef23b9b2c50c1128a7d6252e2b47 Mon Sep 17 00:00:00 2001 From: Ionut-robert Aron Date: Mon, 11 Jan 2021 19:07:25 +0200 Subject: [PATCH 0687/2012] dpaa2-eth: add support for Rx VLAN filtering Declare Rx VLAN filtering as supported and user-changeable only when there are VLAN filtering entries available on the DPNI object. Even then, rx-vlan-filtering is by default disabled. Also, populate the .ndo_vlan_rx_add_vid() and .ndo_vlan_rx_kill_vid() callbacks for adding and removing a specific VLAN from the VLAN table. Signed-off-by: Ionut-robert Aron Signed-off-by: Ioana Ciornei Link: https://lore.kernel.org/r/20210111170725.1818218-1-ciorneiioana@gmail.com Signed-off-by: Jakub Kicinski --- .../net/ethernet/freescale/dpaa2/dpaa2-eth.c | 65 +++++++++++++ .../net/ethernet/freescale/dpaa2/dpni-cmd.h | 17 ++++ drivers/net/ethernet/freescale/dpaa2/dpni.c | 93 +++++++++++++++++++ drivers/net/ethernet/freescale/dpaa2/dpni.h | 9 ++ 4 files changed, 184 insertions(+) diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c index a8c98869e484f..dd00c5de21151 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c @@ -1262,6 +1262,22 @@ static void dpaa2_eth_tx_conf(struct dpaa2_eth_priv *priv, percpu_stats->tx_errors++; } +static int dpaa2_eth_set_rx_vlan_filtering(struct dpaa2_eth_priv *priv, + bool enable) +{ + int err; + + err = dpni_enable_vlan_filter(priv->mc_io, 0, priv->mc_token, enable); + + if (err) { + netdev_err(priv->net_dev, + "dpni_enable_vlan_filter failed\n"); + return err; + } + + return 0; +} + static int dpaa2_eth_set_rx_csum(struct dpaa2_eth_priv *priv, bool enable) { int err; @@ -1952,6 +1968,43 @@ static void dpaa2_eth_add_mc_hw_addr(const struct net_device *net_dev, } } +static int dpaa2_eth_rx_add_vid(struct net_device *net_dev, + __be16 vlan_proto, u16 vid) +{ + struct dpaa2_eth_priv *priv = netdev_priv(net_dev); + int err; + + err = dpni_add_vlan_id(priv->mc_io, 0, priv->mc_token, + vid, 0, 0, 0); + + if (err) { + netdev_warn(priv->net_dev, + "Could not add the vlan id %u\n", + vid); + return err; + } + + return 0; +} + +static int dpaa2_eth_rx_kill_vid(struct net_device *net_dev, + __be16 vlan_proto, u16 vid) +{ + struct dpaa2_eth_priv *priv = netdev_priv(net_dev); + int err; + + err = dpni_remove_vlan_id(priv->mc_io, 0, priv->mc_token, vid); + + if (err) { + netdev_warn(priv->net_dev, + "Could not remove the vlan id %u\n", + vid); + return err; + } + + return 0; +} + static void dpaa2_eth_set_rx_mode(struct net_device *net_dev) { struct dpaa2_eth_priv *priv = netdev_priv(net_dev); @@ -2058,6 +2111,13 @@ static int dpaa2_eth_set_features(struct net_device *net_dev, bool enable; int err; + if (changed & NETIF_F_HW_VLAN_CTAG_FILTER) { + enable = !!(features & NETIF_F_HW_VLAN_CTAG_FILTER); + err = dpaa2_eth_set_rx_vlan_filtering(priv, enable); + if (err) + return err; + } + if (changed & NETIF_F_RXCSUM) { enable = !!(features & NETIF_F_RXCSUM); err = dpaa2_eth_set_rx_csum(priv, enable); @@ -2507,6 +2567,8 @@ static const struct net_device_ops dpaa2_eth_ops = { .ndo_bpf = dpaa2_eth_xdp, .ndo_xdp_xmit = dpaa2_eth_xdp_xmit, .ndo_setup_tc = dpaa2_eth_setup_tc, + .ndo_vlan_rx_add_vid = dpaa2_eth_rx_add_vid, + .ndo_vlan_rx_kill_vid = dpaa2_eth_rx_kill_vid }; static void dpaa2_eth_cdan_cb(struct dpaa2_io_notification_ctx *ctx) @@ -4015,6 +4077,9 @@ static int dpaa2_eth_netdev_init(struct net_device *net_dev) NETIF_F_LLTX | NETIF_F_HW_TC; net_dev->hw_features = net_dev->features; + if (priv->dpni_attrs.vlan_filter_entries) + net_dev->hw_features |= NETIF_F_HW_VLAN_CTAG_FILTER; + return 0; } diff --git a/drivers/net/ethernet/freescale/dpaa2/dpni-cmd.h b/drivers/net/ethernet/freescale/dpaa2/dpni-cmd.h index 90453dc7baefe..9f80bdfeedece 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpni-cmd.h +++ b/drivers/net/ethernet/freescale/dpaa2/dpni-cmd.h @@ -62,6 +62,10 @@ #define DPNI_CMDID_SET_RX_TC_DIST DPNI_CMD(0x235) +#define DPNI_CMDID_ENABLE_VLAN_FILTER DPNI_CMD(0x230) +#define DPNI_CMDID_ADD_VLAN_ID DPNI_CMD_V2(0x231) +#define DPNI_CMDID_REMOVE_VLAN_ID DPNI_CMD(0x232) + #define DPNI_CMDID_SET_QOS_TBL DPNI_CMD(0x240) #define DPNI_CMDID_ADD_QOS_ENT DPNI_CMD(0x241) #define DPNI_CMDID_REMOVE_QOS_ENT DPNI_CMD(0x242) @@ -662,4 +666,17 @@ struct dpni_rsp_single_step_cfg { __le32 peer_delay; }; +struct dpni_cmd_enable_vlan_filter { + /* only the LSB */ + u8 en; +}; + +struct dpni_cmd_vlan_id { + u8 flags; + u8 tc_id; + u8 flow_id; + u8 pad; + __le16 vlan_id; +}; + #endif /* _FSL_DPNI_CMD_H */ diff --git a/drivers/net/ethernet/freescale/dpaa2/dpni.c b/drivers/net/ethernet/freescale/dpaa2/dpni.c index 6ea7db66a6322..aa429c17c3438 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpni.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpni.c @@ -1224,6 +1224,99 @@ int dpni_get_port_mac_addr(struct fsl_mc_io *mc_io, return 0; } +/** + * dpni_enable_vlan_filter() - Enable/disable VLAN filtering mode + * @mc_io: Pointer to MC portal's I/O object + * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' + * @token: Token of DPNI object + * @en: Set to '1' to enable; '0' to disable + * + * Return: '0' on Success; Error code otherwise. + */ +int dpni_enable_vlan_filter(struct fsl_mc_io *mc_io, + u32 cmd_flags, + u16 token, + u32 en) +{ + struct dpni_cmd_enable_vlan_filter *cmd_params; + struct fsl_mc_command cmd = { 0 }; + + /* prepare command */ + cmd.header = mc_encode_cmd_header(DPNI_CMDID_ENABLE_VLAN_FILTER, + cmd_flags, + token); + cmd_params = (struct dpni_cmd_enable_vlan_filter *)cmd.params; + dpni_set_field(cmd_params->en, ENABLE, en); + + /* send command to mc*/ + return mc_send_command(mc_io, &cmd); +} + +/** + * dpni_add_vlan_id() - Add VLAN ID filter + * @mc_io: Pointer to MC portal's I/O object + * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' + * @token: Token of DPNI object + * @vlan_id: VLAN ID to add + * @flags: 0 - tc_id and flow_id will be ignored. + * Pkt with this vlan_id will be passed to the next + * classification stages + * DPNI_VLAN_SET_QUEUE_ACTION + * Pkt with this vlan_id will be forward directly to + * queue defined by the tc_id and flow_id + * + * @tc_id: Traffic class selection (0-7) + * @flow_id: Selects the specific queue out of the set allocated for the + * same as tc_id. Value must be in range 0 to NUM_QUEUES - 1 + * + * Return: '0' on Success; Error code otherwise. + */ +int dpni_add_vlan_id(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token, + u16 vlan_id, u8 flags, u8 tc_id, u8 flow_id) +{ + struct dpni_cmd_vlan_id *cmd_params; + struct fsl_mc_command cmd = { 0 }; + + /* prepare command */ + cmd.header = mc_encode_cmd_header(DPNI_CMDID_ADD_VLAN_ID, + cmd_flags, + token); + cmd_params = (struct dpni_cmd_vlan_id *)cmd.params; + cmd_params->flags = flags; + cmd_params->tc_id = tc_id; + cmd_params->flow_id = flow_id; + cmd_params->vlan_id = cpu_to_le16(vlan_id); + + /* send command to mc*/ + return mc_send_command(mc_io, &cmd); +} + +/** + * dpni_remove_vlan_id() - Remove VLAN ID filter + * @mc_io: Pointer to MC portal's I/O object + * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' + * @token: Token of DPNI object + * @vlan_id: VLAN ID to remove + * + * Return: '0' on Success; Error code otherwise. + */ +int dpni_remove_vlan_id(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token, + u16 vlan_id) +{ + struct dpni_cmd_vlan_id *cmd_params; + struct fsl_mc_command cmd = { 0 }; + + /* prepare command */ + cmd.header = mc_encode_cmd_header(DPNI_CMDID_REMOVE_VLAN_ID, + cmd_flags, + token); + cmd_params = (struct dpni_cmd_vlan_id *)cmd.params; + cmd_params->vlan_id = cpu_to_le16(vlan_id); + + /* send command to mc*/ + return mc_send_command(mc_io, &cmd); +} + /** * dpni_add_mac_addr() - Add MAC address filter * @mc_io: Pointer to MC portal's I/O object diff --git a/drivers/net/ethernet/freescale/dpaa2/dpni.h b/drivers/net/ethernet/freescale/dpaa2/dpni.h index e7b9e195b534b..4e96d9362dd29 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpni.h +++ b/drivers/net/ethernet/freescale/dpaa2/dpni.h @@ -1114,4 +1114,13 @@ int dpni_get_single_step_cfg(struct fsl_mc_io *mc_io, u16 token, struct dpni_single_step_cfg *ptp_cfg); +int dpni_enable_vlan_filter(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token, + u32 en); + +int dpni_add_vlan_id(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token, + u16 vlan_id, u8 flags, u8 tc_id, u8 flow_id); + +int dpni_remove_vlan_id(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token, + u16 vlan_id); + #endif /* __FSL_DPNI_H */ -- GitLab From 848c1903d35e2eb4fdf8b1a0a6721876e8c88e5d Mon Sep 17 00:00:00 2001 From: Ioana Ciornei Date: Mon, 11 Jan 2021 19:18:02 +0200 Subject: [PATCH 0688/2012] dpaa2-mac: fix the remove path for non-MAC interfaces Check if the interface is indeed connected to a MAC before trying to close the DPMAC object representing it. Without this check we end up working with a NULL pointer. Fixes: d87e606373f6 ("dpaa2-mac: export MAC counters even when in TYPE_FIXED") Signed-off-by: Ioana Ciornei Link: https://lore.kernel.org/r/20210111171802.1826324-1-ciorneiioana@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c index dd00c5de21151..7cb286e533696 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c @@ -4150,6 +4150,9 @@ static void dpaa2_eth_disconnect_mac(struct dpaa2_eth_priv *priv) if (dpaa2_eth_is_type_phy(priv)) dpaa2_mac_disconnect(priv->mac); + if (!dpaa2_eth_has_mac(priv)) + return; + dpaa2_mac_close(priv->mac); kfree(priv->mac); priv->mac = NULL; -- GitLab From 7ac6ad051150592557520b45773201b987ecfce3 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Tue, 12 Jan 2021 15:42:54 -0800 Subject: [PATCH 0689/2012] bpf: Reject too big ctx_size_in for raw_tp test run syzbot reported a WARNING for allocating too big memory: WARNING: CPU: 1 PID: 8484 at mm/page_alloc.c:4976 __alloc_pages_nodemask+0x5f8/0x730 mm/page_alloc.c:5011 Modules linked in: CPU: 1 PID: 8484 Comm: syz-executor862 Not tainted 5.11.0-rc2-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 RIP: 0010:__alloc_pages_nodemask+0x5f8/0x730 mm/page_alloc.c:4976 Code: 00 00 0c 00 0f 85 a7 00 00 00 8b 3c 24 4c 89 f2 44 89 e6 c6 44 24 70 00 48 89 6c 24 58 e8 d0 d7 ff ff 49 89 c5 e9 ea fc ff ff <0f> 0b e9 b5 fd ff ff 89 74 24 14 4c 89 4c 24 08 4c 89 74 24 18 e8 RSP: 0018:ffffc900012efb10 EFLAGS: 00010246 RAX: 0000000000000000 RBX: 1ffff9200025df66 RCX: 0000000000000000 RDX: 0000000000000000 RSI: dffffc0000000000 RDI: 0000000000140dc0 RBP: 0000000000140dc0 R08: 0000000000000000 R09: 0000000000000000 R10: ffffffff81b1f7e1 R11: 0000000000000000 R12: 0000000000000014 R13: 0000000000000014 R14: 0000000000000000 R15: 0000000000000000 FS: 000000000190c880(0000) GS:ffff8880b9e00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f08b7f316c0 CR3: 0000000012073000 CR4: 00000000001506f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: alloc_pages_current+0x18c/0x2a0 mm/mempolicy.c:2267 alloc_pages include/linux/gfp.h:547 [inline] kmalloc_order+0x2e/0xb0 mm/slab_common.c:837 kmalloc_order_trace+0x14/0x120 mm/slab_common.c:853 kmalloc include/linux/slab.h:557 [inline] kzalloc include/linux/slab.h:682 [inline] bpf_prog_test_run_raw_tp+0x4b5/0x670 net/bpf/test_run.c:282 bpf_prog_test_run kernel/bpf/syscall.c:3120 [inline] __do_sys_bpf+0x1ea9/0x4f10 kernel/bpf/syscall.c:4398 do_syscall_64+0x2d/0x70 arch/x86/entry/common.c:46 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x440499 Code: 18 89 d0 c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 7b 13 fc ff c3 66 2e 0f 1f 84 00 00 00 00 RSP: 002b:00007ffe1f3bfb18 EFLAGS: 00000246 ORIG_RAX: 0000000000000141 RAX: ffffffffffffffda RBX: 00000000004002c8 RCX: 0000000000440499 RDX: 0000000000000048 RSI: 0000000020000600 RDI: 000000000000000a RBP: 00000000006ca018 R08: 0000000000000000 R09: 00000000004002c8 R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000401ca0 R13: 0000000000401d30 R14: 0000000000000000 R15: 0000000000000000 This is because we didn't filter out too big ctx_size_in. Fix it by rejecting ctx_size_in that are bigger than MAX_BPF_FUNC_ARGS (12) u64 numbers. Fixes: 1b4d60ec162f ("bpf: Enable BPF_PROG_TEST_RUN for raw_tracepoint") Reported-by: syzbot+4f98876664c7337a4ae6@syzkaller.appspotmail.com Signed-off-by: Song Liu Signed-off-by: Alexei Starovoitov Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20210112234254.1906829-1-songliubraving@fb.com --- net/bpf/test_run.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index c1c30a9f76f34..8b796c499cbb2 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -272,7 +272,8 @@ int bpf_prog_test_run_raw_tp(struct bpf_prog *prog, kattr->test.repeat) return -EINVAL; - if (ctx_size_in < prog->aux->max_ctx_offset) + if (ctx_size_in < prog->aux->max_ctx_offset || + ctx_size_in > MAX_BPF_FUNC_ARGS * sizeof(u64)) return -EINVAL; if ((kattr->test.flags & BPF_F_TEST_RUN_ON_CPU) == 0 && cpu != 0) -- GitLab From 7128c834d30e6b2cf649f14d8fc274941786d0e1 Mon Sep 17 00:00:00 2001 From: Cristian Dumitrescu Date: Mon, 11 Jan 2021 18:11:38 +0000 Subject: [PATCH 0690/2012] i40e: fix potential NULL pointer dereferencing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently, the function i40e_construct_skb_zc only frees the input xdp buffer when the output skb is successfully built. On error, the function i40e_clean_rx_irq_zc does not commit anything for the current packet descriptor and simply exits the packet descriptor processing loop, with the plan to restart the processing of this descriptor on the next invocation. Therefore, on error the ring next-to-clean pointer should not advance, the xdp i.e. *bi buffer should not be freed and the current buffer info should not be invalidated by setting *bi to NULL. Therefore, the *bi should only be set to NULL when the function i40e_construct_skb_zc is successful, otherwise a NULL *bi will be dereferenced when the work for the current descriptor is eventually restarted. Fixes: 3b4f0b66c2b3 ("i40e, xsk: Migrate to new MEM_TYPE_XSK_BUFF_POOL") Signed-off-by: Cristian Dumitrescu Acked-by: Björn Töpel Link: https://lore.kernel.org/r/20210111181138.49757-1-cristian.dumitrescu@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/i40e/i40e_xsk.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.c b/drivers/net/ethernet/intel/i40e/i40e_xsk.c index 47eb9c584a123..492ce213208d2 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_xsk.c +++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.c @@ -348,12 +348,12 @@ int i40e_clean_rx_irq_zc(struct i40e_ring *rx_ring, int budget) * SBP is *not* set in PRT_SBPVSI (default not set). */ skb = i40e_construct_skb_zc(rx_ring, *bi); - *bi = NULL; if (!skb) { rx_ring->rx_stats.alloc_buff_failed++; break; } + *bi = NULL; cleaned_count++; i40e_inc_ntc(rx_ring); -- GitLab From 0ae5b43d6dde6003070106e97cd0d41bace2eeb2 Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Mon, 11 Jan 2021 15:05:52 -0800 Subject: [PATCH 0691/2012] tcp: assign skb hash after tcp_event_data_sent Move skb_set_hash_from_sk s.t. it's called after instead of before tcp_event_data_sent is called. This enables congestion control modules to change the socket hash right before restarting from idle (via the TX_START congestion event). Signed-off-by: Yuchung Cheng Signed-off-by: Eric Dumazet Signed-off-by: Neal Cardwell Link: https://lore.kernel.org/r/20210111230552.2704579-1-ycheng@google.com Signed-off-by: Jakub Kicinski --- net/ipv4/tcp_output.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index f322e798a3519..899d053cb10ed 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1319,7 +1319,6 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, skb_orphan(skb); skb->sk = sk; skb->destructor = skb_is_tcp_pure_ack(skb) ? __sock_wfree : tcp_wfree; - skb_set_hash_from_sk(skb, sk); refcount_add(skb->truesize, &sk->sk_wmem_alloc); skb_set_dst_pending_confirm(skb, sk->sk_dst_pending_confirm); @@ -1390,6 +1389,7 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, tcp_skb_pcount(skb)); tp->segs_out += tcp_skb_pcount(skb); + skb_set_hash_from_sk(skb, sk); /* OK, its time to fill skb_shinfo(skb)->gso_{segs|size} */ skb_shinfo(skb)->gso_segs = tcp_skb_pcount(skb); skb_shinfo(skb)->gso_size = tcp_skb_mss(skb); -- GitLab From 8ad2a970d2010add3963e7219eb50367ab3fa4eb Mon Sep 17 00:00:00 2001 From: Ayush Sawal Date: Tue, 12 Jan 2021 11:06:00 +0530 Subject: [PATCH 0692/2012] cxgb4/chtls: Fix tid stuck due to wrong update of qid TID stuck is seen when there is a race in CPL_PASS_ACCEPT_RPL/CPL_ABORT_REQ and abort is arriving before the accept reply, which sets the queue number. In this case HW ends up sending CPL_ABORT_RPL_RSS to an incorrect ingress queue. V1->V2: - Removed the unused variable len in chtls_set_quiesce_ctrl(). V2->V3: - As kfree_skb() has a check for null skb, so removed this check before calling kfree_skb() in func chtls_send_reset(). Fixes: cc35c88ae4db ("crypto : chtls - CPL handler definition") Signed-off-by: Rohit Maheshwari Signed-off-by: Ayush Sawal Link: https://lore.kernel.org/r/20210112053600.24590-1-ayush.sawal@chelsio.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/chelsio/cxgb4/t4_tcb.h | 7 ++++ .../chelsio/inline_crypto/chtls/chtls.h | 4 ++ .../chelsio/inline_crypto/chtls/chtls_cm.c | 32 ++++++++++++++- .../chelsio/inline_crypto/chtls/chtls_hw.c | 41 +++++++++++++++++++ 4 files changed, 82 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_tcb.h b/drivers/net/ethernet/chelsio/cxgb4/t4_tcb.h index 92473dda55d9f..22a0220123ade 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_tcb.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_tcb.h @@ -40,6 +40,13 @@ #define TCB_L2T_IX_M 0xfffULL #define TCB_L2T_IX_V(x) ((x) << TCB_L2T_IX_S) +#define TCB_T_FLAGS_W 1 +#define TCB_T_FLAGS_S 0 +#define TCB_T_FLAGS_M 0xffffffffffffffffULL +#define TCB_T_FLAGS_V(x) ((__u64)(x) << TCB_T_FLAGS_S) + +#define TCB_FIELD_COOKIE_TFLAG 1 + #define TCB_SMAC_SEL_W 0 #define TCB_SMAC_SEL_S 24 #define TCB_SMAC_SEL_M 0xffULL diff --git a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls.h b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls.h index 72bb123d53db7..9e23780136421 100644 --- a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls.h +++ b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls.h @@ -575,7 +575,11 @@ int send_tx_flowc_wr(struct sock *sk, int compl, void chtls_tcp_push(struct sock *sk, int flags); int chtls_push_frames(struct chtls_sock *csk, int comp); int chtls_set_tcb_tflag(struct sock *sk, unsigned int bit_pos, int val); +void chtls_set_tcb_field_rpl_skb(struct sock *sk, u16 word, + u64 mask, u64 val, u8 cookie, + int through_l2t); int chtls_setkey(struct chtls_sock *csk, u32 keylen, u32 mode, int cipher_type); +void chtls_set_quiesce_ctrl(struct sock *sk, int val); void skb_entail(struct sock *sk, struct sk_buff *skb, int flags); unsigned int keyid_to_addr(int start_addr, int keyid); void free_tls_keyid(struct sock *sk); diff --git a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c index 51dd030b3b366..e5cfbe196ba66 100644 --- a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c +++ b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c @@ -32,6 +32,7 @@ #include "chtls.h" #include "chtls_cm.h" #include "clip_tbl.h" +#include "t4_tcb.h" /* * State transitions and actions for close. Note that if we are in SYN_SENT @@ -267,7 +268,9 @@ static void chtls_send_reset(struct sock *sk, int mode, struct sk_buff *skb) if (sk->sk_state != TCP_SYN_RECV) chtls_send_abort(sk, mode, skb); else - goto out; + chtls_set_tcb_field_rpl_skb(sk, TCB_T_FLAGS_W, + TCB_T_FLAGS_V(TCB_T_FLAGS_M), 0, + TCB_FIELD_COOKIE_TFLAG, 1); return; out: @@ -1949,6 +1952,8 @@ static void chtls_close_con_rpl(struct sock *sk, struct sk_buff *skb) else if (tcp_sk(sk)->linger2 < 0 && !csk_flag_nochk(csk, CSK_ABORT_SHUTDOWN)) chtls_abort_conn(sk, skb); + else if (csk_flag_nochk(csk, CSK_TX_DATA_SENT)) + chtls_set_quiesce_ctrl(sk, 0); break; default: pr_info("close_con_rpl in bad state %d\n", sk->sk_state); @@ -2292,6 +2297,28 @@ static int chtls_wr_ack(struct chtls_dev *cdev, struct sk_buff *skb) return 0; } +static int chtls_set_tcb_rpl(struct chtls_dev *cdev, struct sk_buff *skb) +{ + struct cpl_set_tcb_rpl *rpl = cplhdr(skb) + RSS_HDR; + unsigned int hwtid = GET_TID(rpl); + struct sock *sk; + + sk = lookup_tid(cdev->tids, hwtid); + + /* return EINVAL if socket doesn't exist */ + if (!sk) + return -EINVAL; + + /* Reusing the skb as size of cpl_set_tcb_field structure + * is greater than cpl_abort_req + */ + if (TCB_COOKIE_G(rpl->cookie) == TCB_FIELD_COOKIE_TFLAG) + chtls_send_abort(sk, CPL_ABORT_SEND_RST, NULL); + + kfree_skb(skb); + return 0; +} + chtls_handler_func chtls_handlers[NUM_CPL_CMDS] = { [CPL_PASS_OPEN_RPL] = chtls_pass_open_rpl, [CPL_CLOSE_LISTSRV_RPL] = chtls_close_listsrv_rpl, @@ -2304,5 +2331,6 @@ chtls_handler_func chtls_handlers[NUM_CPL_CMDS] = { [CPL_CLOSE_CON_RPL] = chtls_conn_cpl, [CPL_ABORT_REQ_RSS] = chtls_conn_cpl, [CPL_ABORT_RPL_RSS] = chtls_conn_cpl, - [CPL_FW4_ACK] = chtls_wr_ack, + [CPL_FW4_ACK] = chtls_wr_ack, + [CPL_SET_TCB_RPL] = chtls_set_tcb_rpl, }; diff --git a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_hw.c b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_hw.c index a4fb463af22ac..1e67140b0f801 100644 --- a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_hw.c +++ b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_hw.c @@ -88,6 +88,24 @@ static int chtls_set_tcb_field(struct sock *sk, u16 word, u64 mask, u64 val) return ret < 0 ? ret : 0; } +void chtls_set_tcb_field_rpl_skb(struct sock *sk, u16 word, + u64 mask, u64 val, u8 cookie, + int through_l2t) +{ + struct sk_buff *skb; + unsigned int wrlen; + + wrlen = sizeof(struct cpl_set_tcb_field) + sizeof(struct ulptx_idata); + wrlen = roundup(wrlen, 16); + + skb = alloc_skb(wrlen, GFP_KERNEL | __GFP_NOFAIL); + if (!skb) + return; + + __set_tcb_field(sk, skb, word, mask, val, cookie, 0); + send_or_defer(sk, tcp_sk(sk), skb, through_l2t); +} + /* * Set one of the t_flags bits in the TCB. */ @@ -113,6 +131,29 @@ static int chtls_set_tcb_quiesce(struct sock *sk, int val) TF_RX_QUIESCE_V(val)); } +void chtls_set_quiesce_ctrl(struct sock *sk, int val) +{ + struct chtls_sock *csk; + struct sk_buff *skb; + unsigned int wrlen; + int ret; + + wrlen = sizeof(struct cpl_set_tcb_field) + sizeof(struct ulptx_idata); + wrlen = roundup(wrlen, 16); + + skb = alloc_skb(wrlen, GFP_ATOMIC); + if (!skb) + return; + + csk = rcu_dereference_sk_user_data(sk); + + __set_tcb_field(sk, skb, 1, TF_RX_QUIESCE_V(1), 0, 0, 1); + set_wr_txq(skb, CPL_PRIORITY_CONTROL, csk->port_id); + ret = cxgb4_ofld_send(csk->egress_dev, skb); + if (ret < 0) + kfree_skb(skb); +} + /* TLS Key bitmap processing */ int chtls_init_kmap(struct chtls_dev *cdev, struct cxgb4_lld_info *lldi) { -- GitLab From 744ea4e3885eccb6d332a06fae9eb7420a622c0f Mon Sep 17 00:00:00 2001 From: Gilad Reti Date: Wed, 13 Jan 2021 07:38:07 +0200 Subject: [PATCH 0693/2012] bpf: Support PTR_TO_MEM{,_OR_NULL} register spilling Add support for pointer to mem register spilling, to allow the verifier to track pointers to valid memory addresses. Such pointers are returned for example by a successful call of the bpf_ringbuf_reserve helper. The patch was partially contributed by CyberArk Software, Inc. Fixes: 457f44363a88 ("bpf: Implement BPF ring buffer and verifier support for it") Suggested-by: Yonghong Song Signed-off-by: Gilad Reti Signed-off-by: Alexei Starovoitov Acked-by: KP Singh Link: https://lore.kernel.org/bpf/20210113053810.13518-1-gilad.reti@gmail.com --- kernel/bpf/verifier.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 17270b8404f17..36af69fac5910 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -2217,6 +2217,8 @@ static bool is_spillable_regtype(enum bpf_reg_type type) case PTR_TO_RDWR_BUF: case PTR_TO_RDWR_BUF_OR_NULL: case PTR_TO_PERCPU_BTF_ID: + case PTR_TO_MEM: + case PTR_TO_MEM_OR_NULL: return true; default: return false; -- GitLab From 4237e9f4a96228ccc8a7abe5e4b30834323cd353 Mon Sep 17 00:00:00 2001 From: Gilad Reti Date: Wed, 13 Jan 2021 07:38:08 +0200 Subject: [PATCH 0694/2012] selftests/bpf: Add verifier test for PTR_TO_MEM spill Add a test to check that the verifier is able to recognize spilling of PTR_TO_MEM registers, by reserving a ringbuf buffer, forcing the spill of a pointer holding the buffer address to the stack, filling it back in from the stack and writing to the memory area pointed by it. The patch was partially contributed by CyberArk Software, Inc. Signed-off-by: Gilad Reti Signed-off-by: Alexei Starovoitov Acked-by: Yonghong Song Acked-by: KP Singh Link: https://lore.kernel.org/bpf/20210113053810.13518-2-gilad.reti@gmail.com --- tools/testing/selftests/bpf/test_verifier.c | 12 +++++++- .../selftests/bpf/verifier/spill_fill.c | 30 +++++++++++++++++++ 2 files changed, 41 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 777a81404fdbd..f8569f04064b7 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -50,7 +50,7 @@ #define MAX_INSNS BPF_MAXINSNS #define MAX_TEST_INSNS 1000000 #define MAX_FIXUPS 8 -#define MAX_NR_MAPS 20 +#define MAX_NR_MAPS 21 #define MAX_TEST_RUNS 8 #define POINTER_VALUE 0xcafe4all #define TEST_DATA_LEN 64 @@ -87,6 +87,7 @@ struct bpf_test { int fixup_sk_storage_map[MAX_FIXUPS]; int fixup_map_event_output[MAX_FIXUPS]; int fixup_map_reuseport_array[MAX_FIXUPS]; + int fixup_map_ringbuf[MAX_FIXUPS]; const char *errstr; const char *errstr_unpriv; uint32_t insn_processed; @@ -640,6 +641,7 @@ static void do_test_fixup(struct bpf_test *test, enum bpf_prog_type prog_type, int *fixup_sk_storage_map = test->fixup_sk_storage_map; int *fixup_map_event_output = test->fixup_map_event_output; int *fixup_map_reuseport_array = test->fixup_map_reuseport_array; + int *fixup_map_ringbuf = test->fixup_map_ringbuf; if (test->fill_helper) { test->fill_insns = calloc(MAX_TEST_INSNS, sizeof(struct bpf_insn)); @@ -817,6 +819,14 @@ static void do_test_fixup(struct bpf_test *test, enum bpf_prog_type prog_type, fixup_map_reuseport_array++; } while (*fixup_map_reuseport_array); } + if (*fixup_map_ringbuf) { + map_fds[20] = create_map(BPF_MAP_TYPE_RINGBUF, 0, + 0, 4096); + do { + prog[*fixup_map_ringbuf].imm = map_fds[20]; + fixup_map_ringbuf++; + } while (*fixup_map_ringbuf); + } } struct libcap { diff --git a/tools/testing/selftests/bpf/verifier/spill_fill.c b/tools/testing/selftests/bpf/verifier/spill_fill.c index 45d43bf82f269..0b943897aaf6c 100644 --- a/tools/testing/selftests/bpf/verifier/spill_fill.c +++ b/tools/testing/selftests/bpf/verifier/spill_fill.c @@ -28,6 +28,36 @@ .result = ACCEPT, .result_unpriv = ACCEPT, }, +{ + "check valid spill/fill, ptr to mem", + .insns = { + /* reserve 8 byte ringbuf memory */ + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_MOV64_IMM(BPF_REG_2, 8), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_ringbuf_reserve), + /* store a pointer to the reserved memory in R6 */ + BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), + /* check whether the reservation was successful */ + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6), + /* spill R6(mem) into the stack */ + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_6, -8), + /* fill it back in R7 */ + BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_10, -8), + /* should be able to access *(R7) = 0 */ + BPF_ST_MEM(BPF_DW, BPF_REG_7, 0, 0), + /* submit the reserved ringbuf memory */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_ringbuf_submit), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_ringbuf = { 1 }, + .result = ACCEPT, + .result_unpriv = ACCEPT, +}, { "check corrupted spill/fill", .insns = { -- GitLab From 5b55299eed78538cc4746e50ee97103a1643249c Mon Sep 17 00:00:00 2001 From: David Wu Date: Wed, 13 Jan 2021 11:41:09 +0800 Subject: [PATCH 0695/2012] net: stmmac: Fixed mtu channged by cache aligned Since the original mtu is not used when the mtu is updated, the mtu is aligned with cache, this will get an incorrect. For example, if you want to configure the mtu to be 1500, but mtu 1536 is configured in fact. Fixed: eaf4fac478077 ("net: stmmac: Do not accept invalid MTU values") Signed-off-by: David Wu Link: https://lore.kernel.org/r/20210113034109.27865-1-david.wu@rock-chips.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 2d90d6856ec58..26b971cd4da5a 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -4026,6 +4026,7 @@ static int stmmac_change_mtu(struct net_device *dev, int new_mtu) { struct stmmac_priv *priv = netdev_priv(dev); int txfifosz = priv->plat->tx_fifo_size; + const int mtu = new_mtu; if (txfifosz == 0) txfifosz = priv->dma_cap.tx_fifo_size; @@ -4043,7 +4044,7 @@ static int stmmac_change_mtu(struct net_device *dev, int new_mtu) if ((txfifosz < new_mtu) || (new_mtu > BUF_SIZE_16KiB)) return -EINVAL; - dev->mtu = new_mtu; + dev->mtu = mtu; netdev_update_features(dev); -- GitLab From c25a053e15778f6b4d6553708673736e27a6c2cf Mon Sep 17 00:00:00 2001 From: Nick Hu Date: Wed, 13 Jan 2021 10:24:10 +0800 Subject: [PATCH 0696/2012] riscv: Fix KASAN memory mapping. Use virtual address instead of physical address when translating the address to shadow memory by kasan_mem_to_shadow(). Signed-off-by: Nick Hu Signed-off-by: Nylon Chen Fixes: b10d6bca8720 ("arch, drivers: replace for_each_membock() with for_each_mem_range()") Cc: stable@vger.kernel.org Signed-off-by: Palmer Dabbelt --- arch/riscv/mm/kasan_init.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/riscv/mm/kasan_init.c b/arch/riscv/mm/kasan_init.c index 12ddd1f6bf70c..a8a2ffd9114aa 100644 --- a/arch/riscv/mm/kasan_init.c +++ b/arch/riscv/mm/kasan_init.c @@ -93,8 +93,8 @@ void __init kasan_init(void) VMALLOC_END)); for_each_mem_range(i, &_start, &_end) { - void *start = (void *)_start; - void *end = (void *)_end; + void *start = (void *)__va(_start); + void *end = (void *)__va(_end); if (start >= end) break; -- GitLab From 41131a5e54ae7ba5a2bb8d7b30d1818b3f5b13d2 Mon Sep 17 00:00:00 2001 From: Andreas Schwab Date: Tue, 12 Jan 2021 11:55:15 +0000 Subject: [PATCH 0697/2012] powerpc/vdso: Fix clock_gettime_fallback for vdso32 The second argument of __kernel_clock_gettime64 points to a struct __kernel_timespec, with 64-bit time_t, so use the clock_gettime64 syscall in the fallback function for the 32-bit VDSO. Similarly, clock_getres_fallback should use the clock_getres_time64 syscall, though it isn't yet called from the 32-bit VDSO. Fixes: d0e3fc69d00d ("powerpc/vdso: Provide __kernel_clock_gettime64() on vdso32") Signed-off-by: Andreas Schwab [chleroy: Moved into a single #ifdef __powerpc64__ block] Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/0c0ab0eb3cc80687c326f76ff0dd5762b8812ecc.1610452505.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/vdso/gettimeofday.h | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/vdso/gettimeofday.h b/arch/powerpc/include/asm/vdso/gettimeofday.h index 81671aa365b34..77c635c2c90d4 100644 --- a/arch/powerpc/include/asm/vdso/gettimeofday.h +++ b/arch/powerpc/include/asm/vdso/gettimeofday.h @@ -103,6 +103,8 @@ int gettimeofday_fallback(struct __kernel_old_timeval *_tv, struct timezone *_tz return do_syscall_2(__NR_gettimeofday, (unsigned long)_tv, (unsigned long)_tz); } +#ifdef __powerpc64__ + static __always_inline int clock_gettime_fallback(clockid_t _clkid, struct __kernel_timespec *_ts) { @@ -115,10 +117,22 @@ int clock_getres_fallback(clockid_t _clkid, struct __kernel_timespec *_ts) return do_syscall_2(__NR_clock_getres, _clkid, (unsigned long)_ts); } -#ifdef CONFIG_VDSO32 +#else #define BUILD_VDSO32 1 +static __always_inline +int clock_gettime_fallback(clockid_t _clkid, struct __kernel_timespec *_ts) +{ + return do_syscall_2(__NR_clock_gettime64, _clkid, (unsigned long)_ts); +} + +static __always_inline +int clock_getres_fallback(clockid_t _clkid, struct __kernel_timespec *_ts) +{ + return do_syscall_2(__NR_clock_getres_time64, _clkid, (unsigned long)_ts); +} + static __always_inline int clock_gettime32_fallback(clockid_t _clkid, struct old_timespec32 *_ts) { -- GitLab From be969b7cfbcfa8a835a528f1dc467f0975c6d883 Mon Sep 17 00:00:00 2001 From: Sagar Shrikant Kadam Date: Tue, 10 Nov 2020 07:22:10 -0800 Subject: [PATCH 0698/2012] dts: phy: fix missing mdio device and probe failure of vsc8541-01 device HiFive unleashed A00 board has VSC8541-01 ethernet phy, this device is identified as a Revision B device as described in device identification registers. In order to use this phy in the unmanaged mode, it requires a specific reset sequence of logical 0-1-0-1 transition on the NRESET pin as documented here [1]. Currently, the bootloader (fsbl or u-boot-spl) takes care of the phy reset. If due to some reason the phy device hasn't received the reset by the prior stages before the linux macb driver comes into the picture, the MACB mii bus gets probed but the mdio scan fails and is not even able to read the phy ID registers. It gives an error message: "libphy: MACB_mii_bus: probed mdio_bus 10090000.ethernet-ffffffff: MDIO device at address 0 is missing." Thus adding the device OUI (Organizationally Unique Identifier) to the phy device node helps to probe the phy device. [1]: VSC8541-01 datasheet: https://www.mouser.com/ds/2/523/Microsemi_VSC8541-01_Datasheet_10496_V40-1148034.pdf Signed-off-by: Sagar Shrikant Kadam Signed-off-by: Palmer Dabbelt --- arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts b/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts index 4a2729f5ca3f0..60846e88ae4b1 100644 --- a/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts +++ b/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts @@ -88,6 +88,7 @@ phy-mode = "gmii"; phy-handle = <&phy0>; phy0: ethernet-phy@0 { + compatible = "ethernet-phy-id0007.0771"; reg = <0>; }; }; -- GitLab From a0fa9d727043da2238432471e85de0bdb8a8df65 Mon Sep 17 00:00:00 2001 From: Sagar Shrikant Kadam Date: Tue, 10 Nov 2020 07:22:11 -0800 Subject: [PATCH 0699/2012] dts: phy: add GPIO number and active state used for phy reset The GEMGXL_RST line on HiFive Unleashed is pulled low and is using GPIO number 12. Add these reset-gpio details to dt-node using which the linux phylib can reset the phy. Signed-off-by: Sagar Shrikant Kadam Signed-off-by: Palmer Dabbelt --- arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts b/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts index 60846e88ae4b1..24d75a146e02d 100644 --- a/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts +++ b/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts @@ -90,6 +90,7 @@ phy0: ethernet-phy@0 { compatible = "ethernet-phy-id0007.0771"; reg = <0>; + reset-gpios = <&gpio 12 GPIO_ACTIVE_LOW>; }; }; -- GitLab From 0983834a83931606a647c275e5d4165ce4e7b49f Mon Sep 17 00:00:00 2001 From: Sagar Shrikant Kadam Date: Tue, 10 Nov 2020 07:22:12 -0800 Subject: [PATCH 0700/2012] riscv: defconfig: enable gpio support for HiFive Unleashed Ethernet phy VSC8541-01 on HiFive Unleashed has its reset line connected to a gpio, so enable GPIO driver's required to reset the phy. Signed-off-by: Sagar Shrikant Kadam Signed-off-by: Palmer Dabbelt --- arch/riscv/configs/defconfig | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/riscv/configs/defconfig b/arch/riscv/configs/defconfig index d222d353d86d4..8c3d1e4517031 100644 --- a/arch/riscv/configs/defconfig +++ b/arch/riscv/configs/defconfig @@ -64,6 +64,8 @@ CONFIG_HW_RANDOM=y CONFIG_HW_RANDOM_VIRTIO=y CONFIG_SPI=y CONFIG_SPI_SIFIVE=y +CONFIG_GPIOLIB=y +CONFIG_GPIO_SIFIVE=y # CONFIG_PTP_1588_CLOCK is not set CONFIG_POWER_RESET=y CONFIG_DRM=y -- GitLab From 101c2fae5108d78915517d0279323ee215e70df2 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 5 Jan 2021 15:14:29 -0500 Subject: [PATCH 0701/2012] MAINTAINERS: update radeon/amdgpu/amdkfd git trees MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit FDO is out of space, so move to gitlab. Reviewed-by: Christian König Signed-off-by: Alex Deucher --- MAINTAINERS | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index cc1e6a5ee6e67..82a47081f62a5 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -907,7 +907,7 @@ AMD KFD M: Felix Kuehling L: amd-gfx@lists.freedesktop.org S: Supported -T: git git://people.freedesktop.org/~agd5f/linux +T: git https://gitlab.freedesktop.org/agd5f/linux.git F: drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd*.[ch] F: drivers/gpu/drm/amd/amdkfd/ F: drivers/gpu/drm/amd/include/cik_structs.h @@ -14818,7 +14818,7 @@ M: Alex Deucher M: Christian König L: amd-gfx@lists.freedesktop.org S: Supported -T: git git://people.freedesktop.org/~agd5f/linux +T: git https://gitlab.freedesktop.org/agd5f/linux.git F: drivers/gpu/drm/amd/ F: drivers/gpu/drm/radeon/ F: include/uapi/drm/amdgpu_drm.h -- GitLab From ff9346dbabbb6595c5c20d90d88ae4a2247487a9 Mon Sep 17 00:00:00 2001 From: Alexandre Demers Date: Thu, 7 Jan 2021 18:53:03 -0500 Subject: [PATCH 0702/2012] drm/amdgpu: fix DRM_INFO flood if display core is not supported (bug 210921) This fix bug 210921 where DRM_INFO floods log when hitting an unsupported ASIC in amdgpu_device_asic_has_dc_support(). This info should be only called once. Bug: https://bugzilla.kernel.org/show_bug.cgi?id=210921 Signed-off-by: Alexandre Demers Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index b69c34074d8d7..087afab67e221 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -3034,7 +3034,7 @@ bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type) #endif default: if (amdgpu_dc > 0) - DRM_INFO("Display Core has been requested via kernel parameter " + DRM_INFO_ONCE("Display Core has been requested via kernel parameter " "but isn't supported by ASIC, ignoring\n"); return false; } -- GitLab From f14a5c34d143f6627f0be70c0de1d962f3a6ff1c Mon Sep 17 00:00:00 2001 From: Victor Zhao Date: Tue, 5 Jan 2021 15:04:01 +0800 Subject: [PATCH 0703/2012] drm/amdgpu/psp: fix psp gfx ctrl cmds psp GFX_CTRL_CMD_ID_CONSUME_CMD different for windows and linux, according to psp, linux cmds are not correct. v2: only correct GFX_CTRL_CMD_ID_CONSUME_CMD. Signed-off-by: Victor Zhao Reviewed-by: Emily.Deng Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h index d65a5339d354a..3ba7bdfde65d8 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h +++ b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h @@ -47,7 +47,7 @@ enum psp_gfx_crtl_cmd_id GFX_CTRL_CMD_ID_DISABLE_INT = 0x00060000, /* disable PSP-to-Gfx interrupt */ GFX_CTRL_CMD_ID_MODE1_RST = 0x00070000, /* trigger the Mode 1 reset */ GFX_CTRL_CMD_ID_GBR_IH_SET = 0x00080000, /* set Gbr IH_RB_CNTL registers */ - GFX_CTRL_CMD_ID_CONSUME_CMD = 0x000A0000, /* send interrupt to psp for updating write pointer of vf */ + GFX_CTRL_CMD_ID_CONSUME_CMD = 0x00090000, /* send interrupt to psp for updating write pointer of vf */ GFX_CTRL_CMD_ID_DESTROY_GPCOM_RING = 0x000C0000, /* destroy GPCOM ring */ GFX_CTRL_CMD_ID_MAX = 0x000F0000, /* max command ID */ -- GitLab From 73644143b31cb95866c19e0d94be9e3127ec3a6b Mon Sep 17 00:00:00 2001 From: Qingqing Zhuo Date: Wed, 9 Dec 2020 18:04:04 -0500 Subject: [PATCH 0704/2012] drm/amd/display: NULL pointer hang [Why] In dc_link_dp_set_test_pattern, we assume all pipes have a stream, which can cause null pointer dereference. [How] Add a null pointer check before accessing stream. Tested-by: Daniel Wheeler Signed-off-by: Qingqing Zhuo Reviewed-by: Nicholas Kazlauskas Acked-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c index 2fc12239b22cb..1bd1a09352906 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c @@ -3992,7 +3992,7 @@ bool dc_link_dp_set_test_pattern( unsigned int cust_pattern_size) { struct pipe_ctx *pipes = link->dc->current_state->res_ctx.pipe_ctx; - struct pipe_ctx *pipe_ctx = &pipes[0]; + struct pipe_ctx *pipe_ctx = NULL; unsigned int lane; unsigned int i; unsigned char link_qual_pattern[LANE_COUNT_DP_MAX] = {0}; @@ -4002,12 +4002,18 @@ bool dc_link_dp_set_test_pattern( memset(&training_pattern, 0, sizeof(training_pattern)); for (i = 0; i < MAX_PIPES; i++) { + if (pipes[i].stream == NULL) + continue; + if (pipes[i].stream->link == link && !pipes[i].top_pipe && !pipes[i].prev_odm_pipe) { pipe_ctx = &pipes[i]; break; } } + if (pipe_ctx == NULL) + return false; + /* Reset CRTC Test Pattern if it is currently running and request is VideoMode */ if (link->test_pattern_enabled && test_pattern == DP_TEST_PATTERN_VIDEO_MODE) { -- GitLab From 4336be4b07ed3b03a18ac35564c3127eeea05ab6 Mon Sep 17 00:00:00 2001 From: Wesley Chalmers Date: Mon, 14 Dec 2020 15:35:02 -0500 Subject: [PATCH 0705/2012] drm/amd/display: Initialize stack variable [WHY] The stack variable "val" is potentially unpopulate it, so initialize it with the value 0xf (indicating an invalid mux) Tested-by: Daniel Wheeler Signed-off-by: Wesley Chalmers Reviewed-by: Martin Leung Acked-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.c index 100ce0e28fd5a..b096011acb490 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.c @@ -470,7 +470,7 @@ void mpc1_cursor_lock(struct mpc *mpc, int opp_id, bool lock) unsigned int mpc1_get_mpc_out_mux(struct mpc *mpc, int opp_id) { struct dcn10_mpc *mpc10 = TO_DCN10_MPC(mpc); - uint32_t val = 0; + uint32_t val = 0xf; if (opp_id < MAX_OPP && REG(MUX[opp_id])) REG_GET(MUX[opp_id], MPC_OUT_MUX, &val); -- GitLab From 0eb31a82e378cab17beec1d213e1414e9fea1767 Mon Sep 17 00:00:00 2001 From: Nikola Cornij Date: Tue, 29 Dec 2020 23:33:32 -0500 Subject: [PATCH 0706/2012] drm/amd/display: Add a missing DCN3.01 API mapping [why] Required for DSC MST Tested-by: Daniel Wheeler Signed-off-by: Nikola Cornij Reviewed-by: Zhan Liu Acked-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c b/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c index 4825c5c1c6ed6..35f5bf08ae96e 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c @@ -1731,6 +1731,7 @@ static struct resource_funcs dcn301_res_pool_funcs = { .populate_dml_pipes = dcn30_populate_dml_pipes_from_context, .acquire_idle_pipe_for_layer = dcn20_acquire_idle_pipe_for_layer, .add_stream_to_ctx = dcn30_add_stream_to_ctx, + .add_dsc_to_stream_resource = dcn20_add_dsc_to_stream_resource, .remove_stream_from_ctx = dcn20_remove_stream_from_ctx, .populate_dml_writeback_from_context = dcn30_populate_dml_writeback_from_context, .set_mcif_arb_params = dcn30_set_mcif_arb_params, -- GitLab From 9d03bb102028b4a3f4a64d6069b219e2e1c1f306 Mon Sep 17 00:00:00 2001 From: "Li, Roman" Date: Wed, 30 Dec 2020 18:03:02 +0000 Subject: [PATCH 0707/2012] drm/amd/display: disable dcn10 pipe split by default [Why] The initial purpose of dcn10 pipe split is to support some high bandwidth mode which requires dispclk greater than max dispclk. By initial bring up power measurement data, it showed power consumption is less with pipe split for dcn block. This could be reason for enable pipe split by default. By battery life measurement of some Chromebooks, result shows battery life is longer with pipe split disabled. [How] Disable pipe split by default. Pipe split could be still enabled when required dispclk is greater than max dispclk. Tested-by: Daniel Wheeler Signed-off-by: Hersen Wu Signed-off-by: Roman Li Reviewed-by: Roman Li Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c index 36745193c391c..90e912fef2b36 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c @@ -608,8 +608,8 @@ static const struct dc_debug_options debug_defaults_drv = { .disable_pplib_clock_request = false, .disable_pplib_wm_range = false, .pplib_wm_report_mode = WM_REPORT_DEFAULT, - .pipe_split_policy = MPC_SPLIT_DYNAMIC, - .force_single_disp_pipe_split = true, + .pipe_split_policy = MPC_SPLIT_AVOID, + .force_single_disp_pipe_split = false, .disable_dcc = DCC_ENABLE, .voltage_align_fclk = true, .disable_stereo_support = true, -- GitLab From 4eec66c014e9a406d8d453de958f6791d05427e4 Mon Sep 17 00:00:00 2001 From: Rodrigo Siqueira Date: Mon, 11 Jan 2021 11:31:51 -0500 Subject: [PATCH 0708/2012] Revert "drm/amd/display: Fixed Intermittent blue screen on OLED panel" commit a861736dae64 ("drm/amd/display: Fixed Intermittent blue screen on OLED panel") causes power regression for many users. It seems that this change causes the MCLK to get forced high; this creates a regression for many users since their devices were not able to drop to a low state after this change. For this reason, this reverts commit a861736dae644a0d7abbca0c638ae6aad28feeb8. Bug: https://gitlab.freedesktop.org/drm/amd/-/issues/1407 Cc: Aurabindo Pillai Cc: Alex Deucher Cc: Harry Wentland Cc: Naveed Ashfaq Cc: Hersen Wu Cc: Roman Li Acked-by: Alex Deucher Signed-off-by: Rodrigo Siqueira Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- .../amd/display/dc/dml/dcn20/display_mode_vba_20v2.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20v2.c b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20v2.c index 860e72a51534c..80170f9721ce9 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20v2.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20v2.c @@ -2635,14 +2635,15 @@ static void dml20v2_DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndP } if (mode_lib->vba.DRAMClockChangeSupportsVActive && - mode_lib->vba.MinActiveDRAMClockChangeMargin > 60 && - mode_lib->vba.PrefetchMode[mode_lib->vba.VoltageLevel][mode_lib->vba.maxMpcComb] == 0) { + mode_lib->vba.MinActiveDRAMClockChangeMargin > 60) { mode_lib->vba.DRAMClockChangeWatermark += 25; for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) { - if (mode_lib->vba.DRAMClockChangeWatermark > - dml_max(mode_lib->vba.StutterEnterPlusExitWatermark, mode_lib->vba.UrgentWatermark)) - mode_lib->vba.MinTTUVBlank[k] += 25; + if (mode_lib->vba.PrefetchMode[mode_lib->vba.VoltageLevel][mode_lib->vba.maxMpcComb] == 0) { + if (mode_lib->vba.DRAMClockChangeWatermark > + dml_max(mode_lib->vba.StutterEnterPlusExitWatermark, mode_lib->vba.UrgentWatermark)) + mode_lib->vba.MinTTUVBlank[k] += 25; + } } mode_lib->vba.DRAMClockChangeSupport[0][0] = dm_dram_clock_change_vactive; -- GitLab From 8b335bff643f3b39935c7377dbcd361c5b605d98 Mon Sep 17 00:00:00 2001 From: Jeremy Cline Date: Mon, 11 Jan 2021 16:05:28 -0500 Subject: [PATCH 0709/2012] drm/amdkfd: Fix out-of-bounds read in kdf_create_vcrat_image_cpu() KASAN reported a slab-out-of-bounds read of size 1 in kdf_create_vcrat_image_cpu(). This occurs when, for example, when on an x86_64 with a single NUMA node because kfd_fill_iolink_info_for_cpu() is a no-op, but afterwards the sub_type_hdr->length, which is out-of-bounds, is read and multiplied by entries. Fortunately, entries is 0 in this case so the overall crat_table->length is still correct. Check if there were any entries before de-referencing sub_type_hdr which may be pointing to out-of-bounds memory. Fixes: b7b6c38529c9 ("drm/amdkfd: Calculate CPU VCRAT size dynamically (v2)") Suggested-by: Felix Kuehling Signed-off-by: Jeremy Cline Reviewed-by: Felix Kuehling Signed-off-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_crat.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c index 8cac497c2c459..a5640a6138cf2 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c @@ -1040,11 +1040,14 @@ static int kfd_create_vcrat_image_cpu(void *pcrat_image, size_t *size) (struct crat_subtype_iolink *)sub_type_hdr); if (ret < 0) return ret; - crat_table->length += (sub_type_hdr->length * entries); - crat_table->total_entries += entries; - sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr + - sub_type_hdr->length * entries); + if (entries) { + crat_table->length += (sub_type_hdr->length * entries); + crat_table->total_entries += entries; + + sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr + + sub_type_hdr->length * entries); + } #else pr_info("IO link not available for non x86 platforms\n"); #endif -- GitLab From 04eb6e773e9f3167a5921d74e8ad99cdcc4166c3 Mon Sep 17 00:00:00 2001 From: chen gong Date: Fri, 8 Jan 2021 12:46:44 +0800 Subject: [PATCH 0710/2012] drm/amdgpu/gfx10: add updated GOLDEN_TSC_COUNT_UPPER/LOWER register offsets for VGH The address of the GOLDEN_TSC_COUNT_UPPER/GOLDEN_TSC_COUNT_LOWER for Vnagogh are different from the others. The offset of the GOLDEN_TSC_COUNT_UPPER for Vangogh is 0x0025 by calculation. The offset of the GOLDEN_TSC_COUNT_LOWER for Vangogh is 0x0026 by calculation. Signed-off-by: chen gong Acked-by: Evan Quan Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index ba10867845255..34141b785aa4c 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -99,6 +99,10 @@ #define mmGCR_GENERAL_CNTL_Sienna_Cichlid 0x1580 #define mmGCR_GENERAL_CNTL_Sienna_Cichlid_BASE_IDX 0 +#define mmGOLDEN_TSC_COUNT_UPPER_Vangogh 0x0025 +#define mmGOLDEN_TSC_COUNT_UPPER_Vangogh_BASE_IDX 1 +#define mmGOLDEN_TSC_COUNT_LOWER_Vangogh 0x0026 +#define mmGOLDEN_TSC_COUNT_LOWER_Vangogh_BASE_IDX 1 #define mmSPI_CONFIG_CNTL_1_Vangogh 0x2441 #define mmSPI_CONFIG_CNTL_1_Vangogh_BASE_IDX 1 #define mmVGT_TF_MEMORY_BASE_HI_Vangogh 0x2261 @@ -7377,8 +7381,16 @@ static uint64_t gfx_v10_0_get_gpu_clock_counter(struct amdgpu_device *adev) amdgpu_gfx_off_ctrl(adev, false); mutex_lock(&adev->gfx.gpu_clock_mutex); - clock = (uint64_t)RREG32_SOC15(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER) | - ((uint64_t)RREG32_SOC15(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER) << 32ULL); + switch (adev->asic_type) { + case CHIP_VANGOGH: + clock = (uint64_t)RREG32_SOC15(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER_Vangogh) | + ((uint64_t)RREG32_SOC15(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_Vangogh) << 32ULL); + break; + default: + clock = (uint64_t)RREG32_SOC15(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER) | + ((uint64_t)RREG32_SOC15(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER) << 32ULL); + break; + } mutex_unlock(&adev->gfx.gpu_clock_mutex); amdgpu_gfx_off_ctrl(adev, true); return clock; -- GitLab From 12f2df72205fe348481d941c3e593e8068d2d23d Mon Sep 17 00:00:00 2001 From: Huang Rui Date: Wed, 13 Jan 2021 20:13:17 +0800 Subject: [PATCH 0711/2012] drm/amdgpu: fix vram type and bandwidth error for DDR5 and DDR4 This patch is to update atomfirmware parser for the memory type and bandwidth of DDR5 and DDR4. Signed-off-by: Huang Rui Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- .../gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c | 53 +++++++++++++------ 1 file changed, 36 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c index 306077884a679..6107ac91db250 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c @@ -112,6 +112,7 @@ int amdgpu_atomfirmware_allocate_fb_scratch(struct amdgpu_device *adev) union igp_info { struct atom_integrated_system_info_v1_11 v11; struct atom_integrated_system_info_v1_12 v12; + struct atom_integrated_system_info_v2_1 v21; }; union umc_info { @@ -209,24 +210,42 @@ amdgpu_atomfirmware_get_vram_info(struct amdgpu_device *adev, if (adev->flags & AMD_IS_APU) { igp_info = (union igp_info *) (mode_info->atom_context->bios + data_offset); - switch (crev) { - case 11: - mem_channel_number = igp_info->v11.umachannelnumber; - /* channel width is 64 */ - if (vram_width) - *vram_width = mem_channel_number * 64; - mem_type = igp_info->v11.memorytype; - if (vram_type) - *vram_type = convert_atom_mem_type_to_vram_type(adev, mem_type); + switch (frev) { + case 1: + switch (crev) { + case 11: + case 12: + mem_channel_number = igp_info->v11.umachannelnumber; + if (!mem_channel_number) + mem_channel_number = 1; + /* channel width is 64 */ + if (vram_width) + *vram_width = mem_channel_number * 64; + mem_type = igp_info->v11.memorytype; + if (vram_type) + *vram_type = convert_atom_mem_type_to_vram_type(adev, mem_type); + break; + default: + return -EINVAL; + } break; - case 12: - mem_channel_number = igp_info->v12.umachannelnumber; - /* channel width is 64 */ - if (vram_width) - *vram_width = mem_channel_number * 64; - mem_type = igp_info->v12.memorytype; - if (vram_type) - *vram_type = convert_atom_mem_type_to_vram_type(adev, mem_type); + case 2: + switch (crev) { + case 1: + case 2: + mem_channel_number = igp_info->v21.umachannelnumber; + if (!mem_channel_number) + mem_channel_number = 1; + /* channel width is 64 */ + if (vram_width) + *vram_width = mem_channel_number * 64; + mem_type = igp_info->v21.memorytype; + if (vram_type) + *vram_type = convert_atom_mem_type_to_vram_type(adev, mem_type); + break; + default: + return -EINVAL; + } break; default: return -EINVAL; -- GitLab From 21702c8cae51535e09b91341a069503c6ef3d2a3 Mon Sep 17 00:00:00 2001 From: Prike Liang Date: Fri, 2 Oct 2020 10:58:55 -0400 Subject: [PATCH 0712/2012] drm/amdgpu: add green_sardine device id (v2) Add green_sardine PCI id support and map it to renoir asic type. v2: add apu flag Signed-off-by: Prike Liang Reviewed-by: Alex Deucher Reviewed-by: Huang Rui Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org # 5.10.x --- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 72efd579ec5ee..e191383f34f78 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -1085,6 +1085,7 @@ static const struct pci_device_id pciidlist[] = { /* Renoir */ {0x1002, 0x1636, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RENOIR|AMD_IS_APU}, + {0x1002, 0x1638, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RENOIR|AMD_IS_APU}, /* Navi12 */ {0x1002, 0x7360, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI12}, -- GitLab From 53f1e7f6a1720f8299b5283857eedc8f07d29533 Mon Sep 17 00:00:00 2001 From: mengwang Date: Wed, 12 Aug 2020 11:49:29 +0800 Subject: [PATCH 0713/2012] drm/amdgpu: add new device id for Renior add DID 0x164C into pciidlist under CHIP_RENOIR family. Signed-off-by: mengwang Reviewed-by: Huang Rui Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org # 5.10.x --- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 1 + drivers/gpu/drm/amd/amdgpu/soc15.c | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index e191383f34f78..7169fb5e3d9c4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -1086,6 +1086,7 @@ static const struct pci_device_id pciidlist[] = { /* Renoir */ {0x1002, 0x1636, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RENOIR|AMD_IS_APU}, {0x1002, 0x1638, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RENOIR|AMD_IS_APU}, + {0x1002, 0x164C, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RENOIR|AMD_IS_APU}, /* Navi12 */ {0x1002, 0x7360, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI12}, diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index 8a23636ecc27f..0b3516c4eefb3 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -1239,7 +1239,8 @@ static int soc15_common_early_init(void *handle) break; case CHIP_RENOIR: adev->asic_funcs = &soc15_asic_funcs; - if (adev->pdev->device == 0x1636) + if ((adev->pdev->device == 0x1636) || + (adev->pdev->device == 0x164c)) adev->apu_flags |= AMD_APU_IS_RENOIR; else adev->apu_flags |= AMD_APU_IS_GREEN_SARDINE; -- GitLab From 652562e5ff06d27b9b83c8166cb71845a55607ad Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Mon, 11 Jan 2021 15:19:23 +0100 Subject: [PATCH 0714/2012] can: length: can_fd_len2dlc(): simplify length calculcation If the length paramter in len2dlc() exceeds the size of the len2dlc array, we return 0xF. This is equal to the last 16 members of the array. This patch removes these members from the array, uses ARRAY_SIZE() for the length check, and returns CANFD_MAX_DLC (which is 0xf). Reviewed-by: Vincent Mailhol Link: https://lore.kernel.org/r/20210111141930.693847-9-mkl@pengutronix.de Signed-off-by: Marc Kleine-Budde --- drivers/net/can/dev/length.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/net/can/dev/length.c b/drivers/net/can/dev/length.c index 5e7d481717ea6..64673a8d1234e 100644 --- a/drivers/net/can/dev/length.c +++ b/drivers/net/can/dev/length.c @@ -27,15 +27,13 @@ static const u8 len2dlc[] = { 13, 13, 13, 13, 13, 13, 13, 13, /* 25 - 32 */ 14, 14, 14, 14, 14, 14, 14, 14, /* 33 - 40 */ 14, 14, 14, 14, 14, 14, 14, 14, /* 41 - 48 */ - 15, 15, 15, 15, 15, 15, 15, 15, /* 49 - 56 */ - 15, 15, 15, 15, 15, 15, 15, 15 /* 57 - 64 */ }; /* map the sanitized data length to an appropriate data length code */ u8 can_fd_len2dlc(u8 len) { - if (unlikely(len > 64)) - return 0xF; + if (len >= ARRAY_SIZE(len2dlc)) + return CANFD_MAX_DLC; return len2dlc[len]; } -- GitLab From 99b7beb0431a4b9728023e9169ed15af678d2c7f Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Mon, 11 Jan 2021 15:19:24 +0100 Subject: [PATCH 0715/2012] can: length: canfd_sanitize_len(): add function to sanitize CAN-FD data length The data field in CAN-FD frames have specifig frame length (0, 1, 2, 3, 4, 5, 6, 7, 8, 12, 16, 20, 24, 32, 48, 64). This function "rounds" up a given length to the next valid CAN-FD frame length. Reviewed-by: Vincent Mailhol Link: https://lore.kernel.org/r/20210111141930.693847-10-mkl@pengutronix.de Signed-off-by: Marc Kleine-Budde --- include/linux/can/length.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/include/linux/can/length.h b/include/linux/can/length.h index 156b9d17969a7..b2313b2a0b024 100644 --- a/include/linux/can/length.h +++ b/include/linux/can/length.h @@ -45,4 +45,10 @@ u8 can_fd_dlc2len(u8 dlc); /* map the sanitized data length to an appropriate data length code */ u8 can_fd_len2dlc(u8 len); +/* map the data length to an appropriate data link layer length */ +static inline u8 canfd_sanitize_len(u8 len) +{ + return can_fd_dlc2len(can_fd_len2dlc(len)); +} + #endif /* !_CAN_LENGTH_H */ -- GitLab From 85d99c3e2a13d542445342a1383a686dadeaac3d Mon Sep 17 00:00:00 2001 From: Vincent Mailhol Date: Mon, 11 Jan 2021 15:19:25 +0100 Subject: [PATCH 0716/2012] can: length: can_skb_get_frame_len(): introduce function to get data length of frame in data link layer This patch adds the function can_skb_get_frame_len() which returns the length of a CAN frame on the data link layer, including Start-of-frame, Identifier, various other bits, the actual data, the CRC, the End-of-frame, the Inter frame spacing. Co-developed-by: Arunachalam Santhanam Signed-off-by: Arunachalam Santhanam Co-developed-by: Vincent Mailhol Signed-off-by: Vincent Mailhol Acked-by: Vincent Mailhol Reviewed-by: Vincent Mailhol Co-developed-by: Marc Kleine-Budde Link: https://lore.kernel.org/r/20210111141930.693847-11-mkl@pengutronix.de Signed-off-by: Marc Kleine-Budde --- drivers/net/can/dev/length.c | 50 +++++++++++++++ include/linux/can/length.h | 120 +++++++++++++++++++++++++++++++++++ 2 files changed, 170 insertions(+) diff --git a/drivers/net/can/dev/length.c b/drivers/net/can/dev/length.c index 64673a8d1234e..d35c4e82314d1 100644 --- a/drivers/net/can/dev/length.c +++ b/drivers/net/can/dev/length.c @@ -38,3 +38,53 @@ u8 can_fd_len2dlc(u8 len) return len2dlc[len]; } EXPORT_SYMBOL_GPL(can_fd_len2dlc); + +/** + * can_skb_get_frame_len() - Calculate the CAN Frame length in bytes + * of a given skb. + * @skb: socket buffer of a CAN message. + * + * Do a rough calculation: bit stuffing is ignored and length in bits + * is rounded up to a length in bytes. + * + * Rationale: this function is to be used for the BQL functions + * (netdev_sent_queue() and netdev_completed_queue()) which expect a + * value in bytes. Just using skb->len is insufficient because it will + * return the constant value of CAN(FD)_MTU. Doing the bit stuffing + * calculation would be too expensive in term of computing resources + * for no noticeable gain. + * + * Remarks: The payload of CAN FD frames with BRS flag are sent at a + * different bitrate. Currently, the can-utils canbusload tool does + * not support CAN-FD yet and so we could not run any benchmark to + * measure the impact. There might be possible improvement here. + * + * Return: length in bytes. + */ +unsigned int can_skb_get_frame_len(const struct sk_buff *skb) +{ + const struct canfd_frame *cf = (const struct canfd_frame *)skb->data; + u8 len; + + if (can_is_canfd_skb(skb)) + len = canfd_sanitize_len(cf->len); + else if (cf->can_id & CAN_RTR_FLAG) + len = 0; + else + len = cf->len; + + if (can_is_canfd_skb(skb)) { + if (cf->can_id & CAN_EFF_FLAG) + len += CANFD_FRAME_OVERHEAD_EFF; + else + len += CANFD_FRAME_OVERHEAD_SFF; + } else { + if (cf->can_id & CAN_EFF_FLAG) + len += CAN_FRAME_OVERHEAD_EFF; + else + len += CAN_FRAME_OVERHEAD_SFF; + } + + return len; +} +EXPORT_SYMBOL_GPL(can_skb_get_frame_len); diff --git a/include/linux/can/length.h b/include/linux/can/length.h index b2313b2a0b024..6995092b774ec 100644 --- a/include/linux/can/length.h +++ b/include/linux/can/length.h @@ -1,10 +1,127 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* Copyright (C) 2020 Oliver Hartkopp + * Copyright (C) 2020 Marc Kleine-Budde */ #ifndef _CAN_LENGTH_H #define _CAN_LENGTH_H +/* + * Size of a Classical CAN Standard Frame + * + * Name of Field Bits + * --------------------------------------------------------- + * Start-of-frame 1 + * Identifier 11 + * Remote transmission request (RTR) 1 + * Identifier extension bit (IDE) 1 + * Reserved bit (r0) 1 + * Data length code (DLC) 4 + * Data field 0...64 + * CRC 15 + * CRC delimiter 1 + * ACK slot 1 + * ACK delimiter 1 + * End-of-frame (EOF) 7 + * Inter frame spacing 3 + * + * rounded up and ignoring bitstuffing + */ +#define CAN_FRAME_OVERHEAD_SFF DIV_ROUND_UP(47, 8) + +/* + * Size of a Classical CAN Extended Frame + * + * Name of Field Bits + * --------------------------------------------------------- + * Start-of-frame 1 + * Identifier A 11 + * Substitute remote request (SRR) 1 + * Identifier extension bit (IDE) 1 + * Identifier B 18 + * Remote transmission request (RTR) 1 + * Reserved bits (r1, r0) 2 + * Data length code (DLC) 4 + * Data field 0...64 + * CRC 15 + * CRC delimiter 1 + * ACK slot 1 + * ACK delimiter 1 + * End-of-frame (EOF) 7 + * Inter frame spacing 3 + * + * rounded up and ignoring bitstuffing + */ +#define CAN_FRAME_OVERHEAD_EFF DIV_ROUND_UP(67, 8) + +/* + * Size of a CAN-FD Standard Frame + * + * Name of Field Bits + * --------------------------------------------------------- + * Start-of-frame 1 + * Identifier 11 + * Reserved bit (r1) 1 + * Identifier extension bit (IDE) 1 + * Flexible data rate format (FDF) 1 + * Reserved bit (r0) 1 + * Bit Rate Switch (BRS) 1 + * Error Status Indicator (ESI) 1 + * Data length code (DLC) 4 + * Data field 0...512 + * Stuff Bit Count (SBC) 0...16: 4 20...64:5 + * CRC 0...16: 17 20...64:21 + * CRC delimiter (CD) 1 + * ACK slot (AS) 1 + * ACK delimiter (AD) 1 + * End-of-frame (EOF) 7 + * Inter frame spacing 3 + * + * assuming CRC21, rounded up and ignoring bitstuffing + */ +#define CANFD_FRAME_OVERHEAD_SFF DIV_ROUND_UP(61, 8) + +/* + * Size of a CAN-FD Extended Frame + * + * Name of Field Bits + * --------------------------------------------------------- + * Start-of-frame 1 + * Identifier A 11 + * Substitute remote request (SRR) 1 + * Identifier extension bit (IDE) 1 + * Identifier B 18 + * Reserved bit (r1) 1 + * Flexible data rate format (FDF) 1 + * Reserved bit (r0) 1 + * Bit Rate Switch (BRS) 1 + * Error Status Indicator (ESI) 1 + * Data length code (DLC) 4 + * Data field 0...512 + * Stuff Bit Count (SBC) 0...16: 4 20...64:5 + * CRC 0...16: 17 20...64:21 + * CRC delimiter (CD) 1 + * ACK slot (AS) 1 + * ACK delimiter (AD) 1 + * End-of-frame (EOF) 7 + * Inter frame spacing 3 + * + * assuming CRC21, rounded up and ignoring bitstuffing + */ +#define CANFD_FRAME_OVERHEAD_EFF DIV_ROUND_UP(80, 8) + +/* + * Maximum size of a Classical CAN frame + * (rounded up and ignoring bitstuffing) + */ +#define CAN_FRAME_LEN_MAX (CAN_FRAME_OVERHEAD_EFF + CAN_MAX_DLEN) + +/* + * Maximum size of a CAN-FD frame + * (rounded up and ignoring bitstuffing) + */ +#define CANFD_FRAME_LEN_MAX (CANFD_FRAME_OVERHEAD_EFF + CANFD_MAX_DLEN) + /* * can_cc_dlc2len(value) - convert a given data length code (dlc) of a * Classical CAN frame into a valid data length of max. 8 bytes. @@ -45,6 +162,9 @@ u8 can_fd_dlc2len(u8 dlc); /* map the sanitized data length to an appropriate data length code */ u8 can_fd_len2dlc(u8 len); +/* calculate the CAN Frame length in bytes of a given skb */ +unsigned int can_skb_get_frame_len(const struct sk_buff *skb); + /* map the data length to an appropriate data link layer length */ static inline u8 canfd_sanitize_len(u8 len) { -- GitLab From f0ef72febc9a6a569d92cdf6c7996015dfa8e8bb Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Mon, 11 Jan 2021 15:19:26 +0100 Subject: [PATCH 0717/2012] can: dev: extend struct can_skb_priv to hold CAN frame length In order to implement byte queue limits (bql) in CAN drivers, the length of the CAN frame needs to be passed into the networking stack after queueing and after transmission completion. To avoid to calculate this length twice, extend the struct can_skb_priv to hold the length of the CAN frame and extend __can_get_echo_skb() to return that value. Reviewed-by: Vincent Mailhol Link: https://lore.kernel.org/r/20210111141930.693847-12-mkl@pengutronix.de Signed-off-by: Marc Kleine-Budde --- drivers/net/can/dev/rx-offload.c | 2 +- drivers/net/can/dev/skb.c | 9 +++++++-- include/linux/can/skb.h | 4 +++- 3 files changed, 11 insertions(+), 4 deletions(-) diff --git a/drivers/net/can/dev/rx-offload.c b/drivers/net/can/dev/rx-offload.c index 3c1912c0430b6..6a26b5282df16 100644 --- a/drivers/net/can/dev/rx-offload.c +++ b/drivers/net/can/dev/rx-offload.c @@ -271,7 +271,7 @@ unsigned int can_rx_offload_get_echo_skb(struct can_rx_offload *offload, u8 len; int err; - skb = __can_get_echo_skb(dev, idx, &len); + skb = __can_get_echo_skb(dev, idx, &len, NULL); if (!skb) return 0; diff --git a/drivers/net/can/dev/skb.c b/drivers/net/can/dev/skb.c index 26cd597ff7714..24f782a234093 100644 --- a/drivers/net/can/dev/skb.c +++ b/drivers/net/can/dev/skb.c @@ -76,7 +76,8 @@ int can_put_echo_skb(struct sk_buff *skb, struct net_device *dev, EXPORT_SYMBOL_GPL(can_put_echo_skb); struct sk_buff * -__can_get_echo_skb(struct net_device *dev, unsigned int idx, u8 *len_ptr) +__can_get_echo_skb(struct net_device *dev, unsigned int idx, u8 *len_ptr, + unsigned int *frame_len_ptr) { struct can_priv *priv = netdev_priv(dev); @@ -91,6 +92,7 @@ __can_get_echo_skb(struct net_device *dev, unsigned int idx, u8 *len_ptr) * length is supported on both CAN and CANFD frames. */ struct sk_buff *skb = priv->echo_skb[idx]; + struct can_skb_priv *can_skb_priv = can_skb_prv(skb); struct canfd_frame *cf = (struct canfd_frame *)skb->data; /* get the real payload length for netdev statistics */ @@ -99,6 +101,9 @@ __can_get_echo_skb(struct net_device *dev, unsigned int idx, u8 *len_ptr) else *len_ptr = cf->len; + if (frame_len_ptr) + *frame_len_ptr = can_skb_priv->frame_len; + priv->echo_skb[idx] = NULL; return skb; @@ -118,7 +123,7 @@ unsigned int can_get_echo_skb(struct net_device *dev, unsigned int idx) struct sk_buff *skb; u8 len; - skb = __can_get_echo_skb(dev, idx, &len); + skb = __can_get_echo_skb(dev, idx, &len, NULL); if (!skb) return 0; diff --git a/include/linux/can/skb.h b/include/linux/can/skb.h index c90ebbd3008c5..5db9da30843c2 100644 --- a/include/linux/can/skb.h +++ b/include/linux/can/skb.h @@ -20,7 +20,7 @@ void can_flush_echo_skb(struct net_device *dev); int can_put_echo_skb(struct sk_buff *skb, struct net_device *dev, unsigned int idx); struct sk_buff *__can_get_echo_skb(struct net_device *dev, unsigned int idx, - u8 *len_ptr); + u8 *len_ptr, unsigned int *frame_len_ptr); unsigned int can_get_echo_skb(struct net_device *dev, unsigned int idx); void can_free_echo_skb(struct net_device *dev, unsigned int idx); struct sk_buff *alloc_can_skb(struct net_device *dev, struct can_frame **cf); @@ -42,11 +42,13 @@ struct sk_buff *alloc_can_err_skb(struct net_device *dev, * struct can_skb_priv - private additional data inside CAN sk_buffs * @ifindex: ifindex of the first interface the CAN frame appeared on * @skbcnt: atomic counter to have an unique id together with skb pointer + * @frame_len: length of CAN frame in data link layer * @cf: align to the following CAN frame at skb->data */ struct can_skb_priv { int ifindex; int skbcnt; + unsigned int frame_len; struct can_frame cf[]; }; -- GitLab From 1dcb6e57db833419483d0df2d956b1cc2a802683 Mon Sep 17 00:00:00 2001 From: Vincent Mailhol Date: Mon, 11 Jan 2021 15:19:27 +0100 Subject: [PATCH 0718/2012] can: dev: can_put_echo_skb(): extend to handle frame_len Add a frame_len argument to can_put_echo_skb() which is used to save length of the CAN frame into field frame_len of struct can_skb_priv so that it can be later used after transmission completion. Convert all users of this function, too. Drivers which implement BQL call can_put_echo_skb() with the output of can_skb_get_frame_len(skb) and drivers which do not simply pass zero as an input (in the same way that NULL would be given to can_get_echo_skb()). This way, we have a nice symmetry between the two echo functions. Link: https://lore.kernel.org/r/20210111061335.39983-1-mailhol.vincent@wanadoo.fr Signed-off-by: Marc Kleine-Budde Reviewed-by: Vincent Mailhol Link: https://lore.kernel.org/r/20210111141930.693847-13-mkl@pengutronix.de Signed-off-by: Vincent Mailhol --- drivers/net/can/at91_can.c | 2 +- drivers/net/can/c_can/c_can.c | 2 +- drivers/net/can/cc770/cc770.c | 2 +- drivers/net/can/dev/skb.c | 5 ++++- drivers/net/can/flexcan.c | 2 +- drivers/net/can/grcan.c | 2 +- drivers/net/can/ifi_canfd/ifi_canfd.c | 2 +- drivers/net/can/kvaser_pciefd.c | 2 +- drivers/net/can/m_can/m_can.c | 4 ++-- drivers/net/can/mscan/mscan.c | 2 +- drivers/net/can/pch_can.c | 2 +- drivers/net/can/peak_canfd/peak_canfd.c | 2 +- drivers/net/can/rcar/rcar_can.c | 2 +- drivers/net/can/rcar/rcar_canfd.c | 2 +- drivers/net/can/sja1000/sja1000.c | 2 +- drivers/net/can/softing/softing_main.c | 2 +- drivers/net/can/spi/hi311x.c | 2 +- drivers/net/can/spi/mcp251x.c | 2 +- drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c | 2 +- drivers/net/can/sun4i_can.c | 2 +- drivers/net/can/ti_hecc.c | 2 +- drivers/net/can/usb/ems_usb.c | 2 +- drivers/net/can/usb/esd_usb2.c | 2 +- drivers/net/can/usb/gs_usb.c | 2 +- drivers/net/can/usb/kvaser_usb/kvaser_usb_core.c | 2 +- drivers/net/can/usb/mcba_usb.c | 2 +- drivers/net/can/usb/peak_usb/pcan_usb_core.c | 2 +- drivers/net/can/usb/ucan.c | 2 +- drivers/net/can/usb/usb_8dev.c | 2 +- drivers/net/can/xilinx_can.c | 4 ++-- include/linux/can/skb.h | 2 +- 31 files changed, 36 insertions(+), 33 deletions(-) diff --git a/drivers/net/can/at91_can.c b/drivers/net/can/at91_can.c index 5284f0ab3b069..90b223a80ed45 100644 --- a/drivers/net/can/at91_can.c +++ b/drivers/net/can/at91_can.c @@ -484,7 +484,7 @@ static netdev_tx_t at91_start_xmit(struct sk_buff *skb, struct net_device *dev) stats->tx_bytes += cf->len; /* _NOTE_: subtract AT91_MB_TX_FIRST offset from mb! */ - can_put_echo_skb(skb, dev, mb - get_mb_tx_first(priv)); + can_put_echo_skb(skb, dev, mb - get_mb_tx_first(priv), 0); /* * we have to stop the queue and deliver all messages in case diff --git a/drivers/net/can/c_can/c_can.c b/drivers/net/can/c_can/c_can.c index 63f48b016ecd8..13638954a25c5 100644 --- a/drivers/net/can/c_can/c_can.c +++ b/drivers/net/can/c_can/c_can.c @@ -476,7 +476,7 @@ static netdev_tx_t c_can_start_xmit(struct sk_buff *skb, */ c_can_setup_tx_object(dev, IF_TX, frame, idx); priv->dlc[idx] = frame->len; - can_put_echo_skb(skb, dev, idx); + can_put_echo_skb(skb, dev, idx, 0); /* Update the active bits */ atomic_add((1 << idx), &priv->tx_active); diff --git a/drivers/net/can/cc770/cc770.c b/drivers/net/can/cc770/cc770.c index 8d9f332c35e0c..e53ca338368a8 100644 --- a/drivers/net/can/cc770/cc770.c +++ b/drivers/net/can/cc770/cc770.c @@ -702,7 +702,7 @@ static void cc770_tx_interrupt(struct net_device *dev, unsigned int o) stats->tx_bytes += cf->len; stats->tx_packets++; - can_put_echo_skb(priv->tx_skb, dev, 0); + can_put_echo_skb(priv->tx_skb, dev, 0, 0); can_get_echo_skb(dev, 0); priv->tx_skb = NULL; diff --git a/drivers/net/can/dev/skb.c b/drivers/net/can/dev/skb.c index 24f782a234093..c184b4dce19eb 100644 --- a/drivers/net/can/dev/skb.c +++ b/drivers/net/can/dev/skb.c @@ -38,7 +38,7 @@ void can_flush_echo_skb(struct net_device *dev) * priv->echo_skb, if necessary. */ int can_put_echo_skb(struct sk_buff *skb, struct net_device *dev, - unsigned int idx) + unsigned int idx, unsigned int frame_len) { struct can_priv *priv = netdev_priv(dev); @@ -62,6 +62,9 @@ int can_put_echo_skb(struct sk_buff *skb, struct net_device *dev, skb->ip_summed = CHECKSUM_UNNECESSARY; skb->dev = dev; + /* save frame_len to reuse it when transmission is completed */ + can_skb_prv(skb)->frame_len = frame_len; + /* save this skb for tx interrupt echo handling */ priv->echo_skb[idx] = skb; } else { diff --git a/drivers/net/can/flexcan.c b/drivers/net/can/flexcan.c index 7ab20a6b0d1db..202d08f8e1a40 100644 --- a/drivers/net/can/flexcan.c +++ b/drivers/net/can/flexcan.c @@ -815,7 +815,7 @@ static netdev_tx_t flexcan_start_xmit(struct sk_buff *skb, struct net_device *de priv->write(data, &priv->tx_mb->data[i / sizeof(u32)]); } - can_put_echo_skb(skb, dev, 0); + can_put_echo_skb(skb, dev, 0, 0); priv->write(can_id, &priv->tx_mb->can_id); priv->write(ctrl, &priv->tx_mb->can_ctrl); diff --git a/drivers/net/can/grcan.c b/drivers/net/can/grcan.c index f5d94a6925767..8086cdc100005 100644 --- a/drivers/net/can/grcan.c +++ b/drivers/net/can/grcan.c @@ -1448,7 +1448,7 @@ static netdev_tx_t grcan_start_xmit(struct sk_buff *skb, * taken. */ priv->txdlc[slotindex] = cf->len; /* Store dlc for statistics */ - can_put_echo_skb(skb, dev, slotindex); + can_put_echo_skb(skb, dev, slotindex, 0); /* Make sure everything is written before allowing hardware to * read from the memory diff --git a/drivers/net/can/ifi_canfd/ifi_canfd.c b/drivers/net/can/ifi_canfd/ifi_canfd.c index 86b0e1406a215..56ac9e1dace7a 100644 --- a/drivers/net/can/ifi_canfd/ifi_canfd.c +++ b/drivers/net/can/ifi_canfd/ifi_canfd.c @@ -922,7 +922,7 @@ static netdev_tx_t ifi_canfd_start_xmit(struct sk_buff *skb, writel(0, priv->base + IFI_CANFD_TXFIFO_REPEATCOUNT); writel(0, priv->base + IFI_CANFD_TXFIFO_SUSPEND_US); - can_put_echo_skb(skb, ndev, 0); + can_put_echo_skb(skb, ndev, 0, 0); /* Start the transmission */ writel(IFI_CANFD_TXSTCMD_ADD_MSG, priv->base + IFI_CANFD_TXSTCMD); diff --git a/drivers/net/can/kvaser_pciefd.c b/drivers/net/can/kvaser_pciefd.c index 969cedb9b0b60..0cf82f0646a34 100644 --- a/drivers/net/can/kvaser_pciefd.c +++ b/drivers/net/can/kvaser_pciefd.c @@ -778,7 +778,7 @@ static netdev_tx_t kvaser_pciefd_start_xmit(struct sk_buff *skb, spin_lock_irqsave(&can->echo_lock, irq_flags); /* Prepare and save echo skb in internal slot */ - can_put_echo_skb(skb, netdev, can->echo_idx); + can_put_echo_skb(skb, netdev, can->echo_idx, 0); /* Move echo index to the next slot */ can->echo_idx = (can->echo_idx + 1) % can->can.echo_skb_max; diff --git a/drivers/net/can/m_can/m_can.c b/drivers/net/can/m_can/m_can.c index da551fd0f5026..fff7432103cb2 100644 --- a/drivers/net/can/m_can/m_can.c +++ b/drivers/net/can/m_can/m_can.c @@ -1483,7 +1483,7 @@ static netdev_tx_t m_can_tx_handler(struct m_can_classdev *cdev) M_CAN_FIFO_DATA(i / 4), *(u32 *)(cf->data + i)); - can_put_echo_skb(skb, dev, 0); + can_put_echo_skb(skb, dev, 0, 0); if (cdev->can.ctrlmode & CAN_CTRLMODE_FD) { cccr = m_can_read(cdev, M_CAN_CCCR); @@ -1554,7 +1554,7 @@ static netdev_tx_t m_can_tx_handler(struct m_can_classdev *cdev) /* Push loopback echo. * Will be looped back on TX interrupt based on message marker */ - can_put_echo_skb(skb, dev, putidx); + can_put_echo_skb(skb, dev, putidx, 0); /* Enable TX FIFO element to start transfer */ m_can_write(cdev, M_CAN_TXBAR, (1 << putidx)); diff --git a/drivers/net/can/mscan/mscan.c b/drivers/net/can/mscan/mscan.c index 5ed00a1558e1d..a28fdaa411c65 100644 --- a/drivers/net/can/mscan/mscan.c +++ b/drivers/net/can/mscan/mscan.c @@ -270,7 +270,7 @@ static netdev_tx_t mscan_start_xmit(struct sk_buff *skb, struct net_device *dev) list_add_tail(&priv->tx_queue[buf_id].list, &priv->tx_head); - can_put_echo_skb(skb, dev, buf_id); + can_put_echo_skb(skb, dev, buf_id, 0); /* Enable interrupt. */ priv->tx_active |= 1 << buf_id; diff --git a/drivers/net/can/pch_can.c b/drivers/net/can/pch_can.c index 4f9e7ec192aa8..a4c35b48d8e97 100644 --- a/drivers/net/can/pch_can.c +++ b/drivers/net/can/pch_can.c @@ -924,7 +924,7 @@ static netdev_tx_t pch_xmit(struct sk_buff *skb, struct net_device *ndev) &priv->regs->ifregs[1].data[i / 2]); } - can_put_echo_skb(skb, ndev, tx_obj_no - PCH_RX_OBJ_END - 1); + can_put_echo_skb(skb, ndev, tx_obj_no - PCH_RX_OBJ_END - 1, 0); /* Set the size of the data. Update if2_mcont */ iowrite32(cf->len | PCH_IF_MCONT_NEWDAT | PCH_IF_MCONT_TXRQXT | diff --git a/drivers/net/can/peak_canfd/peak_canfd.c b/drivers/net/can/peak_canfd/peak_canfd.c index c5334b0c3038f..179a8e10fbb8d 100644 --- a/drivers/net/can/peak_canfd/peak_canfd.c +++ b/drivers/net/can/peak_canfd/peak_canfd.c @@ -716,7 +716,7 @@ static netdev_tx_t peak_canfd_start_xmit(struct sk_buff *skb, spin_lock_irqsave(&priv->echo_lock, flags); /* prepare and save echo skb in internal slot */ - can_put_echo_skb(skb, ndev, priv->echo_idx); + can_put_echo_skb(skb, ndev, priv->echo_idx, 0); /* move echo index to the next slot */ priv->echo_idx = (priv->echo_idx + 1) % priv->can.echo_skb_max; diff --git a/drivers/net/can/rcar/rcar_can.c b/drivers/net/can/rcar/rcar_can.c index c803327f8f79d..0b7e488bc4fe2 100644 --- a/drivers/net/can/rcar/rcar_can.c +++ b/drivers/net/can/rcar/rcar_can.c @@ -617,7 +617,7 @@ static netdev_tx_t rcar_can_start_xmit(struct sk_buff *skb, writeb(cf->len, &priv->regs->mb[RCAR_CAN_TX_FIFO_MBX].dlc); priv->tx_dlc[priv->tx_head % RCAR_CAN_FIFO_DEPTH] = cf->len; - can_put_echo_skb(skb, ndev, priv->tx_head % RCAR_CAN_FIFO_DEPTH); + can_put_echo_skb(skb, ndev, priv->tx_head % RCAR_CAN_FIFO_DEPTH, 0); priv->tx_head++; /* Start Tx: write 0xff to the TFPCR register to increment * the CPU-side pointer for the transmit FIFO to the next diff --git a/drivers/net/can/rcar/rcar_canfd.c b/drivers/net/can/rcar/rcar_canfd.c index 2778ed5c61d18..38376f29bc561 100644 --- a/drivers/net/can/rcar/rcar_canfd.c +++ b/drivers/net/can/rcar/rcar_canfd.c @@ -1390,7 +1390,7 @@ static netdev_tx_t rcar_canfd_start_xmit(struct sk_buff *skb, } priv->tx_len[priv->tx_head % RCANFD_FIFO_DEPTH] = cf->len; - can_put_echo_skb(skb, ndev, priv->tx_head % RCANFD_FIFO_DEPTH); + can_put_echo_skb(skb, ndev, priv->tx_head % RCANFD_FIFO_DEPTH, 0); spin_lock_irqsave(&priv->tx_lock, flags); priv->tx_head++; diff --git a/drivers/net/can/sja1000/sja1000.c b/drivers/net/can/sja1000/sja1000.c index b6a7003c51d26..e98482c7bf333 100644 --- a/drivers/net/can/sja1000/sja1000.c +++ b/drivers/net/can/sja1000/sja1000.c @@ -318,7 +318,7 @@ static netdev_tx_t sja1000_start_xmit(struct sk_buff *skb, for (i = 0; i < cf->len; i++) priv->write_reg(priv, dreg++, cf->data[i]); - can_put_echo_skb(skb, dev, 0); + can_put_echo_skb(skb, dev, 0, 0); if (priv->can.ctrlmode & CAN_CTRLMODE_ONE_SHOT) cmd_reg_val |= CMD_AT; diff --git a/drivers/net/can/softing/softing_main.c b/drivers/net/can/softing/softing_main.c index 40070c930202b..a5314448c5ae8 100644 --- a/drivers/net/can/softing/softing_main.c +++ b/drivers/net/can/softing/softing_main.c @@ -104,7 +104,7 @@ static netdev_tx_t softing_netdev_start_xmit(struct sk_buff *skb, card->tx.last_bus = priv->index; ++card->tx.pending; ++priv->tx.pending; - can_put_echo_skb(skb, dev, priv->tx.echo_put); + can_put_echo_skb(skb, dev, priv->tx.echo_put, 0); ++priv->tx.echo_put; if (priv->tx.echo_put >= TX_ECHO_SKB_MAX) priv->tx.echo_put = 0; diff --git a/drivers/net/can/spi/hi311x.c b/drivers/net/can/spi/hi311x.c index f9455de94786e..8c83a9e5a9e48 100644 --- a/drivers/net/can/spi/hi311x.c +++ b/drivers/net/can/spi/hi311x.c @@ -586,7 +586,7 @@ static void hi3110_tx_work_handler(struct work_struct *ws) frame = (struct can_frame *)priv->tx_skb->data; hi3110_hw_tx(spi, frame); priv->tx_len = 1 + frame->len; - can_put_echo_skb(priv->tx_skb, net, 0); + can_put_echo_skb(priv->tx_skb, net, 0, 0); priv->tx_skb = NULL; } } diff --git a/drivers/net/can/spi/mcp251x.c b/drivers/net/can/spi/mcp251x.c index 25859d16d06f8..40866754aafca 100644 --- a/drivers/net/can/spi/mcp251x.c +++ b/drivers/net/can/spi/mcp251x.c @@ -1002,7 +1002,7 @@ static void mcp251x_tx_work_handler(struct work_struct *ws) frame->len = CAN_FRAME_MAX_DATA_LEN; mcp251x_hw_tx(spi, frame, 0); priv->tx_len = 1 + frame->len; - can_put_echo_skb(priv->tx_skb, net, 0); + can_put_echo_skb(priv->tx_skb, net, 0, 0); priv->tx_skb = NULL; } } diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c index 36235afb0bc66..95bba456a4cda 100644 --- a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c +++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c @@ -2436,7 +2436,7 @@ static netdev_tx_t mcp251xfd_start_xmit(struct sk_buff *skb, if (tx_ring->head - tx_ring->tail >= tx_ring->obj_num) netif_stop_queue(ndev); - can_put_echo_skb(skb, ndev, tx_head); + can_put_echo_skb(skb, ndev, tx_head, 0); err = mcp251xfd_tx_obj_write(priv, tx_obj); if (err) diff --git a/drivers/net/can/sun4i_can.c b/drivers/net/can/sun4i_can.c index 783b63218b7b7..b75175d59104f 100644 --- a/drivers/net/can/sun4i_can.c +++ b/drivers/net/can/sun4i_can.c @@ -448,7 +448,7 @@ static netdev_tx_t sun4ican_start_xmit(struct sk_buff *skb, struct net_device *d writel(msg_flag_n, priv->base + SUN4I_REG_BUF0_ADDR); - can_put_echo_skb(skb, dev, 0); + can_put_echo_skb(skb, dev, 0, 0); if (priv->can.ctrlmode & CAN_CTRLMODE_LOOPBACK) sun4i_can_write_cmdreg(priv, SUN4I_CMD_SELF_RCV_REQ); diff --git a/drivers/net/can/ti_hecc.c b/drivers/net/can/ti_hecc.c index a6850ff0b55b6..485c19bc98c29 100644 --- a/drivers/net/can/ti_hecc.c +++ b/drivers/net/can/ti_hecc.c @@ -513,7 +513,7 @@ static netdev_tx_t ti_hecc_xmit(struct sk_buff *skb, struct net_device *ndev) be32_to_cpu(*(__be32 *)(cf->data + 4))); else *(u32 *)(cf->data + 4) = 0; - can_put_echo_skb(skb, ndev, mbxno); + can_put_echo_skb(skb, ndev, mbxno, 0); spin_lock_irqsave(&priv->mbx_lock, flags); --priv->tx_head; diff --git a/drivers/net/can/usb/ems_usb.c b/drivers/net/can/usb/ems_usb.c index 25eee4466364a..5e5330060464e 100644 --- a/drivers/net/can/usb/ems_usb.c +++ b/drivers/net/can/usb/ems_usb.c @@ -801,7 +801,7 @@ static netdev_tx_t ems_usb_start_xmit(struct sk_buff *skb, struct net_device *ne urb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP; usb_anchor_urb(urb, &dev->tx_submitted); - can_put_echo_skb(skb, netdev, context->echo_index); + can_put_echo_skb(skb, netdev, context->echo_index, 0); atomic_inc(&dev->active_tx_urbs); diff --git a/drivers/net/can/usb/esd_usb2.c b/drivers/net/can/usb/esd_usb2.c index 9eed75a4b678b..68d8a85f00c47 100644 --- a/drivers/net/can/usb/esd_usb2.c +++ b/drivers/net/can/usb/esd_usb2.c @@ -783,7 +783,7 @@ static netdev_tx_t esd_usb2_start_xmit(struct sk_buff *skb, usb_anchor_urb(urb, &priv->tx_submitted); - can_put_echo_skb(skb, netdev, context->echo_index); + can_put_echo_skb(skb, netdev, context->echo_index, 0); atomic_inc(&priv->active_tx_jobs); diff --git a/drivers/net/can/usb/gs_usb.c b/drivers/net/can/usb/gs_usb.c index 0487095e1fd04..5ce9ba5d29d61 100644 --- a/drivers/net/can/usb/gs_usb.c +++ b/drivers/net/can/usb/gs_usb.c @@ -525,7 +525,7 @@ static netdev_tx_t gs_can_start_xmit(struct sk_buff *skb, urb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP; usb_anchor_urb(urb, &dev->tx_submitted); - can_put_echo_skb(skb, netdev, idx); + can_put_echo_skb(skb, netdev, idx, 0); atomic_inc(&dev->active_tx_urbs); diff --git a/drivers/net/can/usb/kvaser_usb/kvaser_usb_core.c b/drivers/net/can/usb/kvaser_usb/kvaser_usb_core.c index e2d58846c40ca..2b7efd296758d 100644 --- a/drivers/net/can/usb/kvaser_usb/kvaser_usb_core.c +++ b/drivers/net/can/usb/kvaser_usb/kvaser_usb_core.c @@ -578,7 +578,7 @@ static netdev_tx_t kvaser_usb_start_xmit(struct sk_buff *skb, context->priv = priv; - can_put_echo_skb(skb, netdev, context->echo_index); + can_put_echo_skb(skb, netdev, context->echo_index, 0); usb_fill_bulk_urb(urb, dev->udev, usb_sndbulkpipe(dev->udev, diff --git a/drivers/net/can/usb/mcba_usb.c b/drivers/net/can/usb/mcba_usb.c index df54eb7d4b36b..5347c89992ce7 100644 --- a/drivers/net/can/usb/mcba_usb.c +++ b/drivers/net/can/usb/mcba_usb.c @@ -355,7 +355,7 @@ static netdev_tx_t mcba_usb_start_xmit(struct sk_buff *skb, if (cf->can_id & CAN_RTR_FLAG) usb_msg.dlc |= MCBA_DLC_RTR_MASK; - can_put_echo_skb(skb, priv->netdev, ctx->ndx); + can_put_echo_skb(skb, priv->netdev, ctx->ndx, 0); err = mcba_usb_xmit(priv, (struct mcba_usb_msg *)&usb_msg, ctx); if (err) diff --git a/drivers/net/can/usb/peak_usb/pcan_usb_core.c b/drivers/net/can/usb/peak_usb/pcan_usb_core.c index 251835ea15aa7..95672750419a2 100644 --- a/drivers/net/can/usb/peak_usb/pcan_usb_core.c +++ b/drivers/net/can/usb/peak_usb/pcan_usb_core.c @@ -365,7 +365,7 @@ static netdev_tx_t peak_usb_ndo_start_xmit(struct sk_buff *skb, usb_anchor_urb(urb, &dev->tx_submitted); - can_put_echo_skb(skb, netdev, context->echo_index); + can_put_echo_skb(skb, netdev, context->echo_index, 0); atomic_inc(&dev->active_tx_urbs); diff --git a/drivers/net/can/usb/ucan.c b/drivers/net/can/usb/ucan.c index 7d92da8954fe9..5add27614e2bc 100644 --- a/drivers/net/can/usb/ucan.c +++ b/drivers/net/can/usb/ucan.c @@ -1137,7 +1137,7 @@ static netdev_tx_t ucan_start_xmit(struct sk_buff *skb, /* put the skb on can loopback stack */ spin_lock_irqsave(&up->echo_skb_lock, flags); - can_put_echo_skb(skb, up->netdev, echo_index); + can_put_echo_skb(skb, up->netdev, echo_index, 0); spin_unlock_irqrestore(&up->echo_skb_lock, flags); /* transmit it */ diff --git a/drivers/net/can/usb/usb_8dev.c b/drivers/net/can/usb/usb_8dev.c index 44478304ff469..2e824d9d8167e 100644 --- a/drivers/net/can/usb/usb_8dev.c +++ b/drivers/net/can/usb/usb_8dev.c @@ -664,7 +664,7 @@ static netdev_tx_t usb_8dev_start_xmit(struct sk_buff *skb, urb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP; usb_anchor_urb(urb, &priv->tx_submitted); - can_put_echo_skb(skb, netdev, context->echo_index); + can_put_echo_skb(skb, netdev, context->echo_index, 0); atomic_inc(&priv->active_tx_urbs); diff --git a/drivers/net/can/xilinx_can.c b/drivers/net/can/xilinx_can.c index 3f54edee92ebf..8d5132a3f2c9e 100644 --- a/drivers/net/can/xilinx_can.c +++ b/drivers/net/can/xilinx_can.c @@ -592,9 +592,9 @@ static void xcan_write_frame(struct net_device *ndev, struct sk_buff *skb, if (!(priv->devtype.flags & XCAN_FLAG_TX_MAILBOXES) && (priv->devtype.flags & XCAN_FLAG_TXFEMP)) - can_put_echo_skb(skb, ndev, priv->tx_head % priv->tx_max); + can_put_echo_skb(skb, ndev, priv->tx_head % priv->tx_max, 0); else - can_put_echo_skb(skb, ndev, 0); + can_put_echo_skb(skb, ndev, 0, 0); priv->tx_head++; diff --git a/include/linux/can/skb.h b/include/linux/can/skb.h index 5db9da30843c2..eaac4a637ae01 100644 --- a/include/linux/can/skb.h +++ b/include/linux/can/skb.h @@ -18,7 +18,7 @@ void can_flush_echo_skb(struct net_device *dev); int can_put_echo_skb(struct sk_buff *skb, struct net_device *dev, - unsigned int idx); + unsigned int idx, unsigned int frame_len); struct sk_buff *__can_get_echo_skb(struct net_device *dev, unsigned int idx, u8 *len_ptr, unsigned int *frame_len_ptr); unsigned int can_get_echo_skb(struct net_device *dev, unsigned int idx); -- GitLab From 9420e1d495e2a3b5f673148b7e3ebc861b1441f7 Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Mon, 11 Jan 2021 15:19:28 +0100 Subject: [PATCH 0719/2012] can: dev: can_get_echo_skb(): extend to return can frame length In order to implement byte queue limits (bql) in CAN drivers, the length of the CAN frame needs to be passed into the networking stack after queueing and after transmission completion. To avoid to calculate this length twice, extend can_get_echo_skb() to return that value. Convert all users of this function, too. Reviewed-by: Vincent Mailhol Link: https://lore.kernel.org/r/20210111141930.693847-14-mkl@pengutronix.de Signed-off-by: Marc Kleine-Budde --- drivers/net/can/at91_can.c | 2 +- drivers/net/can/c_can/c_can.c | 2 +- drivers/net/can/cc770/cc770.c | 2 +- drivers/net/can/dev/skb.c | 5 +++-- drivers/net/can/grcan.c | 2 +- drivers/net/can/ifi_canfd/ifi_canfd.c | 2 +- drivers/net/can/kvaser_pciefd.c | 4 ++-- drivers/net/can/m_can/m_can.c | 4 ++-- drivers/net/can/mscan/mscan.c | 2 +- drivers/net/can/pch_can.c | 2 +- drivers/net/can/peak_canfd/peak_canfd.c | 2 +- drivers/net/can/rcar/rcar_can.c | 2 +- drivers/net/can/rcar/rcar_canfd.c | 2 +- drivers/net/can/sja1000/sja1000.c | 2 +- drivers/net/can/softing/softing_main.c | 2 +- drivers/net/can/spi/hi311x.c | 2 +- drivers/net/can/spi/mcp251x.c | 2 +- drivers/net/can/sun4i_can.c | 2 +- drivers/net/can/usb/ems_usb.c | 2 +- drivers/net/can/usb/esd_usb2.c | 2 +- drivers/net/can/usb/gs_usb.c | 2 +- drivers/net/can/usb/kvaser_usb/kvaser_usb_hydra.c | 2 +- drivers/net/can/usb/kvaser_usb/kvaser_usb_leaf.c | 2 +- drivers/net/can/usb/mcba_usb.c | 2 +- drivers/net/can/usb/peak_usb/pcan_usb_core.c | 2 +- drivers/net/can/usb/ucan.c | 2 +- drivers/net/can/usb/usb_8dev.c | 2 +- drivers/net/can/xilinx_can.c | 2 +- include/linux/can/skb.h | 3 ++- 29 files changed, 34 insertions(+), 32 deletions(-) diff --git a/drivers/net/can/at91_can.c b/drivers/net/can/at91_can.c index 90b223a80ed45..9ad9b39f480e7 100644 --- a/drivers/net/can/at91_can.c +++ b/drivers/net/can/at91_can.c @@ -856,7 +856,7 @@ static void at91_irq_tx(struct net_device *dev, u32 reg_sr) if (likely(reg_msr & AT91_MSR_MRDY && ~reg_msr & AT91_MSR_MABT)) { /* _NOTE_: subtract AT91_MB_TX_FIRST offset from mb! */ - can_get_echo_skb(dev, mb - get_mb_tx_first(priv)); + can_get_echo_skb(dev, mb - get_mb_tx_first(priv), NULL); dev->stats.tx_packets++; can_led_event(dev, CAN_LED_EVENT_TX); } diff --git a/drivers/net/can/c_can/c_can.c b/drivers/net/can/c_can/c_can.c index 13638954a25c5..ef474bae47a14 100644 --- a/drivers/net/can/c_can/c_can.c +++ b/drivers/net/can/c_can/c_can.c @@ -733,7 +733,7 @@ static void c_can_do_tx(struct net_device *dev) pend &= ~(1 << idx); obj = idx + C_CAN_MSG_OBJ_TX_FIRST; c_can_inval_tx_object(dev, IF_RX, obj); - can_get_echo_skb(dev, idx); + can_get_echo_skb(dev, idx, NULL); bytes += priv->dlc[idx]; pkts++; } diff --git a/drivers/net/can/cc770/cc770.c b/drivers/net/can/cc770/cc770.c index e53ca338368a8..f8a130f594e2e 100644 --- a/drivers/net/can/cc770/cc770.c +++ b/drivers/net/can/cc770/cc770.c @@ -703,7 +703,7 @@ static void cc770_tx_interrupt(struct net_device *dev, unsigned int o) stats->tx_packets++; can_put_echo_skb(priv->tx_skb, dev, 0, 0); - can_get_echo_skb(dev, 0); + can_get_echo_skb(dev, 0, NULL); priv->tx_skb = NULL; netif_wake_queue(dev); diff --git a/drivers/net/can/dev/skb.c b/drivers/net/can/dev/skb.c index c184b4dce19eb..53683d4312f18 100644 --- a/drivers/net/can/dev/skb.c +++ b/drivers/net/can/dev/skb.c @@ -121,12 +121,13 @@ __can_get_echo_skb(struct net_device *dev, unsigned int idx, u8 *len_ptr, * is handled in the device driver. The driver must protect * access to priv->echo_skb, if necessary. */ -unsigned int can_get_echo_skb(struct net_device *dev, unsigned int idx) +unsigned int can_get_echo_skb(struct net_device *dev, unsigned int idx, + unsigned int *frame_len_ptr) { struct sk_buff *skb; u8 len; - skb = __can_get_echo_skb(dev, idx, &len, NULL); + skb = __can_get_echo_skb(dev, idx, &len, frame_len_ptr); if (!skb) return 0; diff --git a/drivers/net/can/grcan.c b/drivers/net/can/grcan.c index 8086cdc100005..4a84532905309 100644 --- a/drivers/net/can/grcan.c +++ b/drivers/net/can/grcan.c @@ -517,7 +517,7 @@ static int catch_up_echo_skb(struct net_device *dev, int budget, bool echo) stats->tx_packets++; stats->tx_bytes += priv->txdlc[i]; priv->txdlc[i] = 0; - can_get_echo_skb(dev, i); + can_get_echo_skb(dev, i, NULL); } else { /* For cleanup of untransmitted messages */ can_free_echo_skb(dev, i); diff --git a/drivers/net/can/ifi_canfd/ifi_canfd.c b/drivers/net/can/ifi_canfd/ifi_canfd.c index 56ac9e1dace7a..5bb957a26bc69 100644 --- a/drivers/net/can/ifi_canfd/ifi_canfd.c +++ b/drivers/net/can/ifi_canfd/ifi_canfd.c @@ -629,7 +629,7 @@ static irqreturn_t ifi_canfd_isr(int irq, void *dev_id) /* TX IRQ */ if (isr & IFI_CANFD_INTERRUPT_TXFIFO_REMOVE) { - stats->tx_bytes += can_get_echo_skb(ndev, 0); + stats->tx_bytes += can_get_echo_skb(ndev, 0, NULL); stats->tx_packets++; can_led_event(ndev, CAN_LED_EVENT_TX); } diff --git a/drivers/net/can/kvaser_pciefd.c b/drivers/net/can/kvaser_pciefd.c index 0cf82f0646a34..37e05010ca914 100644 --- a/drivers/net/can/kvaser_pciefd.c +++ b/drivers/net/can/kvaser_pciefd.c @@ -1467,7 +1467,7 @@ static int kvaser_pciefd_handle_eack_packet(struct kvaser_pciefd *pcie, can->reg_base + KVASER_PCIEFD_KCAN_CTRL_REG); } else { int echo_idx = p->header[0] & KVASER_PCIEFD_PACKET_SEQ_MSK; - int dlc = can_get_echo_skb(can->can.dev, echo_idx); + int dlc = can_get_echo_skb(can->can.dev, echo_idx, NULL); struct net_device_stats *stats = &can->can.dev->stats; stats->tx_bytes += dlc; @@ -1533,7 +1533,7 @@ static int kvaser_pciefd_handle_ack_packet(struct kvaser_pciefd *pcie, netdev_dbg(can->can.dev, "Packet was flushed\n"); } else { int echo_idx = p->header[0] & KVASER_PCIEFD_PACKET_SEQ_MSK; - int dlc = can_get_echo_skb(can->can.dev, echo_idx); + int dlc = can_get_echo_skb(can->can.dev, echo_idx, NULL); u8 count = ioread32(can->reg_base + KVASER_PCIEFD_KCAN_TX_NPACKETS_REG) & 0xff; diff --git a/drivers/net/can/m_can/m_can.c b/drivers/net/can/m_can/m_can.c index fff7432103cb2..3752520a7d4b7 100644 --- a/drivers/net/can/m_can/m_can.c +++ b/drivers/net/can/m_can/m_can.c @@ -930,7 +930,7 @@ static void m_can_echo_tx_event(struct net_device *dev) (fgi << TXEFA_EFAI_SHIFT))); /* update stats */ - stats->tx_bytes += can_get_echo_skb(dev, msg_mark); + stats->tx_bytes += can_get_echo_skb(dev, msg_mark, NULL); stats->tx_packets++; } } @@ -972,7 +972,7 @@ static irqreturn_t m_can_isr(int irq, void *dev_id) if (cdev->version == 30) { if (ir & IR_TC) { /* Transmission Complete Interrupt*/ - stats->tx_bytes += can_get_echo_skb(dev, 0); + stats->tx_bytes += can_get_echo_skb(dev, 0, NULL); stats->tx_packets++; can_led_event(dev, CAN_LED_EVENT_TX); netif_wake_queue(dev); diff --git a/drivers/net/can/mscan/mscan.c b/drivers/net/can/mscan/mscan.c index a28fdaa411c65..fa32e418eb296 100644 --- a/drivers/net/can/mscan/mscan.c +++ b/drivers/net/can/mscan/mscan.c @@ -448,7 +448,7 @@ static irqreturn_t mscan_isr(int irq, void *dev_id) out_8(®s->cantbsel, mask); stats->tx_bytes += in_8(®s->tx.dlr); stats->tx_packets++; - can_get_echo_skb(dev, entry->id); + can_get_echo_skb(dev, entry->id, NULL); priv->tx_active &= ~mask; list_del(pos); } diff --git a/drivers/net/can/pch_can.c b/drivers/net/can/pch_can.c index a4c35b48d8e97..92a54a5fd4c50 100644 --- a/drivers/net/can/pch_can.c +++ b/drivers/net/can/pch_can.c @@ -711,7 +711,7 @@ static void pch_can_tx_complete(struct net_device *ndev, u32 int_stat) struct net_device_stats *stats = &(priv->ndev->stats); u32 dlc; - can_get_echo_skb(ndev, int_stat - PCH_RX_OBJ_END - 1); + can_get_echo_skb(ndev, int_stat - PCH_RX_OBJ_END - 1, NULL); iowrite32(PCH_CMASK_RX_TX_GET | PCH_CMASK_CLRINTPND, &priv->regs->ifregs[1].cmask); pch_can_rw_msg_obj(&priv->regs->ifregs[1].creq, int_stat); diff --git a/drivers/net/can/peak_canfd/peak_canfd.c b/drivers/net/can/peak_canfd/peak_canfd.c index 179a8e10fbb8d..00847cbaf7b62 100644 --- a/drivers/net/can/peak_canfd/peak_canfd.c +++ b/drivers/net/can/peak_canfd/peak_canfd.c @@ -266,7 +266,7 @@ static int pucan_handle_can_rx(struct peak_canfd_priv *priv, unsigned long flags; spin_lock_irqsave(&priv->echo_lock, flags); - can_get_echo_skb(priv->ndev, msg->client); + can_get_echo_skb(priv->ndev, msg->client, NULL); /* count bytes of the echo instead of skb */ stats->tx_bytes += cf_len; diff --git a/drivers/net/can/rcar/rcar_can.c b/drivers/net/can/rcar/rcar_can.c index 0b7e488bc4fe2..4870c4ea190a9 100644 --- a/drivers/net/can/rcar/rcar_can.c +++ b/drivers/net/can/rcar/rcar_can.c @@ -386,7 +386,7 @@ static void rcar_can_tx_done(struct net_device *ndev) stats->tx_bytes += priv->tx_dlc[priv->tx_tail % RCAR_CAN_FIFO_DEPTH]; priv->tx_dlc[priv->tx_tail % RCAR_CAN_FIFO_DEPTH] = 0; - can_get_echo_skb(ndev, priv->tx_tail % RCAR_CAN_FIFO_DEPTH); + can_get_echo_skb(ndev, priv->tx_tail % RCAR_CAN_FIFO_DEPTH, NULL); priv->tx_tail++; netif_wake_queue(ndev); } diff --git a/drivers/net/can/rcar/rcar_canfd.c b/drivers/net/can/rcar/rcar_canfd.c index 38376f29bc561..d8d233e629904 100644 --- a/drivers/net/can/rcar/rcar_canfd.c +++ b/drivers/net/can/rcar/rcar_canfd.c @@ -1044,7 +1044,7 @@ static void rcar_canfd_tx_done(struct net_device *ndev) stats->tx_packets++; stats->tx_bytes += priv->tx_len[sent]; priv->tx_len[sent] = 0; - can_get_echo_skb(ndev, sent); + can_get_echo_skb(ndev, sent, NULL); spin_lock_irqsave(&priv->tx_lock, flags); priv->tx_tail++; diff --git a/drivers/net/can/sja1000/sja1000.c b/drivers/net/can/sja1000/sja1000.c index e98482c7bf333..9e86488ba55f1 100644 --- a/drivers/net/can/sja1000/sja1000.c +++ b/drivers/net/can/sja1000/sja1000.c @@ -531,7 +531,7 @@ irqreturn_t sja1000_interrupt(int irq, void *dev_id) stats->tx_bytes += priv->read_reg(priv, SJA1000_FI) & 0xf; stats->tx_packets++; - can_get_echo_skb(dev, 0); + can_get_echo_skb(dev, 0, NULL); } netif_wake_queue(dev); can_led_event(dev, CAN_LED_EVENT_TX); diff --git a/drivers/net/can/softing/softing_main.c b/drivers/net/can/softing/softing_main.c index a5314448c5ae8..c44f3411e5612 100644 --- a/drivers/net/can/softing/softing_main.c +++ b/drivers/net/can/softing/softing_main.c @@ -284,7 +284,7 @@ static int softing_handle_1(struct softing *card) skb = priv->can.echo_skb[priv->tx.echo_get]; if (skb) skb->tstamp = ktime; - can_get_echo_skb(netdev, priv->tx.echo_get); + can_get_echo_skb(netdev, priv->tx.echo_get, NULL); ++priv->tx.echo_get; if (priv->tx.echo_get >= TX_ECHO_SKB_MAX) priv->tx.echo_get = 0; diff --git a/drivers/net/can/spi/hi311x.c b/drivers/net/can/spi/hi311x.c index 8c83a9e5a9e48..c3e020c901116 100644 --- a/drivers/net/can/spi/hi311x.c +++ b/drivers/net/can/spi/hi311x.c @@ -725,7 +725,7 @@ static irqreturn_t hi3110_can_ist(int irq, void *dev_id) net->stats.tx_bytes += priv->tx_len - 1; can_led_event(net, CAN_LED_EVENT_TX); if (priv->tx_len) { - can_get_echo_skb(net, 0); + can_get_echo_skb(net, 0, NULL); priv->tx_len = 0; } netif_wake_queue(net); diff --git a/drivers/net/can/spi/mcp251x.c b/drivers/net/can/spi/mcp251x.c index 40866754aafca..f69fb4238a654 100644 --- a/drivers/net/can/spi/mcp251x.c +++ b/drivers/net/can/spi/mcp251x.c @@ -1171,7 +1171,7 @@ static irqreturn_t mcp251x_can_ist(int irq, void *dev_id) net->stats.tx_bytes += priv->tx_len - 1; can_led_event(net, CAN_LED_EVENT_TX); if (priv->tx_len) { - can_get_echo_skb(net, 0); + can_get_echo_skb(net, 0, NULL); priv->tx_len = 0; } netif_wake_queue(net); diff --git a/drivers/net/can/sun4i_can.c b/drivers/net/can/sun4i_can.c index b75175d59104f..54aa7c25c4de1 100644 --- a/drivers/net/can/sun4i_can.c +++ b/drivers/net/can/sun4i_can.c @@ -655,7 +655,7 @@ static irqreturn_t sun4i_can_interrupt(int irq, void *dev_id) readl(priv->base + SUN4I_REG_RBUF_RBACK_START_ADDR) & 0xf; stats->tx_packets++; - can_get_echo_skb(dev, 0); + can_get_echo_skb(dev, 0, NULL); netif_wake_queue(dev); can_led_event(dev, CAN_LED_EVENT_TX); } diff --git a/drivers/net/can/usb/ems_usb.c b/drivers/net/can/usb/ems_usb.c index 5e5330060464e..18f40eb203605 100644 --- a/drivers/net/can/usb/ems_usb.c +++ b/drivers/net/can/usb/ems_usb.c @@ -518,7 +518,7 @@ static void ems_usb_write_bulk_callback(struct urb *urb) netdev->stats.tx_packets++; netdev->stats.tx_bytes += context->dlc; - can_get_echo_skb(netdev, context->echo_index); + can_get_echo_skb(netdev, context->echo_index, NULL); /* Release context */ context->echo_index = MAX_TX_URBS; diff --git a/drivers/net/can/usb/esd_usb2.c b/drivers/net/can/usb/esd_usb2.c index 68d8a85f00c47..562acbf454fd6 100644 --- a/drivers/net/can/usb/esd_usb2.c +++ b/drivers/net/can/usb/esd_usb2.c @@ -357,7 +357,7 @@ static void esd_usb2_tx_done_msg(struct esd_usb2_net_priv *priv, if (!msg->msg.txdone.status) { stats->tx_packets++; stats->tx_bytes += context->len; - can_get_echo_skb(netdev, context->echo_index); + can_get_echo_skb(netdev, context->echo_index, NULL); } else { stats->tx_errors++; can_free_echo_skb(netdev, context->echo_index); diff --git a/drivers/net/can/usb/gs_usb.c b/drivers/net/can/usb/gs_usb.c index 5ce9ba5d29d61..a00dc19044151 100644 --- a/drivers/net/can/usb/gs_usb.c +++ b/drivers/net/can/usb/gs_usb.c @@ -370,7 +370,7 @@ static void gs_usb_receive_bulk_callback(struct urb *urb) goto resubmit_urb; } - can_get_echo_skb(netdev, hf->echo_id); + can_get_echo_skb(netdev, hf->echo_id, NULL); gs_free_tx_context(txc); diff --git a/drivers/net/can/usb/kvaser_usb/kvaser_usb_hydra.c b/drivers/net/can/usb/kvaser_usb/kvaser_usb_hydra.c index 480bd2ecb2969..dcee8dc828ecc 100644 --- a/drivers/net/can/usb/kvaser_usb/kvaser_usb_hydra.c +++ b/drivers/net/can/usb/kvaser_usb/kvaser_usb_hydra.c @@ -1151,7 +1151,7 @@ static void kvaser_usb_hydra_tx_acknowledge(const struct kvaser_usb *dev, spin_lock_irqsave(&priv->tx_contexts_lock, irq_flags); - can_get_echo_skb(priv->netdev, context->echo_index); + can_get_echo_skb(priv->netdev, context->echo_index, NULL); context->echo_index = dev->max_tx_urbs; --priv->active_tx_contexts; netif_wake_queue(priv->netdev); diff --git a/drivers/net/can/usb/kvaser_usb/kvaser_usb_leaf.c b/drivers/net/can/usb/kvaser_usb/kvaser_usb_leaf.c index 98c016ef0607d..59ba7c7beec00 100644 --- a/drivers/net/can/usb/kvaser_usb/kvaser_usb_leaf.c +++ b/drivers/net/can/usb/kvaser_usb/kvaser_usb_leaf.c @@ -594,7 +594,7 @@ static void kvaser_usb_leaf_tx_acknowledge(const struct kvaser_usb *dev, spin_lock_irqsave(&priv->tx_contexts_lock, flags); - can_get_echo_skb(priv->netdev, context->echo_index); + can_get_echo_skb(priv->netdev, context->echo_index, NULL); context->echo_index = dev->max_tx_urbs; --priv->active_tx_contexts; netif_wake_queue(priv->netdev); diff --git a/drivers/net/can/usb/mcba_usb.c b/drivers/net/can/usb/mcba_usb.c index 5347c89992ce7..4232a7126c1bd 100644 --- a/drivers/net/can/usb/mcba_usb.c +++ b/drivers/net/can/usb/mcba_usb.c @@ -237,7 +237,7 @@ static void mcba_usb_write_bulk_callback(struct urb *urb) netdev->stats.tx_bytes += ctx->dlc; can_led_event(netdev, CAN_LED_EVENT_TX); - can_get_echo_skb(netdev, ctx->ndx); + can_get_echo_skb(netdev, ctx->ndx, NULL); } if (urb->status) diff --git a/drivers/net/can/usb/peak_usb/pcan_usb_core.c b/drivers/net/can/usb/peak_usb/pcan_usb_core.c index 95672750419a2..573b11559d733 100644 --- a/drivers/net/can/usb/peak_usb/pcan_usb_core.c +++ b/drivers/net/can/usb/peak_usb/pcan_usb_core.c @@ -309,7 +309,7 @@ static void peak_usb_write_bulk_callback(struct urb *urb) } /* should always release echo skb and corresponding context */ - can_get_echo_skb(netdev, context->echo_index); + can_get_echo_skb(netdev, context->echo_index, NULL); context->echo_index = PCAN_USB_MAX_TX_URBS; /* do wakeup tx queue in case of success only */ diff --git a/drivers/net/can/usb/ucan.c b/drivers/net/can/usb/ucan.c index 5add27614e2bc..fa403c080871e 100644 --- a/drivers/net/can/usb/ucan.c +++ b/drivers/net/can/usb/ucan.c @@ -672,7 +672,7 @@ static void ucan_tx_complete_msg(struct ucan_priv *up, /* update statistics */ up->netdev->stats.tx_packets++; up->netdev->stats.tx_bytes += dlc; - can_get_echo_skb(up->netdev, echo_index); + can_get_echo_skb(up->netdev, echo_index, NULL); } else { up->netdev->stats.tx_dropped++; can_free_echo_skb(up->netdev, echo_index); diff --git a/drivers/net/can/usb/usb_8dev.c b/drivers/net/can/usb/usb_8dev.c index 2e824d9d8167e..e8c42430a4fcd 100644 --- a/drivers/net/can/usb/usb_8dev.c +++ b/drivers/net/can/usb/usb_8dev.c @@ -585,7 +585,7 @@ static void usb_8dev_write_bulk_callback(struct urb *urb) netdev->stats.tx_packets++; netdev->stats.tx_bytes += context->dlc; - can_get_echo_skb(netdev, context->echo_index); + can_get_echo_skb(netdev, context->echo_index, NULL); can_led_event(netdev, CAN_LED_EVENT_TX); diff --git a/drivers/net/can/xilinx_can.c b/drivers/net/can/xilinx_can.c index 8d5132a3f2c9e..37fa19c62d733 100644 --- a/drivers/net/can/xilinx_can.c +++ b/drivers/net/can/xilinx_can.c @@ -1292,7 +1292,7 @@ static void xcan_tx_interrupt(struct net_device *ndev, u32 isr) while (frames_sent--) { stats->tx_bytes += can_get_echo_skb(ndev, priv->tx_tail % - priv->tx_max); + priv->tx_max, NULL); priv->tx_tail++; stats->tx_packets++; } diff --git a/include/linux/can/skb.h b/include/linux/can/skb.h index eaac4a637ae01..685f34cfba207 100644 --- a/include/linux/can/skb.h +++ b/include/linux/can/skb.h @@ -21,7 +21,8 @@ int can_put_echo_skb(struct sk_buff *skb, struct net_device *dev, unsigned int idx, unsigned int frame_len); struct sk_buff *__can_get_echo_skb(struct net_device *dev, unsigned int idx, u8 *len_ptr, unsigned int *frame_len_ptr); -unsigned int can_get_echo_skb(struct net_device *dev, unsigned int idx); +unsigned int can_get_echo_skb(struct net_device *dev, unsigned int idx, + unsigned int *frame_len_ptr); void can_free_echo_skb(struct net_device *dev, unsigned int idx); struct sk_buff *alloc_can_skb(struct net_device *dev, struct can_frame **cf); struct sk_buff *alloc_canfd_skb(struct net_device *dev, -- GitLab From 99842c9685ab00fa8689e8bd12bde62b706b6198 Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Mon, 11 Jan 2021 15:19:29 +0100 Subject: [PATCH 0720/2012] can: dev: can_rx_offload_get_echo_skb(): extend to return can frame length In order to implement byte queue limits (bql) in CAN drivers, the length of the CAN frame needs to be passed into the networking stack after queueing and after transmission completion. To avoid to calculate this length twice, extend can_rx_offload_get_echo_skb() to return that value. Convert all users of this function, too. Reviewed-by: Vincent Mailhol Link: https://lore.kernel.org/r/20210111141930.693847-15-mkl@pengutronix.de Signed-off-by: Marc Kleine-Budde --- drivers/net/can/dev/rx-offload.c | 5 +++-- drivers/net/can/flexcan.c | 5 +++-- drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c | 2 +- drivers/net/can/ti_hecc.c | 2 +- include/linux/can/rx-offload.h | 3 ++- 5 files changed, 10 insertions(+), 7 deletions(-) diff --git a/drivers/net/can/dev/rx-offload.c b/drivers/net/can/dev/rx-offload.c index 6a26b5282df16..ab2c1543786cf 100644 --- a/drivers/net/can/dev/rx-offload.c +++ b/drivers/net/can/dev/rx-offload.c @@ -263,7 +263,8 @@ int can_rx_offload_queue_sorted(struct can_rx_offload *offload, EXPORT_SYMBOL_GPL(can_rx_offload_queue_sorted); unsigned int can_rx_offload_get_echo_skb(struct can_rx_offload *offload, - unsigned int idx, u32 timestamp) + unsigned int idx, u32 timestamp, + unsigned int *frame_len_ptr) { struct net_device *dev = offload->dev; struct net_device_stats *stats = &dev->stats; @@ -271,7 +272,7 @@ unsigned int can_rx_offload_get_echo_skb(struct can_rx_offload *offload, u8 len; int err; - skb = __can_get_echo_skb(dev, idx, &len, NULL); + skb = __can_get_echo_skb(dev, idx, &len, frame_len_ptr); if (!skb) return 0; diff --git a/drivers/net/can/flexcan.c b/drivers/net/can/flexcan.c index 202d08f8e1a40..5d9157c655e9a 100644 --- a/drivers/net/can/flexcan.c +++ b/drivers/net/can/flexcan.c @@ -1122,8 +1122,9 @@ static irqreturn_t flexcan_irq(int irq, void *dev_id) u32 reg_ctrl = priv->read(&priv->tx_mb->can_ctrl); handled = IRQ_HANDLED; - stats->tx_bytes += can_rx_offload_get_echo_skb(&priv->offload, - 0, reg_ctrl << 16); + stats->tx_bytes += + can_rx_offload_get_echo_skb(&priv->offload, 0, + reg_ctrl << 16, NULL); stats->tx_packets++; can_led_event(dev, CAN_LED_EVENT_TX); diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c index 95bba456a4cda..63bbe0930e535 100644 --- a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c +++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c @@ -1271,7 +1271,7 @@ mcp251xfd_handle_tefif_one(struct mcp251xfd_priv *priv, stats->tx_bytes += can_rx_offload_get_echo_skb(&priv->offload, mcp251xfd_get_tef_tail(priv), - hw_tef_obj->ts); + hw_tef_obj->ts, NULL); stats->tx_packets++; priv->tef->tail++; diff --git a/drivers/net/can/ti_hecc.c b/drivers/net/can/ti_hecc.c index 485c19bc98c29..73245d8836a93 100644 --- a/drivers/net/can/ti_hecc.c +++ b/drivers/net/can/ti_hecc.c @@ -757,7 +757,7 @@ static irqreturn_t ti_hecc_interrupt(int irq, void *dev_id) stamp = hecc_read_stamp(priv, mbxno); stats->tx_bytes += can_rx_offload_get_echo_skb(&priv->offload, - mbxno, stamp); + mbxno, stamp, NULL); stats->tx_packets++; can_led_event(ndev, CAN_LED_EVENT_TX); --priv->tx_tail; diff --git a/include/linux/can/rx-offload.h b/include/linux/can/rx-offload.h index f1b38088b7659..40882df7105e8 100644 --- a/include/linux/can/rx-offload.h +++ b/include/linux/can/rx-offload.h @@ -44,7 +44,8 @@ int can_rx_offload_irq_offload_fifo(struct can_rx_offload *offload); int can_rx_offload_queue_sorted(struct can_rx_offload *offload, struct sk_buff *skb, u32 timestamp); unsigned int can_rx_offload_get_echo_skb(struct can_rx_offload *offload, - unsigned int idx, u32 timestamp); + unsigned int idx, u32 timestamp, + unsigned int *frame_len_ptr); int can_rx_offload_queue_tail(struct can_rx_offload *offload, struct sk_buff *skb); void can_rx_offload_del(struct can_rx_offload *offload); -- GitLab From 741b91f1b0ea34f00f6a7d4539b767c409291fcf Mon Sep 17 00:00:00 2001 From: Vincent Mailhol Date: Tue, 12 Jan 2021 18:54:37 +0900 Subject: [PATCH 0721/2012] can: dev: can_put_echo_skb(): add software tx timestamps Call skb_tx_timestamp() within can_put_echo_skb() so that a software tx timestamp gets attached to the skb. There two main reasons to include this call in can_put_echo_skb(): * It easily allow to enable the tx timestamp on all devices with just one small change. * According to Documentation/networking/timestamping.rst, the tx timestamps should be generated in the device driver as close as possible, but always prior to passing the packet to the network interface. During the call to can_put_echo_skb(), the skb gets cloned meaning that the driver should not dereference the skb variable anymore after can_put_echo_skb() returns. This makes can_put_echo_skb() the very last place we can use the skb without having to access the echo_skb[] array. Remark: by default, skb_tx_timestamp() does nothing. It needs to be activated by passing the SOF_TIMESTAMPING_TX_SOFTWARE flag either through socket options or control messages. References: * Support for the error queue in CAN RAW sockets (which is needed for tx timestamps) was introduced in: https://git.kernel.org//torvalds/c/eb88531bdbfaafb827192d1fc6c5a3fcc4fadd96 * Put the call to skb_tx_timestamp() just before adding it to the array: https://lore.kernel.org/r/043c3ea1-6bdd-59c0-0269-27b2b5b36cec@victronenergy.com * About Tx hardware timestamps https://lore.kernel.org/r/20210111171152.GB11715@hoboy.vegasvil.org Signed-off-by: Vincent Mailhol Link: https://lore.kernel.org/r/20210112095437.6488-2-mailhol.vincent@wanadoo.fr Signed-off-by: Marc Kleine-Budde --- drivers/net/can/dev/skb.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/can/dev/skb.c b/drivers/net/can/dev/skb.c index 53683d4312f18..6a64fe410987e 100644 --- a/drivers/net/can/dev/skb.c +++ b/drivers/net/can/dev/skb.c @@ -65,6 +65,8 @@ int can_put_echo_skb(struct sk_buff *skb, struct net_device *dev, /* save frame_len to reuse it when transmission is completed */ can_skb_prv(skb)->frame_len = frame_len; + skb_tx_timestamp(skb); + /* save this skb for tx interrupt echo handling */ priv->echo_skb[idx] = skb; } else { -- GitLab From 1105592cb8fdfcc96f2c9c693ff4106bac5fac7c Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Wed, 13 Jan 2021 21:00:53 +0100 Subject: [PATCH 0722/2012] can: tcan4x5x: remove __packed attribute from struct tcan4x5x_map_buf The first member of struct tcan4x5x_map_buf is the struct tcan4x5x_buf_cmd, which has a size of 4 bytes. It's followed by an array of u8. The compiler places the array directly after the struct tcan4x5x_buf_cmd. This patch removes the not needed attribute __packed from the struct tcan4x5x_map_buf. Suggested-by: Jakub Kicinski Link: https://lore.kernel.org/r/20210113203955.912916-1-mkl@pengutronix.de Signed-off-by: Marc Kleine-Budde --- drivers/net/can/m_can/tcan4x5x.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/can/m_can/tcan4x5x.h b/drivers/net/can/m_can/tcan4x5x.h index 7bf264f8e81f4..c66da829b795a 100644 --- a/drivers/net/can/m_can/tcan4x5x.h +++ b/drivers/net/can/m_can/tcan4x5x.h @@ -25,7 +25,7 @@ struct __packed tcan4x5x_buf_cmd { u8 len; }; -struct __packed tcan4x5x_map_buf { +struct tcan4x5x_map_buf { struct tcan4x5x_buf_cmd cmd; u8 data[256 * sizeof(u32)]; } ____cacheline_aligned; -- GitLab From 6e6aa61d81194c01283880950df563b1b9abec46 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Wed, 13 Jan 2021 14:45:10 -0500 Subject: [PATCH 0723/2012] USB: gadget: dummy-hcd: Fix errors in port-reset handling Commit c318840fb2a4 ("USB: Gadget: dummy-hcd: Fix shift-out-of-bounds bug") messed up the way dummy-hcd handles requests to turn on the RESET port feature (I didn't notice that the original switch case ended with a fallthrough). The call to set_link_state() was inadvertently removed, as was the code to set the USB_PORT_STAT_RESET flag when the speed is USB2. In addition, the original code never checked whether the port was connected before handling the port-reset request. There was a check for the port being powered, but it was removed by that commit! In practice this doesn't matter much because the kernel doesn't try to reset disconnected ports, but it's still bad form. This patch fixes these problems by changing the fallthrough to break, adding back in the missing set_link_state() call, setting the port-reset status flag, adding a port-is-connected test, and removing a redundant assignment statement. Fixes: c318840fb2a4 ("USB: Gadget: dummy-hcd: Fix shift-out-of-bounds bug") CC: Acked-by: Felipe Balbi Signed-off-by: Alan Stern Link: https://lore.kernel.org/r/20210113194510.GA1290698@rowland.harvard.edu Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/udc/dummy_hcd.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/usb/gadget/udc/dummy_hcd.c b/drivers/usb/gadget/udc/dummy_hcd.c index 1a953f44183aa..57067763b1005 100644 --- a/drivers/usb/gadget/udc/dummy_hcd.c +++ b/drivers/usb/gadget/udc/dummy_hcd.c @@ -2270,17 +2270,20 @@ static int dummy_hub_control( } fallthrough; case USB_PORT_FEAT_RESET: + if (!(dum_hcd->port_status & USB_PORT_STAT_CONNECTION)) + break; /* if it's already enabled, disable */ if (hcd->speed == HCD_USB3) { - dum_hcd->port_status = 0; dum_hcd->port_status = (USB_SS_PORT_STAT_POWER | USB_PORT_STAT_CONNECTION | USB_PORT_STAT_RESET); - } else + } else { dum_hcd->port_status &= ~(USB_PORT_STAT_ENABLE | USB_PORT_STAT_LOW_SPEED | USB_PORT_STAT_HIGH_SPEED); + dum_hcd->port_status |= USB_PORT_STAT_RESET; + } /* * We want to reset device status. All but the * Self powered feature @@ -2292,7 +2295,8 @@ static int dummy_hub_control( * interval? Is it still 50msec as for HS? */ dum_hcd->re_timeout = jiffies + msecs_to_jiffies(50); - fallthrough; + set_link_state(dum_hcd); + break; case USB_PORT_FEAT_C_CONNECTION: case USB_PORT_FEAT_C_RESET: case USB_PORT_FEAT_C_ENABLE: -- GitLab From 11663111cd49b4c6dd27479774e420f139e4c447 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 6 Jan 2021 17:22:27 +0000 Subject: [PATCH 0724/2012] KVM: arm64: Hide PMU registers from userspace when not available It appears that while we are now able to properly hide PMU registers from the guest when a PMU isn't available (either because none has been configured, the host doesn't have the PMU support compiled in, or that the HW doesn't have one at all), we are still exposing more than we should to userspace. Introduce a visibility callback gating all the PMU registers, which covers both usrespace and guest. Signed-off-by: Marc Zyngier --- arch/arm64/kvm/sys_regs.c | 68 +++++++++++++++++++++++++++------------ 1 file changed, 48 insertions(+), 20 deletions(-) diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 42ccc27fb684c..45f4ae71c8dc6 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -590,6 +590,15 @@ static void reset_mpidr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) vcpu_write_sys_reg(vcpu, (1ULL << 31) | mpidr, MPIDR_EL1); } +static unsigned int pmu_visibility(const struct kvm_vcpu *vcpu, + const struct sys_reg_desc *r) +{ + if (kvm_vcpu_has_pmu(vcpu)) + return 0; + + return REG_HIDDEN; +} + static void reset_pmcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) { u64 pmcr, val; @@ -936,15 +945,18 @@ static bool access_pmuserenr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, { SYS_DESC(SYS_DBGWCRn_EL1(n)), \ trap_wcr, reset_wcr, 0, 0, get_wcr, set_wcr } +#define PMU_SYS_REG(r) \ + SYS_DESC(r), .reset = reset_unknown, .visibility = pmu_visibility + /* Macro to expand the PMEVCNTRn_EL0 register */ #define PMU_PMEVCNTR_EL0(n) \ - { SYS_DESC(SYS_PMEVCNTRn_EL0(n)), \ - access_pmu_evcntr, reset_unknown, (PMEVCNTR0_EL0 + n), } + { PMU_SYS_REG(SYS_PMEVCNTRn_EL0(n)), \ + .access = access_pmu_evcntr, .reg = (PMEVCNTR0_EL0 + n), } /* Macro to expand the PMEVTYPERn_EL0 register */ #define PMU_PMEVTYPER_EL0(n) \ - { SYS_DESC(SYS_PMEVTYPERn_EL0(n)), \ - access_pmu_evtyper, reset_unknown, (PMEVTYPER0_EL0 + n), } + { PMU_SYS_REG(SYS_PMEVTYPERn_EL0(n)), \ + .access = access_pmu_evtyper, .reg = (PMEVTYPER0_EL0 + n), } static bool undef_access(struct kvm_vcpu *vcpu, struct sys_reg_params *p, const struct sys_reg_desc *r) @@ -1486,8 +1498,10 @@ static const struct sys_reg_desc sys_reg_descs[] = { { SYS_DESC(SYS_FAR_EL1), access_vm_reg, reset_unknown, FAR_EL1 }, { SYS_DESC(SYS_PAR_EL1), NULL, reset_unknown, PAR_EL1 }, - { SYS_DESC(SYS_PMINTENSET_EL1), access_pminten, reset_unknown, PMINTENSET_EL1 }, - { SYS_DESC(SYS_PMINTENCLR_EL1), access_pminten, reset_unknown, PMINTENSET_EL1 }, + { PMU_SYS_REG(SYS_PMINTENSET_EL1), + .access = access_pminten, .reg = PMINTENSET_EL1 }, + { PMU_SYS_REG(SYS_PMINTENCLR_EL1), + .access = access_pminten, .reg = PMINTENSET_EL1 }, { SYS_DESC(SYS_MAIR_EL1), access_vm_reg, reset_unknown, MAIR_EL1 }, { SYS_DESC(SYS_AMAIR_EL1), access_vm_reg, reset_amair_el1, AMAIR_EL1 }, @@ -1526,23 +1540,36 @@ static const struct sys_reg_desc sys_reg_descs[] = { { SYS_DESC(SYS_CSSELR_EL1), access_csselr, reset_unknown, CSSELR_EL1 }, { SYS_DESC(SYS_CTR_EL0), access_ctr }, - { SYS_DESC(SYS_PMCR_EL0), access_pmcr, reset_pmcr, PMCR_EL0 }, - { SYS_DESC(SYS_PMCNTENSET_EL0), access_pmcnten, reset_unknown, PMCNTENSET_EL0 }, - { SYS_DESC(SYS_PMCNTENCLR_EL0), access_pmcnten, reset_unknown, PMCNTENSET_EL0 }, - { SYS_DESC(SYS_PMOVSCLR_EL0), access_pmovs, reset_unknown, PMOVSSET_EL0 }, - { SYS_DESC(SYS_PMSWINC_EL0), access_pmswinc, reset_unknown, PMSWINC_EL0 }, - { SYS_DESC(SYS_PMSELR_EL0), access_pmselr, reset_unknown, PMSELR_EL0 }, - { SYS_DESC(SYS_PMCEID0_EL0), access_pmceid }, - { SYS_DESC(SYS_PMCEID1_EL0), access_pmceid }, - { SYS_DESC(SYS_PMCCNTR_EL0), access_pmu_evcntr, reset_unknown, PMCCNTR_EL0 }, - { SYS_DESC(SYS_PMXEVTYPER_EL0), access_pmu_evtyper }, - { SYS_DESC(SYS_PMXEVCNTR_EL0), access_pmu_evcntr }, + { PMU_SYS_REG(SYS_PMCR_EL0), .access = access_pmcr, + .reset = reset_pmcr, .reg = PMCR_EL0 }, + { PMU_SYS_REG(SYS_PMCNTENSET_EL0), + .access = access_pmcnten, .reg = PMCNTENSET_EL0 }, + { PMU_SYS_REG(SYS_PMCNTENCLR_EL0), + .access = access_pmcnten, .reg = PMCNTENSET_EL0 }, + { PMU_SYS_REG(SYS_PMOVSCLR_EL0), + .access = access_pmovs, .reg = PMOVSSET_EL0 }, + { PMU_SYS_REG(SYS_PMSWINC_EL0), + .access = access_pmswinc, .reg = PMSWINC_EL0 }, + { PMU_SYS_REG(SYS_PMSELR_EL0), + .access = access_pmselr, .reg = PMSELR_EL0 }, + { PMU_SYS_REG(SYS_PMCEID0_EL0), + .access = access_pmceid, .reset = NULL }, + { PMU_SYS_REG(SYS_PMCEID1_EL0), + .access = access_pmceid, .reset = NULL }, + { PMU_SYS_REG(SYS_PMCCNTR_EL0), + .access = access_pmu_evcntr, .reg = PMCCNTR_EL0 }, + { PMU_SYS_REG(SYS_PMXEVTYPER_EL0), + .access = access_pmu_evtyper, .reset = NULL }, + { PMU_SYS_REG(SYS_PMXEVCNTR_EL0), + .access = access_pmu_evcntr, .reset = NULL }, /* * PMUSERENR_EL0 resets as unknown in 64bit mode while it resets as zero * in 32bit mode. Here we choose to reset it as zero for consistency. */ - { SYS_DESC(SYS_PMUSERENR_EL0), access_pmuserenr, reset_val, PMUSERENR_EL0, 0 }, - { SYS_DESC(SYS_PMOVSSET_EL0), access_pmovs, reset_unknown, PMOVSSET_EL0 }, + { PMU_SYS_REG(SYS_PMUSERENR_EL0), .access = access_pmuserenr, + .reset = reset_val, .reg = PMUSERENR_EL0, .val = 0 }, + { PMU_SYS_REG(SYS_PMOVSSET_EL0), + .access = access_pmovs, .reg = PMOVSSET_EL0 }, { SYS_DESC(SYS_TPIDR_EL0), NULL, reset_unknown, TPIDR_EL0 }, { SYS_DESC(SYS_TPIDRRO_EL0), NULL, reset_unknown, TPIDRRO_EL0 }, @@ -1694,7 +1721,8 @@ static const struct sys_reg_desc sys_reg_descs[] = { * PMCCFILTR_EL0 resets as unknown in 64bit mode while it resets as zero * in 32bit mode. Here we choose to reset it as zero for consistency. */ - { SYS_DESC(SYS_PMCCFILTR_EL0), access_pmu_evtyper, reset_val, PMCCFILTR_EL0, 0 }, + { PMU_SYS_REG(SYS_PMCCFILTR_EL0), .access = access_pmu_evtyper, + .reset = reset_val, .reg = PMCCFILTR_EL0, .val = 0 }, { SYS_DESC(SYS_DACR32_EL2), NULL, reset_unknown, DACR32_EL2 }, { SYS_DESC(SYS_IFSR32_EL2), NULL, reset_unknown, IFSR32_EL2 }, -- GitLab From 7ded92e25cac9758a755b8f524b11b509c49afe1 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 6 Jan 2021 17:22:28 +0000 Subject: [PATCH 0725/2012] KVM: arm64: Simplify handling of absent PMU system registers Now that all PMU registers are gated behind a .visibility callback, remove the other checks against an absent PMU. Signed-off-by: Marc Zyngier --- arch/arm64/kvm/sys_regs.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 45f4ae71c8dc6..93f0a4a0789ab 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -622,9 +622,8 @@ static void reset_pmcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) static bool check_pmu_access_disabled(struct kvm_vcpu *vcpu, u64 flags) { u64 reg = __vcpu_sys_reg(vcpu, PMUSERENR_EL0); - bool enabled = kvm_vcpu_has_pmu(vcpu); + bool enabled = (reg & flags) || vcpu_mode_priv(vcpu); - enabled &= (reg & flags) || vcpu_mode_priv(vcpu); if (!enabled) kvm_inject_undefined(vcpu); @@ -909,11 +908,6 @@ static bool access_pmswinc(struct kvm_vcpu *vcpu, struct sys_reg_params *p, static bool access_pmuserenr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, const struct sys_reg_desc *r) { - if (!kvm_vcpu_has_pmu(vcpu)) { - kvm_inject_undefined(vcpu); - return false; - } - if (p->is_write) { if (!vcpu_mode_priv(vcpu)) { kvm_inject_undefined(vcpu); -- GitLab From 2c91ef39216149df6703c3fa6a47dd9a1e6091c1 Mon Sep 17 00:00:00 2001 From: David Brazdil Date: Tue, 29 Dec 2020 16:00:59 +0000 Subject: [PATCH 0726/2012] KVM: arm64: Allow PSCI SYSTEM_OFF/RESET to return The KVM/arm64 PSCI relay assumes that SYSTEM_OFF and SYSTEM_RESET should not return, as dictated by the PSCI spec. However, there is firmware out there which breaks this assumption, leading to a hyp panic. Make KVM more robust to broken firmware by allowing these to return. Signed-off-by: David Brazdil Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20201229160059.64135-1-dbrazdil@google.com --- arch/arm64/kvm/hyp/nvhe/psci-relay.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/arch/arm64/kvm/hyp/nvhe/psci-relay.c b/arch/arm64/kvm/hyp/nvhe/psci-relay.c index e3947846ffcb9..8e7128cb76678 100644 --- a/arch/arm64/kvm/hyp/nvhe/psci-relay.c +++ b/arch/arm64/kvm/hyp/nvhe/psci-relay.c @@ -77,12 +77,6 @@ static unsigned long psci_forward(struct kvm_cpu_context *host_ctxt) cpu_reg(host_ctxt, 2), cpu_reg(host_ctxt, 3)); } -static __noreturn unsigned long psci_forward_noreturn(struct kvm_cpu_context *host_ctxt) -{ - psci_forward(host_ctxt); - hyp_panic(); /* unreachable */ -} - static unsigned int find_cpu_id(u64 mpidr) { unsigned int i; @@ -251,10 +245,13 @@ static unsigned long psci_0_2_handler(u64 func_id, struct kvm_cpu_context *host_ case PSCI_0_2_FN_MIGRATE_INFO_TYPE: case PSCI_0_2_FN64_MIGRATE_INFO_UP_CPU: return psci_forward(host_ctxt); + /* + * SYSTEM_OFF/RESET should not return according to the spec. + * Allow it so as to stay robust to broken firmware. + */ case PSCI_0_2_FN_SYSTEM_OFF: case PSCI_0_2_FN_SYSTEM_RESET: - psci_forward_noreturn(host_ctxt); - unreachable(); + return psci_forward(host_ctxt); case PSCI_0_2_FN64_CPU_SUSPEND: return psci_cpu_suspend(func_id, host_ctxt); case PSCI_0_2_FN64_CPU_ON: -- GitLab From 7ba8b4380afbdbb29d53c50bee6563cd7457fc34 Mon Sep 17 00:00:00 2001 From: Alexandru Elisei Date: Wed, 6 Jan 2021 14:42:18 +0000 Subject: [PATCH 0727/2012] KVM: arm64: Use the reg_to_encoding() macro instead of sys_reg() The reg_to_encoding() macro is a wrapper over sys_reg() and conveniently takes a sys_reg_desc or a sys_reg_params argument and returns the 32 bit register encoding. Use it instead of calling sys_reg() directly. Signed-off-by: Alexandru Elisei Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210106144218.110665-1-alexandru.elisei@arm.com --- arch/arm64/kvm/sys_regs.c | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 93f0a4a0789ab..7c4f79532406b 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -43,6 +43,10 @@ * 64bit interface. */ +#define reg_to_encoding(x) \ + sys_reg((u32)(x)->Op0, (u32)(x)->Op1, \ + (u32)(x)->CRn, (u32)(x)->CRm, (u32)(x)->Op2) + static bool read_from_write_only(struct kvm_vcpu *vcpu, struct sys_reg_params *params, const struct sys_reg_desc *r) @@ -273,8 +277,7 @@ static bool trap_loregion(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) { u64 val = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1); - u32 sr = sys_reg((u32)r->Op0, (u32)r->Op1, - (u32)r->CRn, (u32)r->CRm, (u32)r->Op2); + u32 sr = reg_to_encoding(r); if (!(val & (0xfUL << ID_AA64MMFR1_LOR_SHIFT))) { kvm_inject_undefined(vcpu); @@ -924,10 +927,6 @@ static bool access_pmuserenr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, return true; } -#define reg_to_encoding(x) \ - sys_reg((u32)(x)->Op0, (u32)(x)->Op1, \ - (u32)(x)->CRn, (u32)(x)->CRm, (u32)(x)->Op2) - /* Silly macro to expand the DBG{BCR,BVR,WVR,WCR}n_EL1 registers in one go */ #define DBG_BCR_BVR_WCR_WVR_EL1(n) \ { SYS_DESC(SYS_DBGBVRn_EL1(n)), \ @@ -1026,8 +1025,7 @@ static bool access_arch_timer(struct kvm_vcpu *vcpu, static u64 read_id_reg(const struct kvm_vcpu *vcpu, struct sys_reg_desc const *r, bool raz) { - u32 id = sys_reg((u32)r->Op0, (u32)r->Op1, - (u32)r->CRn, (u32)r->CRm, (u32)r->Op2); + u32 id = reg_to_encoding(r); u64 val = raz ? 0 : read_sanitised_ftr_reg(id); if (id == SYS_ID_AA64PFR0_EL1) { @@ -1068,8 +1066,7 @@ static u64 read_id_reg(const struct kvm_vcpu *vcpu, static unsigned int id_visibility(const struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) { - u32 id = sys_reg((u32)r->Op0, (u32)r->Op1, - (u32)r->CRn, (u32)r->CRm, (u32)r->Op2); + u32 id = reg_to_encoding(r); switch (id) { case SYS_ID_AA64ZFR0_EL1: -- GitLab From 1eb8f690bcb565a6600f8b6dcc78f7b239ceba17 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Thu, 14 Jan 2021 10:36:59 +0100 Subject: [PATCH 0728/2012] x86/topology: Make __max_die_per_package available unconditionally Move it outside of CONFIG_SMP in order to avoid ifdeffery at the usage sites. Fixes: 76e2fc63ca40 ("x86/cpu/amd: Set __max_die_per_package on AMD") Reported-by: Stephen Rothwell Reported-by: kernel test robot Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20210114111814.5346-1-bp@alien8.de --- arch/x86/include/asm/topology.h | 4 ++-- arch/x86/kernel/cpu/topology.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index 488a8e8487548..9239399e54914 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -110,6 +110,8 @@ extern const struct cpumask *cpu_coregroup_mask(int cpu); #define topology_die_id(cpu) (cpu_data(cpu).cpu_die_id) #define topology_core_id(cpu) (cpu_data(cpu).cpu_core_id) +extern unsigned int __max_die_per_package; + #ifdef CONFIG_SMP #define topology_die_cpumask(cpu) (per_cpu(cpu_die_map, cpu)) #define topology_core_cpumask(cpu) (per_cpu(cpu_core_map, cpu)) @@ -118,8 +120,6 @@ extern const struct cpumask *cpu_coregroup_mask(int cpu); extern unsigned int __max_logical_packages; #define topology_max_packages() (__max_logical_packages) -extern unsigned int __max_die_per_package; - static inline int topology_max_die_per_package(void) { return __max_die_per_package; diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c index 1068002c85323..8678864ce7123 100644 --- a/arch/x86/kernel/cpu/topology.c +++ b/arch/x86/kernel/cpu/topology.c @@ -25,10 +25,10 @@ #define BITS_SHIFT_NEXT_LEVEL(eax) ((eax) & 0x1f) #define LEVEL_MAX_SIBLINGS(ebx) ((ebx) & 0xffff) -#ifdef CONFIG_SMP unsigned int __max_die_per_package __read_mostly = 1; EXPORT_SYMBOL(__max_die_per_package); +#ifdef CONFIG_SMP /* * Check if given CPUID extended toplogy "leaf" is implemented */ -- GitLab From 495dc7637cb5ca8e39c46db818328410bb6e73a1 Mon Sep 17 00:00:00 2001 From: Chris Chiu Date: Thu, 14 Jan 2021 16:27:28 +0800 Subject: [PATCH 0729/2012] ALSA: hda/realtek - Limit int mic boost on Acer Aspire E5-575T The Acer Apire E5-575T laptop with codec ALC255 has a terrible background noise comes from internal mic capture. And the jack sensing dose not work for headset like some other Acer laptops. This patch limits the internal mic boost on top of the existing ALC255_FIXUP_ACER_MIC_NO_PRESENCE quirk for Acer Aspire E5-575T. Signed-off-by: Chris Chiu Cc: Link: https://lore.kernel.org/r/20210114082728.74729-1-chiu@endlessos.org Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index dd82ff2bd5d65..ed5b6b894dc19 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -6371,6 +6371,7 @@ enum { ALC256_FIXUP_HP_HEADSET_MIC, ALC236_FIXUP_DELL_AIO_HEADSET_MIC, ALC282_FIXUP_ACER_DISABLE_LINEOUT, + ALC255_FIXUP_ACER_LIMIT_INT_MIC_BOOST, }; static const struct hda_fixup alc269_fixups[] = { @@ -7808,6 +7809,12 @@ static const struct hda_fixup alc269_fixups[] = { .chained = true, .chain_id = ALC269_FIXUP_HEADSET_MODE }, + [ALC255_FIXUP_ACER_LIMIT_INT_MIC_BOOST] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc269_fixup_limit_int_mic_boost, + .chained = true, + .chain_id = ALC255_FIXUP_ACER_MIC_NO_PRESENCE, + }, }; static const struct snd_pci_quirk alc269_fixup_tbl[] = { @@ -7826,6 +7833,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1025, 0x102b, "Acer Aspire C24-860", ALC286_FIXUP_ACER_AIO_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1025, 0x1065, "Acer Aspire C20-820", ALC269VC_FIXUP_ACER_HEADSET_MIC), SND_PCI_QUIRK(0x1025, 0x106d, "Acer Cloudbook 14", ALC283_FIXUP_CHROME_BOOK), + SND_PCI_QUIRK(0x1025, 0x1094, "Acer Aspire E5-575T", ALC255_FIXUP_ACER_LIMIT_INT_MIC_BOOST), SND_PCI_QUIRK(0x1025, 0x1099, "Acer Aspire E5-523G", ALC255_FIXUP_ACER_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1025, 0x110e, "Acer Aspire ES1-432", ALC255_FIXUP_ACER_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1025, 0x1166, "Acer Veriton N4640G", ALC269_FIXUP_LIFEBOOK), -- GitLab From 3c516e038f0cc3915825bdac619d448c2b1811f2 Mon Sep 17 00:00:00 2001 From: Qiuxu Zhuo Date: Thu, 14 Jan 2021 15:19:23 +0800 Subject: [PATCH 0730/2012] Documentation: ACPI: EINJ: Fix error type values for PCIe errors Fix the error type value for PCI Express uncorrectable non-fatal error to 0x00000080 and fix the error type value for PCI Express uncorrectable fatal error to 0x00000100. See Advanced Configuration and Power Interface Specification, version 6.2, table "18-409 Error Type Definition". Signed-off-by: Qiuxu Zhuo Reported-by: Lijian Zhao [ rjw: Subject edits ] Signed-off-by: Rafael J. Wysocki --- Documentation/firmware-guide/acpi/apei/einj.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/firmware-guide/acpi/apei/einj.rst b/Documentation/firmware-guide/acpi/apei/einj.rst index e588bccf51583..c042176e17078 100644 --- a/Documentation/firmware-guide/acpi/apei/einj.rst +++ b/Documentation/firmware-guide/acpi/apei/einj.rst @@ -50,8 +50,8 @@ The following files belong to it: 0x00000010 Memory Uncorrectable non-fatal 0x00000020 Memory Uncorrectable fatal 0x00000040 PCI Express Correctable - 0x00000080 PCI Express Uncorrectable fatal - 0x00000100 PCI Express Uncorrectable non-fatal + 0x00000080 PCI Express Uncorrectable non-fatal + 0x00000100 PCI Express Uncorrectable fatal 0x00000200 Platform Correctable 0x00000400 Platform Uncorrectable non-fatal 0x00000800 Platform Uncorrectable fatal -- GitLab From 7de843dbaaa68aa514090e6226ed7c6374fd7e49 Mon Sep 17 00:00:00 2001 From: Nicholas Miell Date: Sun, 10 Jan 2021 22:09:25 -0800 Subject: [PATCH 0731/2012] HID: logitech-hidpp: Add product ID for MX Ergo in Bluetooth mode The Logitech MX Ergo trackball supports HID++ 4.5 over Bluetooth. Add its product ID to the table so we can get battery monitoring support. (The hid-logitech-hidpp driver already recognizes it when connected via a Unifying Receiver.) [jkosina@suse.cz: fix whitespace damage] Signed-off-by: Nicholas Miell Reviewed-by: Hans de Goede Signed-off-by: Jiri Kosina --- drivers/hid/hid-logitech-hidpp.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/hid/hid-logitech-hidpp.c b/drivers/hid/hid-logitech-hidpp.c index f85781464807d..7eb9a6ddb46a6 100644 --- a/drivers/hid/hid-logitech-hidpp.c +++ b/drivers/hid/hid-logitech-hidpp.c @@ -4053,6 +4053,8 @@ static const struct hid_device_id hidpp_devices[] = { { /* MX Master mouse over Bluetooth */ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, 0xb012), .driver_data = HIDPP_QUIRK_HI_RES_SCROLL_X2121 }, + { /* MX Ergo trackball over Bluetooth */ + HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, 0xb01d) }, { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, 0xb01e), .driver_data = HIDPP_QUIRK_HI_RES_SCROLL_X2121 }, { /* MX Master 3 mouse over Bluetooth */ -- GitLab From 67ea698c3950d10925be33c21ca49ffb64e21842 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 14 Jan 2021 08:24:53 +0100 Subject: [PATCH 0732/2012] ALSA: hda/via: Add minimum mute flag It turned out that VIA codecs also mute the sound in the lowest mixer level. Turn on the dac_min_mute flag to indicate the mute-as-minimum in TLV like already done in Conexant and IDT codecs. BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=210559 Cc: Link: https://lore.kernel.org/r/20210114072453.11379-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_via.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_via.c b/sound/pci/hda/patch_via.c index 0ab40a8a68fb5..834367dd54e1b 100644 --- a/sound/pci/hda/patch_via.c +++ b/sound/pci/hda/patch_via.c @@ -113,6 +113,7 @@ static struct via_spec *via_new_spec(struct hda_codec *codec) spec->codec_type = VT1708S; spec->gen.indep_hp = 1; spec->gen.keep_eapd_on = 1; + spec->gen.dac_min_mute = 1; spec->gen.pcm_playback_hook = via_playback_pcm_hook; spec->gen.add_stereo_mix_input = HDA_HINT_STEREO_MIX_AUTO; codec->power_save_node = 1; -- GitLab From 7a2da5d7960a64ee923fe3e31f01a1101052c66f Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 14 Jan 2021 13:09:37 +0000 Subject: [PATCH 0733/2012] spi: fsl: Fix driver breakage when SPI_CS_HIGH is not set in spi->mode Commit 766c6b63aa04 ("spi: fix client driver breakages when using GPIO descriptors") broke fsl spi driver. As now we fully rely on gpiolib for handling the polarity of chip selects, the driver shall not alter the GPIO value anymore when SPI_CS_HIGH is not set in spi->mode. Fixes: 766c6b63aa04 ("spi: fix client driver breakages when using GPIO descriptors") Cc: stable@vger.kernel.org Signed-off-by: Christophe Leroy Link: https://lore.kernel.org/r/6b51cc2bfbca70d3e9b9da7b7aa4c7a9d793ca0e.1610629002.git.christophe.leroy@csgroup.eu Signed-off-by: Mark Brown --- drivers/spi/spi-fsl-spi.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/spi/spi-fsl-spi.c b/drivers/spi/spi-fsl-spi.c index 9494257e1c33f..6d8e0a05a5355 100644 --- a/drivers/spi/spi-fsl-spi.c +++ b/drivers/spi/spi-fsl-spi.c @@ -115,14 +115,13 @@ static void fsl_spi_chipselect(struct spi_device *spi, int value) { struct mpc8xxx_spi *mpc8xxx_spi = spi_master_get_devdata(spi->master); struct fsl_spi_platform_data *pdata; - bool pol = spi->mode & SPI_CS_HIGH; struct spi_mpc8xxx_cs *cs = spi->controller_state; pdata = spi->dev.parent->parent->platform_data; if (value == BITBANG_CS_INACTIVE) { if (pdata->cs_control) - pdata->cs_control(spi, !pol); + pdata->cs_control(spi, false); } if (value == BITBANG_CS_ACTIVE) { @@ -134,7 +133,7 @@ static void fsl_spi_chipselect(struct spi_device *spi, int value) fsl_spi_change_mode(spi); if (pdata->cs_control) - pdata->cs_control(spi, pol); + pdata->cs_control(spi, true); } } -- GitLab From c87a95dc28b1431c7e77e2c0c983cf37698089d2 Mon Sep 17 00:00:00 2001 From: Ignat Korchagin Date: Wed, 13 Jan 2021 19:17:17 +0000 Subject: [PATCH 0734/2012] dm crypt: defer decryption to a tasklet if interrupts disabled On some specific hardware on early boot we occasionally get: [ 1193.920255][ T0] BUG: sleeping function called from invalid context at mm/mempool.c:381 [ 1193.936616][ T0] in_atomic(): 1, irqs_disabled(): 1, non_block: 0, pid: 0, name: swapper/69 [ 1193.953233][ T0] no locks held by swapper/69/0. [ 1193.965871][ T0] irq event stamp: 575062 [ 1193.977724][ T0] hardirqs last enabled at (575061): [] tick_nohz_idle_exit+0xe2/0x3e0 [ 1194.002762][ T0] hardirqs last disabled at (575062): [] flush_smp_call_function_from_idle+0x4f/0x80 [ 1194.029035][ T0] softirqs last enabled at (575050): [] asm_call_irq_on_stack+0x12/0x20 [ 1194.054227][ T0] softirqs last disabled at (575043): [] asm_call_irq_on_stack+0x12/0x20 [ 1194.079389][ T0] CPU: 69 PID: 0 Comm: swapper/69 Not tainted 5.10.6-cloudflare-kasan-2021.1.4-dev #1 [ 1194.104103][ T0] Hardware name: NULL R162-Z12-CD/MZ12-HD4-CD, BIOS R10 06/04/2020 [ 1194.119591][ T0] Call Trace: [ 1194.130233][ T0] dump_stack+0x9a/0xcc [ 1194.141617][ T0] ___might_sleep.cold+0x180/0x1b0 [ 1194.153825][ T0] mempool_alloc+0x16b/0x300 [ 1194.165313][ T0] ? remove_element+0x160/0x160 [ 1194.176961][ T0] ? blk_mq_end_request+0x4b/0x490 [ 1194.188778][ T0] crypt_convert+0x27f6/0x45f0 [dm_crypt] [ 1194.201024][ T0] ? rcu_read_lock_sched_held+0x3f/0x70 [ 1194.212906][ T0] ? module_assert_mutex_or_preempt+0x3e/0x70 [ 1194.225318][ T0] ? __module_address.part.0+0x1b/0x3a0 [ 1194.237212][ T0] ? is_kernel_percpu_address+0x5b/0x190 [ 1194.249238][ T0] ? crypt_iv_tcw_ctr+0x4a0/0x4a0 [dm_crypt] [ 1194.261593][ T0] ? is_module_address+0x25/0x40 [ 1194.272905][ T0] ? static_obj+0x8a/0xc0 [ 1194.283582][ T0] ? lockdep_init_map_waits+0x26a/0x700 [ 1194.295570][ T0] ? __raw_spin_lock_init+0x39/0x110 [ 1194.307330][ T0] kcryptd_crypt_read_convert+0x31c/0x560 [dm_crypt] [ 1194.320496][ T0] ? kcryptd_queue_crypt+0x1be/0x380 [dm_crypt] [ 1194.333203][ T0] blk_update_request+0x6d7/0x1500 [ 1194.344841][ T0] ? blk_mq_trigger_softirq+0x190/0x190 [ 1194.356831][ T0] blk_mq_end_request+0x4b/0x490 [ 1194.367994][ T0] ? blk_mq_trigger_softirq+0x190/0x190 [ 1194.379693][ T0] flush_smp_call_function_queue+0x24b/0x560 [ 1194.391847][ T0] flush_smp_call_function_from_idle+0x59/0x80 [ 1194.403969][ T0] do_idle+0x287/0x450 [ 1194.413891][ T0] ? arch_cpu_idle_exit+0x40/0x40 [ 1194.424716][ T0] ? lockdep_hardirqs_on_prepare+0x286/0x3f0 [ 1194.436399][ T0] ? _raw_spin_unlock_irqrestore+0x39/0x40 [ 1194.447759][ T0] cpu_startup_entry+0x19/0x20 [ 1194.458038][ T0] secondary_startup_64_no_verify+0xb0/0xbb IO completion can be queued to a different CPU by the block subsystem as a "call single function/data". The CPU may run these routines from the idle task, but it does so with interrupts disabled. It is not a good idea to do decryption with irqs disabled even in an idle task context, so just defer it to a tasklet (as is done with requests from hard irqs). Fixes: 39d42fa96ba1 ("dm crypt: add flags to optionally bypass kcryptd workqueues") Cc: stable@vger.kernel.org # v5.9+ Signed-off-by: Ignat Korchagin Signed-off-by: Mike Snitzer --- drivers/md/dm-crypt.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index b4d7418b50368..8c874710f0bcf 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -2221,8 +2221,12 @@ static void kcryptd_queue_crypt(struct dm_crypt_io *io) if ((bio_data_dir(io->base_bio) == READ && test_bit(DM_CRYPT_NO_READ_WORKQUEUE, &cc->flags)) || (bio_data_dir(io->base_bio) == WRITE && test_bit(DM_CRYPT_NO_WRITE_WORKQUEUE, &cc->flags))) { - if (in_irq()) { - /* Crypto API's "skcipher_walk_first() refuses to work in hard IRQ context */ + /* + * in_irq(): Crypto API's skcipher_walk_first() refuses to work in hard IRQ context. + * irqs_disabled(): the kernel may run some IO completion from the idle thread, but + * it is being executed with irqs disabled. + */ + if (in_irq() || irqs_disabled()) { tasklet_init(&io->tasklet, kcryptd_crypt_tasklet, (unsigned long)&io->work); tasklet_schedule(&io->tasklet); return; -- GitLab From 55ed4560774d81d7343223b8fd2784c530a9c6c1 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Wed, 9 Dec 2020 14:27:44 +0900 Subject: [PATCH 0735/2012] tools/bootconfig: Add tracing_on support to helper scripts Add ftrace.instance.INSTANCE.tracing_on support to ftrace2bconf.sh and bconf2ftrace.sh. commit 8490db06f914 ("tracing/boot: Add per-instance tracing_on option support") added the per-instance tracing_on option, but forgot to update the helper scripts. Link: https://lkml.kernel.org/r/160749166410.3497930.14204335886811029800.stgit@devnote2 Cc: stable@vger.kernel.org Fixes: 8490db06f914 ("tracing/boot: Add per-instance tracing_on option support") Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- tools/bootconfig/scripts/bconf2ftrace.sh | 1 + tools/bootconfig/scripts/ftrace2bconf.sh | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/tools/bootconfig/scripts/bconf2ftrace.sh b/tools/bootconfig/scripts/bconf2ftrace.sh index 595e164dc352f..feb30c2c78815 100755 --- a/tools/bootconfig/scripts/bconf2ftrace.sh +++ b/tools/bootconfig/scripts/bconf2ftrace.sh @@ -152,6 +152,7 @@ setup_instance() { # [instance] set_array_of ${instance}.options ${instancedir}/trace_options set_value_of ${instance}.trace_clock ${instancedir}/trace_clock set_value_of ${instance}.cpumask ${instancedir}/tracing_cpumask + set_value_of ${instance}.tracing_on ${instancedir}/tracing_on set_value_of ${instance}.tracer ${instancedir}/current_tracer set_array_of ${instance}.ftrace.filters \ ${instancedir}/set_ftrace_filter diff --git a/tools/bootconfig/scripts/ftrace2bconf.sh b/tools/bootconfig/scripts/ftrace2bconf.sh index 6c0d4b61e0c26..a0c3bcc6da4f3 100755 --- a/tools/bootconfig/scripts/ftrace2bconf.sh +++ b/tools/bootconfig/scripts/ftrace2bconf.sh @@ -221,6 +221,10 @@ instance_options() { # [instance-name] if [ `echo $val | sed -e s/f//g`x != x ]; then emit_kv $PREFIX.cpumask = $val fi + val=`cat $INSTANCE/tracing_on` + if [ `echo $val | sed -e s/f//g`x != x ]; then + emit_kv $PREFIX.tracing_on = $val + fi val= for i in `cat $INSTANCE/set_event`; do -- GitLab From 3c97be6982e689d7b2430187a11f8c78e573abdb Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Mon, 4 Jan 2021 10:30:57 +0100 Subject: [PATCH 0736/2012] mtd: rawnand: nandsim: Fix the logic when selecting Hamming soft ECC engine I have been fooled by the logic picking the right ECC engine which is spread across two functions: *init_module() and *_attach(). I thought this driver was not impacted by the recent changes around the ECC engines DT parsing logic but in fact it is. Reported-by: kernel test robot Fixes: d7157ff49a5b ("mtd: rawnand: Use the ECC framework user input parsing bits") Cc: stable@vger.kernel.org Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20210104093057.31178-1-miquel.raynal@bootlin.com --- drivers/mtd/nand/raw/nandsim.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/mtd/nand/raw/nandsim.c b/drivers/mtd/nand/raw/nandsim.c index f2b9250c0ea83..0750121ac371c 100644 --- a/drivers/mtd/nand/raw/nandsim.c +++ b/drivers/mtd/nand/raw/nandsim.c @@ -2210,6 +2210,9 @@ static int ns_attach_chip(struct nand_chip *chip) { unsigned int eccsteps, eccbytes; + chip->ecc.engine_type = NAND_ECC_ENGINE_TYPE_SOFT; + chip->ecc.algo = bch ? NAND_ECC_ALGO_BCH : NAND_ECC_ALGO_HAMMING; + if (!bch) return 0; @@ -2233,8 +2236,6 @@ static int ns_attach_chip(struct nand_chip *chip) return -EINVAL; } - chip->ecc.engine_type = NAND_ECC_ENGINE_TYPE_SOFT; - chip->ecc.algo = NAND_ECC_ALGO_BCH; chip->ecc.size = 512; chip->ecc.strength = bch; chip->ecc.bytes = eccbytes; @@ -2273,8 +2274,6 @@ static int __init ns_init_module(void) nsmtd = nand_to_mtd(chip); nand_set_controller_data(chip, (void *)ns); - chip->ecc.engine_type = NAND_ECC_ENGINE_TYPE_SOFT; - chip->ecc.algo = NAND_ECC_ALGO_HAMMING; /* The NAND_SKIP_BBTSCAN option is necessary for 'overridesize' */ /* and 'badblocks' parameters to work */ chip->options |= NAND_SKIP_BBTSCAN; -- GitLab From 18f62614308be69a2752afb5f6bbad60096ad774 Mon Sep 17 00:00:00 2001 From: Martin Blumenstingl Date: Wed, 6 Jan 2021 15:09:43 +0100 Subject: [PATCH 0737/2012] mtd: rawnand: intel: check the mtd name only after setting the variable Move the check for mtd->name after the mtd variable has actually been initialized. While here, also drop the NULL assignment to the mtd variable as it's overwritten later on anyways and the NULL value is never read. Fixes: 0b1039f016e8a3 ("mtd: rawnand: Add NAND controller support on Intel LGM SoC") Signed-off-by: Martin Blumenstingl Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20210106140943.98072-1-martin.blumenstingl@googlemail.com --- drivers/mtd/nand/raw/intel-nand-controller.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/mtd/nand/raw/intel-nand-controller.c b/drivers/mtd/nand/raw/intel-nand-controller.c index fdb112e8a90d2..a304fda5d1fa5 100644 --- a/drivers/mtd/nand/raw/intel-nand-controller.c +++ b/drivers/mtd/nand/raw/intel-nand-controller.c @@ -579,7 +579,7 @@ static int ebu_nand_probe(struct platform_device *pdev) struct device *dev = &pdev->dev; struct ebu_nand_controller *ebu_host; struct nand_chip *nand; - struct mtd_info *mtd = NULL; + struct mtd_info *mtd; struct resource *res; char *resname; int ret; @@ -647,12 +647,13 @@ static int ebu_nand_probe(struct platform_device *pdev) ebu_host->ebu + EBU_ADDR_SEL(cs)); nand_set_flash_node(&ebu_host->chip, dev->of_node); + + mtd = nand_to_mtd(&ebu_host->chip); if (!mtd->name) { dev_err(ebu_host->dev, "NAND label property is mandatory\n"); return -EINVAL; } - mtd = nand_to_mtd(&ebu_host->chip); mtd->dev.parent = dev; ebu_host->dev = dev; -- GitLab From e708789c4a87989faff1131ccfdc465a1c1eddbc Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Thu, 7 Jan 2021 09:38:13 +0100 Subject: [PATCH 0738/2012] mtd: spinand: Fix MTD_OPS_AUTO_OOB requests The initial change breaking the logic is commit 3d1f08b032dc ("mtd: spinand: Use the external ECC engine logic") It inadvertently dropped proper OOB support while doing something else. Shortly later, half of it got re-integrated by commit 868cbe2a6dce ("mtd: spinand: Fix OOB read") (pointing by the way to a more early change which had nothing to do with the issue). Problem is, this commit failed to revert the faulty change entirely and missed the logic handling MTD_OPS_AUTO_OOB requests. Let's fix this mess by re-inserting the missing part now. Fixes: 868cbe2a6dce ("mtd: spinand: Fix OOB read") Reported-by: Felix Fietkau Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20210107083813.24283-1-miquel.raynal@bootlin.com --- drivers/mtd/nand/spi/core.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/mtd/nand/spi/core.c b/drivers/mtd/nand/spi/core.c index 8ea545bb924d2..61d932c1b7180 100644 --- a/drivers/mtd/nand/spi/core.c +++ b/drivers/mtd/nand/spi/core.c @@ -343,6 +343,7 @@ static int spinand_read_from_cache_op(struct spinand_device *spinand, const struct nand_page_io_req *req) { struct nand_device *nand = spinand_to_nand(spinand); + struct mtd_info *mtd = spinand_to_mtd(spinand); struct spi_mem_dirmap_desc *rdesc; unsigned int nbytes = 0; void *buf = NULL; @@ -382,9 +383,16 @@ static int spinand_read_from_cache_op(struct spinand_device *spinand, memcpy(req->databuf.in, spinand->databuf + req->dataoffs, req->datalen); - if (req->ooblen) - memcpy(req->oobbuf.in, spinand->oobbuf + req->ooboffs, - req->ooblen); + if (req->ooblen) { + if (req->mode == MTD_OPS_AUTO_OOB) + mtd_ooblayout_get_databytes(mtd, req->oobbuf.in, + spinand->oobbuf, + req->ooboffs, + req->ooblen); + else + memcpy(req->oobbuf.in, spinand->oobbuf + req->ooboffs, + req->ooblen); + } return 0; } -- GitLab From 5e6dca82bcaa49348f9e5fcb48df4881f6d6c4ae Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Tue, 12 Jan 2021 11:46:24 -0800 Subject: [PATCH 0739/2012] x86/entry: Emit a symbol for register restoring thunk Arnd found a randconfig that produces the warning: arch/x86/entry/thunk_64.o: warning: objtool: missing symbol for insn at offset 0x3e when building with LLVM_IAS=1 (Clang's integrated assembler). Josh notes: With the LLVM assembler not generating section symbols, objtool has no way to reference this code when it generates ORC unwinder entries, because this code is outside of any ELF function. The limitation now being imposed by objtool is that all code must be contained in an ELF symbol. And .L symbols don't create such symbols. So basically, you can use an .L symbol *inside* a function or a code segment, you just can't use the .L symbol to contain the code using a SYM_*_START/END annotation pair. Fangrui notes that this optimization is helpful for reducing image size when compiling with -ffunction-sections and -fdata-sections. I have observed on the order of tens of thousands of symbols for the kernel images built with those flags. A patch has been authored against GNU binutils to match this behavior of not generating unused section symbols ([1]), so this will also become a problem for users of GNU binutils once they upgrade to 2.36. Omit the .L prefix on a label so that the assembler will emit an entry into the symbol table for the label, with STB_LOCAL binding. This enables objtool to generate proper unwind info here with LLVM_IAS=1 or GNU binutils 2.36+. [ bp: Massage commit message. ] Reported-by: Arnd Bergmann Suggested-by: Josh Poimboeuf Suggested-by: Borislav Petkov Suggested-by: Mark Brown Signed-off-by: Nick Desaulniers Signed-off-by: Borislav Petkov Acked-by: Peter Zijlstra (Intel) Acked-by: Josh Poimboeuf Link: https://lkml.kernel.org/r/20210112194625.4181814-1-ndesaulniers@google.com Link: https://github.com/ClangBuiltLinux/linux/issues/1209 Link: https://reviews.llvm.org/D93783 Link: https://sourceware.org/binutils/docs/as/Symbol-Names.html Link: https://sourceware.org/git/?p=binutils-gdb.git;a=commit;h=d1bcae833b32f1408485ce69f844dcd7ded093a8 [1] --- Documentation/asm-annotations.rst | 5 +++++ arch/x86/entry/thunk_64.S | 8 ++++---- include/linux/linkage.h | 5 +++++ 3 files changed, 14 insertions(+), 4 deletions(-) diff --git a/Documentation/asm-annotations.rst b/Documentation/asm-annotations.rst index 32ea57483378d..76424e0431f4b 100644 --- a/Documentation/asm-annotations.rst +++ b/Documentation/asm-annotations.rst @@ -100,6 +100,11 @@ Instruction Macros ~~~~~~~~~~~~~~~~~~ This section covers ``SYM_FUNC_*`` and ``SYM_CODE_*`` enumerated above. +``objtool`` requires that all code must be contained in an ELF symbol. Symbol +names that have a ``.L`` prefix do not emit symbol table entries. ``.L`` +prefixed symbols can be used within a code region, but should be avoided for +denoting a range of code via ``SYM_*_START/END`` annotations. + * ``SYM_FUNC_START`` and ``SYM_FUNC_START_LOCAL`` are supposed to be **the most frequent markings**. They are used for functions with standard calling conventions -- global and local. Like in C, they both align the functions to diff --git a/arch/x86/entry/thunk_64.S b/arch/x86/entry/thunk_64.S index ccd32877a3c41..c9a9fbf1655f3 100644 --- a/arch/x86/entry/thunk_64.S +++ b/arch/x86/entry/thunk_64.S @@ -31,7 +31,7 @@ SYM_FUNC_START_NOALIGN(\name) .endif call \func - jmp .L_restore + jmp __thunk_restore SYM_FUNC_END(\name) _ASM_NOKPROBE(\name) .endm @@ -44,7 +44,7 @@ SYM_FUNC_END(\name) #endif #ifdef CONFIG_PREEMPTION -SYM_CODE_START_LOCAL_NOALIGN(.L_restore) +SYM_CODE_START_LOCAL_NOALIGN(__thunk_restore) popq %r11 popq %r10 popq %r9 @@ -56,6 +56,6 @@ SYM_CODE_START_LOCAL_NOALIGN(.L_restore) popq %rdi popq %rbp ret - _ASM_NOKPROBE(.L_restore) -SYM_CODE_END(.L_restore) + _ASM_NOKPROBE(__thunk_restore) +SYM_CODE_END(__thunk_restore) #endif diff --git a/include/linux/linkage.h b/include/linux/linkage.h index 5bcfbd972e970..dbf8506decca0 100644 --- a/include/linux/linkage.h +++ b/include/linux/linkage.h @@ -178,6 +178,11 @@ * Objtool generates debug info for both FUNC & CODE, but needs special * annotations for each CODE's start (to describe the actual stack frame). * + * Objtool requires that all code must be contained in an ELF symbol. Symbol + * names that have a .L prefix do not emit symbol table entries. .L + * prefixed symbols can be used within a code region, but should be avoided for + * denoting a range of code via ``SYM_*_START/END`` annotations. + * * ALIAS -- does not generate debug info -- the aliased function will */ -- GitLab From b7c568752ef3b36afa78e1a1866dc049d175993b Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Tue, 29 Dec 2020 14:15:48 -0700 Subject: [PATCH 0740/2012] mt76: Fix queue ID variable types after mcu queue split Clang warns in both mt7615 and mt7915: drivers/net/wireless/mediatek/mt76/mt7915/mcu.c:271:9: warning: implicit conversion from enumeration type 'enum mt76_mcuq_id' to different enumeration type 'enum mt76_txq_id' [-Wenum-conversion] txq = MT_MCUQ_FWDL; ~ ^~~~~~~~~~~~ drivers/net/wireless/mediatek/mt76/mt7915/mcu.c:278:9: warning: implicit conversion from enumeration type 'enum mt76_mcuq_id' to different enumeration type 'enum mt76_txq_id' [-Wenum-conversion] txq = MT_MCUQ_WA; ~ ^~~~~~~~~~ drivers/net/wireless/mediatek/mt76/mt7915/mcu.c:282:9: warning: implicit conversion from enumeration type 'enum mt76_mcuq_id' to different enumeration type 'enum mt76_txq_id' [-Wenum-conversion] txq = MT_MCUQ_WM; ~ ^~~~~~~~~~ 3 warnings generated. drivers/net/wireless/mediatek/mt76/mt7615/mcu.c:238:9: warning: implicit conversion from enumeration type 'enum mt76_mcuq_id' to different enumeration type 'enum mt76_txq_id' [-Wenum-conversion] qid = MT_MCUQ_WM; ~ ^~~~~~~~~~ drivers/net/wireless/mediatek/mt76/mt7615/mcu.c:240:9: warning: implicit conversion from enumeration type 'enum mt76_mcuq_id' to different enumeration type 'enum mt76_txq_id' [-Wenum-conversion] qid = MT_MCUQ_FWDL; ~ ^~~~~~~~~~~~ 2 warnings generated. Use the proper type for the queue ID variables to fix these warnings. Additionally, rename the txq variable in mt7915_mcu_send_message to be more neutral like mt7615_mcu_send_message. Fixes: e637763b606b ("mt76: move mcu queues to mt76_dev q_mcu array") Link: https://github.com/ClangBuiltLinux/linux/issues/1229 Signed-off-by: Nathan Chancellor Acked-by: Lorenzo Bianconi Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20201229211548.1348077-1-natechancellor@gmail.com --- drivers/net/wireless/mediatek/mt76/mt7615/mcu.c | 2 +- drivers/net/wireless/mediatek/mt76/mt7915/mcu.c | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c index a44b7766dec64..c13547841a4e9 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c +++ b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c @@ -231,7 +231,7 @@ mt7615_mcu_send_message(struct mt76_dev *mdev, struct sk_buff *skb, int cmd, int *seq) { struct mt7615_dev *dev = container_of(mdev, struct mt7615_dev, mt76); - enum mt76_txq_id qid; + enum mt76_mcuq_id qid; mt7615_mcu_fill_msg(dev, skb, cmd, seq); if (test_bit(MT76_STATE_MCU_RUNNING, &dev->mphy.state)) diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c index 5fdd1a6d32ee1..e211a2bd4d3c0 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c +++ b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c @@ -256,7 +256,7 @@ mt7915_mcu_send_message(struct mt76_dev *mdev, struct sk_buff *skb, struct mt7915_dev *dev = container_of(mdev, struct mt7915_dev, mt76); struct mt7915_mcu_txd *mcu_txd; u8 seq, pkt_fmt, qidx; - enum mt76_txq_id txq; + enum mt76_mcuq_id qid; __le32 *txd; u32 val; @@ -268,18 +268,18 @@ mt7915_mcu_send_message(struct mt76_dev *mdev, struct sk_buff *skb, seq = ++dev->mt76.mcu.msg_seq & 0xf; if (cmd == -MCU_CMD_FW_SCATTER) { - txq = MT_MCUQ_FWDL; + qid = MT_MCUQ_FWDL; goto exit; } mcu_txd = (struct mt7915_mcu_txd *)skb_push(skb, sizeof(*mcu_txd)); if (test_bit(MT76_STATE_MCU_RUNNING, &dev->mphy.state)) { - txq = MT_MCUQ_WA; + qid = MT_MCUQ_WA; qidx = MT_TX_MCU_PORT_RX_Q0; pkt_fmt = MT_TX_TYPE_CMD; } else { - txq = MT_MCUQ_WM; + qid = MT_MCUQ_WM; qidx = MT_TX_MCU_PORT_RX_Q0; pkt_fmt = MT_TX_TYPE_CMD; } @@ -326,7 +326,7 @@ exit: if (wait_seq) *wait_seq = seq; - return mt76_tx_queue_skb_raw(dev, mdev->q_mcu[txq], skb, 0); + return mt76_tx_queue_skb_raw(dev, mdev->q_mcu[qid], skb, 0); } static void -- GitLab From b79f2dc5ffe17b03ec8c55f0d63f65e87bcac676 Mon Sep 17 00:00:00 2001 From: Aharon Landau Date: Wed, 13 Jan 2021 14:16:59 +0200 Subject: [PATCH 0741/2012] RDMA/umem: Avoid undefined behavior of rounddown_pow_of_two() rounddown_pow_of_two() is undefined when the input is 0. Therefore we need to avoid it in ib_umem_find_best_pgsz and return 0. Otherwise, it could result in not rejecting an invalid page size which eventually causes a kernel oops due to the logical inconsistency. Fixes: 3361c29e9279 ("RDMA/umem: Use simpler logic for ib_umem_find_best_pgsz()") Link: https://lore.kernel.org/r/20210113121703.559778-2-leon@kernel.org Signed-off-by: Aharon Landau Reviewed-by: Jason Gunthorpe Reviewed-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/umem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c index 7ca4112e3e8f7..917338db7ac13 100644 --- a/drivers/infiniband/core/umem.c +++ b/drivers/infiniband/core/umem.c @@ -135,7 +135,7 @@ unsigned long ib_umem_find_best_pgsz(struct ib_umem *umem, */ if (mask) pgsz_bitmap &= GENMASK(count_trailing_zeros(mask), 0); - return rounddown_pow_of_two(pgsz_bitmap); + return pgsz_bitmap ? rounddown_pow_of_two(pgsz_bitmap) : 0; } EXPORT_SYMBOL(ib_umem_find_best_pgsz); -- GitLab From 2cb091f6293df898b47f4e0f2e54324e2bbaf816 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Wed, 13 Jan 2021 14:17:00 +0200 Subject: [PATCH 0742/2012] IB/mlx5: Fix error unwinding when set_has_smi_cap fails When set_has_smi_cap() fails, multiport master cleanup is missed. Fix it by doing the correct error unwinding goto. Fixes: a989ea01cb10 ("RDMA/mlx5: Move SMI caps logic") Link: https://lore.kernel.org/r/20210113121703.559778-3-leon@kernel.org Signed-off-by: Parav Pandit Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 3bae9ba0ead85..fbe3b75f866b1 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -3956,7 +3956,7 @@ static int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev) err = set_has_smi_cap(dev); if (err) - return err; + goto err_mp; if (!mlx5_core_mp_enabled(mdev)) { for (i = 1; i <= dev->num_ports; i++) { -- GitLab From 1c3aa6bd0b823105c2030af85d92d158e815d669 Mon Sep 17 00:00:00 2001 From: Mark Bloch Date: Wed, 13 Jan 2021 14:17:03 +0200 Subject: [PATCH 0743/2012] RDMA/mlx5: Fix wrong free of blue flame register on error If the allocation of the fast path blue flame register fails, the driver should free the regular blue flame register allocated a statement above, not the one that it just failed to allocate. Fixes: 16c1975f1032 ("IB/mlx5: Create profile infrastructure to add and remove stages") Link: https://lore.kernel.org/r/20210113121703.559778-6-leon@kernel.org Reported-by: Hans Petter Selasky Signed-off-by: Mark Bloch Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index fbe3b75f866b1..d26f3f3e0462a 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -4319,7 +4319,7 @@ static int mlx5_ib_stage_bfrag_init(struct mlx5_ib_dev *dev) err = mlx5_alloc_bfreg(dev->mdev, &dev->fp_bfreg, false, true); if (err) - mlx5_free_bfreg(dev->mdev, &dev->fp_bfreg); + mlx5_free_bfreg(dev->mdev, &dev->bfreg); return err; } -- GitLab From 7c7b3e5d9aeed31d35c5dab0bf9c0fd4c8923206 Mon Sep 17 00:00:00 2001 From: Neta Ostrovsky Date: Wed, 13 Jan 2021 15:02:14 +0200 Subject: [PATCH 0744/2012] RDMA/cma: Fix error flow in default_roce_mode_store In default_roce_mode_store(), we took a reference to cma_dev, but didn't return it with cma_dev_put in the error flow. Fixes: 1c15b4f2a42f ("RDMA/core: Modify enum ib_gid_type and enum rdma_network_type") Link: https://lore.kernel.org/r/20210113130214.562108-1-leon@kernel.org Signed-off-by: Neta Ostrovsky Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/cma_configfs.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/core/cma_configfs.c b/drivers/infiniband/core/cma_configfs.c index 7f70e5a7de105..97a77ea8d3c9c 100644 --- a/drivers/infiniband/core/cma_configfs.c +++ b/drivers/infiniband/core/cma_configfs.c @@ -131,8 +131,10 @@ static ssize_t default_roce_mode_store(struct config_item *item, return ret; gid_type = ib_cache_gid_parse_type_str(buf); - if (gid_type < 0) + if (gid_type < 0) { + cma_configfs_params_put(cma_dev); return -EINVAL; + } ret = cma_set_default_gid_type(cma_dev, group->port_num, gid_type); -- GitLab From a6616bc9a0af7c65c0b0856a7508870a4a40c4ac Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 12 Jan 2021 14:24:48 +0100 Subject: [PATCH 0745/2012] iwlwifi: dbg: Don't touch the tlv data The commit ba8f6f4ae254 ("iwlwifi: dbg: add dumping special device memory") added a termination of name string just to be sure, and this seems causing a regression, a GPF triggered at firmware loading. Basically we shouldn't modify the firmware data that may be provided as read-only. This patch drops the code that caused the regression and keep the tlv data as is. Fixes: ba8f6f4ae254 ("iwlwifi: dbg: add dumping special device memory") BugLink: https://bugzilla.suse.com/show_bug.cgi?id=1180344 BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=210733 Cc: stable@vger.kernel.org Signed-off-by: Takashi Iwai Acked-by: Luca Coelho Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20210112132449.22243-2-tiwai@suse.de --- drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.c b/drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.c index a654147d3cd61..a80a35a7740f3 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.c +++ b/drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.c @@ -180,13 +180,6 @@ static int iwl_dbg_tlv_alloc_region(struct iwl_trans *trans, if (le32_to_cpu(tlv->length) < sizeof(*reg)) return -EINVAL; - /* For safe using a string from FW make sure we have a - * null terminator - */ - reg->name[IWL_FW_INI_MAX_NAME - 1] = 0; - - IWL_DEBUG_FW(trans, "WRT: parsing region: %s\n", reg->name); - if (id >= IWL_FW_INI_MAX_REGION_ID) { IWL_ERR(trans, "WRT: Invalid region id %u\n", id); return -EINVAL; -- GitLab From 8873e8f56f74013ecf7aca6d8e66c600dc40ce64 Mon Sep 17 00:00:00 2001 From: Zheng Yongjun Date: Wed, 16 Dec 2020 21:04:42 +0800 Subject: [PATCH 0746/2012] rtw88: Delete useless kfree code The parameter of kfree function is NULL, so kfree code is useless, delete it. Signed-off-by: Zheng Yongjun Acked-by: Ping-Ke Shih Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20201216130442.13869-1-zhengyongjun3@huawei.com --- drivers/net/wireless/realtek/rtw88/main.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/wireless/realtek/rtw88/main.c b/drivers/net/wireless/realtek/rtw88/main.c index e7c1ae454524b..16bacfadeb589 100644 --- a/drivers/net/wireless/realtek/rtw88/main.c +++ b/drivers/net/wireless/realtek/rtw88/main.c @@ -1276,7 +1276,6 @@ static void rtw_set_supported_band(struct ieee80211_hw *hw, err_out: rtw_err(rtwdev, "failed to set supported band\n"); - kfree(sband); } static void rtw_unset_supported_band(struct ieee80211_hw *hw, -- GitLab From ac9533d2a637464588c03d1a247567ea95d2bc59 Mon Sep 17 00:00:00 2001 From: Chin-Yen Lee Date: Mon, 28 Dec 2020 16:24:33 +0800 Subject: [PATCH 0747/2012] rtw88: reduce the log level for failure of tx report Sometimes driver does not get tx report from firmware because wifi environment is too noisy to get ack from AP about a TX frame, or firmware is too busy to report driver in a estimated time. But the condition will not affect wifi function or throughput. So we reduce the log level to rtw_debug instead of scary backtrace. Signed-off-by: Chin-Yen Lee Signed-off-by: Ping-Ke Shih Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20201228082433.16431-1-pkshih@realtek.com --- drivers/net/wireless/realtek/rtw88/tx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/realtek/rtw88/tx.c b/drivers/net/wireless/realtek/rtw88/tx.c index ca8072177ae38..18ec0088bf416 100644 --- a/drivers/net/wireless/realtek/rtw88/tx.c +++ b/drivers/net/wireless/realtek/rtw88/tx.c @@ -158,7 +158,7 @@ void rtw_tx_report_purge_timer(struct timer_list *t) if (skb_queue_len(&tx_report->queue) == 0) return; - WARN(1, "purge skb(s) not reported by firmware\n"); + rtw_dbg(rtwdev, RTW_DBG_TX, "purge skb(s) not reported by firmware\n"); spin_lock_irqsave(&tx_report->q_lock, flags); skb_queue_purge(&tx_report->queue); -- GitLab From 840105e4f12fa3aa5f5c3c5b42dacb82ab953289 Mon Sep 17 00:00:00 2001 From: Vincent Fann Date: Mon, 28 Dec 2020 16:25:16 +0800 Subject: [PATCH 0748/2012] rtw88: 8821c: apply CCK PD level which calculates from dynamic mechanism Hal function must follow the value that calculates from dynamic mechanism. Force to set new_lvl to 4 damages receiving ability. System will not able to reconnect to the AP if wifi unexpected disconnecting at this moment. Signed-off-by: Vincent Fann Signed-off-by: Ping-Ke Shih Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20201228082516.16488-1-pkshih@realtek.com --- drivers/net/wireless/realtek/rtw88/rtw8821c.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/drivers/net/wireless/realtek/rtw88/rtw8821c.c b/drivers/net/wireless/realtek/rtw88/rtw8821c.c index fbfd85439d1ff..74155c999ebb4 100644 --- a/drivers/net/wireless/realtek/rtw88/rtw8821c.c +++ b/drivers/net/wireless/realtek/rtw88/rtw8821c.c @@ -1022,12 +1022,6 @@ static void rtw8821c_phy_cck_pd_set(struct rtw_dev *rtwdev, u8 new_lvl) u8 pd[CCK_PD_LV_MAX] = {3, 7, 13, 13, 13}; u8 cck_n_rx; - if (dm_info->min_rssi > 60) { - new_lvl = 4; - pd[4] = 0x1d; - goto set_cck_pd; - } - rtw_dbg(rtwdev, RTW_DBG_PHY, "lv: (%d) -> (%d)\n", dm_info->cck_pd_lv[RTW_CHANNEL_WIDTH_20][RF_PATH_A], new_lvl); @@ -1044,7 +1038,6 @@ static void rtw8821c_phy_cck_pd_set(struct rtw_dev *rtwdev, u8 new_lvl) dm_info->cck_fa_avg = CCK_FA_AVG_RESET; -set_cck_pd: dm_info->cck_pd_lv[RTW_CHANNEL_WIDTH_20][RF_PATH_A] = new_lvl; rtw_write32_mask(rtwdev, REG_PWRTH, 0x3f0000, pd[new_lvl]); rtw_write32_mask(rtwdev, REG_PWRTH2, 0x1f0000, -- GitLab From 5f782c11569d5703c8ded6913481b55c254512f2 Mon Sep 17 00:00:00 2001 From: Ching-Te Ku Date: Tue, 12 Jan 2021 10:11:35 +0800 Subject: [PATCH 0749/2012] rtw88: coex: set 4 slot TDMA for BT link and WL busy To protect both of WL/BT performance while BT is under re-link state. 4-slot mode TDMA can make the re-link more sensitive and mitigate the WL throughput drop. Signed-off-by: Ching-Te Ku Signed-off-by: Ping-Ke Shih Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20210112021135.3823-1-pkshih@realtek.com --- drivers/net/wireless/realtek/rtw88/coex.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/realtek/rtw88/coex.c b/drivers/net/wireless/realtek/rtw88/coex.c index 24530cafcba7e..ea2be1e25065a 100644 --- a/drivers/net/wireless/realtek/rtw88/coex.c +++ b/drivers/net/wireless/realtek/rtw88/coex.c @@ -1607,6 +1607,7 @@ static void rtw_coex_action_bt_relink(struct rtw_dev *rtwdev) struct rtw_efuse *efuse = &rtwdev->efuse; struct rtw_chip_info *chip = rtwdev->chip; u8 table_case, tdma_case; + u32 slot_type = 0; rtw_dbg(rtwdev, RTW_DBG_COEX, "[BTCoex], %s()\n", __func__); @@ -1618,6 +1619,7 @@ static void rtw_coex_action_bt_relink(struct rtw_dev *rtwdev) table_case = 26; if (coex_stat->bt_hid_exist && coex_stat->bt_profile_num == 1) { + slot_type = TDMA_4SLOT; tdma_case = 20; } else { tdma_case = 20; @@ -1635,7 +1637,7 @@ static void rtw_coex_action_bt_relink(struct rtw_dev *rtwdev) } rtw_coex_table(rtwdev, false, table_case); - rtw_coex_tdma(rtwdev, false, tdma_case); + rtw_coex_tdma(rtwdev, false, tdma_case | slot_type); } static void rtw_coex_action_bt_idle(struct rtw_dev *rtwdev) -- GitLab From d3a78c7a9daa6c5fa8529c53f07a909745cf7d02 Mon Sep 17 00:00:00 2001 From: YANG LI Date: Tue, 12 Jan 2021 17:50:40 +0800 Subject: [PATCH 0750/2012] rtw88: Simplify bool comparison Fix the following coccicheck warning: ./drivers/net/wireless/realtek/rtw88/debug.c:800:17-23: WARNING: Comparison of 0/1 to bool variable Reported-by: Abaci Robot Signed-off-by: YANG LI Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/1610445040-23599-1-git-send-email-abaci-bugfix@linux.alibaba.com --- drivers/net/wireless/realtek/rtw88/debug.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/realtek/rtw88/debug.c b/drivers/net/wireless/realtek/rtw88/debug.c index 19fc2d8bf3e93..948cb79050ea9 100644 --- a/drivers/net/wireless/realtek/rtw88/debug.c +++ b/drivers/net/wireless/realtek/rtw88/debug.c @@ -800,7 +800,7 @@ static ssize_t rtw_debugfs_set_coex_enable(struct file *filp, } mutex_lock(&rtwdev->mutex); - coex->manual_control = enable == 0; + coex->manual_control = !enable; mutex_unlock(&rtwdev->mutex); return count; -- GitLab From 596c84c49f8a073c4e19acc0d5d9dd4bcc8bb57f Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Fri, 11 Dec 2020 14:38:35 +0100 Subject: [PATCH 0751/2012] mwifiex: pcie: Drop bogus __refdata annotation As the Marvell PCIE WiFi-Ex driver does not have any code or data located in initmem, there is no need to annotate the mwifiex_pcie structure with __refdata. Drop the annotation, to avoid suppressing future section warnings. Signed-off-by: Geert Uytterhoeven Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20201211133835.2970384-1-geert+renesas@glider.be --- drivers/net/wireless/marvell/mwifiex/pcie.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/marvell/mwifiex/pcie.c b/drivers/net/wireless/marvell/mwifiex/pcie.c index 5f0a61b974ee5..94228b316df1b 100644 --- a/drivers/net/wireless/marvell/mwifiex/pcie.c +++ b/drivers/net/wireless/marvell/mwifiex/pcie.c @@ -571,7 +571,7 @@ static SIMPLE_DEV_PM_OPS(mwifiex_pcie_pm_ops, mwifiex_pcie_suspend, #endif /* PCI Device Driver */ -static struct pci_driver __refdata mwifiex_pcie = { +static struct pci_driver mwifiex_pcie = { .name = "mwifiex_pcie", .id_table = mwifiex_ids, .probe = mwifiex_pcie_probe, -- GitLab From e4c748ee4af1b00f390ef802c7d9186474ac0b5f Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 16 Dec 2020 11:58:08 +0000 Subject: [PATCH 0752/2012] wilc1000: fix spelling mistake in Kconfig "devision" -> "division" There is a spelling mistake in the Kconfig help text. Fix it. Signed-off-by: Colin Ian King Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20201216115808.12987-1-colin.king@canonical.com --- drivers/net/wireless/microchip/wilc1000/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/microchip/wilc1000/Kconfig b/drivers/net/wireless/microchip/wilc1000/Kconfig index 80c92e8bf8a59..7f15e42602dda 100644 --- a/drivers/net/wireless/microchip/wilc1000/Kconfig +++ b/drivers/net/wireless/microchip/wilc1000/Kconfig @@ -44,4 +44,4 @@ config WILC1000_HW_OOB_INTR chipset. This OOB interrupt is intended to provide a faster interrupt mechanism for SDIO host controllers that don't support SDIO interrupt. Select this option If the SDIO host controller in your platform - doesn't support SDIO time devision interrupt. + doesn't support SDIO time division interrupt. -- GitLab From f4add10399f9cba42a45ac4b0e0dc5e4943f8546 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= Date: Mon, 14 Dec 2020 11:15:53 +0100 Subject: [PATCH 0753/2012] brcmfmac: support BCM4365E with 43666 ChipCommon chip ID MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This adds support for the BCM43666/4 which seems to be using the same firmware as BCM4366 (4366c0). I found it in the Netgear R8000P router. Signed-off-by: Rafał Miłecki Acked-by: Arend van Spriel Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20201214101553.32097-1-zajec5@gmail.com --- drivers/net/wireless/broadcom/brcm80211/brcmfmac/chip.c | 1 + drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c | 1 + drivers/net/wireless/broadcom/brcm80211/include/brcm_hw_ids.h | 1 + 3 files changed, 3 insertions(+) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/chip.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/chip.c index 5bf11e46fc49a..45037decba40f 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/chip.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/chip.c @@ -720,6 +720,7 @@ static u32 brcmf_chip_tcm_rambase(struct brcmf_chip_priv *ci) case BRCM_CC_4365_CHIP_ID: case BRCM_CC_4366_CHIP_ID: case BRCM_CC_43664_CHIP_ID: + case BRCM_CC_43666_CHIP_ID: return 0x200000; case BRCM_CC_4359_CHIP_ID: return (ci->pub.chiprev < 9) ? 0x180000 : 0x160000; diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c index 45bc502fcb341..ad79e3b7e74a3 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c @@ -77,6 +77,7 @@ static const struct brcmf_firmware_mapping brcmf_pcie_fwnames[] = { BRCMF_FW_ENTRY(BRCM_CC_4366_CHIP_ID, 0x0000000F, 4366B), BRCMF_FW_ENTRY(BRCM_CC_4366_CHIP_ID, 0xFFFFFFF0, 4366C), BRCMF_FW_ENTRY(BRCM_CC_43664_CHIP_ID, 0xFFFFFFF0, 4366C), + BRCMF_FW_ENTRY(BRCM_CC_43666_CHIP_ID, 0xFFFFFFF0, 4366C), BRCMF_FW_ENTRY(BRCM_CC_4371_CHIP_ID, 0xFFFFFFFF, 4371), }; diff --git a/drivers/net/wireless/broadcom/brcm80211/include/brcm_hw_ids.h b/drivers/net/wireless/broadcom/brcm80211/include/brcm_hw_ids.h index c6c4be05159d4..00309b272a0ea 100644 --- a/drivers/net/wireless/broadcom/brcm80211/include/brcm_hw_ids.h +++ b/drivers/net/wireless/broadcom/brcm80211/include/brcm_hw_ids.h @@ -48,6 +48,7 @@ #define BRCM_CC_4365_CHIP_ID 0x4365 #define BRCM_CC_4366_CHIP_ID 0x4366 #define BRCM_CC_43664_CHIP_ID 43664 +#define BRCM_CC_43666_CHIP_ID 43666 #define BRCM_CC_4371_CHIP_ID 0x4371 #define CY_CC_4373_CHIP_ID 0x4373 #define CY_CC_43012_CHIP_ID 43012 -- GitLab From 0e40dbd56d67a5b01b13f4bfb62b470cd99125cd Mon Sep 17 00:00:00 2001 From: Zhi Han Date: Thu, 17 Dec 2020 17:13:02 +0100 Subject: [PATCH 0754/2012] mt7601u: process URBs in status EPROTO properly When the usb device being plugged out, before the usb_driver:disconnect called by e.g workqueue, it is possible that some URBs are still in processing, and being marked as EPROTO in host controller. Those URBs should not be scheduled in complete_rx callback function to get further processing. Signed-off-by: Zhi Han Acked-by: Jakub Kicinski Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20201217161302.GA12562@E480 --- drivers/net/wireless/mediatek/mt7601u/dma.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/mediatek/mt7601u/dma.c b/drivers/net/wireless/mediatek/mt7601u/dma.c index 5f99054f535b4..98733c23d408b 100644 --- a/drivers/net/wireless/mediatek/mt7601u/dma.c +++ b/drivers/net/wireless/mediatek/mt7601u/dma.c @@ -192,6 +192,7 @@ static void mt7601u_complete_rx(struct urb *urb) case -ECONNRESET: case -ESHUTDOWN: case -ENOENT: + case -EPROTO: return; default: dev_err_ratelimited(dev->dev, "rx urb failed: %d\n", -- GitLab From 1dac51269d0532bd0847f44f1c3ae7fd2851ac7d Mon Sep 17 00:00:00 2001 From: Zhi Han Date: Thu, 17 Dec 2020 17:16:57 +0100 Subject: [PATCH 0755/2012] mt7601u: check the status of device in calibration When the usb device being plugged out, before ieee80211 gets to know the hw being removed, it gets to know that the association status changed, and thus ask for the device to do the calibration. This causes error as the hw is absent. This can be avoid by checking the status of the device before sending the calibration request to the device. Signed-off-by: Zhi Han Acked-by: Jakub Kicinski Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20201217161657.GB12562@E480 --- drivers/net/wireless/mediatek/mt7601u/phy.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/wireless/mediatek/mt7601u/phy.c b/drivers/net/wireless/mediatek/mt7601u/phy.c index 28db24a2b5e5e..8a00f6a75ca9b 100644 --- a/drivers/net/wireless/mediatek/mt7601u/phy.c +++ b/drivers/net/wireless/mediatek/mt7601u/phy.c @@ -586,6 +586,9 @@ static void mt7601u_rxdc_cal(struct mt7601u_dev *dev) void mt7601u_phy_recalibrate_after_assoc(struct mt7601u_dev *dev) { + if (test_bit(MT7601U_STATE_REMOVED, &dev->state)) + return; + mt7601u_mcu_calibrate(dev, MCU_CAL_DPD, dev->curr_temp); mt7601u_rxdc_cal(dev); -- GitLab From 0924ba9fbc26860b4b57b424e402d4b16d40cc91 Mon Sep 17 00:00:00 2001 From: Amey Narkhede Date: Mon, 21 Dec 2020 13:27:35 +0530 Subject: [PATCH 0756/2012] qtnfmac_pcie: Use module_pci_driver Use module_pci_driver for drivers whose init and exit functions only register and unregister, respectively. Signed-off-by: Amey Narkhede Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20201221075735.197255-1-ameynarkhede03@gmail.com --- drivers/net/wireless/quantenna/qtnfmac/pcie/pcie.c | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/drivers/net/wireless/quantenna/qtnfmac/pcie/pcie.c b/drivers/net/wireless/quantenna/qtnfmac/pcie/pcie.c index 0f328ce47fee3..5d93c874d6669 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/pcie/pcie.c +++ b/drivers/net/wireless/quantenna/qtnfmac/pcie/pcie.c @@ -480,18 +480,7 @@ static struct pci_driver qtnf_pcie_drv_data = { #endif }; -static int __init qtnf_pcie_register(void) -{ - return pci_register_driver(&qtnf_pcie_drv_data); -} - -static void __exit qtnf_pcie_exit(void) -{ - pci_unregister_driver(&qtnf_pcie_drv_data); -} - -module_init(qtnf_pcie_register); -module_exit(qtnf_pcie_exit); +module_pci_driver(qtnf_pcie_drv_data) MODULE_AUTHOR("Quantenna Communications"); MODULE_DESCRIPTION("Quantenna PCIe bus driver for 802.11 wireless LAN."); -- GitLab From 73c655410181b7fc9a5ff321a5021a684ada99e7 Mon Sep 17 00:00:00 2001 From: Zheng Yongjun Date: Tue, 22 Dec 2020 21:51:13 +0800 Subject: [PATCH 0757/2012] brcmfmac: Delete useless kfree code A null pointer will be passed to a kfree() call after a kzalloc() call failed. This code is useless. Thus delete the extra function call. A goto statement is also no longer needed. Thus adjust an if branch. Signed-off-by: Zheng Yongjun Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20201222135113.20680-1-zhengyongjun3@huawei.com --- .../wireless/broadcom/brcm80211/brcmfmac/firmware.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/firmware.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/firmware.c index d821a4758f8cf..d40104b8df555 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/firmware.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/firmware.c @@ -319,8 +319,10 @@ static void brcmf_fw_strip_multi_v2(struct nvram_parser *nvp, u16 domain_nr, u8 *nvram; nvram = kzalloc(nvp->nvram_len + 1 + 3 + sizeof(u32), GFP_KERNEL); - if (!nvram) - goto fail; + if (!nvram) { + nvp->nvram_len = 0; + return; + } /* Copy all valid entries, release old nvram and assign new one. * Valid entries are of type pcie/X/Y/ where X = domain_nr and @@ -350,10 +352,6 @@ static void brcmf_fw_strip_multi_v2(struct nvram_parser *nvp, u16 domain_nr, kfree(nvp->nvram); nvp->nvram = nvram; nvp->nvram_len = j; - return; -fail: - kfree(nvram); - nvp->nvram_len = 0; } static void brcmf_fw_add_defaults(struct nvram_parser *nvp) -- GitLab From e862a3e4088070de352fdafe9bd9e3ae0a95a33c Mon Sep 17 00:00:00 2001 From: Luca Pesce Date: Thu, 24 Dec 2020 11:51:59 +0100 Subject: [PATCH 0758/2012] brcmfmac: clear EAP/association status bits on linkdown events This ensure that previous association attempts do not leave stale statuses on subsequent attempts. This fixes the WARN_ON(!cr->bss)) from __cfg80211_connect_result() when connecting to an AP after a previous connection failure (e.g. where EAP fails due to incorrect psk but association succeeded). In some scenarios, indeed, brcmf_is_linkup() was reporting a link up event too early due to stale BRCMF_VIF_STATUS_ASSOC_SUCCESS bit, thus reporting to cfg80211 a connection result with a zeroed bssid (vif->profile.bssid is still empty), causing the WARN_ON due to the call to cfg80211_get_bss() with the empty bssid. Signed-off-by: Luca Pesce Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/1608807119-21785-1-git-send-email-luca.pesce@vimar.com --- .../net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c index 0ee421f30aa24..23e6422c2251b 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c @@ -5611,7 +5611,8 @@ static bool brcmf_is_linkup(struct brcmf_cfg80211_vif *vif, return false; } -static bool brcmf_is_linkdown(const struct brcmf_event_msg *e) +static bool brcmf_is_linkdown(struct brcmf_cfg80211_vif *vif, + const struct brcmf_event_msg *e) { u32 event = e->event_code; u16 flags = e->flags; @@ -5620,6 +5621,8 @@ static bool brcmf_is_linkdown(const struct brcmf_event_msg *e) (event == BRCMF_E_DISASSOC_IND) || ((event == BRCMF_E_LINK) && (!(flags & BRCMF_EVENT_MSG_LINK)))) { brcmf_dbg(CONN, "Processing link down\n"); + clear_bit(BRCMF_VIF_STATUS_EAP_SUCCESS, &vif->sme_state); + clear_bit(BRCMF_VIF_STATUS_ASSOC_SUCCESS, &vif->sme_state); return true; } return false; @@ -6067,7 +6070,7 @@ brcmf_notify_connect_status(struct brcmf_if *ifp, } else brcmf_bss_connect_done(cfg, ndev, e, true); brcmf_net_setcarrier(ifp, true); - } else if (brcmf_is_linkdown(e)) { + } else if (brcmf_is_linkdown(ifp->vif, e)) { brcmf_dbg(CONN, "Linkdown\n"); if (!brcmf_is_ibssmode(ifp->vif) && test_bit(BRCMF_VIF_STATUS_CONNECTED, -- GitLab From 098238e80bed1ea2347e2efb5533f1d0289ae6c3 Mon Sep 17 00:00:00 2001 From: Tian Tao Date: Fri, 25 Dec 2020 15:35:03 +0800 Subject: [PATCH 0759/2012] wilc1000: use flexible-array member instead of zero-length array Use flexible-array member introduced in C99 instead of zero-length array. Most of zero-length array was already taken care in previous patch [1]. [1]. https://patchwork.kernel.org/patch/11394197/ Signed-off-by: Tian Tao Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/1608881703-37060-1-git-send-email-tiantao6@hisilicon.com --- drivers/net/wireless/microchip/wilc1000/fw.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/microchip/wilc1000/fw.h b/drivers/net/wireless/microchip/wilc1000/fw.h index a76e1dea43454..1114530d03e4f 100644 --- a/drivers/net/wireless/microchip/wilc1000/fw.h +++ b/drivers/net/wireless/microchip/wilc1000/fw.h @@ -44,20 +44,20 @@ struct wilc_drv_handler { struct wilc_wep_key { u8 index; u8 key_len; - u8 key[0]; + u8 key[]; } __packed; struct wilc_sta_wpa_ptk { u8 mac_addr[ETH_ALEN]; u8 key_len; - u8 key[0]; + u8 key[]; } __packed; struct wilc_ap_wpa_ptk { u8 mac_addr[ETH_ALEN]; u8 index; u8 key_len; - u8 key[0]; + u8 key[]; } __packed; struct wilc_gtk_key { @@ -65,7 +65,7 @@ struct wilc_gtk_key { u8 rsc[8]; u8 index; u8 key_len; - u8 key[0]; + u8 key[]; } __packed; struct wilc_op_mode { -- GitLab From 07ceefa3012f43512e93931980bd3bdf5af96344 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Fri, 1 Jan 2021 08:59:55 +0200 Subject: [PATCH 0760/2012] wlcore: Downgrade exceeded max RX BA sessions to debug We can get the following in the logs every few minutes or so: wlcore: ERROR exceeded max RX BA sessions Let's downgrade the message to a debug message as suggested by the TI support folks at: https://e2e.ti.com/support/wireless-connectivity/wifi/f/968/p/352435/1244754 "The WL127x firmware supports max of 3 BA sessions. It cannot be increased. I think the problem here is the peer trying to initiate a 4th BA session (ADDBA request)." Signed-off-by: Tony Lindgren Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20210101065955.63386-1-tony@atomide.com --- drivers/net/wireless/ti/wlcore/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/ti/wlcore/main.c b/drivers/net/wireless/ti/wlcore/main.c index 122c7a4b374f1..fb0305d075dd3 100644 --- a/drivers/net/wireless/ti/wlcore/main.c +++ b/drivers/net/wireless/ti/wlcore/main.c @@ -5394,7 +5394,7 @@ static int wl1271_op_ampdu_action(struct ieee80211_hw *hw, if (wl->ba_rx_session_count >= wl->ba_rx_session_count_max) { ret = -EBUSY; - wl1271_error("exceeded max RX BA sessions"); + wl1271_debug(DEBUG_RX, "exceeded max RX BA sessions"); break; } -- GitLab From 47e4bb147a96f1c9b4e7691e7e994e53838bfff8 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 13 Jan 2021 17:29:47 -0800 Subject: [PATCH 0761/2012] net: sit: unregister_netdevice on newlink's error path We need to unregister the netdevice if config failed. .ndo_uninit takes care of most of the heavy lifting. This was uncovered by recent commit c269a24ce057 ("net: make free_netdev() more lenient with unregistering devices"). Previously the partially-initialized device would be left in the system. Reported-and-tested-by: syzbot+2393580080a2da190f04@syzkaller.appspotmail.com Fixes: e2f1f072db8d ("sit: allow to configure 6rd tunnels via netlink") Acked-by: Nicolas Dichtel Link: https://lore.kernel.org/r/20210114012947.2515313-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- net/ipv6/sit.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 2da0ee7037795..93636867aee28 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -1645,8 +1645,11 @@ static int ipip6_newlink(struct net *src_net, struct net_device *dev, } #ifdef CONFIG_IPV6_SIT_6RD - if (ipip6_netlink_6rd_parms(data, &ip6rd)) + if (ipip6_netlink_6rd_parms(data, &ip6rd)) { err = ipip6_tunnel_update_6rd(nt, &ip6rd); + if (err < 0) + unregister_netdevice_queue(dev, NULL); + } #endif return err; -- GitLab From 25764779298f23a659f3daf39f9e2b5975a7a89d Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Thu, 14 Jan 2021 09:04:48 +0100 Subject: [PATCH 0762/2012] net: tip: fix a couple kernel-doc markups A function has a different name between their prototype and its kernel-doc markup: ../net/tipc/link.c:2551: warning: expecting prototype for link_reset_stats(). Prototype was for tipc_link_reset_stats() instead ../net/tipc/node.c:1678: warning: expecting prototype for is the general link level function for message sending(). Prototype was for tipc_node_xmit() instead Signed-off-by: Mauro Carvalho Chehab Acked-by: Jon Maloy Signed-off-by: Jakub Kicinski --- net/tipc/link.c | 2 +- net/tipc/node.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/net/tipc/link.c b/net/tipc/link.c index a6a694b789274..1151092594302 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -2544,7 +2544,7 @@ void tipc_link_set_queue_limits(struct tipc_link *l, u32 min_win, u32 max_win) } /** - * link_reset_stats - reset link statistics + * tipc_link_reset_stats - reset link statistics * @l: pointer to link */ void tipc_link_reset_stats(struct tipc_link *l) diff --git a/net/tipc/node.c b/net/tipc/node.c index 83d9eb8305928..008670d1f43e1 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -1665,7 +1665,7 @@ static void tipc_lxc_xmit(struct net *peer_net, struct sk_buff_head *list) } /** - * tipc_node_xmit() is the general link level function for message sending + * tipc_node_xmit() - general link level function for message sending * @net: the applicable net namespace * @list: chain of buffers containing message * @dnode: address of destination node -- GitLab From b76889ff51bfee318bea15891420e5aefd2833a0 Mon Sep 17 00:00:00 2001 From: Yannick Vignon Date: Wed, 13 Jan 2021 14:15:56 +0100 Subject: [PATCH 0763/2012] net: stmmac: fix taprio schedule configuration When configuring a 802.1Qbv schedule through the tc taprio qdisc on an NXP i.MX8MPlus device, the effective cycle time differed from the requested one by N*96ns, with N number of entries in the Qbv Gate Control List. This is because the driver was adding a 96ns margin to each interval of the GCL, apparently to account for the IPG. The problem was observed on NXP i.MX8MPlus devices but likely affected all devices relying on the same configuration callback (dwmac 4.00, 4.10, 5.10 variants). Fix the issue by removing the margins, and simply setup the MAC with the provided cycle time value. This is the behavior expected by the user-space API, as altering the Qbv schedule timings would break standards conformance. This is also the behavior of several other Ethernet MAC implementations supporting taprio, including the dwxgmac variant of stmmac. Fixes: 504723af0d85 ("net: stmmac: Add basic EST support for GMAC5+") Signed-off-by: Yannick Vignon Link: https://lore.kernel.org/r/20210113131557.24651-1-yannick.vignon@oss.nxp.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/dwmac5.c | 52 ++------------------ 1 file changed, 4 insertions(+), 48 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac5.c b/drivers/net/ethernet/stmicro/stmmac/dwmac5.c index 03e79a677c8bd..8f7ac24545efe 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac5.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac5.c @@ -568,68 +568,24 @@ static int dwmac5_est_write(void __iomem *ioaddr, u32 reg, u32 val, bool gcl) int dwmac5_est_configure(void __iomem *ioaddr, struct stmmac_est *cfg, unsigned int ptp_rate) { - u32 speed, total_offset, offset, ctrl, ctr_low; - u32 extcfg = readl(ioaddr + GMAC_EXT_CONFIG); - u32 mac_cfg = readl(ioaddr + GMAC_CONFIG); int i, ret = 0x0; - u64 total_ctr; - - if (extcfg & GMAC_CONFIG_EIPG_EN) { - offset = (extcfg & GMAC_CONFIG_EIPG) >> GMAC_CONFIG_EIPG_SHIFT; - offset = 104 + (offset * 8); - } else { - offset = (mac_cfg & GMAC_CONFIG_IPG) >> GMAC_CONFIG_IPG_SHIFT; - offset = 96 - (offset * 8); - } - - speed = mac_cfg & (GMAC_CONFIG_PS | GMAC_CONFIG_FES); - speed = speed >> GMAC_CONFIG_FES_SHIFT; - - switch (speed) { - case 0x0: - offset = offset * 1000; /* 1G */ - break; - case 0x1: - offset = offset * 400; /* 2.5G */ - break; - case 0x2: - offset = offset * 100000; /* 10M */ - break; - case 0x3: - offset = offset * 10000; /* 100M */ - break; - default: - return -EINVAL; - } - - offset = offset / 1000; + u32 ctrl; ret |= dwmac5_est_write(ioaddr, BTR_LOW, cfg->btr[0], false); ret |= dwmac5_est_write(ioaddr, BTR_HIGH, cfg->btr[1], false); ret |= dwmac5_est_write(ioaddr, TER, cfg->ter, false); ret |= dwmac5_est_write(ioaddr, LLR, cfg->gcl_size, false); + ret |= dwmac5_est_write(ioaddr, CTR_LOW, cfg->ctr[0], false); + ret |= dwmac5_est_write(ioaddr, CTR_HIGH, cfg->ctr[1], false); if (ret) return ret; - total_offset = 0; for (i = 0; i < cfg->gcl_size; i++) { - ret = dwmac5_est_write(ioaddr, i, cfg->gcl[i] + offset, true); + ret = dwmac5_est_write(ioaddr, i, cfg->gcl[i], true); if (ret) return ret; - - total_offset += offset; } - total_ctr = cfg->ctr[0] + cfg->ctr[1] * 1000000000ULL; - total_ctr += total_offset; - - ctr_low = do_div(total_ctr, 1000000000); - - ret |= dwmac5_est_write(ioaddr, CTR_LOW, ctr_low, false); - ret |= dwmac5_est_write(ioaddr, CTR_HIGH, total_ctr, false); - if (ret) - return ret; - ctrl = readl(ioaddr + MTL_EST_CONTROL); ctrl &= ~PTOV; ctrl |= ((1000000000 / ptp_rate) * 6) << PTOV_SHIFT; -- GitLab From fe28c53ed71d463e187748b6b10e1130dd72ceeb Mon Sep 17 00:00:00 2001 From: Yannick Vignon Date: Wed, 13 Jan 2021 14:15:57 +0100 Subject: [PATCH 0764/2012] net: stmmac: fix taprio configuration when base_time is in the past The Synopsys TSN MAC supports Qbv base times in the past, but only up to a certain limit. As a result, a taprio qdisc configuration with a small base time (for example when treating the base time as a simple phase offset) is not applied by the hardware and silently ignored. This was observed on an NXP i.MX8MPlus device, but likely affects all TSN-variants of the MAC. Fix the issue by making sure the base time is in the future, pushing it by an integer amount of cycle times if needed. (a similar check is already done in several other taprio implementations, see for example drivers/net/ethernet/intel/igc/igc_tsn.c#L116 or drivers/net/dsa/sja1105/sja1105_ptp.h#L39). Fixes: b60189e0392f ("net: stmmac: Integrate EST with TAPRIO scheduler API") Signed-off-by: Yannick Vignon Link: https://lore.kernel.org/r/20210113131557.24651-2-yannick.vignon@oss.nxp.com Signed-off-by: Jakub Kicinski --- .../net/ethernet/stmicro/stmmac/stmmac_tc.c | 20 +++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c index f5bed4d26e804..8ed3b2c834a09 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c @@ -599,7 +599,8 @@ static int tc_setup_taprio(struct stmmac_priv *priv, { u32 size, wid = priv->dma_cap.estwid, dep = priv->dma_cap.estdep; struct plat_stmmacenet_data *plat = priv->plat; - struct timespec64 time; + struct timespec64 time, current_time; + ktime_t current_time_ns; bool fpe = false; int i, ret = 0; u64 ctr; @@ -694,7 +695,22 @@ static int tc_setup_taprio(struct stmmac_priv *priv, } /* Adjust for real system time */ - time = ktime_to_timespec64(qopt->base_time); + priv->ptp_clock_ops.gettime64(&priv->ptp_clock_ops, ¤t_time); + current_time_ns = timespec64_to_ktime(current_time); + if (ktime_after(qopt->base_time, current_time_ns)) { + time = ktime_to_timespec64(qopt->base_time); + } else { + ktime_t base_time; + s64 n; + + n = div64_s64(ktime_sub_ns(current_time_ns, qopt->base_time), + qopt->cycle_time); + base_time = ktime_add_ns(qopt->base_time, + (n + 1) * qopt->cycle_time); + + time = ktime_to_timespec64(base_time); + } + priv->plat->est->btr[0] = (u32)time.tv_nsec; priv->plat->est->btr[1] = (u32)time.tv_sec; -- GitLab From 7da17624e7948d5d9660b910f8079d26d26ce453 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 13 Jan 2021 15:43:09 +0100 Subject: [PATCH 0765/2012] nt: usb: USB_RTL8153_ECM should not default to y In general, device drivers should not be enabled by default. Fixes: 657bc1d10bfc23ac ("r8153_ecm: avoid to be prior to r8152 driver") Signed-off-by: Geert Uytterhoeven Reviewed-by: Greg Kroah-Hartman Link: https://lore.kernel.org/r/20210113144309.1384615-1-geert+renesas@glider.be Signed-off-by: Jakub Kicinski --- drivers/net/usb/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/usb/Kconfig b/drivers/net/usb/Kconfig index 1e37190287808..fbbe786436319 100644 --- a/drivers/net/usb/Kconfig +++ b/drivers/net/usb/Kconfig @@ -631,7 +631,6 @@ config USB_NET_AQC111 config USB_RTL8153_ECM tristate "RTL8153 ECM support" depends on USB_NET_CDCETHER && (USB_RTL8152 || USB_RTL8152=n) - default y help This option supports ECM mode for RTL8153 ethernet adapter, when CONFIG_USB_RTL8152 is not set, or the RTL8153 device is not -- GitLab From 3226b158e67cfaa677fd180152bfb28989cb2fac Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 13 Jan 2021 08:18:19 -0800 Subject: [PATCH 0766/2012] net: avoid 32 x truesize under-estimation for tiny skbs Both virtio net and napi_get_frags() allocate skbs with a very small skb->head While using page fragments instead of a kmalloc backed skb->head might give a small performance improvement in some cases, there is a huge risk of under estimating memory usage. For both GOOD_COPY_LEN and GRO_MAX_HEAD, we can fit at least 32 allocations per page (order-3 page in x86), or even 64 on PowerPC We have been tracking OOM issues on GKE hosts hitting tcp_mem limits but consuming far more memory for TCP buffers than instructed in tcp_mem[2] Even if we force napi_alloc_skb() to only use order-0 pages, the issue would still be there on arches with PAGE_SIZE >= 32768 This patch makes sure that small skb head are kmalloc backed, so that other objects in the slab page can be reused instead of being held as long as skbs are sitting in socket queues. Note that we might in the future use the sk_buff napi cache, instead of going through a more expensive __alloc_skb() Another idea would be to use separate page sizes depending on the allocated length (to never have more than 4 frags per page) I would like to thank Greg Thelen for his precious help on this matter, analysing crash dumps is always a time consuming task. Fixes: fd11a83dd363 ("net: Pull out core bits of __netdev_alloc_skb and add __napi_alloc_skb") Signed-off-by: Eric Dumazet Cc: Paolo Abeni Cc: Greg Thelen Reviewed-by: Alexander Duyck Acked-by: Michael S. Tsirkin Link: https://lore.kernel.org/r/20210113161819.1155526-1-eric.dumazet@gmail.com Signed-off-by: Jakub Kicinski --- net/core/skbuff.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 0da035c1e53f1..c1a6f262636a1 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -501,13 +501,17 @@ EXPORT_SYMBOL(__netdev_alloc_skb); struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len, gfp_t gfp_mask) { - struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache); + struct napi_alloc_cache *nc; struct sk_buff *skb; void *data; len += NET_SKB_PAD + NET_IP_ALIGN; - if ((len > SKB_WITH_OVERHEAD(PAGE_SIZE)) || + /* If requested length is either too small or too big, + * we use kmalloc() for skb->head allocation. + */ + if (len <= SKB_WITH_OVERHEAD(1024) || + len > SKB_WITH_OVERHEAD(PAGE_SIZE) || (gfp_mask & (__GFP_DIRECT_RECLAIM | GFP_DMA))) { skb = __alloc_skb(len, gfp_mask, SKB_ALLOC_RX, NUMA_NO_NODE); if (!skb) @@ -515,6 +519,7 @@ struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len, goto skb_success; } + nc = this_cpu_ptr(&napi_alloc_cache); len += SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); len = SKB_DATA_ALIGN(len); -- GitLab From 93089de91e85743942a5f804850d4f0846e5402b Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 13 Jan 2021 17:49:06 -0800 Subject: [PATCH 0767/2012] MAINTAINERS: altx: move Jay Cliburn to CREDITS Jay was not active in recent years and does not have plans to return to work on ATLX drivers. Subsystem ATLX ETHERNET DRIVERS Changes 20 / 116 (17%) Last activity: 2020-02-24 Jay Cliburn : Chris Snook : Tags ea973742140b 2020-02-24 00:00:00 1 Top reviewers: [4]: andrew@lunn.ch [2]: kuba@kernel.org [2]: o.rempel@pengutronix.de INACTIVE MAINTAINER Jay Cliburn Acked-by: Chris Snook Signed-off-by: Jakub Kicinski --- CREDITS | 4 ++++ MAINTAINERS | 1 - 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/CREDITS b/CREDITS index 090ed4b004a5b..59a704a45170d 100644 --- a/CREDITS +++ b/CREDITS @@ -710,6 +710,10 @@ S: Las Cuevas 2385 - Bo Guemes S: Las Heras, Mendoza CP 5539 S: Argentina +N: Jay Cliburn +E: jcliburn@gmail.com +D: ATLX Ethernet drivers + N: Steven P. Cole E: scole@lanl.gov E: elenstev@mesatop.com diff --git a/MAINTAINERS b/MAINTAINERS index b15514a770e3c..57e17762d4110 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2942,7 +2942,6 @@ S: Maintained F: drivers/hwmon/asus_atk0110.c ATLX ETHERNET DRIVERS -M: Jay Cliburn M: Chris Snook L: netdev@vger.kernel.org S: Maintained -- GitLab From 09cd3f4683a901d572ad17f0564cc9e3e989f0f4 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 13 Jan 2021 17:49:07 -0800 Subject: [PATCH 0768/2012] MAINTAINERS: net: move Alexey Kuznetsov to CREDITS Move Alexey to CREDITS. I am probably not giving him enough justice with the description line.. Subsystem NETWORKING [IPv4/IPv6] Changes 1535 / 5111 (30%) Last activity: 2020-12-10 "David S. Miller" : Author b7e4ba9a91df 2020-12-09 00:00:00 407 Committer e0fecb289ad3 2020-12-10 00:00:00 3992 Tags e0fecb289ad3 2020-12-10 00:00:00 3978 Alexey Kuznetsov : Hideaki YOSHIFUJI : Tags d5d8760b78d0 2016-06-16 00:00:00 8 Top reviewers: [225]: edumazet@google.com [222]: dsahern@gmail.com [176]: ncardwell@google.com INACTIVE MAINTAINER Alexey Kuznetsov Signed-off-by: Jakub Kicinski --- CREDITS | 4 ++++ MAINTAINERS | 1 - 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/CREDITS b/CREDITS index 59a704a45170d..3dceea7376947 100644 --- a/CREDITS +++ b/CREDITS @@ -2142,6 +2142,10 @@ E: seasons@falcon.sch.bme.hu E: seasons@makosteszta.sote.hu D: Original author of software suspend +N: Alexey Kuznetsov +E: kuznet@ms2.inr.ac.ru +D: Author and maintainer of large parts of the networking stack + N: Jaroslav Kysela E: perex@perex.cz W: https://www.perex.cz diff --git a/MAINTAINERS b/MAINTAINERS index 57e17762d4110..c6e7f6bf7f6db 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -12417,7 +12417,6 @@ F: tools/testing/selftests/net/ipsec.c NETWORKING [IPv4/IPv6] M: "David S. Miller" -M: Alexey Kuznetsov M: Hideaki YOSHIFUJI L: netdev@vger.kernel.org S: Maintained -- GitLab From 5e62d124f75aae0e96fd8a588ad31659a2468710 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 13 Jan 2021 17:49:08 -0800 Subject: [PATCH 0769/2012] MAINTAINERS: vrf: move Shrijeet to CREDITS Shrijeet has moved on from VRF-related work. Subsystem VRF Changes 30 / 120 (25%) Last activity: 2020-12-09 David Ahern : Author 1b6687e31a2d 2020-07-23 00:00:00 1 Tags 9125abe7b9cb 2020-12-09 00:00:00 4 Shrijeet Mukherjee : Top reviewers: [13]: dsahern@gmail.com [4]: dsa@cumulusnetworks.com INACTIVE MAINTAINER Shrijeet Mukherjee Signed-off-by: Jakub Kicinski --- CREDITS | 4 ++++ MAINTAINERS | 1 - 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/CREDITS b/CREDITS index 3dceea7376947..98e7485ec1067 100644 --- a/CREDITS +++ b/CREDITS @@ -2704,6 +2704,10 @@ N: Wolfgang Muees E: wolfgang@iksw-muees.de D: Auerswald USB driver +N: Shrijeet Mukherjee +E: shrijeet@gmail.com +D: Network routing domains (VRF). + N: Paul Mundt E: paul.mundt@gmail.com D: SuperH maintainer diff --git a/MAINTAINERS b/MAINTAINERS index c6e7f6bf7f6db..a06faf9e20184 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -19056,7 +19056,6 @@ K: regulator_get_optional VRF M: David Ahern -M: Shrijeet Mukherjee L: netdev@vger.kernel.org S: Maintained F: Documentation/networking/vrf.rst -- GitLab From c41efbf2ad56280762d19a531eb7edbf2e6a9f84 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 13 Jan 2021 17:49:09 -0800 Subject: [PATCH 0770/2012] MAINTAINERS: ena: remove Zorik Machulsky from reviewers While ENA has 3 reviewers and 2 maintainers, we mostly see review tags and comments from the maintainers. While we very much appreciate Zorik's invovment in the community let's trim the reviewer list down to folks we've seen tags from. Subsystem AMAZON ETHERNET DRIVERS Changes 13 / 269 (4%) Last activity: 2020-11-24 Netanel Belgazal : Author 24dee0c7478d 2019-12-10 00:00:00 43 Tags 0e3a3f6dacf0 2020-07-21 00:00:00 47 Arthur Kiyanovski : Author 0e3a3f6dacf0 2020-07-21 00:00:00 79 Tags 09323b3bca95 2020-11-24 00:00:00 104 Guy Tzalik : Tags 713865da3c62 2020-09-10 00:00:00 3 Saeed Bishara : Tags 470793a78ce3 2020-02-11 00:00:00 2 Zorik Machulsky : Top reviewers: [4]: sameehj@amazon.com [3]: snelson@pensando.io [3]: shayagr@amazon.com INACTIVE MAINTAINER Zorik Machulsky Signed-off-by: Jakub Kicinski --- MAINTAINERS | 1 - 1 file changed, 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index a06faf9e20184..64dd19dfc9c3b 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -820,7 +820,6 @@ M: Netanel Belgazal M: Arthur Kiyanovski R: Guy Tzalik R: Saeed Bishara -R: Zorik Machulsky L: netdev@vger.kernel.org S: Supported F: Documentation/networking/device_drivers/ethernet/amazon/ena.rst -- GitLab From 0e4ed0b62b5a1f60b72ab7aaa29efd735d4cb6a6 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 13 Jan 2021 17:49:10 -0800 Subject: [PATCH 0771/2012] MAINTAINERS: tls: move Aviad to CREDITS Aviad wrote parts of the initial TLS implementation but hasn't been contributing to TLS since. Subsystem NETWORKING [TLS] Changes 123 / 308 (39%) Last activity: 2020-12-01 Boris Pismenny : Tags 138559b9f99d 2020-11-17 00:00:00 1 Aviad Yehezkel : John Fastabend : Author e91de6afa81c 2020-06-01 00:00:00 22 Tags e91de6afa81c 2020-06-01 00:00:00 29 Daniel Borkmann : Author c16ee04c9b30 2018-10-20 00:00:00 7 Committer b8e202d1d1d0 2020-02-21 00:00:00 19 Tags b8e202d1d1d0 2020-02-21 00:00:00 28 Jakub Kicinski : Author 5c39f26e67c9 2020-11-27 00:00:00 89 Committer d31c08007523 2020-12-01 00:00:00 15 Tags d31c08007523 2020-12-01 00:00:00 117 Top reviewers: [50]: dirk.vandermerwe@netronome.com [26]: simon.horman@netronome.com [14]: john.hurley@netronome.com INACTIVE MAINTAINER Aviad Yehezkel Signed-off-by: Jakub Kicinski --- CREDITS | 4 ++++ MAINTAINERS | 1 - 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/CREDITS b/CREDITS index 98e7485ec1067..90384691876c5 100644 --- a/CREDITS +++ b/CREDITS @@ -4122,6 +4122,10 @@ S: B-1206 Jingmao Guojigongyu S: 16 Baliqiao Nanjie, Beijing 101100 S: People's Repulic of China +N: Aviad Yehezkel +E: aviadye@nvidia.com +D: Kernel TLS implementation and offload support. + N: Victor Yodaiken E: yodaiken@fsmlabs.com D: RTLinux (RealTime Linux) diff --git a/MAINTAINERS b/MAINTAINERS index 64dd19dfc9c3b..92fdc134ca14f 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -12472,7 +12472,6 @@ F: net/ipv6/tcp*.c NETWORKING [TLS] M: Boris Pismenny -M: Aviad Yehezkel M: John Fastabend M: Daniel Borkmann M: Jakub Kicinski -- GitLab From 4f3786e011940d83d7a9c365730936db96a0b233 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 13 Jan 2021 17:49:11 -0800 Subject: [PATCH 0772/2012] MAINTAINERS: ipvs: move Wensong Zhang to CREDITS Move Wensong Zhang to credits, we haven't heard from him in years. Subsystem IPVS Changes 83 / 226 (36%) Last activity: 2020-11-27 Wensong Zhang : Simon Horman : Committer c24b75e0f923 2019-10-24 00:00:00 33 Tags 7980d2eabde8 2020-10-12 00:00:00 76 Julian Anastasov : Author 7980d2eabde8 2020-10-12 00:00:00 26 Tags 4bc3c8dc9f5f 2020-11-27 00:00:00 78 Top reviewers: [6]: horms+renesas@verge.net.au INACTIVE MAINTAINER Wensong Zhang Signed-off-by: Jakub Kicinski --- CREDITS | 4 ++++ MAINTAINERS | 1 - 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/CREDITS b/CREDITS index 90384691876c5..ce8eae8c5aa48 100644 --- a/CREDITS +++ b/CREDITS @@ -4183,6 +4183,10 @@ S: 1507 145th Place SE #B5 S: Bellevue, Washington 98007 S: USA +N: Wensong Zhang +E: wensong@linux-vs.org +D: IP virtual server (IPVS). + N: Haojian Zhuang E: haojian.zhuang@gmail.com D: MMP support diff --git a/MAINTAINERS b/MAINTAINERS index 92fdc134ca14f..18e75e29c6729 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -9325,7 +9325,6 @@ W: http://www.adaptec.com/ F: drivers/scsi/ips* IPVS -M: Wensong Zhang M: Simon Horman M: Julian Anastasov L: netdev@vger.kernel.org -- GitLab From 054c4610bd05e7bf677efefa880da2da340599fc Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 13 Jan 2021 17:49:12 -0800 Subject: [PATCH 0773/2012] MAINTAINERS: dccp: move Gerrit Renker to CREDITS As far as I can tell we haven't heard from Gerrit for roughly 5 years now. DCCP patch would really benefit from some review. Gerrit was the last maintainer so mark this entry as orphaned. Subsystem DCCP PROTOCOL Changes 38 / 166 (22%) (No activity) Top reviewers: [6]: kstewart@linuxfoundation.org [6]: allison@lohutok.net [5]: edumazet@google.com INACTIVE MAINTAINER Gerrit Renker Signed-off-by: Jakub Kicinski --- CREDITS | 4 ++++ MAINTAINERS | 3 +-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/CREDITS b/CREDITS index ce8eae8c5aa48..9add7e6a4fa02 100644 --- a/CREDITS +++ b/CREDITS @@ -1288,6 +1288,10 @@ D: Major kbuild rework during the 2.5 cycle D: ISDN Maintainer S: USA +N: Gerrit Renker +E: gerrit@erg.abdn.ac.uk +D: DCCP protocol support. + N: Philip Gladstone E: philip@gladstonefamily.net D: Kernel / timekeeping stuff diff --git a/MAINTAINERS b/MAINTAINERS index 18e75e29c6729..2a6dc5bfa08c4 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4920,9 +4920,8 @@ F: Documentation/scsi/dc395x.rst F: drivers/scsi/dc395x.* DCCP PROTOCOL -M: Gerrit Renker L: dccp@vger.kernel.org -S: Maintained +S: Orphan W: http://www.linuxfoundation.org/collaborate/workgroups/networking/dccp F: include/linux/dccp.h F: include/linux/tfrc.h -- GitLab From 25537d71e2d007faf42a244a75e5a2bb7c356234 Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Thu, 14 Jan 2021 17:12:15 +0200 Subject: [PATCH 0774/2012] net: Allow NETIF_F_HW_TLS_TX if IP_CSUM && IPV6_CSUM Cited patch below blocked the TLS TX device offload unless HW_CSUM is set. This broke devices that use IP_CSUM && IP6_CSUM. Here we fix it. Note that the single HW_TLS_TX feature flag indicates support for both IPv4/6, hence it should still be disabled in case only one of (IP_CSUM | IPV6_CSUM) is set. Fixes: ae0b04b238e2 ("net: Disable NETIF_F_HW_TLS_TX when HW_CSUM is disabled") Signed-off-by: Tariq Toukan Reported-by: Rohit Maheshwari Reviewed-by: Maxim Mikityanskiy Link: https://lore.kernel.org/r/20210114151215.7061-1-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- Documentation/networking/tls-offload.rst | 2 +- net/core/dev.c | 12 +++++++++--- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/Documentation/networking/tls-offload.rst b/Documentation/networking/tls-offload.rst index 0f55c6d540f97..9af3334d9ad08 100644 --- a/Documentation/networking/tls-offload.rst +++ b/Documentation/networking/tls-offload.rst @@ -530,7 +530,7 @@ TLS device feature flags only control adding of new TLS connection offloads, old connections will remain active after flags are cleared. TLS encryption cannot be offloaded to devices without checksum calculation -offload. Hence, TLS TX device feature flag requires NETIF_F_HW_CSUM being set. +offload. Hence, TLS TX device feature flag requires TX csum offload being set. Disabling the latter implies clearing the former. Disabling TX checksum offload should not affect old connections, and drivers should make sure checksum calculation does not break for them. diff --git a/net/core/dev.c b/net/core/dev.c index 0071a11a6dc3c..c360bb5367e24 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -9661,9 +9661,15 @@ static netdev_features_t netdev_fix_features(struct net_device *dev, } } - if ((features & NETIF_F_HW_TLS_TX) && !(features & NETIF_F_HW_CSUM)) { - netdev_dbg(dev, "Dropping TLS TX HW offload feature since no CSUM feature.\n"); - features &= ~NETIF_F_HW_TLS_TX; + if (features & NETIF_F_HW_TLS_TX) { + bool ip_csum = (features & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM)) == + (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM); + bool hw_csum = features & NETIF_F_HW_CSUM; + + if (!ip_csum && !hw_csum) { + netdev_dbg(dev, "Dropping TLS TX HW offload feature since no CSUM feature.\n"); + features &= ~NETIF_F_HW_TLS_TX; + } } return features; -- GitLab From 4369376ba91c97a1b2dd74abeec18c0c0ddf4ac9 Mon Sep 17 00:00:00 2001 From: Likun Gao Date: Fri, 8 Jan 2021 16:34:31 +0800 Subject: [PATCH 0775/2012] drm/amdgpu: set power brake sequence Add function to set power brake sequence. Signed-off-by: Likun Gao Reviewed-by: Kenneth Feng Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 32 ++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 34141b785aa4c..619d34c041ee0 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -164,6 +164,9 @@ #define mmGCVM_L2_CGTT_CLK_CTRL_Sienna_Cichlid 0x15db #define mmGCVM_L2_CGTT_CLK_CTRL_Sienna_Cichlid_BASE_IDX 0 +#define mmGC_THROTTLE_CTRL_Sienna_Cichlid 0x2030 +#define mmGC_THROTTLE_CTRL_Sienna_Cichlid_BASE_IDX 0 + MODULE_FIRMWARE("amdgpu/navi10_ce.bin"); MODULE_FIRMWARE("amdgpu/navi10_pfp.bin"); MODULE_FIRMWARE("amdgpu/navi10_me.bin"); @@ -3328,6 +3331,7 @@ static void gfx_v10_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume); static void gfx_v10_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start, bool secure); static u32 gfx_v10_3_get_disabled_sa(struct amdgpu_device *adev); static void gfx_v10_3_program_pbb_mode(struct amdgpu_device *adev); +static void gfx_v10_3_set_power_brake_sequence(struct amdgpu_device *adev); static void gfx10_kiq_set_resources(struct amdgpu_ring *kiq_ring, uint64_t queue_mask) { @@ -7196,6 +7200,9 @@ static int gfx_v10_0_hw_init(void *handle) if (adev->asic_type == CHIP_SIENNA_CICHLID) gfx_v10_3_program_pbb_mode(adev); + if (adev->asic_type >= CHIP_SIENNA_CICHLID) + gfx_v10_3_set_power_brake_sequence(adev); + return r; } @@ -9181,6 +9188,31 @@ static void gfx_v10_3_program_pbb_mode(struct amdgpu_device *adev) } } +static void gfx_v10_3_set_power_brake_sequence(struct amdgpu_device *adev) +{ + WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, + (0x1 << GRBM_GFX_INDEX__SA_BROADCAST_WRITES__SHIFT) | + (0x1 << GRBM_GFX_INDEX__INSTANCE_BROADCAST_WRITES__SHIFT) | + (0x1 << GRBM_GFX_INDEX__SE_BROADCAST_WRITES__SHIFT)); + + WREG32_SOC15(GC, 0, mmGC_CAC_IND_INDEX, ixPWRBRK_STALL_PATTERN_CTRL); + WREG32_SOC15(GC, 0, mmGC_CAC_IND_DATA, + (0x1 << PWRBRK_STALL_PATTERN_CTRL__PWRBRK_STEP_INTERVAL__SHIFT) | + (0x12 << PWRBRK_STALL_PATTERN_CTRL__PWRBRK_BEGIN_STEP__SHIFT) | + (0x13 << PWRBRK_STALL_PATTERN_CTRL__PWRBRK_END_STEP__SHIFT) | + (0xf << PWRBRK_STALL_PATTERN_CTRL__PWRBRK_THROTTLE_PATTERN_BIT_NUMS__SHIFT)); + + WREG32_SOC15(GC, 0, mmGC_THROTTLE_CTRL_Sienna_Cichlid, + (0x1 << GC_THROTTLE_CTRL__PWRBRK_STALL_EN__SHIFT) | + (0x1 << GC_THROTTLE_CTRL__PATTERN_MODE__SHIFT) | + (0x5 << GC_THROTTLE_CTRL__RELEASE_STEP_INTERVAL__SHIFT)); + + WREG32_SOC15(GC, 0, mmDIDT_IND_INDEX, ixDIDT_SQ_THROTTLE_CTRL); + + WREG32_SOC15(GC, 0, mmDIDT_IND_DATA, + (0x1 << DIDT_SQ_THROTTLE_CTRL__PWRBRK_STALL_EN__SHIFT)); +} + const struct amdgpu_ip_block_version gfx_v10_0_ip_block = { .type = AMD_IP_BLOCK_TYPE_GFX, -- GitLab From 55df908bd663ead7d85bd64dd49562d5ac3889ef Mon Sep 17 00:00:00 2001 From: Rodrigo Siqueira Date: Thu, 7 Jan 2021 14:50:51 -0500 Subject: [PATCH 0776/2012] Revert "drm/amd/display: Fix unused variable warning" This reverts commit f01afd1ee48816457fb22e201f1d0cfb14589904. Cc: Wayne Lin Cc: Alexander Deucher Cc: Harry Wentland Cc: Roman Li Cc: Bindu R Cc: Daniel Vetter Acked-by: Daniel Vetter Acked-by: Alex Deucher Signed-off-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 4 +++- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.h | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 146486071d01d..8fac6116591bf 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -8378,7 +8378,8 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state) acrtc->dm_irq_params.stream = dm_new_crtc_state->stream; manage_dm_interrupts(adev, acrtc, true); } - if (IS_ENABLED(CONFIG_DEBUG_FS) && new_crtc_state->active && +#ifdef CONFIG_DEBUG_FS + if (new_crtc_state->active && amdgpu_dm_is_valid_crc_source(dm_new_crtc_state->crc_src)) { /** * Frontend may have changed so reapply the CRC capture @@ -8399,6 +8400,7 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state) amdgpu_dm_crtc_configure_crc_source( crtc, dm_new_crtc_state, dm_new_crtc_state->crc_src); } +#endif } for_each_new_crtc_in_state(state, crtc, new_crtc_state, j) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.h index eba2f1d35d075..0235bfb246e5d 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.h @@ -46,13 +46,13 @@ static inline bool amdgpu_dm_is_valid_crc_source(enum amdgpu_dm_pipe_crc_source } /* amdgpu_dm_crc.c */ +#ifdef CONFIG_DEBUG_FS bool amdgpu_dm_crc_window_is_default(struct dm_crtc_state *dm_crtc_state); bool amdgpu_dm_crc_window_changed(struct dm_crtc_state *dm_new_crtc_state, struct dm_crtc_state *dm_old_crtc_state); int amdgpu_dm_crtc_configure_crc_source(struct drm_crtc *crtc, struct dm_crtc_state *dm_crtc_state, enum amdgpu_dm_pipe_crc_source source); -#ifdef CONFIG_DEBUG_FS int amdgpu_dm_crtc_set_crc_source(struct drm_crtc *crtc, const char *src_name); int amdgpu_dm_crtc_verify_crc_source(struct drm_crtc *crtc, const char *src_name, -- GitLab From 3c517ca5212faab4604e1725b4d31e290945ff87 Mon Sep 17 00:00:00 2001 From: Rodrigo Siqueira Date: Thu, 7 Jan 2021 14:51:49 -0500 Subject: [PATCH 0777/2012] Revert "drm/amdgpu/disply: fix documentation warnings in display manager" This reverts commit 6ae09fa49147e557eb6aebbb5b2059b63706d454. Cc: Wayne Lin Cc: Alexander Deucher Cc: Harry Wentland Cc: Roman Li Cc: Bindu R Cc: Daniel Vetter Acked-by: Daniel Vetter Acked-by: Alex Deucher Signed-off-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h | 21 +------------------ 1 file changed, 1 insertion(+), 20 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h index 2ee6edb3df931..0b31779a04856 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h @@ -337,29 +337,10 @@ struct amdgpu_display_manager { const struct gpu_info_soc_bounding_box_v1_0 *soc_bounding_box; #ifdef CONFIG_DEBUG_FS - /** - * @crc_win_x_start_property: - * - * X start of the crc calculation window - */ + /* set the crc calculation window*/ struct drm_property *crc_win_x_start_property; - /** - * @crc_win_y_start_property: - * - * Y start of the crc calculation window - */ struct drm_property *crc_win_y_start_property; - /** - * @crc_win_x_end_property: - * - * X end of the crc calculation window - */ struct drm_property *crc_win_x_end_property; - /** - * @crc_win_y_end_property: - * - * Y end of the crc calculation window - */ struct drm_property *crc_win_y_end_property; #endif /** -- GitLab From a7ddd22151fc2910c7b2faad64680cc2bb699b03 Mon Sep 17 00:00:00 2001 From: Rodrigo Siqueira Date: Thu, 7 Jan 2021 15:09:30 -0500 Subject: [PATCH 0778/2012] Revert "drm/amd/display: Expose new CRC window property" This reverts commit c920888c604d72799d057bbcd9e28a6c003ccfbe. Cc: Wayne Lin Cc: Alexander Deucher Cc: Harry Wentland Cc: Roman Li Cc: Bindu R Cc: Daniel Vetter Acked-by: Daniel Vetter Acked-by: Alex Deucher Reviewed-by: Wayne Lin Signed-off-by: Rodrigo Siqueira Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 142 +----------------- .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h | 19 --- .../drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c | 56 +------ .../drm/amd/display/amdgpu_dm/amdgpu_dm_crc.h | 3 - 4 files changed, 10 insertions(+), 210 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 8fac6116591bf..c6da89df055de 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -939,41 +939,6 @@ static void mmhub_read_system_context(struct amdgpu_device *adev, struct dc_phy_ } #endif -#ifdef CONFIG_DEBUG_FS -static int create_crtc_crc_properties(struct amdgpu_display_manager *dm) -{ - dm->crc_win_x_start_property = - drm_property_create_range(adev_to_drm(dm->adev), - DRM_MODE_PROP_ATOMIC, - "AMD_CRC_WIN_X_START", 0, U16_MAX); - if (!dm->crc_win_x_start_property) - return -ENOMEM; - - dm->crc_win_y_start_property = - drm_property_create_range(adev_to_drm(dm->adev), - DRM_MODE_PROP_ATOMIC, - "AMD_CRC_WIN_Y_START", 0, U16_MAX); - if (!dm->crc_win_y_start_property) - return -ENOMEM; - - dm->crc_win_x_end_property = - drm_property_create_range(adev_to_drm(dm->adev), - DRM_MODE_PROP_ATOMIC, - "AMD_CRC_WIN_X_END", 0, U16_MAX); - if (!dm->crc_win_x_end_property) - return -ENOMEM; - - dm->crc_win_y_end_property = - drm_property_create_range(adev_to_drm(dm->adev), - DRM_MODE_PROP_ATOMIC, - "AMD_CRC_WIN_Y_END", 0, U16_MAX); - if (!dm->crc_win_y_end_property) - return -ENOMEM; - - return 0; -} -#endif - static int amdgpu_dm_init(struct amdgpu_device *adev) { struct dc_init_data init_data; @@ -1120,10 +1085,6 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) dc_init_callbacks(adev->dm.dc, &init_params); } -#endif -#ifdef CONFIG_DEBUG_FS - if (create_crtc_crc_properties(&adev->dm)) - DRM_ERROR("amdgpu: failed to create crc property.\n"); #endif if (amdgpu_dm_initialize_drm_device(adev)) { DRM_ERROR( @@ -5333,64 +5294,12 @@ dm_crtc_duplicate_state(struct drm_crtc *crtc) state->crc_src = cur->crc_src; state->cm_has_degamma = cur->cm_has_degamma; state->cm_is_degamma_srgb = cur->cm_is_degamma_srgb; -#ifdef CONFIG_DEBUG_FS - state->crc_window = cur->crc_window; -#endif + /* TODO Duplicate dc_stream after objects are stream object is flattened */ return &state->base; } -#ifdef CONFIG_DEBUG_FS -static int amdgpu_dm_crtc_atomic_set_property(struct drm_crtc *crtc, - struct drm_crtc_state *crtc_state, - struct drm_property *property, - uint64_t val) -{ - struct drm_device *dev = crtc->dev; - struct amdgpu_device *adev = drm_to_adev(dev); - struct dm_crtc_state *dm_new_state = - to_dm_crtc_state(crtc_state); - - if (property == adev->dm.crc_win_x_start_property) - dm_new_state->crc_window.x_start = val; - else if (property == adev->dm.crc_win_y_start_property) - dm_new_state->crc_window.y_start = val; - else if (property == adev->dm.crc_win_x_end_property) - dm_new_state->crc_window.x_end = val; - else if (property == adev->dm.crc_win_y_end_property) - dm_new_state->crc_window.y_end = val; - else - return -EINVAL; - - return 0; -} - -static int amdgpu_dm_crtc_atomic_get_property(struct drm_crtc *crtc, - const struct drm_crtc_state *state, - struct drm_property *property, - uint64_t *val) -{ - struct drm_device *dev = crtc->dev; - struct amdgpu_device *adev = drm_to_adev(dev); - struct dm_crtc_state *dm_state = - to_dm_crtc_state(state); - - if (property == adev->dm.crc_win_x_start_property) - *val = dm_state->crc_window.x_start; - else if (property == adev->dm.crc_win_y_start_property) - *val = dm_state->crc_window.y_start; - else if (property == adev->dm.crc_win_x_end_property) - *val = dm_state->crc_window.x_end; - else if (property == adev->dm.crc_win_y_end_property) - *val = dm_state->crc_window.y_end; - else - return -EINVAL; - - return 0; -} -#endif - static inline int dm_set_vupdate_irq(struct drm_crtc *crtc, bool enable) { enum dc_irq_source irq_source; @@ -5457,10 +5366,6 @@ static const struct drm_crtc_funcs amdgpu_dm_crtc_funcs = { .enable_vblank = dm_enable_vblank, .disable_vblank = dm_disable_vblank, .get_vblank_timestamp = drm_crtc_vblank_helper_get_vblank_timestamp, -#ifdef CONFIG_DEBUG_FS - .atomic_set_property = amdgpu_dm_crtc_atomic_set_property, - .atomic_get_property = amdgpu_dm_crtc_atomic_get_property, -#endif }; static enum drm_connector_status @@ -6662,25 +6567,6 @@ static int amdgpu_dm_plane_init(struct amdgpu_display_manager *dm, return 0; } -#ifdef CONFIG_DEBUG_FS -static void attach_crtc_crc_properties(struct amdgpu_display_manager *dm, - struct amdgpu_crtc *acrtc) -{ - drm_object_attach_property(&acrtc->base.base, - dm->crc_win_x_start_property, - 0); - drm_object_attach_property(&acrtc->base.base, - dm->crc_win_y_start_property, - 0); - drm_object_attach_property(&acrtc->base.base, - dm->crc_win_x_end_property, - 0); - drm_object_attach_property(&acrtc->base.base, - dm->crc_win_y_end_property, - 0); -} -#endif - static int amdgpu_dm_crtc_init(struct amdgpu_display_manager *dm, struct drm_plane *plane, uint32_t crtc_index) @@ -6728,9 +6614,7 @@ static int amdgpu_dm_crtc_init(struct amdgpu_display_manager *dm, drm_crtc_enable_color_mgmt(&acrtc->base, MAX_COLOR_LUT_ENTRIES, true, MAX_COLOR_LUT_ENTRIES); drm_mode_crtc_set_gamma_size(&acrtc->base, MAX_COLOR_LEGACY_LUT_ENTRIES); -#ifdef CONFIG_DEBUG_FS - attach_crtc_crc_properties(dm, acrtc); -#endif + return 0; fail: @@ -8367,7 +8251,6 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state) */ for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) { struct amdgpu_crtc *acrtc = to_amdgpu_crtc(crtc); - bool configure_crc = false; dm_new_crtc_state = to_dm_crtc_state(new_crtc_state); @@ -8377,30 +8260,21 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state) dc_stream_retain(dm_new_crtc_state->stream); acrtc->dm_irq_params.stream = dm_new_crtc_state->stream; manage_dm_interrupts(adev, acrtc, true); - } + #ifdef CONFIG_DEBUG_FS - if (new_crtc_state->active && - amdgpu_dm_is_valid_crc_source(dm_new_crtc_state->crc_src)) { /** * Frontend may have changed so reapply the CRC capture * settings for the stream. */ dm_new_crtc_state = to_dm_crtc_state(new_crtc_state); - dm_old_crtc_state = to_dm_crtc_state(old_crtc_state); - - if (amdgpu_dm_crc_window_is_default(dm_new_crtc_state)) { - if (!old_crtc_state->active || drm_atomic_crtc_needs_modeset(new_crtc_state)) - configure_crc = true; - } else { - if (amdgpu_dm_crc_window_changed(dm_new_crtc_state, dm_old_crtc_state)) - configure_crc = true; - } - if (configure_crc) + if (amdgpu_dm_is_valid_crc_source(dm_new_crtc_state->crc_src)) { amdgpu_dm_crtc_configure_crc_source( - crtc, dm_new_crtc_state, dm_new_crtc_state->crc_src); - } + crtc, dm_new_crtc_state, + dm_new_crtc_state->crc_src); + } #endif + } } for_each_new_crtc_in_state(state, crtc, new_crtc_state, j) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h index 0b31779a04856..1182dafcef022 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h @@ -336,13 +336,6 @@ struct amdgpu_display_manager { */ const struct gpu_info_soc_bounding_box_v1_0 *soc_bounding_box; -#ifdef CONFIG_DEBUG_FS - /* set the crc calculation window*/ - struct drm_property *crc_win_x_start_property; - struct drm_property *crc_win_y_start_property; - struct drm_property *crc_win_x_end_property; - struct drm_property *crc_win_y_end_property; -#endif /** * @mst_encoders: * @@ -429,15 +422,6 @@ struct dm_plane_state { struct dc_plane_state *dc_state; }; -#ifdef CONFIG_DEBUG_FS -struct crc_rec { - uint16_t x_start; - uint16_t y_start; - uint16_t x_end; - uint16_t y_end; - }; -#endif - struct dm_crtc_state { struct drm_crtc_state base; struct dc_stream_state *stream; @@ -460,9 +444,6 @@ struct dm_crtc_state { struct dc_info_packet vrr_infopacket; int abm_level; -#ifdef CONFIG_DEBUG_FS - struct crc_rec crc_window; -#endif }; #define to_dm_crtc_state(x) container_of(x, struct dm_crtc_state, base) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c index 7b886a779a8ca..c29dc11619f79 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c @@ -81,41 +81,6 @@ const char *const *amdgpu_dm_crtc_get_crc_sources(struct drm_crtc *crtc, return pipe_crc_sources; } -static void amdgpu_dm_set_crc_window_default(struct dm_crtc_state *dm_crtc_state) -{ - dm_crtc_state->crc_window.x_start = 0; - dm_crtc_state->crc_window.y_start = 0; - dm_crtc_state->crc_window.x_end = 0; - dm_crtc_state->crc_window.y_end = 0; -} - -bool amdgpu_dm_crc_window_is_default(struct dm_crtc_state *dm_crtc_state) -{ - bool ret = true; - - if ((dm_crtc_state->crc_window.x_start != 0) || - (dm_crtc_state->crc_window.y_start != 0) || - (dm_crtc_state->crc_window.x_end != 0) || - (dm_crtc_state->crc_window.y_end != 0)) - ret = false; - - return ret; -} - -bool amdgpu_dm_crc_window_changed(struct dm_crtc_state *dm_new_crtc_state, - struct dm_crtc_state *dm_old_crtc_state) -{ - bool ret = false; - - if ((dm_new_crtc_state->crc_window.x_start != dm_old_crtc_state->crc_window.x_start) || - (dm_new_crtc_state->crc_window.y_start != dm_old_crtc_state->crc_window.y_start) || - (dm_new_crtc_state->crc_window.x_end != dm_old_crtc_state->crc_window.x_end) || - (dm_new_crtc_state->crc_window.y_end != dm_old_crtc_state->crc_window.y_end)) - ret = true; - - return ret; -} - int amdgpu_dm_crtc_verify_crc_source(struct drm_crtc *crtc, const char *src_name, size_t *values_cnt) @@ -140,7 +105,6 @@ int amdgpu_dm_crtc_configure_crc_source(struct drm_crtc *crtc, struct dc_stream_state *stream_state = dm_crtc_state->stream; bool enable = amdgpu_dm_is_valid_crc_source(source); int ret = 0; - struct crc_params *crc_window = NULL, tmp_window; /* Configuration will be deferred to stream enable. */ if (!stream_state) @@ -149,25 +113,9 @@ int amdgpu_dm_crtc_configure_crc_source(struct drm_crtc *crtc, mutex_lock(&adev->dm.dc_lock); /* Enable CRTC CRC generation if necessary. */ - if (dm_is_crc_source_crtc(source) || source == AMDGPU_DM_PIPE_CRC_SOURCE_NONE) { - if (!enable) - amdgpu_dm_set_crc_window_default(dm_crtc_state); - - if (!amdgpu_dm_crc_window_is_default(dm_crtc_state)) { - crc_window = &tmp_window; - - tmp_window.windowa_x_start = dm_crtc_state->crc_window.x_start; - tmp_window.windowa_y_start = dm_crtc_state->crc_window.y_start; - tmp_window.windowa_x_end = dm_crtc_state->crc_window.x_end; - tmp_window.windowa_y_end = dm_crtc_state->crc_window.y_end; - tmp_window.windowb_x_start = dm_crtc_state->crc_window.x_start; - tmp_window.windowb_y_start = dm_crtc_state->crc_window.y_start; - tmp_window.windowb_x_end = dm_crtc_state->crc_window.x_end; - tmp_window.windowb_y_end = dm_crtc_state->crc_window.y_end; - } - + if (dm_is_crc_source_crtc(source)) { if (!dc_stream_configure_crc(stream_state->ctx->dc, - stream_state, crc_window, enable, enable)) { + stream_state, NULL, enable, enable)) { ret = -EINVAL; goto unlock; } diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.h index 0235bfb246e5d..f7d731797d3fc 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.h @@ -47,9 +47,6 @@ static inline bool amdgpu_dm_is_valid_crc_source(enum amdgpu_dm_pipe_crc_source /* amdgpu_dm_crc.c */ #ifdef CONFIG_DEBUG_FS -bool amdgpu_dm_crc_window_is_default(struct dm_crtc_state *dm_crtc_state); -bool amdgpu_dm_crc_window_changed(struct dm_crtc_state *dm_new_crtc_state, - struct dm_crtc_state *dm_old_crtc_state); int amdgpu_dm_crtc_configure_crc_source(struct drm_crtc *crtc, struct dm_crtc_state *dm_crtc_state, enum amdgpu_dm_pipe_crc_source source); -- GitLab From 2f0fa789f7b9fb022440f8f846cae175233987aa Mon Sep 17 00:00:00 2001 From: Wayne Lin Date: Tue, 24 Nov 2020 19:57:03 +0800 Subject: [PATCH 0779/2012] drm/amd/display: Fix to be able to stop crc calculation [Why] Find out when we try to disable CRC calculation, crc generation is still enabled. Main reason is that dc_stream_configure_crc() will never get called when the source is AMDGPU_DM_PIPE_CRC_SOURCE_NONE. [How] Add checking condition that when source is AMDGPU_DM_PIPE_CRC_SOURCE_NONE, we should also call dc_stream_configure_crc() to disable crc calculation. Acked-by: Alex Deucher Reviewed-by: Rodrigo Siqueira Signed-off-by: Wayne Lin Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c index c29dc11619f79..66cb8730586b1 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c @@ -113,7 +113,7 @@ int amdgpu_dm_crtc_configure_crc_source(struct drm_crtc *crtc, mutex_lock(&adev->dm.dc_lock); /* Enable CRTC CRC generation if necessary. */ - if (dm_is_crc_source_crtc(source)) { + if (dm_is_crc_source_crtc(source) || source == AMDGPU_DM_PIPE_CRC_SOURCE_NONE) { if (!dc_stream_configure_crc(stream_state->ctx->dc, stream_state, NULL, enable, enable)) { ret = -EINVAL; -- GitLab From 13a9499e833387fcc7a53915bbe5cddf3c336b59 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Thu, 14 Jan 2021 16:37:37 +0100 Subject: [PATCH 0780/2012] mptcp: fix locking in mptcp_disconnect() tcp_disconnect() expects the caller acquires the sock lock, but mptcp_disconnect() is not doing that. Add the missing required lock. Reported-by: Eric Dumazet Fixes: 76e2a55d1625 ("mptcp: better msk-level shutdown.") Signed-off-by: Paolo Abeni Link: https://lore.kernel.org/r/f818e82b58a556feeb71dcccc8bf1c87aafc6175.1610638176.git.pabeni@redhat.com Signed-off-by: Jakub Kicinski --- net/mptcp/protocol.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 81faeff8f3bb7..f998a077c7dd0 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -2646,8 +2646,13 @@ static int mptcp_disconnect(struct sock *sk, int flags) struct mptcp_sock *msk = mptcp_sk(sk); __mptcp_flush_join_list(msk); - mptcp_for_each_subflow(msk, subflow) - tcp_disconnect(mptcp_subflow_tcp_sock(subflow), flags); + mptcp_for_each_subflow(msk, subflow) { + struct sock *ssk = mptcp_subflow_tcp_sock(subflow); + + lock_sock(ssk); + tcp_disconnect(ssk, flags); + release_sock(ssk); + } return 0; } -- GitLab From 7a84665619bb5da8c8b6517157875a1fd7632014 Mon Sep 17 00:00:00 2001 From: Israel Rukshin Date: Sun, 10 Jan 2021 14:09:05 +0200 Subject: [PATCH 0781/2012] nvmet-rdma: Fix NULL deref when setting pi_enable and traddr INADDR_ANY When setting port traddr to INADDR_ANY, the listening cm_id->device is NULL. The associate IB device is known only when a connect request event arrives, so checking T10-PI device capability should be done at this stage. Fixes: b09160c3996c ("nvmet-rdma: add metadata/T10-PI support") Signed-off-by: Israel Rukshin Reviewed-by: Sagi Grimberg Reviewed-by: Max Gurtovoy Signed-off-by: Christoph Hellwig --- drivers/nvme/target/rdma.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c index bdfc22eb2a10f..06b6b742bb213 100644 --- a/drivers/nvme/target/rdma.c +++ b/drivers/nvme/target/rdma.c @@ -1220,6 +1220,14 @@ nvmet_rdma_find_get_device(struct rdma_cm_id *cm_id) } ndev->inline_data_size = nport->inline_data_size; ndev->inline_page_count = inline_page_count; + + if (nport->pi_enable && !(cm_id->device->attrs.device_cap_flags & + IB_DEVICE_INTEGRITY_HANDOVER)) { + pr_warn("T10-PI is not supported by device %s. Disabling it\n", + cm_id->device->name); + nport->pi_enable = false; + } + ndev->device = cm_id->device; kref_init(&ndev->ref); @@ -1855,14 +1863,6 @@ static int nvmet_rdma_enable_port(struct nvmet_rdma_port *port) goto out_destroy_id; } - if (port->nport->pi_enable && - !(cm_id->device->attrs.device_cap_flags & - IB_DEVICE_INTEGRITY_HANDOVER)) { - pr_err("T10-PI is not supported for %pISpcs\n", addr); - ret = -EINVAL; - goto out_destroy_id; - } - port->cm_id = cm_id; return 0; -- GitLab From ada831772188192243f9ea437c46e37e97a5975d Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Wed, 13 Jan 2021 14:03:04 -0800 Subject: [PATCH 0782/2012] nvme-tcp: Fix warning with CONFIG_DEBUG_PREEMPT We shouldn't call smp_processor_id() in a preemptible context, but this is advisory at best, so instead call __smp_processor_id(). Fixes: db5ad6b7f8cd ("nvme-tcp: try to send request in queue_rq context") Reported-by: Or Gerlitz Reported-by: Yi Zhang Signed-off-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/host/tcp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 979ee31b8dd1c..b2e0865785ef4 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -286,7 +286,7 @@ static inline void nvme_tcp_queue_request(struct nvme_tcp_request *req, * directly, otherwise queue io_work. Also, only do that if we * are on the same cpu, so we don't introduce contention. */ - if (queue->io_cpu == smp_processor_id() && + if (queue->io_cpu == __smp_processor_id() && sync && empty && mutex_trylock(&queue->send_mutex)) { queue->more_requests = !last; nvme_tcp_send_all(queue); -- GitLab From ca1ff67d0fb14f39cf0cc5102b1fbcc3b14f6fb9 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Wed, 13 Jan 2021 13:56:57 -0800 Subject: [PATCH 0783/2012] nvme-tcp: fix possible data corruption with bio merges When a bio merges, we can get a request that spans multiple bios, and the overall request payload size is the sum of all bios. When we calculate how much we need to send from the existing bio (and bvec), we did not take into account the iov_iter byte count cap. Since multipage bvecs support, bvecs can split in the middle which means that when we account for the last bvec send we should also take the iov_iter byte count cap as it might be lower than the last bvec size. Reported-by: Hao Wang Fixes: 3f2304f8c6d6 ("nvme-tcp: add NVMe over TCP host driver") Tested-by: Hao Wang Signed-off-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/host/tcp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index b2e0865785ef4..216619926563e 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -201,7 +201,7 @@ static inline size_t nvme_tcp_req_cur_offset(struct nvme_tcp_request *req) static inline size_t nvme_tcp_req_cur_length(struct nvme_tcp_request *req) { - return min_t(size_t, req->iter.bvec->bv_len - req->iter.iov_offset, + return min_t(size_t, iov_iter_single_seg_count(&req->iter), req->pdu_len - req->pdu_sent); } -- GitLab From 5ab25a32cd90ce561ac28b9302766e565d61304c Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Wed, 13 Jan 2021 16:00:22 -0800 Subject: [PATCH 0784/2012] nvme: don't intialize hwmon for discovery controllers Discovery controllers usually don't support smart log page command. So when we connect to the discovery controller we see this warning: nvme nvme0: Failed to read smart log (error 24577) nvme nvme0: new ctrl: NQN "nqn.2014-08.org.nvmexpress.discovery", addr 192.168.123.1:8009 nvme nvme0: Removing ctrl: NQN "nqn.2014-08.org.nvmexpress.discovery" Introduce a new helper to understand if the controller is a discovery controller and use this helper to skip nvme_init_hwmon (also use it in other places that we check if the controller is a discovery controller). Fixes: 400b6a7b13a3 ("nvme: Add hardware monitoring support") Signed-off-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index f320273fc6727..200bdd672c281 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -2856,6 +2856,11 @@ static const struct attribute_group *nvme_subsys_attrs_groups[] = { NULL, }; +static inline bool nvme_discovery_ctrl(struct nvme_ctrl *ctrl) +{ + return ctrl->opts && ctrl->opts->discovery_nqn; +} + static bool nvme_validate_cntlid(struct nvme_subsystem *subsys, struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id) { @@ -2875,7 +2880,7 @@ static bool nvme_validate_cntlid(struct nvme_subsystem *subsys, } if ((id->cmic & NVME_CTRL_CMIC_MULTI_CTRL) || - (ctrl->opts && ctrl->opts->discovery_nqn)) + nvme_discovery_ctrl(ctrl)) continue; dev_err(ctrl->device, @@ -3144,7 +3149,7 @@ int nvme_init_identify(struct nvme_ctrl *ctrl) goto out_free; } - if (!ctrl->opts->discovery_nqn && !ctrl->kas) { + if (!nvme_discovery_ctrl(ctrl) && !ctrl->kas) { dev_err(ctrl->device, "keep-alive support is mandatory for fabrics\n"); ret = -EINVAL; @@ -3184,7 +3189,7 @@ int nvme_init_identify(struct nvme_ctrl *ctrl) if (ret < 0) return ret; - if (!ctrl->identified) { + if (!ctrl->identified && !nvme_discovery_ctrl(ctrl)) { ret = nvme_hwmon_init(ctrl); if (ret < 0) return ret; -- GitLab From a06b63a1200bd40fd20fa695739e479e2b2ae948 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 1 Dec 2020 10:03:28 +0300 Subject: [PATCH 0785/2012] iio: sx9310: Off by one in sx9310_read_thresh() This > should be >= to prevent reading one element beyond the end of the sx9310_pthresh_codes[] array. Fixes: ad2b473e2ba3 ("iio: sx9310: Support setting proximity thresholds") Signed-off-by: Dan Carpenter Reviewed-by: Stephen Boyd Reviewed-by: Douglas Anderson Link: https://lore.kernel.org/r/X8XqwK0z//8sSWJR@mwanda Signed-off-by: Jonathan Cameron --- drivers/iio/proximity/sx9310.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/proximity/sx9310.c b/drivers/iio/proximity/sx9310.c index a2f820997afc2..62eacb22e9bcb 100644 --- a/drivers/iio/proximity/sx9310.c +++ b/drivers/iio/proximity/sx9310.c @@ -601,7 +601,7 @@ static int sx9310_read_thresh(struct sx9310_data *data, return ret; regval = FIELD_GET(SX9310_REG_PROX_CTRL8_9_PTHRESH_MASK, regval); - if (regval > ARRAY_SIZE(sx9310_pthresh_codes)) + if (regval >= ARRAY_SIZE(sx9310_pthresh_codes)) return -EINVAL; *val = sx9310_pthresh_codes[regval]; -- GitLab From b6bc1b4ffad4a55c9461707833dc45de2e4367cc Mon Sep 17 00:00:00 2001 From: Stephan Gerhold Date: Wed, 2 Dec 2020 09:35:51 +0100 Subject: [PATCH 0786/2012] dt-bindings: iio: accel: bma255: Fix bmc150/bmi055 compatible The bmc150-accel-i2c.c driver has an "_accel" suffix for the compatibles of BMC150 and BMI055. This is necessary because BMC150 contains both accelerometer (bosch,bmc150_accel) and magnetometer (bosch,bmc150_magn) and therefore "bosch,bmc150" would be ambiguous. However, the binding documentation suggests using "bosch,bmc150". Add the "_accel" suffix for BMC150 and BMI055 so the binding docs match what is expected by the driver. Fixes: 6259551cf19b ("iio: accel: bmc150-accel: Add DT bindings") Signed-off-by: Stephan Gerhold Reviewed-by: Linus Walleij Reviewed-by: Rob Herring Cc: Linus Walleij Link: https://lore.kernel.org/r/20201202083551.7753-1-stephan@gerhold.net Signed-off-by: Jonathan Cameron --- Documentation/devicetree/bindings/iio/accel/bosch,bma255.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/devicetree/bindings/iio/accel/bosch,bma255.yaml b/Documentation/devicetree/bindings/iio/accel/bosch,bma255.yaml index 6eef3480ea8fc..c2efbb813ca27 100644 --- a/Documentation/devicetree/bindings/iio/accel/bosch,bma255.yaml +++ b/Documentation/devicetree/bindings/iio/accel/bosch,bma255.yaml @@ -16,8 +16,8 @@ description: properties: compatible: enum: - - bosch,bmc150 - - bosch,bmi055 + - bosch,bmc150_accel + - bosch,bmi055_accel - bosch,bma255 - bosch,bma250e - bosch,bma222 -- GitLab From 7e6d9788aa02333a4353058816d52b9a90aae0d3 Mon Sep 17 00:00:00 2001 From: Alexandru Ardelean Date: Thu, 3 Dec 2020 09:26:50 +0200 Subject: [PATCH 0787/2012] iio: adc: ti_am335x_adc: remove omitted iio_kfifo_free() When the conversion was done to use devm_iio_kfifo_allocate(), a call to iio_kfifo_free() was omitted (to be removed). This change removes it. Fixes: 3c5308058899 ("iio: adc: ti_am335x_adc: alloc kfifo & IRQ via devm_ functions") Signed-off-by: Alexandru Ardelean Link: https://lore.kernel.org/r/20201203072650.24128-1-alexandru.ardelean@analog.com Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ti_am335x_adc.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/iio/adc/ti_am335x_adc.c b/drivers/iio/adc/ti_am335x_adc.c index b11c8c47ba2aa..e946903b09936 100644 --- a/drivers/iio/adc/ti_am335x_adc.c +++ b/drivers/iio/adc/ti_am335x_adc.c @@ -397,16 +397,12 @@ static int tiadc_iio_buffered_hardware_setup(struct device *dev, ret = devm_request_threaded_irq(dev, irq, pollfunc_th, pollfunc_bh, flags, indio_dev->name, indio_dev); if (ret) - goto error_kfifo_free; + return ret; indio_dev->setup_ops = setup_ops; indio_dev->modes |= INDIO_BUFFER_SOFTWARE; return 0; - -error_kfifo_free: - iio_kfifo_free(indio_dev->buffer); - return ret; } static const char * const chan_name_ain[] = { -- GitLab From cf5b1385d748b2f91b0c05bb301fcaf9bdbad385 Mon Sep 17 00:00:00 2001 From: Slaveyko Slaveykov Date: Wed, 16 Dec 2020 13:57:20 +0200 Subject: [PATCH 0788/2012] drivers: iio: temperature: Add delay after the addressed reset command in mlx90632.c After an I2C reset command, the mlx90632 needs some time before responding to other I2C commands. Without that delay, there is a chance that the I2C command(s) after the reset will not be accepted. Signed-off-by: Slaveyko Slaveykov Reviewed-by: Andy Shevchenko Reviewed-by: Crt Mori Fixes: e02472f74a81 ("iio:temperature:mlx90632: Adding extended calibration option") Link: https://lore.kernel.org/r/20201216115720.12404-2-sis@melexis.com Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/temperature/mlx90632.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/iio/temperature/mlx90632.c b/drivers/iio/temperature/mlx90632.c index 503fe54a0bb93..608ccb1d8bc82 100644 --- a/drivers/iio/temperature/mlx90632.c +++ b/drivers/iio/temperature/mlx90632.c @@ -248,6 +248,12 @@ static int mlx90632_set_meas_type(struct regmap *regmap, u8 type) if (ret < 0) return ret; + /* + * Give the mlx90632 some time to reset properly before sending a new I2C command + * if this is not done, the following I2C command(s) will not be accepted. + */ + usleep_range(150, 200); + ret = regmap_write_bits(regmap, MLX90632_REG_CONTROL, (MLX90632_CFG_MTYP_MASK | MLX90632_CFG_PWR_MASK), (MLX90632_MTYP_STATUS(type) | MLX90632_PWR_STATUS_HALT)); -- GitLab From 49a9565a7a7ce168e3e6482fb24e62d12f72ab81 Mon Sep 17 00:00:00 2001 From: David Lechner Date: Sun, 13 Dec 2020 18:09:27 -0600 Subject: [PATCH 0789/2012] counter:ti-eqep: remove floor The hardware doesn't support this. QPOSINIT is an initialization value that is triggered by other things. When the counter overflows, it always wraps around to zero. Fixes: f213729f6796 "counter: new TI eQEP driver" Signed-off-by: David Lechner Acked-by: William Breathitt Gray Link: https://lore.kernel.org/r/20201214000927.1793062-1-david@lechnology.com Cc: Signed-off-by: Jonathan Cameron --- drivers/counter/ti-eqep.c | 35 ----------------------------------- 1 file changed, 35 deletions(-) diff --git a/drivers/counter/ti-eqep.c b/drivers/counter/ti-eqep.c index a60aee1a1a291..65df9ef5b5bc0 100644 --- a/drivers/counter/ti-eqep.c +++ b/drivers/counter/ti-eqep.c @@ -235,36 +235,6 @@ static ssize_t ti_eqep_position_ceiling_write(struct counter_device *counter, return len; } -static ssize_t ti_eqep_position_floor_read(struct counter_device *counter, - struct counter_count *count, - void *ext_priv, char *buf) -{ - struct ti_eqep_cnt *priv = counter->priv; - u32 qposinit; - - regmap_read(priv->regmap32, QPOSINIT, &qposinit); - - return sprintf(buf, "%u\n", qposinit); -} - -static ssize_t ti_eqep_position_floor_write(struct counter_device *counter, - struct counter_count *count, - void *ext_priv, const char *buf, - size_t len) -{ - struct ti_eqep_cnt *priv = counter->priv; - int err; - u32 res; - - err = kstrtouint(buf, 0, &res); - if (err < 0) - return err; - - regmap_write(priv->regmap32, QPOSINIT, res); - - return len; -} - static ssize_t ti_eqep_position_enable_read(struct counter_device *counter, struct counter_count *count, void *ext_priv, char *buf) @@ -301,11 +271,6 @@ static struct counter_count_ext ti_eqep_position_ext[] = { .read = ti_eqep_position_ceiling_read, .write = ti_eqep_position_ceiling_write, }, - { - .name = "floor", - .read = ti_eqep_position_floor_read, - .write = ti_eqep_position_floor_write, - }, { .name = "enable", .read = ti_eqep_position_enable_read, -- GitLab From efd597b2839a9895e8a98fcb0b76d2f545802cd4 Mon Sep 17 00:00:00 2001 From: Lars-Peter Clausen Date: Wed, 9 Dec 2020 11:46:49 +0100 Subject: [PATCH 0790/2012] iio: ad5504: Fix setting power-down state The power-down mask of the ad5504 is actually a power-up mask. Meaning if a bit is set the corresponding channel is powered up and if it is not set the channel is powered down. The driver currently has this the wrong way around, resulting in the channel being powered up when requested to be powered down and vice versa. Fixes: 3bbbf150ffde ("staging:iio:dac:ad5504: Use strtobool for boolean values") Signed-off-by: Lars-Peter Clausen Acked-by: Alexandru Ardelean Link: https://lore.kernel.org/r/20201209104649.5794-1-lars@metafoo.de Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/dac/ad5504.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/iio/dac/ad5504.c b/drivers/iio/dac/ad5504.c index 28921b62e6420..e9297c25d4ef6 100644 --- a/drivers/iio/dac/ad5504.c +++ b/drivers/iio/dac/ad5504.c @@ -187,9 +187,9 @@ static ssize_t ad5504_write_dac_powerdown(struct iio_dev *indio_dev, return ret; if (pwr_down) - st->pwr_down_mask |= (1 << chan->channel); - else st->pwr_down_mask &= ~(1 << chan->channel); + else + st->pwr_down_mask |= (1 << chan->channel); ret = ad5504_spi_write(st, AD5504_ADDR_CTRL, AD5504_DAC_PWRDWN_MODE(st->pwr_down_mode) | -- GitLab From 40c48fb79b9798954691f24b8ece1d3a7eb1b353 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Tue, 8 Dec 2020 15:36:40 +0100 Subject: [PATCH 0791/2012] iio: common: st_sensors: fix possible infinite loop in st_sensors_irq_thread Return a boolean value in st_sensors_new_samples_available routine in order to avoid an infinite loop in st_sensors_irq_thread if stat_drdy.addr is not defined or stat_drdy read fails Fixes: 90efe05562921 ("iio: st_sensors: harden interrupt handling") Reported-by: Jonathan Cameron Signed-off-by: Lorenzo Bianconi Reviewed-by: Linus Walleij Link: https://lore.kernel.org/r/c9ec69ed349e7200c779fd7a5bf04c1aaa2817aa.1607438132.git.lorenzo@kernel.org Cc: Signed-off-by: Jonathan Cameron --- .../common/st_sensors/st_sensors_trigger.c | 31 ++++++++++--------- 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/drivers/iio/common/st_sensors/st_sensors_trigger.c b/drivers/iio/common/st_sensors/st_sensors_trigger.c index 0507283bd4c1d..2dbd2646e44e9 100644 --- a/drivers/iio/common/st_sensors/st_sensors_trigger.c +++ b/drivers/iio/common/st_sensors/st_sensors_trigger.c @@ -23,35 +23,31 @@ * @sdata: Sensor data. * * returns: - * 0 - no new samples available - * 1 - new samples available - * negative - error or unknown + * false - no new samples available or read error + * true - new samples available */ -static int st_sensors_new_samples_available(struct iio_dev *indio_dev, - struct st_sensor_data *sdata) +static bool st_sensors_new_samples_available(struct iio_dev *indio_dev, + struct st_sensor_data *sdata) { int ret, status; /* How would I know if I can't check it? */ if (!sdata->sensor_settings->drdy_irq.stat_drdy.addr) - return -EINVAL; + return true; /* No scan mask, no interrupt */ if (!indio_dev->active_scan_mask) - return 0; + return false; ret = regmap_read(sdata->regmap, sdata->sensor_settings->drdy_irq.stat_drdy.addr, &status); if (ret < 0) { dev_err(sdata->dev, "error checking samples available\n"); - return ret; + return false; } - if (status & sdata->sensor_settings->drdy_irq.stat_drdy.mask) - return 1; - - return 0; + return !!(status & sdata->sensor_settings->drdy_irq.stat_drdy.mask); } /** @@ -180,9 +176,15 @@ int st_sensors_allocate_trigger(struct iio_dev *indio_dev, /* Tell the interrupt handler that we're dealing with edges */ if (irq_trig == IRQF_TRIGGER_FALLING || - irq_trig == IRQF_TRIGGER_RISING) + irq_trig == IRQF_TRIGGER_RISING) { + if (!sdata->sensor_settings->drdy_irq.stat_drdy.addr) { + dev_err(&indio_dev->dev, + "edge IRQ not supported w/o stat register.\n"); + err = -EOPNOTSUPP; + goto iio_trigger_free; + } sdata->edge_irq = true; - else + } else { /* * If we're not using edges (i.e. level interrupts) we * just mask off the IRQ, handle one interrupt, then @@ -190,6 +192,7 @@ int st_sensors_allocate_trigger(struct iio_dev *indio_dev, * interrupt handler top half again and start over. */ irq_trig |= IRQF_ONESHOT; + } /* * If the interrupt pin is Open Drain, by definition this -- GitLab From b8653aff1c8876142f965fc69e12ba217da13182 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Wed, 2 Dec 2020 12:02:52 -0800 Subject: [PATCH 0792/2012] iio: sx9310: Fix semtech,avg-pos-strength setting when > 16 This DT property can be 0, 16, and then 64, but not 32. The math here doesn't recognize this slight bump in the power of 2 numbers and translates a DT property of 64 into the register value '3' when it really should be '2'. Fix it by subtracting one more if the number being translated is larger than 31. Also use clamp() because we're here. Cc: Daniel Campello Cc: Lars-Peter Clausen Cc: Peter Meerwald-Stadler Cc: Douglas Anderson Cc: Gwendal Grignou Cc: Evan Green Signed-off-by: Stephen Boyd Reviewed-by: Douglas Anderson Link: https://lore.kernel.org/r/20201202200252.986230-1-swboyd@chromium.org Signed-off-by: Jonathan Cameron --- drivers/iio/proximity/sx9310.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/iio/proximity/sx9310.c b/drivers/iio/proximity/sx9310.c index 62eacb22e9bcb..37fd0b65a0140 100644 --- a/drivers/iio/proximity/sx9310.c +++ b/drivers/iio/proximity/sx9310.c @@ -1305,7 +1305,8 @@ sx9310_get_default_reg(struct sx9310_data *data, int i, if (ret) break; - pos = min(max(ilog2(pos), 3), 10) - 3; + /* Powers of 2, except for a gap between 16 and 64 */ + pos = clamp(ilog2(pos), 3, 11) - (pos >= 32 ? 4 : 3); reg_def->def &= ~SX9310_REG_PROX_CTRL7_AVGPOSFILT_MASK; reg_def->def |= FIELD_PREP(SX9310_REG_PROX_CTRL7_AVGPOSFILT_MASK, pos); -- GitLab From c2083e280a3d4f71941c9c57992d4e621e4e33c5 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Thu, 14 Jan 2021 09:04:49 +0100 Subject: [PATCH 0793/2012] cfg80211: fix a kerneldoc markup A function has a different name between their prototype and its kernel-doc markup: ../include/net/cfg80211.h:1766: warning: expecting prototype for struct cfg80211_sar_chan_ranges. Prototype was for struct cfg80211_sar_freq_ranges instead Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/c7ed4bc4d9e992ead16d3d2df246f3b56dbfb1fb.1610610937.git.mchehab+huawei@kernel.org Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 1b3954afcda42..0d6f7ec860615 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1756,7 +1756,7 @@ struct cfg80211_sar_specs { /** - * struct cfg80211_sar_chan_ranges - sar frequency ranges + * struct cfg80211_sar_freq_ranges - sar frequency ranges * @start_freq: start range edge frequency * @end_freq: end range edge frequency */ -- GitLab From 6020d534fa012b80c6d13811dc4d2dfedca2e403 Mon Sep 17 00:00:00 2001 From: Shayne Chen Date: Tue, 12 Jan 2021 11:20:28 +0800 Subject: [PATCH 0794/2012] mac80211: fix incorrect strlen of .write in debugfs This fixes strlen mismatch problems happening in some .write callbacks of debugfs. When trying to configure airtime_flags in debugfs, an error appeared: ash: write error: Invalid argument The error is returned from kstrtou16() since a wrong length makes it miss the real end of input string. To fix this, use count as the string length, and set proper end of string for a char buffer. The debug print is shown - airtime_flags_write: count = 2, len = 8, where the actual length is 2, but "len = strlen(buf)" gets 8. Also cleanup the other similar cases for the sake of consistency. Signed-off-by: Sujuan Chen Signed-off-by: Ryder Lee Signed-off-by: Shayne Chen Link: https://lore.kernel.org/r/20210112032028.7482-1-shayne.chen@mediatek.com Signed-off-by: Johannes Berg --- net/mac80211/debugfs.c | 44 +++++++++++++++++++----------------------- 1 file changed, 20 insertions(+), 24 deletions(-) diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c index 48f144f107d53..9e723d9434219 100644 --- a/net/mac80211/debugfs.c +++ b/net/mac80211/debugfs.c @@ -120,18 +120,17 @@ static ssize_t aqm_write(struct file *file, { struct ieee80211_local *local = file->private_data; char buf[100]; - size_t len; - if (count > sizeof(buf)) + if (count >= sizeof(buf)) return -EINVAL; if (copy_from_user(buf, user_buf, count)) return -EFAULT; - buf[sizeof(buf) - 1] = '\0'; - len = strlen(buf); - if (len > 0 && buf[len-1] == '\n') - buf[len-1] = 0; + if (count && buf[count - 1] == '\n') + buf[count - 1] = '\0'; + else + buf[count] = '\0'; if (sscanf(buf, "fq_limit %u", &local->fq.limit) == 1) return count; @@ -177,18 +176,17 @@ static ssize_t airtime_flags_write(struct file *file, { struct ieee80211_local *local = file->private_data; char buf[16]; - size_t len; - if (count > sizeof(buf)) + if (count >= sizeof(buf)) return -EINVAL; if (copy_from_user(buf, user_buf, count)) return -EFAULT; - buf[sizeof(buf) - 1] = 0; - len = strlen(buf); - if (len > 0 && buf[len - 1] == '\n') - buf[len - 1] = 0; + if (count && buf[count - 1] == '\n') + buf[count - 1] = '\0'; + else + buf[count] = '\0'; if (kstrtou16(buf, 0, &local->airtime_flags)) return -EINVAL; @@ -237,20 +235,19 @@ static ssize_t aql_txq_limit_write(struct file *file, { struct ieee80211_local *local = file->private_data; char buf[100]; - size_t len; u32 ac, q_limit_low, q_limit_high, q_limit_low_old, q_limit_high_old; struct sta_info *sta; - if (count > sizeof(buf)) + if (count >= sizeof(buf)) return -EINVAL; if (copy_from_user(buf, user_buf, count)) return -EFAULT; - buf[sizeof(buf) - 1] = 0; - len = strlen(buf); - if (len > 0 && buf[len - 1] == '\n') - buf[len - 1] = 0; + if (count && buf[count - 1] == '\n') + buf[count - 1] = '\0'; + else + buf[count] = '\0'; if (sscanf(buf, "%u %u %u", &ac, &q_limit_low, &q_limit_high) != 3) return -EINVAL; @@ -306,18 +303,17 @@ static ssize_t force_tx_status_write(struct file *file, { struct ieee80211_local *local = file->private_data; char buf[3]; - size_t len; - if (count > sizeof(buf)) + if (count >= sizeof(buf)) return -EINVAL; if (copy_from_user(buf, user_buf, count)) return -EFAULT; - buf[sizeof(buf) - 1] = '\0'; - len = strlen(buf); - if (len > 0 && buf[len - 1] == '\n') - buf[len - 1] = 0; + if (count && buf[count - 1] == '\n') + buf[count - 1] = '\0'; + else + buf[count] = '\0'; if (buf[0] == '0' && buf[1] == '\0') local->force_tx_status = 0; -- GitLab From 622d3b4e39381262da7b18ca1ed1311df227de86 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Fri, 18 Dec 2020 19:47:17 +0100 Subject: [PATCH 0795/2012] mac80211: fix fast-rx encryption check When using WEP, the default unicast key needs to be selected, instead of the STA PTK. Signed-off-by: Felix Fietkau Link: https://lore.kernel.org/r/20201218184718.93650-5-nbd@nbd.name Signed-off-by: Johannes Berg --- net/mac80211/rx.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 13b9bcc4865de..972895e9f22dc 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -4176,6 +4176,8 @@ void ieee80211_check_fast_rx(struct sta_info *sta) rcu_read_lock(); key = rcu_dereference(sta->ptk[sta->ptk_idx]); + if (!key) + key = rcu_dereference(sdata->default_unicast_key); if (key) { switch (key->conf.cipher) { case WLAN_CIPHER_SUITE_TKIP: -- GitLab From b101dd2d22f45d203010b40c739df346a0cbebef Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Fri, 18 Dec 2020 19:47:16 +0100 Subject: [PATCH 0796/2012] mac80211: fix encryption key selection for 802.3 xmit When using WEP, the default unicast key needs to be selected, instead of the STA PTK. Signed-off-by: Felix Fietkau Link: https://lore.kernel.org/r/20201218184718.93650-4-nbd@nbd.name Signed-off-by: Johannes Berg --- net/mac80211/tx.c | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 6422da6690f79..2d606dbab37c3 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -4251,7 +4251,6 @@ netdev_tx_t ieee80211_subif_start_xmit_8023(struct sk_buff *skb, struct ethhdr *ehdr = (struct ethhdr *)skb->data; struct ieee80211_key *key; struct sta_info *sta; - bool offload = true; if (unlikely(skb->len < ETH_HLEN)) { kfree_skb(skb); @@ -4267,18 +4266,22 @@ netdev_tx_t ieee80211_subif_start_xmit_8023(struct sk_buff *skb, if (unlikely(IS_ERR_OR_NULL(sta) || !sta->uploaded || !test_sta_flag(sta, WLAN_STA_AUTHORIZED) || - sdata->control_port_protocol == ehdr->h_proto)) - offload = false; - else if ((key = rcu_dereference(sta->ptk[sta->ptk_idx])) && - (!(key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE) || - key->conf.cipher == WLAN_CIPHER_SUITE_TKIP)) - offload = false; - - if (offload) - ieee80211_8023_xmit(sdata, dev, sta, key, skb); - else - ieee80211_subif_start_xmit(skb, dev); + sdata->control_port_protocol == ehdr->h_proto)) + goto skip_offload; + + key = rcu_dereference(sta->ptk[sta->ptk_idx]); + if (!key) + key = rcu_dereference(sdata->default_unicast_key); + + if (key && (!(key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE) || + key->conf.cipher == WLAN_CIPHER_SUITE_TKIP)) + goto skip_offload; + + ieee80211_8023_xmit(sdata, dev, sta, key, skb); + goto out; +skip_offload: + ieee80211_subif_start_xmit(skb, dev); out: rcu_read_unlock(); -- GitLab From 2463ec86cd0338a2c2edbfb0b9d50c52ff76ff43 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Fri, 18 Dec 2020 20:15:25 +0100 Subject: [PATCH 0797/2012] mac80211: do not drop tx nulldata packets on encrypted links ieee80211_tx_h_select_key drops any non-mgmt packets without a key when encryption is used. This is wrong for nulldata packets that can't be encrypted and are sent out for probing clients and indicating 4-address mode. Reported-by: Sebastian Gottschall Fixes: a0761a301746 ("mac80211: drop data frames without key on encrypted links") Signed-off-by: Felix Fietkau Link: https://lore.kernel.org/r/20201218191525.1168-1-nbd@nbd.name Signed-off-by: Johannes Berg --- net/mac80211/tx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 2d606dbab37c3..de8325c79dd5b 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -649,7 +649,7 @@ ieee80211_tx_h_select_key(struct ieee80211_tx_data *tx) if (!skip_hw && tx->key && tx->key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE) info->control.hw_key = &tx->key->conf; - } else if (!ieee80211_is_mgmt(hdr->frame_control) && tx->sta && + } else if (ieee80211_is_data_present(hdr->frame_control) && tx->sta && test_sta_flag(tx->sta, WLAN_STA_USES_ENCRYPTION)) { return TX_DROP; } -- GitLab From c13cf5c159660451c8fbdc37efb998b198e1d305 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Sat, 26 Dec 2020 10:39:08 +0100 Subject: [PATCH 0798/2012] mac80211: check if atf has been disabled in __ieee80211_schedule_txq MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Check if atf has been disabled in __ieee80211_schedule_txq() in order to avoid a given sta is always put to the beginning of the active_txqs list and never moved to the end since deficit is not decremented in ieee80211_sta_register_airtime() Fixes: b4809e9484da1 ("mac80211: Add airtime accounting and scheduling to TXQs") Signed-off-by: Lorenzo Bianconi Acked-by: Toke Høiland-Jørgensen Link: https://lore.kernel.org/r/93889406c50f1416214c079ca0b8c9faecc5143e.1608975195.git.lorenzo@kernel.org Signed-off-by: Johannes Berg --- net/mac80211/tx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index de8325c79dd5b..ebb3228ce9718 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -3809,7 +3809,7 @@ void __ieee80211_schedule_txq(struct ieee80211_hw *hw, * get immediately moved to the back of the list on the next * call to ieee80211_next_txq(). */ - if (txqi->txq.sta && + if (txqi->txq.sta && local->airtime_flags && wiphy_ext_feature_isset(local->hw.wiphy, NL80211_EXT_FEATURE_AIRTIME_FAIRNESS)) list_add(&txqi->schedule_order, -- GitLab From 3c51fa5d2afe7a4909b53af5019635326389dd29 Mon Sep 17 00:00:00 2001 From: Russell King Date: Tue, 12 Jan 2021 22:59:43 +0000 Subject: [PATCH 0799/2012] net: phy: ar803x: disable extended next page bit This bit is enabled by default and advertises support for extended next page support. XNP is only needed for 10GBase-T and MultiGig support which is not supported. Additionally, Cisco MultiGig switches will read this bit and attempt 10Gb negotiation even though Next Page support is disabled. This will cause timeouts when the interface is forced to 100Mbps and auto-negotiation will fail. The interfaces are only 1000Base-T and supporting auto-negotiation for this only requires the Next Page bit to be set. Taken from: https://github.com/SolidRun/linux-stable/commit/7406c5244b7ea6bc17a2afe8568277a8c4b126a9 and adapted to mainline kernels by rmk. Signed-off-by: Russell King Reviewed-by: Andrew Lunn Link: https://lore.kernel.org/r/E1kzSdb-000417-FJ@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/phy/at803x.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/net/phy/at803x.c b/drivers/net/phy/at803x.c index 9636edb8d6187..c8d276d96798d 100644 --- a/drivers/net/phy/at803x.c +++ b/drivers/net/phy/at803x.c @@ -587,7 +587,13 @@ static int at803x_config_init(struct phy_device *phydev) return ret; } - return 0; + /* Ar803x extended next page bit is enabled by default. Cisco + * multigig switches read this bit and attempt to negotiate 10Gbps + * rates even if the next page bit is disabled. This is incorrect + * behaviour but we still need to accommodate it. XNP is only needed + * for 10Gbps support, so disable XNP. + */ + return phy_modify(phydev, MII_ADVERTISE, MDIO_AN_CTRL1_XNP, 0); } static int at803x_ack_interrupt(struct phy_device *phydev) -- GitLab From b1ae3587d16a8c8fc9453e147c8708d6f006ffbb Mon Sep 17 00:00:00 2001 From: Bjarni Jonasson Date: Wed, 13 Jan 2021 12:56:25 +0100 Subject: [PATCH 0800/2012] net: phy: Add 100 base-x mode Sparx-5 supports this mode and it is missing in the PHY core. Signed-off-by: Bjarni Jonasson Reviewed-by: Russell King Signed-off-by: Jakub Kicinski --- Documentation/networking/phy.rst | 5 +++++ include/linux/phy.h | 4 ++++ 2 files changed, 9 insertions(+) diff --git a/Documentation/networking/phy.rst b/Documentation/networking/phy.rst index b2f7ec794bc8b..399f17976a6c3 100644 --- a/Documentation/networking/phy.rst +++ b/Documentation/networking/phy.rst @@ -286,6 +286,11 @@ Some of the interface modes are described below: Note: due to legacy usage, some 10GBASE-R usage incorrectly makes use of this definition. +``PHY_INTERFACE_MODE_100BASEX`` + This defines IEEE 802.3 Clause 24. The link operates at a fixed data + rate of 125Mpbs using a 4B/5B encoding scheme, resulting in an underlying + data rate of 100Mpbs. + Pause frames / flow control =========================== diff --git a/include/linux/phy.h b/include/linux/phy.h index 9effb511acde3..24fcc6456a9e9 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -104,6 +104,7 @@ extern const int phy_10gbit_features_array[1]; * @PHY_INTERFACE_MODE_MOCA: Multimedia over Coax * @PHY_INTERFACE_MODE_QSGMII: Quad SGMII * @PHY_INTERFACE_MODE_TRGMII: Turbo RGMII + * @PHY_INTERFACE_MODE_100BASEX: 100 BaseX * @PHY_INTERFACE_MODE_1000BASEX: 1000 BaseX * @PHY_INTERFACE_MODE_2500BASEX: 2500 BaseX * @PHY_INTERFACE_MODE_RXAUI: Reduced XAUI @@ -135,6 +136,7 @@ typedef enum { PHY_INTERFACE_MODE_MOCA, PHY_INTERFACE_MODE_QSGMII, PHY_INTERFACE_MODE_TRGMII, + PHY_INTERFACE_MODE_100BASEX, PHY_INTERFACE_MODE_1000BASEX, PHY_INTERFACE_MODE_2500BASEX, PHY_INTERFACE_MODE_RXAUI, @@ -217,6 +219,8 @@ static inline const char *phy_modes(phy_interface_t interface) return "usxgmii"; case PHY_INTERFACE_MODE_10GKR: return "10gbase-kr"; + case PHY_INTERFACE_MODE_100BASEX: + return "100base-x"; default: return "unknown"; } -- GitLab From 6e12f35cef6b8a458d7ecf507ae330e0bffaad8c Mon Sep 17 00:00:00 2001 From: Bjarni Jonasson Date: Wed, 13 Jan 2021 12:56:26 +0100 Subject: [PATCH 0801/2012] sfp: add support for 100 base-x SFPs Add support for 100Base-FX, 100Base-LX, 100Base-PX and 100Base-BX10 modules This is needed for Sparx-5 switch. Signed-off-by: Bjarni Jonasson Reviewed-by: Russell King Signed-off-by: Jakub Kicinski --- drivers/net/phy/sfp-bus.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/net/phy/sfp-bus.c b/drivers/net/phy/sfp-bus.c index cdfa0a190962e..3c67ad9951ab2 100644 --- a/drivers/net/phy/sfp-bus.c +++ b/drivers/net/phy/sfp-bus.c @@ -265,6 +265,12 @@ void sfp_parse_support(struct sfp_bus *bus, const struct sfp_eeprom_id *id, br_min <= 1300 && br_max >= 1200) phylink_set(modes, 1000baseX_Full); + /* 100Base-FX, 100Base-LX, 100Base-PX, 100Base-BX10 */ + if (id->base.e100_base_fx || id->base.e100_base_lx) + phylink_set(modes, 100baseFX_Full); + if ((id->base.e_base_px || id->base.e_base_bx10) && br_nom == 100) + phylink_set(modes, 100baseFX_Full); + /* For active or passive cables, select the link modes * based on the bit rates and the cable compliance bytes. */ @@ -389,6 +395,9 @@ phy_interface_t sfp_select_interface(struct sfp_bus *bus, if (phylink_test(link_modes, 1000baseX_Full)) return PHY_INTERFACE_MODE_1000BASEX; + if (phylink_test(link_modes, 100baseFX_Full)) + return PHY_INTERFACE_MODE_100BASEX; + dev_warn(bus->sfp_dev, "Unable to ascertain link mode\n"); return PHY_INTERFACE_MODE_NA; -- GitLab From 402a89660e9dc880710b12773076a336c9dab3d7 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Wed, 13 Jan 2021 17:12:52 +1000 Subject: [PATCH 0802/2012] drm/nouveau/bios: fix issue shadowing expansion ROMs This issue has generally been covered up by the presence of additional expansion ROMs after the ones we're interested in, with header fetches of subsequent images loading enough of the ROM to hide the issue. Noticed on GA102, which lacks a type 0x70 image compared to TU102,. [ 906.364197] nouveau 0000:09:00.0: bios: 00000000: type 00, 65024 bytes [ 906.381205] nouveau 0000:09:00.0: bios: 0000fe00: type 03, 91648 bytes [ 906.405213] nouveau 0000:09:00.0: bios: 00026400: type e0, 22016 bytes [ 906.410984] nouveau 0000:09:00.0: bios: 0002ba00: type e0, 366080 bytes vs [ 22.961901] nouveau 0000:09:00.0: bios: 00000000: type 00, 60416 bytes [ 22.984174] nouveau 0000:09:00.0: bios: 0000ec00: type 03, 71168 bytes [ 23.010446] nouveau 0000:09:00.0: bios: 00020200: type e0, 48128 bytes [ 23.028220] nouveau 0000:09:00.0: bios: 0002be00: type e0, 140800 bytes [ 23.080196] nouveau 0000:09:00.0: bios: 0004e400: type 70, 7168 bytes Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadow.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadow.c b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadow.c index 7deb81b6dbac6..4b571cc6bc70f 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadow.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadow.c @@ -75,7 +75,7 @@ shadow_image(struct nvkm_bios *bios, int idx, u32 offset, struct shadow *mthd) nvkm_debug(subdev, "%08x: type %02x, %d bytes\n", image.base, image.type, image.size); - if (!shadow_fetch(bios, mthd, image.size)) { + if (!shadow_fetch(bios, mthd, image.base + image.size)) { nvkm_debug(subdev, "%08x: fetch failed\n", image.base); return 0; } -- GitLab From e05e06cd34f5311f677294a08b609acfbc315236 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Wed, 13 Jan 2021 17:12:52 +1000 Subject: [PATCH 0803/2012] drm/nouveau/privring: ack interrupts the same way as RM Whatever it is that we were doing before doesn't work on Ampere. Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nvkm/subdev/ibus/gf100.c | 10 +++++++--- drivers/gpu/drm/nouveau/nvkm/subdev/ibus/gk104.c | 10 +++++++--- 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/ibus/gf100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/ibus/gf100.c index 2340040942c93..1115376bc85f5 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/ibus/gf100.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/ibus/gf100.c @@ -22,6 +22,7 @@ * Authors: Ben Skeggs */ #include "priv.h" +#include static void gf100_ibus_intr_hub(struct nvkm_subdev *ibus, int i) @@ -31,7 +32,6 @@ gf100_ibus_intr_hub(struct nvkm_subdev *ibus, int i) u32 data = nvkm_rd32(device, 0x122124 + (i * 0x0400)); u32 stat = nvkm_rd32(device, 0x122128 + (i * 0x0400)); nvkm_debug(ibus, "HUB%d: %06x %08x (%08x)\n", i, addr, data, stat); - nvkm_mask(device, 0x122128 + (i * 0x0400), 0x00000200, 0x00000000); } static void @@ -42,7 +42,6 @@ gf100_ibus_intr_rop(struct nvkm_subdev *ibus, int i) u32 data = nvkm_rd32(device, 0x124124 + (i * 0x0400)); u32 stat = nvkm_rd32(device, 0x124128 + (i * 0x0400)); nvkm_debug(ibus, "ROP%d: %06x %08x (%08x)\n", i, addr, data, stat); - nvkm_mask(device, 0x124128 + (i * 0x0400), 0x00000200, 0x00000000); } static void @@ -53,7 +52,6 @@ gf100_ibus_intr_gpc(struct nvkm_subdev *ibus, int i) u32 data = nvkm_rd32(device, 0x128124 + (i * 0x0400)); u32 stat = nvkm_rd32(device, 0x128128 + (i * 0x0400)); nvkm_debug(ibus, "GPC%d: %06x %08x (%08x)\n", i, addr, data, stat); - nvkm_mask(device, 0x128128 + (i * 0x0400), 0x00000200, 0x00000000); } void @@ -90,6 +88,12 @@ gf100_ibus_intr(struct nvkm_subdev *ibus) intr1 &= ~stat; } } + + nvkm_mask(device, 0x121c4c, 0x0000003f, 0x00000002); + nvkm_msec(device, 2000, + if (!(nvkm_rd32(device, 0x121c4c) & 0x0000003f)) + break; + ); } static int diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/ibus/gk104.c b/drivers/gpu/drm/nouveau/nvkm/subdev/ibus/gk104.c index f3915f85838ed..22e487b493ad1 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/ibus/gk104.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/ibus/gk104.c @@ -22,6 +22,7 @@ * Authors: Ben Skeggs */ #include "priv.h" +#include static void gk104_ibus_intr_hub(struct nvkm_subdev *ibus, int i) @@ -31,7 +32,6 @@ gk104_ibus_intr_hub(struct nvkm_subdev *ibus, int i) u32 data = nvkm_rd32(device, 0x122124 + (i * 0x0800)); u32 stat = nvkm_rd32(device, 0x122128 + (i * 0x0800)); nvkm_debug(ibus, "HUB%d: %06x %08x (%08x)\n", i, addr, data, stat); - nvkm_mask(device, 0x122128 + (i * 0x0800), 0x00000200, 0x00000000); } static void @@ -42,7 +42,6 @@ gk104_ibus_intr_rop(struct nvkm_subdev *ibus, int i) u32 data = nvkm_rd32(device, 0x124124 + (i * 0x0800)); u32 stat = nvkm_rd32(device, 0x124128 + (i * 0x0800)); nvkm_debug(ibus, "ROP%d: %06x %08x (%08x)\n", i, addr, data, stat); - nvkm_mask(device, 0x124128 + (i * 0x0800), 0x00000200, 0x00000000); } static void @@ -53,7 +52,6 @@ gk104_ibus_intr_gpc(struct nvkm_subdev *ibus, int i) u32 data = nvkm_rd32(device, 0x128124 + (i * 0x0800)); u32 stat = nvkm_rd32(device, 0x128128 + (i * 0x0800)); nvkm_debug(ibus, "GPC%d: %06x %08x (%08x)\n", i, addr, data, stat); - nvkm_mask(device, 0x128128 + (i * 0x0800), 0x00000200, 0x00000000); } void @@ -90,6 +88,12 @@ gk104_ibus_intr(struct nvkm_subdev *ibus) intr1 &= ~stat; } } + + nvkm_mask(device, 0x12004c, 0x0000003f, 0x00000002); + nvkm_msec(device, 2000, + if (!(nvkm_rd32(device, 0x12004c) & 0x0000003f)) + break; + ); } static int -- GitLab From b5510d1e21d80e2fa2286468ca8c2922f5895ef8 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Wed, 13 Jan 2021 17:12:52 +1000 Subject: [PATCH 0804/2012] drm/nouveau/i2c/gk110: split out from i2c/gk104 No functional changes here yet. Signed-off-by: Ben Skeggs --- .../gpu/drm/nouveau/include/nvkm/subdev/i2c.h | 1 + .../gpu/drm/nouveau/nvkm/engine/device/base.c | 12 +++--- .../gpu/drm/nouveau/nvkm/subdev/i2c/Kbuild | 1 + .../gpu/drm/nouveau/nvkm/subdev/i2c/gk110.c | 38 +++++++++++++++++++ 4 files changed, 46 insertions(+), 6 deletions(-) create mode 100644 drivers/gpu/drm/nouveau/nvkm/subdev/i2c/gk110.c diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/i2c.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/i2c.h index 81b977319640a..640f649ce497e 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/i2c.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/i2c.h @@ -92,6 +92,7 @@ int g94_i2c_new(struct nvkm_device *, int, struct nvkm_i2c **); int gf117_i2c_new(struct nvkm_device *, int, struct nvkm_i2c **); int gf119_i2c_new(struct nvkm_device *, int, struct nvkm_i2c **); int gk104_i2c_new(struct nvkm_device *, int, struct nvkm_i2c **); +int gk110_i2c_new(struct nvkm_device *, int, struct nvkm_i2c **); int gm200_i2c_new(struct nvkm_device *, int, struct nvkm_i2c **); static inline int diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c index 7851bec5f0e5f..ffada13c416c5 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c @@ -1815,7 +1815,7 @@ nvf0_chipset = { .fb = gk110_fb_new, .fuse = gf100_fuse_new, .gpio = gk104_gpio_new, - .i2c = gk104_i2c_new, + .i2c = gk110_i2c_new, .ibus = gk104_ibus_new, .iccsense = gf100_iccsense_new, .imem = nv50_instmem_new, @@ -1853,7 +1853,7 @@ nvf1_chipset = { .fb = gk110_fb_new, .fuse = gf100_fuse_new, .gpio = gk104_gpio_new, - .i2c = gk104_i2c_new, + .i2c = gk110_i2c_new, .ibus = gk104_ibus_new, .iccsense = gf100_iccsense_new, .imem = nv50_instmem_new, @@ -1891,7 +1891,7 @@ nv106_chipset = { .fb = gk110_fb_new, .fuse = gf100_fuse_new, .gpio = gk104_gpio_new, - .i2c = gk104_i2c_new, + .i2c = gk110_i2c_new, .ibus = gk104_ibus_new, .iccsense = gf100_iccsense_new, .imem = nv50_instmem_new, @@ -1929,7 +1929,7 @@ nv108_chipset = { .fb = gk110_fb_new, .fuse = gf100_fuse_new, .gpio = gk104_gpio_new, - .i2c = gk104_i2c_new, + .i2c = gk110_i2c_new, .ibus = gk104_ibus_new, .iccsense = gf100_iccsense_new, .imem = nv50_instmem_new, @@ -1967,7 +1967,7 @@ nv117_chipset = { .fb = gm107_fb_new, .fuse = gm107_fuse_new, .gpio = gk104_gpio_new, - .i2c = gk104_i2c_new, + .i2c = gk110_i2c_new, .ibus = gk104_ibus_new, .iccsense = gf100_iccsense_new, .imem = nv50_instmem_new, @@ -2003,7 +2003,7 @@ nv118_chipset = { .fb = gm107_fb_new, .fuse = gm107_fuse_new, .gpio = gk104_gpio_new, - .i2c = gk104_i2c_new, + .i2c = gk110_i2c_new, .ibus = gk104_ibus_new, .iccsense = gf100_iccsense_new, .imem = nv50_instmem_new, diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/Kbuild b/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/Kbuild index 723d0284caefc..819703913a00c 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/Kbuild +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/Kbuild @@ -7,6 +7,7 @@ nvkm-y += nvkm/subdev/i2c/g94.o nvkm-y += nvkm/subdev/i2c/gf117.o nvkm-y += nvkm/subdev/i2c/gf119.o nvkm-y += nvkm/subdev/i2c/gk104.o +nvkm-y += nvkm/subdev/i2c/gk110.o nvkm-y += nvkm/subdev/i2c/gm200.o nvkm-y += nvkm/subdev/i2c/pad.o diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/gk110.c b/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/gk110.c new file mode 100644 index 0000000000000..10eaf1d70f623 --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/gk110.c @@ -0,0 +1,38 @@ +/* + * Copyright 2021 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "priv.h" +#include "pad.h" + +static const struct nvkm_i2c_func +gk110_i2c = { + .pad_x_new = gf119_i2c_pad_x_new, + .pad_s_new = gf119_i2c_pad_s_new, + .aux = 4, + .aux_stat = gk104_aux_stat, + .aux_mask = gk104_aux_mask, +}; + +int +gk110_i2c_new(struct nvkm_device *device, int index, struct nvkm_i2c **pi2c) +{ + return nvkm_i2c_new_(&gk110_i2c, device, index, pi2c); +} -- GitLab From 8ad95edc39100c22c29ab1d2588332b99f387c8e Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Wed, 13 Jan 2021 17:12:52 +1000 Subject: [PATCH 0805/2012] drm/nouveau/i2c/gk110-: disable hw-initiated dpcd reads RM does this around transactions, and it seemed to help while debugging AUXCH issues on GA102. Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nvkm/subdev/i2c/aux.h | 7 +++++++ drivers/gpu/drm/nouveau/nvkm/subdev/i2c/auxg94.c | 10 +++++++--- drivers/gpu/drm/nouveau/nvkm/subdev/i2c/auxgm200.c | 9 +++++++-- drivers/gpu/drm/nouveau/nvkm/subdev/i2c/gk110.c | 7 +++++++ drivers/gpu/drm/nouveau/nvkm/subdev/i2c/gm200.c | 7 +++++++ drivers/gpu/drm/nouveau/nvkm/subdev/i2c/pad.h | 2 +- drivers/gpu/drm/nouveau/nvkm/subdev/i2c/priv.h | 4 ++++ 7 files changed, 40 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/aux.h b/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/aux.h index 30b48896965eb..f920eabf8628d 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/aux.h +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/aux.h @@ -3,6 +3,13 @@ #define __NVKM_I2C_AUX_H__ #include "pad.h" +static inline void +nvkm_i2c_aux_autodpcd(struct nvkm_i2c *i2c, int aux, bool enable) +{ + if (i2c->func->aux_autodpcd) + i2c->func->aux_autodpcd(i2c, aux, false); +} + struct nvkm_i2c_aux_func { bool address_only; int (*xfer)(struct nvkm_i2c_aux *, bool retry, u8 type, diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/auxg94.c b/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/auxg94.c index db7769cb33eba..47068f6f9c55d 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/auxg94.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/auxg94.c @@ -77,7 +77,8 @@ g94_i2c_aux_xfer(struct nvkm_i2c_aux *obj, bool retry, u8 type, u32 addr, u8 *data, u8 *size) { struct g94_i2c_aux *aux = g94_i2c_aux(obj); - struct nvkm_device *device = aux->base.pad->i2c->subdev.device; + struct nvkm_i2c *i2c = aux->base.pad->i2c; + struct nvkm_device *device = i2c->subdev.device; const u32 base = aux->ch * 0x50; u32 ctrl, stat, timeout, retries = 0; u32 xbuf[4] = {}; @@ -96,6 +97,8 @@ g94_i2c_aux_xfer(struct nvkm_i2c_aux *obj, bool retry, goto out; } + nvkm_i2c_aux_autodpcd(i2c, aux->ch, false); + if (!(type & 1)) { memcpy(xbuf, data, *size); for (i = 0; i < 16; i += 4) { @@ -128,7 +131,7 @@ g94_i2c_aux_xfer(struct nvkm_i2c_aux *obj, bool retry, if (!timeout--) { AUX_ERR(&aux->base, "timeout %08x", ctrl); ret = -EIO; - goto out; + goto out_err; } } while (ctrl & 0x00010000); ret = 0; @@ -154,7 +157,8 @@ g94_i2c_aux_xfer(struct nvkm_i2c_aux *obj, bool retry, memcpy(data, xbuf, *size); *size = stat & 0x0000001f; } - +out_err: + nvkm_i2c_aux_autodpcd(i2c, aux->ch, true); out: g94_i2c_aux_fini(aux); return ret < 0 ? ret : (stat & 0x000f0000) >> 16; diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/auxgm200.c b/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/auxgm200.c index edb6148cbca04..ab67b67fd2a53 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/auxgm200.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/auxgm200.c @@ -77,7 +77,8 @@ gm200_i2c_aux_xfer(struct nvkm_i2c_aux *obj, bool retry, u8 type, u32 addr, u8 *data, u8 *size) { struct gm200_i2c_aux *aux = gm200_i2c_aux(obj); - struct nvkm_device *device = aux->base.pad->i2c->subdev.device; + struct nvkm_i2c *i2c = aux->base.pad->i2c; + struct nvkm_device *device = i2c->subdev.device; const u32 base = aux->ch * 0x50; u32 ctrl, stat, timeout, retries = 0; u32 xbuf[4] = {}; @@ -96,6 +97,8 @@ gm200_i2c_aux_xfer(struct nvkm_i2c_aux *obj, bool retry, goto out; } + nvkm_i2c_aux_autodpcd(i2c, aux->ch, false); + if (!(type & 1)) { memcpy(xbuf, data, *size); for (i = 0; i < 16; i += 4) { @@ -128,7 +131,7 @@ gm200_i2c_aux_xfer(struct nvkm_i2c_aux *obj, bool retry, if (!timeout--) { AUX_ERR(&aux->base, "timeout %08x", ctrl); ret = -EIO; - goto out; + goto out_err; } } while (ctrl & 0x00010000); ret = 0; @@ -155,6 +158,8 @@ gm200_i2c_aux_xfer(struct nvkm_i2c_aux *obj, bool retry, *size = stat & 0x0000001f; } +out_err: + nvkm_i2c_aux_autodpcd(i2c, aux->ch, true); out: gm200_i2c_aux_fini(aux); return ret < 0 ? ret : (stat & 0x000f0000) >> 16; diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/gk110.c b/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/gk110.c index 10eaf1d70f623..8e3bfa1af52a2 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/gk110.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/gk110.c @@ -22,6 +22,12 @@ #include "priv.h" #include "pad.h" +static void +gk110_aux_autodpcd(struct nvkm_i2c *i2c, int aux, bool enable) +{ + nvkm_mask(i2c->subdev.device, 0x00e4f8 + (aux * 0x50), 0x00010000, enable << 16); +} + static const struct nvkm_i2c_func gk110_i2c = { .pad_x_new = gf119_i2c_pad_x_new, @@ -29,6 +35,7 @@ gk110_i2c = { .aux = 4, .aux_stat = gk104_aux_stat, .aux_mask = gk104_aux_mask, + .aux_autodpcd = gk110_aux_autodpcd, }; int diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/gm200.c b/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/gm200.c index a23c5f315221c..7b2375bff8a9c 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/gm200.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/gm200.c @@ -24,6 +24,12 @@ #include "priv.h" #include "pad.h" +static void +gm200_aux_autodpcd(struct nvkm_i2c *i2c, int aux, bool enable) +{ + nvkm_mask(i2c->subdev.device, 0x00d968 + (aux * 0x50), 0x00010000, enable << 16); +} + static const struct nvkm_i2c_func gm200_i2c = { .pad_x_new = gf119_i2c_pad_x_new, @@ -31,6 +37,7 @@ gm200_i2c = { .aux = 8, .aux_stat = gk104_aux_stat, .aux_mask = gk104_aux_mask, + .aux_autodpcd = gm200_aux_autodpcd, }; int diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/pad.h b/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/pad.h index 461016814f4f2..44b7bb7d47776 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/pad.h +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/pad.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: MIT */ #ifndef __NVKM_I2C_PAD_H__ #define __NVKM_I2C_PAD_H__ -#include +#include "priv.h" struct nvkm_i2c_pad { const struct nvkm_i2c_pad_func *func; diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/priv.h b/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/priv.h index bd86bc298ebe5..e35f6036fcfcb 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/priv.h +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/priv.h @@ -23,6 +23,10 @@ struct nvkm_i2c_func { /* mask on/off interrupt types for a given set of auxch */ void (*aux_mask)(struct nvkm_i2c *, u32, u32, u32); + + /* enable/disable HW-initiated DPCD reads + */ + void (*aux_autodpcd)(struct nvkm_i2c *, int aux, bool enable); }; void g94_aux_stat(struct nvkm_i2c *, u32 *, u32 *, u32 *, u32 *); -- GitLab From ba6e9ab0fcf3d76e3952deb12b5f993991621d9c Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Wed, 13 Jan 2021 17:12:52 +1000 Subject: [PATCH 0806/2012] drm/nouveau/i2c/gm200: increase width of aux semaphore owner fields Noticed while debugging GA102. Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nvkm/subdev/i2c/auxgm200.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/auxgm200.c b/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/auxgm200.c index ab67b67fd2a53..8bd1d442e4654 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/auxgm200.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/auxgm200.c @@ -33,7 +33,7 @@ static void gm200_i2c_aux_fini(struct gm200_i2c_aux *aux) { struct nvkm_device *device = aux->base.pad->i2c->subdev.device; - nvkm_mask(device, 0x00d954 + (aux->ch * 0x50), 0x00310000, 0x00000000); + nvkm_mask(device, 0x00d954 + (aux->ch * 0x50), 0x00710000, 0x00000000); } static int @@ -54,10 +54,10 @@ gm200_i2c_aux_init(struct gm200_i2c_aux *aux) AUX_ERR(&aux->base, "begin idle timeout %08x", ctrl); return -EBUSY; } - } while (ctrl & 0x03010000); + } while (ctrl & 0x07010000); /* set some magic, and wait up to 1ms for it to appear */ - nvkm_mask(device, 0x00d954 + (aux->ch * 0x50), 0x00300000, ureq); + nvkm_mask(device, 0x00d954 + (aux->ch * 0x50), 0x00700000, ureq); timeout = 1000; do { ctrl = nvkm_rd32(device, 0x00d954 + (aux->ch * 0x50)); @@ -67,7 +67,7 @@ gm200_i2c_aux_init(struct gm200_i2c_aux *aux) gm200_i2c_aux_fini(aux); return -EBUSY; } - } while ((ctrl & 0x03000000) != urep); + } while ((ctrl & 0x07000000) != urep); return 0; } -- GitLab From add42781ad76c5ae65127bf13852a4c6b2f08849 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Wed, 13 Jan 2021 17:12:52 +1000 Subject: [PATCH 0807/2012] drm/nouveau/mmu: fix vram heap sizing Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nvkm/subdev/mmu/base.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/base.c index de91e9a261725..6d5212ae2fd57 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/base.c @@ -316,9 +316,9 @@ nvkm_mmu_vram(struct nvkm_mmu *mmu) { struct nvkm_device *device = mmu->subdev.device; struct nvkm_mm *mm = &device->fb->ram->vram; - const u32 sizeN = nvkm_mm_heap_size(mm, NVKM_RAM_MM_NORMAL); - const u32 sizeU = nvkm_mm_heap_size(mm, NVKM_RAM_MM_NOMAP); - const u32 sizeM = nvkm_mm_heap_size(mm, NVKM_RAM_MM_MIXED); + const u64 sizeN = nvkm_mm_heap_size(mm, NVKM_RAM_MM_NORMAL); + const u64 sizeU = nvkm_mm_heap_size(mm, NVKM_RAM_MM_NOMAP); + const u64 sizeM = nvkm_mm_heap_size(mm, NVKM_RAM_MM_MIXED); u8 type = NVKM_MEM_KIND * !!mmu->func->kind; u8 heap = NVKM_MEM_VRAM; int heapM, heapN, heapU; -- GitLab From 3b050680c84153d8e6f5ae3785922cd417f4b071 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Wed, 13 Jan 2021 17:12:52 +1000 Subject: [PATCH 0808/2012] drm/nouveau/core: recognise GA10[024] GA100 hidden behind a module option, as it's not been as well verified since initial bring-up and may need additional changes. There's no display anyway, so this can wait for a bit. Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/include/nvif/cl0080.h | 1 + .../drm/nouveau/include/nvkm/core/device.h | 1 + drivers/gpu/drm/nouveau/nouveau_backlight.c | 1 + .../gpu/drm/nouveau/nvkm/engine/device/base.c | 35 +++++++++++++++++-- .../gpu/drm/nouveau/nvkm/engine/device/user.c | 1 + 5 files changed, 36 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/nouveau/include/nvif/cl0080.h b/drivers/gpu/drm/nouveau/include/nvif/cl0080.h index cd9a2e687bb61..57d4f457a7d4a 100644 --- a/drivers/gpu/drm/nouveau/include/nvif/cl0080.h +++ b/drivers/gpu/drm/nouveau/include/nvif/cl0080.h @@ -33,6 +33,7 @@ struct nv_device_info_v0 { #define NV_DEVICE_INFO_V0_PASCAL 0x0a #define NV_DEVICE_INFO_V0_VOLTA 0x0b #define NV_DEVICE_INFO_V0_TURING 0x0c +#define NV_DEVICE_INFO_V0_AMPERE 0x0d __u8 family; __u8 pad06[2]; __u64 ram_size; diff --git a/drivers/gpu/drm/nouveau/include/nvkm/core/device.h b/drivers/gpu/drm/nouveau/include/nvkm/core/device.h index 5c007ce62fc34..c920939a14679 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/core/device.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/core/device.h @@ -120,6 +120,7 @@ struct nvkm_device { GP100 = 0x130, GV100 = 0x140, TU100 = 0x160, + GA100 = 0x170, } card_type; u32 chipset; u8 chiprev; diff --git a/drivers/gpu/drm/nouveau/nouveau_backlight.c b/drivers/gpu/drm/nouveau/nouveau_backlight.c index c7a94c94dbf37..72f35a2babcb2 100644 --- a/drivers/gpu/drm/nouveau/nouveau_backlight.c +++ b/drivers/gpu/drm/nouveau/nouveau_backlight.c @@ -256,6 +256,7 @@ nouveau_backlight_init(struct drm_connector *connector) case NV_DEVICE_INFO_V0_PASCAL: case NV_DEVICE_INFO_V0_VOLTA: case NV_DEVICE_INFO_V0_TURING: + case NV_DEVICE_INFO_V0_AMPERE: //XXX: not confirmed ret = nv50_backlight_init(nv_encoder, &props, &ops); break; default: diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c index ffada13c416c5..b6b094bbd562f 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c @@ -2652,6 +2652,21 @@ nv168_chipset = { .sec2 = tu102_sec2_new, }; +static const struct nvkm_device_chip +nv170_chipset = { + .name = "GA100", +}; + +static const struct nvkm_device_chip +nv172_chipset = { + .name = "GA102", +}; + +static const struct nvkm_device_chip +nv174_chipset = { + .name = "GA104", +}; + static int nvkm_device_event_ctor(struct nvkm_object *object, void *data, u32 size, struct nvkm_notify *notify) @@ -3063,6 +3078,7 @@ nvkm_device_ctor(const struct nvkm_device_func *func, case 0x130: device->card_type = GP100; break; case 0x140: device->card_type = GV100; break; case 0x160: device->card_type = TU100; break; + case 0x170: device->card_type = GA100; break; default: break; } @@ -3160,10 +3176,23 @@ nvkm_device_ctor(const struct nvkm_device_func *func, case 0x166: device->chip = &nv166_chipset; break; case 0x167: device->chip = &nv167_chipset; break; case 0x168: device->chip = &nv168_chipset; break; + case 0x172: device->chip = &nv172_chipset; break; + case 0x174: device->chip = &nv174_chipset; break; default: - nvdev_error(device, "unknown chipset (%08x)\n", boot0); - ret = -ENODEV; - goto done; + if (nvkm_boolopt(device->cfgopt, "NvEnableUnsupportedChipsets", false)) { + switch (device->chipset) { + case 0x170: device->chip = &nv170_chipset; break; + default: + break; + } + } + + if (!device->chip) { + nvdev_error(device, "unknown chipset (%08x)\n", boot0); + ret = -ENODEV; + goto done; + } + break; } nvdev_info(device, "NVIDIA %s (%08x)\n", diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/user.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/user.c index 03c6d9aef075c..1478947987860 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/device/user.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/user.c @@ -176,6 +176,7 @@ nvkm_udevice_info(struct nvkm_udevice *udev, void *data, u32 size) case GP100: args->v0.family = NV_DEVICE_INFO_V0_PASCAL; break; case GV100: args->v0.family = NV_DEVICE_INFO_V0_VOLTA; break; case TU100: args->v0.family = NV_DEVICE_INFO_V0_TURING; break; + case GA100: args->v0.family = NV_DEVICE_INFO_V0_AMPERE; break; default: args->v0.family = 0; break; -- GitLab From caeb6ab899c3d36a74cda6e299c6e1c9c4e2a22e Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Wed, 13 Jan 2021 17:12:52 +1000 Subject: [PATCH 0809/2012] drm/nouveau/kms/nv50-: fix case where notifier buffer is at offset 0 VRAM offset 0 is a valid address, triggered on GA102. Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/dispnv50/disp.c | 4 ++-- drivers/gpu/drm/nouveau/dispnv50/disp.h | 2 +- drivers/gpu/drm/nouveau/dispnv50/wimmc37b.c | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/nouveau/dispnv50/disp.c b/drivers/gpu/drm/nouveau/dispnv50/disp.c index 33fff388dd83c..c6367035970eb 100644 --- a/drivers/gpu/drm/nouveau/dispnv50/disp.c +++ b/drivers/gpu/drm/nouveau/dispnv50/disp.c @@ -222,7 +222,7 @@ nv50_dmac_wait(struct nvif_push *push, u32 size) int nv50_dmac_create(struct nvif_device *device, struct nvif_object *disp, - const s32 *oclass, u8 head, void *data, u32 size, u64 syncbuf, + const s32 *oclass, u8 head, void *data, u32 size, s64 syncbuf, struct nv50_dmac *dmac) { struct nouveau_cli *cli = (void *)device->object.client; @@ -271,7 +271,7 @@ nv50_dmac_create(struct nvif_device *device, struct nvif_object *disp, if (ret) return ret; - if (!syncbuf) + if (syncbuf < 0) return 0; ret = nvif_object_ctor(&dmac->base.user, "kmsSyncCtxDma", NV50_DISP_HANDLE_SYNCBUF, diff --git a/drivers/gpu/drm/nouveau/dispnv50/disp.h b/drivers/gpu/drm/nouveau/dispnv50/disp.h index 92bddc0836171..38dec11e7dda5 100644 --- a/drivers/gpu/drm/nouveau/dispnv50/disp.h +++ b/drivers/gpu/drm/nouveau/dispnv50/disp.h @@ -95,7 +95,7 @@ struct nv50_outp_atom { int nv50_dmac_create(struct nvif_device *device, struct nvif_object *disp, const s32 *oclass, u8 head, void *data, u32 size, - u64 syncbuf, struct nv50_dmac *dmac); + s64 syncbuf, struct nv50_dmac *dmac); void nv50_dmac_destroy(struct nv50_dmac *); /* diff --git a/drivers/gpu/drm/nouveau/dispnv50/wimmc37b.c b/drivers/gpu/drm/nouveau/dispnv50/wimmc37b.c index 685b708713242..b390029c69ec1 100644 --- a/drivers/gpu/drm/nouveau/dispnv50/wimmc37b.c +++ b/drivers/gpu/drm/nouveau/dispnv50/wimmc37b.c @@ -76,7 +76,7 @@ wimmc37b_init_(const struct nv50_wimm_func *func, struct nouveau_drm *drm, int ret; ret = nv50_dmac_create(&drm->client.device, &disp->disp->object, - &oclass, 0, &args, sizeof(args), 0, + &oclass, 0, &args, sizeof(args), -1, &wndw->wimm); if (ret) { NV_ERROR(drm, "wimm%04x allocation failed: %d\n", oclass, ret); -- GitLab From 70afbe4bdc0a7ccdb462a38216f5abc3db7e5c1b Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Wed, 13 Jan 2021 17:12:52 +1000 Subject: [PATCH 0810/2012] drm/nouveau/pci/ga10[024]: initial support Appears to be compatible with GP100 code. Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nvkm/engine/device/base.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c index b6b094bbd562f..95c470d13cb9f 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c @@ -2655,16 +2655,19 @@ nv168_chipset = { static const struct nvkm_device_chip nv170_chipset = { .name = "GA100", + .pci = gp100_pci_new, }; static const struct nvkm_device_chip nv172_chipset = { .name = "GA102", + .pci = gp100_pci_new, }; static const struct nvkm_device_chip nv174_chipset = { .name = "GA104", + .pci = gp100_pci_new, }; static int -- GitLab From a34632482f1ea768429a9d4c79a10d12f5093405 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Wed, 13 Jan 2021 17:12:52 +1000 Subject: [PATCH 0811/2012] drm/nouveau/bios/ga10[024]: initial support Forcing PRAMIN-shadowing off for GA100, as it requires display, and we don't know if/where the fuse register for detecting its presence is. Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nvkm/engine/device/base.c | 3 +++ drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadowramin.c | 3 +++ 2 files changed, 6 insertions(+) diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c index 95c470d13cb9f..7c35c32cdf734 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c @@ -2655,18 +2655,21 @@ nv168_chipset = { static const struct nvkm_device_chip nv170_chipset = { .name = "GA100", + .bios = nvkm_bios_new, .pci = gp100_pci_new, }; static const struct nvkm_device_chip nv172_chipset = { .name = "GA102", + .bios = nvkm_bios_new, .pci = gp100_pci_new, }; static const struct nvkm_device_chip nv174_chipset = { .name = "GA104", + .bios = nvkm_bios_new, .pci = gp100_pci_new, }; diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadowramin.c b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadowramin.c index 3634cd0630b81..023ddc7c5399a 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadowramin.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadowramin.c @@ -64,6 +64,9 @@ pramin_init(struct nvkm_bios *bios, const char *name) return NULL; /* we can't get the bios image pointer without PDISP */ + if (device->card_type >= GA100) + addr = device->chipset == 0x170; /*XXX: find the fuse reg for this */ + else if (device->card_type >= GM100) addr = nvkm_rd32(device, 0x021c04); else -- GitLab From 7ddf5e9597faa6f939370e294e0f6d9516d2a431 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Wed, 13 Jan 2021 17:12:52 +1000 Subject: [PATCH 0812/2012] drm/nouveau/devinit/ga10[024]: initial support VPLL regs changed a bit. There's more stuff to do around these, but it's less invasive to stick those changes into disp for now. None of that belongs here anymore anyhow - fix that someday. Signed-off-by: Ben Skeggs --- .../drm/nouveau/include/nvkm/subdev/devinit.h | 1 + .../gpu/drm/nouveau/nvkm/engine/device/base.c | 3 + .../drm/nouveau/nvkm/subdev/devinit/Kbuild | 1 + .../drm/nouveau/nvkm/subdev/devinit/ga100.c | 76 +++++++++++++++++++ .../drm/nouveau/nvkm/subdev/devinit/priv.h | 1 + .../drm/nouveau/nvkm/subdev/devinit/tu102.c | 2 +- 6 files changed, 83 insertions(+), 1 deletion(-) create mode 100644 drivers/gpu/drm/nouveau/nvkm/subdev/devinit/ga100.c diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/devinit.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/devinit.h index 1a39e52e09e36..50cc7c05eac49 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/devinit.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/devinit.h @@ -32,4 +32,5 @@ int gm107_devinit_new(struct nvkm_device *, int, struct nvkm_devinit **); int gm200_devinit_new(struct nvkm_device *, int, struct nvkm_devinit **); int gv100_devinit_new(struct nvkm_device *, int, struct nvkm_devinit **); int tu102_devinit_new(struct nvkm_device *, int, struct nvkm_devinit **); +int ga100_devinit_new(struct nvkm_device *, int, struct nvkm_devinit **); #endif diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c index 7c35c32cdf734..76f1b2504f3a2 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c @@ -2656,6 +2656,7 @@ static const struct nvkm_device_chip nv170_chipset = { .name = "GA100", .bios = nvkm_bios_new, + .devinit = ga100_devinit_new, .pci = gp100_pci_new, }; @@ -2663,6 +2664,7 @@ static const struct nvkm_device_chip nv172_chipset = { .name = "GA102", .bios = nvkm_bios_new, + .devinit = ga100_devinit_new, .pci = gp100_pci_new, }; @@ -2670,6 +2672,7 @@ static const struct nvkm_device_chip nv174_chipset = { .name = "GA104", .bios = nvkm_bios_new, + .devinit = ga100_devinit_new, .pci = gp100_pci_new, }; diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/Kbuild b/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/Kbuild index b3429371ed824..d1abb64841dac 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/Kbuild +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/Kbuild @@ -15,3 +15,4 @@ nvkm-y += nvkm/subdev/devinit/gm107.o nvkm-y += nvkm/subdev/devinit/gm200.o nvkm-y += nvkm/subdev/devinit/gv100.o nvkm-y += nvkm/subdev/devinit/tu102.o +nvkm-y += nvkm/subdev/devinit/ga100.o diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/ga100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/ga100.c new file mode 100644 index 0000000000000..636a92128f6c8 --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/ga100.c @@ -0,0 +1,76 @@ +/* + * Copyright 2021 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "nv50.h" + +#include +#include +#include + +static int +ga100_devinit_pll_set(struct nvkm_devinit *init, u32 type, u32 freq) +{ + struct nvkm_subdev *subdev = &init->subdev; + struct nvkm_device *device = subdev->device; + struct nvbios_pll info; + int head = type - PLL_VPLL0; + int N, fN, M, P; + int ret; + + ret = nvbios_pll_parse(device->bios, type, &info); + if (ret) + return ret; + + ret = gt215_pll_calc(subdev, &info, freq, &N, &fN, &M, &P); + if (ret < 0) + return ret; + + switch (info.type) { + case PLL_VPLL0: + case PLL_VPLL1: + case PLL_VPLL2: + case PLL_VPLL3: + nvkm_wr32(device, 0x00ef00 + (head * 0x40), 0x02080004); + nvkm_wr32(device, 0x00ef18 + (head * 0x40), (N << 16) | fN); + nvkm_wr32(device, 0x00ef04 + (head * 0x40), (P << 16) | M); + nvkm_wr32(device, 0x00e9c0 + (head * 0x04), 0x00000001); + break; + default: + nvkm_warn(subdev, "%08x/%dKhz unimplemented\n", type, freq); + ret = -EINVAL; + break; + } + + return ret; +} + +static const struct nvkm_devinit_func +ga100_devinit = { + .init = nv50_devinit_init, + .post = tu102_devinit_post, + .pll_set = ga100_devinit_pll_set, +}; + +int +ga100_devinit_new(struct nvkm_device *device, int index, struct nvkm_devinit **pinit) +{ + return nv50_devinit_new_(&ga100_devinit, device, index, pinit); +} diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/priv.h b/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/priv.h index 94723352137a7..05961e6242647 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/priv.h +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/priv.h @@ -19,4 +19,5 @@ void nvkm_devinit_ctor(const struct nvkm_devinit_func *, struct nvkm_device *, int index, struct nvkm_devinit *); int nv04_devinit_post(struct nvkm_devinit *, bool); +int tu102_devinit_post(struct nvkm_devinit *, bool); #endif diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/tu102.c b/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/tu102.c index 397670e72fff9..9a469bf482f2f 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/tu102.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/tu102.c @@ -65,7 +65,7 @@ tu102_devinit_pll_set(struct nvkm_devinit *init, u32 type, u32 freq) return ret; } -static int +int tu102_devinit_post(struct nvkm_devinit *base, bool post) { struct nv50_devinit *init = nv50_devinit(base); -- GitLab From 5961c62d20753009408df4752e22991097386aa9 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Wed, 13 Jan 2021 17:12:52 +1000 Subject: [PATCH 0813/2012] drm/nouveau/mc/ga10[024]: initial support Fortunately, all the interrupts we need to bring up basic display support are contained in a single leaf register, allowing this basic (but hackish) implementation. There's a bunch more invasive patches to come implementing all this in a better/more complete way, but trying to get a minimal series out first. Signed-off-by: Ben Skeggs --- .../gpu/drm/nouveau/include/nvkm/subdev/mc.h | 1 + .../gpu/drm/nouveau/nvkm/engine/device/base.c | 3 + drivers/gpu/drm/nouveau/nvkm/subdev/mc/Kbuild | 1 + .../gpu/drm/nouveau/nvkm/subdev/mc/ga100.c | 74 +++++++++++++++++++ 4 files changed, 79 insertions(+) create mode 100644 drivers/gpu/drm/nouveau/nvkm/subdev/mc/ga100.c diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/mc.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/mc.h index 6641fe4c252c6..e45ca45839670 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/mc.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/mc.h @@ -32,4 +32,5 @@ int gk20a_mc_new(struct nvkm_device *, int, struct nvkm_mc **); int gp100_mc_new(struct nvkm_device *, int, struct nvkm_mc **); int gp10b_mc_new(struct nvkm_device *, int, struct nvkm_mc **); int tu102_mc_new(struct nvkm_device *, int, struct nvkm_mc **); +int ga100_mc_new(struct nvkm_device *, int, struct nvkm_mc **); #endif diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c index 76f1b2504f3a2..fb551edc6a357 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c @@ -2657,6 +2657,7 @@ nv170_chipset = { .name = "GA100", .bios = nvkm_bios_new, .devinit = ga100_devinit_new, + .mc = ga100_mc_new, .pci = gp100_pci_new, }; @@ -2665,6 +2666,7 @@ nv172_chipset = { .name = "GA102", .bios = nvkm_bios_new, .devinit = ga100_devinit_new, + .mc = ga100_mc_new, .pci = gp100_pci_new, }; @@ -2673,6 +2675,7 @@ nv174_chipset = { .name = "GA104", .bios = nvkm_bios_new, .devinit = ga100_devinit_new, + .mc = ga100_mc_new, .pci = gp100_pci_new, }; diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/Kbuild b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/Kbuild index 2585ef07532ac..ac2b34e9ac6ad 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/Kbuild +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/Kbuild @@ -14,3 +14,4 @@ nvkm-y += nvkm/subdev/mc/gk20a.o nvkm-y += nvkm/subdev/mc/gp100.o nvkm-y += nvkm/subdev/mc/gp10b.o nvkm-y += nvkm/subdev/mc/tu102.o +nvkm-y += nvkm/subdev/mc/ga100.o diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/ga100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/ga100.c new file mode 100644 index 0000000000000..967eb3af11eb0 --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/ga100.c @@ -0,0 +1,74 @@ +/* + * Copyright 2021 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "priv.h" + +static void +ga100_mc_intr_unarm(struct nvkm_mc *mc) +{ + nvkm_wr32(mc->subdev.device, 0xb81610, 0x00000004); +} + +static void +ga100_mc_intr_rearm(struct nvkm_mc *mc) +{ + nvkm_wr32(mc->subdev.device, 0xb81608, 0x00000004); +} + +static void +ga100_mc_intr_mask(struct nvkm_mc *mc, u32 mask, u32 intr) +{ + nvkm_wr32(mc->subdev.device, 0xb81210, mask & intr ); + nvkm_wr32(mc->subdev.device, 0xb81410, mask & ~(mask & intr)); +} + +static u32 +ga100_mc_intr_stat(struct nvkm_mc *mc) +{ + u32 intr_top = nvkm_rd32(mc->subdev.device, 0xb81600), intr = 0x00000000; + if (intr_top & 0x00000004) + intr = nvkm_mask(mc->subdev.device, 0xb81010, 0x00000000, 0x00000000); + return intr; +} + +static void +ga100_mc_init(struct nvkm_mc *mc) +{ + nv50_mc_init(mc); + nvkm_wr32(mc->subdev.device, 0xb81210, 0xffffffff); +} + +static const struct nvkm_mc_func +ga100_mc = { + .init = ga100_mc_init, + .intr = gp100_mc_intr, + .intr_unarm = ga100_mc_intr_unarm, + .intr_rearm = ga100_mc_intr_rearm, + .intr_mask = ga100_mc_intr_mask, + .intr_stat = ga100_mc_intr_stat, + .reset = gk104_mc_reset, +}; + +int +ga100_mc_new(struct nvkm_device *device, int index, struct nvkm_mc **pmc) +{ + return nvkm_mc_new_(&ga100_mc, device, index, pmc); +} -- GitLab From e0df4bbfc3365d7699e32bebb24647dc7a09b00c Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Wed, 13 Jan 2021 17:12:52 +1000 Subject: [PATCH 0814/2012] drm/nouveau/privring/ga10[024]: initial support Appears to be compatible with GM200 code. Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nvkm/engine/device/base.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c index fb551edc6a357..2f6e1b1ce0bb3 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c @@ -2657,6 +2657,7 @@ nv170_chipset = { .name = "GA100", .bios = nvkm_bios_new, .devinit = ga100_devinit_new, + .ibus = gm200_ibus_new, .mc = ga100_mc_new, .pci = gp100_pci_new, }; @@ -2666,6 +2667,7 @@ nv172_chipset = { .name = "GA102", .bios = nvkm_bios_new, .devinit = ga100_devinit_new, + .ibus = gm200_ibus_new, .mc = ga100_mc_new, .pci = gp100_pci_new, }; @@ -2675,6 +2677,7 @@ nv174_chipset = { .name = "GA104", .bios = nvkm_bios_new, .devinit = ga100_devinit_new, + .ibus = gm200_ibus_new, .mc = ga100_mc_new, .pci = gp100_pci_new, }; -- GitLab From de4781d0f22b54fdbe7ac459eb67b585ca3ee430 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Wed, 13 Jan 2021 17:12:52 +1000 Subject: [PATCH 0815/2012] drm/nouveau/imem/ga10[024]: initial support Appears to be compatible with NV50 code. Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nvkm/engine/device/base.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c index 2f6e1b1ce0bb3..d2be48fc5db1d 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c @@ -2658,6 +2658,7 @@ nv170_chipset = { .bios = nvkm_bios_new, .devinit = ga100_devinit_new, .ibus = gm200_ibus_new, + .imem = nv50_instmem_new, .mc = ga100_mc_new, .pci = gp100_pci_new, }; @@ -2668,6 +2669,7 @@ nv172_chipset = { .bios = nvkm_bios_new, .devinit = ga100_devinit_new, .ibus = gm200_ibus_new, + .imem = nv50_instmem_new, .mc = ga100_mc_new, .pci = gp100_pci_new, }; @@ -2678,6 +2680,7 @@ nv174_chipset = { .bios = nvkm_bios_new, .devinit = ga100_devinit_new, .ibus = gm200_ibus_new, + .imem = nv50_instmem_new, .mc = ga100_mc_new, .pci = gp100_pci_new, }; -- GitLab From 41ba806f40a9a4c4f4c04a474bf368160f1baa2c Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Wed, 13 Jan 2021 17:12:52 +1000 Subject: [PATCH 0816/2012] drm/nouveau/fb/ga10[024]: initial support No VPR scrub. GA102 and GA104 have a new VRAM size detection method. Signed-off-by: Ben Skeggs --- .../gpu/drm/nouveau/include/nvkm/subdev/fb.h | 2 + .../gpu/drm/nouveau/nvkm/engine/device/base.c | 3 ++ drivers/gpu/drm/nouveau/nvkm/subdev/fb/Kbuild | 3 ++ .../gpu/drm/nouveau/nvkm/subdev/fb/ga100.c | 40 +++++++++++++++++++ .../gpu/drm/nouveau/nvkm/subdev/fb/ga102.c | 40 +++++++++++++++++++ .../gpu/drm/nouveau/nvkm/subdev/fb/gv100.c | 2 +- drivers/gpu/drm/nouveau/nvkm/subdev/fb/priv.h | 2 + drivers/gpu/drm/nouveau/nvkm/subdev/fb/ram.h | 1 + .../gpu/drm/nouveau/nvkm/subdev/fb/ramga102.c | 40 +++++++++++++++++++ 9 files changed, 132 insertions(+), 1 deletion(-) create mode 100644 drivers/gpu/drm/nouveau/nvkm/subdev/fb/ga100.c create mode 100644 drivers/gpu/drm/nouveau/nvkm/subdev/fb/ga102.c create mode 100644 drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramga102.c diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/fb.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/fb.h index 34b56b10218a8..2ecd52aec1d12 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/fb.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/fb.h @@ -86,6 +86,8 @@ int gp100_fb_new(struct nvkm_device *, int, struct nvkm_fb **); int gp102_fb_new(struct nvkm_device *, int, struct nvkm_fb **); int gp10b_fb_new(struct nvkm_device *, int, struct nvkm_fb **); int gv100_fb_new(struct nvkm_device *, int, struct nvkm_fb **); +int ga100_fb_new(struct nvkm_device *, int, struct nvkm_fb **); +int ga102_fb_new(struct nvkm_device *, int, struct nvkm_fb **); #include #include diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c index d2be48fc5db1d..4a5d43fe3f3ab 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c @@ -2657,6 +2657,7 @@ nv170_chipset = { .name = "GA100", .bios = nvkm_bios_new, .devinit = ga100_devinit_new, + .fb = ga100_fb_new, .ibus = gm200_ibus_new, .imem = nv50_instmem_new, .mc = ga100_mc_new, @@ -2668,6 +2669,7 @@ nv172_chipset = { .name = "GA102", .bios = nvkm_bios_new, .devinit = ga100_devinit_new, + .fb = ga102_fb_new, .ibus = gm200_ibus_new, .imem = nv50_instmem_new, .mc = ga100_mc_new, @@ -2679,6 +2681,7 @@ nv174_chipset = { .name = "GA104", .bios = nvkm_bios_new, .devinit = ga100_devinit_new, + .fb = ga102_fb_new, .ibus = gm200_ibus_new, .imem = nv50_instmem_new, .mc = ga100_mc_new, diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/Kbuild b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/Kbuild index 43a42159a3d00..5d0bab8ecb433 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/Kbuild +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/Kbuild @@ -32,6 +32,8 @@ nvkm-y += nvkm/subdev/fb/gp100.o nvkm-y += nvkm/subdev/fb/gp102.o nvkm-y += nvkm/subdev/fb/gp10b.o nvkm-y += nvkm/subdev/fb/gv100.o +nvkm-y += nvkm/subdev/fb/ga100.o +nvkm-y += nvkm/subdev/fb/ga102.o nvkm-y += nvkm/subdev/fb/ram.o nvkm-y += nvkm/subdev/fb/ramnv04.o @@ -52,6 +54,7 @@ nvkm-y += nvkm/subdev/fb/ramgk104.o nvkm-y += nvkm/subdev/fb/ramgm107.o nvkm-y += nvkm/subdev/fb/ramgm200.o nvkm-y += nvkm/subdev/fb/ramgp100.o +nvkm-y += nvkm/subdev/fb/ramga102.o nvkm-y += nvkm/subdev/fb/sddr2.o nvkm-y += nvkm/subdev/fb/sddr3.o nvkm-y += nvkm/subdev/fb/gddr3.o diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ga100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ga100.c new file mode 100644 index 0000000000000..bf82686851cd4 --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ga100.c @@ -0,0 +1,40 @@ +/* + * Copyright 2021 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "gf100.h" +#include "ram.h" + +static const struct nvkm_fb_func +ga100_fb = { + .dtor = gf100_fb_dtor, + .oneinit = gf100_fb_oneinit, + .init = gp100_fb_init, + .init_page = gv100_fb_init_page, + .init_unkn = gp100_fb_init_unkn, + .ram_new = gp100_ram_new, + .default_bigpage = 16, +}; + +int +ga100_fb_new(struct nvkm_device *device, int index, struct nvkm_fb **pfb) +{ + return gp102_fb_new_(&ga100_fb, device, index, pfb); +} diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ga102.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ga102.c new file mode 100644 index 0000000000000..bcecf84a6e679 --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ga102.c @@ -0,0 +1,40 @@ +/* + * Copyright 2021 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "gf100.h" +#include "ram.h" + +static const struct nvkm_fb_func +ga102_fb = { + .dtor = gf100_fb_dtor, + .oneinit = gf100_fb_oneinit, + .init = gp100_fb_init, + .init_page = gv100_fb_init_page, + .init_unkn = gp100_fb_init_unkn, + .ram_new = ga102_ram_new, + .default_bigpage = 16, +}; + +int +ga102_fb_new(struct nvkm_device *device, int index, struct nvkm_fb **pfb) +{ + return gp102_fb_new_(&ga102_fb, device, index, pfb); +} diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gv100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gv100.c index 10ff5d053f7ea..feda86a5fba85 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gv100.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gv100.c @@ -22,7 +22,7 @@ #include "gf100.h" #include "ram.h" -static int +int gv100_fb_init_page(struct nvkm_fb *fb) { return (fb->page == 16) ? 0 : -EINVAL; diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/priv.h b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/priv.h index 5be9c563350d7..66932ac10d15c 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/priv.h +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/priv.h @@ -82,4 +82,6 @@ int gp102_fb_new_(const struct nvkm_fb_func *, struct nvkm_device *, int, struct nvkm_fb **); bool gp102_fb_vpr_scrub_required(struct nvkm_fb *); int gp102_fb_vpr_scrub(struct nvkm_fb *); + +int gv100_fb_init_page(struct nvkm_fb *); #endif diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ram.h b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ram.h index d723a9b4e3c47..ea7d66f3dd825 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ram.h +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ram.h @@ -70,4 +70,5 @@ int gk104_ram_new(struct nvkm_fb *, struct nvkm_ram **); int gm107_ram_new(struct nvkm_fb *, struct nvkm_ram **); int gm200_ram_new(struct nvkm_fb *, struct nvkm_ram **); int gp100_ram_new(struct nvkm_fb *, struct nvkm_ram **); +int ga102_ram_new(struct nvkm_fb *, struct nvkm_ram **); #endif diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramga102.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramga102.c new file mode 100644 index 0000000000000..298c136cefe0c --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramga102.c @@ -0,0 +1,40 @@ +/* + * Copyright 2021 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "ram.h" + +#include +#include +#include + +static const struct nvkm_ram_func +ga102_ram = { +}; + +int +ga102_ram_new(struct nvkm_fb *fb, struct nvkm_ram **pram) +{ + struct nvkm_device *device = fb->subdev.device; + enum nvkm_ram_type type = nvkm_fb_bios_memtype(device->bios); + u32 size = nvkm_rd32(device, 0x1183a4); + + return nvkm_ram_new_(&ga102_ram, fb, type, (u64)size << 20, pram); +} -- GitLab From 6f300e0a0ba8873f1225959089f8bb2897d93ec6 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Wed, 13 Jan 2021 17:12:52 +1000 Subject: [PATCH 0817/2012] drm/nouveau/timer/ga10[024]: initial support Appears to be compatible with GK20A code. Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nvkm/engine/device/base.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c index 4a5d43fe3f3ab..6a1920965c019 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c @@ -2662,6 +2662,7 @@ nv170_chipset = { .imem = nv50_instmem_new, .mc = ga100_mc_new, .pci = gp100_pci_new, + .timer = gk20a_timer_new, }; static const struct nvkm_device_chip @@ -2674,6 +2675,7 @@ nv172_chipset = { .imem = nv50_instmem_new, .mc = ga100_mc_new, .pci = gp100_pci_new, + .timer = gk20a_timer_new, }; static const struct nvkm_device_chip @@ -2686,6 +2688,7 @@ nv174_chipset = { .imem = nv50_instmem_new, .mc = ga100_mc_new, .pci = gp100_pci_new, + .timer = gk20a_timer_new, }; static int -- GitLab From a3abc23ac40111c76708119013d63451169e7838 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Wed, 13 Jan 2021 17:12:52 +1000 Subject: [PATCH 0818/2012] drm/nouveau/mmu/ga10[024]: initial support Appears to be compatible with TU102 code. Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nvkm/engine/device/base.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c index 6a1920965c019..ad4cece038b47 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c @@ -2661,6 +2661,7 @@ nv170_chipset = { .ibus = gm200_ibus_new, .imem = nv50_instmem_new, .mc = ga100_mc_new, + .mmu = tu102_mmu_new, .pci = gp100_pci_new, .timer = gk20a_timer_new, }; @@ -2674,6 +2675,7 @@ nv172_chipset = { .ibus = gm200_ibus_new, .imem = nv50_instmem_new, .mc = ga100_mc_new, + .mmu = tu102_mmu_new, .pci = gp100_pci_new, .timer = gk20a_timer_new, }; @@ -2687,6 +2689,7 @@ nv174_chipset = { .ibus = gm200_ibus_new, .imem = nv50_instmem_new, .mc = ga100_mc_new, + .mmu = tu102_mmu_new, .pci = gp100_pci_new, .timer = gk20a_timer_new, }; -- GitLab From f5cbe7c8bd1ac6f8c91179de381e10ee5f0f8809 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Wed, 13 Jan 2021 17:12:52 +1000 Subject: [PATCH 0819/2012] drm/nouveau/bar/ga10[024]: initial support Appears to be compatible with TU102 code. Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nvkm/engine/device/base.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c index ad4cece038b47..c9cc08ba14443 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c @@ -2655,6 +2655,7 @@ nv168_chipset = { static const struct nvkm_device_chip nv170_chipset = { .name = "GA100", + .bar = tu102_bar_new, .bios = nvkm_bios_new, .devinit = ga100_devinit_new, .fb = ga100_fb_new, @@ -2669,6 +2670,7 @@ nv170_chipset = { static const struct nvkm_device_chip nv172_chipset = { .name = "GA102", + .bar = tu102_bar_new, .bios = nvkm_bios_new, .devinit = ga100_devinit_new, .fb = ga102_fb_new, @@ -2683,6 +2685,7 @@ nv172_chipset = { static const struct nvkm_device_chip nv174_chipset = { .name = "GA104", + .bar = tu102_bar_new, .bios = nvkm_bios_new, .devinit = ga100_devinit_new, .fb = ga102_fb_new, -- GitLab From c28efb15f9e51a96c6bce2b92c0f3a4da87db877 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Wed, 13 Jan 2021 17:12:52 +1000 Subject: [PATCH 0820/2012] drm/nouveau/gpio/ga10[024]: initial support GA100 appears to be compatible with GK104 code, the others have some register moves. Signed-off-by: Ben Skeggs --- .../drm/nouveau/include/nvkm/subdev/gpio.h | 1 + .../gpu/drm/nouveau/nvkm/engine/device/base.c | 3 + .../gpu/drm/nouveau/nvkm/subdev/gpio/Kbuild | 1 + .../gpu/drm/nouveau/nvkm/subdev/gpio/ga102.c | 118 ++++++++++++++++++ 4 files changed, 123 insertions(+) create mode 100644 drivers/gpu/drm/nouveau/nvkm/subdev/gpio/ga102.c diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/gpio.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/gpio.h index eaacf8d80527c..cdcce5ece6ff5 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/gpio.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/gpio.h @@ -37,4 +37,5 @@ int nv50_gpio_new(struct nvkm_device *, int, struct nvkm_gpio **); int g94_gpio_new(struct nvkm_device *, int, struct nvkm_gpio **); int gf119_gpio_new(struct nvkm_device *, int, struct nvkm_gpio **); int gk104_gpio_new(struct nvkm_device *, int, struct nvkm_gpio **); +int ga102_gpio_new(struct nvkm_device *, int, struct nvkm_gpio **); #endif diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c index c9cc08ba14443..20d947808ef77 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c @@ -2659,6 +2659,7 @@ nv170_chipset = { .bios = nvkm_bios_new, .devinit = ga100_devinit_new, .fb = ga100_fb_new, + .gpio = gk104_gpio_new, .ibus = gm200_ibus_new, .imem = nv50_instmem_new, .mc = ga100_mc_new, @@ -2674,6 +2675,7 @@ nv172_chipset = { .bios = nvkm_bios_new, .devinit = ga100_devinit_new, .fb = ga102_fb_new, + .gpio = ga102_gpio_new, .ibus = gm200_ibus_new, .imem = nv50_instmem_new, .mc = ga100_mc_new, @@ -2689,6 +2691,7 @@ nv174_chipset = { .bios = nvkm_bios_new, .devinit = ga100_devinit_new, .fb = ga102_fb_new, + .gpio = ga102_gpio_new, .ibus = gm200_ibus_new, .imem = nv50_instmem_new, .mc = ga100_mc_new, diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/gpio/Kbuild b/drivers/gpu/drm/nouveau/nvkm/subdev/gpio/Kbuild index b2ad5922a1c2d..efbbaa080de51 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/gpio/Kbuild +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/gpio/Kbuild @@ -5,3 +5,4 @@ nvkm-y += nvkm/subdev/gpio/nv50.o nvkm-y += nvkm/subdev/gpio/g94.o nvkm-y += nvkm/subdev/gpio/gf119.o nvkm-y += nvkm/subdev/gpio/gk104.o +nvkm-y += nvkm/subdev/gpio/ga102.o diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/gpio/ga102.c b/drivers/gpu/drm/nouveau/nvkm/subdev/gpio/ga102.c new file mode 100644 index 0000000000000..62c791baf4008 --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/gpio/ga102.c @@ -0,0 +1,118 @@ +/* + * Copyright 2021 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "priv.h" + +static void +ga102_gpio_reset(struct nvkm_gpio *gpio, u8 match) +{ + struct nvkm_device *device = gpio->subdev.device; + struct nvkm_bios *bios = device->bios; + u8 ver, len; + u16 entry; + int ent = -1; + + while ((entry = dcb_gpio_entry(bios, 0, ++ent, &ver, &len))) { + u32 data = nvbios_rd32(bios, entry); + u8 line = (data & 0x0000003f); + u8 defs = !!(data & 0x00000080); + u8 func = (data & 0x0000ff00) >> 8; + u8 unk0 = (data & 0x00ff0000) >> 16; + u8 unk1 = (data & 0x1f000000) >> 24; + + if ( func == DCB_GPIO_UNUSED || + (match != DCB_GPIO_UNUSED && match != func)) + continue; + + nvkm_gpio_set(gpio, 0, func, line, defs); + + nvkm_mask(device, 0x021200 + (line * 4), 0xff, unk0); + if (unk1--) + nvkm_mask(device, 0x00d740 + (unk1 * 4), 0xff, line); + } +} + +static int +ga102_gpio_drive(struct nvkm_gpio *gpio, int line, int dir, int out) +{ + struct nvkm_device *device = gpio->subdev.device; + u32 data = ((dir ^ 1) << 13) | (out << 12); + nvkm_mask(device, 0x021200 + (line * 4), 0x00003000, data); + nvkm_mask(device, 0x00d604, 0x00000001, 0x00000001); /* update? */ + return 0; +} + +static int +ga102_gpio_sense(struct nvkm_gpio *gpio, int line) +{ + struct nvkm_device *device = gpio->subdev.device; + return !!(nvkm_rd32(device, 0x021200 + (line * 4)) & 0x00004000); +} + +static void +ga102_gpio_intr_stat(struct nvkm_gpio *gpio, u32 *hi, u32 *lo) +{ + struct nvkm_device *device = gpio->subdev.device; + u32 intr0 = nvkm_rd32(device, 0x021640); + u32 intr1 = nvkm_rd32(device, 0x02164c); + u32 stat0 = nvkm_rd32(device, 0x021648) & intr0; + u32 stat1 = nvkm_rd32(device, 0x021654) & intr1; + *lo = (stat1 & 0xffff0000) | (stat0 >> 16); + *hi = (stat1 << 16) | (stat0 & 0x0000ffff); + nvkm_wr32(device, 0x021640, intr0); + nvkm_wr32(device, 0x02164c, intr1); +} + +static void +ga102_gpio_intr_mask(struct nvkm_gpio *gpio, u32 type, u32 mask, u32 data) +{ + struct nvkm_device *device = gpio->subdev.device; + u32 inte0 = nvkm_rd32(device, 0x021648); + u32 inte1 = nvkm_rd32(device, 0x021654); + if (type & NVKM_GPIO_LO) + inte0 = (inte0 & ~(mask << 16)) | (data << 16); + if (type & NVKM_GPIO_HI) + inte0 = (inte0 & ~(mask & 0xffff)) | (data & 0xffff); + mask >>= 16; + data >>= 16; + if (type & NVKM_GPIO_LO) + inte1 = (inte1 & ~(mask << 16)) | (data << 16); + if (type & NVKM_GPIO_HI) + inte1 = (inte1 & ~mask) | data; + nvkm_wr32(device, 0x021648, inte0); + nvkm_wr32(device, 0x021654, inte1); +} + +static const struct nvkm_gpio_func +ga102_gpio = { + .lines = 32, + .intr_stat = ga102_gpio_intr_stat, + .intr_mask = ga102_gpio_intr_mask, + .drive = ga102_gpio_drive, + .sense = ga102_gpio_sense, + .reset = ga102_gpio_reset, +}; + +int +ga102_gpio_new(struct nvkm_device *device, int index, struct nvkm_gpio **pgpio) +{ + return nvkm_gpio_new_(&ga102_gpio, device, index, pgpio); +} -- GitLab From 8a0412265f06490d93724bf8badf220180790ad1 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Wed, 13 Jan 2021 17:12:52 +1000 Subject: [PATCH 0821/2012] drm/nouveau/i2c/ga10[024]: initial support Appears to be compatible with GM200 code. Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nvkm/engine/device/base.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c index 20d947808ef77..946b0001ca548 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c @@ -2660,6 +2660,7 @@ nv170_chipset = { .devinit = ga100_devinit_new, .fb = ga100_fb_new, .gpio = gk104_gpio_new, + .i2c = gm200_i2c_new, .ibus = gm200_ibus_new, .imem = nv50_instmem_new, .mc = ga100_mc_new, @@ -2676,6 +2677,7 @@ nv172_chipset = { .devinit = ga100_devinit_new, .fb = ga102_fb_new, .gpio = ga102_gpio_new, + .i2c = gm200_i2c_new, .ibus = gm200_ibus_new, .imem = nv50_instmem_new, .mc = ga100_mc_new, @@ -2692,6 +2694,7 @@ nv174_chipset = { .devinit = ga100_devinit_new, .fb = ga102_fb_new, .gpio = ga102_gpio_new, + .i2c = gm200_i2c_new, .ibus = gm200_ibus_new, .imem = nv50_instmem_new, .mc = ga100_mc_new, -- GitLab From a6cf0320aad0c69a6b558dd41d3cb6891a6c9872 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Wed, 13 Jan 2021 17:12:52 +1000 Subject: [PATCH 0822/2012] drm/nouveau/dmaobj/ga10[24]: initial support Appears to be compatible with GV100 code, and not required on GA100, as it shouldn't have display. Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nvkm/engine/device/base.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c index 946b0001ca548..bea3daf085700 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c @@ -2684,6 +2684,7 @@ nv172_chipset = { .mmu = tu102_mmu_new, .pci = gp100_pci_new, .timer = gk20a_timer_new, + .dma = gv100_dma_new, }; static const struct nvkm_device_chip @@ -2701,6 +2702,7 @@ nv174_chipset = { .mmu = tu102_mmu_new, .pci = gp100_pci_new, .timer = gk20a_timer_new, + .dma = gv100_dma_new, }; static int -- GitLab From 8ef23b6f6a79e6fa2a169081d2d76011fffa0482 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Wed, 13 Jan 2021 17:12:52 +1000 Subject: [PATCH 0823/2012] drm/nouveau/disp/ga10[24]: initial support UEFI/RM no longer use IED scripts from the VBIOS, though they appear to have been updated for use by the x86 VBIOS code, so we should be able to continue using them for the moment. Unfortunately, we require some hacks to do so, as the BeforeLinkTraining IED script became a pointer to an array of scripts instead, without a revbump of the relevant tables. There's also some changes to SOR clock divider fiddling, which are hopefully correct enough that things work as they should. AFAIK, GA100 shouldn't have display, so it hasn't been added. Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/dispnv50/Kbuild | 1 + drivers/gpu/drm/nouveau/dispnv50/core.c | 1 + drivers/gpu/drm/nouveau/dispnv50/curs.c | 1 + drivers/gpu/drm/nouveau/dispnv50/wimm.c | 1 + drivers/gpu/drm/nouveau/dispnv50/wndw.c | 1 + drivers/gpu/drm/nouveau/dispnv50/wndw.h | 8 + drivers/gpu/drm/nouveau/dispnv50/wndwc57e.c | 10 +- drivers/gpu/drm/nouveau/dispnv50/wndwc67e.c | 106 +++++++++++++ drivers/gpu/drm/nouveau/include/nvif/class.h | 5 + .../drm/nouveau/include/nvkm/engine/disp.h | 1 + drivers/gpu/drm/nouveau/nvif/disp.c | 1 + .../gpu/drm/nouveau/nvkm/engine/device/base.c | 2 + .../gpu/drm/nouveau/nvkm/engine/disp/Kbuild | 3 + drivers/gpu/drm/nouveau/nvkm/engine/disp/dp.c | 33 ++++- .../gpu/drm/nouveau/nvkm/engine/disp/ga102.c | 46 ++++++ .../gpu/drm/nouveau/nvkm/engine/disp/ior.h | 4 + .../gpu/drm/nouveau/nvkm/engine/disp/nv50.h | 2 + .../drm/nouveau/nvkm/engine/disp/rootga102.c | 52 +++++++ .../drm/nouveau/nvkm/engine/disp/rootnv50.h | 1 + .../drm/nouveau/nvkm/engine/disp/sorga102.c | 140 ++++++++++++++++++ .../drm/nouveau/nvkm/engine/disp/sortu102.c | 2 +- .../gpu/drm/nouveau/nvkm/engine/disp/tu102.c | 2 +- 22 files changed, 410 insertions(+), 13 deletions(-) create mode 100644 drivers/gpu/drm/nouveau/dispnv50/wndwc67e.c create mode 100644 drivers/gpu/drm/nouveau/nvkm/engine/disp/ga102.c create mode 100644 drivers/gpu/drm/nouveau/nvkm/engine/disp/rootga102.c create mode 100644 drivers/gpu/drm/nouveau/nvkm/engine/disp/sorga102.c diff --git a/drivers/gpu/drm/nouveau/dispnv50/Kbuild b/drivers/gpu/drm/nouveau/dispnv50/Kbuild index 6fdddb266fb1b..4488e1c061b3d 100644 --- a/drivers/gpu/drm/nouveau/dispnv50/Kbuild +++ b/drivers/gpu/drm/nouveau/dispnv50/Kbuild @@ -37,6 +37,7 @@ nouveau-y += dispnv50/wimmc37b.o nouveau-y += dispnv50/wndw.o nouveau-y += dispnv50/wndwc37e.o nouveau-y += dispnv50/wndwc57e.o +nouveau-y += dispnv50/wndwc67e.o nouveau-y += dispnv50/base.o nouveau-y += dispnv50/base507c.o diff --git a/drivers/gpu/drm/nouveau/dispnv50/core.c b/drivers/gpu/drm/nouveau/dispnv50/core.c index 27ea3f34706d4..abefc2343443b 100644 --- a/drivers/gpu/drm/nouveau/dispnv50/core.c +++ b/drivers/gpu/drm/nouveau/dispnv50/core.c @@ -42,6 +42,7 @@ nv50_core_new(struct nouveau_drm *drm, struct nv50_core **pcore) int version; int (*new)(struct nouveau_drm *, s32, struct nv50_core **); } cores[] = { + { GA102_DISP_CORE_CHANNEL_DMA, 0, corec57d_new }, { TU102_DISP_CORE_CHANNEL_DMA, 0, corec57d_new }, { GV100_DISP_CORE_CHANNEL_DMA, 0, corec37d_new }, { GP102_DISP_CORE_CHANNEL_DMA, 0, core917d_new }, diff --git a/drivers/gpu/drm/nouveau/dispnv50/curs.c b/drivers/gpu/drm/nouveau/dispnv50/curs.c index 121c24a18f111..31d8b2e4791dd 100644 --- a/drivers/gpu/drm/nouveau/dispnv50/curs.c +++ b/drivers/gpu/drm/nouveau/dispnv50/curs.c @@ -31,6 +31,7 @@ nv50_curs_new(struct nouveau_drm *drm, int head, struct nv50_wndw **pwndw) int version; int (*new)(struct nouveau_drm *, int, s32, struct nv50_wndw **); } curses[] = { + { GA102_DISP_CURSOR, 0, cursc37a_new }, { TU102_DISP_CURSOR, 0, cursc37a_new }, { GV100_DISP_CURSOR, 0, cursc37a_new }, { GK104_DISP_CURSOR, 0, curs907a_new }, diff --git a/drivers/gpu/drm/nouveau/dispnv50/wimm.c b/drivers/gpu/drm/nouveau/dispnv50/wimm.c index a1ac153d5e984..566fbddfc8d7f 100644 --- a/drivers/gpu/drm/nouveau/dispnv50/wimm.c +++ b/drivers/gpu/drm/nouveau/dispnv50/wimm.c @@ -31,6 +31,7 @@ nv50_wimm_init(struct nouveau_drm *drm, struct nv50_wndw *wndw) int version; int (*init)(struct nouveau_drm *, s32, struct nv50_wndw *); } wimms[] = { + { GA102_DISP_WINDOW_IMM_CHANNEL_DMA, 0, wimmc37b_init }, { TU102_DISP_WINDOW_IMM_CHANNEL_DMA, 0, wimmc37b_init }, { GV100_DISP_WINDOW_IMM_CHANNEL_DMA, 0, wimmc37b_init }, {} diff --git a/drivers/gpu/drm/nouveau/dispnv50/wndw.c b/drivers/gpu/drm/nouveau/dispnv50/wndw.c index 0356474ad6f6a..ce451242f79eb 100644 --- a/drivers/gpu/drm/nouveau/dispnv50/wndw.c +++ b/drivers/gpu/drm/nouveau/dispnv50/wndw.c @@ -784,6 +784,7 @@ nv50_wndw_new(struct nouveau_drm *drm, enum drm_plane_type type, int index, int (*new)(struct nouveau_drm *, enum drm_plane_type, int, s32, struct nv50_wndw **); } wndws[] = { + { GA102_DISP_WINDOW_CHANNEL_DMA, 0, wndwc67e_new }, { TU102_DISP_WINDOW_CHANNEL_DMA, 0, wndwc57e_new }, { GV100_DISP_WINDOW_CHANNEL_DMA, 0, wndwc37e_new }, {} diff --git a/drivers/gpu/drm/nouveau/dispnv50/wndw.h b/drivers/gpu/drm/nouveau/dispnv50/wndw.h index 3278e28800343..f4e0c50800344 100644 --- a/drivers/gpu/drm/nouveau/dispnv50/wndw.h +++ b/drivers/gpu/drm/nouveau/dispnv50/wndw.h @@ -129,6 +129,14 @@ int wndwc37e_update(struct nv50_wndw *, u32 *); int wndwc57e_new(struct nouveau_drm *, enum drm_plane_type, int, s32, struct nv50_wndw **); +bool wndwc57e_ilut(struct nv50_wndw *, struct nv50_wndw_atom *, int); +int wndwc57e_ilut_set(struct nv50_wndw *, struct nv50_wndw_atom *); +int wndwc57e_ilut_clr(struct nv50_wndw *); +int wndwc57e_csc_set(struct nv50_wndw *, struct nv50_wndw_atom *); +int wndwc57e_csc_clr(struct nv50_wndw *); + +int wndwc67e_new(struct nouveau_drm *, enum drm_plane_type, int, s32, + struct nv50_wndw **); int nv50_wndw_new(struct nouveau_drm *, enum drm_plane_type, int index, struct nv50_wndw **); diff --git a/drivers/gpu/drm/nouveau/dispnv50/wndwc57e.c b/drivers/gpu/drm/nouveau/dispnv50/wndwc57e.c index 429be0bb02220..abdd3bb658b38 100644 --- a/drivers/gpu/drm/nouveau/dispnv50/wndwc57e.c +++ b/drivers/gpu/drm/nouveau/dispnv50/wndwc57e.c @@ -80,7 +80,7 @@ wndwc57e_image_set(struct nv50_wndw *wndw, struct nv50_wndw_atom *asyw) return 0; } -static int +int wndwc57e_csc_clr(struct nv50_wndw *wndw) { struct nvif_push *push = wndw->wndw.push; @@ -98,7 +98,7 @@ wndwc57e_csc_clr(struct nv50_wndw *wndw) return 0; } -static int +int wndwc57e_csc_set(struct nv50_wndw *wndw, struct nv50_wndw_atom *asyw) { struct nvif_push *push = wndw->wndw.push; @@ -111,7 +111,7 @@ wndwc57e_csc_set(struct nv50_wndw *wndw, struct nv50_wndw_atom *asyw) return 0; } -static int +int wndwc57e_ilut_clr(struct nv50_wndw *wndw) { struct nvif_push *push = wndw->wndw.push; @@ -124,7 +124,7 @@ wndwc57e_ilut_clr(struct nv50_wndw *wndw) return 0; } -static int +int wndwc57e_ilut_set(struct nv50_wndw *wndw, struct nv50_wndw_atom *asyw) { struct nvif_push *push = wndw->wndw.push; @@ -179,7 +179,7 @@ wndwc57e_ilut_load(struct drm_color_lut *in, int size, void __iomem *mem) writew(readw(mem - 4), mem + 4); } -static bool +bool wndwc57e_ilut(struct nv50_wndw *wndw, struct nv50_wndw_atom *asyw, int size) { if (size = size ? size : 1024, size != 256 && size != 1024) diff --git a/drivers/gpu/drm/nouveau/dispnv50/wndwc67e.c b/drivers/gpu/drm/nouveau/dispnv50/wndwc67e.c new file mode 100644 index 0000000000000..7a370fa1df20c --- /dev/null +++ b/drivers/gpu/drm/nouveau/dispnv50/wndwc67e.c @@ -0,0 +1,106 @@ +/* + * Copyright 2021 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "wndw.h" +#include "atom.h" + +#include + +#include + +static int +wndwc67e_image_set(struct nv50_wndw *wndw, struct nv50_wndw_atom *asyw) +{ + struct nvif_push *push = wndw->wndw.push; + int ret; + + if ((ret = PUSH_WAIT(push, 17))) + return ret; + + PUSH_MTHD(push, NVC57E, SET_PRESENT_CONTROL, + NVVAL(NVC57E, SET_PRESENT_CONTROL, MIN_PRESENT_INTERVAL, asyw->image.interval) | + NVVAL(NVC57E, SET_PRESENT_CONTROL, BEGIN_MODE, asyw->image.mode) | + NVDEF(NVC57E, SET_PRESENT_CONTROL, TIMESTAMP_MODE, DISABLE)); + + PUSH_MTHD(push, NVC57E, SET_SIZE, + NVVAL(NVC57E, SET_SIZE, WIDTH, asyw->image.w) | + NVVAL(NVC57E, SET_SIZE, HEIGHT, asyw->image.h), + + SET_STORAGE, + NVVAL(NVC57E, SET_STORAGE, BLOCK_HEIGHT, asyw->image.blockh), + + SET_PARAMS, + NVVAL(NVC57E, SET_PARAMS, FORMAT, asyw->image.format) | + NVDEF(NVC57E, SET_PARAMS, CLAMP_BEFORE_BLEND, DISABLE) | + NVDEF(NVC57E, SET_PARAMS, SWAP_UV, DISABLE) | + NVDEF(NVC57E, SET_PARAMS, FMT_ROUNDING_MODE, ROUND_TO_NEAREST), + + SET_PLANAR_STORAGE(0), + NVVAL(NVC57E, SET_PLANAR_STORAGE, PITCH, asyw->image.blocks[0]) | + NVVAL(NVC57E, SET_PLANAR_STORAGE, PITCH, asyw->image.pitch[0] >> 6)); + + PUSH_MTHD(push, NVC57E, SET_CONTEXT_DMA_ISO(0), asyw->image.handle, 1); + PUSH_MTHD(push, NVC57E, SET_OFFSET(0), asyw->image.offset[0] >> 8); + + PUSH_MTHD(push, NVC57E, SET_POINT_IN(0), + NVVAL(NVC57E, SET_POINT_IN, X, asyw->state.src_x >> 16) | + NVVAL(NVC57E, SET_POINT_IN, Y, asyw->state.src_y >> 16)); + + PUSH_MTHD(push, NVC57E, SET_SIZE_IN, + NVVAL(NVC57E, SET_SIZE_IN, WIDTH, asyw->state.src_w >> 16) | + NVVAL(NVC57E, SET_SIZE_IN, HEIGHT, asyw->state.src_h >> 16)); + + PUSH_MTHD(push, NVC57E, SET_SIZE_OUT, + NVVAL(NVC57E, SET_SIZE_OUT, WIDTH, asyw->state.crtc_w) | + NVVAL(NVC57E, SET_SIZE_OUT, HEIGHT, asyw->state.crtc_h)); + return 0; +} + +static const struct nv50_wndw_func +wndwc67e = { + .acquire = wndwc37e_acquire, + .release = wndwc37e_release, + .sema_set = wndwc37e_sema_set, + .sema_clr = wndwc37e_sema_clr, + .ntfy_set = wndwc37e_ntfy_set, + .ntfy_clr = wndwc37e_ntfy_clr, + .ntfy_reset = corec37d_ntfy_init, + .ntfy_wait_begun = base507c_ntfy_wait_begun, + .ilut = wndwc57e_ilut, + .ilut_identity = true, + .ilut_size = 1024, + .xlut_set = wndwc57e_ilut_set, + .xlut_clr = wndwc57e_ilut_clr, + .csc = base907c_csc, + .csc_set = wndwc57e_csc_set, + .csc_clr = wndwc57e_csc_clr, + .image_set = wndwc67e_image_set, + .image_clr = wndwc37e_image_clr, + .blend_set = wndwc37e_blend_set, + .update = wndwc37e_update, +}; + +int +wndwc67e_new(struct nouveau_drm *drm, enum drm_plane_type type, int index, + s32 oclass, struct nv50_wndw **pwndw) +{ + return wndwc37e_new_(&wndwc67e, drm, type, index, oclass, BIT(index >> 1), pwndw); +} diff --git a/drivers/gpu/drm/nouveau/include/nvif/class.h b/drivers/gpu/drm/nouveau/include/nvif/class.h index 2c79beb41126f..ba2c28ea43d20 100644 --- a/drivers/gpu/drm/nouveau/include/nvif/class.h +++ b/drivers/gpu/drm/nouveau/include/nvif/class.h @@ -88,6 +88,7 @@ #define GP102_DISP /* cl5070.h */ 0x00009870 #define GV100_DISP /* cl5070.h */ 0x0000c370 #define TU102_DISP /* cl5070.h */ 0x0000c570 +#define GA102_DISP /* cl5070.h */ 0x0000c670 #define GV100_DISP_CAPS 0x0000c373 @@ -103,6 +104,7 @@ #define GK104_DISP_CURSOR /* cl507a.h */ 0x0000917a #define GV100_DISP_CURSOR /* cl507a.h */ 0x0000c37a #define TU102_DISP_CURSOR /* cl507a.h */ 0x0000c57a +#define GA102_DISP_CURSOR /* cl507a.h */ 0x0000c67a #define NV50_DISP_OVERLAY /* cl507b.h */ 0x0000507b #define G82_DISP_OVERLAY /* cl507b.h */ 0x0000827b @@ -112,6 +114,7 @@ #define GV100_DISP_WINDOW_IMM_CHANNEL_DMA /* clc37b.h */ 0x0000c37b #define TU102_DISP_WINDOW_IMM_CHANNEL_DMA /* clc37b.h */ 0x0000c57b +#define GA102_DISP_WINDOW_IMM_CHANNEL_DMA /* clc37b.h */ 0x0000c67b #define NV50_DISP_BASE_CHANNEL_DMA /* cl507c.h */ 0x0000507c #define G82_DISP_BASE_CHANNEL_DMA /* cl507c.h */ 0x0000827c @@ -135,6 +138,7 @@ #define GP102_DISP_CORE_CHANNEL_DMA /* cl507d.h */ 0x0000987d #define GV100_DISP_CORE_CHANNEL_DMA /* cl507d.h */ 0x0000c37d #define TU102_DISP_CORE_CHANNEL_DMA /* cl507d.h */ 0x0000c57d +#define GA102_DISP_CORE_CHANNEL_DMA /* cl507d.h */ 0x0000c67d #define NV50_DISP_OVERLAY_CHANNEL_DMA /* cl507e.h */ 0x0000507e #define G82_DISP_OVERLAY_CHANNEL_DMA /* cl507e.h */ 0x0000827e @@ -145,6 +149,7 @@ #define GV100_DISP_WINDOW_CHANNEL_DMA /* clc37e.h */ 0x0000c37e #define TU102_DISP_WINDOW_CHANNEL_DMA /* clc37e.h */ 0x0000c57e +#define GA102_DISP_WINDOW_CHANNEL_DMA /* clc37e.h */ 0x0000c67e #define NV50_TESLA 0x00005097 #define G82_TESLA 0x00008297 diff --git a/drivers/gpu/drm/nouveau/include/nvkm/engine/disp.h b/drivers/gpu/drm/nouveau/include/nvkm/engine/disp.h index 5a96c942d912f..0f6fa6631a197 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/engine/disp.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/engine/disp.h @@ -37,4 +37,5 @@ int gp100_disp_new(struct nvkm_device *, int, struct nvkm_disp **); int gp102_disp_new(struct nvkm_device *, int, struct nvkm_disp **); int gv100_disp_new(struct nvkm_device *, int, struct nvkm_disp **); int tu102_disp_new(struct nvkm_device *, int, struct nvkm_disp **); +int ga102_disp_new(struct nvkm_device *, int, struct nvkm_disp **); #endif diff --git a/drivers/gpu/drm/nouveau/nvif/disp.c b/drivers/gpu/drm/nouveau/nvif/disp.c index 8d0d30e08f57e..529cb60d5efb0 100644 --- a/drivers/gpu/drm/nouveau/nvif/disp.c +++ b/drivers/gpu/drm/nouveau/nvif/disp.c @@ -35,6 +35,7 @@ nvif_disp_ctor(struct nvif_device *device, const char *name, s32 oclass, struct nvif_disp *disp) { static const struct nvif_mclass disps[] = { + { GA102_DISP, -1 }, { TU102_DISP, -1 }, { GV100_DISP, -1 }, { GP102_DISP, -1 }, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c index bea3daf085700..cdcc851e06f9b 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c @@ -2684,6 +2684,7 @@ nv172_chipset = { .mmu = tu102_mmu_new, .pci = gp100_pci_new, .timer = gk20a_timer_new, + .disp = ga102_disp_new, .dma = gv100_dma_new, }; @@ -2702,6 +2703,7 @@ nv174_chipset = { .mmu = tu102_mmu_new, .pci = gp100_pci_new, .timer = gk20a_timer_new, + .disp = ga102_disp_new, .dma = gv100_dma_new, }; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/Kbuild b/drivers/gpu/drm/nouveau/nvkm/engine/disp/Kbuild index cf075311cdd27..b03f043efe261 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/Kbuild +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/Kbuild @@ -17,6 +17,7 @@ nvkm-y += nvkm/engine/disp/gp100.o nvkm-y += nvkm/engine/disp/gp102.o nvkm-y += nvkm/engine/disp/gv100.o nvkm-y += nvkm/engine/disp/tu102.o +nvkm-y += nvkm/engine/disp/ga102.o nvkm-y += nvkm/engine/disp/vga.o nvkm-y += nvkm/engine/disp/head.o @@ -42,6 +43,7 @@ nvkm-y += nvkm/engine/disp/sorgm200.o nvkm-y += nvkm/engine/disp/sorgp100.o nvkm-y += nvkm/engine/disp/sorgv100.o nvkm-y += nvkm/engine/disp/sortu102.o +nvkm-y += nvkm/engine/disp/sorga102.o nvkm-y += nvkm/engine/disp/outp.o nvkm-y += nvkm/engine/disp/dp.o @@ -75,6 +77,7 @@ nvkm-y += nvkm/engine/disp/rootgp100.o nvkm-y += nvkm/engine/disp/rootgp102.o nvkm-y += nvkm/engine/disp/rootgv100.o nvkm-y += nvkm/engine/disp/roottu102.o +nvkm-y += nvkm/engine/disp/rootga102.o nvkm-y += nvkm/engine/disp/capsgv100.o diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/dp.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/dp.c index 3800aeb507d01..55fbfe28c6dc1 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/dp.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/dp.c @@ -33,6 +33,12 @@ #include +/* IED scripts are no longer used by UEFI/RM from Ampere, but have been updated for + * the x86 option ROM. However, the relevant VBIOS table versions weren't modified, + * so we're unable to detect this in a nice way. + */ +#define AMPERE_IED_HACK(disp) ((disp)->engine.subdev.device->card_type >= GA100) + struct lt_state { struct nvkm_dp *dp; u8 stat[6]; @@ -238,6 +244,19 @@ nvkm_dp_train_links(struct nvkm_dp *dp) dp->dpcd[DPCD_RC02] &= ~DPCD_RC02_TPS3_SUPPORTED; lt.pc2 = dp->dpcd[DPCD_RC02] & DPCD_RC02_TPS3_SUPPORTED; + if (AMPERE_IED_HACK(disp) && (lnkcmp = lt.dp->info.script[0])) { + /* Execute BeforeLinkTraining script from DP Info table. */ + while (ior->dp.bw < nvbios_rd08(bios, lnkcmp)) + lnkcmp += 3; + lnkcmp = nvbios_rd16(bios, lnkcmp + 1); + + nvbios_init(&dp->outp.disp->engine.subdev, lnkcmp, + init.outp = &dp->outp.info; + init.or = ior->id; + init.link = ior->asy.link; + ); + } + /* Set desired link configuration on the source. */ if ((lnkcmp = lt.dp->info.lnkcmp)) { if (dp->version < 0x30) { @@ -316,12 +335,14 @@ nvkm_dp_train_init(struct nvkm_dp *dp) ); } - /* Execute BeforeLinkTraining script from DP Info table. */ - nvbios_init(&dp->outp.disp->engine.subdev, dp->info.script[0], - init.outp = &dp->outp.info; - init.or = dp->outp.ior->id; - init.link = dp->outp.ior->asy.link; - ); + if (!AMPERE_IED_HACK(dp->outp.disp)) { + /* Execute BeforeLinkTraining script from DP Info table. */ + nvbios_init(&dp->outp.disp->engine.subdev, dp->info.script[0], + init.outp = &dp->outp.info; + init.or = dp->outp.ior->id; + init.link = dp->outp.ior->asy.link; + ); + } } static const struct dp_rates { diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/ga102.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/ga102.c new file mode 100644 index 0000000000000..aa2e5645fe365 --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/ga102.c @@ -0,0 +1,46 @@ +/* + * Copyright 2021 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "nv50.h" +#include "head.h" +#include "ior.h" +#include "channv50.h" +#include "rootnv50.h" + +static const struct nv50_disp_func +ga102_disp = { + .init = tu102_disp_init, + .fini = gv100_disp_fini, + .intr = gv100_disp_intr, + .uevent = &gv100_disp_chan_uevent, + .super = gv100_disp_super, + .root = &ga102_disp_root_oclass, + .wndw = { .cnt = gv100_disp_wndw_cnt }, + .head = { .cnt = gv100_head_cnt, .new = gv100_head_new }, + .sor = { .cnt = gv100_sor_cnt, .new = ga102_sor_new }, + .ramht_size = 0x2000, +}; + +int +ga102_disp_new(struct nvkm_device *device, int index, struct nvkm_disp **pdisp) +{ + return nv50_disp_new_(&ga102_disp, device, index, pdisp); +} diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/ior.h b/drivers/gpu/drm/nouveau/nvkm/engine/disp/ior.h index 09f3038eff26f..9f0bb7c6b0100 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/ior.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/ior.h @@ -150,6 +150,8 @@ void gv100_sor_dp_audio(struct nvkm_ior *, int, bool); void gv100_sor_dp_audio_sym(struct nvkm_ior *, int, u16, u32); void gv100_sor_dp_watermark(struct nvkm_ior *, int, u8); +void tu102_sor_dp_vcpi(struct nvkm_ior *, int, u8, u8, u16, u16); + void g84_hdmi_ctrl(struct nvkm_ior *, int, bool, u8, u8, u8 *, u8 , u8 *, u8); void gt215_hdmi_ctrl(struct nvkm_ior *, int, bool, u8, u8, u8 *, u8 , u8 *, u8); void gf119_hdmi_ctrl(struct nvkm_ior *, int, bool, u8, u8, u8 *, u8 , u8 *, u8); @@ -207,4 +209,6 @@ int gv100_sor_cnt(struct nvkm_disp *, unsigned long *); int gv100_sor_new(struct nvkm_disp *, int); int tu102_sor_new(struct nvkm_disp *, int); + +int ga102_sor_new(struct nvkm_disp *, int); #endif diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/nv50.h b/drivers/gpu/drm/nouveau/nvkm/engine/disp/nv50.h index a677161c7f3a6..db31b37752a27 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/nv50.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/nv50.h @@ -86,6 +86,8 @@ void gv100_disp_intr(struct nv50_disp *); void gv100_disp_super(struct work_struct *); int gv100_disp_wndw_cnt(struct nvkm_disp *, unsigned long *); +int tu102_disp_init(struct nv50_disp *); + void nv50_disp_dptmds_war_2(struct nv50_disp *, struct dcb_output *); void nv50_disp_dptmds_war_3(struct nv50_disp *, struct dcb_output *); void nv50_disp_update_sppll1(struct nv50_disp *); diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootga102.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootga102.c new file mode 100644 index 0000000000000..9af07c3cf9fc0 --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootga102.c @@ -0,0 +1,52 @@ +/* + * Copyright 2021 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "rootnv50.h" +#include "channv50.h" + +#include + +static const struct nv50_disp_root_func +ga102_disp_root = { + .user = { + {{-1,-1,GV100_DISP_CAPS }, gv100_disp_caps_new }, + {{0,0,GA102_DISP_CURSOR }, gv100_disp_curs_new }, + {{0,0,GA102_DISP_WINDOW_IMM_CHANNEL_DMA}, gv100_disp_wimm_new }, + {{0,0,GA102_DISP_CORE_CHANNEL_DMA }, gv100_disp_core_new }, + {{0,0,GA102_DISP_WINDOW_CHANNEL_DMA }, gv100_disp_wndw_new }, + {} + }, +}; + +static int +ga102_disp_root_new(struct nvkm_disp *disp, const struct nvkm_oclass *oclass, + void *data, u32 size, struct nvkm_object **pobject) +{ + return nv50_disp_root_new_(&ga102_disp_root, disp, oclass, data, size, pobject); +} + +const struct nvkm_disp_oclass +ga102_disp_root_oclass = { + .base.oclass = GA102_DISP, + .base.minver = -1, + .base.maxver = -1, + .ctor = ga102_disp_root_new, +}; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootnv50.h b/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootnv50.h index 7070f5408d92b..27bb170d02930 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootnv50.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootnv50.h @@ -41,4 +41,5 @@ extern const struct nvkm_disp_oclass gp100_disp_root_oclass; extern const struct nvkm_disp_oclass gp102_disp_root_oclass; extern const struct nvkm_disp_oclass gv100_disp_root_oclass; extern const struct nvkm_disp_oclass tu102_disp_root_oclass; +extern const struct nvkm_disp_oclass ga102_disp_root_oclass; #endif diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorga102.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorga102.c new file mode 100644 index 0000000000000..033827de91162 --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorga102.c @@ -0,0 +1,140 @@ +/* + * Copyright 2021 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "ior.h" + +#include + +static int +ga102_sor_dp_links(struct nvkm_ior *sor, struct nvkm_i2c_aux *aux) +{ + struct nvkm_device *device = sor->disp->engine.subdev.device; + const u32 soff = nv50_ior_base(sor); + const u32 loff = nv50_sor_link(sor); + u32 dpctrl = 0x00000000; + u32 clksor = 0x00000000; + + switch (sor->dp.bw) { + case 0x06: clksor |= 0x00000000; break; + case 0x0a: clksor |= 0x00040000; break; + case 0x14: clksor |= 0x00080000; break; + case 0x1e: clksor |= 0x000c0000; break; + default: + WARN_ON(1); + return -EINVAL; + } + + dpctrl |= ((1 << sor->dp.nr) - 1) << 16; + if (sor->dp.mst) + dpctrl |= 0x40000000; + if (sor->dp.ef) + dpctrl |= 0x00004000; + + nvkm_mask(device, 0x612300 + soff, 0x007c0000, clksor); + + /*XXX*/ + nvkm_msec(device, 40, NVKM_DELAY); + nvkm_mask(device, 0x612300 + soff, 0x00030000, 0x00010000); + nvkm_mask(device, 0x61c10c + loff, 0x00000003, 0x00000001); + + nvkm_mask(device, 0x61c10c + loff, 0x401f4000, dpctrl); + return 0; +} + +static void +ga102_sor_clock(struct nvkm_ior *sor) +{ + struct nvkm_device *device = sor->disp->engine.subdev.device; + u32 div2 = 0; + if (sor->asy.proto == TMDS) { + if (sor->tmds.high_speed) + div2 = 1; + } + nvkm_wr32(device, 0x00ec08 + (sor->id * 0x10), 0x00000000); + nvkm_wr32(device, 0x00ec04 + (sor->id * 0x10), div2); +} + +static const struct nvkm_ior_func +ga102_sor_hda = { + .route = { + .get = gm200_sor_route_get, + .set = gm200_sor_route_set, + }, + .state = gv100_sor_state, + .power = nv50_sor_power, + .clock = ga102_sor_clock, + .hdmi = { + .ctrl = gv100_hdmi_ctrl, + .scdc = gm200_hdmi_scdc, + }, + .dp = { + .lanes = { 0, 1, 2, 3 }, + .links = ga102_sor_dp_links, + .power = g94_sor_dp_power, + .pattern = gm107_sor_dp_pattern, + .drive = gm200_sor_dp_drive, + .vcpi = tu102_sor_dp_vcpi, + .audio = gv100_sor_dp_audio, + .audio_sym = gv100_sor_dp_audio_sym, + .watermark = gv100_sor_dp_watermark, + }, + .hda = { + .hpd = gf119_hda_hpd, + .eld = gf119_hda_eld, + .device_entry = gv100_hda_device_entry, + }, +}; + +static const struct nvkm_ior_func +ga102_sor = { + .route = { + .get = gm200_sor_route_get, + .set = gm200_sor_route_set, + }, + .state = gv100_sor_state, + .power = nv50_sor_power, + .clock = ga102_sor_clock, + .hdmi = { + .ctrl = gv100_hdmi_ctrl, + .scdc = gm200_hdmi_scdc, + }, + .dp = { + .lanes = { 0, 1, 2, 3 }, + .links = ga102_sor_dp_links, + .power = g94_sor_dp_power, + .pattern = gm107_sor_dp_pattern, + .drive = gm200_sor_dp_drive, + .vcpi = tu102_sor_dp_vcpi, + .audio = gv100_sor_dp_audio, + .audio_sym = gv100_sor_dp_audio_sym, + .watermark = gv100_sor_dp_watermark, + }, +}; + +int +ga102_sor_new(struct nvkm_disp *disp, int id) +{ + struct nvkm_device *device = disp->engine.subdev.device; + u32 hda = nvkm_rd32(device, 0x08a15c); + if (hda & BIT(id)) + return nvkm_ior_new_(&ga102_sor_hda, disp, SOR, id); + return nvkm_ior_new_(&ga102_sor, disp, SOR, id); +} diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/sortu102.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/sortu102.c index 59865a934c4b9..0cf9e8752d258 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/sortu102.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/sortu102.c @@ -23,7 +23,7 @@ #include -static void +void tu102_sor_dp_vcpi(struct nvkm_ior *sor, int head, u8 slot, u8 slot_nr, u16 pbn, u16 aligned) { diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/tu102.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/tu102.c index 883ae4151ff88..4c85d1d4fbd42 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/tu102.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/tu102.c @@ -28,7 +28,7 @@ #include #include -static int +int tu102_disp_init(struct nv50_disp *disp) { struct nvkm_device *device = disp->base.engine.subdev.device; -- GitLab From 3a70a6451551c974b1e9237c9ceb04777c83a12e Mon Sep 17 00:00:00 2001 From: David Ahern Date: Wed, 13 Jan 2021 20:09:37 -0700 Subject: [PATCH 0824/2012] selftests: Move device validation in nettest Later patch adds support for switching network namespaces before running client, server or both. Device validations need to be done after the network namespace switch, so add a helper to do it and invoke in server and client code versus inline with argument parsing. Move related argument checks as well. Signed-off-by: David Ahern Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/nettest.c | 64 ++++++++++++++++++--------- 1 file changed, 42 insertions(+), 22 deletions(-) diff --git a/tools/testing/selftests/net/nettest.c b/tools/testing/selftests/net/nettest.c index f75c53ce0a2d0..2bb06a3e68806 100644 --- a/tools/testing/selftests/net/nettest.c +++ b/tools/testing/selftests/net/nettest.c @@ -84,6 +84,7 @@ struct sock_args { unsigned int prefix_len; /* expected addresses and device index for connection */ + const char *expected_dev; int expected_ifindex; /* local address */ @@ -522,6 +523,33 @@ static int str_to_uint(const char *str, int min, int max, unsigned int *value) return -1; } +static int resolve_devices(struct sock_args *args) +{ + if (args->dev) { + args->ifindex = get_ifidx(args->dev); + if (args->ifindex < 0) { + log_error("Invalid device name\n"); + return 1; + } + } + + if (args->expected_dev) { + unsigned int tmp; + + if (str_to_uint(args->expected_dev, 0, INT_MAX, &tmp) == 0) { + args->expected_ifindex = (int)tmp; + } else { + args->expected_ifindex = get_ifidx(args->expected_dev); + if (args->expected_ifindex < 0) { + fprintf(stderr, "Invalid expected device\n"); + return 1; + } + } + } + + return 0; +} + static int expected_addr_match(struct sockaddr *sa, void *expected, const char *desc) { @@ -1190,6 +1218,9 @@ static int do_server(struct sock_args *args) fd_set rfds; int rc; + if (resolve_devices(args)) + return 1; + if (prog_timeout) ptval = &timeout; @@ -1375,6 +1406,16 @@ static int do_client(struct sock_args *args) return 1; } + if (resolve_devices(args)) + return 1; + + if ((args->use_setsockopt || args->use_cmsg) && !args->ifindex) { + fprintf(stderr, "Device binding not specified\n"); + return 1; + } + if (args->use_setsockopt || args->use_cmsg) + args->dev = NULL; + switch (args->version) { case AF_INET: sin.sin_port = htons(args->port); @@ -1703,11 +1744,6 @@ int main(int argc, char *argv[]) break; case 'd': args.dev = optarg; - args.ifindex = get_ifidx(optarg); - if (args.ifindex < 0) { - fprintf(stderr, "Invalid device name\n"); - return 1; - } break; case 'i': interactive = 1; @@ -1738,16 +1774,7 @@ int main(int argc, char *argv[]) break; case '2': - if (str_to_uint(optarg, 0, INT_MAX, &tmp) == 0) { - args.expected_ifindex = (int)tmp; - } else { - args.expected_ifindex = get_ifidx(optarg); - if (args.expected_ifindex < 0) { - fprintf(stderr, - "Invalid expected device\n"); - return 1; - } - } + args.expected_dev = optarg; break; case 'q': quiet = 1; @@ -1769,13 +1796,6 @@ int main(int argc, char *argv[]) return 1; } - if ((args.use_setsockopt || args.use_cmsg) && !args.ifindex) { - fprintf(stderr, "Device binding not specified\n"); - return 1; - } - if (args.use_setsockopt || args.use_cmsg) - args.dev = NULL; - if (iter == 0) { fprintf(stderr, "Invalid number of messages to send\n"); return 1; -- GitLab From 6fc90e18994c656a18c23a2a79bf8570d6278dce Mon Sep 17 00:00:00 2001 From: David Ahern Date: Wed, 13 Jan 2021 20:09:38 -0700 Subject: [PATCH 0825/2012] selftests: Move convert_addr up in nettest convert_addr needs to be invoked in a different location. Move the code up to avoid a forward declaration. Code move only. Signed-off-by: David Ahern Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/nettest.c | 252 +++++++++++++------------- 1 file changed, 126 insertions(+), 126 deletions(-) diff --git a/tools/testing/selftests/net/nettest.c b/tools/testing/selftests/net/nettest.c index 2bb06a3e68806..337ae54e252d8 100644 --- a/tools/testing/selftests/net/nettest.c +++ b/tools/testing/selftests/net/nettest.c @@ -627,6 +627,132 @@ static int show_sockstat(int sd, struct sock_args *args) return rc; } +enum addr_type { + ADDR_TYPE_LOCAL, + ADDR_TYPE_REMOTE, + ADDR_TYPE_MCAST, + ADDR_TYPE_EXPECTED_LOCAL, + ADDR_TYPE_EXPECTED_REMOTE, + ADDR_TYPE_MD5_PREFIX, +}; + +static int convert_addr(struct sock_args *args, const char *_str, + enum addr_type atype) +{ + int pfx_len_max = args->version == AF_INET6 ? 128 : 32; + int family = args->version; + char *str, *dev, *sep; + struct in6_addr *in6; + struct in_addr *in; + const char *desc; + void *addr; + int rc = 0; + + str = strdup(_str); + if (!str) + return -ENOMEM; + + switch (atype) { + case ADDR_TYPE_LOCAL: + desc = "local"; + addr = &args->local_addr; + break; + case ADDR_TYPE_REMOTE: + desc = "remote"; + addr = &args->remote_addr; + break; + case ADDR_TYPE_MCAST: + desc = "mcast grp"; + addr = &args->grp; + break; + case ADDR_TYPE_EXPECTED_LOCAL: + desc = "expected local"; + addr = &args->expected_laddr; + break; + case ADDR_TYPE_EXPECTED_REMOTE: + desc = "expected remote"; + addr = &args->expected_raddr; + break; + case ADDR_TYPE_MD5_PREFIX: + desc = "md5 prefix"; + if (family == AF_INET) { + args->md5_prefix.v4.sin_family = AF_INET; + addr = &args->md5_prefix.v4.sin_addr; + } else if (family == AF_INET6) { + args->md5_prefix.v6.sin6_family = AF_INET6; + addr = &args->md5_prefix.v6.sin6_addr; + } else + return 1; + + sep = strchr(str, '/'); + if (sep) { + *sep = '\0'; + sep++; + if (str_to_uint(sep, 1, pfx_len_max, + &args->prefix_len) != 0) { + fprintf(stderr, "Invalid port\n"); + return 1; + } + } else { + args->prefix_len = pfx_len_max; + } + break; + default: + log_error("unknown address type"); + exit(1); + } + + switch (family) { + case AF_INET: + in = (struct in_addr *) addr; + if (str) { + if (inet_pton(AF_INET, str, in) == 0) { + log_error("Invalid %s IP address\n", desc); + rc = -1; + goto out; + } + } else { + in->s_addr = htonl(INADDR_ANY); + } + break; + + case AF_INET6: + dev = strchr(str, '%'); + if (dev) { + *dev = '\0'; + dev++; + } + + in6 = (struct in6_addr *) addr; + if (str) { + if (inet_pton(AF_INET6, str, in6) == 0) { + log_error("Invalid %s IPv6 address\n", desc); + rc = -1; + goto out; + } + } else { + *in6 = in6addr_any; + } + if (dev) { + args->scope_id = get_ifidx(dev); + if (args->scope_id < 0) { + log_error("Invalid scope on %s IPv6 address\n", + desc); + rc = -1; + goto out; + } + } + break; + + default: + log_error("Invalid address family\n"); + } + +out: + free(str); + return rc; +} + static int get_index_from_cmsg(struct msghdr *m) { struct cmsghdr *cm; @@ -1460,132 +1586,6 @@ out: return rc; } -enum addr_type { - ADDR_TYPE_LOCAL, - ADDR_TYPE_REMOTE, - ADDR_TYPE_MCAST, - ADDR_TYPE_EXPECTED_LOCAL, - ADDR_TYPE_EXPECTED_REMOTE, - ADDR_TYPE_MD5_PREFIX, -}; - -static int convert_addr(struct sock_args *args, const char *_str, - enum addr_type atype) -{ - int pfx_len_max = args->version == AF_INET6 ? 128 : 32; - int family = args->version; - char *str, *dev, *sep; - struct in6_addr *in6; - struct in_addr *in; - const char *desc; - void *addr; - int rc = 0; - - str = strdup(_str); - if (!str) - return -ENOMEM; - - switch (atype) { - case ADDR_TYPE_LOCAL: - desc = "local"; - addr = &args->local_addr; - break; - case ADDR_TYPE_REMOTE: - desc = "remote"; - addr = &args->remote_addr; - break; - case ADDR_TYPE_MCAST: - desc = "mcast grp"; - addr = &args->grp; - break; - case ADDR_TYPE_EXPECTED_LOCAL: - desc = "expected local"; - addr = &args->expected_laddr; - break; - case ADDR_TYPE_EXPECTED_REMOTE: - desc = "expected remote"; - addr = &args->expected_raddr; - break; - case ADDR_TYPE_MD5_PREFIX: - desc = "md5 prefix"; - if (family == AF_INET) { - args->md5_prefix.v4.sin_family = AF_INET; - addr = &args->md5_prefix.v4.sin_addr; - } else if (family == AF_INET6) { - args->md5_prefix.v6.sin6_family = AF_INET6; - addr = &args->md5_prefix.v6.sin6_addr; - } else - return 1; - - sep = strchr(str, '/'); - if (sep) { - *sep = '\0'; - sep++; - if (str_to_uint(sep, 1, pfx_len_max, - &args->prefix_len) != 0) { - fprintf(stderr, "Invalid port\n"); - return 1; - } - } else { - args->prefix_len = pfx_len_max; - } - break; - default: - log_error("unknown address type"); - exit(1); - } - - switch (family) { - case AF_INET: - in = (struct in_addr *) addr; - if (str) { - if (inet_pton(AF_INET, str, in) == 0) { - log_error("Invalid %s IP address\n", desc); - rc = -1; - goto out; - } - } else { - in->s_addr = htonl(INADDR_ANY); - } - break; - - case AF_INET6: - dev = strchr(str, '%'); - if (dev) { - *dev = '\0'; - dev++; - } - - in6 = (struct in6_addr *) addr; - if (str) { - if (inet_pton(AF_INET6, str, in6) == 0) { - log_error("Invalid %s IPv6 address\n", desc); - rc = -1; - goto out; - } - } else { - *in6 = in6addr_any; - } - if (dev) { - args->scope_id = get_ifidx(dev); - if (args->scope_id < 0) { - log_error("Invalid scope on %s IPv6 address\n", - desc); - rc = -1; - goto out; - } - } - break; - - default: - log_error("Invalid address family\n"); - } - -out: - free(str); - return rc; -} - static char *random_msg(int len) { int i, n = 0, olen = len + 1; -- GitLab From f2f575840a598ba9a6685cdafac498be4f118757 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Wed, 13 Jan 2021 20:09:39 -0700 Subject: [PATCH 0826/2012] selftests: Move address validation in nettest IPv6 addresses can have a device name to declare a scope (e.g., fe80::5054:ff:fe12:3456%eth0). The next patch adds support to switch network namespace before running client or server code (or both), so move the address validation to the server and client functions. IPv4 multicast groups do not have the device scope in the address specification, so they can be validated inline with option parsing. Signed-off-by: David Ahern Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/nettest.c | 60 +++++++++++++++++++-------- 1 file changed, 43 insertions(+), 17 deletions(-) diff --git a/tools/testing/selftests/net/nettest.c b/tools/testing/selftests/net/nettest.c index 337ae54e252d8..3b083fad3577b 100644 --- a/tools/testing/selftests/net/nettest.c +++ b/tools/testing/selftests/net/nettest.c @@ -43,12 +43,14 @@ struct sock_args { /* local address */ + const char *local_addr_str; union { struct in_addr in; struct in6_addr in6; } local_addr; /* remote address */ + const char *remote_addr_str; union { struct in_addr in; struct in6_addr in6; @@ -77,6 +79,7 @@ struct sock_args { const char *password; /* prefix for MD5 password */ + const char *md5_prefix_str; union { struct sockaddr_in v4; struct sockaddr_in6 v6; @@ -88,12 +91,14 @@ struct sock_args { int expected_ifindex; /* local address */ + const char *expected_laddr_str; union { struct in_addr in; struct in6_addr in6; } expected_laddr; /* remote address */ + const char *expected_raddr_str; union { struct in_addr in; struct in6_addr in6; @@ -753,6 +758,34 @@ out: return rc; } +static int validate_addresses(struct sock_args *args) +{ + if (args->local_addr_str && + convert_addr(args, args->local_addr_str, ADDR_TYPE_LOCAL) < 0) + return 1; + + if (args->remote_addr_str && + convert_addr(args, args->remote_addr_str, ADDR_TYPE_REMOTE) < 0) + return 1; + + if (args->md5_prefix_str && + convert_addr(args, args->md5_prefix_str, + ADDR_TYPE_MD5_PREFIX) < 0) + return 1; + + if (args->expected_laddr_str && + convert_addr(args, args->expected_laddr_str, + ADDR_TYPE_EXPECTED_LOCAL)) + return 1; + + if (args->expected_raddr_str && + convert_addr(args, args->expected_raddr_str, + ADDR_TYPE_EXPECTED_REMOTE)) + return 1; + + return 0; +} + static int get_index_from_cmsg(struct msghdr *m) { struct cmsghdr *cm; @@ -1344,7 +1377,7 @@ static int do_server(struct sock_args *args) fd_set rfds; int rc; - if (resolve_devices(args)) + if (resolve_devices(args) || validate_addresses(args)) return 1; if (prog_timeout) @@ -1532,7 +1565,7 @@ static int do_client(struct sock_args *args) return 1; } - if (resolve_devices(args)) + if (resolve_devices(args) || validate_addresses(args)) return 1; if ((args->use_setsockopt || args->use_cmsg) && !args->ifindex) { @@ -1680,13 +1713,11 @@ int main(int argc, char *argv[]) break; case 'l': args.has_local_ip = 1; - if (convert_addr(&args, optarg, ADDR_TYPE_LOCAL) < 0) - return 1; + args.local_addr_str = optarg; break; case 'r': args.has_remote_ip = 1; - if (convert_addr(&args, optarg, ADDR_TYPE_REMOTE) < 0) - return 1; + args.remote_addr_str = optarg; break; case 'p': if (str_to_uint(optarg, 1, 65535, &tmp) != 0) { @@ -1733,8 +1764,7 @@ int main(int argc, char *argv[]) args.password = optarg; break; case 'm': - if (convert_addr(&args, optarg, ADDR_TYPE_MD5_PREFIX) < 0) - return 1; + args.md5_prefix_str = optarg; break; case 'S': args.use_setsockopt = 1; @@ -1762,16 +1792,11 @@ int main(int argc, char *argv[]) break; case '0': args.has_expected_laddr = 1; - if (convert_addr(&args, optarg, - ADDR_TYPE_EXPECTED_LOCAL)) - return 1; + args.expected_laddr_str = optarg; break; case '1': args.has_expected_raddr = 1; - if (convert_addr(&args, optarg, - ADDR_TYPE_EXPECTED_REMOTE)) - return 1; - + args.expected_raddr_str = optarg; break; case '2': args.expected_dev = optarg; @@ -1786,12 +1811,13 @@ int main(int argc, char *argv[]) } if (args.password && - ((!args.has_remote_ip && !args.prefix_len) || args.type != SOCK_STREAM)) { + ((!args.has_remote_ip && !args.md5_prefix_str) || + args.type != SOCK_STREAM)) { log_error("MD5 passwords apply to TCP only and require a remote ip for the password\n"); return 1; } - if (args.prefix_len && !args.password) { + if (args.md5_prefix_str && !args.password) { log_error("Prefix range for MD5 protection specified without a password\n"); return 1; } -- GitLab From 092e0ceb12f28450c8db095b5f417fde923abc07 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Wed, 13 Jan 2021 20:09:40 -0700 Subject: [PATCH 0827/2012] selftests: Add options to set network namespace to nettest Add options to specify server and client network namespace to use before running respective functions. Signed-off-by: Seth David Schoen Signed-off-by: David Ahern Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/nettest.c | 56 ++++++++++++++++++++++++++- 1 file changed, 55 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/nettest.c b/tools/testing/selftests/net/nettest.c index 3b083fad3577b..cc9635b6461f5 100644 --- a/tools/testing/selftests/net/nettest.c +++ b/tools/testing/selftests/net/nettest.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -17,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -34,6 +36,8 @@ #define DEFAULT_PORT 12345 +#define NS_PREFIX "/run/netns/" + #ifndef MAX #define MAX(a, b) ((a) > (b) ? (a) : (b)) #endif @@ -77,6 +81,9 @@ struct sock_args { const char *dev; int ifindex; + const char *clientns; + const char *serverns; + const char *password; /* prefix for MD5 password */ const char *md5_prefix_str; @@ -213,6 +220,27 @@ static void log_address(const char *desc, struct sockaddr *sa) fflush(stdout); } +static int switch_ns(const char *ns) +{ + char path[PATH_MAX]; + int fd, ret; + + if (geteuid()) + log_error("warning: likely need root to set netns %s!\n", ns); + + snprintf(path, sizeof(path), "%s%s", NS_PREFIX, ns); + fd = open(path, 0); + if (fd < 0) { + log_err_errno("Failed to open netns path; can not switch netns"); + return 1; + } + + ret = setns(fd, CLONE_NEWNET); + close(fd); + + return ret; +} + static int tcp_md5sig(int sd, void *addr, socklen_t alen, struct sock_args *args) { int keylen = strlen(args->password); @@ -1377,6 +1405,15 @@ static int do_server(struct sock_args *args) fd_set rfds; int rc; + if (args->serverns) { + if (switch_ns(args->serverns)) { + log_error("Could not set server netns to %s\n", + args->serverns); + return 1; + } + log_msg("Switched server netns\n"); + } + if (resolve_devices(args) || validate_addresses(args)) return 1; @@ -1565,6 +1602,15 @@ static int do_client(struct sock_args *args) return 1; } + if (args->clientns) { + if (switch_ns(args->clientns)) { + log_error("Could not set client netns to %s\n", + args->clientns); + return 1; + } + log_msg("Switched client netns\n"); + } + if (resolve_devices(args) || validate_addresses(args)) return 1; @@ -1642,7 +1688,7 @@ static char *random_msg(int len) return m; } -#define GETOPT_STR "sr:l:p:t:g:P:DRn:M:m:d:SCi6L:0:1:2:Fbq" +#define GETOPT_STR "sr:l:p:t:g:P:DRn:M:m:d:N:O:SCi6L:0:1:2:Fbq" static void print_usage(char *prog) { @@ -1656,6 +1702,8 @@ static void print_usage(char *prog) " -t timeout seconds (default: none)\n" "\n" "Optional:\n" + " -N ns set client to network namespace ns (requires root)\n" + " -O ns set server to network namespace ns (requires root)\n" " -F Restart server loop\n" " -6 IPv6 (default is IPv4)\n" " -P proto protocol for socket: icmp, ospf (default: none)\n" @@ -1757,6 +1805,12 @@ int main(int argc, char *argv[]) case 'n': iter = atoi(optarg); break; + case 'N': + args.clientns = optarg; + break; + case 'O': + args.serverns = optarg; + break; case 'L': msg = random_msg(atoi(optarg)); break; -- GitLab From 6469403c97b486285365c590a8f9eaad9c72f5c5 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Wed, 13 Jan 2021 20:09:41 -0700 Subject: [PATCH 0828/2012] selftests: Add support to nettest to run both client and server Add option to nettest to run both client and server within a single instance. Client forks a child process to run the server code. A pipe is used for the server to tell the client it has initialized and is ready or had an error. This avoid unnecessary sleeps to handle such race when the commands are separately launched. Signed-off-by: Seth David Schoen Signed-off-by: David Ahern Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/nettest.c | 93 ++++++++++++++++++++++++--- 1 file changed, 85 insertions(+), 8 deletions(-) diff --git a/tools/testing/selftests/net/nettest.c b/tools/testing/selftests/net/nettest.c index cc9635b6461f5..685cbe8933ded 100644 --- a/tools/testing/selftests/net/nettest.c +++ b/tools/testing/selftests/net/nettest.c @@ -1395,8 +1395,19 @@ err: return -1; } -static int do_server(struct sock_args *args) +static void ipc_write(int fd, int message) { + /* Not in both_mode, so there's no process to signal */ + if (fd < 0) + return; + + if (write(fd, &message, sizeof(message)) < 0) + log_err_errno("Failed to send client status"); +} + +static int do_server(struct sock_args *args, int ipc_fd) +{ + /* ipc_fd = -1 if no parent process to signal */ struct timeval timeout = { .tv_sec = prog_timeout }, *ptval = NULL; unsigned char addr[sizeof(struct sockaddr_in6)] = {}; socklen_t alen = sizeof(addr); @@ -1409,13 +1420,13 @@ static int do_server(struct sock_args *args) if (switch_ns(args->serverns)) { log_error("Could not set server netns to %s\n", args->serverns); - return 1; + goto err_exit; } log_msg("Switched server netns\n"); } if (resolve_devices(args) || validate_addresses(args)) - return 1; + goto err_exit; if (prog_timeout) ptval = &timeout; @@ -1426,14 +1437,16 @@ static int do_server(struct sock_args *args) lsd = lsock_init(args); if (lsd < 0) - return 1; + goto err_exit; if (args->bind_test_only) { close(lsd); + ipc_write(ipc_fd, 1); return 0; } if (args->type != SOCK_STREAM) { + ipc_write(ipc_fd, 1); rc = msg_loop(0, lsd, (void *) addr, alen, args); close(lsd); return rc; @@ -1441,9 +1454,10 @@ static int do_server(struct sock_args *args) if (args->password && tcp_md5_remote(lsd, args)) { close(lsd); - return 1; + goto err_exit; } + ipc_write(ipc_fd, 1); while (1) { log_msg("\n"); log_msg("waiting for client connection.\n"); @@ -1491,6 +1505,9 @@ static int do_server(struct sock_args *args) close(lsd); return rc; +err_exit: + ipc_write(ipc_fd, 0); + return 1; } static int wait_for_connect(int sd) @@ -1688,7 +1705,43 @@ static char *random_msg(int len) return m; } -#define GETOPT_STR "sr:l:p:t:g:P:DRn:M:m:d:N:O:SCi6L:0:1:2:Fbq" +static int ipc_child(int fd, struct sock_args *args) +{ + server_mode = 1; /* to tell log_msg in case we are in both_mode */ + + return do_server(args, fd); +} + +static int ipc_parent(int cpid, int fd, struct sock_args *args) +{ + int client_status; + int status; + int buf; + + /* do the client-side function here in the parent process, + * waiting to be told when to continue + */ + if (read(fd, &buf, sizeof(buf)) <= 0) { + log_err_errno("Failed to read IPC status from status"); + return 1; + } + if (!buf) { + log_error("Server failed; can not continue\n"); + return 1; + } + log_msg("Server is ready\n"); + + client_status = do_client(args); + log_msg("parent is done!\n"); + + if (kill(cpid, 0) == 0) + kill(cpid, SIGKILL); + + wait(&status); + return client_status; +} + +#define GETOPT_STR "sr:l:p:t:g:P:DRn:M:m:d:BN:O:SCi6L:0:1:2:Fbq" static void print_usage(char *prog) { @@ -1702,6 +1755,7 @@ static void print_usage(char *prog) " -t timeout seconds (default: none)\n" "\n" "Optional:\n" + " -B do both client and server via fork and IPC\n" " -N ns set client to network namespace ns (requires root)\n" " -O ns set server to network namespace ns (requires root)\n" " -F Restart server loop\n" @@ -1740,8 +1794,11 @@ int main(int argc, char *argv[]) .port = DEFAULT_PORT, }; struct protoent *pe; + int both_mode = 0; unsigned int tmp; int forever = 0; + int fd[2]; + int cpid; /* process inputs */ extern char *optarg; @@ -1753,6 +1810,9 @@ int main(int argc, char *argv[]) while ((rc = getopt(argc, argv, GETOPT_STR)) != -1) { switch (rc) { + case 'B': + both_mode = 1; + break; case 's': server_mode = 1; break; @@ -1892,7 +1952,7 @@ int main(int argc, char *argv[]) return 1; } - if (!server_mode && !args.has_grp && + if ((both_mode || !server_mode) && !args.has_grp && !args.has_remote_ip && !args.has_local_ip) { fprintf(stderr, "Local (server mode) or remote IP (client IP) required\n"); @@ -1904,9 +1964,26 @@ int main(int argc, char *argv[]) msg = NULL; } + if (both_mode) { + if (pipe(fd) < 0) { + perror("pipe"); + exit(1); + } + + cpid = fork(); + if (cpid < 0) { + perror("fork"); + exit(1); + } + if (cpid) + return ipc_parent(cpid, fd[0], &args); + + return ipc_child(fd[1], &args); + } + if (server_mode) { do { - rc = do_server(&args); + rc = do_server(&args, -1); } while (forever); return rc; -- GitLab From f222c37cf75a8a626a0ca9435378e9f87b0239f1 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Wed, 13 Jan 2021 20:09:42 -0700 Subject: [PATCH 0829/2012] selftests: Use separate stdout and stderr buffers in nettest When a single instance of nettest is doing both client and server modes, stdout and stderr messages can get interlaced and become unreadable. Allocate a new set of buffers for the child process handling server mode. Signed-off-by: David Ahern Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/nettest.c | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/nettest.c b/tools/testing/selftests/net/nettest.c index 685cbe8933ded..aba3615ce9775 100644 --- a/tools/testing/selftests/net/nettest.c +++ b/tools/testing/selftests/net/nettest.c @@ -1707,9 +1707,28 @@ static char *random_msg(int len) static int ipc_child(int fd, struct sock_args *args) { + char *outbuf, *errbuf; + int rc = 1; + + outbuf = malloc(4096); + errbuf = malloc(4096); + if (!outbuf || !errbuf) { + fprintf(stderr, "server: Failed to allocate buffers for stdout and stderr\n"); + goto out; + } + + setbuffer(stdout, outbuf, 4096); + setbuffer(stderr, errbuf, 4096); + server_mode = 1; /* to tell log_msg in case we are in both_mode */ - return do_server(args, fd); + rc = do_server(args, fd); + +out: + free(outbuf); + free(errbuf); + + return rc; } static int ipc_parent(int cpid, int fd, struct sock_args *args) -- GitLab From db9993359e58761e04d0dbee098dbf74d6a1dda8 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Wed, 13 Jan 2021 20:09:43 -0700 Subject: [PATCH 0830/2012] selftests: Add missing newline in nettest error messages A few logging lines are missing the newline, or need it moved up for cleaner logging. Signed-off-by: David Ahern Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/nettest.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/tools/testing/selftests/net/nettest.c b/tools/testing/selftests/net/nettest.c index aba3615ce9775..186262a702bf4 100644 --- a/tools/testing/selftests/net/nettest.c +++ b/tools/testing/selftests/net/nettest.c @@ -199,7 +199,7 @@ static void log_address(const char *desc, struct sockaddr *sa) if (sa->sa_family == AF_INET) { struct sockaddr_in *s = (struct sockaddr_in *) sa; - log_msg("%s %s:%d", + log_msg("%s %s:%d\n", desc, inet_ntop(AF_INET, &s->sin_addr, addrstr, sizeof(addrstr)), @@ -208,15 +208,13 @@ static void log_address(const char *desc, struct sockaddr *sa) } else if (sa->sa_family == AF_INET6) { struct sockaddr_in6 *s6 = (struct sockaddr_in6 *) sa; - log_msg("%s [%s]:%d", + log_msg("%s [%s]:%d\n", desc, inet_ntop(AF_INET6, &s6->sin6_addr, addrstr, sizeof(addrstr)), ntohs(s6->sin6_port)); } - printf("\n"); - fflush(stdout); } @@ -594,7 +592,7 @@ static int expected_addr_match(struct sockaddr *sa, void *expected, struct in_addr *exp_in = (struct in_addr *) expected; if (s->sin_addr.s_addr != exp_in->s_addr) { - log_error("%s address does not match expected %s", + log_error("%s address does not match expected %s\n", desc, inet_ntop(AF_INET, exp_in, addrstr, sizeof(addrstr))); @@ -605,14 +603,14 @@ static int expected_addr_match(struct sockaddr *sa, void *expected, struct in6_addr *exp_in = (struct in6_addr *) expected; if (memcmp(&s6->sin6_addr, exp_in, sizeof(*exp_in))) { - log_error("%s address does not match expected %s", + log_error("%s address does not match expected %s\n", desc, inet_ntop(AF_INET6, exp_in, addrstr, sizeof(addrstr))); rc = 1; } } else { - log_error("%s address does not match expected - unknown family", + log_error("%s address does not match expected - unknown family\n", desc); rc = 1; } @@ -731,7 +729,7 @@ static int convert_addr(struct sock_args *args, const char *_str, } break; default: - log_error("unknown address type"); + log_error("unknown address type\n"); exit(1); } -- GitLab From 9a8d584964fc808080a855b435507fe5094f2160 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Wed, 13 Jan 2021 20:09:44 -0700 Subject: [PATCH 0831/2012] selftests: Make address validation apply only to client mode When a single instance of nettest is used for client and server make sure address validation is only done for client mode. Signed-off-by: David Ahern Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/nettest.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tools/testing/selftests/net/nettest.c b/tools/testing/selftests/net/nettest.c index 186262a702bf4..0e01a74475216 100644 --- a/tools/testing/selftests/net/nettest.c +++ b/tools/testing/selftests/net/nettest.c @@ -1720,6 +1720,12 @@ static int ipc_child(int fd, struct sock_args *args) server_mode = 1; /* to tell log_msg in case we are in both_mode */ + /* when running in both mode, address validation applies + * solely to client side + */ + args->has_expected_laddr = 0; + args->has_expected_raddr = 0; + rc = do_server(args, fd); out: -- GitLab From a824e261d7cd95b0f04a3feff51f8e0fe2881b44 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Wed, 13 Jan 2021 20:09:45 -0700 Subject: [PATCH 0832/2012] selftests: Consistently specify address for MD5 protection nettest started with -r as the remote address for MD5 passwords. The -m argument was added to use prefixes with a length when that feature was added to the kernel. Since -r is used to specify remote address for client mode, change nettest to only use -m for MD5 passwords and update fcnal-test script. Signed-off-by: David Ahern Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/fcnal-test.sh | 60 +++++++++++------------ tools/testing/selftests/net/nettest.c | 6 +-- 2 files changed, 33 insertions(+), 33 deletions(-) diff --git a/tools/testing/selftests/net/fcnal-test.sh b/tools/testing/selftests/net/fcnal-test.sh index 02b0b9ead40b9..edd33f83f80ee 100755 --- a/tools/testing/selftests/net/fcnal-test.sh +++ b/tools/testing/selftests/net/fcnal-test.sh @@ -801,7 +801,7 @@ ipv4_tcp_md5_novrf() # basic use case log_start - run_cmd nettest -s -M ${MD5_PW} -r ${NSB_IP} & + run_cmd nettest -s -M ${MD5_PW} -m ${NSB_IP} & sleep 1 run_cmd_nsb nettest -r ${NSA_IP} -M ${MD5_PW} log_test $? 0 "MD5: Single address config" @@ -817,7 +817,7 @@ ipv4_tcp_md5_novrf() # wrong password log_start show_hint "Should timeout since client uses wrong password" - run_cmd nettest -s -M ${MD5_PW} -r ${NSB_IP} & + run_cmd nettest -s -M ${MD5_PW} -m ${NSB_IP} & sleep 1 run_cmd_nsb nettest -r ${NSA_IP} -M ${MD5_WRONG_PW} log_test $? 2 "MD5: Client uses wrong password" @@ -825,7 +825,7 @@ ipv4_tcp_md5_novrf() # client from different address log_start show_hint "Should timeout due to MD5 mismatch" - run_cmd nettest -s -M ${MD5_PW} -r ${NSB_LO_IP} & + run_cmd nettest -s -M ${MD5_PW} -m ${NSB_LO_IP} & sleep 1 run_cmd_nsb nettest -r ${NSA_IP} -M ${MD5_PW} log_test $? 2 "MD5: Client address does not match address configured with password" @@ -869,7 +869,7 @@ ipv4_tcp_md5() # basic use case log_start - run_cmd nettest -s -d ${VRF} -M ${MD5_PW} -r ${NSB_IP} & + run_cmd nettest -s -d ${VRF} -M ${MD5_PW} -m ${NSB_IP} & sleep 1 run_cmd_nsb nettest -r ${NSA_IP} -M ${MD5_PW} log_test $? 0 "MD5: VRF: Single address config" @@ -885,7 +885,7 @@ ipv4_tcp_md5() # wrong password log_start show_hint "Should timeout since client uses wrong password" - run_cmd nettest -s -d ${VRF} -M ${MD5_PW} -r ${NSB_IP} & + run_cmd nettest -s -d ${VRF} -M ${MD5_PW} -m ${NSB_IP} & sleep 1 run_cmd_nsb nettest -r ${NSA_IP} -M ${MD5_WRONG_PW} log_test $? 2 "MD5: VRF: Client uses wrong password" @@ -893,7 +893,7 @@ ipv4_tcp_md5() # client from different address log_start show_hint "Should timeout since server config differs from client" - run_cmd nettest -s -d ${VRF} -M ${MD5_PW} -r ${NSB_LO_IP} & + run_cmd nettest -s -d ${VRF} -M ${MD5_PW} -m ${NSB_LO_IP} & sleep 1 run_cmd_nsb nettest -r ${NSA_IP} -M ${MD5_PW} log_test $? 2 "MD5: VRF: Client address does not match address configured with password" @@ -930,31 +930,31 @@ ipv4_tcp_md5() # log_start - run_cmd nettest -s -d ${VRF} -M ${MD5_PW} -r ${NSB_IP} & - run_cmd nettest -s -M ${MD5_WRONG_PW} -r ${NSB_IP} & + run_cmd nettest -s -d ${VRF} -M ${MD5_PW} -m ${NSB_IP} & + run_cmd nettest -s -M ${MD5_WRONG_PW} -m ${NSB_IP} & sleep 1 run_cmd_nsb nettest -r ${NSA_IP} -M ${MD5_PW} log_test $? 0 "MD5: VRF: Single address config in default VRF and VRF, conn in VRF" log_start - run_cmd nettest -s -d ${VRF} -M ${MD5_PW} -r ${NSB_IP} & - run_cmd nettest -s -M ${MD5_WRONG_PW} -r ${NSB_IP} & + run_cmd nettest -s -d ${VRF} -M ${MD5_PW} -m ${NSB_IP} & + run_cmd nettest -s -M ${MD5_WRONG_PW} -m ${NSB_IP} & sleep 1 run_cmd_nsc nettest -r ${NSA_IP} -M ${MD5_WRONG_PW} log_test $? 0 "MD5: VRF: Single address config in default VRF and VRF, conn in default VRF" log_start show_hint "Should timeout since client in default VRF uses VRF password" - run_cmd nettest -s -d ${VRF} -M ${MD5_PW} -r ${NSB_IP} & - run_cmd nettest -s -M ${MD5_WRONG_PW} -r ${NSB_IP} & + run_cmd nettest -s -d ${VRF} -M ${MD5_PW} -m ${NSB_IP} & + run_cmd nettest -s -M ${MD5_WRONG_PW} -m ${NSB_IP} & sleep 1 run_cmd_nsc nettest -r ${NSA_IP} -M ${MD5_PW} log_test $? 2 "MD5: VRF: Single address config in default VRF and VRF, conn in default VRF with VRF pw" log_start show_hint "Should timeout since client in VRF uses default VRF password" - run_cmd nettest -s -d ${VRF} -M ${MD5_PW} -r ${NSB_IP} & - run_cmd nettest -s -M ${MD5_WRONG_PW} -r ${NSB_IP} & + run_cmd nettest -s -d ${VRF} -M ${MD5_PW} -m ${NSB_IP} & + run_cmd nettest -s -M ${MD5_WRONG_PW} -m ${NSB_IP} & sleep 1 run_cmd_nsb nettest -r ${NSA_IP} -M ${MD5_WRONG_PW} log_test $? 2 "MD5: VRF: Single address config in default VRF and VRF, conn in VRF with default VRF pw" @@ -993,7 +993,7 @@ ipv4_tcp_md5() # negative tests # log_start - run_cmd nettest -s -d ${NSA_DEV} -M ${MD5_PW} -r ${NSB_IP} + run_cmd nettest -s -d ${NSA_DEV} -M ${MD5_PW} -m ${NSB_IP} log_test $? 1 "MD5: VRF: Device must be a VRF - single address" log_start @@ -2265,7 +2265,7 @@ ipv6_tcp_md5_novrf() # basic use case log_start - run_cmd nettest -6 -s -M ${MD5_PW} -r ${NSB_IP6} & + run_cmd nettest -6 -s -M ${MD5_PW} -m ${NSB_IP6} & sleep 1 run_cmd_nsb nettest -6 -r ${NSA_IP6} -M ${MD5_PW} log_test $? 0 "MD5: Single address config" @@ -2281,7 +2281,7 @@ ipv6_tcp_md5_novrf() # wrong password log_start show_hint "Should timeout since client uses wrong password" - run_cmd nettest -6 -s -M ${MD5_PW} -r ${NSB_IP6} & + run_cmd nettest -6 -s -M ${MD5_PW} -m ${NSB_IP6} & sleep 1 run_cmd_nsb nettest -6 -r ${NSA_IP6} -M ${MD5_WRONG_PW} log_test $? 2 "MD5: Client uses wrong password" @@ -2289,7 +2289,7 @@ ipv6_tcp_md5_novrf() # client from different address log_start show_hint "Should timeout due to MD5 mismatch" - run_cmd nettest -6 -s -M ${MD5_PW} -r ${NSB_LO_IP6} & + run_cmd nettest -6 -s -M ${MD5_PW} -m ${NSB_LO_IP6} & sleep 1 run_cmd_nsb nettest -6 -r ${NSA_IP6} -M ${MD5_PW} log_test $? 2 "MD5: Client address does not match address configured with password" @@ -2333,7 +2333,7 @@ ipv6_tcp_md5() # basic use case log_start - run_cmd nettest -6 -s -d ${VRF} -M ${MD5_PW} -r ${NSB_IP6} & + run_cmd nettest -6 -s -d ${VRF} -M ${MD5_PW} -m ${NSB_IP6} & sleep 1 run_cmd_nsb nettest -6 -r ${NSA_IP6} -M ${MD5_PW} log_test $? 0 "MD5: VRF: Single address config" @@ -2349,7 +2349,7 @@ ipv6_tcp_md5() # wrong password log_start show_hint "Should timeout since client uses wrong password" - run_cmd nettest -6 -s -d ${VRF} -M ${MD5_PW} -r ${NSB_IP6} & + run_cmd nettest -6 -s -d ${VRF} -M ${MD5_PW} -m ${NSB_IP6} & sleep 1 run_cmd_nsb nettest -6 -r ${NSA_IP6} -M ${MD5_WRONG_PW} log_test $? 2 "MD5: VRF: Client uses wrong password" @@ -2357,7 +2357,7 @@ ipv6_tcp_md5() # client from different address log_start show_hint "Should timeout since server config differs from client" - run_cmd nettest -6 -s -d ${VRF} -M ${MD5_PW} -r ${NSB_LO_IP6} & + run_cmd nettest -6 -s -d ${VRF} -M ${MD5_PW} -m ${NSB_LO_IP6} & sleep 1 run_cmd_nsb nettest -6 -r ${NSA_IP6} -M ${MD5_PW} log_test $? 2 "MD5: VRF: Client address does not match address configured with password" @@ -2394,31 +2394,31 @@ ipv6_tcp_md5() # log_start - run_cmd nettest -6 -s -d ${VRF} -M ${MD5_PW} -r ${NSB_IP6} & - run_cmd nettest -6 -s -M ${MD5_WRONG_PW} -r ${NSB_IP6} & + run_cmd nettest -6 -s -d ${VRF} -M ${MD5_PW} -m ${NSB_IP6} & + run_cmd nettest -6 -s -M ${MD5_WRONG_PW} -m ${NSB_IP6} & sleep 1 run_cmd_nsb nettest -6 -r ${NSA_IP6} -M ${MD5_PW} log_test $? 0 "MD5: VRF: Single address config in default VRF and VRF, conn in VRF" log_start - run_cmd nettest -6 -s -d ${VRF} -M ${MD5_PW} -r ${NSB_IP6} & - run_cmd nettest -6 -s -M ${MD5_WRONG_PW} -r ${NSB_IP6} & + run_cmd nettest -6 -s -d ${VRF} -M ${MD5_PW} -m ${NSB_IP6} & + run_cmd nettest -6 -s -M ${MD5_WRONG_PW} -m ${NSB_IP6} & sleep 1 run_cmd_nsc nettest -6 -r ${NSA_IP6} -M ${MD5_WRONG_PW} log_test $? 0 "MD5: VRF: Single address config in default VRF and VRF, conn in default VRF" log_start show_hint "Should timeout since client in default VRF uses VRF password" - run_cmd nettest -6 -s -d ${VRF} -M ${MD5_PW} -r ${NSB_IP6} & - run_cmd nettest -6 -s -M ${MD5_WRONG_PW} -r ${NSB_IP6} & + run_cmd nettest -6 -s -d ${VRF} -M ${MD5_PW} -m ${NSB_IP6} & + run_cmd nettest -6 -s -M ${MD5_WRONG_PW} -m ${NSB_IP6} & sleep 1 run_cmd_nsc nettest -6 -r ${NSA_IP6} -M ${MD5_PW} log_test $? 2 "MD5: VRF: Single address config in default VRF and VRF, conn in default VRF with VRF pw" log_start show_hint "Should timeout since client in VRF uses default VRF password" - run_cmd nettest -6 -s -d ${VRF} -M ${MD5_PW} -r ${NSB_IP6} & - run_cmd nettest -6 -s -M ${MD5_WRONG_PW} -r ${NSB_IP6} & + run_cmd nettest -6 -s -d ${VRF} -M ${MD5_PW} -m ${NSB_IP6} & + run_cmd nettest -6 -s -M ${MD5_WRONG_PW} -m ${NSB_IP6} & sleep 1 run_cmd_nsb nettest -6 -r ${NSA_IP6} -M ${MD5_WRONG_PW} log_test $? 2 "MD5: VRF: Single address config in default VRF and VRF, conn in VRF with default VRF pw" @@ -2457,7 +2457,7 @@ ipv6_tcp_md5() # negative tests # log_start - run_cmd nettest -6 -s -d ${NSA_DEV} -M ${MD5_PW} -r ${NSB_IP6} + run_cmd nettest -6 -s -d ${NSA_DEV} -M ${MD5_PW} -m ${NSB_IP6} log_test $? 1 "MD5: VRF: Device must be a VRF - single address" log_start diff --git a/tools/testing/selftests/net/nettest.c b/tools/testing/selftests/net/nettest.c index 0e01a74475216..4c8d4570872dc 100644 --- a/tools/testing/selftests/net/nettest.c +++ b/tools/testing/selftests/net/nettest.c @@ -291,13 +291,13 @@ static int tcp_md5_remote(int sd, struct sock_args *args) switch (args->version) { case AF_INET: sin.sin_port = htons(args->port); - sin.sin_addr = args->remote_addr.in; + sin.sin_addr = args->md5_prefix.v4.sin_addr; addr = &sin; alen = sizeof(sin); break; case AF_INET6: sin6.sin6_port = htons(args->port); - sin6.sin6_addr = args->remote_addr.in6; + sin6.sin6_addr = args->md5_prefix.v6.sin6_addr; addr = &sin6; alen = sizeof(sin6); break; @@ -725,7 +725,7 @@ static int convert_addr(struct sock_args *args, const char *_str, return 1; } } else { - args->prefix_len = pfx_len_max; + args->prefix_len = 0; } break; default: -- GitLab From d3857b8f0d192e0313990481d223e554db7d878e Mon Sep 17 00:00:00 2001 From: David Ahern Date: Wed, 13 Jan 2021 20:09:46 -0700 Subject: [PATCH 0833/2012] selftests: Add new option for client-side passwords Add new option to nettest to specify MD5 password to use for client side. Update fcnal-test script. This is needed for a single instance running both server and client modes to test password mismatches. Signed-off-by: David Ahern Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/fcnal-test.sh | 88 +++++++++++------------ tools/testing/selftests/net/nettest.c | 9 ++- 2 files changed, 52 insertions(+), 45 deletions(-) diff --git a/tools/testing/selftests/net/fcnal-test.sh b/tools/testing/selftests/net/fcnal-test.sh index edd33f83f80ee..5d15ded2433b0 100755 --- a/tools/testing/selftests/net/fcnal-test.sh +++ b/tools/testing/selftests/net/fcnal-test.sh @@ -803,7 +803,7 @@ ipv4_tcp_md5_novrf() log_start run_cmd nettest -s -M ${MD5_PW} -m ${NSB_IP} & sleep 1 - run_cmd_nsb nettest -r ${NSA_IP} -M ${MD5_PW} + run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW} log_test $? 0 "MD5: Single address config" # client sends MD5, server not configured @@ -811,7 +811,7 @@ ipv4_tcp_md5_novrf() show_hint "Should timeout due to MD5 mismatch" run_cmd nettest -s & sleep 1 - run_cmd_nsb nettest -r ${NSA_IP} -M ${MD5_PW} + run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW} log_test $? 2 "MD5: Server no config, client uses password" # wrong password @@ -819,7 +819,7 @@ ipv4_tcp_md5_novrf() show_hint "Should timeout since client uses wrong password" run_cmd nettest -s -M ${MD5_PW} -m ${NSB_IP} & sleep 1 - run_cmd_nsb nettest -r ${NSA_IP} -M ${MD5_WRONG_PW} + run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_WRONG_PW} log_test $? 2 "MD5: Client uses wrong password" # client from different address @@ -827,7 +827,7 @@ ipv4_tcp_md5_novrf() show_hint "Should timeout due to MD5 mismatch" run_cmd nettest -s -M ${MD5_PW} -m ${NSB_LO_IP} & sleep 1 - run_cmd_nsb nettest -r ${NSA_IP} -M ${MD5_PW} + run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW} log_test $? 2 "MD5: Client address does not match address configured with password" # @@ -838,7 +838,7 @@ ipv4_tcp_md5_novrf() log_start run_cmd nettest -s -M ${MD5_PW} -m ${NS_NET} & sleep 1 - run_cmd_nsb nettest -r ${NSA_IP} -M ${MD5_PW} + run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW} log_test $? 0 "MD5: Prefix config" # client in prefix, wrong password @@ -846,7 +846,7 @@ ipv4_tcp_md5_novrf() show_hint "Should timeout since client uses wrong password" run_cmd nettest -s -M ${MD5_PW} -m ${NS_NET} & sleep 1 - run_cmd_nsb nettest -r ${NSA_IP} -M ${MD5_WRONG_PW} + run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_WRONG_PW} log_test $? 2 "MD5: Prefix config, client uses wrong password" # client outside of prefix @@ -854,7 +854,7 @@ ipv4_tcp_md5_novrf() show_hint "Should timeout due to MD5 mismatch" run_cmd nettest -s -M ${MD5_PW} -m ${NS_NET} & sleep 1 - run_cmd_nsb nettest -l ${NSB_LO_IP} -r ${NSA_IP} -M ${MD5_PW} + run_cmd_nsb nettest -l ${NSB_LO_IP} -r ${NSA_IP} -X ${MD5_PW} log_test $? 2 "MD5: Prefix config, client address not in configured prefix" } @@ -871,7 +871,7 @@ ipv4_tcp_md5() log_start run_cmd nettest -s -d ${VRF} -M ${MD5_PW} -m ${NSB_IP} & sleep 1 - run_cmd_nsb nettest -r ${NSA_IP} -M ${MD5_PW} + run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW} log_test $? 0 "MD5: VRF: Single address config" # client sends MD5, server not configured @@ -879,7 +879,7 @@ ipv4_tcp_md5() show_hint "Should timeout since server does not have MD5 auth" run_cmd nettest -s -d ${VRF} & sleep 1 - run_cmd_nsb nettest -r ${NSA_IP} -M ${MD5_PW} + run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW} log_test $? 2 "MD5: VRF: Server no config, client uses password" # wrong password @@ -887,7 +887,7 @@ ipv4_tcp_md5() show_hint "Should timeout since client uses wrong password" run_cmd nettest -s -d ${VRF} -M ${MD5_PW} -m ${NSB_IP} & sleep 1 - run_cmd_nsb nettest -r ${NSA_IP} -M ${MD5_WRONG_PW} + run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_WRONG_PW} log_test $? 2 "MD5: VRF: Client uses wrong password" # client from different address @@ -895,7 +895,7 @@ ipv4_tcp_md5() show_hint "Should timeout since server config differs from client" run_cmd nettest -s -d ${VRF} -M ${MD5_PW} -m ${NSB_LO_IP} & sleep 1 - run_cmd_nsb nettest -r ${NSA_IP} -M ${MD5_PW} + run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW} log_test $? 2 "MD5: VRF: Client address does not match address configured with password" # @@ -906,7 +906,7 @@ ipv4_tcp_md5() log_start run_cmd nettest -s -d ${VRF} -M ${MD5_PW} -m ${NS_NET} & sleep 1 - run_cmd_nsb nettest -r ${NSA_IP} -M ${MD5_PW} + run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW} log_test $? 0 "MD5: VRF: Prefix config" # client in prefix, wrong password @@ -914,7 +914,7 @@ ipv4_tcp_md5() show_hint "Should timeout since client uses wrong password" run_cmd nettest -s -d ${VRF} -M ${MD5_PW} -m ${NS_NET} & sleep 1 - run_cmd_nsb nettest -r ${NSA_IP} -M ${MD5_WRONG_PW} + run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_WRONG_PW} log_test $? 2 "MD5: VRF: Prefix config, client uses wrong password" # client outside of prefix @@ -922,7 +922,7 @@ ipv4_tcp_md5() show_hint "Should timeout since client address is outside of prefix" run_cmd nettest -s -d ${VRF} -M ${MD5_PW} -m ${NS_NET} & sleep 1 - run_cmd_nsb nettest -l ${NSB_LO_IP} -r ${NSA_IP} -M ${MD5_PW} + run_cmd_nsb nettest -l ${NSB_LO_IP} -r ${NSA_IP} -X ${MD5_PW} log_test $? 2 "MD5: VRF: Prefix config, client address not in configured prefix" # @@ -933,14 +933,14 @@ ipv4_tcp_md5() run_cmd nettest -s -d ${VRF} -M ${MD5_PW} -m ${NSB_IP} & run_cmd nettest -s -M ${MD5_WRONG_PW} -m ${NSB_IP} & sleep 1 - run_cmd_nsb nettest -r ${NSA_IP} -M ${MD5_PW} + run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW} log_test $? 0 "MD5: VRF: Single address config in default VRF and VRF, conn in VRF" log_start run_cmd nettest -s -d ${VRF} -M ${MD5_PW} -m ${NSB_IP} & run_cmd nettest -s -M ${MD5_WRONG_PW} -m ${NSB_IP} & sleep 1 - run_cmd_nsc nettest -r ${NSA_IP} -M ${MD5_WRONG_PW} + run_cmd_nsc nettest -r ${NSA_IP} -X ${MD5_WRONG_PW} log_test $? 0 "MD5: VRF: Single address config in default VRF and VRF, conn in default VRF" log_start @@ -948,7 +948,7 @@ ipv4_tcp_md5() run_cmd nettest -s -d ${VRF} -M ${MD5_PW} -m ${NSB_IP} & run_cmd nettest -s -M ${MD5_WRONG_PW} -m ${NSB_IP} & sleep 1 - run_cmd_nsc nettest -r ${NSA_IP} -M ${MD5_PW} + run_cmd_nsc nettest -r ${NSA_IP} -X ${MD5_PW} log_test $? 2 "MD5: VRF: Single address config in default VRF and VRF, conn in default VRF with VRF pw" log_start @@ -956,21 +956,21 @@ ipv4_tcp_md5() run_cmd nettest -s -d ${VRF} -M ${MD5_PW} -m ${NSB_IP} & run_cmd nettest -s -M ${MD5_WRONG_PW} -m ${NSB_IP} & sleep 1 - run_cmd_nsb nettest -r ${NSA_IP} -M ${MD5_WRONG_PW} + run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_WRONG_PW} log_test $? 2 "MD5: VRF: Single address config in default VRF and VRF, conn in VRF with default VRF pw" log_start run_cmd nettest -s -d ${VRF} -M ${MD5_PW} -m ${NS_NET} & run_cmd nettest -s -M ${MD5_WRONG_PW} -m ${NS_NET} & sleep 1 - run_cmd_nsb nettest -r ${NSA_IP} -M ${MD5_PW} + run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW} log_test $? 0 "MD5: VRF: Prefix config in default VRF and VRF, conn in VRF" log_start run_cmd nettest -s -d ${VRF} -M ${MD5_PW} -m ${NS_NET} & run_cmd nettest -s -M ${MD5_WRONG_PW} -m ${NS_NET} & sleep 1 - run_cmd_nsc nettest -r ${NSA_IP} -M ${MD5_WRONG_PW} + run_cmd_nsc nettest -r ${NSA_IP} -X ${MD5_WRONG_PW} log_test $? 0 "MD5: VRF: Prefix config in default VRF and VRF, conn in default VRF" log_start @@ -978,7 +978,7 @@ ipv4_tcp_md5() run_cmd nettest -s -d ${VRF} -M ${MD5_PW} -m ${NS_NET} & run_cmd nettest -s -M ${MD5_WRONG_PW} -m ${NS_NET} & sleep 1 - run_cmd_nsc nettest -r ${NSA_IP} -M ${MD5_PW} + run_cmd_nsc nettest -r ${NSA_IP} -X ${MD5_PW} log_test $? 2 "MD5: VRF: Prefix config in default VRF and VRF, conn in default VRF with VRF pw" log_start @@ -986,7 +986,7 @@ ipv4_tcp_md5() run_cmd nettest -s -d ${VRF} -M ${MD5_PW} -m ${NS_NET} & run_cmd nettest -s -M ${MD5_WRONG_PW} -m ${NS_NET} & sleep 1 - run_cmd_nsb nettest -r ${NSA_IP} -M ${MD5_WRONG_PW} + run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_WRONG_PW} log_test $? 2 "MD5: VRF: Prefix config in default VRF and VRF, conn in VRF with default VRF pw" # @@ -2267,7 +2267,7 @@ ipv6_tcp_md5_novrf() log_start run_cmd nettest -6 -s -M ${MD5_PW} -m ${NSB_IP6} & sleep 1 - run_cmd_nsb nettest -6 -r ${NSA_IP6} -M ${MD5_PW} + run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_PW} log_test $? 0 "MD5: Single address config" # client sends MD5, server not configured @@ -2275,7 +2275,7 @@ ipv6_tcp_md5_novrf() show_hint "Should timeout due to MD5 mismatch" run_cmd nettest -6 -s & sleep 1 - run_cmd_nsb nettest -6 -r ${NSA_IP6} -M ${MD5_PW} + run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_PW} log_test $? 2 "MD5: Server no config, client uses password" # wrong password @@ -2283,7 +2283,7 @@ ipv6_tcp_md5_novrf() show_hint "Should timeout since client uses wrong password" run_cmd nettest -6 -s -M ${MD5_PW} -m ${NSB_IP6} & sleep 1 - run_cmd_nsb nettest -6 -r ${NSA_IP6} -M ${MD5_WRONG_PW} + run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_WRONG_PW} log_test $? 2 "MD5: Client uses wrong password" # client from different address @@ -2291,7 +2291,7 @@ ipv6_tcp_md5_novrf() show_hint "Should timeout due to MD5 mismatch" run_cmd nettest -6 -s -M ${MD5_PW} -m ${NSB_LO_IP6} & sleep 1 - run_cmd_nsb nettest -6 -r ${NSA_IP6} -M ${MD5_PW} + run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_PW} log_test $? 2 "MD5: Client address does not match address configured with password" # @@ -2302,7 +2302,7 @@ ipv6_tcp_md5_novrf() log_start run_cmd nettest -6 -s -M ${MD5_PW} -m ${NS_NET6} & sleep 1 - run_cmd_nsb nettest -6 -r ${NSA_IP6} -M ${MD5_PW} + run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_PW} log_test $? 0 "MD5: Prefix config" # client in prefix, wrong password @@ -2310,7 +2310,7 @@ ipv6_tcp_md5_novrf() show_hint "Should timeout since client uses wrong password" run_cmd nettest -6 -s -M ${MD5_PW} -m ${NS_NET6} & sleep 1 - run_cmd_nsb nettest -6 -r ${NSA_IP6} -M ${MD5_WRONG_PW} + run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_WRONG_PW} log_test $? 2 "MD5: Prefix config, client uses wrong password" # client outside of prefix @@ -2318,7 +2318,7 @@ ipv6_tcp_md5_novrf() show_hint "Should timeout due to MD5 mismatch" run_cmd nettest -6 -s -M ${MD5_PW} -m ${NS_NET6} & sleep 1 - run_cmd_nsb nettest -6 -l ${NSB_LO_IP6} -r ${NSA_IP6} -M ${MD5_PW} + run_cmd_nsb nettest -6 -l ${NSB_LO_IP6} -r ${NSA_IP6} -X ${MD5_PW} log_test $? 2 "MD5: Prefix config, client address not in configured prefix" } @@ -2335,7 +2335,7 @@ ipv6_tcp_md5() log_start run_cmd nettest -6 -s -d ${VRF} -M ${MD5_PW} -m ${NSB_IP6} & sleep 1 - run_cmd_nsb nettest -6 -r ${NSA_IP6} -M ${MD5_PW} + run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_PW} log_test $? 0 "MD5: VRF: Single address config" # client sends MD5, server not configured @@ -2343,7 +2343,7 @@ ipv6_tcp_md5() show_hint "Should timeout since server does not have MD5 auth" run_cmd nettest -6 -s -d ${VRF} & sleep 1 - run_cmd_nsb nettest -6 -r ${NSA_IP6} -M ${MD5_PW} + run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_PW} log_test $? 2 "MD5: VRF: Server no config, client uses password" # wrong password @@ -2351,7 +2351,7 @@ ipv6_tcp_md5() show_hint "Should timeout since client uses wrong password" run_cmd nettest -6 -s -d ${VRF} -M ${MD5_PW} -m ${NSB_IP6} & sleep 1 - run_cmd_nsb nettest -6 -r ${NSA_IP6} -M ${MD5_WRONG_PW} + run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_WRONG_PW} log_test $? 2 "MD5: VRF: Client uses wrong password" # client from different address @@ -2359,7 +2359,7 @@ ipv6_tcp_md5() show_hint "Should timeout since server config differs from client" run_cmd nettest -6 -s -d ${VRF} -M ${MD5_PW} -m ${NSB_LO_IP6} & sleep 1 - run_cmd_nsb nettest -6 -r ${NSA_IP6} -M ${MD5_PW} + run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_PW} log_test $? 2 "MD5: VRF: Client address does not match address configured with password" # @@ -2370,7 +2370,7 @@ ipv6_tcp_md5() log_start run_cmd nettest -6 -s -d ${VRF} -M ${MD5_PW} -m ${NS_NET6} & sleep 1 - run_cmd_nsb nettest -6 -r ${NSA_IP6} -M ${MD5_PW} + run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_PW} log_test $? 0 "MD5: VRF: Prefix config" # client in prefix, wrong password @@ -2378,7 +2378,7 @@ ipv6_tcp_md5() show_hint "Should timeout since client uses wrong password" run_cmd nettest -6 -s -d ${VRF} -M ${MD5_PW} -m ${NS_NET6} & sleep 1 - run_cmd_nsb nettest -6 -r ${NSA_IP6} -M ${MD5_WRONG_PW} + run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_WRONG_PW} log_test $? 2 "MD5: VRF: Prefix config, client uses wrong password" # client outside of prefix @@ -2386,7 +2386,7 @@ ipv6_tcp_md5() show_hint "Should timeout since client address is outside of prefix" run_cmd nettest -6 -s -d ${VRF} -M ${MD5_PW} -m ${NS_NET6} & sleep 1 - run_cmd_nsb nettest -6 -l ${NSB_LO_IP6} -r ${NSA_IP6} -M ${MD5_PW} + run_cmd_nsb nettest -6 -l ${NSB_LO_IP6} -r ${NSA_IP6} -X ${MD5_PW} log_test $? 2 "MD5: VRF: Prefix config, client address not in configured prefix" # @@ -2397,14 +2397,14 @@ ipv6_tcp_md5() run_cmd nettest -6 -s -d ${VRF} -M ${MD5_PW} -m ${NSB_IP6} & run_cmd nettest -6 -s -M ${MD5_WRONG_PW} -m ${NSB_IP6} & sleep 1 - run_cmd_nsb nettest -6 -r ${NSA_IP6} -M ${MD5_PW} + run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_PW} log_test $? 0 "MD5: VRF: Single address config in default VRF and VRF, conn in VRF" log_start run_cmd nettest -6 -s -d ${VRF} -M ${MD5_PW} -m ${NSB_IP6} & run_cmd nettest -6 -s -M ${MD5_WRONG_PW} -m ${NSB_IP6} & sleep 1 - run_cmd_nsc nettest -6 -r ${NSA_IP6} -M ${MD5_WRONG_PW} + run_cmd_nsc nettest -6 -r ${NSA_IP6} -X ${MD5_WRONG_PW} log_test $? 0 "MD5: VRF: Single address config in default VRF and VRF, conn in default VRF" log_start @@ -2412,7 +2412,7 @@ ipv6_tcp_md5() run_cmd nettest -6 -s -d ${VRF} -M ${MD5_PW} -m ${NSB_IP6} & run_cmd nettest -6 -s -M ${MD5_WRONG_PW} -m ${NSB_IP6} & sleep 1 - run_cmd_nsc nettest -6 -r ${NSA_IP6} -M ${MD5_PW} + run_cmd_nsc nettest -6 -r ${NSA_IP6} -X ${MD5_PW} log_test $? 2 "MD5: VRF: Single address config in default VRF and VRF, conn in default VRF with VRF pw" log_start @@ -2420,21 +2420,21 @@ ipv6_tcp_md5() run_cmd nettest -6 -s -d ${VRF} -M ${MD5_PW} -m ${NSB_IP6} & run_cmd nettest -6 -s -M ${MD5_WRONG_PW} -m ${NSB_IP6} & sleep 1 - run_cmd_nsb nettest -6 -r ${NSA_IP6} -M ${MD5_WRONG_PW} + run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_WRONG_PW} log_test $? 2 "MD5: VRF: Single address config in default VRF and VRF, conn in VRF with default VRF pw" log_start run_cmd nettest -6 -s -d ${VRF} -M ${MD5_PW} -m ${NS_NET6} & run_cmd nettest -6 -s -M ${MD5_WRONG_PW} -m ${NS_NET6} & sleep 1 - run_cmd_nsb nettest -6 -r ${NSA_IP6} -M ${MD5_PW} + run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_PW} log_test $? 0 "MD5: VRF: Prefix config in default VRF and VRF, conn in VRF" log_start run_cmd nettest -6 -s -d ${VRF} -M ${MD5_PW} -m ${NS_NET6} & run_cmd nettest -6 -s -M ${MD5_WRONG_PW} -m ${NS_NET6} & sleep 1 - run_cmd_nsc nettest -6 -r ${NSA_IP6} -M ${MD5_WRONG_PW} + run_cmd_nsc nettest -6 -r ${NSA_IP6} -X ${MD5_WRONG_PW} log_test $? 0 "MD5: VRF: Prefix config in default VRF and VRF, conn in default VRF" log_start @@ -2442,7 +2442,7 @@ ipv6_tcp_md5() run_cmd nettest -6 -s -d ${VRF} -M ${MD5_PW} -m ${NS_NET6} & run_cmd nettest -6 -s -M ${MD5_WRONG_PW} -m ${NS_NET6} & sleep 1 - run_cmd_nsc nettest -6 -r ${NSA_IP6} -M ${MD5_PW} + run_cmd_nsc nettest -6 -r ${NSA_IP6} -X ${MD5_PW} log_test $? 2 "MD5: VRF: Prefix config in default VRF and VRF, conn in default VRF with VRF pw" log_start @@ -2450,7 +2450,7 @@ ipv6_tcp_md5() run_cmd nettest -6 -s -d ${VRF} -M ${MD5_PW} -m ${NS_NET6} & run_cmd nettest -6 -s -M ${MD5_WRONG_PW} -m ${NS_NET6} & sleep 1 - run_cmd_nsb nettest -6 -r ${NSA_IP6} -M ${MD5_WRONG_PW} + run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_WRONG_PW} log_test $? 2 "MD5: VRF: Prefix config in default VRF and VRF, conn in VRF with default VRF pw" # diff --git a/tools/testing/selftests/net/nettest.c b/tools/testing/selftests/net/nettest.c index 4c8d4570872dc..e20e74e001a21 100644 --- a/tools/testing/selftests/net/nettest.c +++ b/tools/testing/selftests/net/nettest.c @@ -85,6 +85,7 @@ struct sock_args { const char *serverns; const char *password; + const char *client_pw; /* prefix for MD5 password */ const char *md5_prefix_str; union { @@ -1655,6 +1656,8 @@ static int do_client(struct sock_args *args) break; } + args->password = args->client_pw; + if (args->has_grp) sd = msock_client(args); else @@ -1764,7 +1767,7 @@ static int ipc_parent(int cpid, int fd, struct sock_args *args) return client_status; } -#define GETOPT_STR "sr:l:p:t:g:P:DRn:M:m:d:BN:O:SCi6L:0:1:2:Fbq" +#define GETOPT_STR "sr:l:p:t:g:P:DRn:M:X:m:d:BN:O:SCi6L:0:1:2:Fbq" static void print_usage(char *prog) { @@ -1796,6 +1799,7 @@ static void print_usage(char *prog) " -n num number of times to send message\n" "\n" " -M password use MD5 sum protection\n" + " -X password MD5 password for client mode\n" " -m prefix/len prefix and length to use for MD5 key\n" " -g grp multicast group (e.g., 239.1.1.1)\n" " -i interactive mode (default is echo and terminate)\n" @@ -1900,6 +1904,9 @@ int main(int argc, char *argv[]) case 'M': args.password = optarg; break; + case 'X': + args.client_pw = optarg; + break; case 'm': args.md5_prefix_str = optarg; break; -- GitLab From 8a909735fa29fb700fef064b339988ff404d2d72 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Wed, 13 Jan 2021 20:09:47 -0700 Subject: [PATCH 0834/2012] selftests: Add separate options for server device bindings Add new options to nettest to specify device binding and expected device binding for server mode, and update fcnal-test script. This is needed to allow a single instance of nettest running both server and client modes to use different device bindings. Signed-off-by: David Ahern Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/fcnal-test.sh | 282 +++++++++++----------- tools/testing/selftests/net/nettest.c | 14 +- 2 files changed, 154 insertions(+), 142 deletions(-) diff --git a/tools/testing/selftests/net/fcnal-test.sh b/tools/testing/selftests/net/fcnal-test.sh index 5d15ded2433b0..2514a9cb95307 100755 --- a/tools/testing/selftests/net/fcnal-test.sh +++ b/tools/testing/selftests/net/fcnal-test.sh @@ -869,7 +869,7 @@ ipv4_tcp_md5() # basic use case log_start - run_cmd nettest -s -d ${VRF} -M ${MD5_PW} -m ${NSB_IP} & + run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NSB_IP} & sleep 1 run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW} log_test $? 0 "MD5: VRF: Single address config" @@ -877,7 +877,7 @@ ipv4_tcp_md5() # client sends MD5, server not configured log_start show_hint "Should timeout since server does not have MD5 auth" - run_cmd nettest -s -d ${VRF} & + run_cmd nettest -s -I ${VRF} & sleep 1 run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW} log_test $? 2 "MD5: VRF: Server no config, client uses password" @@ -885,7 +885,7 @@ ipv4_tcp_md5() # wrong password log_start show_hint "Should timeout since client uses wrong password" - run_cmd nettest -s -d ${VRF} -M ${MD5_PW} -m ${NSB_IP} & + run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NSB_IP} & sleep 1 run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_WRONG_PW} log_test $? 2 "MD5: VRF: Client uses wrong password" @@ -893,7 +893,7 @@ ipv4_tcp_md5() # client from different address log_start show_hint "Should timeout since server config differs from client" - run_cmd nettest -s -d ${VRF} -M ${MD5_PW} -m ${NSB_LO_IP} & + run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NSB_LO_IP} & sleep 1 run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW} log_test $? 2 "MD5: VRF: Client address does not match address configured with password" @@ -904,7 +904,7 @@ ipv4_tcp_md5() # client in prefix log_start - run_cmd nettest -s -d ${VRF} -M ${MD5_PW} -m ${NS_NET} & + run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET} & sleep 1 run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW} log_test $? 0 "MD5: VRF: Prefix config" @@ -912,7 +912,7 @@ ipv4_tcp_md5() # client in prefix, wrong password log_start show_hint "Should timeout since client uses wrong password" - run_cmd nettest -s -d ${VRF} -M ${MD5_PW} -m ${NS_NET} & + run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET} & sleep 1 run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_WRONG_PW} log_test $? 2 "MD5: VRF: Prefix config, client uses wrong password" @@ -920,7 +920,7 @@ ipv4_tcp_md5() # client outside of prefix log_start show_hint "Should timeout since client address is outside of prefix" - run_cmd nettest -s -d ${VRF} -M ${MD5_PW} -m ${NS_NET} & + run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET} & sleep 1 run_cmd_nsb nettest -l ${NSB_LO_IP} -r ${NSA_IP} -X ${MD5_PW} log_test $? 2 "MD5: VRF: Prefix config, client address not in configured prefix" @@ -930,14 +930,14 @@ ipv4_tcp_md5() # log_start - run_cmd nettest -s -d ${VRF} -M ${MD5_PW} -m ${NSB_IP} & + run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NSB_IP} & run_cmd nettest -s -M ${MD5_WRONG_PW} -m ${NSB_IP} & sleep 1 run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW} log_test $? 0 "MD5: VRF: Single address config in default VRF and VRF, conn in VRF" log_start - run_cmd nettest -s -d ${VRF} -M ${MD5_PW} -m ${NSB_IP} & + run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NSB_IP} & run_cmd nettest -s -M ${MD5_WRONG_PW} -m ${NSB_IP} & sleep 1 run_cmd_nsc nettest -r ${NSA_IP} -X ${MD5_WRONG_PW} @@ -945,7 +945,7 @@ ipv4_tcp_md5() log_start show_hint "Should timeout since client in default VRF uses VRF password" - run_cmd nettest -s -d ${VRF} -M ${MD5_PW} -m ${NSB_IP} & + run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NSB_IP} & run_cmd nettest -s -M ${MD5_WRONG_PW} -m ${NSB_IP} & sleep 1 run_cmd_nsc nettest -r ${NSA_IP} -X ${MD5_PW} @@ -953,21 +953,21 @@ ipv4_tcp_md5() log_start show_hint "Should timeout since client in VRF uses default VRF password" - run_cmd nettest -s -d ${VRF} -M ${MD5_PW} -m ${NSB_IP} & + run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NSB_IP} & run_cmd nettest -s -M ${MD5_WRONG_PW} -m ${NSB_IP} & sleep 1 run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_WRONG_PW} log_test $? 2 "MD5: VRF: Single address config in default VRF and VRF, conn in VRF with default VRF pw" log_start - run_cmd nettest -s -d ${VRF} -M ${MD5_PW} -m ${NS_NET} & + run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET} & run_cmd nettest -s -M ${MD5_WRONG_PW} -m ${NS_NET} & sleep 1 run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW} log_test $? 0 "MD5: VRF: Prefix config in default VRF and VRF, conn in VRF" log_start - run_cmd nettest -s -d ${VRF} -M ${MD5_PW} -m ${NS_NET} & + run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET} & run_cmd nettest -s -M ${MD5_WRONG_PW} -m ${NS_NET} & sleep 1 run_cmd_nsc nettest -r ${NSA_IP} -X ${MD5_WRONG_PW} @@ -975,7 +975,7 @@ ipv4_tcp_md5() log_start show_hint "Should timeout since client in default VRF uses VRF password" - run_cmd nettest -s -d ${VRF} -M ${MD5_PW} -m ${NS_NET} & + run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET} & run_cmd nettest -s -M ${MD5_WRONG_PW} -m ${NS_NET} & sleep 1 run_cmd_nsc nettest -r ${NSA_IP} -X ${MD5_PW} @@ -983,7 +983,7 @@ ipv4_tcp_md5() log_start show_hint "Should timeout since client in VRF uses default VRF password" - run_cmd nettest -s -d ${VRF} -M ${MD5_PW} -m ${NS_NET} & + run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET} & run_cmd nettest -s -M ${MD5_WRONG_PW} -m ${NS_NET} & sleep 1 run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_WRONG_PW} @@ -993,11 +993,11 @@ ipv4_tcp_md5() # negative tests # log_start - run_cmd nettest -s -d ${NSA_DEV} -M ${MD5_PW} -m ${NSB_IP} + run_cmd nettest -s -I ${NSA_DEV} -M ${MD5_PW} -m ${NSB_IP} log_test $? 1 "MD5: VRF: Device must be a VRF - single address" log_start - run_cmd nettest -s -d ${NSA_DEV} -M ${MD5_PW} -m ${NS_NET} + run_cmd nettest -s -I ${NSA_DEV} -M ${MD5_PW} -m ${NS_NET} log_test $? 1 "MD5: VRF: Device must be a VRF - prefix" } @@ -1020,7 +1020,7 @@ ipv4_tcp_novrf() a=${NSA_IP} log_start - run_cmd nettest -s -d ${NSA_DEV} & + run_cmd nettest -s -I ${NSA_DEV} & sleep 1 run_cmd_nsb nettest -r ${a} log_test_addr ${a} $? 0 "Device server" @@ -1076,7 +1076,7 @@ ipv4_tcp_novrf() a=${NSA_IP} log_start - run_cmd nettest -s -d ${NSA_DEV} & + run_cmd nettest -s -I ${NSA_DEV} & sleep 1 run_cmd nettest -r ${a} -0 ${a} log_test_addr ${a} $? 0 "Device server, unbound client, local connection" @@ -1085,7 +1085,7 @@ ipv4_tcp_novrf() do log_start show_hint "Should fail 'Connection refused' since addresses on loopback are out of device scope" - run_cmd nettest -s -d ${NSA_DEV} & + run_cmd nettest -s -I ${NSA_DEV} & sleep 1 run_cmd nettest -r ${a} log_test_addr ${a} $? 1 "Device server, unbound client, local connection" @@ -1110,7 +1110,7 @@ ipv4_tcp_novrf() a=${NSA_IP} log_start - run_cmd nettest -s -d ${NSA_DEV} -2 ${NSA_DEV} & + run_cmd nettest -s -I ${NSA_DEV} -3 ${NSA_DEV} & sleep 1 run_cmd nettest -d ${NSA_DEV} -r ${a} -0 ${a} log_test_addr ${a} $? 0 "Device server, device client, local connection" @@ -1145,13 +1145,13 @@ ipv4_tcp_vrf() log_test_addr ${a} $? 1 "Global server" log_start - run_cmd nettest -s -d ${VRF} -2 ${VRF} & + run_cmd nettest -s -I ${VRF} -3 ${VRF} & sleep 1 run_cmd_nsb nettest -r ${a} log_test_addr ${a} $? 0 "VRF server" log_start - run_cmd nettest -s -d ${NSA_DEV} -2 ${NSA_DEV} & + run_cmd nettest -s -I ${NSA_DEV} -3 ${NSA_DEV} & sleep 1 run_cmd_nsb nettest -r ${a} log_test_addr ${a} $? 0 "Device server" @@ -1186,14 +1186,14 @@ ipv4_tcp_vrf() do log_start show_hint "client socket should be bound to VRF" - run_cmd nettest -s -2 ${VRF} & + run_cmd nettest -s -3 ${VRF} & sleep 1 run_cmd_nsb nettest -r ${a} log_test_addr ${a} $? 0 "Global server" log_start show_hint "client socket should be bound to VRF" - run_cmd nettest -s -d ${VRF} -2 ${VRF} & + run_cmd nettest -s -I ${VRF} -3 ${VRF} & sleep 1 run_cmd_nsb nettest -r ${a} log_test_addr ${a} $? 0 "VRF server" @@ -1208,7 +1208,7 @@ ipv4_tcp_vrf() a=${NSA_IP} log_start show_hint "client socket should be bound to device" - run_cmd nettest -s -d ${NSA_DEV} -2 ${NSA_DEV} & + run_cmd nettest -s -I ${NSA_DEV} -3 ${NSA_DEV} & sleep 1 run_cmd_nsb nettest -r ${a} log_test_addr ${a} $? 0 "Device server" @@ -1218,7 +1218,7 @@ ipv4_tcp_vrf() do log_start show_hint "Should fail 'Connection refused' since client is not bound to VRF" - run_cmd nettest -s -d ${VRF} & + run_cmd nettest -s -I ${VRF} & sleep 1 run_cmd nettest -r ${a} log_test_addr ${a} $? 1 "Global server, local connection" @@ -1255,7 +1255,7 @@ ipv4_tcp_vrf() for a in ${NSA_IP} ${VRF_IP} 127.0.0.1 do log_start - run_cmd nettest -s -d ${VRF} -2 ${VRF} & + run_cmd nettest -s -I ${VRF} -3 ${VRF} & sleep 1 run_cmd nettest -r ${a} -d ${VRF} -0 ${a} log_test_addr ${a} $? 0 "VRF server, VRF client, local connection" @@ -1263,26 +1263,26 @@ ipv4_tcp_vrf() a=${NSA_IP} log_start - run_cmd nettest -s -d ${VRF} -2 ${VRF} & + run_cmd nettest -s -I ${VRF} -3 ${VRF} & sleep 1 run_cmd nettest -r ${a} -d ${NSA_DEV} -0 ${a} log_test_addr ${a} $? 0 "VRF server, device client, local connection" log_start show_hint "Should fail 'No route to host' since client is out of VRF scope" - run_cmd nettest -s -d ${VRF} & + run_cmd nettest -s -I ${VRF} & sleep 1 run_cmd nettest -r ${a} log_test_addr ${a} $? 1 "VRF server, unbound client, local connection" log_start - run_cmd nettest -s -d ${NSA_DEV} -2 ${NSA_DEV} & + run_cmd nettest -s -I ${NSA_DEV} -3 ${NSA_DEV} & sleep 1 run_cmd nettest -r ${a} -d ${VRF} -0 ${a} log_test_addr ${a} $? 0 "Device server, VRF client, local connection" log_start - run_cmd nettest -s -d ${NSA_DEV} -2 ${NSA_DEV} & + run_cmd nettest -s -I ${NSA_DEV} -3 ${NSA_DEV} & sleep 1 run_cmd nettest -r ${a} -d ${NSA_DEV} -0 ${a} log_test_addr ${a} $? 0 "Device server, device client, local connection" @@ -1321,7 +1321,7 @@ ipv4_udp_novrf() for a in ${NSA_IP} ${NSA_LO_IP} do log_start - run_cmd nettest -D -s -2 ${NSA_DEV} & + run_cmd nettest -D -s -3 ${NSA_DEV} & sleep 1 run_cmd_nsb nettest -D -r ${a} log_test_addr ${a} $? 0 "Global server" @@ -1334,7 +1334,7 @@ ipv4_udp_novrf() a=${NSA_IP} log_start - run_cmd nettest -D -d ${NSA_DEV} -s -2 ${NSA_DEV} & + run_cmd nettest -D -I ${NSA_DEV} -s -3 ${NSA_DEV} & sleep 1 run_cmd_nsb nettest -D -r ${a} log_test_addr ${a} $? 0 "Device server" @@ -1393,7 +1393,7 @@ ipv4_udp_novrf() a=${NSA_IP} log_start - run_cmd nettest -s -D -d ${NSA_DEV} -2 ${NSA_DEV} & + run_cmd nettest -s -D -I ${NSA_DEV} -3 ${NSA_DEV} & sleep 1 run_cmd nettest -D -r ${a} log_test_addr ${a} $? 0 "Device server, unbound client, local connection" @@ -1402,7 +1402,7 @@ ipv4_udp_novrf() do log_start show_hint "Should fail 'Connection refused' since address is out of device scope" - run_cmd nettest -s -D -d ${NSA_DEV} & + run_cmd nettest -s -D -I ${NSA_DEV} & sleep 1 run_cmd nettest -D -r ${a} log_test_addr ${a} $? 1 "Device server, unbound client, local connection" @@ -1456,7 +1456,7 @@ ipv4_udp_novrf() a=${NSA_IP} log_start - run_cmd nettest -D -s -d ${NSA_DEV} -2 ${NSA_DEV} & + run_cmd nettest -D -s -I ${NSA_DEV} -3 ${NSA_DEV} & sleep 1 run_cmd nettest -D -d ${NSA_DEV} -r ${a} -0 ${a} log_test_addr ${a} $? 0 "Device server, device client, local conn" @@ -1487,13 +1487,13 @@ ipv4_udp_vrf() log_test_addr ${a} $? 1 "Global server" log_start - run_cmd nettest -D -d ${VRF} -s -2 ${NSA_DEV} & + run_cmd nettest -D -I ${VRF} -s -3 ${NSA_DEV} & sleep 1 run_cmd_nsb nettest -D -r ${a} log_test_addr ${a} $? 0 "VRF server" log_start - run_cmd nettest -D -d ${NSA_DEV} -s -2 ${NSA_DEV} & + run_cmd nettest -D -I ${NSA_DEV} -s -3 ${NSA_DEV} & sleep 1 run_cmd_nsb nettest -D -r ${a} log_test_addr ${a} $? 0 "Enslaved device server" @@ -1513,26 +1513,26 @@ ipv4_udp_vrf() a=${NSA_IP} log_start - run_cmd nettest -s -D -d ${VRF} -2 ${NSA_DEV} & + run_cmd nettest -s -D -I ${VRF} -3 ${NSA_DEV} & sleep 1 run_cmd nettest -D -d ${VRF} -r ${a} log_test_addr ${a} $? 0 "VRF server, VRF client, local conn" log_start - run_cmd nettest -s -D -d ${VRF} -2 ${NSA_DEV} & + run_cmd nettest -s -D -I ${VRF} -3 ${NSA_DEV} & sleep 1 run_cmd nettest -D -d ${NSA_DEV} -r ${a} log_test_addr ${a} $? 0 "VRF server, enslaved device client, local connection" a=${NSA_IP} log_start - run_cmd nettest -s -D -d ${NSA_DEV} -2 ${NSA_DEV} & + run_cmd nettest -s -D -I ${NSA_DEV} -3 ${NSA_DEV} & sleep 1 run_cmd nettest -D -d ${VRF} -r ${a} log_test_addr ${a} $? 0 "Enslaved device server, VRF client, local conn" log_start - run_cmd nettest -s -D -d ${NSA_DEV} -2 ${NSA_DEV} & + run_cmd nettest -s -D -I ${NSA_DEV} -3 ${NSA_DEV} & sleep 1 run_cmd nettest -D -d ${NSA_DEV} -r ${a} log_test_addr ${a} $? 0 "Enslaved device server, device client, local conn" @@ -1547,19 +1547,19 @@ ipv4_udp_vrf() for a in ${NSA_IP} ${VRF_IP} do log_start - run_cmd nettest -D -s -2 ${NSA_DEV} & + run_cmd nettest -D -s -3 ${NSA_DEV} & sleep 1 run_cmd_nsb nettest -D -r ${a} log_test_addr ${a} $? 0 "Global server" log_start - run_cmd nettest -D -d ${VRF} -s -2 ${NSA_DEV} & + run_cmd nettest -D -I ${VRF} -s -3 ${NSA_DEV} & sleep 1 run_cmd_nsb nettest -D -r ${a} log_test_addr ${a} $? 0 "VRF server" log_start - run_cmd nettest -D -d ${NSA_DEV} -s -2 ${NSA_DEV} & + run_cmd nettest -D -I ${NSA_DEV} -s -3 ${NSA_DEV} & sleep 1 run_cmd_nsb nettest -D -r ${a} log_test_addr ${a} $? 0 "Enslaved device server" @@ -1601,31 +1601,31 @@ ipv4_udp_vrf() # a=${NSA_IP} log_start - run_cmd nettest -D -s -2 ${NSA_DEV} & + run_cmd nettest -D -s -3 ${NSA_DEV} & sleep 1 run_cmd nettest -D -d ${VRF} -r ${a} log_test_addr ${a} $? 0 "Global server, VRF client, local conn" log_start - run_cmd nettest -s -D -d ${VRF} -2 ${NSA_DEV} & + run_cmd nettest -s -D -I ${VRF} -3 ${NSA_DEV} & sleep 1 run_cmd nettest -D -d ${VRF} -r ${a} log_test_addr ${a} $? 0 "VRF server, VRF client, local conn" log_start - run_cmd nettest -s -D -d ${VRF} -2 ${NSA_DEV} & + run_cmd nettest -s -D -I ${VRF} -3 ${NSA_DEV} & sleep 1 run_cmd nettest -D -d ${NSA_DEV} -r ${a} log_test_addr ${a} $? 0 "VRF server, device client, local conn" log_start - run_cmd nettest -s -D -d ${NSA_DEV} -2 ${NSA_DEV} & + run_cmd nettest -s -D -I ${NSA_DEV} -3 ${NSA_DEV} & sleep 1 run_cmd nettest -D -d ${VRF} -r ${a} log_test_addr ${a} $? 0 "Enslaved device server, VRF client, local conn" log_start - run_cmd nettest -s -D -d ${NSA_DEV} -2 ${NSA_DEV} & + run_cmd nettest -s -D -I ${NSA_DEV} -3 ${NSA_DEV} & sleep 1 run_cmd nettest -D -d ${NSA_DEV} -r ${a} log_test_addr ${a} $? 0 "Enslaved device server, device client, local conn" @@ -1633,7 +1633,7 @@ ipv4_udp_vrf() for a in ${VRF_IP} 127.0.0.1 do log_start - run_cmd nettest -D -s -2 ${VRF} & + run_cmd nettest -D -s -3 ${VRF} & sleep 1 run_cmd nettest -D -d ${VRF} -r ${a} log_test_addr ${a} $? 0 "Global server, VRF client, local conn" @@ -1642,7 +1642,7 @@ ipv4_udp_vrf() for a in ${VRF_IP} 127.0.0.1 do log_start - run_cmd nettest -s -D -d ${VRF} -2 ${VRF} & + run_cmd nettest -s -D -I ${VRF} -3 ${VRF} & sleep 1 run_cmd nettest -D -d ${VRF} -r ${a} log_test_addr ${a} $? 0 "VRF server, VRF client, local conn" @@ -1697,7 +1697,7 @@ ipv4_addr_bind_novrf() log_test_addr ${a} $? 0 "Raw socket bind to local address" log_start - run_cmd nettest -s -R -P icmp -l ${a} -d ${NSA_DEV} -b + run_cmd nettest -s -R -P icmp -l ${a} -I ${NSA_DEV} -b log_test_addr ${a} $? 0 "Raw socket bind to local address after device bind" done @@ -1720,7 +1720,7 @@ ipv4_addr_bind_novrf() #a=${NSA_LO_IP} #log_start #show_hint "Should fail with 'Cannot assign requested address'" - #run_cmd nettest -s -l ${a} -d ${NSA_DEV} -t1 -b + #run_cmd nettest -s -l ${a} -I ${NSA_DEV} -t1 -b #log_test_addr ${a} $? 1 "TCP socket bind to out of scope local address" } @@ -1736,17 +1736,17 @@ ipv4_addr_bind_vrf() log_test_addr ${a} $? 0 "Raw socket bind to local address" log_start - run_cmd nettest -s -R -P icmp -l ${a} -d ${NSA_DEV} -b + run_cmd nettest -s -R -P icmp -l ${a} -I ${NSA_DEV} -b log_test_addr ${a} $? 0 "Raw socket bind to local address after device bind" log_start - run_cmd nettest -s -R -P icmp -l ${a} -d ${VRF} -b + run_cmd nettest -s -R -P icmp -l ${a} -I ${VRF} -b log_test_addr ${a} $? 0 "Raw socket bind to local address after VRF bind" done a=${NSA_LO_IP} log_start show_hint "Address on loopback is out of VRF scope" - run_cmd nettest -s -R -P icmp -l ${a} -d ${VRF} -b + run_cmd nettest -s -R -P icmp -l ${a} -I ${VRF} -b log_test_addr ${a} $? 1 "Raw socket bind to out of scope address after VRF bind" # @@ -1755,23 +1755,23 @@ ipv4_addr_bind_vrf() for a in ${NSA_IP} ${VRF_IP} do log_start - run_cmd nettest -s -l ${a} -d ${VRF} -t1 -b + run_cmd nettest -s -l ${a} -I ${VRF} -t1 -b log_test_addr ${a} $? 0 "TCP socket bind to local address" log_start - run_cmd nettest -s -l ${a} -d ${NSA_DEV} -t1 -b + run_cmd nettest -s -l ${a} -I ${NSA_DEV} -t1 -b log_test_addr ${a} $? 0 "TCP socket bind to local address after device bind" done a=${NSA_LO_IP} log_start show_hint "Address on loopback out of scope for VRF" - run_cmd nettest -s -l ${a} -d ${VRF} -t1 -b + run_cmd nettest -s -l ${a} -I ${VRF} -t1 -b log_test_addr ${a} $? 1 "TCP socket bind to invalid local address for VRF" log_start show_hint "Address on loopback out of scope for device in VRF" - run_cmd nettest -s -l ${a} -d ${NSA_DEV} -t1 -b + run_cmd nettest -s -l ${a} -I ${NSA_DEV} -t1 -b log_test_addr ${a} $? 1 "TCP socket bind to invalid local address for device bind" } @@ -1818,7 +1818,7 @@ ipv4_rt() for a in ${NSA_IP} ${VRF_IP} do log_start - run_cmd nettest ${varg} -s -d ${VRF} & + run_cmd nettest ${varg} -s -I ${VRF} & sleep 1 run_cmd_nsb nettest ${varg} -r ${a} & sleep 3 @@ -1831,7 +1831,7 @@ ipv4_rt() a=${NSA_IP} log_start - run_cmd nettest ${varg} -s -d ${NSA_DEV} & + run_cmd nettest ${varg} -s -I ${NSA_DEV} & sleep 1 run_cmd_nsb nettest ${varg} -r ${a} & sleep 3 @@ -1886,7 +1886,7 @@ ipv4_rt() for a in ${NSA_IP} ${VRF_IP} do log_start - run_cmd nettest ${varg} -d ${VRF} -s & + run_cmd nettest ${varg} -I ${VRF} -s & sleep 1 run_cmd nettest ${varg} -d ${VRF} -r ${a} & sleep 3 @@ -1910,7 +1910,7 @@ ipv4_rt() setup ${with_vrf} log_start - run_cmd nettest ${varg} -d ${VRF} -s & + run_cmd nettest ${varg} -I ${VRF} -s & sleep 1 run_cmd nettest ${varg} -d ${NSA_DEV} -r ${a} & sleep 3 @@ -1921,7 +1921,7 @@ ipv4_rt() setup ${with_vrf} log_start - run_cmd nettest ${varg} -d ${NSA_DEV} -s & + run_cmd nettest ${varg} -I ${NSA_DEV} -s & sleep 1 run_cmd nettest ${varg} -d ${NSA_DEV} -r ${a} & sleep 3 @@ -2333,7 +2333,7 @@ ipv6_tcp_md5() # basic use case log_start - run_cmd nettest -6 -s -d ${VRF} -M ${MD5_PW} -m ${NSB_IP6} & + run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NSB_IP6} & sleep 1 run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_PW} log_test $? 0 "MD5: VRF: Single address config" @@ -2341,7 +2341,7 @@ ipv6_tcp_md5() # client sends MD5, server not configured log_start show_hint "Should timeout since server does not have MD5 auth" - run_cmd nettest -6 -s -d ${VRF} & + run_cmd nettest -6 -s -I ${VRF} & sleep 1 run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_PW} log_test $? 2 "MD5: VRF: Server no config, client uses password" @@ -2349,7 +2349,7 @@ ipv6_tcp_md5() # wrong password log_start show_hint "Should timeout since client uses wrong password" - run_cmd nettest -6 -s -d ${VRF} -M ${MD5_PW} -m ${NSB_IP6} & + run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NSB_IP6} & sleep 1 run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_WRONG_PW} log_test $? 2 "MD5: VRF: Client uses wrong password" @@ -2357,7 +2357,7 @@ ipv6_tcp_md5() # client from different address log_start show_hint "Should timeout since server config differs from client" - run_cmd nettest -6 -s -d ${VRF} -M ${MD5_PW} -m ${NSB_LO_IP6} & + run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NSB_LO_IP6} & sleep 1 run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_PW} log_test $? 2 "MD5: VRF: Client address does not match address configured with password" @@ -2368,7 +2368,7 @@ ipv6_tcp_md5() # client in prefix log_start - run_cmd nettest -6 -s -d ${VRF} -M ${MD5_PW} -m ${NS_NET6} & + run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET6} & sleep 1 run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_PW} log_test $? 0 "MD5: VRF: Prefix config" @@ -2376,7 +2376,7 @@ ipv6_tcp_md5() # client in prefix, wrong password log_start show_hint "Should timeout since client uses wrong password" - run_cmd nettest -6 -s -d ${VRF} -M ${MD5_PW} -m ${NS_NET6} & + run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET6} & sleep 1 run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_WRONG_PW} log_test $? 2 "MD5: VRF: Prefix config, client uses wrong password" @@ -2384,7 +2384,7 @@ ipv6_tcp_md5() # client outside of prefix log_start show_hint "Should timeout since client address is outside of prefix" - run_cmd nettest -6 -s -d ${VRF} -M ${MD5_PW} -m ${NS_NET6} & + run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET6} & sleep 1 run_cmd_nsb nettest -6 -l ${NSB_LO_IP6} -r ${NSA_IP6} -X ${MD5_PW} log_test $? 2 "MD5: VRF: Prefix config, client address not in configured prefix" @@ -2394,14 +2394,14 @@ ipv6_tcp_md5() # log_start - run_cmd nettest -6 -s -d ${VRF} -M ${MD5_PW} -m ${NSB_IP6} & + run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NSB_IP6} & run_cmd nettest -6 -s -M ${MD5_WRONG_PW} -m ${NSB_IP6} & sleep 1 run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_PW} log_test $? 0 "MD5: VRF: Single address config in default VRF and VRF, conn in VRF" log_start - run_cmd nettest -6 -s -d ${VRF} -M ${MD5_PW} -m ${NSB_IP6} & + run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NSB_IP6} & run_cmd nettest -6 -s -M ${MD5_WRONG_PW} -m ${NSB_IP6} & sleep 1 run_cmd_nsc nettest -6 -r ${NSA_IP6} -X ${MD5_WRONG_PW} @@ -2409,7 +2409,7 @@ ipv6_tcp_md5() log_start show_hint "Should timeout since client in default VRF uses VRF password" - run_cmd nettest -6 -s -d ${VRF} -M ${MD5_PW} -m ${NSB_IP6} & + run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NSB_IP6} & run_cmd nettest -6 -s -M ${MD5_WRONG_PW} -m ${NSB_IP6} & sleep 1 run_cmd_nsc nettest -6 -r ${NSA_IP6} -X ${MD5_PW} @@ -2417,21 +2417,21 @@ ipv6_tcp_md5() log_start show_hint "Should timeout since client in VRF uses default VRF password" - run_cmd nettest -6 -s -d ${VRF} -M ${MD5_PW} -m ${NSB_IP6} & + run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NSB_IP6} & run_cmd nettest -6 -s -M ${MD5_WRONG_PW} -m ${NSB_IP6} & sleep 1 run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_WRONG_PW} log_test $? 2 "MD5: VRF: Single address config in default VRF and VRF, conn in VRF with default VRF pw" log_start - run_cmd nettest -6 -s -d ${VRF} -M ${MD5_PW} -m ${NS_NET6} & + run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET6} & run_cmd nettest -6 -s -M ${MD5_WRONG_PW} -m ${NS_NET6} & sleep 1 run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_PW} log_test $? 0 "MD5: VRF: Prefix config in default VRF and VRF, conn in VRF" log_start - run_cmd nettest -6 -s -d ${VRF} -M ${MD5_PW} -m ${NS_NET6} & + run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET6} & run_cmd nettest -6 -s -M ${MD5_WRONG_PW} -m ${NS_NET6} & sleep 1 run_cmd_nsc nettest -6 -r ${NSA_IP6} -X ${MD5_WRONG_PW} @@ -2439,7 +2439,7 @@ ipv6_tcp_md5() log_start show_hint "Should timeout since client in default VRF uses VRF password" - run_cmd nettest -6 -s -d ${VRF} -M ${MD5_PW} -m ${NS_NET6} & + run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET6} & run_cmd nettest -6 -s -M ${MD5_WRONG_PW} -m ${NS_NET6} & sleep 1 run_cmd_nsc nettest -6 -r ${NSA_IP6} -X ${MD5_PW} @@ -2447,7 +2447,7 @@ ipv6_tcp_md5() log_start show_hint "Should timeout since client in VRF uses default VRF password" - run_cmd nettest -6 -s -d ${VRF} -M ${MD5_PW} -m ${NS_NET6} & + run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET6} & run_cmd nettest -6 -s -M ${MD5_WRONG_PW} -m ${NS_NET6} & sleep 1 run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_WRONG_PW} @@ -2457,11 +2457,11 @@ ipv6_tcp_md5() # negative tests # log_start - run_cmd nettest -6 -s -d ${NSA_DEV} -M ${MD5_PW} -m ${NSB_IP6} + run_cmd nettest -6 -s -I ${NSA_DEV} -M ${MD5_PW} -m ${NSB_IP6} log_test $? 1 "MD5: VRF: Device must be a VRF - single address" log_start - run_cmd nettest -6 -s -d ${NSA_DEV} -M ${MD5_PW} -m ${NS_NET6} + run_cmd nettest -6 -s -I ${NSA_DEV} -M ${MD5_PW} -m ${NS_NET6} log_test $? 1 "MD5: VRF: Device must be a VRF - prefix" } @@ -2534,7 +2534,7 @@ ipv6_tcp_novrf() a=${NSA_IP6} log_start - run_cmd nettest -6 -s -d ${NSA_DEV} -2 ${NSA_DEV} & + run_cmd nettest -6 -s -I ${NSA_DEV} -3 ${NSA_DEV} & sleep 1 run_cmd nettest -6 -r ${a} -0 ${a} log_test_addr ${a} $? 0 "Device server, unbound client, local connection" @@ -2543,7 +2543,7 @@ ipv6_tcp_novrf() do log_start show_hint "Should fail 'Connection refused' since addresses on loopback are out of device scope" - run_cmd nettest -6 -s -d ${NSA_DEV} & + run_cmd nettest -6 -s -I ${NSA_DEV} & sleep 1 run_cmd nettest -6 -r ${a} log_test_addr ${a} $? 1 "Device server, unbound client, local connection" @@ -2569,7 +2569,7 @@ ipv6_tcp_novrf() for a in ${NSA_IP6} ${NSA_LINKIP6} do log_start - run_cmd nettest -6 -s -d ${NSA_DEV} -2 ${NSA_DEV} & + run_cmd nettest -6 -s -I ${NSA_DEV} -3 ${NSA_DEV} & sleep 1 run_cmd nettest -6 -d ${NSA_DEV} -r ${a} log_test_addr ${a} $? 0 "Device server, device client, local conn" @@ -2611,7 +2611,7 @@ ipv6_tcp_vrf() for a in ${NSA_IP6} ${VRF_IP6} do log_start - run_cmd nettest -6 -s -d ${VRF} -2 ${VRF} & + run_cmd nettest -6 -s -I ${VRF} -3 ${VRF} & sleep 1 run_cmd_nsb nettest -6 -r ${a} log_test_addr ${a} $? 0 "VRF server" @@ -2620,7 +2620,7 @@ ipv6_tcp_vrf() # link local is always bound to ingress device a=${NSA_LINKIP6}%${NSB_DEV} log_start - run_cmd nettest -6 -s -d ${VRF} -2 ${NSA_DEV} & + run_cmd nettest -6 -s -I ${VRF} -3 ${NSA_DEV} & sleep 1 run_cmd_nsb nettest -6 -r ${a} log_test_addr ${a} $? 0 "VRF server" @@ -2628,7 +2628,7 @@ ipv6_tcp_vrf() for a in ${NSA_IP6} ${VRF_IP6} ${NSA_LINKIP6}%${NSB_DEV} do log_start - run_cmd nettest -6 -s -d ${NSA_DEV} -2 ${NSA_DEV} & + run_cmd nettest -6 -s -I ${NSA_DEV} -3 ${NSA_DEV} & sleep 1 run_cmd_nsb nettest -6 -r ${a} log_test_addr ${a} $? 0 "Device server" @@ -2664,7 +2664,7 @@ ipv6_tcp_vrf() for a in ${NSA_IP6} ${VRF_IP6} do log_start - run_cmd nettest -6 -s -2 ${VRF} & + run_cmd nettest -6 -s -3 ${VRF} & sleep 1 run_cmd_nsb nettest -6 -r ${a} log_test_addr ${a} $? 0 "Global server" @@ -2673,7 +2673,7 @@ ipv6_tcp_vrf() for a in ${NSA_IP6} ${VRF_IP6} do log_start - run_cmd nettest -6 -s -d ${VRF} -2 ${VRF} & + run_cmd nettest -6 -s -I ${VRF} -3 ${VRF} & sleep 1 run_cmd_nsb nettest -6 -r ${a} log_test_addr ${a} $? 0 "VRF server" @@ -2682,13 +2682,13 @@ ipv6_tcp_vrf() # For LLA, child socket is bound to device a=${NSA_LINKIP6}%${NSB_DEV} log_start - run_cmd nettest -6 -s -2 ${NSA_DEV} & + run_cmd nettest -6 -s -3 ${NSA_DEV} & sleep 1 run_cmd_nsb nettest -6 -r ${a} log_test_addr ${a} $? 0 "Global server" log_start - run_cmd nettest -6 -s -d ${VRF} -2 ${NSA_DEV} & + run_cmd nettest -6 -s -I ${VRF} -3 ${NSA_DEV} & sleep 1 run_cmd_nsb nettest -6 -r ${a} log_test_addr ${a} $? 0 "VRF server" @@ -2696,7 +2696,7 @@ ipv6_tcp_vrf() for a in ${NSA_IP6} ${NSA_LINKIP6}%${NSB_DEV} do log_start - run_cmd nettest -6 -s -d ${NSA_DEV} -2 ${NSA_DEV} & + run_cmd nettest -6 -s -I ${NSA_DEV} -3 ${NSA_DEV} & sleep 1 run_cmd_nsb nettest -6 -r ${a} log_test_addr ${a} $? 0 "Device server" @@ -2716,7 +2716,7 @@ ipv6_tcp_vrf() do log_start show_hint "Fails 'Connection refused' since client is not in VRF" - run_cmd nettest -6 -s -d ${VRF} & + run_cmd nettest -6 -s -I ${VRF} & sleep 1 run_cmd nettest -6 -r ${a} log_test_addr ${a} $? 1 "Global server, local connection" @@ -2771,7 +2771,7 @@ ipv6_tcp_vrf() for a in ${NSA_IP6} ${VRF_IP6} ::1 do log_start - run_cmd nettest -6 -s -d ${VRF} -2 ${VRF} & + run_cmd nettest -6 -s -I ${VRF} -3 ${VRF} & sleep 1 run_cmd nettest -6 -r ${a} -d ${VRF} -0 ${a} log_test_addr ${a} $? 0 "VRF server, VRF client, local connection" @@ -2779,7 +2779,7 @@ ipv6_tcp_vrf() a=${NSA_IP6} log_start - run_cmd nettest -6 -s -d ${VRF} -2 ${VRF} & + run_cmd nettest -6 -s -I ${VRF} -3 ${VRF} & sleep 1 run_cmd nettest -6 -r ${a} -d ${NSA_DEV} -0 ${a} log_test_addr ${a} $? 0 "VRF server, device client, local connection" @@ -2787,13 +2787,13 @@ ipv6_tcp_vrf() a=${NSA_IP6} log_start show_hint "Should fail since unbound client is out of VRF scope" - run_cmd nettest -6 -s -d ${VRF} & + run_cmd nettest -6 -s -I ${VRF} & sleep 1 run_cmd nettest -6 -r ${a} log_test_addr ${a} $? 1 "VRF server, unbound client, local connection" log_start - run_cmd nettest -6 -s -d ${NSA_DEV} -2 ${NSA_DEV} & + run_cmd nettest -6 -s -I ${NSA_DEV} -3 ${NSA_DEV} & sleep 1 run_cmd nettest -6 -r ${a} -d ${VRF} -0 ${a} log_test_addr ${a} $? 0 "Device server, VRF client, local connection" @@ -2801,7 +2801,7 @@ ipv6_tcp_vrf() for a in ${NSA_IP6} ${NSA_LINKIP6} do log_start - run_cmd nettest -6 -s -d ${NSA_DEV} -2 ${NSA_DEV} & + run_cmd nettest -6 -s -I ${NSA_DEV} -3 ${NSA_DEV} & sleep 1 run_cmd nettest -6 -r ${a} -d ${NSA_DEV} -0 ${a} log_test_addr ${a} $? 0 "Device server, device client, local connection" @@ -2841,13 +2841,13 @@ ipv6_udp_novrf() for a in ${NSA_IP6} ${NSA_LINKIP6}%${NSB_DEV} do log_start - run_cmd nettest -6 -D -s -2 ${NSA_DEV} & + run_cmd nettest -6 -D -s -3 ${NSA_DEV} & sleep 1 run_cmd_nsb nettest -6 -D -r ${a} log_test_addr ${a} $? 0 "Global server" log_start - run_cmd nettest -6 -D -d ${NSA_DEV} -s -2 ${NSA_DEV} & + run_cmd nettest -6 -D -I ${NSA_DEV} -s -3 ${NSA_DEV} & sleep 1 run_cmd_nsb nettest -6 -D -r ${a} log_test_addr ${a} $? 0 "Device server" @@ -2855,7 +2855,7 @@ ipv6_udp_novrf() a=${NSA_LO_IP6} log_start - run_cmd nettest -6 -D -s -2 ${NSA_DEV} & + run_cmd nettest -6 -D -s -3 ${NSA_DEV} & sleep 1 run_cmd_nsb nettest -6 -D -r ${a} log_test_addr ${a} $? 0 "Global server" @@ -2865,7 +2865,7 @@ ipv6_udp_novrf() # behavior. #log_start #show_hint "Should fail since loopback address is out of scope" - #run_cmd nettest -6 -D -d ${NSA_DEV} -s -2 ${NSA_DEV} & + #run_cmd nettest -6 -D -I ${NSA_DEV} -s -3 ${NSA_DEV} & #sleep 1 #run_cmd_nsb nettest -6 -D -r ${a} #log_test_addr ${a} $? 1 "Device server" @@ -2933,7 +2933,7 @@ ipv6_udp_novrf() a=${NSA_IP6} log_start - run_cmd nettest -6 -s -D -d ${NSA_DEV} -2 ${NSA_DEV} & + run_cmd nettest -6 -s -D -I ${NSA_DEV} -3 ${NSA_DEV} & sleep 1 run_cmd nettest -6 -D -r ${a} log_test_addr ${a} $? 0 "Device server, unbound client, local connection" @@ -2942,7 +2942,7 @@ ipv6_udp_novrf() do log_start show_hint "Should fail 'Connection refused' since address is out of device scope" - run_cmd nettest -6 -s -D -d ${NSA_DEV} & + run_cmd nettest -6 -s -D -I ${NSA_DEV} & sleep 1 run_cmd nettest -6 -D -r ${a} log_test_addr ${a} $? 1 "Device server, local connection" @@ -2993,7 +2993,7 @@ ipv6_udp_novrf() a=${NSA_IP6} log_start - run_cmd nettest -6 -D -s -d ${NSA_DEV} -2 ${NSA_DEV} & + run_cmd nettest -6 -D -s -I ${NSA_DEV} -3 ${NSA_DEV} & sleep 1 run_cmd nettest -6 -D -d ${NSA_DEV} -r ${a} -0 ${a} log_test_addr ${a} $? 0 "Device server, device client, local conn" @@ -3040,7 +3040,7 @@ ipv6_udp_vrf() for a in ${NSA_IP6} ${VRF_IP6} do log_start - run_cmd nettest -6 -D -d ${VRF} -s -2 ${NSA_DEV} & + run_cmd nettest -6 -D -I ${VRF} -s -3 ${NSA_DEV} & sleep 1 run_cmd_nsb nettest -6 -D -r ${a} log_test_addr ${a} $? 0 "VRF server" @@ -3049,7 +3049,7 @@ ipv6_udp_vrf() for a in ${NSA_IP6} ${VRF_IP6} do log_start - run_cmd nettest -6 -D -d ${NSA_DEV} -s -2 ${NSA_DEV} & + run_cmd nettest -6 -D -I ${NSA_DEV} -s -3 ${NSA_DEV} & sleep 1 run_cmd_nsb nettest -6 -D -r ${a} log_test_addr ${a} $? 0 "Enslaved device server" @@ -3080,7 +3080,7 @@ ipv6_udp_vrf() for a in ${NSA_IP6} ${VRF_IP6} do log_start - run_cmd nettest -6 -D -d ${VRF} -s & + run_cmd nettest -6 -D -I ${VRF} -s & sleep 1 run_cmd nettest -6 -D -d ${VRF} -r ${a} log_test_addr ${a} $? 0 "VRF server, VRF client, local conn" @@ -3095,19 +3095,19 @@ ipv6_udp_vrf() log_test_addr ${a} $? 1 "Global server, device client, local conn" log_start - run_cmd nettest -6 -D -d ${VRF} -s -2 ${NSA_DEV} & + run_cmd nettest -6 -D -I ${VRF} -s -3 ${NSA_DEV} & sleep 1 run_cmd nettest -6 -D -d ${NSA_DEV} -r ${a} log_test_addr ${a} $? 0 "VRF server, device client, local conn" log_start - run_cmd nettest -6 -D -d ${NSA_DEV} -s -2 ${NSA_DEV} & + run_cmd nettest -6 -D -I ${NSA_DEV} -s -3 ${NSA_DEV} & sleep 1 run_cmd nettest -6 -D -d ${VRF} -r ${a} log_test_addr ${a} $? 0 "Enslaved device server, VRF client, local conn" log_start - run_cmd nettest -6 -D -d ${NSA_DEV} -s -2 ${NSA_DEV} & + run_cmd nettest -6 -D -I ${NSA_DEV} -s -3 ${NSA_DEV} & sleep 1 run_cmd nettest -6 -D -d ${NSA_DEV} -r ${a} log_test_addr ${a} $? 0 "Enslaved device server, device client, local conn" @@ -3122,7 +3122,7 @@ ipv6_udp_vrf() for a in ${NSA_IP6} ${VRF_IP6} do log_start - run_cmd nettest -6 -D -s -2 ${NSA_DEV} & + run_cmd nettest -6 -D -s -3 ${NSA_DEV} & sleep 1 run_cmd_nsb nettest -6 -D -r ${a} log_test_addr ${a} $? 0 "Global server" @@ -3131,7 +3131,7 @@ ipv6_udp_vrf() for a in ${NSA_IP6} ${VRF_IP6} do log_start - run_cmd nettest -6 -D -d ${VRF} -s -2 ${NSA_DEV} & + run_cmd nettest -6 -D -I ${VRF} -s -3 ${NSA_DEV} & sleep 1 run_cmd_nsb nettest -6 -D -r ${a} log_test_addr ${a} $? 0 "VRF server" @@ -3140,7 +3140,7 @@ ipv6_udp_vrf() for a in ${NSA_IP6} ${VRF_IP6} do log_start - run_cmd nettest -6 -D -d ${NSA_DEV} -s -2 ${NSA_DEV} & + run_cmd nettest -6 -D -I ${NSA_DEV} -s -3 ${NSA_DEV} & sleep 1 run_cmd_nsb nettest -6 -D -r ${a} log_test_addr ${a} $? 0 "Enslaved device server" @@ -3184,13 +3184,13 @@ ipv6_udp_vrf() # a=${NSA_IP6} log_start - run_cmd nettest -6 -D -s -2 ${NSA_DEV} & + run_cmd nettest -6 -D -s -3 ${NSA_DEV} & sleep 1 run_cmd nettest -6 -D -d ${VRF} -r ${a} log_test_addr ${a} $? 0 "Global server, VRF client, local conn" #log_start - run_cmd nettest -6 -D -d ${VRF} -s -2 ${NSA_DEV} & + run_cmd nettest -6 -D -I ${VRF} -s -3 ${NSA_DEV} & sleep 1 run_cmd nettest -6 -D -d ${VRF} -r ${a} log_test_addr ${a} $? 0 "VRF server, VRF client, local conn" @@ -3198,13 +3198,13 @@ ipv6_udp_vrf() a=${VRF_IP6} log_start - run_cmd nettest -6 -D -s -2 ${VRF} & + run_cmd nettest -6 -D -s -3 ${VRF} & sleep 1 run_cmd nettest -6 -D -d ${VRF} -r ${a} log_test_addr ${a} $? 0 "Global server, VRF client, local conn" log_start - run_cmd nettest -6 -D -d ${VRF} -s -2 ${VRF} & + run_cmd nettest -6 -D -I ${VRF} -s -3 ${VRF} & sleep 1 run_cmd nettest -6 -D -d ${VRF} -r ${a} log_test_addr ${a} $? 0 "VRF server, VRF client, local conn" @@ -3220,25 +3220,25 @@ ipv6_udp_vrf() # device to global IP a=${NSA_IP6} log_start - run_cmd nettest -6 -D -s -2 ${NSA_DEV} & + run_cmd nettest -6 -D -s -3 ${NSA_DEV} & sleep 1 run_cmd nettest -6 -D -d ${NSA_DEV} -r ${a} log_test_addr ${a} $? 0 "Global server, device client, local conn" log_start - run_cmd nettest -6 -D -d ${VRF} -s -2 ${NSA_DEV} & + run_cmd nettest -6 -D -I ${VRF} -s -3 ${NSA_DEV} & sleep 1 run_cmd nettest -6 -D -d ${NSA_DEV} -r ${a} log_test_addr ${a} $? 0 "VRF server, device client, local conn" log_start - run_cmd nettest -6 -D -d ${NSA_DEV} -s -2 ${NSA_DEV} & + run_cmd nettest -6 -D -I ${NSA_DEV} -s -3 ${NSA_DEV} & sleep 1 run_cmd nettest -6 -D -d ${VRF} -r ${a} log_test_addr ${a} $? 0 "Device server, VRF client, local conn" log_start - run_cmd nettest -6 -D -d ${NSA_DEV} -s -2 ${NSA_DEV} & + run_cmd nettest -6 -D -I ${NSA_DEV} -s -3 ${NSA_DEV} & sleep 1 run_cmd nettest -6 -D -d ${NSA_DEV} -r ${a} log_test_addr ${a} $? 0 "Device server, device client, local conn" @@ -3332,7 +3332,7 @@ ipv6_addr_bind_novrf() log_test_addr ${a} $? 0 "Raw socket bind to local address" log_start - run_cmd nettest -6 -s -R -P ipv6-icmp -l ${a} -d ${NSA_DEV} -b + run_cmd nettest -6 -s -R -P ipv6-icmp -l ${a} -I ${NSA_DEV} -b log_test_addr ${a} $? 0 "Raw socket bind to local address after device bind" done @@ -3345,13 +3345,13 @@ ipv6_addr_bind_novrf() log_test_addr ${a} $? 0 "TCP socket bind to local address" log_start - run_cmd nettest -6 -s -l ${a} -d ${NSA_DEV} -t1 -b + run_cmd nettest -6 -s -l ${a} -I ${NSA_DEV} -t1 -b log_test_addr ${a} $? 0 "TCP socket bind to local address after device bind" a=${NSA_LO_IP6} log_start show_hint "Should fail with 'Cannot assign requested address'" - run_cmd nettest -6 -s -l ${a} -d ${NSA_DEV} -t1 -b + run_cmd nettest -6 -s -l ${a} -I ${NSA_DEV} -t1 -b log_test_addr ${a} $? 1 "TCP socket bind to out of scope local address" } @@ -3363,18 +3363,18 @@ ipv6_addr_bind_vrf() for a in ${NSA_IP6} ${VRF_IP6} do log_start - run_cmd nettest -6 -s -R -P ipv6-icmp -l ${a} -d ${VRF} -b + run_cmd nettest -6 -s -R -P ipv6-icmp -l ${a} -I ${VRF} -b log_test_addr ${a} $? 0 "Raw socket bind to local address after vrf bind" log_start - run_cmd nettest -6 -s -R -P ipv6-icmp -l ${a} -d ${NSA_DEV} -b + run_cmd nettest -6 -s -R -P ipv6-icmp -l ${a} -I ${NSA_DEV} -b log_test_addr ${a} $? 0 "Raw socket bind to local address after device bind" done a=${NSA_LO_IP6} log_start show_hint "Address on loopback is out of VRF scope" - run_cmd nettest -6 -s -R -P ipv6-icmp -l ${a} -d ${VRF} -b + run_cmd nettest -6 -s -R -P ipv6-icmp -l ${a} -I ${VRF} -b log_test_addr ${a} $? 1 "Raw socket bind to invalid local address after vrf bind" # @@ -3384,29 +3384,29 @@ ipv6_addr_bind_vrf() for a in ${NSA_IP6} ${VRF_IP6} do log_start - run_cmd nettest -6 -s -l ${a} -d ${VRF} -t1 -b + run_cmd nettest -6 -s -l ${a} -I ${VRF} -t1 -b log_test_addr ${a} $? 0 "TCP socket bind to local address with VRF bind" done a=${NSA_IP6} log_start - run_cmd nettest -6 -s -l ${a} -d ${NSA_DEV} -t1 -b + run_cmd nettest -6 -s -l ${a} -I ${NSA_DEV} -t1 -b log_test_addr ${a} $? 0 "TCP socket bind to local address with device bind" a=${VRF_IP6} log_start - run_cmd nettest -6 -s -l ${a} -d ${NSA_DEV} -t1 -b + run_cmd nettest -6 -s -l ${a} -I ${NSA_DEV} -t1 -b log_test_addr ${a} $? 1 "TCP socket bind to VRF address with device bind" a=${NSA_LO_IP6} log_start show_hint "Address on loopback out of scope for VRF" - run_cmd nettest -6 -s -l ${a} -d ${VRF} -t1 -b + run_cmd nettest -6 -s -l ${a} -I ${VRF} -t1 -b log_test_addr ${a} $? 1 "TCP socket bind to invalid local address for VRF" log_start show_hint "Address on loopback out of scope for device in VRF" - run_cmd nettest -6 -s -l ${a} -d ${NSA_DEV} -t1 -b + run_cmd nettest -6 -s -l ${a} -I ${NSA_DEV} -t1 -b log_test_addr ${a} $? 1 "TCP socket bind to invalid local address for device bind" } @@ -3454,7 +3454,7 @@ ipv6_rt() for a in ${NSA_IP6} ${VRF_IP6} do log_start - run_cmd nettest ${varg} -d ${VRF} -s & + run_cmd nettest ${varg} -I ${VRF} -s & sleep 1 run_cmd_nsb nettest ${varg} -r ${a} & sleep 3 @@ -3468,7 +3468,7 @@ ipv6_rt() for a in ${NSA_IP6} ${VRF_IP6} do log_start - run_cmd nettest ${varg} -d ${NSA_DEV} -s & + run_cmd nettest ${varg} -I ${NSA_DEV} -s & sleep 1 run_cmd_nsb nettest ${varg} -r ${a} & sleep 3 @@ -3525,7 +3525,7 @@ ipv6_rt() for a in ${NSA_IP6} ${VRF_IP6} do log_start - run_cmd nettest ${varg} -d ${VRF} -s & + run_cmd nettest ${varg} -I ${VRF} -s & sleep 1 run_cmd nettest ${varg} -d ${VRF} -r ${a} & sleep 3 @@ -3549,7 +3549,7 @@ ipv6_rt() setup ${with_vrf} log_start - run_cmd nettest ${varg} -d ${VRF} -s & + run_cmd nettest ${varg} -I ${VRF} -s & sleep 1 run_cmd nettest ${varg} -d ${NSA_DEV} -r ${a} & sleep 3 @@ -3560,7 +3560,7 @@ ipv6_rt() setup ${with_vrf} log_start - run_cmd nettest ${varg} -d ${NSA_DEV} -s & + run_cmd nettest ${varg} -I ${NSA_DEV} -s & sleep 1 run_cmd nettest ${varg} -d ${NSA_DEV} -r ${a} & sleep 3 diff --git a/tools/testing/selftests/net/nettest.c b/tools/testing/selftests/net/nettest.c index e20e74e001a21..1707af21eb153 100644 --- a/tools/testing/selftests/net/nettest.c +++ b/tools/testing/selftests/net/nettest.c @@ -79,6 +79,7 @@ struct sock_args { int use_setsockopt; int use_cmsg; const char *dev; + const char *server_dev; int ifindex; const char *clientns; @@ -96,6 +97,7 @@ struct sock_args { /* expected addresses and device index for connection */ const char *expected_dev; + const char *expected_server_dev; int expected_ifindex; /* local address */ @@ -1424,6 +1426,8 @@ static int do_server(struct sock_args *args, int ipc_fd) log_msg("Switched server netns\n"); } + args->dev = args->server_dev; + args->expected_dev = args->expected_server_dev; if (resolve_devices(args) || validate_addresses(args)) goto err_exit; @@ -1767,7 +1771,7 @@ static int ipc_parent(int cpid, int fd, struct sock_args *args) return client_status; } -#define GETOPT_STR "sr:l:p:t:g:P:DRn:M:X:m:d:BN:O:SCi6L:0:1:2:Fbq" +#define GETOPT_STR "sr:l:p:t:g:P:DRn:M:X:m:d:I:BN:O:SCi6L:0:1:2:3:Fbq" static void print_usage(char *prog) { @@ -1791,6 +1795,7 @@ static void print_usage(char *prog) " -l addr local address to bind to\n" "\n" " -d dev bind socket to given device name\n" + " -I dev bind socket to given device name - server mode\n" " -S use setsockopt (IP_UNICAST_IF or IP_MULTICAST_IF)\n" " to set device binding\n" " -C use cmsg and IP_PKTINFO to specify device binding\n" @@ -1807,6 +1812,7 @@ static void print_usage(char *prog) " -0 addr Expected local address\n" " -1 addr Expected remote address\n" " -2 dev Expected device name (or index) to receive packet\n" + " -3 dev Expected device name (or index) to receive packets - server mode\n" "\n" " -b Bind test only.\n" " -q Be quiet. Run test without printing anything.\n" @@ -1919,6 +1925,9 @@ int main(int argc, char *argv[]) case 'd': args.dev = optarg; break; + case 'I': + args.server_dev = optarg; + break; case 'i': interactive = 1; break; @@ -1945,6 +1954,9 @@ int main(int argc, char *argv[]) case '2': args.expected_dev = optarg; break; + case '3': + args.expected_server_dev = optarg; + break; case 'q': quiet = 1; break; -- GitLab From f26a008c45122d85f8b753f861464b136a1d3ae5 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Wed, 13 Jan 2021 20:09:48 -0700 Subject: [PATCH 0835/2012] selftests: Remove exraneous newline in nettest Signed-off-by: David Ahern Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/nettest.c | 1 - 1 file changed, 1 deletion(-) diff --git a/tools/testing/selftests/net/nettest.c b/tools/testing/selftests/net/nettest.c index 1707af21eb153..55c586eb2393f 100644 --- a/tools/testing/selftests/net/nettest.c +++ b/tools/testing/selftests/net/nettest.c @@ -1462,7 +1462,6 @@ static int do_server(struct sock_args *args, int ipc_fd) ipc_write(ipc_fd, 1); while (1) { - log_msg("\n"); log_msg("waiting for client connection.\n"); FD_ZERO(&rfds); FD_SET(lsd, &rfds); -- GitLab From 5265a0142f57b10f57b5795e0dba90edfd127803 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Wed, 13 Jan 2021 20:09:49 -0700 Subject: [PATCH 0836/2012] selftests: Add separate option to nettest for address binding Add separate option to nettest to specify local address binding in client mode. Signed-off-by: David Ahern Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/fcnal-test.sh | 12 ++++++------ tools/testing/selftests/net/nettest.c | 11 +++++++++-- 2 files changed, 15 insertions(+), 8 deletions(-) diff --git a/tools/testing/selftests/net/fcnal-test.sh b/tools/testing/selftests/net/fcnal-test.sh index 2514a9cb95307..a8ad92850e630 100755 --- a/tools/testing/selftests/net/fcnal-test.sh +++ b/tools/testing/selftests/net/fcnal-test.sh @@ -854,7 +854,7 @@ ipv4_tcp_md5_novrf() show_hint "Should timeout due to MD5 mismatch" run_cmd nettest -s -M ${MD5_PW} -m ${NS_NET} & sleep 1 - run_cmd_nsb nettest -l ${NSB_LO_IP} -r ${NSA_IP} -X ${MD5_PW} + run_cmd_nsb nettest -c ${NSB_LO_IP} -r ${NSA_IP} -X ${MD5_PW} log_test $? 2 "MD5: Prefix config, client address not in configured prefix" } @@ -922,7 +922,7 @@ ipv4_tcp_md5() show_hint "Should timeout since client address is outside of prefix" run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET} & sleep 1 - run_cmd_nsb nettest -l ${NSB_LO_IP} -r ${NSA_IP} -X ${MD5_PW} + run_cmd_nsb nettest -c ${NSB_LO_IP} -r ${NSA_IP} -X ${MD5_PW} log_test $? 2 "MD5: VRF: Prefix config, client address not in configured prefix" # @@ -1706,11 +1706,11 @@ ipv4_addr_bind_novrf() # a=${NSA_IP} log_start - run_cmd nettest -l ${a} -r ${NSB_IP} -t1 -b + run_cmd nettest -c ${a} -r ${NSB_IP} -t1 -b log_test_addr ${a} $? 0 "TCP socket bind to local address" log_start - run_cmd nettest -l ${a} -r ${NSB_IP} -d ${NSA_DEV} -t1 -b + run_cmd nettest -c ${a} -r ${NSB_IP} -d ${NSA_DEV} -t1 -b log_test_addr ${a} $? 0 "TCP socket bind to local address after device bind" # Sadly, the kernel allows binding a socket to a device and then @@ -2318,7 +2318,7 @@ ipv6_tcp_md5_novrf() show_hint "Should timeout due to MD5 mismatch" run_cmd nettest -6 -s -M ${MD5_PW} -m ${NS_NET6} & sleep 1 - run_cmd_nsb nettest -6 -l ${NSB_LO_IP6} -r ${NSA_IP6} -X ${MD5_PW} + run_cmd_nsb nettest -6 -c ${NSB_LO_IP6} -r ${NSA_IP6} -X ${MD5_PW} log_test $? 2 "MD5: Prefix config, client address not in configured prefix" } @@ -2386,7 +2386,7 @@ ipv6_tcp_md5() show_hint "Should timeout since client address is outside of prefix" run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET6} & sleep 1 - run_cmd_nsb nettest -6 -l ${NSB_LO_IP6} -r ${NSA_IP6} -X ${MD5_PW} + run_cmd_nsb nettest -6 -c ${NSB_LO_IP6} -r ${NSA_IP6} -X ${MD5_PW} log_test $? 2 "MD5: VRF: Prefix config, client address not in configured prefix" # diff --git a/tools/testing/selftests/net/nettest.c b/tools/testing/selftests/net/nettest.c index 55c586eb2393f..6365c7fd1262a 100644 --- a/tools/testing/selftests/net/nettest.c +++ b/tools/testing/selftests/net/nettest.c @@ -48,6 +48,7 @@ struct sock_args { /* local address */ const char *local_addr_str; + const char *client_local_addr_str; union { struct in_addr in; struct in6_addr in6; @@ -1630,6 +1631,7 @@ static int do_client(struct sock_args *args) log_msg("Switched client netns\n"); } + args->local_addr_str = args->client_local_addr_str; if (resolve_devices(args) || validate_addresses(args)) return 1; @@ -1770,7 +1772,7 @@ static int ipc_parent(int cpid, int fd, struct sock_args *args) return client_status; } -#define GETOPT_STR "sr:l:p:t:g:P:DRn:M:X:m:d:I:BN:O:SCi6L:0:1:2:3:Fbq" +#define GETOPT_STR "sr:l:c:p:t:g:P:DRn:M:X:m:d:I:BN:O:SCi6L:0:1:2:3:Fbq" static void print_usage(char *prog) { @@ -1791,7 +1793,8 @@ static void print_usage(char *prog) " -6 IPv6 (default is IPv4)\n" " -P proto protocol for socket: icmp, ospf (default: none)\n" " -D|R datagram (D) / raw (R) socket (default stream)\n" - " -l addr local address to bind to\n" + " -l addr local address to bind to in server mode\n" + " -c addr local address to bind to in client mode\n" "\n" " -d dev bind socket to given device name\n" " -I dev bind socket to given device name - server mode\n" @@ -1859,6 +1862,10 @@ int main(int argc, char *argv[]) args.has_remote_ip = 1; args.remote_addr_str = optarg; break; + case 'c': + args.has_local_ip = 1; + args.client_local_addr_str = optarg; + break; case 'p': if (str_to_uint(optarg, 1, 65535, &tmp) != 0) { fprintf(stderr, "Invalid port\n"); -- GitLab From a5317f3b06b3afe7906785dc5912aca3058cfdc2 Mon Sep 17 00:00:00 2001 From: Eelco Chaudron Date: Wed, 13 Jan 2021 14:50:00 +0100 Subject: [PATCH 0837/2012] net: openvswitch: add log message for error case As requested by upstream OVS, added some error messages in the validate_and_copy_dec_ttl function. Includes a small cleanup, which removes an unnecessary parameter from the dec_ttl_exception_handler() function. Reported-by: Flavio Leitner Signed-off-by: Eelco Chaudron Acked-by: Flavio Leitner Link: https://lore.kernel.org/r/161054576573.26637.18396634650212670580.stgit@ebuild Signed-off-by: Jakub Kicinski --- net/openvswitch/actions.c | 12 +++++------- net/openvswitch/flow_netlink.c | 14 ++++++++++++-- 2 files changed, 17 insertions(+), 9 deletions(-) diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index e8902a7e60f24..92a0b67b27282 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -957,14 +957,14 @@ static int output_userspace(struct datapath *dp, struct sk_buff *skb, static int dec_ttl_exception_handler(struct datapath *dp, struct sk_buff *skb, struct sw_flow_key *key, - const struct nlattr *attr, bool last) + const struct nlattr *attr) { /* The first attribute is always 'OVS_DEC_TTL_ATTR_ACTION'. */ struct nlattr *actions = nla_data(attr); if (nla_len(actions)) return clone_execute(dp, skb, key, 0, nla_data(actions), - nla_len(actions), last, false); + nla_len(actions), true, false); consume_skb(skb); return 0; @@ -1418,11 +1418,9 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb, case OVS_ACTION_ATTR_DEC_TTL: err = execute_dec_ttl(skb, key); - if (err == -EHOSTUNREACH) { - err = dec_ttl_exception_handler(dp, skb, key, - a, true); - return err; - } + if (err == -EHOSTUNREACH) + return dec_ttl_exception_handler(dp, skb, + key, a); break; } diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index 4c5c2331e7648..fd1f809e9bc1b 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -2515,15 +2515,25 @@ static int validate_and_copy_dec_ttl(struct net *net, if (type > OVS_DEC_TTL_ATTR_MAX) continue; - if (!type || attrs[type]) + if (!type || attrs[type]) { + OVS_NLERR(log, "Duplicate or invalid key (type %d).", + type); return -EINVAL; + } attrs[type] = a; } + if (rem) { + OVS_NLERR(log, "Message has %d unknown bytes.", rem); + return -EINVAL; + } + actions = attrs[OVS_DEC_TTL_ATTR_ACTION]; - if (rem || !actions || (nla_len(actions) && nla_len(actions) < NLA_HDRLEN)) + if (!actions || (nla_len(actions) && nla_len(actions) < NLA_HDRLEN)) { + OVS_NLERR(log, "Missing valid actions attribute."); return -EINVAL; + } start = add_nested_action_start(sfa, OVS_ACTION_ATTR_DEC_TTL, log); if (start < 0) -- GitLab From c612fe7808033ca2c7000fa06a4abb12a5e3e253 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Thu, 14 Jan 2021 10:35:56 +0200 Subject: [PATCH 0838/2012] net: marvell: prestera: fix uninitialized vid in prestera_port_vlans_add prestera_bridge_port_vlan_add should have been called with vlan->vid, however this was masked by the presence of the local vid variable and I did not notice the build warning. Reported-by: kernel test robot Fixes: b7a9e0da2d1c ("net: switchdev: remove vid_begin -> vid_end range from VLAN objects") Signed-off-by: Vladimir Oltean Reviewed-by: Florian Fainelli Reviewed-by: Taras Chornyi Link: https://lore.kernel.org/r/20210114083556.2274440-1-olteanv@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/marvell/prestera/prestera_switchdev.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ethernet/marvell/prestera/prestera_switchdev.c b/drivers/net/ethernet/marvell/prestera/prestera_switchdev.c index beb6447fbe407..8c2b03151736c 100644 --- a/drivers/net/ethernet/marvell/prestera/prestera_switchdev.c +++ b/drivers/net/ethernet/marvell/prestera/prestera_switchdev.c @@ -1007,7 +1007,6 @@ static int prestera_port_vlans_add(struct prestera_port *port, struct prestera_bridge_port *br_port; struct prestera_switch *sw = port->sw; struct prestera_bridge *bridge; - u16 vid; if (netif_is_bridge_master(dev)) return 0; @@ -1021,7 +1020,7 @@ static int prestera_port_vlans_add(struct prestera_port *port, return 0; return prestera_bridge_port_vlan_add(port, br_port, - vid, flag_untagged, + vlan->vid, flag_untagged, flag_pvid, extack); } -- GitLab From bb5c64c879e5684d8e65421ae3c3d3094ecb71c3 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Thu, 14 Jan 2021 09:47:57 +0100 Subject: [PATCH 0839/2012] mlxsw: pci: switch from 'pci_' to 'dma_' API The wrappers in include/linux/pci-dma-compat.h should go away. The patch has been generated with the coccinelle script below and has been hand modified to replace GFP_ with a correct flag. It has been compile tested. When memory is allocated in 'mlxsw_pci_queue_init()' and 'mlxsw_pci_fw_area_init()' GFP_KERNEL can be used because both functions are already using this flag and no lock is acquired. When memory is allocated in 'mlxsw_pci_mbox_alloc()' GFP_KERNEL can be used because it is only called from the probe function and no lock is acquired in the between. The call chain is: --> mlxsw_pci_probe() --> mlxsw_pci_cmd_init() --> mlxsw_pci_mbox_alloc() While at it, also replace the 'dma_set_mask/dma_set_coherent_mask' sequence by a less verbose 'dma_set_mask_and_coherent() call. @@ @@ - PCI_DMA_BIDIRECTIONAL + DMA_BIDIRECTIONAL @@ @@ - PCI_DMA_TODEVICE + DMA_TO_DEVICE @@ @@ - PCI_DMA_FROMDEVICE + DMA_FROM_DEVICE @@ @@ - PCI_DMA_NONE + DMA_NONE @@ expression e1, e2, e3; @@ - pci_alloc_consistent(e1, e2, e3) + dma_alloc_coherent(&e1->dev, e2, e3, GFP_) @@ expression e1, e2, e3; @@ - pci_zalloc_consistent(e1, e2, e3) + dma_alloc_coherent(&e1->dev, e2, e3, GFP_) @@ expression e1, e2, e3, e4; @@ - pci_free_consistent(e1, e2, e3, e4) + dma_free_coherent(&e1->dev, e2, e3, e4) @@ expression e1, e2, e3, e4; @@ - pci_map_single(e1, e2, e3, e4) + dma_map_single(&e1->dev, e2, e3, e4) @@ expression e1, e2, e3, e4; @@ - pci_unmap_single(e1, e2, e3, e4) + dma_unmap_single(&e1->dev, e2, e3, e4) @@ expression e1, e2, e3, e4, e5; @@ - pci_map_page(e1, e2, e3, e4, e5) + dma_map_page(&e1->dev, e2, e3, e4, e5) @@ expression e1, e2, e3, e4; @@ - pci_unmap_page(e1, e2, e3, e4) + dma_unmap_page(&e1->dev, e2, e3, e4) @@ expression e1, e2, e3, e4; @@ - pci_map_sg(e1, e2, e3, e4) + dma_map_sg(&e1->dev, e2, e3, e4) @@ expression e1, e2, e3, e4; @@ - pci_unmap_sg(e1, e2, e3, e4) + dma_unmap_sg(&e1->dev, e2, e3, e4) @@ expression e1, e2, e3, e4; @@ - pci_dma_sync_single_for_cpu(e1, e2, e3, e4) + dma_sync_single_for_cpu(&e1->dev, e2, e3, e4) @@ expression e1, e2, e3, e4; @@ - pci_dma_sync_single_for_device(e1, e2, e3, e4) + dma_sync_single_for_device(&e1->dev, e2, e3, e4) @@ expression e1, e2, e3, e4; @@ - pci_dma_sync_sg_for_cpu(e1, e2, e3, e4) + dma_sync_sg_for_cpu(&e1->dev, e2, e3, e4) @@ expression e1, e2, e3, e4; @@ - pci_dma_sync_sg_for_device(e1, e2, e3, e4) + dma_sync_sg_for_device(&e1->dev, e2, e3, e4) @@ expression e1, e2; @@ - pci_dma_mapping_error(e1, e2) + dma_mapping_error(&e1->dev, e2) @@ expression e1, e2; @@ - pci_set_dma_mask(e1, e2) + dma_set_mask(&e1->dev, e2) @@ expression e1, e2; @@ - pci_set_consistent_dma_mask(e1, e2) + dma_set_coherent_mask(&e1->dev, e2) Signed-off-by: Christophe JAILLET Tested-by: Ido Schimmel Link: https://lore.kernel.org/r/20210114084757.490540-1-christophe.jaillet@wanadoo.fr Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlxsw/pci.c | 56 ++++++++++------------- 1 file changed, 25 insertions(+), 31 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c b/drivers/net/ethernet/mellanox/mlxsw/pci.c index 4eeae8d780061..d0052537e627e 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/pci.c +++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c @@ -323,8 +323,8 @@ static int mlxsw_pci_wqe_frag_map(struct mlxsw_pci *mlxsw_pci, char *wqe, struct pci_dev *pdev = mlxsw_pci->pdev; dma_addr_t mapaddr; - mapaddr = pci_map_single(pdev, frag_data, frag_len, direction); - if (unlikely(pci_dma_mapping_error(pdev, mapaddr))) { + mapaddr = dma_map_single(&pdev->dev, frag_data, frag_len, direction); + if (unlikely(dma_mapping_error(&pdev->dev, mapaddr))) { dev_err_ratelimited(&pdev->dev, "failed to dma map tx frag\n"); return -EIO; } @@ -342,7 +342,7 @@ static void mlxsw_pci_wqe_frag_unmap(struct mlxsw_pci *mlxsw_pci, char *wqe, if (!frag_len) return; - pci_unmap_single(pdev, mapaddr, frag_len, direction); + dma_unmap_single(&pdev->dev, mapaddr, frag_len, direction); } static int mlxsw_pci_rdq_skb_alloc(struct mlxsw_pci *mlxsw_pci, @@ -858,9 +858,9 @@ static int mlxsw_pci_queue_init(struct mlxsw_pci *mlxsw_pci, char *mbox, tasklet_setup(&q->tasklet, q_ops->tasklet); mem_item->size = MLXSW_PCI_AQ_SIZE; - mem_item->buf = pci_alloc_consistent(mlxsw_pci->pdev, - mem_item->size, - &mem_item->mapaddr); + mem_item->buf = dma_alloc_coherent(&mlxsw_pci->pdev->dev, + mem_item->size, &mem_item->mapaddr, + GFP_KERNEL); if (!mem_item->buf) return -ENOMEM; @@ -890,8 +890,8 @@ static int mlxsw_pci_queue_init(struct mlxsw_pci *mlxsw_pci, char *mbox, err_q_ops_init: kfree(q->elem_info); err_elem_info_alloc: - pci_free_consistent(mlxsw_pci->pdev, mem_item->size, - mem_item->buf, mem_item->mapaddr); + dma_free_coherent(&mlxsw_pci->pdev->dev, mem_item->size, + mem_item->buf, mem_item->mapaddr); return err; } @@ -903,8 +903,8 @@ static void mlxsw_pci_queue_fini(struct mlxsw_pci *mlxsw_pci, q_ops->fini(mlxsw_pci, q); kfree(q->elem_info); - pci_free_consistent(mlxsw_pci->pdev, mem_item->size, - mem_item->buf, mem_item->mapaddr); + dma_free_coherent(&mlxsw_pci->pdev->dev, mem_item->size, + mem_item->buf, mem_item->mapaddr); } static int mlxsw_pci_queue_group_init(struct mlxsw_pci *mlxsw_pci, char *mbox, @@ -1273,9 +1273,9 @@ static int mlxsw_pci_fw_area_init(struct mlxsw_pci *mlxsw_pci, char *mbox, mem_item = &mlxsw_pci->fw_area.items[i]; mem_item->size = MLXSW_PCI_PAGE_SIZE; - mem_item->buf = pci_alloc_consistent(mlxsw_pci->pdev, - mem_item->size, - &mem_item->mapaddr); + mem_item->buf = dma_alloc_coherent(&mlxsw_pci->pdev->dev, + mem_item->size, + &mem_item->mapaddr, GFP_KERNEL); if (!mem_item->buf) { err = -ENOMEM; goto err_alloc; @@ -1304,8 +1304,8 @@ err_alloc: for (i--; i >= 0; i--) { mem_item = &mlxsw_pci->fw_area.items[i]; - pci_free_consistent(mlxsw_pci->pdev, mem_item->size, - mem_item->buf, mem_item->mapaddr); + dma_free_coherent(&mlxsw_pci->pdev->dev, mem_item->size, + mem_item->buf, mem_item->mapaddr); } kfree(mlxsw_pci->fw_area.items); return err; @@ -1321,8 +1321,8 @@ static void mlxsw_pci_fw_area_fini(struct mlxsw_pci *mlxsw_pci) for (i = 0; i < mlxsw_pci->fw_area.count; i++) { mem_item = &mlxsw_pci->fw_area.items[i]; - pci_free_consistent(mlxsw_pci->pdev, mem_item->size, - mem_item->buf, mem_item->mapaddr); + dma_free_coherent(&mlxsw_pci->pdev->dev, mem_item->size, + mem_item->buf, mem_item->mapaddr); } kfree(mlxsw_pci->fw_area.items); } @@ -1347,8 +1347,8 @@ static int mlxsw_pci_mbox_alloc(struct mlxsw_pci *mlxsw_pci, int err = 0; mbox->size = MLXSW_CMD_MBOX_SIZE; - mbox->buf = pci_alloc_consistent(pdev, MLXSW_CMD_MBOX_SIZE, - &mbox->mapaddr); + mbox->buf = dma_alloc_coherent(&pdev->dev, MLXSW_CMD_MBOX_SIZE, + &mbox->mapaddr, GFP_KERNEL); if (!mbox->buf) { dev_err(&pdev->dev, "Failed allocating memory for mailbox\n"); err = -ENOMEM; @@ -1362,8 +1362,8 @@ static void mlxsw_pci_mbox_free(struct mlxsw_pci *mlxsw_pci, { struct pci_dev *pdev = mlxsw_pci->pdev; - pci_free_consistent(pdev, MLXSW_CMD_MBOX_SIZE, mbox->buf, - mbox->mapaddr); + dma_free_coherent(&pdev->dev, MLXSW_CMD_MBOX_SIZE, mbox->buf, + mbox->mapaddr); } static int mlxsw_pci_sys_ready_wait(struct mlxsw_pci *mlxsw_pci, @@ -1848,17 +1848,11 @@ static int mlxsw_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) goto err_pci_request_regions; } - err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64)); - if (!err) { - err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)); - if (err) { - dev_err(&pdev->dev, "pci_set_consistent_dma_mask failed\n"); - goto err_pci_set_dma_mask; - } - } else { - err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32)); + err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); + if (err) { + err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32)); if (err) { - dev_err(&pdev->dev, "pci_set_dma_mask failed\n"); + dev_err(&pdev->dev, "dma_set_mask failed\n"); goto err_pci_set_dma_mask; } } -- GitLab From 32d4c5647aad131cde0a056171d031d21c4380a2 Mon Sep 17 00:00:00 2001 From: Tobias Waldekranz Date: Wed, 13 Jan 2021 09:42:51 +0100 Subject: [PATCH 0840/2012] net: bonding: Notify ports about their initial state When creating a static bond (e.g. balance-xor), all ports will always be enabled. This is set, and the corresponding notification is sent out, before the port is linked to the bond upper. In the offloaded case, this ordering is hard to deal with. The lower will first see a notification that it can not associate with any bond. Then the bond is joined. After that point no more notifications are sent, so all ports remain disabled. This change simply sends an extra notification once the port has been linked to the upper to synchronize the initial state. Signed-off-by: Tobias Waldekranz Acked-by: Jay Vosburgh Tested-by: Vladimir Oltean Signed-off-by: Jakub Kicinski --- drivers/net/bonding/bond_main.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 5fe5232cc3f38..ad5192ee18459 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1922,6 +1922,8 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev, goto err_unregister; } + bond_lower_state_changed(new_slave); + res = bond_sysfs_slave_add(new_slave); if (res) { slave_dbg(bond_dev, slave_dev, "Error %d calling bond_sysfs_slave_add\n", res); -- GitLab From 5696c8aedfccbc5ec644d00fc91f71595b495660 Mon Sep 17 00:00:00 2001 From: Tobias Waldekranz Date: Wed, 13 Jan 2021 09:42:52 +0100 Subject: [PATCH 0841/2012] net: dsa: Don't offload port attributes on standalone ports In a situation where a standalone port is indirectly attached to a bridge (e.g. via a LAG) which is not offloaded, do not offload any port attributes either. The port should behave as a standard NIC. Previously, on mv88e6xxx, this meant that in the following setup: br0 / team0 / \ swp0 swp1 If vlan filtering was enabled on br0, swp0's and swp1's QMode was set to "secure". This caused all untagged packets to be dropped, as their default VID (0) was not loaded into the VTU. Signed-off-by: Tobias Waldekranz Reviewed-by: Vladimir Oltean Signed-off-by: Jakub Kicinski --- net/dsa/slave.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 5a1769602e653..e53c8ca6eb661 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -273,6 +273,9 @@ static int dsa_slave_port_attr_set(struct net_device *dev, struct dsa_port *dp = dsa_slave_to_port(dev); int ret; + if (attr->orig_dev != dev) + return -EOPNOTSUPP; + switch (attr->id) { case SWITCHDEV_ATTR_ID_PORT_STP_STATE: ret = dsa_port_set_state(dp, attr->u.stp_state); -- GitLab From 058102a6e9eb1905a7da41b7a5a7a1f278a3828a Mon Sep 17 00:00:00 2001 From: Tobias Waldekranz Date: Wed, 13 Jan 2021 09:42:53 +0100 Subject: [PATCH 0842/2012] net: dsa: Link aggregation support Monitor the following events and notify the driver when: - A DSA port joins/leaves a LAG. - A LAG, made up of DSA ports, joins/leaves a bridge. - A DSA port in a LAG is enabled/disabled (enabled meaning "distributing" in 802.3ad LACP terms). When a LAG joins a bridge, the DSA subsystem will treat that as each individual port joining the bridge. The driver may look at the port's LAG device pointer to see if it is associated with any LAG, if that is required. This is analogue to how switchdev events are replicated out to all lower devices when reaching e.g. a LAG. Drivers can optionally request that DSA maintain a linear mapping from a LAG ID to the corresponding netdev by setting ds->num_lag_ids to the desired size. In the event that the hardware is not capable of offloading a particular LAG for any reason (the typical case being use of exotic modes like broadcast), DSA will take a hands-off approach, allowing the LAG to be formed as a pure software construct. This is reported back through the extended ACK, but is otherwise transparent to the user. Signed-off-by: Tobias Waldekranz Reviewed-by: Vladimir Oltean Tested-by: Vladimir Oltean Signed-off-by: Jakub Kicinski --- include/net/dsa.h | 60 ++++++++++++++++++++++++++++++ net/dsa/dsa2.c | 93 ++++++++++++++++++++++++++++++++++++++++++++++ net/dsa/dsa_priv.h | 36 ++++++++++++++++++ net/dsa/port.c | 79 +++++++++++++++++++++++++++++++++++++++ net/dsa/slave.c | 70 ++++++++++++++++++++++++++++++---- net/dsa/switch.c | 50 +++++++++++++++++++++++++ 6 files changed, 381 insertions(+), 7 deletions(-) diff --git a/include/net/dsa.h b/include/net/dsa.h index c3485ba6c3125..06e3784ec55c8 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -149,8 +149,41 @@ struct dsa_switch_tree { /* List of DSA links composing the routing table */ struct list_head rtable; + + /* Maps offloaded LAG netdevs to a zero-based linear ID for + * drivers that need it. + */ + struct net_device **lags; + unsigned int lags_len; }; +#define dsa_lags_foreach_id(_id, _dst) \ + for ((_id) = 0; (_id) < (_dst)->lags_len; (_id)++) \ + if ((_dst)->lags[(_id)]) + +#define dsa_lag_foreach_port(_dp, _dst, _lag) \ + list_for_each_entry((_dp), &(_dst)->ports, list) \ + if ((_dp)->lag_dev == (_lag)) + +static inline struct net_device *dsa_lag_dev(struct dsa_switch_tree *dst, + unsigned int id) +{ + return dst->lags[id]; +} + +static inline int dsa_lag_id(struct dsa_switch_tree *dst, + struct net_device *lag) +{ + unsigned int id; + + dsa_lags_foreach_id(id, dst) { + if (dsa_lag_dev(dst, id) == lag) + return id; + } + + return -ENODEV; +} + /* TC matchall action types */ enum dsa_port_mall_action_type { DSA_PORT_MALL_MIRROR, @@ -220,6 +253,8 @@ struct dsa_port { bool devlink_port_setup; struct phylink *pl; struct phylink_config pl_config; + struct net_device *lag_dev; + bool lag_tx_enabled; struct list_head list; @@ -340,6 +375,14 @@ struct dsa_switch { */ bool mtu_enforcement_ingress; + /* Drivers that benefit from having an ID associated with each + * offloaded LAG should set this to the maximum number of + * supported IDs. DSA will then maintain a mapping of _at + * least_ these many IDs, accessible to drivers via + * dsa_lag_id(). + */ + unsigned int num_lag_ids; + size_t num_ports; }; @@ -626,6 +669,13 @@ struct dsa_switch_ops { void (*crosschip_bridge_leave)(struct dsa_switch *ds, int tree_index, int sw_index, int port, struct net_device *br); + int (*crosschip_lag_change)(struct dsa_switch *ds, int sw_index, + int port); + int (*crosschip_lag_join)(struct dsa_switch *ds, int sw_index, + int port, struct net_device *lag, + struct netdev_lag_upper_info *info); + int (*crosschip_lag_leave)(struct dsa_switch *ds, int sw_index, + int port, struct net_device *lag); /* * PTP functionality @@ -657,6 +707,16 @@ struct dsa_switch_ops { int (*port_change_mtu)(struct dsa_switch *ds, int port, int new_mtu); int (*port_max_mtu)(struct dsa_switch *ds, int port); + + /* + * LAG integration + */ + int (*port_lag_change)(struct dsa_switch *ds, int port); + int (*port_lag_join)(struct dsa_switch *ds, int port, + struct net_device *lag, + struct netdev_lag_upper_info *info); + int (*port_lag_leave)(struct dsa_switch *ds, int port, + struct net_device *lag); }; #define DSA_DEVLINK_PARAM_DRIVER(_id, _name, _type, _cmodes) \ diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index 01f21b0b379a6..fd343466df27b 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -21,6 +21,65 @@ static DEFINE_MUTEX(dsa2_mutex); LIST_HEAD(dsa_tree_list); +/** + * dsa_lag_map() - Map LAG netdev to a linear LAG ID + * @dst: Tree in which to record the mapping. + * @lag: Netdev that is to be mapped to an ID. + * + * dsa_lag_id/dsa_lag_dev can then be used to translate between the + * two spaces. The size of the mapping space is determined by the + * driver by setting ds->num_lag_ids. It is perfectly legal to leave + * it unset if it is not needed, in which case these functions become + * no-ops. + */ +void dsa_lag_map(struct dsa_switch_tree *dst, struct net_device *lag) +{ + unsigned int id; + + if (dsa_lag_id(dst, lag) >= 0) + /* Already mapped */ + return; + + for (id = 0; id < dst->lags_len; id++) { + if (!dsa_lag_dev(dst, id)) { + dst->lags[id] = lag; + return; + } + } + + /* No IDs left, which is OK. Some drivers do not need it. The + * ones that do, e.g. mv88e6xxx, will discover that dsa_lag_id + * returns an error for this device when joining the LAG. The + * driver can then return -EOPNOTSUPP back to DSA, which will + * fall back to a software LAG. + */ +} + +/** + * dsa_lag_unmap() - Remove a LAG ID mapping + * @dst: Tree in which the mapping is recorded. + * @lag: Netdev that was mapped. + * + * As there may be multiple users of the mapping, it is only removed + * if there are no other references to it. + */ +void dsa_lag_unmap(struct dsa_switch_tree *dst, struct net_device *lag) +{ + struct dsa_port *dp; + unsigned int id; + + dsa_lag_foreach_port(dp, dst, lag) + /* There are remaining users of this mapping */ + return; + + dsa_lags_foreach_id(id, dst) { + if (dsa_lag_dev(dst, id) == lag) { + dst->lags[id] = NULL; + break; + } + } +} + struct dsa_switch *dsa_switch_find(int tree_index, int sw_index) { struct dsa_switch_tree *dst; @@ -578,6 +637,32 @@ static void dsa_tree_teardown_master(struct dsa_switch_tree *dst) dsa_master_teardown(dp->master); } +static int dsa_tree_setup_lags(struct dsa_switch_tree *dst) +{ + unsigned int len = 0; + struct dsa_port *dp; + + list_for_each_entry(dp, &dst->ports, list) { + if (dp->ds->num_lag_ids > len) + len = dp->ds->num_lag_ids; + } + + if (!len) + return 0; + + dst->lags = kcalloc(len, sizeof(*dst->lags), GFP_KERNEL); + if (!dst->lags) + return -ENOMEM; + + dst->lags_len = len; + return 0; +} + +static void dsa_tree_teardown_lags(struct dsa_switch_tree *dst) +{ + kfree(dst->lags); +} + static int dsa_tree_setup(struct dsa_switch_tree *dst) { bool complete; @@ -605,12 +690,18 @@ static int dsa_tree_setup(struct dsa_switch_tree *dst) if (err) goto teardown_switches; + err = dsa_tree_setup_lags(dst); + if (err) + goto teardown_master; + dst->setup = true; pr_info("DSA: tree %d setup\n", dst->index); return 0; +teardown_master: + dsa_tree_teardown_master(dst); teardown_switches: dsa_tree_teardown_switches(dst); teardown_default_cpu: @@ -626,6 +717,8 @@ static void dsa_tree_teardown(struct dsa_switch_tree *dst) if (!dst->setup) return; + dsa_tree_teardown_lags(dst); + dsa_tree_teardown_master(dst); dsa_tree_teardown_switches(dst); diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h index 89143cc049db5..2ce46bb877033 100644 --- a/net/dsa/dsa_priv.h +++ b/net/dsa/dsa_priv.h @@ -20,6 +20,9 @@ enum { DSA_NOTIFIER_BRIDGE_LEAVE, DSA_NOTIFIER_FDB_ADD, DSA_NOTIFIER_FDB_DEL, + DSA_NOTIFIER_LAG_CHANGE, + DSA_NOTIFIER_LAG_JOIN, + DSA_NOTIFIER_LAG_LEAVE, DSA_NOTIFIER_MDB_ADD, DSA_NOTIFIER_MDB_DEL, DSA_NOTIFIER_VLAN_ADD, @@ -55,6 +58,15 @@ struct dsa_notifier_mdb_info { int port; }; +/* DSA_NOTIFIER_LAG_* */ +struct dsa_notifier_lag_info { + struct net_device *lag; + int sw_index; + int port; + + struct netdev_lag_upper_info *info; +}; + /* DSA_NOTIFIER_VLAN_* */ struct dsa_notifier_vlan_info { const struct switchdev_obj_port_vlan *vlan; @@ -134,6 +146,11 @@ void dsa_port_disable_rt(struct dsa_port *dp); void dsa_port_disable(struct dsa_port *dp); int dsa_port_bridge_join(struct dsa_port *dp, struct net_device *br); void dsa_port_bridge_leave(struct dsa_port *dp, struct net_device *br); +int dsa_port_lag_change(struct dsa_port *dp, + struct netdev_lag_lower_state_info *linfo); +int dsa_port_lag_join(struct dsa_port *dp, struct net_device *lag_dev, + struct netdev_lag_upper_info *uinfo); +void dsa_port_lag_leave(struct dsa_port *dp, struct net_device *lag_dev); int dsa_port_vlan_filtering(struct dsa_port *dp, bool vlan_filtering); bool dsa_port_skip_vlan_configuration(struct dsa_port *dp); int dsa_port_ageing_time(struct dsa_port *dp, clock_t ageing_clock); @@ -159,6 +176,22 @@ int dsa_port_link_register_of(struct dsa_port *dp); void dsa_port_link_unregister_of(struct dsa_port *dp); extern const struct phylink_mac_ops dsa_port_phylink_mac_ops; +static inline bool dsa_port_offloads_netdev(struct dsa_port *dp, + struct net_device *dev) +{ + /* Switchdev offloading can be configured on: */ + + if (dev == dp->slave) + /* DSA ports directly connected to a bridge. */ + return true; + + if (dp->lag_dev == dev) + /* DSA ports connected to a bridge via a LAG */ + return true; + + return false; +} + /* slave.c */ extern const struct dsa_device_ops notag_netdev_ops; void dsa_slave_mii_bus_init(struct dsa_switch *ds); @@ -248,6 +281,9 @@ int dsa_switch_register_notifier(struct dsa_switch *ds); void dsa_switch_unregister_notifier(struct dsa_switch *ds); /* dsa2.c */ +void dsa_lag_map(struct dsa_switch_tree *dst, struct net_device *lag); +void dsa_lag_unmap(struct dsa_switch_tree *dst, struct net_device *lag); + extern struct list_head dsa_tree_list; #endif diff --git a/net/dsa/port.c b/net/dsa/port.c index e59bf66c4c0da..f5b0f72ee7cd2 100644 --- a/net/dsa/port.c +++ b/net/dsa/port.c @@ -191,6 +191,85 @@ void dsa_port_bridge_leave(struct dsa_port *dp, struct net_device *br) dsa_port_set_state_now(dp, BR_STATE_FORWARDING); } +int dsa_port_lag_change(struct dsa_port *dp, + struct netdev_lag_lower_state_info *linfo) +{ + struct dsa_notifier_lag_info info = { + .sw_index = dp->ds->index, + .port = dp->index, + }; + bool tx_enabled; + + if (!dp->lag_dev) + return 0; + + /* On statically configured aggregates (e.g. loadbalance + * without LACP) ports will always be tx_enabled, even if the + * link is down. Thus we require both link_up and tx_enabled + * in order to include it in the tx set. + */ + tx_enabled = linfo->link_up && linfo->tx_enabled; + + if (tx_enabled == dp->lag_tx_enabled) + return 0; + + dp->lag_tx_enabled = tx_enabled; + + return dsa_port_notify(dp, DSA_NOTIFIER_LAG_CHANGE, &info); +} + +int dsa_port_lag_join(struct dsa_port *dp, struct net_device *lag, + struct netdev_lag_upper_info *uinfo) +{ + struct dsa_notifier_lag_info info = { + .sw_index = dp->ds->index, + .port = dp->index, + .lag = lag, + .info = uinfo, + }; + int err; + + dsa_lag_map(dp->ds->dst, lag); + dp->lag_dev = lag; + + err = dsa_port_notify(dp, DSA_NOTIFIER_LAG_JOIN, &info); + if (err) { + dp->lag_dev = NULL; + dsa_lag_unmap(dp->ds->dst, lag); + } + + return err; +} + +void dsa_port_lag_leave(struct dsa_port *dp, struct net_device *lag) +{ + struct dsa_notifier_lag_info info = { + .sw_index = dp->ds->index, + .port = dp->index, + .lag = lag, + }; + int err; + + if (!dp->lag_dev) + return; + + /* Port might have been part of a LAG that in turn was + * attached to a bridge. + */ + if (dp->bridge_dev) + dsa_port_bridge_leave(dp, dp->bridge_dev); + + dp->lag_tx_enabled = false; + dp->lag_dev = NULL; + + err = dsa_port_notify(dp, DSA_NOTIFIER_LAG_LEAVE, &info); + if (err) + pr_err("DSA: failed to notify DSA_NOTIFIER_LAG_LEAVE: %d\n", + err); + + dsa_lag_unmap(dp->ds->dst, lag); +} + /* Must be called under rcu_read_lock() */ static bool dsa_port_can_apply_vlan_filtering(struct dsa_port *dp, bool vlan_filtering) diff --git a/net/dsa/slave.c b/net/dsa/slave.c index e53c8ca6eb661..c5c81cba8259d 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -273,7 +273,7 @@ static int dsa_slave_port_attr_set(struct net_device *dev, struct dsa_port *dp = dsa_slave_to_port(dev); int ret; - if (attr->orig_dev != dev) + if (!dsa_port_offloads_netdev(dp, attr->orig_dev)) return -EOPNOTSUPP; switch (attr->id) { @@ -333,7 +333,7 @@ static int dsa_slave_vlan_add(struct net_device *dev, struct switchdev_obj_port_vlan vlan; int err; - if (obj->orig_dev != dev) + if (!dsa_port_offloads_netdev(dp, obj->orig_dev)) return -EOPNOTSUPP; if (dsa_port_skip_vlan_configuration(dp)) @@ -378,7 +378,7 @@ static int dsa_slave_port_obj_add(struct net_device *dev, switch (obj->id) { case SWITCHDEV_OBJ_ID_PORT_MDB: - if (obj->orig_dev != dev) + if (!dsa_port_offloads_netdev(dp, obj->orig_dev)) return -EOPNOTSUPP; err = dsa_port_mdb_add(dp, SWITCHDEV_OBJ_PORT_MDB(obj)); break; @@ -407,7 +407,7 @@ static int dsa_slave_vlan_del(struct net_device *dev, struct switchdev_obj_port_vlan *vlan; int err; - if (obj->orig_dev != dev) + if (!dsa_port_offloads_netdev(dp, obj->orig_dev)) return -EOPNOTSUPP; if (dsa_port_skip_vlan_configuration(dp)) @@ -435,7 +435,7 @@ static int dsa_slave_port_obj_del(struct net_device *dev, switch (obj->id) { case SWITCHDEV_OBJ_ID_PORT_MDB: - if (obj->orig_dev != dev) + if (!dsa_port_offloads_netdev(dp, obj->orig_dev)) return -EOPNOTSUPP; err = dsa_port_mdb_del(dp, SWITCHDEV_OBJ_PORT_MDB(obj)); break; @@ -1907,6 +1907,46 @@ static int dsa_slave_changeupper(struct net_device *dev, dsa_port_bridge_leave(dp, info->upper_dev); err = NOTIFY_OK; } + } else if (netif_is_lag_master(info->upper_dev)) { + if (info->linking) { + err = dsa_port_lag_join(dp, info->upper_dev, + info->upper_info); + if (err == -EOPNOTSUPP) { + NL_SET_ERR_MSG_MOD(info->info.extack, + "Offloading not supported"); + err = 0; + } + err = notifier_from_errno(err); + } else { + dsa_port_lag_leave(dp, info->upper_dev); + err = NOTIFY_OK; + } + } + + return err; +} + +static int +dsa_slave_lag_changeupper(struct net_device *dev, + struct netdev_notifier_changeupper_info *info) +{ + struct net_device *lower; + struct list_head *iter; + int err = NOTIFY_DONE; + struct dsa_port *dp; + + netdev_for_each_lower_dev(dev, lower, iter) { + if (!dsa_slave_dev_check(lower)) + continue; + + dp = dsa_slave_to_port(lower); + if (!dp->lag_dev) + /* Software LAG */ + continue; + + err = dsa_slave_changeupper(lower, info); + if (notifier_to_errno(err)) + break; } return err; @@ -2004,10 +2044,26 @@ static int dsa_slave_netdevice_event(struct notifier_block *nb, break; } case NETDEV_CHANGEUPPER: + if (dsa_slave_dev_check(dev)) + return dsa_slave_changeupper(dev, ptr); + + if (netif_is_lag_master(dev)) + return dsa_slave_lag_changeupper(dev, ptr); + + break; + case NETDEV_CHANGELOWERSTATE: { + struct netdev_notifier_changelowerstate_info *info = ptr; + struct dsa_port *dp; + int err; + if (!dsa_slave_dev_check(dev)) - return NOTIFY_DONE; + break; - return dsa_slave_changeupper(dev, ptr); + dp = dsa_slave_to_port(dev); + + err = dsa_port_lag_change(dp, info->lower_state_info); + return notifier_from_errno(err); + } } return NOTIFY_DONE; diff --git a/net/dsa/switch.c b/net/dsa/switch.c index 21d2f842d068f..cc0b25f3adea9 100644 --- a/net/dsa/switch.c +++ b/net/dsa/switch.c @@ -166,6 +166,47 @@ static int dsa_switch_fdb_del(struct dsa_switch *ds, return ds->ops->port_fdb_del(ds, port, info->addr, info->vid); } +static int dsa_switch_lag_change(struct dsa_switch *ds, + struct dsa_notifier_lag_info *info) +{ + if (ds->index == info->sw_index && ds->ops->port_lag_change) + return ds->ops->port_lag_change(ds, info->port); + + if (ds->index != info->sw_index && ds->ops->crosschip_lag_change) + return ds->ops->crosschip_lag_change(ds, info->sw_index, + info->port); + + return 0; +} + +static int dsa_switch_lag_join(struct dsa_switch *ds, + struct dsa_notifier_lag_info *info) +{ + if (ds->index == info->sw_index && ds->ops->port_lag_join) + return ds->ops->port_lag_join(ds, info->port, info->lag, + info->info); + + if (ds->index != info->sw_index && ds->ops->crosschip_lag_join) + return ds->ops->crosschip_lag_join(ds, info->sw_index, + info->port, info->lag, + info->info); + + return 0; +} + +static int dsa_switch_lag_leave(struct dsa_switch *ds, + struct dsa_notifier_lag_info *info) +{ + if (ds->index == info->sw_index && ds->ops->port_lag_leave) + return ds->ops->port_lag_leave(ds, info->port, info->lag); + + if (ds->index != info->sw_index && ds->ops->crosschip_lag_leave) + return ds->ops->crosschip_lag_leave(ds, info->sw_index, + info->port, info->lag); + + return 0; +} + static bool dsa_switch_mdb_match(struct dsa_switch *ds, int port, struct dsa_notifier_mdb_info *info) { @@ -278,6 +319,15 @@ static int dsa_switch_event(struct notifier_block *nb, case DSA_NOTIFIER_FDB_DEL: err = dsa_switch_fdb_del(ds, info); break; + case DSA_NOTIFIER_LAG_CHANGE: + err = dsa_switch_lag_change(ds, info); + break; + case DSA_NOTIFIER_LAG_JOIN: + err = dsa_switch_lag_join(ds, info); + break; + case DSA_NOTIFIER_LAG_LEAVE: + err = dsa_switch_lag_leave(ds, info); + break; case DSA_NOTIFIER_MDB_ADD: err = dsa_switch_mdb_add(ds, info); break; -- GitLab From 57e661aae6a823140787dbcc34d92e223e2f71c5 Mon Sep 17 00:00:00 2001 From: Tobias Waldekranz Date: Wed, 13 Jan 2021 09:42:54 +0100 Subject: [PATCH 0843/2012] net: dsa: mv88e6xxx: Link aggregation support Support offloading of LAGs to hardware. LAGs may be attached to a bridge in which case VLANs, multicast groups, etc. are also offloaded as usual. Signed-off-by: Tobias Waldekranz Reviewed-by: Vladimir Oltean Signed-off-by: Jakub Kicinski --- drivers/net/dsa/mv88e6xxx/chip.c | 296 +++++++++++++++++++++++++++- drivers/net/dsa/mv88e6xxx/global2.c | 8 +- drivers/net/dsa/mv88e6xxx/global2.h | 5 + drivers/net/dsa/mv88e6xxx/port.c | 21 ++ drivers/net/dsa/mv88e6xxx/port.h | 5 + 5 files changed, 330 insertions(+), 5 deletions(-) diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index 4aa7d0a8f1977..dcb1726b68cc8 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -1396,15 +1396,32 @@ static int mv88e6xxx_mac_setup(struct mv88e6xxx_chip *chip) static int mv88e6xxx_pvt_map(struct mv88e6xxx_chip *chip, int dev, int port) { + struct dsa_switch_tree *dst = chip->ds->dst; + struct dsa_switch *ds; + struct dsa_port *dp; u16 pvlan = 0; if (!mv88e6xxx_has_pvt(chip)) return 0; /* Skip the local source device, which uses in-chip port VLAN */ - if (dev != chip->ds->index) + if (dev != chip->ds->index) { pvlan = mv88e6xxx_port_vlan(chip, dev, port); + ds = dsa_switch_find(dst->index, dev); + dp = ds ? dsa_to_port(ds, port) : NULL; + if (dp && dp->lag_dev) { + /* As the PVT is used to limit flooding of + * FORWARD frames, which use the LAG ID as the + * source port, we must translate dev/port to + * the special "LAG device" in the PVT, using + * the LAG ID as the port number. + */ + dev = MV88E6XXX_G2_PVT_ADRR_DEV_TRUNK; + port = dsa_lag_id(dst, dp->lag_dev); + } + } + return mv88e6xxx_g2_pvt_write(chip, dev, port, pvlan); } @@ -5364,6 +5381,271 @@ static int mv88e6xxx_port_egress_floods(struct dsa_switch *ds, int port, return err; } +static bool mv88e6xxx_lag_can_offload(struct dsa_switch *ds, + struct net_device *lag, + struct netdev_lag_upper_info *info) +{ + struct dsa_port *dp; + int id, members = 0; + + id = dsa_lag_id(ds->dst, lag); + if (id < 0 || id >= ds->num_lag_ids) + return false; + + dsa_lag_foreach_port(dp, ds->dst, lag) + /* Includes the port joining the LAG */ + members++; + + if (members > 8) + return false; + + /* We could potentially relax this to include active + * backup in the future. + */ + if (info->tx_type != NETDEV_LAG_TX_TYPE_HASH) + return false; + + /* Ideally we would also validate that the hash type matches + * the hardware. Alas, this is always set to unknown on team + * interfaces. + */ + return true; +} + +static int mv88e6xxx_lag_sync_map(struct dsa_switch *ds, struct net_device *lag) +{ + struct mv88e6xxx_chip *chip = ds->priv; + struct dsa_port *dp; + u16 map = 0; + int id; + + id = dsa_lag_id(ds->dst, lag); + + /* Build the map of all ports to distribute flows destined for + * this LAG. This can be either a local user port, or a DSA + * port if the LAG port is on a remote chip. + */ + dsa_lag_foreach_port(dp, ds->dst, lag) + map |= BIT(dsa_towards_port(ds, dp->ds->index, dp->index)); + + return mv88e6xxx_g2_trunk_mapping_write(chip, id, map); +} + +static const u8 mv88e6xxx_lag_mask_table[8][8] = { + /* Row number corresponds to the number of active members in a + * LAG. Each column states which of the eight hash buckets are + * mapped to the column:th port in the LAG. + * + * Example: In a LAG with three active ports, the second port + * ([2][1]) would be selected for traffic mapped to buckets + * 3,4,5 (0x38). + */ + { 0xff, 0, 0, 0, 0, 0, 0, 0 }, + { 0x0f, 0xf0, 0, 0, 0, 0, 0, 0 }, + { 0x07, 0x38, 0xc0, 0, 0, 0, 0, 0 }, + { 0x03, 0x0c, 0x30, 0xc0, 0, 0, 0, 0 }, + { 0x03, 0x0c, 0x30, 0x40, 0x80, 0, 0, 0 }, + { 0x03, 0x0c, 0x10, 0x20, 0x40, 0x80, 0, 0 }, + { 0x03, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0 }, + { 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80 }, +}; + +static void mv88e6xxx_lag_set_port_mask(u16 *mask, int port, + int num_tx, int nth) +{ + u8 active = 0; + int i; + + num_tx = num_tx <= 8 ? num_tx : 8; + if (nth < num_tx) + active = mv88e6xxx_lag_mask_table[num_tx - 1][nth]; + + for (i = 0; i < 8; i++) { + if (BIT(i) & active) + mask[i] |= BIT(port); + } +} + +static int mv88e6xxx_lag_sync_masks(struct dsa_switch *ds) +{ + struct mv88e6xxx_chip *chip = ds->priv; + unsigned int id, num_tx; + struct net_device *lag; + struct dsa_port *dp; + int i, err, nth; + u16 mask[8]; + u16 ivec; + + /* Assume no port is a member of any LAG. */ + ivec = BIT(mv88e6xxx_num_ports(chip)) - 1; + + /* Disable all masks for ports that _are_ members of a LAG. */ + list_for_each_entry(dp, &ds->dst->ports, list) { + if (!dp->lag_dev || dp->ds != ds) + continue; + + ivec &= ~BIT(dp->index); + } + + for (i = 0; i < 8; i++) + mask[i] = ivec; + + /* Enable the correct subset of masks for all LAG ports that + * are in the Tx set. + */ + dsa_lags_foreach_id(id, ds->dst) { + lag = dsa_lag_dev(ds->dst, id); + if (!lag) + continue; + + num_tx = 0; + dsa_lag_foreach_port(dp, ds->dst, lag) { + if (dp->lag_tx_enabled) + num_tx++; + } + + if (!num_tx) + continue; + + nth = 0; + dsa_lag_foreach_port(dp, ds->dst, lag) { + if (!dp->lag_tx_enabled) + continue; + + if (dp->ds == ds) + mv88e6xxx_lag_set_port_mask(mask, dp->index, + num_tx, nth); + + nth++; + } + } + + for (i = 0; i < 8; i++) { + err = mv88e6xxx_g2_trunk_mask_write(chip, i, true, mask[i]); + if (err) + return err; + } + + return 0; +} + +static int mv88e6xxx_lag_sync_masks_map(struct dsa_switch *ds, + struct net_device *lag) +{ + int err; + + err = mv88e6xxx_lag_sync_masks(ds); + + if (!err) + err = mv88e6xxx_lag_sync_map(ds, lag); + + return err; +} + +static int mv88e6xxx_port_lag_change(struct dsa_switch *ds, int port) +{ + struct mv88e6xxx_chip *chip = ds->priv; + int err; + + mv88e6xxx_reg_lock(chip); + err = mv88e6xxx_lag_sync_masks(ds); + mv88e6xxx_reg_unlock(chip); + return err; +} + +static int mv88e6xxx_port_lag_join(struct dsa_switch *ds, int port, + struct net_device *lag, + struct netdev_lag_upper_info *info) +{ + struct mv88e6xxx_chip *chip = ds->priv; + int err, id; + + if (!mv88e6xxx_lag_can_offload(ds, lag, info)) + return -EOPNOTSUPP; + + id = dsa_lag_id(ds->dst, lag); + + mv88e6xxx_reg_lock(chip); + + err = mv88e6xxx_port_set_trunk(chip, port, true, id); + if (err) + goto err_unlock; + + err = mv88e6xxx_lag_sync_masks_map(ds, lag); + if (err) + goto err_clear_trunk; + + mv88e6xxx_reg_unlock(chip); + return 0; + +err_clear_trunk: + mv88e6xxx_port_set_trunk(chip, port, false, 0); +err_unlock: + mv88e6xxx_reg_unlock(chip); + return err; +} + +static int mv88e6xxx_port_lag_leave(struct dsa_switch *ds, int port, + struct net_device *lag) +{ + struct mv88e6xxx_chip *chip = ds->priv; + int err_sync, err_trunk; + + mv88e6xxx_reg_lock(chip); + err_sync = mv88e6xxx_lag_sync_masks_map(ds, lag); + err_trunk = mv88e6xxx_port_set_trunk(chip, port, false, 0); + mv88e6xxx_reg_unlock(chip); + return err_sync ? : err_trunk; +} + +static int mv88e6xxx_crosschip_lag_change(struct dsa_switch *ds, int sw_index, + int port) +{ + struct mv88e6xxx_chip *chip = ds->priv; + int err; + + mv88e6xxx_reg_lock(chip); + err = mv88e6xxx_lag_sync_masks(ds); + mv88e6xxx_reg_unlock(chip); + return err; +} + +static int mv88e6xxx_crosschip_lag_join(struct dsa_switch *ds, int sw_index, + int port, struct net_device *lag, + struct netdev_lag_upper_info *info) +{ + struct mv88e6xxx_chip *chip = ds->priv; + int err; + + if (!mv88e6xxx_lag_can_offload(ds, lag, info)) + return -EOPNOTSUPP; + + mv88e6xxx_reg_lock(chip); + + err = mv88e6xxx_lag_sync_masks_map(ds, lag); + if (err) + goto unlock; + + err = mv88e6xxx_pvt_map(chip, sw_index, port); + +unlock: + mv88e6xxx_reg_unlock(chip); + return err; +} + +static int mv88e6xxx_crosschip_lag_leave(struct dsa_switch *ds, int sw_index, + int port, struct net_device *lag) +{ + struct mv88e6xxx_chip *chip = ds->priv; + int err_sync, err_pvt; + + mv88e6xxx_reg_lock(chip); + err_sync = mv88e6xxx_lag_sync_masks_map(ds, lag); + err_pvt = mv88e6xxx_pvt_map(chip, sw_index, port); + mv88e6xxx_reg_unlock(chip); + return err_sync ? : err_pvt; +} + static const struct dsa_switch_ops mv88e6xxx_switch_ops = { .get_tag_protocol = mv88e6xxx_get_tag_protocol, .setup = mv88e6xxx_setup, @@ -5416,6 +5698,12 @@ static const struct dsa_switch_ops mv88e6xxx_switch_ops = { .devlink_param_get = mv88e6xxx_devlink_param_get, .devlink_param_set = mv88e6xxx_devlink_param_set, .devlink_info_get = mv88e6xxx_devlink_info_get, + .port_lag_change = mv88e6xxx_port_lag_change, + .port_lag_join = mv88e6xxx_port_lag_join, + .port_lag_leave = mv88e6xxx_port_lag_leave, + .crosschip_lag_change = mv88e6xxx_crosschip_lag_change, + .crosschip_lag_join = mv88e6xxx_crosschip_lag_join, + .crosschip_lag_leave = mv88e6xxx_crosschip_lag_leave, }; static int mv88e6xxx_register_switch(struct mv88e6xxx_chip *chip) @@ -5435,6 +5723,12 @@ static int mv88e6xxx_register_switch(struct mv88e6xxx_chip *chip) ds->ageing_time_min = chip->info->age_time_coeff; ds->ageing_time_max = chip->info->age_time_coeff * U8_MAX; + /* Some chips support up to 32, but that requires enabling the + * 5-bit port mode, which we do not support. 640k^W16 ought to + * be enough for anyone. + */ + ds->num_lag_ids = 16; + dev_set_drvdata(dev, ds); return dsa_register_switch(ds); diff --git a/drivers/net/dsa/mv88e6xxx/global2.c b/drivers/net/dsa/mv88e6xxx/global2.c index 75b227d0f73b4..da8bac8813e14 100644 --- a/drivers/net/dsa/mv88e6xxx/global2.c +++ b/drivers/net/dsa/mv88e6xxx/global2.c @@ -126,8 +126,8 @@ int mv88e6xxx_g2_device_mapping_write(struct mv88e6xxx_chip *chip, int target, /* Offset 0x07: Trunk Mask Table register */ -static int mv88e6xxx_g2_trunk_mask_write(struct mv88e6xxx_chip *chip, int num, - bool hash, u16 mask) +int mv88e6xxx_g2_trunk_mask_write(struct mv88e6xxx_chip *chip, int num, + bool hash, u16 mask) { u16 val = (num << 12) | (mask & mv88e6xxx_port_mask(chip)); @@ -140,8 +140,8 @@ static int mv88e6xxx_g2_trunk_mask_write(struct mv88e6xxx_chip *chip, int num, /* Offset 0x08: Trunk Mapping Table register */ -static int mv88e6xxx_g2_trunk_mapping_write(struct mv88e6xxx_chip *chip, int id, - u16 map) +int mv88e6xxx_g2_trunk_mapping_write(struct mv88e6xxx_chip *chip, int id, + u16 map) { const u16 port_mask = BIT(mv88e6xxx_num_ports(chip)) - 1; u16 val = (id << 11) | (map & port_mask); diff --git a/drivers/net/dsa/mv88e6xxx/global2.h b/drivers/net/dsa/mv88e6xxx/global2.h index 1f42ee656816b..60febaf4da765 100644 --- a/drivers/net/dsa/mv88e6xxx/global2.h +++ b/drivers/net/dsa/mv88e6xxx/global2.h @@ -101,6 +101,7 @@ #define MV88E6XXX_G2_PVT_ADDR_OP_WRITE_PVLAN 0x3000 #define MV88E6XXX_G2_PVT_ADDR_OP_READ 0x4000 #define MV88E6XXX_G2_PVT_ADDR_PTR_MASK 0x01ff +#define MV88E6XXX_G2_PVT_ADRR_DEV_TRUNK 0x1f /* Offset 0x0C: Cross-chip Port VLAN Data Register */ #define MV88E6XXX_G2_PVT_DATA 0x0c @@ -345,6 +346,10 @@ int mv88e6352_g2_mgmt_rsvd2cpu(struct mv88e6xxx_chip *chip); int mv88e6xxx_g2_pot_clear(struct mv88e6xxx_chip *chip); +int mv88e6xxx_g2_trunk_mask_write(struct mv88e6xxx_chip *chip, int num, + bool hash, u16 mask); +int mv88e6xxx_g2_trunk_mapping_write(struct mv88e6xxx_chip *chip, int id, + u16 map); int mv88e6xxx_g2_trunk_clear(struct mv88e6xxx_chip *chip); int mv88e6xxx_g2_device_mapping_write(struct mv88e6xxx_chip *chip, int target, diff --git a/drivers/net/dsa/mv88e6xxx/port.c b/drivers/net/dsa/mv88e6xxx/port.c index 77a5fd1798cda..4b46e10a2dde1 100644 --- a/drivers/net/dsa/mv88e6xxx/port.c +++ b/drivers/net/dsa/mv88e6xxx/port.c @@ -851,6 +851,27 @@ int mv88e6xxx_port_set_message_port(struct mv88e6xxx_chip *chip, int port, return mv88e6xxx_port_write(chip, port, MV88E6XXX_PORT_CTL1, val); } +int mv88e6xxx_port_set_trunk(struct mv88e6xxx_chip *chip, int port, + bool trunk, u8 id) +{ + u16 val; + int err; + + err = mv88e6xxx_port_read(chip, port, MV88E6XXX_PORT_CTL1, &val); + if (err) + return err; + + val &= ~MV88E6XXX_PORT_CTL1_TRUNK_ID_MASK; + + if (trunk) + val |= MV88E6XXX_PORT_CTL1_TRUNK_PORT | + (id << MV88E6XXX_PORT_CTL1_TRUNK_ID_SHIFT); + else + val &= ~MV88E6XXX_PORT_CTL1_TRUNK_PORT; + + return mv88e6xxx_port_write(chip, port, MV88E6XXX_PORT_CTL1, val); +} + /* Offset 0x06: Port Based VLAN Map */ int mv88e6xxx_port_set_vlan_map(struct mv88e6xxx_chip *chip, int port, u16 map) diff --git a/drivers/net/dsa/mv88e6xxx/port.h b/drivers/net/dsa/mv88e6xxx/port.h index 500e1d4896ff3..a729bba050df7 100644 --- a/drivers/net/dsa/mv88e6xxx/port.h +++ b/drivers/net/dsa/mv88e6xxx/port.h @@ -168,6 +168,9 @@ /* Offset 0x05: Port Control 1 */ #define MV88E6XXX_PORT_CTL1 0x05 #define MV88E6XXX_PORT_CTL1_MESSAGE_PORT 0x8000 +#define MV88E6XXX_PORT_CTL1_TRUNK_PORT 0x4000 +#define MV88E6XXX_PORT_CTL1_TRUNK_ID_MASK 0x0f00 +#define MV88E6XXX_PORT_CTL1_TRUNK_ID_SHIFT 8 #define MV88E6XXX_PORT_CTL1_FID_11_4_MASK 0x00ff /* Offset 0x06: Port Based VLAN Map */ @@ -351,6 +354,8 @@ int mv88e6351_port_set_ether_type(struct mv88e6xxx_chip *chip, int port, u16 etype); int mv88e6xxx_port_set_message_port(struct mv88e6xxx_chip *chip, int port, bool message_port); +int mv88e6xxx_port_set_trunk(struct mv88e6xxx_chip *chip, int port, + bool trunk, u8 id); int mv88e6165_port_set_jumbo_size(struct mv88e6xxx_chip *chip, int port, size_t size); int mv88e6095_port_egress_rate_limiting(struct mv88e6xxx_chip *chip, int port); -- GitLab From 5b60dadb71db7df94b824a0fab20acc1eabb9050 Mon Sep 17 00:00:00 2001 From: Tobias Waldekranz Date: Wed, 13 Jan 2021 09:42:55 +0100 Subject: [PATCH 0844/2012] net: dsa: tag_dsa: Support reception of packets from LAG devices Packets ingressing on a LAG that egress on the CPU port, which are not classified as management, will have a FORWARD tag that does not contain the normal source device/port tuple. Instead the trunk bit will be set, and the port field holds the LAG id. Since the exact source port information is not available in the tag, frames are injected directly on the LAG interface and thus do never pass through any DSA port interface on ingress. Management frames (TO_CPU) are not affected and will pass through the DSA port interface as usual. Signed-off-by: Tobias Waldekranz Reviewed-by: Florian Fainelli Reviewed-by: Vladimir Oltean Signed-off-by: Jakub Kicinski --- net/dsa/dsa.c | 12 +++++++++++- net/dsa/tag_dsa.c | 17 ++++++++++++++++- 2 files changed, 27 insertions(+), 2 deletions(-) diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index df75481b12ed8..f4ce3c5826a01 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -219,11 +219,21 @@ static int dsa_switch_rcv(struct sk_buff *skb, struct net_device *dev, } skb = nskb; - p = netdev_priv(skb->dev); skb_push(skb, ETH_HLEN); skb->pkt_type = PACKET_HOST; skb->protocol = eth_type_trans(skb, skb->dev); + if (unlikely(!dsa_slave_dev_check(skb->dev))) { + /* Packet is to be injected directly on an upper + * device, e.g. a team/bond, so skip all DSA-port + * specific actions. + */ + netif_rx(skb); + return 0; + } + + p = netdev_priv(skb->dev); + if (unlikely(cpu_dp->ds->untag_bridge_pvid)) { nskb = dsa_untag_bridge_pvid(skb); if (!nskb) { diff --git a/net/dsa/tag_dsa.c b/net/dsa/tag_dsa.c index 112c7c6dd5686..7e7b7decdf397 100644 --- a/net/dsa/tag_dsa.c +++ b/net/dsa/tag_dsa.c @@ -163,6 +163,7 @@ static struct sk_buff *dsa_rcv_ll(struct sk_buff *skb, struct net_device *dev, u8 extra) { int source_device, source_port; + bool trunk = false; enum dsa_code code; enum dsa_cmd cmd; u8 *dsa_header; @@ -174,6 +175,8 @@ static struct sk_buff *dsa_rcv_ll(struct sk_buff *skb, struct net_device *dev, switch (cmd) { case DSA_CMD_FORWARD: skb->offload_fwd_mark = 1; + + trunk = !!(dsa_header[1] & 7); break; case DSA_CMD_TO_CPU: @@ -216,7 +219,19 @@ static struct sk_buff *dsa_rcv_ll(struct sk_buff *skb, struct net_device *dev, source_device = dsa_header[0] & 0x1f; source_port = (dsa_header[1] >> 3) & 0x1f; - skb->dev = dsa_master_find_slave(dev, source_device, source_port); + if (trunk) { + struct dsa_port *cpu_dp = dev->dsa_ptr; + + /* The exact source port is not available in the tag, + * so we inject the frame directly on the upper + * team/bond. + */ + skb->dev = dsa_lag_dev(cpu_dp->dst, source_port); + } else { + skb->dev = dsa_master_find_slave(dev, source_device, + source_port); + } + if (!skb->dev) return NULL; -- GitLab From d9cbe818485bafaf460e5d2a414b0f72b5a200ee Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Wed, 13 Jan 2021 11:15:27 -0600 Subject: [PATCH 0845/2012] net: ipa: a few simple renames The return value of gsi_command() is true if successful or false if we time out waiting for a completion interrupt. Rename the variables in the three callers of gsi_command() to be "timeout", to make it more obvious that's the only reason for failure. In addition, add a "gsi_" prefix to evt_ring_command() so its name is consistent with the convention used for GSI channel and generic commands. Signed-off-by: Alex Elder Reviewed-by: Saeed Mahameed Signed-off-by: Jakub Kicinski --- drivers/net/ipa/gsi.c | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/drivers/net/ipa/gsi.c b/drivers/net/ipa/gsi.c index 14d9a791924bf..b5913ce0bc943 100644 --- a/drivers/net/ipa/gsi.c +++ b/drivers/net/ipa/gsi.c @@ -326,13 +326,13 @@ gsi_evt_ring_state(struct gsi *gsi, u32 evt_ring_id) } /* Issue an event ring command and wait for it to complete */ -static void evt_ring_command(struct gsi *gsi, u32 evt_ring_id, - enum gsi_evt_cmd_opcode opcode) +static void gsi_evt_ring_command(struct gsi *gsi, u32 evt_ring_id, + enum gsi_evt_cmd_opcode opcode) { struct gsi_evt_ring *evt_ring = &gsi->evt_ring[evt_ring_id]; struct completion *completion = &evt_ring->completion; struct device *dev = gsi->dev; - bool success; + bool timeout; u32 val; /* We only perform one event ring command at a time, and event @@ -354,13 +354,13 @@ static void evt_ring_command(struct gsi *gsi, u32 evt_ring_id, val = u32_encode_bits(evt_ring_id, EV_CHID_FMASK); val |= u32_encode_bits(opcode, EV_OPCODE_FMASK); - success = gsi_command(gsi, GSI_EV_CH_CMD_OFFSET, val, completion); + timeout = !gsi_command(gsi, GSI_EV_CH_CMD_OFFSET, val, completion); /* Disable the interrupt again */ gsi_irq_type_disable(gsi, GSI_EV_CTRL); iowrite32(0, gsi->virt + GSI_CNTXT_SRC_EV_CH_IRQ_MSK_OFFSET); - if (success) + if (!timeout) return; dev_err(dev, "GSI command %u for event ring %u timed out, state %u\n", @@ -380,7 +380,7 @@ static int gsi_evt_ring_alloc_command(struct gsi *gsi, u32 evt_ring_id) return -EINVAL; } - evt_ring_command(gsi, evt_ring_id, GSI_EVT_ALLOCATE); + gsi_evt_ring_command(gsi, evt_ring_id, GSI_EVT_ALLOCATE); /* If successful the event ring state will have changed */ if (evt_ring->state == GSI_EVT_RING_STATE_ALLOCATED) @@ -405,7 +405,7 @@ static void gsi_evt_ring_reset_command(struct gsi *gsi, u32 evt_ring_id) return; } - evt_ring_command(gsi, evt_ring_id, GSI_EVT_RESET); + gsi_evt_ring_command(gsi, evt_ring_id, GSI_EVT_RESET); /* If successful the event ring state will have changed */ if (evt_ring->state == GSI_EVT_RING_STATE_ALLOCATED) @@ -426,7 +426,7 @@ static void gsi_evt_ring_de_alloc_command(struct gsi *gsi, u32 evt_ring_id) return; } - evt_ring_command(gsi, evt_ring_id, GSI_EVT_DE_ALLOC); + gsi_evt_ring_command(gsi, evt_ring_id, GSI_EVT_DE_ALLOC); /* If successful the event ring state will have changed */ if (evt_ring->state == GSI_EVT_RING_STATE_NOT_ALLOCATED) @@ -456,7 +456,7 @@ gsi_channel_command(struct gsi_channel *channel, enum gsi_ch_cmd_opcode opcode) u32 channel_id = gsi_channel_id(channel); struct gsi *gsi = channel->gsi; struct device *dev = gsi->dev; - bool success; + bool timeout; u32 val; /* We only perform one channel command at a time, and channel @@ -477,13 +477,13 @@ gsi_channel_command(struct gsi_channel *channel, enum gsi_ch_cmd_opcode opcode) val = u32_encode_bits(channel_id, CH_CHID_FMASK); val |= u32_encode_bits(opcode, CH_OPCODE_FMASK); - success = gsi_command(gsi, GSI_CH_CMD_OFFSET, val, completion); + timeout = !gsi_command(gsi, GSI_CH_CMD_OFFSET, val, completion); /* Disable the interrupt again */ gsi_irq_type_disable(gsi, GSI_CH_CTRL); iowrite32(0, gsi->virt + GSI_CNTXT_SRC_CH_IRQ_MSK_OFFSET); - if (success) + if (!timeout) return; dev_err(dev, "GSI command %u for channel %u timed out, state %u\n", @@ -1627,7 +1627,7 @@ static int gsi_generic_command(struct gsi *gsi, u32 channel_id, enum gsi_generic_cmd_opcode opcode) { struct completion *completion = &gsi->completion; - bool success; + bool timeout; u32 val; /* The error global interrupt type is always enabled (until we @@ -1650,12 +1650,12 @@ static int gsi_generic_command(struct gsi *gsi, u32 channel_id, val |= u32_encode_bits(channel_id, GENERIC_CHID_FMASK); val |= u32_encode_bits(GSI_EE_MODEM, GENERIC_EE_FMASK); - success = gsi_command(gsi, GSI_GENERIC_CMD_OFFSET, val, completion); + timeout = !gsi_command(gsi, GSI_GENERIC_CMD_OFFSET, val, completion); /* Disable the GP_INT1 IRQ type again */ iowrite32(BIT(ERROR_INT), gsi->virt + GSI_CNTXT_GLOB_IRQ_EN_OFFSET); - if (success) + if (!timeout) return gsi->result; dev_err(gsi->dev, "GSI generic command %u to channel %u timed out\n", -- GitLab From a60d0632f6e8e62584cd25d830c69dd09b0d4115 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Wed, 13 Jan 2021 11:15:28 -0600 Subject: [PATCH 0846/2012] net: ipa: introduce some interrupt helpers Create a new function gsi_irq_ev_ctrl_enable() that encapsulates enabling the event ring control GSI interrupt type, and enables a single event ring to signal that interrupt. When an event ring changes state as a result of an event ring command, it triggers this interrupt. Create an inverse function gsi_irq_ev_ctrl_disable() as well. Because only one event ring at a time is enabled for this interrupt, we can simply disable the interrupt for *all* channels. Create a pair of helpers that serve the same purpose for channel commands. Signed-off-by: Alex Elder Reviewed-by: Saeed Mahameed Signed-off-by: Jakub Kicinski --- drivers/net/ipa/gsi.c | 94 ++++++++++++++++++++++++++----------------- 1 file changed, 58 insertions(+), 36 deletions(-) diff --git a/drivers/net/ipa/gsi.c b/drivers/net/ipa/gsi.c index b5913ce0bc943..e5681a39b5fc6 100644 --- a/drivers/net/ipa/gsi.c +++ b/drivers/net/ipa/gsi.c @@ -220,6 +220,58 @@ static void gsi_irq_teardown(struct gsi *gsi) /* Nothing to do */ } +/* Event ring commands are performed one at a time. Their completion + * is signaled by the event ring control GSI interrupt type, which is + * only enabled when we issue an event ring command. Only the event + * ring being operated on has this interrupt enabled. + */ +static void gsi_irq_ev_ctrl_enable(struct gsi *gsi, u32 evt_ring_id) +{ + u32 val = BIT(evt_ring_id); + + /* There's a small chance that a previous command completed + * after the interrupt was disabled, so make sure we have no + * pending interrupts before we enable them. + */ + iowrite32(~0, gsi->virt + GSI_CNTXT_SRC_EV_CH_IRQ_CLR_OFFSET); + + iowrite32(val, gsi->virt + GSI_CNTXT_SRC_EV_CH_IRQ_MSK_OFFSET); + gsi_irq_type_enable(gsi, GSI_EV_CTRL); +} + +/* Disable event ring control interrupts */ +static void gsi_irq_ev_ctrl_disable(struct gsi *gsi) +{ + gsi_irq_type_disable(gsi, GSI_EV_CTRL); + iowrite32(0, gsi->virt + GSI_CNTXT_SRC_EV_CH_IRQ_MSK_OFFSET); +} + +/* Channel commands are performed one at a time. Their completion is + * signaled by the channel control GSI interrupt type, which is only + * enabled when we issue a channel command. Only the channel being + * operated on has this interrupt enabled. + */ +static void gsi_irq_ch_ctrl_enable(struct gsi *gsi, u32 channel_id) +{ + u32 val = BIT(channel_id); + + /* There's a small chance that a previous command completed + * after the interrupt was disabled, so make sure we have no + * pending interrupts before we enable them. + */ + iowrite32(~0, gsi->virt + GSI_CNTXT_SRC_CH_IRQ_CLR_OFFSET); + + iowrite32(val, gsi->virt + GSI_CNTXT_SRC_CH_IRQ_MSK_OFFSET); + gsi_irq_type_enable(gsi, GSI_CH_CTRL); +} + +/* Disable channel control interrupts */ +static void gsi_irq_ch_ctrl_disable(struct gsi *gsi) +{ + gsi_irq_type_disable(gsi, GSI_CH_CTRL); + iowrite32(0, gsi->virt + GSI_CNTXT_SRC_CH_IRQ_MSK_OFFSET); +} + static void gsi_irq_ieob_enable(struct gsi *gsi, u32 evt_ring_id) { bool enable_ieob = !gsi->ieob_enabled_bitmap; @@ -335,30 +387,15 @@ static void gsi_evt_ring_command(struct gsi *gsi, u32 evt_ring_id, bool timeout; u32 val; - /* We only perform one event ring command at a time, and event - * control interrupts should only occur when such a command - * is issued here. Only permit *this* event ring to trigger - * an interrupt, and only enable the event control IRQ type - * when we expect it to occur. - * - * There's a small chance that a previous command completed - * after the interrupt was disabled, so make sure we have no - * pending interrupts before we enable them. - */ - iowrite32(~0, gsi->virt + GSI_CNTXT_SRC_EV_CH_IRQ_CLR_OFFSET); - - val = BIT(evt_ring_id); - iowrite32(val, gsi->virt + GSI_CNTXT_SRC_EV_CH_IRQ_MSK_OFFSET); - gsi_irq_type_enable(gsi, GSI_EV_CTRL); + /* Enable the completion interrupt for the command */ + gsi_irq_ev_ctrl_enable(gsi, evt_ring_id); val = u32_encode_bits(evt_ring_id, EV_CHID_FMASK); val |= u32_encode_bits(opcode, EV_OPCODE_FMASK); timeout = !gsi_command(gsi, GSI_EV_CH_CMD_OFFSET, val, completion); - /* Disable the interrupt again */ - gsi_irq_type_disable(gsi, GSI_EV_CTRL); - iowrite32(0, gsi->virt + GSI_CNTXT_SRC_EV_CH_IRQ_MSK_OFFSET); + gsi_irq_ev_ctrl_disable(gsi); if (!timeout) return; @@ -459,29 +496,14 @@ gsi_channel_command(struct gsi_channel *channel, enum gsi_ch_cmd_opcode opcode) bool timeout; u32 val; - /* We only perform one channel command at a time, and channel - * control interrupts should only occur when such a command is - * issued here. So we only permit *this* channel to trigger - * an interrupt and only enable the channel control IRQ type - * when we expect it to occur. - * - * There's a small chance that a previous command completed - * after the interrupt was disabled, so make sure we have no - * pending interrupts before we enable them. - */ - iowrite32(~0, gsi->virt + GSI_CNTXT_SRC_CH_IRQ_CLR_OFFSET); - - val = BIT(channel_id); - iowrite32(val, gsi->virt + GSI_CNTXT_SRC_CH_IRQ_MSK_OFFSET); - gsi_irq_type_enable(gsi, GSI_CH_CTRL); + /* Enable the completion interrupt for the command */ + gsi_irq_ch_ctrl_enable(gsi, channel_id); val = u32_encode_bits(channel_id, CH_CHID_FMASK); val |= u32_encode_bits(opcode, CH_OPCODE_FMASK); timeout = !gsi_command(gsi, GSI_CH_CMD_OFFSET, val, completion); - /* Disable the interrupt again */ - gsi_irq_type_disable(gsi, GSI_CH_CTRL); - iowrite32(0, gsi->virt + GSI_CNTXT_SRC_CH_IRQ_MSK_OFFSET); + gsi_irq_ch_ctrl_disable(gsi); if (!timeout) return; -- GitLab From 74401946bdad6eb812d2d3c77f1ace849f963a6a Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Wed, 13 Jan 2021 11:15:29 -0600 Subject: [PATCH 0847/2012] net: ipa: use usleep_range() 65;6003;1c The use of msleep() for small periods (less than 20 milliseconds) is not recommended because the actual delay can be much different than expected. We use msleep(1) in several places in the IPA driver to insert short delays. Replace them with usleep_range calls, which should reliably delay a period in the range requested. Signed-off-by: Alex Elder Reviewed-by: Saeed Mahameed Signed-off-by: Jakub Kicinski --- drivers/net/ipa/gsi.c | 5 +++-- drivers/net/ipa/ipa_endpoint.c | 4 ++-- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/net/ipa/gsi.c b/drivers/net/ipa/gsi.c index e5681a39b5fc6..93b143ba92be5 100644 --- a/drivers/net/ipa/gsi.c +++ b/drivers/net/ipa/gsi.c @@ -611,7 +611,8 @@ static void gsi_channel_reset_command(struct gsi_channel *channel) struct device *dev = channel->gsi->dev; enum gsi_channel_state state; - msleep(1); /* A short delay is required before a RESET command */ + /* A short delay is required before a RESET command */ + usleep_range(USEC_PER_MSEC, 2 * USEC_PER_MSEC); state = gsi_channel_state(channel); if (state != GSI_CHANNEL_STATE_STOPPED && @@ -900,7 +901,7 @@ int gsi_channel_stop(struct gsi *gsi, u32 channel_id) ret = gsi_channel_stop_command(channel); if (ret != -EAGAIN) break; - msleep(1); + usleep_range(USEC_PER_MSEC, 2 * USEC_PER_MSEC); } while (retries--); mutex_unlock(&gsi->mutex); diff --git a/drivers/net/ipa/ipa_endpoint.c b/drivers/net/ipa/ipa_endpoint.c index 9f4be9812a1f3..688a3dd40510a 100644 --- a/drivers/net/ipa/ipa_endpoint.c +++ b/drivers/net/ipa/ipa_endpoint.c @@ -1378,7 +1378,7 @@ static int ipa_endpoint_reset_rx_aggr(struct ipa_endpoint *endpoint) do { if (!ipa_endpoint_aggr_active(endpoint)) break; - msleep(1); + usleep_range(USEC_PER_MSEC, 2 * USEC_PER_MSEC); } while (retries--); /* Check one last time */ @@ -1399,7 +1399,7 @@ static int ipa_endpoint_reset_rx_aggr(struct ipa_endpoint *endpoint) */ gsi_channel_reset(gsi, endpoint->channel_id, true); - msleep(1); + usleep_range(USEC_PER_MSEC, 2 * USEC_PER_MSEC); goto out_suspend_again; -- GitLab From 59b5f45496253ae977792cbe82480686be46b005 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Wed, 13 Jan 2021 11:15:30 -0600 Subject: [PATCH 0848/2012] net: ipa: change GSI command timeout The GSI command timeout is currently 5 seconds, which is much higher than it should be. Express the timeout in milliseconds rather than seconds, and reduce it to 50 milliseconds. Signed-off-by: Alex Elder Reviewed-by: Saeed Mahameed Signed-off-by: Jakub Kicinski --- drivers/net/ipa/gsi.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ipa/gsi.c b/drivers/net/ipa/gsi.c index 93b143ba92be5..4de769166978b 100644 --- a/drivers/net/ipa/gsi.c +++ b/drivers/net/ipa/gsi.c @@ -89,7 +89,7 @@ /* Delay period for interrupt moderation (in 32KHz IPA internal timer ticks) */ #define GSI_EVT_RING_INT_MODT (32 * 1) /* 1ms under 32KHz clock */ -#define GSI_CMD_TIMEOUT 5 /* seconds */ +#define GSI_CMD_TIMEOUT 50 /* milliseconds */ #define GSI_CHANNEL_STOP_RX_RETRIES 10 #define GSI_CHANNEL_MODEM_HALT_RETRIES 10 @@ -359,11 +359,13 @@ static u32 gsi_ring_index(struct gsi_ring *ring, u32 offset) static bool gsi_command(struct gsi *gsi, u32 reg, u32 val, struct completion *completion) { + unsigned long timeout = msecs_to_jiffies(GSI_CMD_TIMEOUT); + reinit_completion(completion); iowrite32(val, gsi->virt + reg); - return !!wait_for_completion_timeout(completion, GSI_CMD_TIMEOUT * HZ); + return !!wait_for_completion_timeout(completion, timeout); } /* Return the hardware's notion of the current state of an event ring */ -- GitLab From 3d60e15f6ead2f4a56903a8c737e7f522c867827 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Wed, 13 Jan 2021 11:15:31 -0600 Subject: [PATCH 0849/2012] net: ipa: change stop channel retry delay If a GSI stop channel command leaves the channel in STOP_IN_PROC state, we retry the stop command after a 1-2 millisecond delay. I have been told that a 3-5 millisecond delay is a better choice. Signed-off-by: Alex Elder Reviewed-by: Saeed Mahameed Signed-off-by: Jakub Kicinski --- drivers/net/ipa/gsi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ipa/gsi.c b/drivers/net/ipa/gsi.c index 4de769166978b..5c163f9c0ea7b 100644 --- a/drivers/net/ipa/gsi.c +++ b/drivers/net/ipa/gsi.c @@ -903,7 +903,7 @@ int gsi_channel_stop(struct gsi *gsi, u32 channel_id) ret = gsi_channel_stop_command(channel); if (ret != -EAGAIN) break; - usleep_range(USEC_PER_MSEC, 2 * USEC_PER_MSEC); + usleep_range(3 * USEC_PER_MSEC, 5 * USEC_PER_MSEC); } while (retries--); mutex_unlock(&gsi->mutex); -- GitLab From 057ef63f755f4ef15eb534f922c1d057c50d4571 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Wed, 13 Jan 2021 11:15:32 -0600 Subject: [PATCH 0850/2012] net: ipa: retry TX channel stop commands For RX channels we issue a stop command more than once if necessary to allow it to stop. It turns out that TX channels could also require retries. Retry channel stop commands if necessary regardless of the channel direction. Rename the symbol defining the retry count so it's not RX-specific. Signed-off-by: Alex Elder Reviewed-by: Saeed Mahameed Signed-off-by: Jakub Kicinski --- drivers/net/ipa/gsi.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/net/ipa/gsi.c b/drivers/net/ipa/gsi.c index 5c163f9c0ea7b..5b29f7d9d6ac1 100644 --- a/drivers/net/ipa/gsi.c +++ b/drivers/net/ipa/gsi.c @@ -91,7 +91,7 @@ #define GSI_CMD_TIMEOUT 50 /* milliseconds */ -#define GSI_CHANNEL_STOP_RX_RETRIES 10 +#define GSI_CHANNEL_STOP_RETRIES 10 #define GSI_CHANNEL_MODEM_HALT_RETRIES 10 #define GSI_MHI_EVENT_ID_START 10 /* 1st reserved event id */ @@ -889,14 +889,11 @@ int gsi_channel_start(struct gsi *gsi, u32 channel_id) int gsi_channel_stop(struct gsi *gsi, u32 channel_id) { struct gsi_channel *channel = &gsi->channel[channel_id]; - u32 retries; + u32 retries = GSI_CHANNEL_STOP_RETRIES; int ret; gsi_channel_freeze(channel); - /* RX channels might require a little time to enter STOPPED state */ - retries = channel->toward_ipa ? 0 : GSI_CHANNEL_STOP_RX_RETRIES; - mutex_lock(&gsi->mutex); do { -- GitLab From e3a7670737ecd7eb55b5c5e1900678e2a2e51ef9 Mon Sep 17 00:00:00 2001 From: Ayush Sawal Date: Wed, 13 Jan 2021 10:13:02 +0530 Subject: [PATCH 0851/2012] ch_ipsec: Remove initialization of rxq related data Removing initialization of nrxq and rxq_size in uld_info. As ipsec uses nic queues only, there is no need to create uld rx queues for ipsec. Signed-off-by: Ayush Sawal Link: https://lore.kernel.org/r/20210113044302.25522-1-ayush.sawal@chelsio.com Signed-off-by: Jakub Kicinski --- .../net/ethernet/chelsio/inline_crypto/ch_ipsec/chcr_ipsec.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/net/ethernet/chelsio/inline_crypto/ch_ipsec/chcr_ipsec.c b/drivers/net/ethernet/chelsio/inline_crypto/ch_ipsec/chcr_ipsec.c index 47d9268a7e3c9..5855905200761 100644 --- a/drivers/net/ethernet/chelsio/inline_crypto/ch_ipsec/chcr_ipsec.c +++ b/drivers/net/ethernet/chelsio/inline_crypto/ch_ipsec/chcr_ipsec.c @@ -92,9 +92,6 @@ static const struct xfrmdev_ops ch_ipsec_xfrmdev_ops = { static struct cxgb4_uld_info ch_ipsec_uld_info = { .name = CHIPSEC_DRV_MODULE_NAME, - .nrxq = MAX_ULD_QSETS, - /* Max ntxq will be derived from fw config file*/ - .rxq_size = 1024, .add = ch_ipsec_uld_add, .state_change = ch_ipsec_uld_state_change, .tx_handler = ch_ipsec_xmit, -- GitLab From 71854255820d1a479654bd2e98483129c588c118 Mon Sep 17 00:00:00 2001 From: Eran Ben Elisha Date: Tue, 12 Jan 2021 21:07:12 +0200 Subject: [PATCH 0852/2012] net: vlan: Add parse protocol header ops Add parse protocol header ops for vlan device. Before this patch, vlan tagged packet transmitted by af_packet had skb->protocol unset. Some kernel methods (like __skb_flow_dissect()) rely on this missing information for its packet processing. Signed-off-by: Eran Ben Elisha Reviewed-by: Tariq Toukan Signed-off-by: Jakub Kicinski --- net/8021q/vlan_dev.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index ec8408d1638fb..dc1a197792e6b 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -510,9 +510,17 @@ static void vlan_dev_set_lockdep_class(struct net_device *dev) netdev_for_each_tx_queue(dev, vlan_dev_set_lockdep_one, NULL); } +static __be16 vlan_parse_protocol(const struct sk_buff *skb) +{ + struct vlan_ethhdr *veth = (struct vlan_ethhdr *)(skb->data); + + return __vlan_get_protocol(skb, veth->h_vlan_proto, NULL); +} + static const struct header_ops vlan_header_ops = { .create = vlan_dev_hard_header, .parse = eth_header_parse, + .parse_protocol = vlan_parse_protocol, }; static int vlan_passthru_hard_header(struct sk_buff *skb, struct net_device *dev, @@ -532,6 +540,7 @@ static int vlan_passthru_hard_header(struct sk_buff *skb, struct net_device *dev static const struct header_ops vlan_passthru_header_ops = { .create = vlan_passthru_hard_header, .parse = eth_header_parse, + .parse_protocol = vlan_parse_protocol, }; static struct device_type vlan_type = { -- GitLab From 4f1cc51f34886d645cd3e8fc2915cc9b7a55c3b6 Mon Sep 17 00:00:00 2001 From: Eran Ben Elisha Date: Tue, 12 Jan 2021 21:07:13 +0200 Subject: [PATCH 0853/2012] net: flow_dissector: Parse PTP L2 packet header Add support for parsing PTP L2 packet header. Such packet consists of an L2 header (with ethertype of ETH_P_1588), PTP header, body and an optional suffix. Signed-off-by: Eran Ben Elisha Reviewed-by: Tariq Toukan Signed-off-by: Jakub Kicinski --- net/core/flow_dissector.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 6f1adba6695fc..2d70ded389aeb 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include @@ -1251,6 +1252,21 @@ proto_again: &proto, &nhoff, hlen, flags); break; + case htons(ETH_P_1588): { + struct ptp_header *hdr, _hdr; + + hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data, + hlen, &_hdr); + if (!hdr) { + fdret = FLOW_DISSECT_RET_OUT_BAD; + break; + } + + nhoff += ntohs(hdr->message_length); + fdret = FLOW_DISSECT_RET_OUT_GOOD; + break; + } + default: fdret = FLOW_DISSECT_RET_OUT_BAD; break; -- GitLab From dec822771b0174a01e72d7641d08e44461b6a82f Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Thu, 14 Jan 2021 10:46:57 +0800 Subject: [PATCH 0854/2012] riscv: stacktrace: Move register keyword to beginning of declaration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Using global sp_in_global directly to fix the following warning, arch/riscv/kernel/stacktrace.c:31:3: warning: ‘register’ is not at beginning of declaration [-Wold-style-declaration] 31 | const register unsigned long current_sp = sp_in_global; | ^~~~~ Signed-off-by: Kefeng Wang Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/stacktrace.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/arch/riscv/kernel/stacktrace.c b/arch/riscv/kernel/stacktrace.c index 48b870a685b30..df5d2da7c40be 100644 --- a/arch/riscv/kernel/stacktrace.c +++ b/arch/riscv/kernel/stacktrace.c @@ -14,7 +14,7 @@ #include -register unsigned long sp_in_global __asm__("sp"); +register const unsigned long sp_in_global __asm__("sp"); #ifdef CONFIG_FRAME_POINTER @@ -28,9 +28,8 @@ void notrace walk_stackframe(struct task_struct *task, struct pt_regs *regs, sp = user_stack_pointer(regs); pc = instruction_pointer(regs); } else if (task == NULL || task == current) { - const register unsigned long current_sp = sp_in_global; fp = (unsigned long)__builtin_frame_address(0); - sp = current_sp; + sp = sp_in_global; pc = (unsigned long)walk_stackframe; } else { /* task blocked in __switch_to */ -- GitLab From 11c11d0751fce605090761f9c066bae947a35e76 Mon Sep 17 00:00:00 2001 From: Brendan Jackman Date: Thu, 14 Jan 2021 18:17:41 +0000 Subject: [PATCH 0855/2012] bpf: x86: Factor out emission of ModR/M for *(reg + off) The case for JITing atomics is about to get more complicated. Let's factor out some common code to make the review and result more readable. NB the atomics code doesn't yet use the new helper - a subsequent patch will add its use as a side-effect of other changes. Signed-off-by: Brendan Jackman Signed-off-by: Alexei Starovoitov Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20210114181751.768687-2-jackmanb@google.com --- arch/x86/net/bpf_jit_comp.c | 43 +++++++++++++++++++++---------------- 1 file changed, 25 insertions(+), 18 deletions(-) diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 796506dcfc42e..30526776fa78c 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -681,6 +681,27 @@ static void emit_mov_reg(u8 **pprog, bool is64, u32 dst_reg, u32 src_reg) *pprog = prog; } +/* Emit the suffix (ModR/M etc) for addressing *(ptr_reg + off) and val_reg */ +static void emit_insn_suffix(u8 **pprog, u32 ptr_reg, u32 val_reg, int off) +{ + u8 *prog = *pprog; + int cnt = 0; + + if (is_imm8(off)) { + /* 1-byte signed displacement. + * + * If off == 0 we could skip this and save one extra byte, but + * special case of x86 R13 which always needs an offset is not + * worth the hassle + */ + EMIT2(add_2reg(0x40, ptr_reg, val_reg), off); + } else { + /* 4-byte signed displacement */ + EMIT1_off32(add_2reg(0x80, ptr_reg, val_reg), off); + } + *pprog = prog; +} + /* LDX: dst_reg = *(u8*)(src_reg + off) */ static void emit_ldx(u8 **pprog, u32 size, u32 dst_reg, u32 src_reg, int off) { @@ -708,15 +729,7 @@ static void emit_ldx(u8 **pprog, u32 size, u32 dst_reg, u32 src_reg, int off) EMIT2(add_2mod(0x48, src_reg, dst_reg), 0x8B); break; } - /* - * If insn->off == 0 we can save one extra byte, but - * special case of x86 R13 which always needs an offset - * is not worth the hassle - */ - if (is_imm8(off)) - EMIT2(add_2reg(0x40, src_reg, dst_reg), off); - else - EMIT1_off32(add_2reg(0x80, src_reg, dst_reg), off); + emit_insn_suffix(&prog, src_reg, dst_reg, off); *pprog = prog; } @@ -751,10 +764,7 @@ static void emit_stx(u8 **pprog, u32 size, u32 dst_reg, u32 src_reg, int off) EMIT2(add_2mod(0x48, dst_reg, src_reg), 0x89); break; } - if (is_imm8(off)) - EMIT2(add_2reg(0x40, dst_reg, src_reg), off); - else - EMIT1_off32(add_2reg(0x80, dst_reg, src_reg), off); + emit_insn_suffix(&prog, dst_reg, src_reg, off); *pprog = prog; } @@ -1240,11 +1250,8 @@ st: if (is_imm8(insn->off)) goto xadd; case BPF_STX | BPF_XADD | BPF_DW: EMIT3(0xF0, add_2mod(0x48, dst_reg, src_reg), 0x01); -xadd: if (is_imm8(insn->off)) - EMIT2(add_2reg(0x40, dst_reg, src_reg), insn->off); - else - EMIT1_off32(add_2reg(0x80, dst_reg, src_reg), - insn->off); +xadd: + emit_modrm_dstoff(&prog, dst_reg, src_reg, insn->off); break; /* call */ -- GitLab From 74007cfc1f71e47394ca173b93d28afd0529fc86 Mon Sep 17 00:00:00 2001 From: Brendan Jackman Date: Thu, 14 Jan 2021 18:17:42 +0000 Subject: [PATCH 0856/2012] bpf: x86: Factor out emission of REX byte The JIT case for encoding atomic ops is about to get more complicated. In order to make the review & resulting code easier, let's factor out some shared helpers. Signed-off-by: Brendan Jackman Signed-off-by: Alexei Starovoitov Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20210114181751.768687-3-jackmanb@google.com --- arch/x86/net/bpf_jit_comp.c | 39 ++++++++++++++++++++++--------------- 1 file changed, 23 insertions(+), 16 deletions(-) diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 30526776fa78c..f15c93275a18a 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -702,6 +702,21 @@ static void emit_insn_suffix(u8 **pprog, u32 ptr_reg, u32 val_reg, int off) *pprog = prog; } +/* + * Emit a REX byte if it will be necessary to address these registers + */ +static void maybe_emit_mod(u8 **pprog, u32 dst_reg, u32 src_reg, bool is64) +{ + u8 *prog = *pprog; + int cnt = 0; + + if (is64) + EMIT1(add_2mod(0x48, dst_reg, src_reg)); + else if (is_ereg(dst_reg) || is_ereg(src_reg)) + EMIT1(add_2mod(0x40, dst_reg, src_reg)); + *pprog = prog; +} + /* LDX: dst_reg = *(u8*)(src_reg + off) */ static void emit_ldx(u8 **pprog, u32 size, u32 dst_reg, u32 src_reg, int off) { @@ -854,10 +869,8 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, case BPF_OR: b2 = 0x09; break; case BPF_XOR: b2 = 0x31; break; } - if (BPF_CLASS(insn->code) == BPF_ALU64) - EMIT1(add_2mod(0x48, dst_reg, src_reg)); - else if (is_ereg(dst_reg) || is_ereg(src_reg)) - EMIT1(add_2mod(0x40, dst_reg, src_reg)); + maybe_emit_mod(&prog, dst_reg, src_reg, + BPF_CLASS(insn->code) == BPF_ALU64); EMIT2(b2, add_2reg(0xC0, dst_reg, src_reg)); break; @@ -1302,20 +1315,16 @@ xadd: case BPF_JMP32 | BPF_JSGE | BPF_X: case BPF_JMP32 | BPF_JSLE | BPF_X: /* cmp dst_reg, src_reg */ - if (BPF_CLASS(insn->code) == BPF_JMP) - EMIT1(add_2mod(0x48, dst_reg, src_reg)); - else if (is_ereg(dst_reg) || is_ereg(src_reg)) - EMIT1(add_2mod(0x40, dst_reg, src_reg)); + maybe_emit_mod(&prog, dst_reg, src_reg, + BPF_CLASS(insn->code) == BPF_JMP); EMIT2(0x39, add_2reg(0xC0, dst_reg, src_reg)); goto emit_cond_jmp; case BPF_JMP | BPF_JSET | BPF_X: case BPF_JMP32 | BPF_JSET | BPF_X: /* test dst_reg, src_reg */ - if (BPF_CLASS(insn->code) == BPF_JMP) - EMIT1(add_2mod(0x48, dst_reg, src_reg)); - else if (is_ereg(dst_reg) || is_ereg(src_reg)) - EMIT1(add_2mod(0x40, dst_reg, src_reg)); + maybe_emit_mod(&prog, dst_reg, src_reg, + BPF_CLASS(insn->code) == BPF_JMP); EMIT2(0x85, add_2reg(0xC0, dst_reg, src_reg)); goto emit_cond_jmp; @@ -1351,10 +1360,8 @@ xadd: case BPF_JMP32 | BPF_JSLE | BPF_K: /* test dst_reg, dst_reg to save one extra byte */ if (imm32 == 0) { - if (BPF_CLASS(insn->code) == BPF_JMP) - EMIT1(add_2mod(0x48, dst_reg, dst_reg)); - else if (is_ereg(dst_reg)) - EMIT1(add_2mod(0x40, dst_reg, dst_reg)); + maybe_emit_mod(&prog, dst_reg, dst_reg, + BPF_CLASS(insn->code) == BPF_JMP); EMIT2(0x85, add_2reg(0xC0, dst_reg, dst_reg)); goto emit_cond_jmp; } -- GitLab From e5f02caccfae94f5baf6ec6dbb57ce8a7e9a40e7 Mon Sep 17 00:00:00 2001 From: Brendan Jackman Date: Thu, 14 Jan 2021 18:17:43 +0000 Subject: [PATCH 0857/2012] bpf: x86: Factor out a lookup table for some ALU opcodes A later commit will need to lookup a subset of these opcodes. To avoid duplicating code, pull out a table. The shift opcodes won't be needed by that later commit, but they're already duplicated, so fold them into the table anyway. Signed-off-by: Brendan Jackman Signed-off-by: Alexei Starovoitov Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20210114181751.768687-4-jackmanb@google.com --- arch/x86/net/bpf_jit_comp.c | 33 +++++++++++++++------------------ 1 file changed, 15 insertions(+), 18 deletions(-) diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index f15c93275a18a..93f32e0ba0ef9 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -205,6 +205,18 @@ static u8 add_2reg(u8 byte, u32 dst_reg, u32 src_reg) return byte + reg2hex[dst_reg] + (reg2hex[src_reg] << 3); } +/* Some 1-byte opcodes for binary ALU operations */ +static u8 simple_alu_opcodes[] = { + [BPF_ADD] = 0x01, + [BPF_SUB] = 0x29, + [BPF_AND] = 0x21, + [BPF_OR] = 0x09, + [BPF_XOR] = 0x31, + [BPF_LSH] = 0xE0, + [BPF_RSH] = 0xE8, + [BPF_ARSH] = 0xF8, +}; + static void jit_fill_hole(void *area, unsigned int size) { /* Fill whole space with INT3 instructions */ @@ -862,15 +874,9 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, case BPF_ALU64 | BPF_AND | BPF_X: case BPF_ALU64 | BPF_OR | BPF_X: case BPF_ALU64 | BPF_XOR | BPF_X: - switch (BPF_OP(insn->code)) { - case BPF_ADD: b2 = 0x01; break; - case BPF_SUB: b2 = 0x29; break; - case BPF_AND: b2 = 0x21; break; - case BPF_OR: b2 = 0x09; break; - case BPF_XOR: b2 = 0x31; break; - } maybe_emit_mod(&prog, dst_reg, src_reg, BPF_CLASS(insn->code) == BPF_ALU64); + b2 = simple_alu_opcodes[BPF_OP(insn->code)]; EMIT2(b2, add_2reg(0xC0, dst_reg, src_reg)); break; @@ -1050,12 +1056,7 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, else if (is_ereg(dst_reg)) EMIT1(add_1mod(0x40, dst_reg)); - switch (BPF_OP(insn->code)) { - case BPF_LSH: b3 = 0xE0; break; - case BPF_RSH: b3 = 0xE8; break; - case BPF_ARSH: b3 = 0xF8; break; - } - + b3 = simple_alu_opcodes[BPF_OP(insn->code)]; if (imm32 == 1) EMIT2(0xD1, add_1reg(b3, dst_reg)); else @@ -1089,11 +1090,7 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, else if (is_ereg(dst_reg)) EMIT1(add_1mod(0x40, dst_reg)); - switch (BPF_OP(insn->code)) { - case BPF_LSH: b3 = 0xE0; break; - case BPF_RSH: b3 = 0xE8; break; - case BPF_ARSH: b3 = 0xF8; break; - } + b3 = simple_alu_opcodes[BPF_OP(insn->code)]; EMIT2(0xD3, add_1reg(b3, dst_reg)); if (src_reg != BPF_REG_4) -- GitLab From 91c960b0056672e74627776655c926388350fa30 Mon Sep 17 00:00:00 2001 From: Brendan Jackman Date: Thu, 14 Jan 2021 18:17:44 +0000 Subject: [PATCH 0858/2012] bpf: Rename BPF_XADD and prepare to encode other atomics in .imm MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A subsequent patch will add additional atomic operations. These new operations will use the same opcode field as the existing XADD, with the immediate discriminating different operations. In preparation, rename the instruction mode BPF_ATOMIC and start calling the zero immediate BPF_ADD. This is possible (doesn't break existing valid BPF progs) because the immediate field is currently reserved MBZ and BPF_ADD is zero. All uses are removed from the tree but the BPF_XADD definition is kept around to avoid breaking builds for people including kernel headers. Signed-off-by: Brendan Jackman Signed-off-by: Alexei Starovoitov Acked-by: Björn Töpel Link: https://lore.kernel.org/bpf/20210114181751.768687-5-jackmanb@google.com --- Documentation/networking/filter.rst | 30 +++++++----- arch/arm/net/bpf_jit_32.c | 7 ++- arch/arm64/net/bpf_jit_comp.c | 16 +++++-- arch/mips/net/ebpf_jit.c | 11 +++-- arch/powerpc/net/bpf_jit_comp64.c | 25 ++++++++-- arch/riscv/net/bpf_jit_comp32.c | 20 ++++++-- arch/riscv/net/bpf_jit_comp64.c | 16 +++++-- arch/s390/net/bpf_jit_comp.c | 27 ++++++----- arch/sparc/net/bpf_jit_comp_64.c | 17 +++++-- arch/x86/net/bpf_jit_comp.c | 46 ++++++++++++++----- arch/x86/net/bpf_jit_comp32.c | 6 +-- drivers/net/ethernet/netronome/nfp/bpf/jit.c | 14 ++++-- drivers/net/ethernet/netronome/nfp/bpf/main.h | 4 +- .../net/ethernet/netronome/nfp/bpf/verifier.c | 15 ++++-- include/linux/filter.h | 16 +++++-- include/uapi/linux/bpf.h | 5 +- kernel/bpf/core.c | 31 +++++++++---- kernel/bpf/disasm.c | 6 ++- kernel/bpf/verifier.c | 24 ++++++---- lib/test_bpf.c | 14 +++--- samples/bpf/bpf_insn.h | 4 +- samples/bpf/cookie_uid_helper_example.c | 8 ++-- samples/bpf/sock_example.c | 2 +- samples/bpf/test_cgrp2_attach.c | 5 +- tools/include/linux/filter.h | 15 ++++-- tools/include/uapi/linux/bpf.h | 5 +- .../bpf/prog_tests/cgroup_attach_multi.c | 4 +- .../selftests/bpf/test_cgroup_storage.c | 2 +- tools/testing/selftests/bpf/verifier/ctx.c | 7 ++- .../bpf/verifier/direct_packet_access.c | 4 +- .../testing/selftests/bpf/verifier/leak_ptr.c | 10 ++-- .../selftests/bpf/verifier/meta_access.c | 4 +- tools/testing/selftests/bpf/verifier/unpriv.c | 3 +- .../bpf/verifier/value_illegal_alu.c | 2 +- tools/testing/selftests/bpf/verifier/xadd.c | 18 ++++---- 35 files changed, 291 insertions(+), 152 deletions(-) diff --git a/Documentation/networking/filter.rst b/Documentation/networking/filter.rst index debb59e374deb..1583d59d806d8 100644 --- a/Documentation/networking/filter.rst +++ b/Documentation/networking/filter.rst @@ -1006,13 +1006,13 @@ Size modifier is one of ... Mode modifier is one of:: - BPF_IMM 0x00 /* used for 32-bit mov in classic BPF and 64-bit in eBPF */ - BPF_ABS 0x20 - BPF_IND 0x40 - BPF_MEM 0x60 - BPF_LEN 0x80 /* classic BPF only, reserved in eBPF */ - BPF_MSH 0xa0 /* classic BPF only, reserved in eBPF */ - BPF_XADD 0xc0 /* eBPF only, exclusive add */ + BPF_IMM 0x00 /* used for 32-bit mov in classic BPF and 64-bit in eBPF */ + BPF_ABS 0x20 + BPF_IND 0x40 + BPF_MEM 0x60 + BPF_LEN 0x80 /* classic BPF only, reserved in eBPF */ + BPF_MSH 0xa0 /* classic BPF only, reserved in eBPF */ + BPF_ATOMIC 0xc0 /* eBPF only, atomic operations */ eBPF has two non-generic instructions: (BPF_ABS | | BPF_LD) and (BPF_IND | | BPF_LD) which are used to access packet data. @@ -1044,11 +1044,19 @@ Unlike classic BPF instruction set, eBPF has generic load/store operations:: BPF_MEM | | BPF_STX: *(size *) (dst_reg + off) = src_reg BPF_MEM | | BPF_ST: *(size *) (dst_reg + off) = imm32 BPF_MEM | | BPF_LDX: dst_reg = *(size *) (src_reg + off) - BPF_XADD | BPF_W | BPF_STX: lock xadd *(u32 *)(dst_reg + off16) += src_reg - BPF_XADD | BPF_DW | BPF_STX: lock xadd *(u64 *)(dst_reg + off16) += src_reg -Where size is one of: BPF_B or BPF_H or BPF_W or BPF_DW. Note that 1 and -2 byte atomic increments are not supported. +Where size is one of: BPF_B or BPF_H or BPF_W or BPF_DW. + +It also includes atomic operations, which use the immediate field for extra +encoding. + + .imm = BPF_ADD, .code = BPF_ATOMIC | BPF_W | BPF_STX: lock xadd *(u32 *)(dst_reg + off16) += src_reg + .imm = BPF_ADD, .code = BPF_ATOMIC | BPF_DW | BPF_STX: lock xadd *(u64 *)(dst_reg + off16) += src_reg + +Note that 1 and 2 byte atomic operations are not supported. + +You may encounter BPF_XADD - this is a legacy name for BPF_ATOMIC, referring to +the exclusive-add operation encoded when the immediate field is zero. eBPF has one 16-byte instruction: BPF_LD | BPF_DW | BPF_IMM which consists of two consecutive ``struct bpf_insn`` 8-byte blocks and interpreted as single diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c index 0207b6ea6e8a0..897634d0a67ca 100644 --- a/arch/arm/net/bpf_jit_32.c +++ b/arch/arm/net/bpf_jit_32.c @@ -1620,10 +1620,9 @@ exit: } emit_str_r(dst_lo, tmp2, off, ctx, BPF_SIZE(code)); break; - /* STX XADD: lock *(u32 *)(dst + off) += src */ - case BPF_STX | BPF_XADD | BPF_W: - /* STX XADD: lock *(u64 *)(dst + off) += src */ - case BPF_STX | BPF_XADD | BPF_DW: + /* Atomic ops */ + case BPF_STX | BPF_ATOMIC | BPF_W: + case BPF_STX | BPF_ATOMIC | BPF_DW: goto notyet; /* STX: *(size *)(dst + off) = src */ case BPF_STX | BPF_MEM | BPF_W: diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index ef9f1d5e989d0..f7b194878a99a 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -875,10 +875,18 @@ emit_cond_jmp: } break; - /* STX XADD: lock *(u32 *)(dst + off) += src */ - case BPF_STX | BPF_XADD | BPF_W: - /* STX XADD: lock *(u64 *)(dst + off) += src */ - case BPF_STX | BPF_XADD | BPF_DW: + case BPF_STX | BPF_ATOMIC | BPF_W: + case BPF_STX | BPF_ATOMIC | BPF_DW: + if (insn->imm != BPF_ADD) { + pr_err_once("unknown atomic op code %02x\n", insn->imm); + return -EINVAL; + } + + /* STX XADD: lock *(u32 *)(dst + off) += src + * and + * STX XADD: lock *(u64 *)(dst + off) += src + */ + if (!off) { reg = dst; } else { diff --git a/arch/mips/net/ebpf_jit.c b/arch/mips/net/ebpf_jit.c index 561154cbcc401..939dd06764bc9 100644 --- a/arch/mips/net/ebpf_jit.c +++ b/arch/mips/net/ebpf_jit.c @@ -1423,8 +1423,8 @@ jeq_common: case BPF_STX | BPF_H | BPF_MEM: case BPF_STX | BPF_W | BPF_MEM: case BPF_STX | BPF_DW | BPF_MEM: - case BPF_STX | BPF_W | BPF_XADD: - case BPF_STX | BPF_DW | BPF_XADD: + case BPF_STX | BPF_W | BPF_ATOMIC: + case BPF_STX | BPF_DW | BPF_ATOMIC: if (insn->dst_reg == BPF_REG_10) { ctx->flags |= EBPF_SEEN_FP; dst = MIPS_R_SP; @@ -1438,7 +1438,12 @@ jeq_common: src = ebpf_to_mips_reg(ctx, insn, src_reg_no_fp); if (src < 0) return src; - if (BPF_MODE(insn->code) == BPF_XADD) { + if (BPF_MODE(insn->code) == BPF_ATOMIC) { + if (insn->imm != BPF_ADD) { + pr_err("ATOMIC OP %02x NOT HANDLED\n", insn->imm); + return -EINVAL; + } + /* * If mem_off does not fit within the 9 bit ll/sc * instruction immediate field, use a temp reg. diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c index 022103c6a201a..aaf1a887f653b 100644 --- a/arch/powerpc/net/bpf_jit_comp64.c +++ b/arch/powerpc/net/bpf_jit_comp64.c @@ -683,10 +683,18 @@ emit_clear: break; /* - * BPF_STX XADD (atomic_add) + * BPF_STX ATOMIC (atomic ops) */ - /* *(u32 *)(dst + off) += src */ - case BPF_STX | BPF_XADD | BPF_W: + case BPF_STX | BPF_ATOMIC | BPF_W: + if (insn->imm != BPF_ADD) { + pr_err_ratelimited( + "eBPF filter atomic op code %02x (@%d) unsupported\n", + code, i); + return -ENOTSUPP; + } + + /* *(u32 *)(dst + off) += src */ + /* Get EA into TMP_REG_1 */ EMIT(PPC_RAW_ADDI(b2p[TMP_REG_1], dst_reg, off)); tmp_idx = ctx->idx * 4; @@ -699,8 +707,15 @@ emit_clear: /* we're done if this succeeded */ PPC_BCC_SHORT(COND_NE, tmp_idx); break; - /* *(u64 *)(dst + off) += src */ - case BPF_STX | BPF_XADD | BPF_DW: + case BPF_STX | BPF_ATOMIC | BPF_DW: + if (insn->imm != BPF_ADD) { + pr_err_ratelimited( + "eBPF filter atomic op code %02x (@%d) unsupported\n", + code, i); + return -ENOTSUPP; + } + /* *(u64 *)(dst + off) += src */ + EMIT(PPC_RAW_ADDI(b2p[TMP_REG_1], dst_reg, off)); tmp_idx = ctx->idx * 4; EMIT(PPC_RAW_LDARX(b2p[TMP_REG_2], 0, b2p[TMP_REG_1], 0)); diff --git a/arch/riscv/net/bpf_jit_comp32.c b/arch/riscv/net/bpf_jit_comp32.c index 579575f9cdae0..81de865f4c7c3 100644 --- a/arch/riscv/net/bpf_jit_comp32.c +++ b/arch/riscv/net/bpf_jit_comp32.c @@ -881,7 +881,7 @@ static int emit_store_r64(const s8 *dst, const s8 *src, s16 off, const s8 *rd = bpf_get_reg64(dst, tmp1, ctx); const s8 *rs = bpf_get_reg64(src, tmp2, ctx); - if (mode == BPF_XADD && size != BPF_W) + if (mode == BPF_ATOMIC && size != BPF_W) return -1; emit_imm(RV_REG_T0, off, ctx); @@ -899,7 +899,7 @@ static int emit_store_r64(const s8 *dst, const s8 *src, s16 off, case BPF_MEM: emit(rv_sw(RV_REG_T0, 0, lo(rs)), ctx); break; - case BPF_XADD: + case BPF_ATOMIC: /* Only BPF_ADD supported */ emit(rv_amoadd_w(RV_REG_ZERO, lo(rs), RV_REG_T0, 0, 0), ctx); break; @@ -1260,7 +1260,6 @@ int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx, case BPF_STX | BPF_MEM | BPF_H: case BPF_STX | BPF_MEM | BPF_W: case BPF_STX | BPF_MEM | BPF_DW: - case BPF_STX | BPF_XADD | BPF_W: if (BPF_CLASS(code) == BPF_ST) { emit_imm32(tmp2, imm, ctx); src = tmp2; @@ -1271,8 +1270,21 @@ int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx, return -1; break; + case BPF_STX | BPF_ATOMIC | BPF_W: + if (insn->imm != BPF_ADD) { + pr_info_once( + "bpf-jit: not supported: atomic operation %02x ***\n", + insn->imm); + return -EFAULT; + } + + if (emit_store_r64(dst, src, off, ctx, BPF_SIZE(code), + BPF_MODE(code))) + return -1; + break; + /* No hardware support for 8-byte atomics in RV32. */ - case BPF_STX | BPF_XADD | BPF_DW: + case BPF_STX | BPF_ATOMIC | BPF_DW: /* Fallthrough. */ notsupported: diff --git a/arch/riscv/net/bpf_jit_comp64.c b/arch/riscv/net/bpf_jit_comp64.c index 8a56b52931170..b44ff52f84a62 100644 --- a/arch/riscv/net/bpf_jit_comp64.c +++ b/arch/riscv/net/bpf_jit_comp64.c @@ -1027,10 +1027,18 @@ out_be: emit_add(RV_REG_T1, RV_REG_T1, rd, ctx); emit_sd(RV_REG_T1, 0, rs, ctx); break; - /* STX XADD: lock *(u32 *)(dst + off) += src */ - case BPF_STX | BPF_XADD | BPF_W: - /* STX XADD: lock *(u64 *)(dst + off) += src */ - case BPF_STX | BPF_XADD | BPF_DW: + case BPF_STX | BPF_ATOMIC | BPF_W: + case BPF_STX | BPF_ATOMIC | BPF_DW: + if (insn->imm != BPF_ADD) { + pr_err("bpf-jit: not supported: atomic operation %02x ***\n", + insn->imm); + return -EINVAL; + } + + /* atomic_add: lock *(u32 *)(dst + off) += src + * atomic_add: lock *(u64 *)(dst + off) += src + */ + if (off) { if (is_12b_int(off)) { emit_addi(RV_REG_T1, rd, off, ctx); diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index 0a41827928769..f973e2ead1973 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -1205,18 +1205,23 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, jit->seen |= SEEN_MEM; break; /* - * BPF_STX XADD (atomic_add) + * BPF_ATOMIC */ - case BPF_STX | BPF_XADD | BPF_W: /* *(u32 *)(dst + off) += src */ - /* laal %w0,%src,off(%dst) */ - EMIT6_DISP_LH(0xeb000000, 0x00fa, REG_W0, src_reg, - dst_reg, off); - jit->seen |= SEEN_MEM; - break; - case BPF_STX | BPF_XADD | BPF_DW: /* *(u64 *)(dst + off) += src */ - /* laalg %w0,%src,off(%dst) */ - EMIT6_DISP_LH(0xeb000000, 0x00ea, REG_W0, src_reg, - dst_reg, off); + case BPF_STX | BPF_ATOMIC | BPF_DW: + case BPF_STX | BPF_ATOMIC | BPF_W: + if (insn->imm != BPF_ADD) { + pr_err("Unknown atomic operation %02x\n", insn->imm); + return -1; + } + + /* *(u32/u64 *)(dst + off) += src + * + * BFW_W: laal %w0,%src,off(%dst) + * BPF_DW: laalg %w0,%src,off(%dst) + */ + EMIT6_DISP_LH(0xeb000000, + BPF_SIZE(insn->code) == BPF_W ? 0x00fa : 0x00ea, + REG_W0, src_reg, dst_reg, off); jit->seen |= SEEN_MEM; break; /* diff --git a/arch/sparc/net/bpf_jit_comp_64.c b/arch/sparc/net/bpf_jit_comp_64.c index 3364e2a009899..4b8d3c65d2666 100644 --- a/arch/sparc/net/bpf_jit_comp_64.c +++ b/arch/sparc/net/bpf_jit_comp_64.c @@ -1366,12 +1366,18 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) break; } - /* STX XADD: lock *(u32 *)(dst + off) += src */ - case BPF_STX | BPF_XADD | BPF_W: { + case BPF_STX | BPF_ATOMIC | BPF_W: { const u8 tmp = bpf2sparc[TMP_REG_1]; const u8 tmp2 = bpf2sparc[TMP_REG_2]; const u8 tmp3 = bpf2sparc[TMP_REG_3]; + if (insn->imm != BPF_ADD) { + pr_err_once("unknown atomic op %02x\n", insn->imm); + return -EINVAL; + } + + /* lock *(u32 *)(dst + off) += src */ + if (insn->dst_reg == BPF_REG_FP) ctx->saw_frame_pointer = true; @@ -1390,11 +1396,16 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) break; } /* STX XADD: lock *(u64 *)(dst + off) += src */ - case BPF_STX | BPF_XADD | BPF_DW: { + case BPF_STX | BPF_ATOMIC | BPF_DW: { const u8 tmp = bpf2sparc[TMP_REG_1]; const u8 tmp2 = bpf2sparc[TMP_REG_2]; const u8 tmp3 = bpf2sparc[TMP_REG_3]; + if (insn->imm != BPF_ADD) { + pr_err_once("unknown atomic op %02x\n", insn->imm); + return -EINVAL; + } + if (insn->dst_reg == BPF_REG_FP) ctx->saw_frame_pointer = true; diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 93f32e0ba0ef9..b1829a534da14 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -795,6 +795,33 @@ static void emit_stx(u8 **pprog, u32 size, u32 dst_reg, u32 src_reg, int off) *pprog = prog; } +static int emit_atomic(u8 **pprog, u8 atomic_op, + u32 dst_reg, u32 src_reg, s16 off, u8 bpf_size) +{ + u8 *prog = *pprog; + int cnt = 0; + + EMIT1(0xF0); /* lock prefix */ + + maybe_emit_mod(&prog, dst_reg, src_reg, bpf_size == BPF_DW); + + /* emit opcode */ + switch (atomic_op) { + case BPF_ADD: + /* lock *(u32/u64*)(dst_reg + off) = src_reg */ + EMIT1(simple_alu_opcodes[atomic_op]); + break; + default: + pr_err("bpf_jit: unknown atomic opcode %02x\n", atomic_op); + return -EFAULT; + } + + emit_insn_suffix(&prog, dst_reg, src_reg, off); + + *pprog = prog; + return 0; +} + static bool ex_handler_bpf(const struct exception_table_entry *x, struct pt_regs *regs, int trapnr, unsigned long error_code, unsigned long fault_addr) @@ -839,6 +866,7 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, int i, cnt = 0, excnt = 0; int proglen = 0; u8 *prog = temp; + int err; detect_reg_usage(insn, insn_cnt, callee_regs_used, &tail_call_seen); @@ -1250,18 +1278,12 @@ st: if (is_imm8(insn->off)) } break; - /* STX XADD: lock *(u32*)(dst_reg + off) += src_reg */ - case BPF_STX | BPF_XADD | BPF_W: - /* Emit 'lock add dword ptr [rax + off], eax' */ - if (is_ereg(dst_reg) || is_ereg(src_reg)) - EMIT3(0xF0, add_2mod(0x40, dst_reg, src_reg), 0x01); - else - EMIT2(0xF0, 0x01); - goto xadd; - case BPF_STX | BPF_XADD | BPF_DW: - EMIT3(0xF0, add_2mod(0x48, dst_reg, src_reg), 0x01); -xadd: - emit_modrm_dstoff(&prog, dst_reg, src_reg, insn->off); + case BPF_STX | BPF_ATOMIC | BPF_W: + case BPF_STX | BPF_ATOMIC | BPF_DW: + err = emit_atomic(&prog, insn->imm, dst_reg, src_reg, + insn->off, BPF_SIZE(insn->code)); + if (err) + return err; break; /* call */ diff --git a/arch/x86/net/bpf_jit_comp32.c b/arch/x86/net/bpf_jit_comp32.c index 96fde03aa9877..d17b67c69f89a 100644 --- a/arch/x86/net/bpf_jit_comp32.c +++ b/arch/x86/net/bpf_jit_comp32.c @@ -2243,10 +2243,8 @@ emit_jmp: return -EFAULT; } break; - /* STX XADD: lock *(u32 *)(dst + off) += src */ - case BPF_STX | BPF_XADD | BPF_W: - /* STX XADD: lock *(u64 *)(dst + off) += src */ - case BPF_STX | BPF_XADD | BPF_DW: + case BPF_STX | BPF_ATOMIC | BPF_W: + case BPF_STX | BPF_ATOMIC | BPF_DW: goto notyet; case BPF_JMP | BPF_EXIT: if (seen_exit) { diff --git a/drivers/net/ethernet/netronome/nfp/bpf/jit.c b/drivers/net/ethernet/netronome/nfp/bpf/jit.c index 0a721f6e8676e..e31f8fbbc696d 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/jit.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/jit.c @@ -3109,13 +3109,19 @@ mem_xadd(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, bool is64) return 0; } -static int mem_xadd4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +static int mem_atomic4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) { + if (meta->insn.imm != BPF_ADD) + return -EOPNOTSUPP; + return mem_xadd(nfp_prog, meta, false); } -static int mem_xadd8(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +static int mem_atomic8(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) { + if (meta->insn.imm != BPF_ADD) + return -EOPNOTSUPP; + return mem_xadd(nfp_prog, meta, true); } @@ -3475,8 +3481,8 @@ static const instr_cb_t instr_cb[256] = { [BPF_STX | BPF_MEM | BPF_H] = mem_stx2, [BPF_STX | BPF_MEM | BPF_W] = mem_stx4, [BPF_STX | BPF_MEM | BPF_DW] = mem_stx8, - [BPF_STX | BPF_XADD | BPF_W] = mem_xadd4, - [BPF_STX | BPF_XADD | BPF_DW] = mem_xadd8, + [BPF_STX | BPF_ATOMIC | BPF_W] = mem_atomic4, + [BPF_STX | BPF_ATOMIC | BPF_DW] = mem_atomic8, [BPF_ST | BPF_MEM | BPF_B] = mem_st1, [BPF_ST | BPF_MEM | BPF_H] = mem_st2, [BPF_ST | BPF_MEM | BPF_W] = mem_st4, diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.h b/drivers/net/ethernet/netronome/nfp/bpf/main.h index fac9c6f9e197b..d0e17eebddd94 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/main.h +++ b/drivers/net/ethernet/netronome/nfp/bpf/main.h @@ -428,9 +428,9 @@ static inline bool is_mbpf_classic_store_pkt(const struct nfp_insn_meta *meta) return is_mbpf_classic_store(meta) && meta->ptr.type == PTR_TO_PACKET; } -static inline bool is_mbpf_xadd(const struct nfp_insn_meta *meta) +static inline bool is_mbpf_atomic(const struct nfp_insn_meta *meta) { - return (meta->insn.code & ~BPF_SIZE_MASK) == (BPF_STX | BPF_XADD); + return (meta->insn.code & ~BPF_SIZE_MASK) == (BPF_STX | BPF_ATOMIC); } static inline bool is_mbpf_mul(const struct nfp_insn_meta *meta) diff --git a/drivers/net/ethernet/netronome/nfp/bpf/verifier.c b/drivers/net/ethernet/netronome/nfp/bpf/verifier.c index e92ee510fd52a..9d235c0ce46a8 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/verifier.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/verifier.c @@ -479,7 +479,7 @@ nfp_bpf_check_ptr(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, pr_vlog(env, "map writes not supported\n"); return -EOPNOTSUPP; } - if (is_mbpf_xadd(meta)) { + if (is_mbpf_atomic(meta)) { err = nfp_bpf_map_mark_used(env, meta, reg, NFP_MAP_USE_ATOMIC_CNT); if (err) @@ -523,12 +523,17 @@ exit_check_ptr: } static int -nfp_bpf_check_xadd(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, - struct bpf_verifier_env *env) +nfp_bpf_check_atomic(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, + struct bpf_verifier_env *env) { const struct bpf_reg_state *sreg = cur_regs(env) + meta->insn.src_reg; const struct bpf_reg_state *dreg = cur_regs(env) + meta->insn.dst_reg; + if (meta->insn.imm != BPF_ADD) { + pr_vlog(env, "atomic op not implemented: %d\n", meta->insn.imm); + return -EOPNOTSUPP; + } + if (dreg->type != PTR_TO_MAP_VALUE) { pr_vlog(env, "atomic add not to a map value pointer: %d\n", dreg->type); @@ -655,8 +660,8 @@ int nfp_verify_insn(struct bpf_verifier_env *env, int insn_idx, if (is_mbpf_store(meta)) return nfp_bpf_check_store(nfp_prog, meta, env); - if (is_mbpf_xadd(meta)) - return nfp_bpf_check_xadd(nfp_prog, meta, env); + if (is_mbpf_atomic(meta)) + return nfp_bpf_check_atomic(nfp_prog, meta, env); if (is_mbpf_alu(meta)) return nfp_bpf_check_alu(nfp_prog, meta, env); diff --git a/include/linux/filter.h b/include/linux/filter.h index 5edf2b6608812..392e94b796687 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -259,15 +259,23 @@ static inline bool insn_is_zext(const struct bpf_insn *insn) .off = OFF, \ .imm = 0 }) -/* Atomic memory add, *(uint *)(dst_reg + off16) += src_reg */ -#define BPF_STX_XADD(SIZE, DST, SRC, OFF) \ +/* + * Atomic operations: + * + * BPF_ADD *(uint *) (dst_reg + off16) += src_reg + */ + +#define BPF_ATOMIC_OP(SIZE, OP, DST, SRC, OFF) \ ((struct bpf_insn) { \ - .code = BPF_STX | BPF_SIZE(SIZE) | BPF_XADD, \ + .code = BPF_STX | BPF_SIZE(SIZE) | BPF_ATOMIC, \ .dst_reg = DST, \ .src_reg = SRC, \ .off = OFF, \ - .imm = 0 }) + .imm = OP }) + +/* Legacy alias */ +#define BPF_STX_XADD(SIZE, DST, SRC, OFF) BPF_ATOMIC_OP(SIZE, BPF_ADD, DST, SRC, OFF) /* Memory store, *(uint *) (dst_reg + off16) = imm32 */ diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index a1ad32456f89a..6b3996343e637 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -19,7 +19,8 @@ /* ld/ldx fields */ #define BPF_DW 0x18 /* double word (64-bit) */ -#define BPF_XADD 0xc0 /* exclusive add */ +#define BPF_ATOMIC 0xc0 /* atomic memory ops - op type in immediate */ +#define BPF_XADD 0xc0 /* exclusive add - legacy name */ /* alu/jmp fields */ #define BPF_MOV 0xb0 /* mov reg to reg */ @@ -2448,7 +2449,7 @@ union bpf_attr { * running simultaneously. * * A user should care about the synchronization by himself. - * For example, by using the **BPF_STX_XADD** instruction to alter + * For example, by using the **BPF_ATOMIC** instructions to alter * the shared data. * Return * A pointer to the local storage area. diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 69c3c308de5e0..4836ebf459cfe 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -1309,8 +1309,8 @@ EXPORT_SYMBOL_GPL(__bpf_call_base); INSN_3(STX, MEM, H), \ INSN_3(STX, MEM, W), \ INSN_3(STX, MEM, DW), \ - INSN_3(STX, XADD, W), \ - INSN_3(STX, XADD, DW), \ + INSN_3(STX, ATOMIC, W), \ + INSN_3(STX, ATOMIC, DW), \ /* Immediate based. */ \ INSN_3(ST, MEM, B), \ INSN_3(ST, MEM, H), \ @@ -1618,13 +1618,25 @@ out: LDX_PROBE(DW, 8) #undef LDX_PROBE - STX_XADD_W: /* lock xadd *(u32 *)(dst_reg + off16) += src_reg */ - atomic_add((u32) SRC, (atomic_t *)(unsigned long) - (DST + insn->off)); + STX_ATOMIC_W: + switch (IMM) { + case BPF_ADD: + /* lock xadd *(u32 *)(dst_reg + off16) += src_reg */ + atomic_add((u32) SRC, (atomic_t *)(unsigned long) + (DST + insn->off)); + default: + goto default_label; + } CONT; - STX_XADD_DW: /* lock xadd *(u64 *)(dst_reg + off16) += src_reg */ - atomic64_add((u64) SRC, (atomic64_t *)(unsigned long) - (DST + insn->off)); + STX_ATOMIC_DW: + switch (IMM) { + case BPF_ADD: + /* lock xadd *(u64 *)(dst_reg + off16) += src_reg */ + atomic64_add((u64) SRC, (atomic64_t *)(unsigned long) + (DST + insn->off)); + default: + goto default_label; + } CONT; default_label: @@ -1634,7 +1646,8 @@ out: * * Note, verifier whitelists all opcodes in bpf_opcode_in_insntable(). */ - pr_warn("BPF interpreter: unknown opcode %02x\n", insn->code); + pr_warn("BPF interpreter: unknown opcode %02x (imm: 0x%x)\n", + insn->code, insn->imm); BUG_ON(1); return 0; } diff --git a/kernel/bpf/disasm.c b/kernel/bpf/disasm.c index b44d8c447afd1..37c8d6e9b4cce 100644 --- a/kernel/bpf/disasm.c +++ b/kernel/bpf/disasm.c @@ -153,14 +153,16 @@ void print_bpf_insn(const struct bpf_insn_cbs *cbs, bpf_ldst_string[BPF_SIZE(insn->code) >> 3], insn->dst_reg, insn->off, insn->src_reg); - else if (BPF_MODE(insn->code) == BPF_XADD) + else if (BPF_MODE(insn->code) == BPF_ATOMIC && + insn->imm == BPF_ADD) { verbose(cbs->private_data, "(%02x) lock *(%s *)(r%d %+d) += r%d\n", insn->code, bpf_ldst_string[BPF_SIZE(insn->code) >> 3], insn->dst_reg, insn->off, insn->src_reg); - else + } else { verbose(cbs->private_data, "BUG_%02x\n", insn->code); + } } else if (class == BPF_ST) { if (BPF_MODE(insn->code) != BPF_MEM) { verbose(cbs->private_data, "BUG_st_%02x\n", insn->code); diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index ae2aee48cf821..cfc137b81ac66 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -3604,13 +3604,17 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn return err; } -static int check_xadd(struct bpf_verifier_env *env, int insn_idx, struct bpf_insn *insn) +static int check_atomic(struct bpf_verifier_env *env, int insn_idx, struct bpf_insn *insn) { int err; - if ((BPF_SIZE(insn->code) != BPF_W && BPF_SIZE(insn->code) != BPF_DW) || - insn->imm != 0) { - verbose(env, "BPF_XADD uses reserved fields\n"); + if (insn->imm != BPF_ADD) { + verbose(env, "BPF_ATOMIC uses invalid atomic opcode %02x\n", insn->imm); + return -EINVAL; + } + + if (BPF_SIZE(insn->code) != BPF_W && BPF_SIZE(insn->code) != BPF_DW) { + verbose(env, "invalid atomic operand size\n"); return -EINVAL; } @@ -3633,19 +3637,19 @@ static int check_xadd(struct bpf_verifier_env *env, int insn_idx, struct bpf_ins is_pkt_reg(env, insn->dst_reg) || is_flow_key_reg(env, insn->dst_reg) || is_sk_reg(env, insn->dst_reg)) { - verbose(env, "BPF_XADD stores into R%d %s is not allowed\n", + verbose(env, "BPF_ATOMIC stores into R%d %s is not allowed\n", insn->dst_reg, reg_type_str[reg_state(env, insn->dst_reg)->type]); return -EACCES; } - /* check whether atomic_add can read the memory */ + /* check whether we can read the memory */ err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off, BPF_SIZE(insn->code), BPF_READ, -1, true); if (err) return err; - /* check whether atomic_add can write into the same memory */ + /* check whether we can write into the same memory */ return check_mem_access(env, insn_idx, insn->dst_reg, insn->off, BPF_SIZE(insn->code), BPF_WRITE, -1, true); } @@ -9524,8 +9528,8 @@ static int do_check(struct bpf_verifier_env *env) } else if (class == BPF_STX) { enum bpf_reg_type *prev_dst_type, dst_reg_type; - if (BPF_MODE(insn->code) == BPF_XADD) { - err = check_xadd(env, env->insn_idx, insn); + if (BPF_MODE(insn->code) == BPF_ATOMIC) { + err = check_atomic(env, env->insn_idx, insn); if (err) return err; env->insn_idx++; @@ -10010,7 +10014,7 @@ static int resolve_pseudo_ldimm64(struct bpf_verifier_env *env) if (BPF_CLASS(insn->code) == BPF_STX && ((BPF_MODE(insn->code) != BPF_MEM && - BPF_MODE(insn->code) != BPF_XADD) || insn->imm != 0)) { + BPF_MODE(insn->code) != BPF_ATOMIC) || insn->imm != 0)) { verbose(env, "BPF_STX uses reserved fields\n"); return -EINVAL; } diff --git a/lib/test_bpf.c b/lib/test_bpf.c index ca7d635bccd9d..49ec9e8d8aed6 100644 --- a/lib/test_bpf.c +++ b/lib/test_bpf.c @@ -4295,13 +4295,13 @@ static struct bpf_test tests[] = { { { 0, 0xffffffff } }, .stack_depth = 40, }, - /* BPF_STX | BPF_XADD | BPF_W/DW */ + /* BPF_STX | BPF_ATOMIC | BPF_W/DW */ { "STX_XADD_W: Test: 0x12 + 0x10 = 0x22", .u.insns_int = { BPF_ALU32_IMM(BPF_MOV, R0, 0x12), BPF_ST_MEM(BPF_W, R10, -40, 0x10), - BPF_STX_XADD(BPF_W, R10, R0, -40), + BPF_ATOMIC_OP(BPF_W, BPF_ADD, R10, R0, -40), BPF_LDX_MEM(BPF_W, R0, R10, -40), BPF_EXIT_INSN(), }, @@ -4316,7 +4316,7 @@ static struct bpf_test tests[] = { BPF_ALU64_REG(BPF_MOV, R1, R10), BPF_ALU32_IMM(BPF_MOV, R0, 0x12), BPF_ST_MEM(BPF_W, R10, -40, 0x10), - BPF_STX_XADD(BPF_W, R10, R0, -40), + BPF_ATOMIC_OP(BPF_W, BPF_ADD, R10, R0, -40), BPF_ALU64_REG(BPF_MOV, R0, R10), BPF_ALU64_REG(BPF_SUB, R0, R1), BPF_EXIT_INSN(), @@ -4331,7 +4331,7 @@ static struct bpf_test tests[] = { .u.insns_int = { BPF_ALU32_IMM(BPF_MOV, R0, 0x12), BPF_ST_MEM(BPF_W, R10, -40, 0x10), - BPF_STX_XADD(BPF_W, R10, R0, -40), + BPF_ATOMIC_OP(BPF_W, BPF_ADD, R10, R0, -40), BPF_EXIT_INSN(), }, INTERNAL, @@ -4352,7 +4352,7 @@ static struct bpf_test tests[] = { .u.insns_int = { BPF_ALU32_IMM(BPF_MOV, R0, 0x12), BPF_ST_MEM(BPF_DW, R10, -40, 0x10), - BPF_STX_XADD(BPF_DW, R10, R0, -40), + BPF_ATOMIC_OP(BPF_DW, BPF_ADD, R10, R0, -40), BPF_LDX_MEM(BPF_DW, R0, R10, -40), BPF_EXIT_INSN(), }, @@ -4367,7 +4367,7 @@ static struct bpf_test tests[] = { BPF_ALU64_REG(BPF_MOV, R1, R10), BPF_ALU32_IMM(BPF_MOV, R0, 0x12), BPF_ST_MEM(BPF_DW, R10, -40, 0x10), - BPF_STX_XADD(BPF_DW, R10, R0, -40), + BPF_ATOMIC_OP(BPF_DW, BPF_ADD, R10, R0, -40), BPF_ALU64_REG(BPF_MOV, R0, R10), BPF_ALU64_REG(BPF_SUB, R0, R1), BPF_EXIT_INSN(), @@ -4382,7 +4382,7 @@ static struct bpf_test tests[] = { .u.insns_int = { BPF_ALU32_IMM(BPF_MOV, R0, 0x12), BPF_ST_MEM(BPF_DW, R10, -40, 0x10), - BPF_STX_XADD(BPF_DW, R10, R0, -40), + BPF_ATOMIC_OP(BPF_DW, BPF_ADD, R10, R0, -40), BPF_EXIT_INSN(), }, INTERNAL, diff --git a/samples/bpf/bpf_insn.h b/samples/bpf/bpf_insn.h index 544237980582b..db67a2847395f 100644 --- a/samples/bpf/bpf_insn.h +++ b/samples/bpf/bpf_insn.h @@ -138,11 +138,11 @@ struct bpf_insn; #define BPF_STX_XADD(SIZE, DST, SRC, OFF) \ ((struct bpf_insn) { \ - .code = BPF_STX | BPF_SIZE(SIZE) | BPF_XADD, \ + .code = BPF_STX | BPF_SIZE(SIZE) | BPF_ATOMIC, \ .dst_reg = DST, \ .src_reg = SRC, \ .off = OFF, \ - .imm = 0 }) + .imm = BPF_ADD }) /* Memory store, *(uint *) (dst_reg + off16) = imm32 */ diff --git a/samples/bpf/cookie_uid_helper_example.c b/samples/bpf/cookie_uid_helper_example.c index deb0e3e0324d4..c5ff7a13918c9 100644 --- a/samples/bpf/cookie_uid_helper_example.c +++ b/samples/bpf/cookie_uid_helper_example.c @@ -147,12 +147,12 @@ static void prog_load(void) */ BPF_MOV64_REG(BPF_REG_9, BPF_REG_0), BPF_MOV64_IMM(BPF_REG_1, 1), - BPF_STX_XADD(BPF_DW, BPF_REG_9, BPF_REG_1, - offsetof(struct stats, packets)), + BPF_ATOMIC_OP(BPF_DW, BPF_ADD, BPF_REG_9, BPF_REG_1, + offsetof(struct stats, packets)), BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_6, offsetof(struct __sk_buff, len)), - BPF_STX_XADD(BPF_DW, BPF_REG_9, BPF_REG_1, - offsetof(struct stats, bytes)), + BPF_ATOMIC_OP(BPF_DW, BPF_ADD, BPF_REG_9, BPF_REG_1, + offsetof(struct stats, bytes)), BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_6, offsetof(struct __sk_buff, len)), BPF_EXIT_INSN(), diff --git a/samples/bpf/sock_example.c b/samples/bpf/sock_example.c index 00aae1d33fcad..23d1930e19270 100644 --- a/samples/bpf/sock_example.c +++ b/samples/bpf/sock_example.c @@ -54,7 +54,7 @@ static int test_sock(void) BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), BPF_MOV64_IMM(BPF_REG_1, 1), /* r1 = 1 */ - BPF_RAW_INSN(BPF_STX | BPF_XADD | BPF_DW, BPF_REG_0, BPF_REG_1, 0, 0), /* xadd r0 += r1 */ + BPF_ATOMIC_OP(BPF_DW, BPF_ADD, BPF_REG_0, BPF_REG_1, 0), BPF_MOV64_IMM(BPF_REG_0, 0), /* r0 = 0 */ BPF_EXIT_INSN(), }; diff --git a/samples/bpf/test_cgrp2_attach.c b/samples/bpf/test_cgrp2_attach.c index 20fbd1241db33..390ff38d2ac67 100644 --- a/samples/bpf/test_cgrp2_attach.c +++ b/samples/bpf/test_cgrp2_attach.c @@ -53,7 +53,7 @@ static int prog_load(int map_fd, int verdict) BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), BPF_MOV64_IMM(BPF_REG_1, 1), /* r1 = 1 */ - BPF_RAW_INSN(BPF_STX | BPF_XADD | BPF_DW, BPF_REG_0, BPF_REG_1, 0, 0), /* xadd r0 += r1 */ + BPF_ATOMIC_OP(BPF_DW, BPF_ADD, BPF_REG_0, BPF_REG_1, 0), /* Count bytes */ BPF_MOV64_IMM(BPF_REG_0, MAP_KEY_BYTES), /* r0 = 1 */ @@ -64,7 +64,8 @@ static int prog_load(int map_fd, int verdict) BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_6, offsetof(struct __sk_buff, len)), /* r1 = skb->len */ - BPF_RAW_INSN(BPF_STX | BPF_XADD | BPF_DW, BPF_REG_0, BPF_REG_1, 0, 0), /* xadd r0 += r1 */ + + BPF_ATOMIC_OP(BPF_DW, BPF_ADD, BPF_REG_0, BPF_REG_1, 0), BPF_MOV64_IMM(BPF_REG_0, verdict), /* r0 = verdict */ BPF_EXIT_INSN(), diff --git a/tools/include/linux/filter.h b/tools/include/linux/filter.h index ca28b6ab8db7c..e870c9039f0d5 100644 --- a/tools/include/linux/filter.h +++ b/tools/include/linux/filter.h @@ -169,15 +169,22 @@ .off = OFF, \ .imm = 0 }) -/* Atomic memory add, *(uint *)(dst_reg + off16) += src_reg */ +/* + * Atomic operations: + * + * BPF_ADD *(uint *) (dst_reg + off16) += src_reg + */ -#define BPF_STX_XADD(SIZE, DST, SRC, OFF) \ +#define BPF_ATOMIC_OP(SIZE, OP, DST, SRC, OFF) \ ((struct bpf_insn) { \ - .code = BPF_STX | BPF_SIZE(SIZE) | BPF_XADD, \ + .code = BPF_STX | BPF_SIZE(SIZE) | BPF_ATOMIC, \ .dst_reg = DST, \ .src_reg = SRC, \ .off = OFF, \ - .imm = 0 }) + .imm = OP }) + +/* Legacy alias */ +#define BPF_STX_XADD(SIZE, DST, SRC, OFF) BPF_ATOMIC_OP(SIZE, BPF_ADD, DST, SRC, OFF) /* Memory store, *(uint *) (dst_reg + off16) = imm32 */ diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index a1ad32456f89a..6b3996343e637 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -19,7 +19,8 @@ /* ld/ldx fields */ #define BPF_DW 0x18 /* double word (64-bit) */ -#define BPF_XADD 0xc0 /* exclusive add */ +#define BPF_ATOMIC 0xc0 /* atomic memory ops - op type in immediate */ +#define BPF_XADD 0xc0 /* exclusive add - legacy name */ /* alu/jmp fields */ #define BPF_MOV 0xb0 /* mov reg to reg */ @@ -2448,7 +2449,7 @@ union bpf_attr { * running simultaneously. * * A user should care about the synchronization by himself. - * For example, by using the **BPF_STX_XADD** instruction to alter + * For example, by using the **BPF_ATOMIC** instructions to alter * the shared data. * Return * A pointer to the local storage area. diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_attach_multi.c b/tools/testing/selftests/bpf/prog_tests/cgroup_attach_multi.c index b549fcfacc0bd..0a1fc9816cef3 100644 --- a/tools/testing/selftests/bpf/prog_tests/cgroup_attach_multi.c +++ b/tools/testing/selftests/bpf/prog_tests/cgroup_attach_multi.c @@ -45,13 +45,13 @@ static int prog_load_cnt(int verdict, int val) BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), BPF_MOV64_IMM(BPF_REG_1, val), /* r1 = 1 */ - BPF_RAW_INSN(BPF_STX | BPF_XADD | BPF_DW, BPF_REG_0, BPF_REG_1, 0, 0), /* xadd r0 += r1 */ + BPF_ATOMIC_OP(BPF_DW, BPF_ADD, BPF_REG_0, BPF_REG_1, 0), BPF_LD_MAP_FD(BPF_REG_1, cgroup_storage_fd), BPF_MOV64_IMM(BPF_REG_2, 0), BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_local_storage), BPF_MOV64_IMM(BPF_REG_1, val), - BPF_RAW_INSN(BPF_STX | BPF_XADD | BPF_W, BPF_REG_0, BPF_REG_1, 0, 0), + BPF_ATOMIC_OP(BPF_W, BPF_ADD, BPF_REG_0, BPF_REG_1, 0), BPF_LD_MAP_FD(BPF_REG_1, percpu_cgroup_storage_fd), BPF_MOV64_IMM(BPF_REG_2, 0), diff --git a/tools/testing/selftests/bpf/test_cgroup_storage.c b/tools/testing/selftests/bpf/test_cgroup_storage.c index d946252a25bbc..0cda61da5d395 100644 --- a/tools/testing/selftests/bpf/test_cgroup_storage.c +++ b/tools/testing/selftests/bpf/test_cgroup_storage.c @@ -29,7 +29,7 @@ int main(int argc, char **argv) BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_local_storage), BPF_MOV64_IMM(BPF_REG_1, 1), - BPF_STX_XADD(BPF_DW, BPF_REG_0, BPF_REG_1, 0), + BPF_ATOMIC_OP(BPF_DW, BPF_ADD, BPF_REG_0, BPF_REG_1, 0), BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0), BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0x1), BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), diff --git a/tools/testing/selftests/bpf/verifier/ctx.c b/tools/testing/selftests/bpf/verifier/ctx.c index 93d6b16414812..23080862aafd2 100644 --- a/tools/testing/selftests/bpf/verifier/ctx.c +++ b/tools/testing/selftests/bpf/verifier/ctx.c @@ -10,14 +10,13 @@ .prog_type = BPF_PROG_TYPE_SCHED_CLS, }, { - "context stores via XADD", + "context stores via BPF_ATOMIC", .insns = { BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_RAW_INSN(BPF_STX | BPF_XADD | BPF_W, BPF_REG_1, - BPF_REG_0, offsetof(struct __sk_buff, mark), 0), + BPF_ATOMIC_OP(BPF_W, BPF_ADD, BPF_REG_1, BPF_REG_0, offsetof(struct __sk_buff, mark)), BPF_EXIT_INSN(), }, - .errstr = "BPF_XADD stores into R1 ctx is not allowed", + .errstr = "BPF_ATOMIC stores into R1 ctx is not allowed", .result = REJECT, .prog_type = BPF_PROG_TYPE_SCHED_CLS, }, diff --git a/tools/testing/selftests/bpf/verifier/direct_packet_access.c b/tools/testing/selftests/bpf/verifier/direct_packet_access.c index ae72536603fe2..ac1e19d0f5200 100644 --- a/tools/testing/selftests/bpf/verifier/direct_packet_access.c +++ b/tools/testing/selftests/bpf/verifier/direct_packet_access.c @@ -333,7 +333,7 @@ BPF_MOV64_REG(BPF_REG_4, BPF_REG_10), BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8), BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0), - BPF_STX_XADD(BPF_DW, BPF_REG_4, BPF_REG_5, 0), + BPF_ATOMIC_OP(BPF_DW, BPF_ADD, BPF_REG_4, BPF_REG_5, 0), BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_4, 0), BPF_STX_MEM(BPF_W, BPF_REG_2, BPF_REG_5, 0), BPF_MOV64_IMM(BPF_REG_0, 0), @@ -488,7 +488,7 @@ BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 11), BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_10, -8), BPF_MOV64_IMM(BPF_REG_4, 0xffffffff), - BPF_STX_XADD(BPF_DW, BPF_REG_10, BPF_REG_4, -8), + BPF_ATOMIC_OP(BPF_DW, BPF_ADD, BPF_REG_10, BPF_REG_4, -8), BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8), BPF_ALU64_IMM(BPF_RSH, BPF_REG_4, 49), BPF_ALU64_REG(BPF_ADD, BPF_REG_4, BPF_REG_2), diff --git a/tools/testing/selftests/bpf/verifier/leak_ptr.c b/tools/testing/selftests/bpf/verifier/leak_ptr.c index d6eec17f2cd2e..73f0dea95546c 100644 --- a/tools/testing/selftests/bpf/verifier/leak_ptr.c +++ b/tools/testing/selftests/bpf/verifier/leak_ptr.c @@ -5,7 +5,7 @@ BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, offsetof(struct __sk_buff, cb[0])), BPF_LD_MAP_FD(BPF_REG_2, 0), - BPF_STX_XADD(BPF_DW, BPF_REG_1, BPF_REG_2, + BPF_ATOMIC_OP(BPF_DW, BPF_ADD, BPF_REG_1, BPF_REG_2, offsetof(struct __sk_buff, cb[0])), BPF_EXIT_INSN(), }, @@ -13,7 +13,7 @@ .errstr_unpriv = "R2 leaks addr into mem", .result_unpriv = REJECT, .result = REJECT, - .errstr = "BPF_XADD stores into R1 ctx is not allowed", + .errstr = "BPF_ATOMIC stores into R1 ctx is not allowed", }, { "leak pointer into ctx 2", @@ -21,14 +21,14 @@ BPF_MOV64_IMM(BPF_REG_0, 0), BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, offsetof(struct __sk_buff, cb[0])), - BPF_STX_XADD(BPF_DW, BPF_REG_1, BPF_REG_10, + BPF_ATOMIC_OP(BPF_DW, BPF_ADD, BPF_REG_1, BPF_REG_10, offsetof(struct __sk_buff, cb[0])), BPF_EXIT_INSN(), }, .errstr_unpriv = "R10 leaks addr into mem", .result_unpriv = REJECT, .result = REJECT, - .errstr = "BPF_XADD stores into R1 ctx is not allowed", + .errstr = "BPF_ATOMIC stores into R1 ctx is not allowed", }, { "leak pointer into ctx 3", @@ -56,7 +56,7 @@ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3), BPF_MOV64_IMM(BPF_REG_3, 0), BPF_STX_MEM(BPF_DW, BPF_REG_0, BPF_REG_3, 0), - BPF_STX_XADD(BPF_DW, BPF_REG_0, BPF_REG_6, 0), + BPF_ATOMIC_OP(BPF_DW, BPF_ADD, BPF_REG_0, BPF_REG_6, 0), BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, diff --git a/tools/testing/selftests/bpf/verifier/meta_access.c b/tools/testing/selftests/bpf/verifier/meta_access.c index 205292b8dd65f..b45e8af414204 100644 --- a/tools/testing/selftests/bpf/verifier/meta_access.c +++ b/tools/testing/selftests/bpf/verifier/meta_access.c @@ -171,7 +171,7 @@ BPF_MOV64_IMM(BPF_REG_5, 42), BPF_MOV64_IMM(BPF_REG_6, 24), BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_5, -8), - BPF_STX_XADD(BPF_DW, BPF_REG_10, BPF_REG_6, -8), + BPF_ATOMIC_OP(BPF_DW, BPF_ADD, BPF_REG_10, BPF_REG_6, -8), BPF_LDX_MEM(BPF_DW, BPF_REG_5, BPF_REG_10, -8), BPF_JMP_IMM(BPF_JGT, BPF_REG_5, 100, 6), BPF_ALU64_REG(BPF_ADD, BPF_REG_3, BPF_REG_5), @@ -196,7 +196,7 @@ BPF_MOV64_IMM(BPF_REG_5, 42), BPF_MOV64_IMM(BPF_REG_6, 24), BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_5, -8), - BPF_STX_XADD(BPF_DW, BPF_REG_10, BPF_REG_6, -8), + BPF_ATOMIC_OP(BPF_DW, BPF_ADD, BPF_REG_10, BPF_REG_6, -8), BPF_LDX_MEM(BPF_DW, BPF_REG_5, BPF_REG_10, -8), BPF_JMP_IMM(BPF_JGT, BPF_REG_5, 100, 6), BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_5), diff --git a/tools/testing/selftests/bpf/verifier/unpriv.c b/tools/testing/selftests/bpf/verifier/unpriv.c index a3fe0fbaed41a..ee298627abaee 100644 --- a/tools/testing/selftests/bpf/verifier/unpriv.c +++ b/tools/testing/selftests/bpf/verifier/unpriv.c @@ -207,7 +207,8 @@ BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8), BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0), BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_RAW_INSN(BPF_STX | BPF_XADD | BPF_DW, BPF_REG_10, BPF_REG_0, -8, 0), + BPF_RAW_INSN(BPF_STX | BPF_ATOMIC | BPF_DW, + BPF_REG_10, BPF_REG_0, -8, BPF_ADD), BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_6, 0), BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_hash_recalc), BPF_EXIT_INSN(), diff --git a/tools/testing/selftests/bpf/verifier/value_illegal_alu.c b/tools/testing/selftests/bpf/verifier/value_illegal_alu.c index ed1c2cea1dea6..4890628672183 100644 --- a/tools/testing/selftests/bpf/verifier/value_illegal_alu.c +++ b/tools/testing/selftests/bpf/verifier/value_illegal_alu.c @@ -82,7 +82,7 @@ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_0, 0), - BPF_STX_XADD(BPF_DW, BPF_REG_2, BPF_REG_3, 0), + BPF_ATOMIC_OP(BPF_DW, BPF_ADD, BPF_REG_2, BPF_REG_3, 0), BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_2, 0), BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 22), BPF_EXIT_INSN(), diff --git a/tools/testing/selftests/bpf/verifier/xadd.c b/tools/testing/selftests/bpf/verifier/xadd.c index c5de2e62cc8bb..b96ef35268150 100644 --- a/tools/testing/selftests/bpf/verifier/xadd.c +++ b/tools/testing/selftests/bpf/verifier/xadd.c @@ -3,7 +3,7 @@ .insns = { BPF_MOV64_IMM(BPF_REG_0, 1), BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8), - BPF_STX_XADD(BPF_W, BPF_REG_10, BPF_REG_0, -7), + BPF_ATOMIC_OP(BPF_W, BPF_ADD, BPF_REG_10, BPF_REG_0, -7), BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8), BPF_EXIT_INSN(), }, @@ -22,7 +22,7 @@ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), BPF_EXIT_INSN(), BPF_MOV64_IMM(BPF_REG_1, 1), - BPF_STX_XADD(BPF_W, BPF_REG_0, BPF_REG_1, 3), + BPF_ATOMIC_OP(BPF_W, BPF_ADD, BPF_REG_0, BPF_REG_1, 3), BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, 3), BPF_EXIT_INSN(), }, @@ -45,13 +45,13 @@ BPF_MOV64_IMM(BPF_REG_0, 1), BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0), BPF_ST_MEM(BPF_W, BPF_REG_2, 3, 0), - BPF_STX_XADD(BPF_W, BPF_REG_2, BPF_REG_0, 1), - BPF_STX_XADD(BPF_W, BPF_REG_2, BPF_REG_0, 2), + BPF_ATOMIC_OP(BPF_W, BPF_ADD, BPF_REG_2, BPF_REG_0, 1), + BPF_ATOMIC_OP(BPF_W, BPF_ADD, BPF_REG_2, BPF_REG_0, 2), BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_2, 1), BPF_EXIT_INSN(), }, .result = REJECT, - .errstr = "BPF_XADD stores into R2 pkt is not allowed", + .errstr = "BPF_ATOMIC stores into R2 pkt is not allowed", .prog_type = BPF_PROG_TYPE_XDP, .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, @@ -62,8 +62,8 @@ BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8), - BPF_STX_XADD(BPF_DW, BPF_REG_10, BPF_REG_0, -8), - BPF_STX_XADD(BPF_DW, BPF_REG_10, BPF_REG_0, -8), + BPF_ATOMIC_OP(BPF_DW, BPF_ADD, BPF_REG_10, BPF_REG_0, -8), + BPF_ATOMIC_OP(BPF_DW, BPF_ADD, BPF_REG_10, BPF_REG_0, -8), BPF_JMP_REG(BPF_JNE, BPF_REG_6, BPF_REG_0, 3), BPF_JMP_REG(BPF_JNE, BPF_REG_7, BPF_REG_10, 2), BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8), @@ -82,8 +82,8 @@ BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -8), - BPF_STX_XADD(BPF_W, BPF_REG_10, BPF_REG_0, -8), - BPF_STX_XADD(BPF_W, BPF_REG_10, BPF_REG_0, -8), + BPF_ATOMIC_OP(BPF_W, BPF_ADD, BPF_REG_10, BPF_REG_0, -8), + BPF_ATOMIC_OP(BPF_W, BPF_ADD, BPF_REG_10, BPF_REG_0, -8), BPF_JMP_REG(BPF_JNE, BPF_REG_6, BPF_REG_0, 3), BPF_JMP_REG(BPF_JNE, BPF_REG_7, BPF_REG_10, 2), BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_10, -8), -- GitLab From c5bcb5eb4db632280b4123135d583a7bc8caea3e Mon Sep 17 00:00:00 2001 From: Brendan Jackman Date: Thu, 14 Jan 2021 18:17:45 +0000 Subject: [PATCH 0859/2012] bpf: Move BPF_STX reserved field check into BPF_STX verifier code I can't find a reason why this code is in resolve_pseudo_ldimm64; since I'll be modifying it in a subsequent commit, tidy it up. Signed-off-by: Brendan Jackman Signed-off-by: Alexei Starovoitov Acked-by: Yonghong Song Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20210114181751.768687-6-jackmanb@google.com --- kernel/bpf/verifier.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index cfc137b81ac66..d8a85f4e5b950 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -9528,6 +9528,12 @@ static int do_check(struct bpf_verifier_env *env) } else if (class == BPF_STX) { enum bpf_reg_type *prev_dst_type, dst_reg_type; + if (((BPF_MODE(insn->code) != BPF_MEM && + BPF_MODE(insn->code) != BPF_ATOMIC) || insn->imm != 0)) { + verbose(env, "BPF_STX uses reserved fields\n"); + return -EINVAL; + } + if (BPF_MODE(insn->code) == BPF_ATOMIC) { err = check_atomic(env, env->insn_idx, insn); if (err) @@ -10012,13 +10018,6 @@ static int resolve_pseudo_ldimm64(struct bpf_verifier_env *env) return -EINVAL; } - if (BPF_CLASS(insn->code) == BPF_STX && - ((BPF_MODE(insn->code) != BPF_MEM && - BPF_MODE(insn->code) != BPF_ATOMIC) || insn->imm != 0)) { - verbose(env, "BPF_STX uses reserved fields\n"); - return -EINVAL; - } - if (insn[0].code == (BPF_LD | BPF_IMM | BPF_DW)) { struct bpf_insn_aux_data *aux; struct bpf_map *map; -- GitLab From 5ca419f2864a2c60940dcf4bbaeb69546200e36f Mon Sep 17 00:00:00 2001 From: Brendan Jackman Date: Thu, 14 Jan 2021 18:17:46 +0000 Subject: [PATCH 0860/2012] bpf: Add BPF_FETCH field / create atomic_fetch_add instruction The BPF_FETCH field can be set in bpf_insn.imm, for BPF_ATOMIC instructions, in order to have the previous value of the atomically-modified memory location loaded into the src register after an atomic op is carried out. Suggested-by: Yonghong Song Signed-off-by: Brendan Jackman Signed-off-by: Alexei Starovoitov Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20210114181751.768687-7-jackmanb@google.com --- arch/x86/net/bpf_jit_comp.c | 4 ++++ include/linux/filter.h | 1 + include/uapi/linux/bpf.h | 3 +++ kernel/bpf/core.c | 13 +++++++++++++ kernel/bpf/disasm.c | 7 +++++++ kernel/bpf/verifier.c | 33 ++++++++++++++++++++++++--------- tools/include/linux/filter.h | 1 + tools/include/uapi/linux/bpf.h | 3 +++ 8 files changed, 56 insertions(+), 9 deletions(-) diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index b1829a534da14..eea7d8b0bb12a 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -811,6 +811,10 @@ static int emit_atomic(u8 **pprog, u8 atomic_op, /* lock *(u32/u64*)(dst_reg + off) = src_reg */ EMIT1(simple_alu_opcodes[atomic_op]); break; + case BPF_ADD | BPF_FETCH: + /* src_reg = atomic_fetch_add(dst_reg + off, src_reg); */ + EMIT2(0x0F, 0xC1); + break; default: pr_err("bpf_jit: unknown atomic opcode %02x\n", atomic_op); return -EFAULT; diff --git a/include/linux/filter.h b/include/linux/filter.h index 392e94b796687..23fca41b85405 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -264,6 +264,7 @@ static inline bool insn_is_zext(const struct bpf_insn *insn) * Atomic operations: * * BPF_ADD *(uint *) (dst_reg + off16) += src_reg + * BPF_ADD | BPF_FETCH src_reg = atomic_fetch_add(dst_reg + off16, src_reg); */ #define BPF_ATOMIC_OP(SIZE, OP, DST, SRC, OFF) \ diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 6b3996343e637..ea262b0090495 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -44,6 +44,9 @@ #define BPF_CALL 0x80 /* function call */ #define BPF_EXIT 0x90 /* function return */ +/* atomic op type fields (stored in immediate) */ +#define BPF_FETCH 0x01 /* fetch previous value into src reg */ + /* Register numbers */ enum { BPF_REG_0 = 0, diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 4836ebf459cfe..28d6000463e47 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -1624,16 +1624,29 @@ out: /* lock xadd *(u32 *)(dst_reg + off16) += src_reg */ atomic_add((u32) SRC, (atomic_t *)(unsigned long) (DST + insn->off)); + break; + case BPF_ADD | BPF_FETCH: + SRC = (u32) atomic_fetch_add( + (u32) SRC, + (atomic_t *)(unsigned long) (DST + insn->off)); + break; default: goto default_label; } CONT; + STX_ATOMIC_DW: switch (IMM) { case BPF_ADD: /* lock xadd *(u64 *)(dst_reg + off16) += src_reg */ atomic64_add((u64) SRC, (atomic64_t *)(unsigned long) (DST + insn->off)); + break; + case BPF_ADD | BPF_FETCH: + SRC = (u64) atomic64_fetch_add( + (u64) SRC, + (atomic64_t *)(unsigned long) (DST + insn->off)); + break; default: goto default_label; } diff --git a/kernel/bpf/disasm.c b/kernel/bpf/disasm.c index 37c8d6e9b4cce..d2e20f6d05160 100644 --- a/kernel/bpf/disasm.c +++ b/kernel/bpf/disasm.c @@ -160,6 +160,13 @@ void print_bpf_insn(const struct bpf_insn_cbs *cbs, bpf_ldst_string[BPF_SIZE(insn->code) >> 3], insn->dst_reg, insn->off, insn->src_reg); + } else if (BPF_MODE(insn->code) == BPF_ATOMIC && + insn->imm == (BPF_ADD | BPF_FETCH)) { + verbose(cbs->private_data, "(%02x) r%d = atomic%s_fetch_add((%s *)(r%d %+d), r%d)\n", + insn->code, insn->src_reg, + BPF_SIZE(insn->code) == BPF_DW ? "64" : "", + bpf_ldst_string[BPF_SIZE(insn->code) >> 3], + insn->dst_reg, insn->off, insn->src_reg); } else { verbose(cbs->private_data, "BUG_%02x\n", insn->code); } diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index d8a85f4e5b950..6aa1fc9197611 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -3608,7 +3608,11 @@ static int check_atomic(struct bpf_verifier_env *env, int insn_idx, struct bpf_i { int err; - if (insn->imm != BPF_ADD) { + switch (insn->imm) { + case BPF_ADD: + case BPF_ADD | BPF_FETCH: + break; + default: verbose(env, "BPF_ATOMIC uses invalid atomic opcode %02x\n", insn->imm); return -EINVAL; } @@ -3650,8 +3654,20 @@ static int check_atomic(struct bpf_verifier_env *env, int insn_idx, struct bpf_i return err; /* check whether we can write into the same memory */ - return check_mem_access(env, insn_idx, insn->dst_reg, insn->off, - BPF_SIZE(insn->code), BPF_WRITE, -1, true); + err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off, + BPF_SIZE(insn->code), BPF_WRITE, -1, true); + if (err) + return err; + + if (!(insn->imm & BPF_FETCH)) + return 0; + + /* check and record load of old value into src reg */ + err = check_reg_arg(env, insn->src_reg, DST_OP); + if (err) + return err; + + return 0; } static int __check_stack_boundary(struct bpf_verifier_env *env, u32 regno, @@ -9528,12 +9544,6 @@ static int do_check(struct bpf_verifier_env *env) } else if (class == BPF_STX) { enum bpf_reg_type *prev_dst_type, dst_reg_type; - if (((BPF_MODE(insn->code) != BPF_MEM && - BPF_MODE(insn->code) != BPF_ATOMIC) || insn->imm != 0)) { - verbose(env, "BPF_STX uses reserved fields\n"); - return -EINVAL; - } - if (BPF_MODE(insn->code) == BPF_ATOMIC) { err = check_atomic(env, env->insn_idx, insn); if (err) @@ -9542,6 +9552,11 @@ static int do_check(struct bpf_verifier_env *env) continue; } + if (BPF_MODE(insn->code) != BPF_MEM || insn->imm != 0) { + verbose(env, "BPF_STX uses reserved fields\n"); + return -EINVAL; + } + /* check src1 operand */ err = check_reg_arg(env, insn->src_reg, SRC_OP); if (err) diff --git a/tools/include/linux/filter.h b/tools/include/linux/filter.h index e870c9039f0d5..7211ce9fba533 100644 --- a/tools/include/linux/filter.h +++ b/tools/include/linux/filter.h @@ -173,6 +173,7 @@ * Atomic operations: * * BPF_ADD *(uint *) (dst_reg + off16) += src_reg + * BPF_ADD | BPF_FETCH src_reg = atomic_fetch_add(dst_reg + off16, src_reg); */ #define BPF_ATOMIC_OP(SIZE, OP, DST, SRC, OFF) \ diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 6b3996343e637..ea262b0090495 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -44,6 +44,9 @@ #define BPF_CALL 0x80 /* function call */ #define BPF_EXIT 0x90 /* function return */ +/* atomic op type fields (stored in immediate) */ +#define BPF_FETCH 0x01 /* fetch previous value into src reg */ + /* Register numbers */ enum { BPF_REG_0 = 0, -- GitLab From 5ffa25502b5ab3d639829a2d1e316cff7f59a41e Mon Sep 17 00:00:00 2001 From: Brendan Jackman Date: Thu, 14 Jan 2021 18:17:47 +0000 Subject: [PATCH 0861/2012] bpf: Add instructions for atomic_[cmp]xchg This adds two atomic opcodes, both of which include the BPF_FETCH flag. XCHG without the BPF_FETCH flag would naturally encode atomic_set. This is not supported because it would be of limited value to userspace (it doesn't imply any barriers). CMPXCHG without BPF_FETCH woulud be an atomic compare-and-write. We don't have such an operation in the kernel so it isn't provided to BPF either. There are two significant design decisions made for the CMPXCHG instruction: - To solve the issue that this operation fundamentally has 3 operands, but we only have two register fields. Therefore the operand we compare against (the kernel's API calls it 'old') is hard-coded to be R0. x86 has similar design (and A64 doesn't have this problem). A potential alternative might be to encode the other operand's register number in the immediate field. - The kernel's atomic_cmpxchg returns the old value, while the C11 userspace APIs return a boolean indicating the comparison result. Which should BPF do? A64 returns the old value. x86 returns the old value in the hard-coded register (and also sets a flag). That means return-old-value is easier to JIT, so that's what we use. Signed-off-by: Brendan Jackman Signed-off-by: Alexei Starovoitov Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20210114181751.768687-8-jackmanb@google.com --- arch/x86/net/bpf_jit_comp.c | 8 ++++++++ include/linux/filter.h | 2 ++ include/uapi/linux/bpf.h | 4 +++- kernel/bpf/core.c | 20 ++++++++++++++++++++ kernel/bpf/disasm.c | 15 +++++++++++++++ kernel/bpf/verifier.c | 19 +++++++++++++++++-- tools/include/linux/filter.h | 2 ++ tools/include/uapi/linux/bpf.h | 4 +++- 8 files changed, 70 insertions(+), 4 deletions(-) diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index eea7d8b0bb12a..3082411875828 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -815,6 +815,14 @@ static int emit_atomic(u8 **pprog, u8 atomic_op, /* src_reg = atomic_fetch_add(dst_reg + off, src_reg); */ EMIT2(0x0F, 0xC1); break; + case BPF_XCHG: + /* src_reg = atomic_xchg(dst_reg + off, src_reg); */ + EMIT1(0x87); + break; + case BPF_CMPXCHG: + /* r0 = atomic_cmpxchg(dst_reg + off, r0, src_reg); */ + EMIT2(0x0F, 0xB1); + break; default: pr_err("bpf_jit: unknown atomic opcode %02x\n", atomic_op); return -EFAULT; diff --git a/include/linux/filter.h b/include/linux/filter.h index 23fca41b85405..d563820f197d3 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -265,6 +265,8 @@ static inline bool insn_is_zext(const struct bpf_insn *insn) * * BPF_ADD *(uint *) (dst_reg + off16) += src_reg * BPF_ADD | BPF_FETCH src_reg = atomic_fetch_add(dst_reg + off16, src_reg); + * BPF_XCHG src_reg = atomic_xchg(dst_reg + off16, src_reg) + * BPF_CMPXCHG r0 = atomic_cmpxchg(dst_reg + off16, r0, src_reg) */ #define BPF_ATOMIC_OP(SIZE, OP, DST, SRC, OFF) \ diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index ea262b0090495..c001766adcbc5 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -45,7 +45,9 @@ #define BPF_EXIT 0x90 /* function return */ /* atomic op type fields (stored in immediate) */ -#define BPF_FETCH 0x01 /* fetch previous value into src reg */ +#define BPF_FETCH 0x01 /* not an opcode on its own, used to build others */ +#define BPF_XCHG (0xe0 | BPF_FETCH) /* atomic exchange */ +#define BPF_CMPXCHG (0xf0 | BPF_FETCH) /* atomic compare-and-write */ /* Register numbers */ enum { diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 28d6000463e47..4df6daba43ef1 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -1630,6 +1630,16 @@ out: (u32) SRC, (atomic_t *)(unsigned long) (DST + insn->off)); break; + case BPF_XCHG: + SRC = (u32) atomic_xchg( + (atomic_t *)(unsigned long) (DST + insn->off), + (u32) SRC); + break; + case BPF_CMPXCHG: + BPF_R0 = (u32) atomic_cmpxchg( + (atomic_t *)(unsigned long) (DST + insn->off), + (u32) BPF_R0, (u32) SRC); + break; default: goto default_label; } @@ -1647,6 +1657,16 @@ out: (u64) SRC, (atomic64_t *)(unsigned long) (DST + insn->off)); break; + case BPF_XCHG: + SRC = (u64) atomic64_xchg( + (atomic64_t *)(unsigned long) (DST + insn->off), + (u64) SRC); + break; + case BPF_CMPXCHG: + BPF_R0 = (u64) atomic64_cmpxchg( + (atomic64_t *)(unsigned long) (DST + insn->off), + (u64) BPF_R0, (u64) SRC); + break; default: goto default_label; } diff --git a/kernel/bpf/disasm.c b/kernel/bpf/disasm.c index d2e20f6d05160..ee8d1132767b7 100644 --- a/kernel/bpf/disasm.c +++ b/kernel/bpf/disasm.c @@ -167,6 +167,21 @@ void print_bpf_insn(const struct bpf_insn_cbs *cbs, BPF_SIZE(insn->code) == BPF_DW ? "64" : "", bpf_ldst_string[BPF_SIZE(insn->code) >> 3], insn->dst_reg, insn->off, insn->src_reg); + } else if (BPF_MODE(insn->code) == BPF_ATOMIC && + insn->imm == BPF_CMPXCHG) { + verbose(cbs->private_data, "(%02x) r0 = atomic%s_cmpxchg((%s *)(r%d %+d), r0, r%d)\n", + insn->code, + BPF_SIZE(insn->code) == BPF_DW ? "64" : "", + bpf_ldst_string[BPF_SIZE(insn->code) >> 3], + insn->dst_reg, insn->off, + insn->src_reg); + } else if (BPF_MODE(insn->code) == BPF_ATOMIC && + insn->imm == BPF_XCHG) { + verbose(cbs->private_data, "(%02x) r%d = atomic%s_xchg((%s *)(r%d %+d), r%d)\n", + insn->code, insn->src_reg, + BPF_SIZE(insn->code) == BPF_DW ? "64" : "", + bpf_ldst_string[BPF_SIZE(insn->code) >> 3], + insn->dst_reg, insn->off, insn->src_reg); } else { verbose(cbs->private_data, "BUG_%02x\n", insn->code); } diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 6aa1fc9197611..89a4d154ab37e 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -3606,11 +3606,14 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn static int check_atomic(struct bpf_verifier_env *env, int insn_idx, struct bpf_insn *insn) { + int load_reg; int err; switch (insn->imm) { case BPF_ADD: case BPF_ADD | BPF_FETCH: + case BPF_XCHG: + case BPF_CMPXCHG: break; default: verbose(env, "BPF_ATOMIC uses invalid atomic opcode %02x\n", insn->imm); @@ -3632,6 +3635,13 @@ static int check_atomic(struct bpf_verifier_env *env, int insn_idx, struct bpf_i if (err) return err; + if (insn->imm == BPF_CMPXCHG) { + /* Check comparison of R0 with memory location */ + err = check_reg_arg(env, BPF_REG_0, SRC_OP); + if (err) + return err; + } + if (is_pointer_value(env, insn->src_reg)) { verbose(env, "R%d leaks addr into mem\n", insn->src_reg); return -EACCES; @@ -3662,8 +3672,13 @@ static int check_atomic(struct bpf_verifier_env *env, int insn_idx, struct bpf_i if (!(insn->imm & BPF_FETCH)) return 0; - /* check and record load of old value into src reg */ - err = check_reg_arg(env, insn->src_reg, DST_OP); + if (insn->imm == BPF_CMPXCHG) + load_reg = BPF_REG_0; + else + load_reg = insn->src_reg; + + /* check and record load of old value */ + err = check_reg_arg(env, load_reg, DST_OP); if (err) return err; diff --git a/tools/include/linux/filter.h b/tools/include/linux/filter.h index 7211ce9fba533..d75998b0d5ac6 100644 --- a/tools/include/linux/filter.h +++ b/tools/include/linux/filter.h @@ -174,6 +174,8 @@ * * BPF_ADD *(uint *) (dst_reg + off16) += src_reg * BPF_ADD | BPF_FETCH src_reg = atomic_fetch_add(dst_reg + off16, src_reg); + * BPF_XCHG src_reg = atomic_xchg(dst_reg + off16, src_reg) + * BPF_CMPXCHG r0 = atomic_cmpxchg(dst_reg + off16, r0, src_reg) */ #define BPF_ATOMIC_OP(SIZE, OP, DST, SRC, OFF) \ diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index ea262b0090495..c001766adcbc5 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -45,7 +45,9 @@ #define BPF_EXIT 0x90 /* function return */ /* atomic op type fields (stored in immediate) */ -#define BPF_FETCH 0x01 /* fetch previous value into src reg */ +#define BPF_FETCH 0x01 /* not an opcode on its own, used to build others */ +#define BPF_XCHG (0xe0 | BPF_FETCH) /* atomic exchange */ +#define BPF_CMPXCHG (0xf0 | BPF_FETCH) /* atomic compare-and-write */ /* Register numbers */ enum { -- GitLab From 462910670e4ac91509829c5549bd0227668176fb Mon Sep 17 00:00:00 2001 From: Brendan Jackman Date: Thu, 14 Jan 2021 18:17:48 +0000 Subject: [PATCH 0862/2012] bpf: Pull out a macro for interpreting atomic ALU operations Since the atomic operations that are added in subsequent commits are all isomorphic with BPF_ADD, pull out a macro to avoid the interpreter becoming dominated by lines of atomic-related code. Note that this sacrificies interpreter performance (combining STX_ATOMIC_W and STX_ATOMIC_DW into single switch case means that we need an extra conditional branch to differentiate them) in favour of compact and (relatively!) simple C code. Signed-off-by: Brendan Jackman Signed-off-by: Alexei Starovoitov Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20210114181751.768687-9-jackmanb@google.com --- kernel/bpf/core.c | 80 +++++++++++++++++++++++------------------------ 1 file changed, 39 insertions(+), 41 deletions(-) diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 4df6daba43ef1..8669e685825f9 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -1618,55 +1618,53 @@ out: LDX_PROBE(DW, 8) #undef LDX_PROBE - STX_ATOMIC_W: - switch (IMM) { - case BPF_ADD: - /* lock xadd *(u32 *)(dst_reg + off16) += src_reg */ - atomic_add((u32) SRC, (atomic_t *)(unsigned long) - (DST + insn->off)); - break; - case BPF_ADD | BPF_FETCH: - SRC = (u32) atomic_fetch_add( - (u32) SRC, - (atomic_t *)(unsigned long) (DST + insn->off)); - break; - case BPF_XCHG: - SRC = (u32) atomic_xchg( - (atomic_t *)(unsigned long) (DST + insn->off), - (u32) SRC); - break; - case BPF_CMPXCHG: - BPF_R0 = (u32) atomic_cmpxchg( - (atomic_t *)(unsigned long) (DST + insn->off), - (u32) BPF_R0, (u32) SRC); +#define ATOMIC_ALU_OP(BOP, KOP) \ + case BOP: \ + if (BPF_SIZE(insn->code) == BPF_W) \ + atomic_##KOP((u32) SRC, (atomic_t *)(unsigned long) \ + (DST + insn->off)); \ + else \ + atomic64_##KOP((u64) SRC, (atomic64_t *)(unsigned long) \ + (DST + insn->off)); \ + break; \ + case BOP | BPF_FETCH: \ + if (BPF_SIZE(insn->code) == BPF_W) \ + SRC = (u32) atomic_fetch_##KOP( \ + (u32) SRC, \ + (atomic_t *)(unsigned long) (DST + insn->off)); \ + else \ + SRC = (u64) atomic64_fetch_##KOP( \ + (u64) SRC, \ + (atomic64_t *)(unsigned long) (DST + insn->off)); \ break; - default: - goto default_label; - } - CONT; STX_ATOMIC_DW: + STX_ATOMIC_W: switch (IMM) { - case BPF_ADD: - /* lock xadd *(u64 *)(dst_reg + off16) += src_reg */ - atomic64_add((u64) SRC, (atomic64_t *)(unsigned long) - (DST + insn->off)); - break; - case BPF_ADD | BPF_FETCH: - SRC = (u64) atomic64_fetch_add( - (u64) SRC, - (atomic64_t *)(unsigned long) (DST + insn->off)); - break; + ATOMIC_ALU_OP(BPF_ADD, add) +#undef ATOMIC_ALU_OP + case BPF_XCHG: - SRC = (u64) atomic64_xchg( - (atomic64_t *)(unsigned long) (DST + insn->off), - (u64) SRC); + if (BPF_SIZE(insn->code) == BPF_W) + SRC = (u32) atomic_xchg( + (atomic_t *)(unsigned long) (DST + insn->off), + (u32) SRC); + else + SRC = (u64) atomic64_xchg( + (atomic64_t *)(unsigned long) (DST + insn->off), + (u64) SRC); break; case BPF_CMPXCHG: - BPF_R0 = (u64) atomic64_cmpxchg( - (atomic64_t *)(unsigned long) (DST + insn->off), - (u64) BPF_R0, (u64) SRC); + if (BPF_SIZE(insn->code) == BPF_W) + BPF_R0 = (u32) atomic_cmpxchg( + (atomic_t *)(unsigned long) (DST + insn->off), + (u32) BPF_R0, (u32) SRC); + else + BPF_R0 = (u64) atomic64_cmpxchg( + (atomic64_t *)(unsigned long) (DST + insn->off), + (u64) BPF_R0, (u64) SRC); break; + default: goto default_label; } -- GitLab From 981f94c3e92146705baf97fb417a5ed1ab1a79a5 Mon Sep 17 00:00:00 2001 From: Brendan Jackman Date: Thu, 14 Jan 2021 18:17:49 +0000 Subject: [PATCH 0863/2012] bpf: Add bitwise atomic instructions This adds instructions for atomic[64]_[fetch_]and atomic[64]_[fetch_]or atomic[64]_[fetch_]xor All these operations are isomorphic enough to implement with the same verifier, interpreter, and x86 JIT code, hence being a single commit. The main interesting thing here is that x86 doesn't directly support the fetch_ version these operations, so we need to generate a CMPXCHG loop in the JIT. This requires the use of two temporary registers, IIUC it's safe to use BPF_REG_AX and x86's AUX_REG for this purpose. Signed-off-by: Brendan Jackman Signed-off-by: Alexei Starovoitov Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20210114181751.768687-10-jackmanb@google.com --- arch/x86/net/bpf_jit_comp.c | 50 +++++++++++++++++++++++++++++++++++- include/linux/filter.h | 6 +++++ kernel/bpf/core.c | 3 +++ kernel/bpf/disasm.c | 21 ++++++++++++--- kernel/bpf/verifier.c | 6 +++++ tools/include/linux/filter.h | 6 +++++ 6 files changed, 87 insertions(+), 5 deletions(-) diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 3082411875828..1d4d50199293a 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -808,6 +808,10 @@ static int emit_atomic(u8 **pprog, u8 atomic_op, /* emit opcode */ switch (atomic_op) { case BPF_ADD: + case BPF_SUB: + case BPF_AND: + case BPF_OR: + case BPF_XOR: /* lock *(u32/u64*)(dst_reg + off) = src_reg */ EMIT1(simple_alu_opcodes[atomic_op]); break; @@ -1292,8 +1296,52 @@ st: if (is_imm8(insn->off)) case BPF_STX | BPF_ATOMIC | BPF_W: case BPF_STX | BPF_ATOMIC | BPF_DW: + if (insn->imm == (BPF_AND | BPF_FETCH) || + insn->imm == (BPF_OR | BPF_FETCH) || + insn->imm == (BPF_XOR | BPF_FETCH)) { + u8 *branch_target; + bool is64 = BPF_SIZE(insn->code) == BPF_DW; + + /* + * Can't be implemented with a single x86 insn. + * Need to do a CMPXCHG loop. + */ + + /* Will need RAX as a CMPXCHG operand so save R0 */ + emit_mov_reg(&prog, true, BPF_REG_AX, BPF_REG_0); + branch_target = prog; + /* Load old value */ + emit_ldx(&prog, BPF_SIZE(insn->code), + BPF_REG_0, dst_reg, insn->off); + /* + * Perform the (commutative) operation locally, + * put the result in the AUX_REG. + */ + emit_mov_reg(&prog, is64, AUX_REG, BPF_REG_0); + maybe_emit_mod(&prog, AUX_REG, src_reg, is64); + EMIT2(simple_alu_opcodes[BPF_OP(insn->imm)], + add_2reg(0xC0, AUX_REG, src_reg)); + /* Attempt to swap in new value */ + err = emit_atomic(&prog, BPF_CMPXCHG, + dst_reg, AUX_REG, insn->off, + BPF_SIZE(insn->code)); + if (WARN_ON(err)) + return err; + /* + * ZF tells us whether we won the race. If it's + * cleared we need to try again. + */ + EMIT2(X86_JNE, -(prog - branch_target) - 2); + /* Return the pre-modification value */ + emit_mov_reg(&prog, is64, src_reg, BPF_REG_0); + /* Restore R0 after clobbering RAX */ + emit_mov_reg(&prog, true, BPF_REG_0, BPF_REG_AX); + break; + + } + err = emit_atomic(&prog, insn->imm, dst_reg, src_reg, - insn->off, BPF_SIZE(insn->code)); + insn->off, BPF_SIZE(insn->code)); if (err) return err; break; diff --git a/include/linux/filter.h b/include/linux/filter.h index d563820f197d3..7fdce5407214b 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -264,7 +264,13 @@ static inline bool insn_is_zext(const struct bpf_insn *insn) * Atomic operations: * * BPF_ADD *(uint *) (dst_reg + off16) += src_reg + * BPF_AND *(uint *) (dst_reg + off16) &= src_reg + * BPF_OR *(uint *) (dst_reg + off16) |= src_reg + * BPF_XOR *(uint *) (dst_reg + off16) ^= src_reg * BPF_ADD | BPF_FETCH src_reg = atomic_fetch_add(dst_reg + off16, src_reg); + * BPF_AND | BPF_FETCH src_reg = atomic_fetch_and(dst_reg + off16, src_reg); + * BPF_OR | BPF_FETCH src_reg = atomic_fetch_or(dst_reg + off16, src_reg); + * BPF_XOR | BPF_FETCH src_reg = atomic_fetch_xor(dst_reg + off16, src_reg); * BPF_XCHG src_reg = atomic_xchg(dst_reg + off16, src_reg) * BPF_CMPXCHG r0 = atomic_cmpxchg(dst_reg + off16, r0, src_reg) */ diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 8669e685825f9..5bbd4884ff7ad 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -1642,6 +1642,9 @@ out: STX_ATOMIC_W: switch (IMM) { ATOMIC_ALU_OP(BPF_ADD, add) + ATOMIC_ALU_OP(BPF_AND, and) + ATOMIC_ALU_OP(BPF_OR, or) + ATOMIC_ALU_OP(BPF_XOR, xor) #undef ATOMIC_ALU_OP case BPF_XCHG: diff --git a/kernel/bpf/disasm.c b/kernel/bpf/disasm.c index ee8d1132767b7..19ff8fed7f4b0 100644 --- a/kernel/bpf/disasm.c +++ b/kernel/bpf/disasm.c @@ -80,6 +80,13 @@ const char *const bpf_alu_string[16] = { [BPF_END >> 4] = "endian", }; +static const char *const bpf_atomic_alu_string[16] = { + [BPF_ADD >> 4] = "add", + [BPF_AND >> 4] = "and", + [BPF_OR >> 4] = "or", + [BPF_XOR >> 4] = "or", +}; + static const char *const bpf_ldst_string[] = { [BPF_W >> 3] = "u32", [BPF_H >> 3] = "u16", @@ -154,17 +161,23 @@ void print_bpf_insn(const struct bpf_insn_cbs *cbs, insn->dst_reg, insn->off, insn->src_reg); else if (BPF_MODE(insn->code) == BPF_ATOMIC && - insn->imm == BPF_ADD) { - verbose(cbs->private_data, "(%02x) lock *(%s *)(r%d %+d) += r%d\n", + (insn->imm == BPF_ADD || insn->imm == BPF_ADD || + insn->imm == BPF_OR || insn->imm == BPF_XOR)) { + verbose(cbs->private_data, "(%02x) lock *(%s *)(r%d %+d) %s r%d\n", insn->code, bpf_ldst_string[BPF_SIZE(insn->code) >> 3], insn->dst_reg, insn->off, + bpf_alu_string[BPF_OP(insn->imm) >> 4], insn->src_reg); } else if (BPF_MODE(insn->code) == BPF_ATOMIC && - insn->imm == (BPF_ADD | BPF_FETCH)) { - verbose(cbs->private_data, "(%02x) r%d = atomic%s_fetch_add((%s *)(r%d %+d), r%d)\n", + (insn->imm == (BPF_ADD | BPF_FETCH) || + insn->imm == (BPF_AND | BPF_FETCH) || + insn->imm == (BPF_OR | BPF_FETCH) || + insn->imm == (BPF_XOR | BPF_FETCH))) { + verbose(cbs->private_data, "(%02x) r%d = atomic%s_fetch_%s((%s *)(r%d %+d), r%d)\n", insn->code, insn->src_reg, BPF_SIZE(insn->code) == BPF_DW ? "64" : "", + bpf_atomic_alu_string[BPF_OP(insn->imm) >> 4], bpf_ldst_string[BPF_SIZE(insn->code) >> 3], insn->dst_reg, insn->off, insn->src_reg); } else if (BPF_MODE(insn->code) == BPF_ATOMIC && diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 89a4d154ab37e..0f82d5d46e2cd 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -3612,6 +3612,12 @@ static int check_atomic(struct bpf_verifier_env *env, int insn_idx, struct bpf_i switch (insn->imm) { case BPF_ADD: case BPF_ADD | BPF_FETCH: + case BPF_AND: + case BPF_AND | BPF_FETCH: + case BPF_OR: + case BPF_OR | BPF_FETCH: + case BPF_XOR: + case BPF_XOR | BPF_FETCH: case BPF_XCHG: case BPF_CMPXCHG: break; diff --git a/tools/include/linux/filter.h b/tools/include/linux/filter.h index d75998b0d5ac6..736bdeccdfe44 100644 --- a/tools/include/linux/filter.h +++ b/tools/include/linux/filter.h @@ -173,7 +173,13 @@ * Atomic operations: * * BPF_ADD *(uint *) (dst_reg + off16) += src_reg + * BPF_AND *(uint *) (dst_reg + off16) &= src_reg + * BPF_OR *(uint *) (dst_reg + off16) |= src_reg + * BPF_XOR *(uint *) (dst_reg + off16) ^= src_reg * BPF_ADD | BPF_FETCH src_reg = atomic_fetch_add(dst_reg + off16, src_reg); + * BPF_AND | BPF_FETCH src_reg = atomic_fetch_and(dst_reg + off16, src_reg); + * BPF_OR | BPF_FETCH src_reg = atomic_fetch_or(dst_reg + off16, src_reg); + * BPF_XOR | BPF_FETCH src_reg = atomic_fetch_xor(dst_reg + off16, src_reg); * BPF_XCHG src_reg = atomic_xchg(dst_reg + off16, src_reg) * BPF_CMPXCHG r0 = atomic_cmpxchg(dst_reg + off16, r0, src_reg) */ -- GitLab From 98d666d05a1d9706bb3fe972157fa6155dbb180f Mon Sep 17 00:00:00 2001 From: Brendan Jackman Date: Thu, 14 Jan 2021 18:17:50 +0000 Subject: [PATCH 0864/2012] bpf: Add tests for new BPF atomic operations The prog_test that's added depends on Clang/LLVM features added by Yonghong in commit 286daafd6512 (was https://reviews.llvm.org/D72184). Note the use of a define called ENABLE_ATOMICS_TESTS: this is used to: - Avoid breaking the build for people on old versions of Clang - Avoid needing separate lists of test objects for no_alu32, where atomics are not supported even if Clang has the feature. The atomics_test.o BPF object is built unconditionally both for test_progs and test_progs-no_alu32. For test_progs, if Clang supports atomics, ENABLE_ATOMICS_TESTS is defined, so it includes the proper test code. Otherwise, progs and global vars are defined anyway, as stubs; this means that the skeleton user code still builds. The atomics_test.o userspace object is built once and used for both test_progs and test_progs-no_alu32. A variable called skip_tests is defined in the BPF object's data section, which tells the userspace object whether to skip the atomics test. Signed-off-by: Brendan Jackman Signed-off-by: Alexei Starovoitov Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20210114181751.768687-11-jackmanb@google.com --- tools/testing/selftests/bpf/Makefile | 2 + .../selftests/bpf/prog_tests/atomics.c | 246 ++++++++++++++++++ tools/testing/selftests/bpf/progs/atomics.c | 154 +++++++++++ .../selftests/bpf/verifier/atomic_and.c | 77 ++++++ .../selftests/bpf/verifier/atomic_cmpxchg.c | 96 +++++++ .../selftests/bpf/verifier/atomic_fetch_add.c | 106 ++++++++ .../selftests/bpf/verifier/atomic_or.c | 77 ++++++ .../selftests/bpf/verifier/atomic_xchg.c | 46 ++++ .../selftests/bpf/verifier/atomic_xor.c | 77 ++++++ 9 files changed, 881 insertions(+) create mode 100644 tools/testing/selftests/bpf/prog_tests/atomics.c create mode 100644 tools/testing/selftests/bpf/progs/atomics.c create mode 100644 tools/testing/selftests/bpf/verifier/atomic_and.c create mode 100644 tools/testing/selftests/bpf/verifier/atomic_cmpxchg.c create mode 100644 tools/testing/selftests/bpf/verifier/atomic_fetch_add.c create mode 100644 tools/testing/selftests/bpf/verifier/atomic_or.c create mode 100644 tools/testing/selftests/bpf/verifier/atomic_xchg.c create mode 100644 tools/testing/selftests/bpf/verifier/atomic_xor.c diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 7f8667ad113e5..0552b07717b6a 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -414,10 +414,12 @@ TRUNNER_EXTRA_FILES := $(OUTPUT)/urandom_read $(OUTPUT)/bpf_testmod.ko \ $(wildcard progs/btf_dump_test_case_*.c) TRUNNER_BPF_BUILD_RULE := CLANG_BPF_BUILD_RULE TRUNNER_BPF_CFLAGS := $(BPF_CFLAGS) $(CLANG_CFLAGS) +TRUNNER_BPF_CFLAGS += -DENABLE_ATOMICS_TESTS $(eval $(call DEFINE_TEST_RUNNER,test_progs)) # Define test_progs-no_alu32 test runner. TRUNNER_BPF_BUILD_RULE := CLANG_NOALU32_BPF_BUILD_RULE +TRUNNER_BPF_CFLAGS := $(BPF_CFLAGS) $(CLANG_CFLAGS) $(eval $(call DEFINE_TEST_RUNNER,test_progs,no_alu32)) # Define test_progs BPF-GCC-flavored test runner. diff --git a/tools/testing/selftests/bpf/prog_tests/atomics.c b/tools/testing/selftests/bpf/prog_tests/atomics.c new file mode 100644 index 0000000000000..21efe7bbf10d2 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/atomics.c @@ -0,0 +1,246 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include + +#include "atomics.skel.h" + +static void test_add(struct atomics *skel) +{ + int err, prog_fd; + __u32 duration = 0, retval; + struct bpf_link *link; + + link = bpf_program__attach(skel->progs.add); + if (CHECK(IS_ERR(link), "attach(add)", "err: %ld\n", PTR_ERR(link))) + return; + + prog_fd = bpf_program__fd(skel->progs.add); + err = bpf_prog_test_run(prog_fd, 1, NULL, 0, + NULL, NULL, &retval, &duration); + if (CHECK(err || retval, "test_run add", + "err %d errno %d retval %d duration %d\n", err, errno, retval, duration)) + goto cleanup; + + ASSERT_EQ(skel->data->add64_value, 3, "add64_value"); + ASSERT_EQ(skel->bss->add64_result, 1, "add64_result"); + + ASSERT_EQ(skel->data->add32_value, 3, "add32_value"); + ASSERT_EQ(skel->bss->add32_result, 1, "add32_result"); + + ASSERT_EQ(skel->bss->add_stack_value_copy, 3, "add_stack_value"); + ASSERT_EQ(skel->bss->add_stack_result, 1, "add_stack_result"); + + ASSERT_EQ(skel->data->add_noreturn_value, 3, "add_noreturn_value"); + +cleanup: + bpf_link__destroy(link); +} + +static void test_sub(struct atomics *skel) +{ + int err, prog_fd; + __u32 duration = 0, retval; + struct bpf_link *link; + + link = bpf_program__attach(skel->progs.sub); + if (CHECK(IS_ERR(link), "attach(sub)", "err: %ld\n", PTR_ERR(link))) + return; + + prog_fd = bpf_program__fd(skel->progs.sub); + err = bpf_prog_test_run(prog_fd, 1, NULL, 0, + NULL, NULL, &retval, &duration); + if (CHECK(err || retval, "test_run sub", + "err %d errno %d retval %d duration %d\n", + err, errno, retval, duration)) + goto cleanup; + + ASSERT_EQ(skel->data->sub64_value, -1, "sub64_value"); + ASSERT_EQ(skel->bss->sub64_result, 1, "sub64_result"); + + ASSERT_EQ(skel->data->sub32_value, -1, "sub32_value"); + ASSERT_EQ(skel->bss->sub32_result, 1, "sub32_result"); + + ASSERT_EQ(skel->bss->sub_stack_value_copy, -1, "sub_stack_value"); + ASSERT_EQ(skel->bss->sub_stack_result, 1, "sub_stack_result"); + + ASSERT_EQ(skel->data->sub_noreturn_value, -1, "sub_noreturn_value"); + +cleanup: + bpf_link__destroy(link); +} + +static void test_and(struct atomics *skel) +{ + int err, prog_fd; + __u32 duration = 0, retval; + struct bpf_link *link; + + link = bpf_program__attach(skel->progs.and); + if (CHECK(IS_ERR(link), "attach(and)", "err: %ld\n", PTR_ERR(link))) + return; + + prog_fd = bpf_program__fd(skel->progs.and); + err = bpf_prog_test_run(prog_fd, 1, NULL, 0, + NULL, NULL, &retval, &duration); + if (CHECK(err || retval, "test_run and", + "err %d errno %d retval %d duration %d\n", err, errno, retval, duration)) + goto cleanup; + + ASSERT_EQ(skel->data->and64_value, 0x010ull << 32, "and64_value"); + ASSERT_EQ(skel->bss->and64_result, 0x110ull << 32, "and64_result"); + + ASSERT_EQ(skel->data->and32_value, 0x010, "and32_value"); + ASSERT_EQ(skel->bss->and32_result, 0x110, "and32_result"); + + ASSERT_EQ(skel->data->and_noreturn_value, 0x010ull << 32, "and_noreturn_value"); +cleanup: + bpf_link__destroy(link); +} + +static void test_or(struct atomics *skel) +{ + int err, prog_fd; + __u32 duration = 0, retval; + struct bpf_link *link; + + link = bpf_program__attach(skel->progs.or); + if (CHECK(IS_ERR(link), "attach(or)", "err: %ld\n", PTR_ERR(link))) + return; + + prog_fd = bpf_program__fd(skel->progs.or); + err = bpf_prog_test_run(prog_fd, 1, NULL, 0, + NULL, NULL, &retval, &duration); + if (CHECK(err || retval, "test_run or", + "err %d errno %d retval %d duration %d\n", + err, errno, retval, duration)) + goto cleanup; + + ASSERT_EQ(skel->data->or64_value, 0x111ull << 32, "or64_value"); + ASSERT_EQ(skel->bss->or64_result, 0x110ull << 32, "or64_result"); + + ASSERT_EQ(skel->data->or32_value, 0x111, "or32_value"); + ASSERT_EQ(skel->bss->or32_result, 0x110, "or32_result"); + + ASSERT_EQ(skel->data->or_noreturn_value, 0x111ull << 32, "or_noreturn_value"); +cleanup: + bpf_link__destroy(link); +} + +static void test_xor(struct atomics *skel) +{ + int err, prog_fd; + __u32 duration = 0, retval; + struct bpf_link *link; + + link = bpf_program__attach(skel->progs.xor); + if (CHECK(IS_ERR(link), "attach(xor)", "err: %ld\n", PTR_ERR(link))) + return; + + prog_fd = bpf_program__fd(skel->progs.xor); + err = bpf_prog_test_run(prog_fd, 1, NULL, 0, + NULL, NULL, &retval, &duration); + if (CHECK(err || retval, "test_run xor", + "err %d errno %d retval %d duration %d\n", err, errno, retval, duration)) + goto cleanup; + + ASSERT_EQ(skel->data->xor64_value, 0x101ull << 32, "xor64_value"); + ASSERT_EQ(skel->bss->xor64_result, 0x110ull << 32, "xor64_result"); + + ASSERT_EQ(skel->data->xor32_value, 0x101, "xor32_value"); + ASSERT_EQ(skel->bss->xor32_result, 0x110, "xor32_result"); + + ASSERT_EQ(skel->data->xor_noreturn_value, 0x101ull << 32, "xor_nxoreturn_value"); +cleanup: + bpf_link__destroy(link); +} + +static void test_cmpxchg(struct atomics *skel) +{ + int err, prog_fd; + __u32 duration = 0, retval; + struct bpf_link *link; + + link = bpf_program__attach(skel->progs.cmpxchg); + if (CHECK(IS_ERR(link), "attach(cmpxchg)", "err: %ld\n", PTR_ERR(link))) + return; + + prog_fd = bpf_program__fd(skel->progs.cmpxchg); + err = bpf_prog_test_run(prog_fd, 1, NULL, 0, + NULL, NULL, &retval, &duration); + if (CHECK(err || retval, "test_run add", + "err %d errno %d retval %d duration %d\n", err, errno, retval, duration)) + goto cleanup; + + ASSERT_EQ(skel->data->cmpxchg64_value, 2, "cmpxchg64_value"); + ASSERT_EQ(skel->bss->cmpxchg64_result_fail, 1, "cmpxchg_result_fail"); + ASSERT_EQ(skel->bss->cmpxchg64_result_succeed, 1, "cmpxchg_result_succeed"); + + ASSERT_EQ(skel->data->cmpxchg32_value, 2, "lcmpxchg32_value"); + ASSERT_EQ(skel->bss->cmpxchg32_result_fail, 1, "cmpxchg_result_fail"); + ASSERT_EQ(skel->bss->cmpxchg32_result_succeed, 1, "cmpxchg_result_succeed"); + +cleanup: + bpf_link__destroy(link); +} + +static void test_xchg(struct atomics *skel) +{ + int err, prog_fd; + __u32 duration = 0, retval; + struct bpf_link *link; + + link = bpf_program__attach(skel->progs.xchg); + if (CHECK(IS_ERR(link), "attach(xchg)", "err: %ld\n", PTR_ERR(link))) + return; + + prog_fd = bpf_program__fd(skel->progs.xchg); + err = bpf_prog_test_run(prog_fd, 1, NULL, 0, + NULL, NULL, &retval, &duration); + if (CHECK(err || retval, "test_run add", + "err %d errno %d retval %d duration %d\n", err, errno, retval, duration)) + goto cleanup; + + ASSERT_EQ(skel->data->xchg64_value, 2, "xchg64_value"); + ASSERT_EQ(skel->bss->xchg64_result, 1, "xchg64_result"); + + ASSERT_EQ(skel->data->xchg32_value, 2, "xchg32_value"); + ASSERT_EQ(skel->bss->xchg32_result, 1, "xchg32_result"); + +cleanup: + bpf_link__destroy(link); +} + +void test_atomics(void) +{ + struct atomics *skel; + __u32 duration = 0; + + skel = atomics__open_and_load(); + if (CHECK(!skel, "skel_load", "atomics skeleton failed\n")) + return; + + if (skel->data->skip_tests) { + printf("%s:SKIP:no ENABLE_ATOMICS_TESTS (missing Clang BPF atomics support)", + __func__); + test__skip(); + goto cleanup; + } + + if (test__start_subtest("add")) + test_add(skel); + if (test__start_subtest("sub")) + test_sub(skel); + if (test__start_subtest("and")) + test_and(skel); + if (test__start_subtest("or")) + test_or(skel); + if (test__start_subtest("xor")) + test_xor(skel); + if (test__start_subtest("cmpxchg")) + test_cmpxchg(skel); + if (test__start_subtest("xchg")) + test_xchg(skel); + +cleanup: + atomics__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/progs/atomics.c b/tools/testing/selftests/bpf/progs/atomics.c new file mode 100644 index 0000000000000..c245345e41ca0 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/atomics.c @@ -0,0 +1,154 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include +#include + +#ifdef ENABLE_ATOMICS_TESTS +bool skip_tests __attribute((__section__(".data"))) = false; +#else +bool skip_tests = true; +#endif + +__u64 add64_value = 1; +__u64 add64_result = 0; +__u32 add32_value = 1; +__u32 add32_result = 0; +__u64 add_stack_value_copy = 0; +__u64 add_stack_result = 0; +__u64 add_noreturn_value = 1; + +SEC("fentry/bpf_fentry_test1") +int BPF_PROG(add, int a) +{ +#ifdef ENABLE_ATOMICS_TESTS + __u64 add_stack_value = 1; + + add64_result = __sync_fetch_and_add(&add64_value, 2); + add32_result = __sync_fetch_and_add(&add32_value, 2); + add_stack_result = __sync_fetch_and_add(&add_stack_value, 2); + add_stack_value_copy = add_stack_value; + __sync_fetch_and_add(&add_noreturn_value, 2); +#endif + + return 0; +} + +__s64 sub64_value = 1; +__s64 sub64_result = 0; +__s32 sub32_value = 1; +__s32 sub32_result = 0; +__s64 sub_stack_value_copy = 0; +__s64 sub_stack_result = 0; +__s64 sub_noreturn_value = 1; + +SEC("fentry/bpf_fentry_test1") +int BPF_PROG(sub, int a) +{ +#ifdef ENABLE_ATOMICS_TESTS + __u64 sub_stack_value = 1; + + sub64_result = __sync_fetch_and_sub(&sub64_value, 2); + sub32_result = __sync_fetch_and_sub(&sub32_value, 2); + sub_stack_result = __sync_fetch_and_sub(&sub_stack_value, 2); + sub_stack_value_copy = sub_stack_value; + __sync_fetch_and_sub(&sub_noreturn_value, 2); +#endif + + return 0; +} + +__u64 and64_value = (0x110ull << 32); +__u64 and64_result = 0; +__u32 and32_value = 0x110; +__u32 and32_result = 0; +__u64 and_noreturn_value = (0x110ull << 32); + +SEC("fentry/bpf_fentry_test1") +int BPF_PROG(and, int a) +{ +#ifdef ENABLE_ATOMICS_TESTS + + and64_result = __sync_fetch_and_and(&and64_value, 0x011ull << 32); + and32_result = __sync_fetch_and_and(&and32_value, 0x011); + __sync_fetch_and_and(&and_noreturn_value, 0x011ull << 32); +#endif + + return 0; +} + +__u64 or64_value = (0x110ull << 32); +__u64 or64_result = 0; +__u32 or32_value = 0x110; +__u32 or32_result = 0; +__u64 or_noreturn_value = (0x110ull << 32); + +SEC("fentry/bpf_fentry_test1") +int BPF_PROG(or, int a) +{ +#ifdef ENABLE_ATOMICS_TESTS + or64_result = __sync_fetch_and_or(&or64_value, 0x011ull << 32); + or32_result = __sync_fetch_and_or(&or32_value, 0x011); + __sync_fetch_and_or(&or_noreturn_value, 0x011ull << 32); +#endif + + return 0; +} + +__u64 xor64_value = (0x110ull << 32); +__u64 xor64_result = 0; +__u32 xor32_value = 0x110; +__u32 xor32_result = 0; +__u64 xor_noreturn_value = (0x110ull << 32); + +SEC("fentry/bpf_fentry_test1") +int BPF_PROG(xor, int a) +{ +#ifdef ENABLE_ATOMICS_TESTS + xor64_result = __sync_fetch_and_xor(&xor64_value, 0x011ull << 32); + xor32_result = __sync_fetch_and_xor(&xor32_value, 0x011); + __sync_fetch_and_xor(&xor_noreturn_value, 0x011ull << 32); +#endif + + return 0; +} + +__u64 cmpxchg64_value = 1; +__u64 cmpxchg64_result_fail = 0; +__u64 cmpxchg64_result_succeed = 0; +__u32 cmpxchg32_value = 1; +__u32 cmpxchg32_result_fail = 0; +__u32 cmpxchg32_result_succeed = 0; + +SEC("fentry/bpf_fentry_test1") +int BPF_PROG(cmpxchg, int a) +{ +#ifdef ENABLE_ATOMICS_TESTS + cmpxchg64_result_fail = __sync_val_compare_and_swap(&cmpxchg64_value, 0, 3); + cmpxchg64_result_succeed = __sync_val_compare_and_swap(&cmpxchg64_value, 1, 2); + + cmpxchg32_result_fail = __sync_val_compare_and_swap(&cmpxchg32_value, 0, 3); + cmpxchg32_result_succeed = __sync_val_compare_and_swap(&cmpxchg32_value, 1, 2); +#endif + + return 0; +} + +__u64 xchg64_value = 1; +__u64 xchg64_result = 0; +__u32 xchg32_value = 1; +__u32 xchg32_result = 0; + +SEC("fentry/bpf_fentry_test1") +int BPF_PROG(xchg, int a) +{ +#ifdef ENABLE_ATOMICS_TESTS + __u64 val64 = 2; + __u32 val32 = 2; + + xchg64_result = __sync_lock_test_and_set(&xchg64_value, val64); + xchg32_result = __sync_lock_test_and_set(&xchg32_value, val32); +#endif + + return 0; +} diff --git a/tools/testing/selftests/bpf/verifier/atomic_and.c b/tools/testing/selftests/bpf/verifier/atomic_and.c new file mode 100644 index 0000000000000..600bc5e0f1430 --- /dev/null +++ b/tools/testing/selftests/bpf/verifier/atomic_and.c @@ -0,0 +1,77 @@ +{ + "BPF_ATOMIC_AND without fetch", + .insns = { + /* val = 0x110; */ + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0x110), + /* atomic_and(&val, 0x011); */ + BPF_MOV64_IMM(BPF_REG_1, 0x011), + BPF_ATOMIC_OP(BPF_DW, BPF_AND, BPF_REG_10, BPF_REG_1, -8), + /* if (val != 0x010) exit(2); */ + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_10, -8), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0x010, 2), + BPF_MOV64_IMM(BPF_REG_0, 2), + BPF_EXIT_INSN(), + /* r1 should not be clobbered, no BPF_FETCH flag */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0x011, 1), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, +}, +{ + "BPF_ATOMIC_AND with fetch", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 123), + /* val = 0x110; */ + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0x110), + /* old = atomic_fetch_and(&val, 0x011); */ + BPF_MOV64_IMM(BPF_REG_1, 0x011), + BPF_ATOMIC_OP(BPF_DW, BPF_AND | BPF_FETCH, BPF_REG_10, BPF_REG_1, -8), + /* if (old != 0x110) exit(3); */ + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0x110, 2), + BPF_MOV64_IMM(BPF_REG_0, 3), + BPF_EXIT_INSN(), + /* if (val != 0x010) exit(2); */ + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -8), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0x010, 2), + BPF_MOV64_IMM(BPF_REG_1, 2), + BPF_EXIT_INSN(), + /* Check R0 wasn't clobbered (for fear of x86 JIT bug) */ + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 123, 2), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + /* exit(0); */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, +}, +{ + "BPF_ATOMIC_AND with fetch 32bit", + .insns = { + /* r0 = (s64) -1 */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_ALU64_IMM(BPF_SUB, BPF_REG_0, 1), + /* val = 0x110; */ + BPF_ST_MEM(BPF_W, BPF_REG_10, -4, 0x110), + /* old = atomic_fetch_and(&val, 0x011); */ + BPF_MOV32_IMM(BPF_REG_1, 0x011), + BPF_ATOMIC_OP(BPF_W, BPF_AND | BPF_FETCH, BPF_REG_10, BPF_REG_1, -4), + /* if (old != 0x110) exit(3); */ + BPF_JMP32_IMM(BPF_JEQ, BPF_REG_1, 0x110, 2), + BPF_MOV32_IMM(BPF_REG_0, 3), + BPF_EXIT_INSN(), + /* if (val != 0x010) exit(2); */ + BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_10, -4), + BPF_JMP32_IMM(BPF_JEQ, BPF_REG_1, 0x010, 2), + BPF_MOV32_IMM(BPF_REG_1, 2), + BPF_EXIT_INSN(), + /* Check R0 wasn't clobbered (for fear of x86 JIT bug) + * It should be -1 so add 1 to get exit code. + */ + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, +}, diff --git a/tools/testing/selftests/bpf/verifier/atomic_cmpxchg.c b/tools/testing/selftests/bpf/verifier/atomic_cmpxchg.c new file mode 100644 index 0000000000000..2efd8bcf57a1e --- /dev/null +++ b/tools/testing/selftests/bpf/verifier/atomic_cmpxchg.c @@ -0,0 +1,96 @@ +{ + "atomic compare-and-exchange smoketest - 64bit", + .insns = { + /* val = 3; */ + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 3), + /* old = atomic_cmpxchg(&val, 2, 4); */ + BPF_MOV64_IMM(BPF_REG_1, 4), + BPF_MOV64_IMM(BPF_REG_0, 2), + BPF_ATOMIC_OP(BPF_DW, BPF_CMPXCHG, BPF_REG_10, BPF_REG_1, -8), + /* if (old != 3) exit(2); */ + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 3, 2), + BPF_MOV64_IMM(BPF_REG_0, 2), + BPF_EXIT_INSN(), + /* if (val != 3) exit(3); */ + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 3, 2), + BPF_MOV64_IMM(BPF_REG_0, 3), + BPF_EXIT_INSN(), + /* old = atomic_cmpxchg(&val, 3, 4); */ + BPF_MOV64_IMM(BPF_REG_1, 4), + BPF_MOV64_IMM(BPF_REG_0, 3), + BPF_ATOMIC_OP(BPF_DW, BPF_CMPXCHG, BPF_REG_10, BPF_REG_1, -8), + /* if (old != 3) exit(4); */ + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 3, 2), + BPF_MOV64_IMM(BPF_REG_0, 4), + BPF_EXIT_INSN(), + /* if (val != 4) exit(5); */ + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 4, 2), + BPF_MOV64_IMM(BPF_REG_0, 5), + BPF_EXIT_INSN(), + /* exit(0); */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, +}, +{ + "atomic compare-and-exchange smoketest - 32bit", + .insns = { + /* val = 3; */ + BPF_ST_MEM(BPF_W, BPF_REG_10, -4, 3), + /* old = atomic_cmpxchg(&val, 2, 4); */ + BPF_MOV32_IMM(BPF_REG_1, 4), + BPF_MOV32_IMM(BPF_REG_0, 2), + BPF_ATOMIC_OP(BPF_W, BPF_CMPXCHG, BPF_REG_10, BPF_REG_1, -4), + /* if (old != 3) exit(2); */ + BPF_JMP32_IMM(BPF_JEQ, BPF_REG_0, 3, 2), + BPF_MOV32_IMM(BPF_REG_0, 2), + BPF_EXIT_INSN(), + /* if (val != 3) exit(3); */ + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_10, -4), + BPF_JMP32_IMM(BPF_JEQ, BPF_REG_0, 3, 2), + BPF_MOV32_IMM(BPF_REG_0, 3), + BPF_EXIT_INSN(), + /* old = atomic_cmpxchg(&val, 3, 4); */ + BPF_MOV32_IMM(BPF_REG_1, 4), + BPF_MOV32_IMM(BPF_REG_0, 3), + BPF_ATOMIC_OP(BPF_W, BPF_CMPXCHG, BPF_REG_10, BPF_REG_1, -4), + /* if (old != 3) exit(4); */ + BPF_JMP32_IMM(BPF_JEQ, BPF_REG_0, 3, 2), + BPF_MOV32_IMM(BPF_REG_0, 4), + BPF_EXIT_INSN(), + /* if (val != 4) exit(5); */ + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_10, -4), + BPF_JMP32_IMM(BPF_JEQ, BPF_REG_0, 4, 2), + BPF_MOV32_IMM(BPF_REG_0, 5), + BPF_EXIT_INSN(), + /* exit(0); */ + BPF_MOV32_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, +}, +{ + "Can't use cmpxchg on uninit src reg", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 3), + BPF_MOV64_IMM(BPF_REG_0, 3), + BPF_ATOMIC_OP(BPF_DW, BPF_CMPXCHG, BPF_REG_10, BPF_REG_2, -8), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "!read_ok", +}, +{ + "Can't use cmpxchg on uninit memory", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 3), + BPF_MOV64_IMM(BPF_REG_2, 4), + BPF_ATOMIC_OP(BPF_DW, BPF_CMPXCHG, BPF_REG_10, BPF_REG_2, -8), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "invalid read from stack", +}, diff --git a/tools/testing/selftests/bpf/verifier/atomic_fetch_add.c b/tools/testing/selftests/bpf/verifier/atomic_fetch_add.c new file mode 100644 index 0000000000000..a91de8cd9defb --- /dev/null +++ b/tools/testing/selftests/bpf/verifier/atomic_fetch_add.c @@ -0,0 +1,106 @@ +{ + "BPF_ATOMIC_FETCH_ADD smoketest - 64bit", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + /* Write 3 to stack */ + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 3), + /* Put a 1 in R1, add it to the 3 on the stack, and load the value back into R1 */ + BPF_MOV64_IMM(BPF_REG_1, 1), + BPF_ATOMIC_OP(BPF_DW, BPF_ADD | BPF_FETCH, BPF_REG_10, BPF_REG_1, -8), + /* Check the value we loaded back was 3 */ + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 3, 2), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + /* Load value from stack */ + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -8), + /* Check value loaded from stack was 4 */ + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 4, 1), + BPF_MOV64_IMM(BPF_REG_0, 2), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, +}, +{ + "BPF_ATOMIC_FETCH_ADD smoketest - 32bit", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + /* Write 3 to stack */ + BPF_ST_MEM(BPF_W, BPF_REG_10, -4, 3), + /* Put a 1 in R1, add it to the 3 on the stack, and load the value back into R1 */ + BPF_MOV32_IMM(BPF_REG_1, 1), + BPF_ATOMIC_OP(BPF_W, BPF_ADD | BPF_FETCH, BPF_REG_10, BPF_REG_1, -4), + /* Check the value we loaded back was 3 */ + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 3, 2), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + /* Load value from stack */ + BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_10, -4), + /* Check value loaded from stack was 4 */ + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 4, 1), + BPF_MOV64_IMM(BPF_REG_0, 2), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, +}, +{ + "Can't use ATM_FETCH_ADD on frame pointer", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 3), + BPF_ATOMIC_OP(BPF_DW, BPF_ADD | BPF_FETCH, BPF_REG_10, BPF_REG_10, -8), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr_unpriv = "R10 leaks addr into mem", + .errstr = "frame pointer is read only", +}, +{ + "Can't use ATM_FETCH_ADD on uninit src reg", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 3), + BPF_ATOMIC_OP(BPF_DW, BPF_ADD | BPF_FETCH, BPF_REG_10, BPF_REG_2, -8), + BPF_EXIT_INSN(), + }, + .result = REJECT, + /* It happens that the address leak check is first, but it would also be + * complain about the fact that we're trying to modify R10. + */ + .errstr = "!read_ok", +}, +{ + "Can't use ATM_FETCH_ADD on uninit dst reg", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_ATOMIC_OP(BPF_DW, BPF_ADD | BPF_FETCH, BPF_REG_2, BPF_REG_0, -8), + BPF_EXIT_INSN(), + }, + .result = REJECT, + /* It happens that the address leak check is first, but it would also be + * complain about the fact that we're trying to modify R10. + */ + .errstr = "!read_ok", +}, +{ + "Can't use ATM_FETCH_ADD on kernel memory", + .insns = { + /* This is an fentry prog, context is array of the args of the + * kernel function being called. Load first arg into R2. + */ + BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, 0), + /* First arg of bpf_fentry_test7 is a pointer to a struct. + * Attempt to modify that struct. Verifier shouldn't let us + * because it's kernel memory. + */ + BPF_MOV64_IMM(BPF_REG_3, 1), + BPF_ATOMIC_OP(BPF_DW, BPF_ADD | BPF_FETCH, BPF_REG_2, BPF_REG_3, 0), + /* Done */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACING, + .expected_attach_type = BPF_TRACE_FENTRY, + .kfunc = "bpf_fentry_test7", + .result = REJECT, + .errstr = "only read is supported", +}, diff --git a/tools/testing/selftests/bpf/verifier/atomic_or.c b/tools/testing/selftests/bpf/verifier/atomic_or.c new file mode 100644 index 0000000000000..ebe6e51455ba4 --- /dev/null +++ b/tools/testing/selftests/bpf/verifier/atomic_or.c @@ -0,0 +1,77 @@ +{ + "BPF_ATOMIC OR without fetch", + .insns = { + /* val = 0x110; */ + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0x110), + /* atomic_or(&val, 0x011); */ + BPF_MOV64_IMM(BPF_REG_1, 0x011), + BPF_ATOMIC_OP(BPF_DW, BPF_OR, BPF_REG_10, BPF_REG_1, -8), + /* if (val != 0x111) exit(2); */ + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_10, -8), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0x111, 2), + BPF_MOV64_IMM(BPF_REG_0, 2), + BPF_EXIT_INSN(), + /* r1 should not be clobbered, no BPF_FETCH flag */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0x011, 1), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, +}, +{ + "BPF_ATOMIC OR with fetch", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 123), + /* val = 0x110; */ + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0x110), + /* old = atomic_fetch_or(&val, 0x011); */ + BPF_MOV64_IMM(BPF_REG_1, 0x011), + BPF_ATOMIC_OP(BPF_DW, BPF_OR | BPF_FETCH, BPF_REG_10, BPF_REG_1, -8), + /* if (old != 0x110) exit(3); */ + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0x110, 2), + BPF_MOV64_IMM(BPF_REG_0, 3), + BPF_EXIT_INSN(), + /* if (val != 0x111) exit(2); */ + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -8), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0x111, 2), + BPF_MOV64_IMM(BPF_REG_1, 2), + BPF_EXIT_INSN(), + /* Check R0 wasn't clobbered (for fear of x86 JIT bug) */ + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 123, 2), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + /* exit(0); */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, +}, +{ + "BPF_ATOMIC OR with fetch 32bit", + .insns = { + /* r0 = (s64) -1 */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_ALU64_IMM(BPF_SUB, BPF_REG_0, 1), + /* val = 0x110; */ + BPF_ST_MEM(BPF_W, BPF_REG_10, -4, 0x110), + /* old = atomic_fetch_or(&val, 0x011); */ + BPF_MOV32_IMM(BPF_REG_1, 0x011), + BPF_ATOMIC_OP(BPF_W, BPF_OR | BPF_FETCH, BPF_REG_10, BPF_REG_1, -4), + /* if (old != 0x110) exit(3); */ + BPF_JMP32_IMM(BPF_JEQ, BPF_REG_1, 0x110, 2), + BPF_MOV32_IMM(BPF_REG_0, 3), + BPF_EXIT_INSN(), + /* if (val != 0x111) exit(2); */ + BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_10, -4), + BPF_JMP32_IMM(BPF_JEQ, BPF_REG_1, 0x111, 2), + BPF_MOV32_IMM(BPF_REG_1, 2), + BPF_EXIT_INSN(), + /* Check R0 wasn't clobbered (for fear of x86 JIT bug) + * It should be -1 so add 1 to get exit code. + */ + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, +}, diff --git a/tools/testing/selftests/bpf/verifier/atomic_xchg.c b/tools/testing/selftests/bpf/verifier/atomic_xchg.c new file mode 100644 index 0000000000000..33e2d6c973ee9 --- /dev/null +++ b/tools/testing/selftests/bpf/verifier/atomic_xchg.c @@ -0,0 +1,46 @@ +{ + "atomic exchange smoketest - 64bit", + .insns = { + /* val = 3; */ + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 3), + /* old = atomic_xchg(&val, 4); */ + BPF_MOV64_IMM(BPF_REG_1, 4), + BPF_ATOMIC_OP(BPF_DW, BPF_XCHG, BPF_REG_10, BPF_REG_1, -8), + /* if (old != 3) exit(1); */ + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 3, 2), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + /* if (val != 4) exit(2); */ + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 4, 2), + BPF_MOV64_IMM(BPF_REG_0, 2), + BPF_EXIT_INSN(), + /* exit(0); */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, +}, +{ + "atomic exchange smoketest - 32bit", + .insns = { + /* val = 3; */ + BPF_ST_MEM(BPF_W, BPF_REG_10, -4, 3), + /* old = atomic_xchg(&val, 4); */ + BPF_MOV32_IMM(BPF_REG_1, 4), + BPF_ATOMIC_OP(BPF_W, BPF_XCHG, BPF_REG_10, BPF_REG_1, -4), + /* if (old != 3) exit(1); */ + BPF_JMP32_IMM(BPF_JEQ, BPF_REG_1, 3, 2), + BPF_MOV32_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + /* if (val != 4) exit(2); */ + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_10, -4), + BPF_JMP32_IMM(BPF_JEQ, BPF_REG_0, 4, 2), + BPF_MOV32_IMM(BPF_REG_0, 2), + BPF_EXIT_INSN(), + /* exit(0); */ + BPF_MOV32_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, +}, diff --git a/tools/testing/selftests/bpf/verifier/atomic_xor.c b/tools/testing/selftests/bpf/verifier/atomic_xor.c new file mode 100644 index 0000000000000..eb791e547b47c --- /dev/null +++ b/tools/testing/selftests/bpf/verifier/atomic_xor.c @@ -0,0 +1,77 @@ +{ + "BPF_ATOMIC XOR without fetch", + .insns = { + /* val = 0x110; */ + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0x110), + /* atomic_xor(&val, 0x011); */ + BPF_MOV64_IMM(BPF_REG_1, 0x011), + BPF_ATOMIC_OP(BPF_DW, BPF_XOR, BPF_REG_10, BPF_REG_1, -8), + /* if (val != 0x101) exit(2); */ + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_10, -8), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0x101, 2), + BPF_MOV64_IMM(BPF_REG_0, 2), + BPF_EXIT_INSN(), + /* r1 should not be clobbered, no BPF_FETCH flag */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0x011, 1), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, +}, +{ + "BPF_ATOMIC XOR with fetch", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 123), + /* val = 0x110; */ + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0x110), + /* old = atomic_fetch_xor(&val, 0x011); */ + BPF_MOV64_IMM(BPF_REG_1, 0x011), + BPF_ATOMIC_OP(BPF_DW, BPF_XOR | BPF_FETCH, BPF_REG_10, BPF_REG_1, -8), + /* if (old != 0x110) exit(3); */ + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0x110, 2), + BPF_MOV64_IMM(BPF_REG_0, 3), + BPF_EXIT_INSN(), + /* if (val != 0x101) exit(2); */ + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -8), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0x101, 2), + BPF_MOV64_IMM(BPF_REG_1, 2), + BPF_EXIT_INSN(), + /* Check R0 wasn't clobbered (fxor fear of x86 JIT bug) */ + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 123, 2), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + /* exit(0); */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, +}, +{ + "BPF_ATOMIC XOR with fetch 32bit", + .insns = { + /* r0 = (s64) -1 */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_ALU64_IMM(BPF_SUB, BPF_REG_0, 1), + /* val = 0x110; */ + BPF_ST_MEM(BPF_W, BPF_REG_10, -4, 0x110), + /* old = atomic_fetch_xor(&val, 0x011); */ + BPF_MOV32_IMM(BPF_REG_1, 0x011), + BPF_ATOMIC_OP(BPF_W, BPF_XOR | BPF_FETCH, BPF_REG_10, BPF_REG_1, -4), + /* if (old != 0x110) exit(3); */ + BPF_JMP32_IMM(BPF_JEQ, BPF_REG_1, 0x110, 2), + BPF_MOV32_IMM(BPF_REG_0, 3), + BPF_EXIT_INSN(), + /* if (val != 0x101) exit(2); */ + BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_10, -4), + BPF_JMP32_IMM(BPF_JEQ, BPF_REG_1, 0x101, 2), + BPF_MOV32_IMM(BPF_REG_1, 2), + BPF_EXIT_INSN(), + /* Check R0 wasn't clobbered (fxor fear of x86 JIT bug) + * It should be -1 so add 1 to get exit code. + */ + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, +}, -- GitLab From de948576f8e7d7fa1b5db04f56184ffe176177c5 Mon Sep 17 00:00:00 2001 From: Brendan Jackman Date: Thu, 14 Jan 2021 18:17:51 +0000 Subject: [PATCH 0865/2012] bpf: Document new atomic instructions Document new atomic instructions. Signed-off-by: Brendan Jackman Signed-off-by: Alexei Starovoitov Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20210114181751.768687-12-jackmanb@google.com --- Documentation/networking/filter.rst | 31 +++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/Documentation/networking/filter.rst b/Documentation/networking/filter.rst index 1583d59d806d8..f6d8f90e9a560 100644 --- a/Documentation/networking/filter.rst +++ b/Documentation/networking/filter.rst @@ -1053,8 +1053,39 @@ encoding. .imm = BPF_ADD, .code = BPF_ATOMIC | BPF_W | BPF_STX: lock xadd *(u32 *)(dst_reg + off16) += src_reg .imm = BPF_ADD, .code = BPF_ATOMIC | BPF_DW | BPF_STX: lock xadd *(u64 *)(dst_reg + off16) += src_reg +The basic atomic operations supported are: + + BPF_ADD + BPF_AND + BPF_OR + BPF_XOR + +Each having equivalent semantics with the ``BPF_ADD`` example, that is: the +memory location addresed by ``dst_reg + off`` is atomically modified, with +``src_reg`` as the other operand. If the ``BPF_FETCH`` flag is set in the +immediate, then these operations also overwrite ``src_reg`` with the +value that was in memory before it was modified. + +The more special operations are: + + BPF_XCHG + +This atomically exchanges ``src_reg`` with the value addressed by ``dst_reg + +off``. + + BPF_CMPXCHG + +This atomically compares the value addressed by ``dst_reg + off`` with +``R0``. If they match it is replaced with ``src_reg``, The value that was there +before is loaded back to ``R0``. + Note that 1 and 2 byte atomic operations are not supported. +Except ``BPF_ADD`` _without_ ``BPF_FETCH`` (for legacy reasons), all 4 byte +atomic operations require alu32 mode. Clang enables this mode by default in +architecture v3 (``-mcpu=v3``). For older versions it can be enabled with +``-Xclang -target-feature -Xclang +alu32``. + You may encounter BPF_XADD - this is a legacy name for BPF_ATOMIC, referring to the exclusive-add operation encoded when the immediate field is zero. -- GitLab From 780e1384687d6ecdee9ca789a1027610484ac8a2 Mon Sep 17 00:00:00 2001 From: Shin'ichiro Kawasaki Date: Wed, 13 Jan 2021 11:45:08 +0900 Subject: [PATCH 0866/2012] scsi: target: tcmu: Fix use-after-free of se_cmd->priv Commit a35129024e88 ("scsi: target: tcmu: Use priv pointer in se_cmd") modified tcmu_free_cmd() to set NULL to priv pointer in se_cmd. However, se_cmd can be already freed by work queue triggered in target_complete_cmd(). This caused BUG KASAN use-after-free [1]. To fix the bug, do not touch priv pointer in tcmu_free_cmd(). Instead, set NULL to priv pointer before target_complete_cmd() calls. Also, to avoid unnecessary priv pointer change in tcmu_queue_cmd(), modify priv pointer in the function only when tcmu_free_cmd() is not called. [1] BUG: KASAN: use-after-free in tcmu_handle_completions+0x1172/0x1770 [target_core_user] Write of size 8 at addr ffff88814cf79a40 by task cmdproc-uio0/14842 CPU: 2 PID: 14842 Comm: cmdproc-uio0 Not tainted 5.11.0-rc2 #1 Hardware name: Supermicro Super Server/X10SRL-F, BIOS 3.2 11/22/2019 Call Trace: dump_stack+0x9a/0xcc ? tcmu_handle_completions+0x1172/0x1770 [target_core_user] print_address_description.constprop.0+0x18/0x130 ? tcmu_handle_completions+0x1172/0x1770 [target_core_user] ? tcmu_handle_completions+0x1172/0x1770 [target_core_user] kasan_report.cold+0x7f/0x10e ? tcmu_handle_completions+0x1172/0x1770 [target_core_user] tcmu_handle_completions+0x1172/0x1770 [target_core_user] ? queue_tmr_ring+0x5d0/0x5d0 [target_core_user] tcmu_irqcontrol+0x28/0x60 [target_core_user] uio_write+0x155/0x230 ? uio_vma_fault+0x460/0x460 ? security_file_permission+0x4f/0x440 vfs_write+0x1ce/0x860 ksys_write+0xe9/0x1b0 ? __ia32_sys_read+0xb0/0xb0 ? syscall_enter_from_user_mode+0x27/0x70 ? trace_hardirqs_on+0x1c/0x110 do_syscall_64+0x33/0x40 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x7fcf8b61905f Code: 89 54 24 18 48 89 74 24 10 89 7c 24 08 e8 b9 fc ff ff 48 8b 54 24 18 48 8b 74 24 10 41 89 c0 8b 7c 24 08 b8 01 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 31 44 89 c7 48 89 44 24 08 e8 0c fd ff ff 48 RSP: 002b:00007fcf7b3e6c30 EFLAGS: 00000293 ORIG_RAX: 0000000000000001 RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007fcf8b61905f RDX: 0000000000000004 RSI: 00007fcf7b3e6c78 RDI: 000000000000000c RBP: 00007fcf7b3e6c80 R08: 0000000000000000 R09: 00007fcf7b3e6aa8 R10: 000000000b01c000 R11: 0000000000000293 R12: 00007ffe0c32a52e R13: 00007ffe0c32a52f R14: 0000000000000000 R15: 00007fcf7b3e7640 Allocated by task 383: kasan_save_stack+0x1b/0x40 ____kasan_kmalloc.constprop.0+0x84/0xa0 kmem_cache_alloc+0x142/0x330 tcm_loop_queuecommand+0x2a/0x4e0 [tcm_loop] scsi_queue_rq+0x12ec/0x2d20 blk_mq_dispatch_rq_list+0x30a/0x1db0 __blk_mq_do_dispatch_sched+0x326/0x830 __blk_mq_sched_dispatch_requests+0x2c8/0x3f0 blk_mq_sched_dispatch_requests+0xca/0x120 __blk_mq_run_hw_queue+0x93/0xe0 process_one_work+0x7b6/0x1290 worker_thread+0x590/0xf80 kthread+0x362/0x430 ret_from_fork+0x22/0x30 Freed by task 11655: kasan_save_stack+0x1b/0x40 kasan_set_track+0x1c/0x30 kasan_set_free_info+0x20/0x30 ____kasan_slab_free+0xec/0x120 slab_free_freelist_hook+0x53/0x160 kmem_cache_free+0xf4/0x5c0 target_release_cmd_kref+0x3ea/0x9e0 [target_core_mod] transport_generic_free_cmd+0x28b/0x2f0 [target_core_mod] target_complete_ok_work+0x250/0xac0 [target_core_mod] process_one_work+0x7b6/0x1290 worker_thread+0x590/0xf80 kthread+0x362/0x430 ret_from_fork+0x22/0x30 Last potentially related work creation: kasan_save_stack+0x1b/0x40 kasan_record_aux_stack+0xa3/0xb0 insert_work+0x48/0x2e0 __queue_work+0x4e8/0xdf0 queue_work_on+0x78/0x80 tcmu_handle_completions+0xad0/0x1770 [target_core_user] tcmu_irqcontrol+0x28/0x60 [target_core_user] uio_write+0x155/0x230 vfs_write+0x1ce/0x860 ksys_write+0xe9/0x1b0 do_syscall_64+0x33/0x40 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Second to last potentially related work creation: kasan_save_stack+0x1b/0x40 kasan_record_aux_stack+0xa3/0xb0 insert_work+0x48/0x2e0 __queue_work+0x4e8/0xdf0 queue_work_on+0x78/0x80 tcm_loop_queuecommand+0x1c3/0x4e0 [tcm_loop] scsi_queue_rq+0x12ec/0x2d20 blk_mq_dispatch_rq_list+0x30a/0x1db0 __blk_mq_do_dispatch_sched+0x326/0x830 __blk_mq_sched_dispatch_requests+0x2c8/0x3f0 blk_mq_sched_dispatch_requests+0xca/0x120 __blk_mq_run_hw_queue+0x93/0xe0 process_one_work+0x7b6/0x1290 worker_thread+0x590/0xf80 kthread+0x362/0x430 ret_from_fork+0x22/0x30 The buggy address belongs to the object at ffff88814cf79800 which belongs to the cache tcm_loop_cmd_cache of size 896. Link: https://lore.kernel.org/r/20210113024508.1264992-1-shinichiro.kawasaki@wdc.com Fixes: a35129024e88 ("scsi: target: tcmu: Use priv pointer in se_cmd") Cc: stable@vger.kernel.org # v5.9+ Acked-by: Bodo Stroesser Signed-off-by: Shin'ichiro Kawasaki Signed-off-by: Martin K. Petersen --- drivers/target/target_core_user.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c index 6b171fff007b6..a5991df235811 100644 --- a/drivers/target/target_core_user.c +++ b/drivers/target/target_core_user.c @@ -562,8 +562,6 @@ tcmu_get_block_page(struct tcmu_dev *udev, uint32_t dbi) static inline void tcmu_free_cmd(struct tcmu_cmd *tcmu_cmd) { - if (tcmu_cmd->se_cmd) - tcmu_cmd->se_cmd->priv = NULL; kfree(tcmu_cmd->dbi); kmem_cache_free(tcmu_cmd_cache, tcmu_cmd); } @@ -1174,11 +1172,12 @@ tcmu_queue_cmd(struct se_cmd *se_cmd) return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; mutex_lock(&udev->cmdr_lock); - se_cmd->priv = tcmu_cmd; if (!(se_cmd->transport_state & CMD_T_ABORTED)) ret = queue_cmd_ring(tcmu_cmd, &scsi_ret); if (ret < 0) tcmu_free_cmd(tcmu_cmd); + else + se_cmd->priv = tcmu_cmd; mutex_unlock(&udev->cmdr_lock); return scsi_ret; } @@ -1241,6 +1240,7 @@ tcmu_tmr_notify(struct se_device *se_dev, enum tcm_tmreq_table tmf, list_del_init(&cmd->queue_entry); tcmu_free_cmd(cmd); + se_cmd->priv = NULL; target_complete_cmd(se_cmd, SAM_STAT_TASK_ABORTED); unqueued = true; } @@ -1332,6 +1332,7 @@ static void tcmu_handle_completion(struct tcmu_cmd *cmd, struct tcmu_cmd_entry * } done: + se_cmd->priv = NULL; if (read_len_valid) { pr_debug("read_len = %d\n", read_len); target_complete_cmd_with_length(cmd->se_cmd, @@ -1478,6 +1479,7 @@ static void tcmu_check_expired_queue_cmd(struct tcmu_cmd *cmd) se_cmd = cmd->se_cmd; tcmu_free_cmd(cmd); + se_cmd->priv = NULL; target_complete_cmd(se_cmd, SAM_STAT_TASK_SET_FULL); } @@ -1592,6 +1594,7 @@ static void run_qfull_queue(struct tcmu_dev *udev, bool fail) * removed then LIO core will do the right thing and * fail the retry. */ + tcmu_cmd->se_cmd->priv = NULL; target_complete_cmd(tcmu_cmd->se_cmd, SAM_STAT_BUSY); tcmu_free_cmd(tcmu_cmd); continue; @@ -1605,6 +1608,7 @@ static void run_qfull_queue(struct tcmu_dev *udev, bool fail) * Ignore scsi_ret for now. target_complete_cmd * drops it. */ + tcmu_cmd->se_cmd->priv = NULL; target_complete_cmd(tcmu_cmd->se_cmd, SAM_STAT_CHECK_CONDITION); tcmu_free_cmd(tcmu_cmd); @@ -2212,6 +2216,7 @@ static void tcmu_reset_ring(struct tcmu_dev *udev, u8 err_level) if (!test_bit(TCMU_CMD_BIT_EXPIRED, &cmd->flags)) { WARN_ON(!cmd->se_cmd); list_del_init(&cmd->queue_entry); + cmd->se_cmd->priv = NULL; if (err_level == 1) { /* * Userspace was not able to start the -- GitLab From 764907293edc1af7ac857389af9dc858944f53dc Mon Sep 17 00:00:00 2001 From: Brian King Date: Tue, 12 Jan 2021 09:06:38 -0600 Subject: [PATCH 0867/2012] scsi: ibmvfc: Set default timeout to avoid crash during migration While testing live partition mobility, we have observed occasional crashes of the Linux partition. What we've seen is that during the live migration, for specific configurations with large amounts of memory, slow network links, and workloads that are changing memory a lot, the partition can end up being suspended for 30 seconds or longer. This resulted in the following scenario: CPU 0 CPU 1 ------------------------------- ---------------------------------- scsi_queue_rq migration_store -> blk_mq_start_request -> rtas_ibm_suspend_me -> blk_add_timer -> on_each_cpu(rtas_percpu_suspend_me _______________________________________V | V -> IPI from CPU 1 -> rtas_percpu_suspend_me -> __rtas_suspend_last_cpu -- Linux partition suspended for > 30 seconds -- -> for_each_online_cpu(cpu) plpar_hcall_norets(H_PROD -> scsi_dispatch_cmd -> scsi_times_out -> scsi_abort_command -> queue_delayed_work -> ibmvfc_queuecommand_lck -> ibmvfc_send_event -> ibmvfc_send_crq - returns H_CLOSED <- returns SCSI_MLQUEUE_HOST_BUSY -> __blk_mq_requeue_request -> scmd_eh_abort_handler -> scsi_try_to_abort_cmd - returns SUCCESS -> scsi_queue_insert Normally, the SCMD_STATE_COMPLETE bit would protect against the command completion and the timeout, but that doesn't work here, since we don't check that at all in the SCSI_MLQUEUE_HOST_BUSY path. In this case we end up calling scsi_queue_insert on a request that has already been queued, or possibly even freed, and we crash. The patch below simply increases the default I/O timeout to avoid this race condition. This is also the timeout value that nearly all IBM SAN storage recommends setting as the default value. Link: https://lore.kernel.org/r/1610463998-19791-1-git-send-email-brking@linux.vnet.ibm.com Signed-off-by: Brian King Signed-off-by: Martin K. Petersen --- drivers/scsi/ibmvscsi/ibmvfc.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/ibmvscsi/ibmvfc.c b/drivers/scsi/ibmvscsi/ibmvfc.c index 7312f31df878c..65f168c41d233 100644 --- a/drivers/scsi/ibmvscsi/ibmvfc.c +++ b/drivers/scsi/ibmvscsi/ibmvfc.c @@ -3007,8 +3007,10 @@ static int ibmvfc_slave_configure(struct scsi_device *sdev) unsigned long flags = 0; spin_lock_irqsave(shost->host_lock, flags); - if (sdev->type == TYPE_DISK) + if (sdev->type == TYPE_DISK) { sdev->allow_restart = 1; + blk_queue_rq_timeout(sdev->request_queue, 120 * HZ); + } spin_unlock_irqrestore(shost->host_lock, flags); return 0; } -- GitLab From bd7525dacd7e204f8cae061941fb9001c89d6988 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 14 Jan 2021 14:40:42 +0100 Subject: [PATCH 0868/2012] bpf: Move stack_map_get_build_id into lib Moving stack_map_get_build_id into lib with declaration in linux/buildid.h header: int build_id_parse(struct vm_area_struct *vma, unsigned char *build_id); This function returns build id for given struct vm_area_struct. There is no functional change to stack_map_get_build_id function. Signed-off-by: Jiri Olsa Signed-off-by: Alexei Starovoitov Acked-by: Song Liu Link: https://lore.kernel.org/bpf/20210114134044.1418404-2-jolsa@kernel.org --- include/linux/buildid.h | 11 ++++ kernel/bpf/stackmap.c | 143 ++-------------------------------------- lib/Makefile | 3 +- lib/buildid.c | 136 ++++++++++++++++++++++++++++++++++++++ 4 files changed, 153 insertions(+), 140 deletions(-) create mode 100644 include/linux/buildid.h create mode 100644 lib/buildid.c diff --git a/include/linux/buildid.h b/include/linux/buildid.h new file mode 100644 index 0000000000000..08028a212589f --- /dev/null +++ b/include/linux/buildid.h @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_BUILDID_H +#define _LINUX_BUILDID_H + +#include + +#define BUILD_ID_SIZE_MAX 20 + +int build_id_parse(struct vm_area_struct *vma, unsigned char *build_id); + +#endif diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c index aea96b6384734..55d254a59f073 100644 --- a/kernel/bpf/stackmap.c +++ b/kernel/bpf/stackmap.c @@ -7,10 +7,9 @@ #include #include #include -#include -#include #include #include +#include #include "percpu_freelist.h" #define STACK_CREATE_FLAG_MASK \ @@ -143,140 +142,6 @@ free_smap: return ERR_PTR(err); } -#define BPF_BUILD_ID 3 -/* - * Parse build id from the note segment. This logic can be shared between - * 32-bit and 64-bit system, because Elf32_Nhdr and Elf64_Nhdr are - * identical. - */ -static inline int stack_map_parse_build_id(void *page_addr, - unsigned char *build_id, - void *note_start, - Elf32_Word note_size) -{ - Elf32_Word note_offs = 0, new_offs; - - /* check for overflow */ - if (note_start < page_addr || note_start + note_size < note_start) - return -EINVAL; - - /* only supports note that fits in the first page */ - if (note_start + note_size > page_addr + PAGE_SIZE) - return -EINVAL; - - while (note_offs + sizeof(Elf32_Nhdr) < note_size) { - Elf32_Nhdr *nhdr = (Elf32_Nhdr *)(note_start + note_offs); - - if (nhdr->n_type == BPF_BUILD_ID && - nhdr->n_namesz == sizeof("GNU") && - nhdr->n_descsz > 0 && - nhdr->n_descsz <= BPF_BUILD_ID_SIZE) { - memcpy(build_id, - note_start + note_offs + - ALIGN(sizeof("GNU"), 4) + sizeof(Elf32_Nhdr), - nhdr->n_descsz); - memset(build_id + nhdr->n_descsz, 0, - BPF_BUILD_ID_SIZE - nhdr->n_descsz); - return 0; - } - new_offs = note_offs + sizeof(Elf32_Nhdr) + - ALIGN(nhdr->n_namesz, 4) + ALIGN(nhdr->n_descsz, 4); - if (new_offs <= note_offs) /* overflow */ - break; - note_offs = new_offs; - } - return -EINVAL; -} - -/* Parse build ID from 32-bit ELF */ -static int stack_map_get_build_id_32(void *page_addr, - unsigned char *build_id) -{ - Elf32_Ehdr *ehdr = (Elf32_Ehdr *)page_addr; - Elf32_Phdr *phdr; - int i; - - /* only supports phdr that fits in one page */ - if (ehdr->e_phnum > - (PAGE_SIZE - sizeof(Elf32_Ehdr)) / sizeof(Elf32_Phdr)) - return -EINVAL; - - phdr = (Elf32_Phdr *)(page_addr + sizeof(Elf32_Ehdr)); - - for (i = 0; i < ehdr->e_phnum; ++i) { - if (phdr[i].p_type == PT_NOTE && - !stack_map_parse_build_id(page_addr, build_id, - page_addr + phdr[i].p_offset, - phdr[i].p_filesz)) - return 0; - } - return -EINVAL; -} - -/* Parse build ID from 64-bit ELF */ -static int stack_map_get_build_id_64(void *page_addr, - unsigned char *build_id) -{ - Elf64_Ehdr *ehdr = (Elf64_Ehdr *)page_addr; - Elf64_Phdr *phdr; - int i; - - /* only supports phdr that fits in one page */ - if (ehdr->e_phnum > - (PAGE_SIZE - sizeof(Elf64_Ehdr)) / sizeof(Elf64_Phdr)) - return -EINVAL; - - phdr = (Elf64_Phdr *)(page_addr + sizeof(Elf64_Ehdr)); - - for (i = 0; i < ehdr->e_phnum; ++i) { - if (phdr[i].p_type == PT_NOTE && - !stack_map_parse_build_id(page_addr, build_id, - page_addr + phdr[i].p_offset, - phdr[i].p_filesz)) - return 0; - } - return -EINVAL; -} - -/* Parse build ID of ELF file mapped to vma */ -static int stack_map_get_build_id(struct vm_area_struct *vma, - unsigned char *build_id) -{ - Elf32_Ehdr *ehdr; - struct page *page; - void *page_addr; - int ret; - - /* only works for page backed storage */ - if (!vma->vm_file) - return -EINVAL; - - page = find_get_page(vma->vm_file->f_mapping, 0); - if (!page) - return -EFAULT; /* page not mapped */ - - ret = -EINVAL; - page_addr = kmap_atomic(page); - ehdr = (Elf32_Ehdr *)page_addr; - - /* compare magic x7f "ELF" */ - if (memcmp(ehdr->e_ident, ELFMAG, SELFMAG) != 0) - goto out; - - /* only support executable file and shared object file */ - if (ehdr->e_type != ET_EXEC && ehdr->e_type != ET_DYN) - goto out; - - if (ehdr->e_ident[EI_CLASS] == ELFCLASS32) - ret = stack_map_get_build_id_32(page_addr, build_id); - else if (ehdr->e_ident[EI_CLASS] == ELFCLASS64) - ret = stack_map_get_build_id_64(page_addr, build_id); -out: - kunmap_atomic(page_addr); - put_page(page); - return ret; -} - static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs, u64 *ips, u32 trace_nr, bool user) { @@ -317,18 +182,18 @@ static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs, for (i = 0; i < trace_nr; i++) { id_offs[i].status = BPF_STACK_BUILD_ID_IP; id_offs[i].ip = ips[i]; - memset(id_offs[i].build_id, 0, BPF_BUILD_ID_SIZE); + memset(id_offs[i].build_id, 0, BUILD_ID_SIZE_MAX); } return; } for (i = 0; i < trace_nr; i++) { vma = find_vma(current->mm, ips[i]); - if (!vma || stack_map_get_build_id(vma, id_offs[i].build_id)) { + if (!vma || build_id_parse(vma, id_offs[i].build_id)) { /* per entry fall back to ips */ id_offs[i].status = BPF_STACK_BUILD_ID_IP; id_offs[i].ip = ips[i]; - memset(id_offs[i].build_id, 0, BPF_BUILD_ID_SIZE); + memset(id_offs[i].build_id, 0, BUILD_ID_SIZE_MAX); continue; } id_offs[i].offset = (vma->vm_pgoff << PAGE_SHIFT) + ips[i] diff --git a/lib/Makefile b/lib/Makefile index afeff05fa8c57..a6b160c3a4fac 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -36,7 +36,8 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \ flex_proportions.o ratelimit.o show_mem.o \ is_single_threaded.o plist.o decompress.o kobject_uevent.o \ earlycpio.o seq_buf.o siphash.o dec_and_lock.o \ - nmi_backtrace.o nodemask.o win_minmax.o memcat_p.o + nmi_backtrace.o nodemask.o win_minmax.o memcat_p.o \ + buildid.o lib-$(CONFIG_PRINTK) += dump_stack.o lib-$(CONFIG_SMP) += cpumask.o diff --git a/lib/buildid.c b/lib/buildid.c new file mode 100644 index 0000000000000..4a4f520c0e293 --- /dev/null +++ b/lib/buildid.c @@ -0,0 +1,136 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include +#include +#include + +#define BUILD_ID 3 +/* + * Parse build id from the note segment. This logic can be shared between + * 32-bit and 64-bit system, because Elf32_Nhdr and Elf64_Nhdr are + * identical. + */ +static inline int parse_build_id(void *page_addr, + unsigned char *build_id, + void *note_start, + Elf32_Word note_size) +{ + Elf32_Word note_offs = 0, new_offs; + + /* check for overflow */ + if (note_start < page_addr || note_start + note_size < note_start) + return -EINVAL; + + /* only supports note that fits in the first page */ + if (note_start + note_size > page_addr + PAGE_SIZE) + return -EINVAL; + + while (note_offs + sizeof(Elf32_Nhdr) < note_size) { + Elf32_Nhdr *nhdr = (Elf32_Nhdr *)(note_start + note_offs); + + if (nhdr->n_type == BUILD_ID && + nhdr->n_namesz == sizeof("GNU") && + nhdr->n_descsz > 0 && + nhdr->n_descsz <= BUILD_ID_SIZE_MAX) { + memcpy(build_id, + note_start + note_offs + + ALIGN(sizeof("GNU"), 4) + sizeof(Elf32_Nhdr), + nhdr->n_descsz); + memset(build_id + nhdr->n_descsz, 0, + BUILD_ID_SIZE_MAX - nhdr->n_descsz); + return 0; + } + new_offs = note_offs + sizeof(Elf32_Nhdr) + + ALIGN(nhdr->n_namesz, 4) + ALIGN(nhdr->n_descsz, 4); + if (new_offs <= note_offs) /* overflow */ + break; + note_offs = new_offs; + } + return -EINVAL; +} + +/* Parse build ID from 32-bit ELF */ +static int get_build_id_32(void *page_addr, unsigned char *build_id) +{ + Elf32_Ehdr *ehdr = (Elf32_Ehdr *)page_addr; + Elf32_Phdr *phdr; + int i; + + /* only supports phdr that fits in one page */ + if (ehdr->e_phnum > + (PAGE_SIZE - sizeof(Elf32_Ehdr)) / sizeof(Elf32_Phdr)) + return -EINVAL; + + phdr = (Elf32_Phdr *)(page_addr + sizeof(Elf32_Ehdr)); + + for (i = 0; i < ehdr->e_phnum; ++i) { + if (phdr[i].p_type == PT_NOTE && + !parse_build_id(page_addr, build_id, + page_addr + phdr[i].p_offset, + phdr[i].p_filesz)) + return 0; + } + return -EINVAL; +} + +/* Parse build ID from 64-bit ELF */ +static int get_build_id_64(void *page_addr, unsigned char *build_id) +{ + Elf64_Ehdr *ehdr = (Elf64_Ehdr *)page_addr; + Elf64_Phdr *phdr; + int i; + + /* only supports phdr that fits in one page */ + if (ehdr->e_phnum > + (PAGE_SIZE - sizeof(Elf64_Ehdr)) / sizeof(Elf64_Phdr)) + return -EINVAL; + + phdr = (Elf64_Phdr *)(page_addr + sizeof(Elf64_Ehdr)); + + for (i = 0; i < ehdr->e_phnum; ++i) { + if (phdr[i].p_type == PT_NOTE && + !parse_build_id(page_addr, build_id, + page_addr + phdr[i].p_offset, + phdr[i].p_filesz)) + return 0; + } + return -EINVAL; +} + +/* Parse build ID of ELF file mapped to vma */ +int build_id_parse(struct vm_area_struct *vma, unsigned char *build_id) +{ + Elf32_Ehdr *ehdr; + struct page *page; + void *page_addr; + int ret; + + /* only works for page backed storage */ + if (!vma->vm_file) + return -EINVAL; + + page = find_get_page(vma->vm_file->f_mapping, 0); + if (!page) + return -EFAULT; /* page not mapped */ + + ret = -EINVAL; + page_addr = kmap_atomic(page); + ehdr = (Elf32_Ehdr *)page_addr; + + /* compare magic x7f "ELF" */ + if (memcmp(ehdr->e_ident, ELFMAG, SELFMAG) != 0) + goto out; + + /* only support executable file and shared object file */ + if (ehdr->e_type != ET_EXEC && ehdr->e_type != ET_DYN) + goto out; + + if (ehdr->e_ident[EI_CLASS] == ELFCLASS32) + ret = get_build_id_32(page_addr, build_id); + else if (ehdr->e_ident[EI_CLASS] == ELFCLASS64) + ret = get_build_id_64(page_addr, build_id); +out: + kunmap_atomic(page_addr); + put_page(page); + return ret; +} -- GitLab From 921f88fc891922b325b3668cd026a386571ed602 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 14 Jan 2021 14:40:43 +0100 Subject: [PATCH 0869/2012] bpf: Add size arg to build_id_parse function It's possible to have other build id types (other than default SHA1). Currently there's also ld support for MD5 build id. Adding size argument to build_id_parse function, that returns (if defined) size of the parsed build id, so we can recognize the build id type. Signed-off-by: Jiri Olsa Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20210114134044.1418404-3-jolsa@kernel.org --- include/linux/buildid.h | 3 ++- kernel/bpf/stackmap.c | 2 +- lib/buildid.c | 29 +++++++++++++++++++++-------- 3 files changed, 24 insertions(+), 10 deletions(-) diff --git a/include/linux/buildid.h b/include/linux/buildid.h index 08028a212589f..40232f90db6e5 100644 --- a/include/linux/buildid.h +++ b/include/linux/buildid.h @@ -6,6 +6,7 @@ #define BUILD_ID_SIZE_MAX 20 -int build_id_parse(struct vm_area_struct *vma, unsigned char *build_id); +int build_id_parse(struct vm_area_struct *vma, unsigned char *build_id, + __u32 *size); #endif diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c index 55d254a59f073..cabaf7db8efc0 100644 --- a/kernel/bpf/stackmap.c +++ b/kernel/bpf/stackmap.c @@ -189,7 +189,7 @@ static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs, for (i = 0; i < trace_nr; i++) { vma = find_vma(current->mm, ips[i]); - if (!vma || build_id_parse(vma, id_offs[i].build_id)) { + if (!vma || build_id_parse(vma, id_offs[i].build_id, NULL)) { /* per entry fall back to ips */ id_offs[i].status = BPF_STACK_BUILD_ID_IP; id_offs[i].ip = ips[i]; diff --git a/lib/buildid.c b/lib/buildid.c index 4a4f520c0e293..6156997c3895e 100644 --- a/lib/buildid.c +++ b/lib/buildid.c @@ -12,6 +12,7 @@ */ static inline int parse_build_id(void *page_addr, unsigned char *build_id, + __u32 *size, void *note_start, Elf32_Word note_size) { @@ -38,6 +39,8 @@ static inline int parse_build_id(void *page_addr, nhdr->n_descsz); memset(build_id + nhdr->n_descsz, 0, BUILD_ID_SIZE_MAX - nhdr->n_descsz); + if (size) + *size = nhdr->n_descsz; return 0; } new_offs = note_offs + sizeof(Elf32_Nhdr) + @@ -50,7 +53,8 @@ static inline int parse_build_id(void *page_addr, } /* Parse build ID from 32-bit ELF */ -static int get_build_id_32(void *page_addr, unsigned char *build_id) +static int get_build_id_32(void *page_addr, unsigned char *build_id, + __u32 *size) { Elf32_Ehdr *ehdr = (Elf32_Ehdr *)page_addr; Elf32_Phdr *phdr; @@ -65,7 +69,7 @@ static int get_build_id_32(void *page_addr, unsigned char *build_id) for (i = 0; i < ehdr->e_phnum; ++i) { if (phdr[i].p_type == PT_NOTE && - !parse_build_id(page_addr, build_id, + !parse_build_id(page_addr, build_id, size, page_addr + phdr[i].p_offset, phdr[i].p_filesz)) return 0; @@ -74,7 +78,8 @@ static int get_build_id_32(void *page_addr, unsigned char *build_id) } /* Parse build ID from 64-bit ELF */ -static int get_build_id_64(void *page_addr, unsigned char *build_id) +static int get_build_id_64(void *page_addr, unsigned char *build_id, + __u32 *size) { Elf64_Ehdr *ehdr = (Elf64_Ehdr *)page_addr; Elf64_Phdr *phdr; @@ -89,7 +94,7 @@ static int get_build_id_64(void *page_addr, unsigned char *build_id) for (i = 0; i < ehdr->e_phnum; ++i) { if (phdr[i].p_type == PT_NOTE && - !parse_build_id(page_addr, build_id, + !parse_build_id(page_addr, build_id, size, page_addr + phdr[i].p_offset, phdr[i].p_filesz)) return 0; @@ -97,8 +102,16 @@ static int get_build_id_64(void *page_addr, unsigned char *build_id) return -EINVAL; } -/* Parse build ID of ELF file mapped to vma */ -int build_id_parse(struct vm_area_struct *vma, unsigned char *build_id) +/* + * Parse build ID of ELF file mapped to vma + * @vma: vma object + * @build_id: buffer to store build id, at least BUILD_ID_SIZE long + * @size: returns actual build id size in case of success + * + * Returns 0 on success, otherwise error (< 0). + */ +int build_id_parse(struct vm_area_struct *vma, unsigned char *build_id, + __u32 *size) { Elf32_Ehdr *ehdr; struct page *page; @@ -126,9 +139,9 @@ int build_id_parse(struct vm_area_struct *vma, unsigned char *build_id) goto out; if (ehdr->e_ident[EI_CLASS] == ELFCLASS32) - ret = get_build_id_32(page_addr, build_id); + ret = get_build_id_32(page_addr, build_id, size); else if (ehdr->e_ident[EI_CLASS] == ELFCLASS64) - ret = get_build_id_64(page_addr, build_id); + ret = get_build_id_64(page_addr, build_id, size); out: kunmap_atomic(page_addr); put_page(page); -- GitLab From 88a16a1309333e43d328621ece3e9fa37027e8eb Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 14 Jan 2021 14:40:44 +0100 Subject: [PATCH 0870/2012] perf: Add build id data in mmap2 event Adding support to carry build id data in mmap2 event. The build id data replaces maj/min/ino/ino_generation fields, which are also used to identify map's binary, so it's ok to replace them with build id data: union { struct { u32 maj; u32 min; u64 ino; u64 ino_generation; }; struct { u8 build_id_size; u8 __reserved_1; u16 __reserved_2; u8 build_id[20]; }; }; Replaced maj/min/ino/ino_generation fields give us size of 24 bytes. We use 20 bytes for build id data, 1 byte for size and rest is unused. There's new misc bit for mmap2 to signal there's build id data in it: #define PERF_RECORD_MISC_MMAP_BUILD_ID (1 << 14) Signed-off-by: Jiri Olsa Signed-off-by: Alexei Starovoitov Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/bpf/20210114134044.1418404-4-jolsa@kernel.org --- include/uapi/linux/perf_event.h | 42 +++++++++++++++++++++++++++++---- kernel/events/core.c | 32 +++++++++++++++++++++---- 2 files changed, 65 insertions(+), 9 deletions(-) diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index b15e3447cd9fe..cb6f841035608 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -386,7 +386,8 @@ struct perf_event_attr { aux_output : 1, /* generate AUX records instead of events */ cgroup : 1, /* include cgroup events */ text_poke : 1, /* include text poke events */ - __reserved_1 : 30; + build_id : 1, /* use build id in mmap2 events */ + __reserved_1 : 29; union { __u32 wakeup_events; /* wakeup every n events */ @@ -659,6 +660,22 @@ struct perf_event_mmap_page { __u64 aux_size; }; +/* + * The current state of perf_event_header::misc bits usage: + * ('|' used bit, '-' unused bit) + * + * 012 CDEF + * |||---------|||| + * + * Where: + * 0-2 CPUMODE_MASK + * + * C PROC_MAP_PARSE_TIMEOUT + * D MMAP_DATA / COMM_EXEC / FORK_EXEC / SWITCH_OUT + * E MMAP_BUILD_ID / EXACT_IP / SCHED_OUT_PREEMPT + * F (reserved) + */ + #define PERF_RECORD_MISC_CPUMODE_MASK (7 << 0) #define PERF_RECORD_MISC_CPUMODE_UNKNOWN (0 << 0) #define PERF_RECORD_MISC_KERNEL (1 << 0) @@ -690,6 +707,7 @@ struct perf_event_mmap_page { * * PERF_RECORD_MISC_EXACT_IP - PERF_RECORD_SAMPLE of precise events * PERF_RECORD_MISC_SWITCH_OUT_PREEMPT - PERF_RECORD_SWITCH* events + * PERF_RECORD_MISC_MMAP_BUILD_ID - PERF_RECORD_MMAP2 event * * * PERF_RECORD_MISC_EXACT_IP: @@ -699,9 +717,13 @@ struct perf_event_mmap_page { * * PERF_RECORD_MISC_SWITCH_OUT_PREEMPT: * Indicates that thread was preempted in TASK_RUNNING state. + * + * PERF_RECORD_MISC_MMAP_BUILD_ID: + * Indicates that mmap2 event carries build id data. */ #define PERF_RECORD_MISC_EXACT_IP (1 << 14) #define PERF_RECORD_MISC_SWITCH_OUT_PREEMPT (1 << 14) +#define PERF_RECORD_MISC_MMAP_BUILD_ID (1 << 14) /* * Reserve the last bit to indicate some extended misc field */ @@ -915,10 +937,20 @@ enum perf_event_type { * u64 addr; * u64 len; * u64 pgoff; - * u32 maj; - * u32 min; - * u64 ino; - * u64 ino_generation; + * union { + * struct { + * u32 maj; + * u32 min; + * u64 ino; + * u64 ino_generation; + * }; + * struct { + * u8 build_id_size; + * u8 __reserved_1; + * u16 __reserved_2; + * u8 build_id[20]; + * }; + * }; * u32 prot, flags; * char filename[]; * struct sample_id sample_id; diff --git a/kernel/events/core.c b/kernel/events/core.c index 55d18791a72de..c37401e3e5f73 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -53,6 +53,7 @@ #include #include #include +#include #include "internal.h" @@ -397,6 +398,7 @@ static atomic_t nr_ksymbol_events __read_mostly; static atomic_t nr_bpf_events __read_mostly; static atomic_t nr_cgroup_events __read_mostly; static atomic_t nr_text_poke_events __read_mostly; +static atomic_t nr_build_id_events __read_mostly; static LIST_HEAD(pmus); static DEFINE_MUTEX(pmus_lock); @@ -4673,6 +4675,8 @@ static void unaccount_event(struct perf_event *event) dec = true; if (event->attr.mmap || event->attr.mmap_data) atomic_dec(&nr_mmap_events); + if (event->attr.build_id) + atomic_dec(&nr_build_id_events); if (event->attr.comm) atomic_dec(&nr_comm_events); if (event->attr.namespaces) @@ -8046,6 +8050,8 @@ struct perf_mmap_event { u64 ino; u64 ino_generation; u32 prot, flags; + u8 build_id[BUILD_ID_SIZE_MAX]; + u32 build_id_size; struct { struct perf_event_header header; @@ -8077,6 +8083,7 @@ static void perf_event_mmap_output(struct perf_event *event, struct perf_sample_data sample; int size = mmap_event->event_id.header.size; u32 type = mmap_event->event_id.header.type; + bool use_build_id; int ret; if (!perf_event_mmap_match(event, data)) @@ -8101,13 +8108,25 @@ static void perf_event_mmap_output(struct perf_event *event, mmap_event->event_id.pid = perf_event_pid(event, current); mmap_event->event_id.tid = perf_event_tid(event, current); + use_build_id = event->attr.build_id && mmap_event->build_id_size; + + if (event->attr.mmap2 && use_build_id) + mmap_event->event_id.header.misc |= PERF_RECORD_MISC_MMAP_BUILD_ID; + perf_output_put(&handle, mmap_event->event_id); if (event->attr.mmap2) { - perf_output_put(&handle, mmap_event->maj); - perf_output_put(&handle, mmap_event->min); - perf_output_put(&handle, mmap_event->ino); - perf_output_put(&handle, mmap_event->ino_generation); + if (use_build_id) { + u8 size[4] = { (u8) mmap_event->build_id_size, 0, 0, 0 }; + + __output_copy(&handle, size, 4); + __output_copy(&handle, mmap_event->build_id, BUILD_ID_SIZE_MAX); + } else { + perf_output_put(&handle, mmap_event->maj); + perf_output_put(&handle, mmap_event->min); + perf_output_put(&handle, mmap_event->ino); + perf_output_put(&handle, mmap_event->ino_generation); + } perf_output_put(&handle, mmap_event->prot); perf_output_put(&handle, mmap_event->flags); } @@ -8236,6 +8255,9 @@ got_name: mmap_event->event_id.header.size = sizeof(mmap_event->event_id) + size; + if (atomic_read(&nr_build_id_events)) + build_id_parse(vma, mmap_event->build_id, &mmap_event->build_id_size); + perf_iterate_sb(perf_event_mmap_output, mmap_event, NULL); @@ -11172,6 +11194,8 @@ static void account_event(struct perf_event *event) inc = true; if (event->attr.mmap || event->attr.mmap_data) atomic_inc(&nr_mmap_events); + if (event->attr.build_id) + atomic_inc(&nr_build_id_events); if (event->attr.comm) atomic_inc(&nr_comm_events); if (event->attr.namespaces) -- GitLab From 217bfbb8b0bfa24619b11ab75c135fec99b99b20 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 15 Jan 2021 10:34:28 +0100 Subject: [PATCH 0871/2012] ALSA: seq: oss: Fix missing error check in snd_seq_oss_synth_make_info() snd_seq_oss_synth_make_info() didn't check the error code from snd_seq_oss_midi_make_info(), and this leads to the call of strlcpy() with the uninitialized string as the source, which may lead to the access over the limit. Add the proper error check for avoiding the failure. Reported-by: syzbot+e42504ff21cff05a595f@syzkaller.appspotmail.com Cc: Link: https://lore.kernel.org/r/20210115093428.15882-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/core/seq/oss/seq_oss_synth.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sound/core/seq/oss/seq_oss_synth.c b/sound/core/seq/oss/seq_oss_synth.c index 11554d0412f06..1b8409ec2c97f 100644 --- a/sound/core/seq/oss/seq_oss_synth.c +++ b/sound/core/seq/oss/seq_oss_synth.c @@ -611,7 +611,8 @@ snd_seq_oss_synth_make_info(struct seq_oss_devinfo *dp, int dev, struct synth_in if (info->is_midi) { struct midi_info minf; - snd_seq_oss_midi_make_info(dp, info->midi_mapped, &minf); + if (snd_seq_oss_midi_make_info(dp, info->midi_mapped, &minf)) + return -ENXIO; inf->synth_type = SYNTH_TYPE_MIDI; inf->synth_subtype = 0; inf->nr_voices = 16; -- GitLab From dca5244d2f5b94f1809f0c02a549edf41ccd5493 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 12 Jan 2021 22:48:32 +0000 Subject: [PATCH 0872/2012] compiler.h: Raise minimum version of GCC to 5.1 for arm64 GCC versions >= 4.9 and < 5.1 have been shown to emit memory references beyond the stack pointer, resulting in memory corruption if an interrupt is taken after the stack pointer has been adjusted but before the reference has been executed. This leads to subtle, infrequent data corruption such as the EXT4 problems reported by Russell King at the link below. Life is too short for buggy compilers, so raise the minimum GCC version required by arm64 to 5.1. Reported-by: Russell King Suggested-by: Arnd Bergmann Signed-off-by: Will Deacon Tested-by: Nathan Chancellor Reviewed-by: Nick Desaulniers Reviewed-by: Nathan Chancellor Acked-by: Linus Torvalds Cc: Cc: Theodore Ts'o Cc: Florian Weimer Cc: Peter Zijlstra Cc: Nick Desaulniers Link: https://lore.kernel.org/r/20210105154726.GD1551@shell.armlinux.org.uk Link: https://lore.kernel.org/r/20210112224832.10980-1-will@kernel.org Signed-off-by: Catalin Marinas --- include/linux/compiler-gcc.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index 74c6c0486eed7..555ab0fddbef7 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -13,6 +13,12 @@ /* https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58145 */ #if GCC_VERSION < 40900 # error Sorry, your version of GCC is too old - please use 4.9 or newer. +#elif defined(CONFIG_ARM64) && GCC_VERSION < 50100 +/* + * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=63293 + * https://lore.kernel.org/r/20210107111841.GN1551@shell.armlinux.org.uk + */ +# error Sorry, your version of GCC is too old - please use 5.1 or newer. #endif /* -- GitLab From b6d8878d24e39f213df0f3ea7abebd15edc7be21 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 14 Jan 2021 12:48:12 +0000 Subject: [PATCH 0873/2012] arm64: syscall: include prototype for EL0 SVC functions The kbuild test robot reports that when building with W=1, GCC will warn for a couple of missing prototypes in syscall.c: | arch/arm64/kernel/syscall.c:157:6: warning: no previous prototype for 'do_el0_svc' [-Wmissing-prototypes] | 157 | void do_el0_svc(struct pt_regs *regs) | | ^~~~~~~~~~ | arch/arm64/kernel/syscall.c:164:6: warning: no previous prototype for 'do_el0_svc_compat' [-Wmissing-prototypes] | 164 | void do_el0_svc_compat(struct pt_regs *regs) | | ^~~~~~~~~~~~~~~~~ While this isn't a functional problem, as a general policy we should include the prototype for functions wherever possible to catch any accidental divergence between the prototype and implementation. Here we can easily include , so let's do so. While there are a number of warnings elsewhere and some warnings enabled under W=1 are of questionable benefit, this change helps to make the code more robust as it evolved and reduces the noise somewhat, so it seems worthwhile. Signed-off-by: Mark Rutland Reported-by: kernel test robot Cc: Will Deacon Link: https://lore.kernel.org/r/202101141046.n8iPO3mw-lkp@intel.com Link: https://lore.kernel.org/r/20210114124812.17754-1-mark.rutland@arm.com Signed-off-by: Catalin Marinas --- arch/arm64/kernel/syscall.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/kernel/syscall.c b/arch/arm64/kernel/syscall.c index 0bfac95fe4643..c2877c332f2dc 100644 --- a/arch/arm64/kernel/syscall.c +++ b/arch/arm64/kernel/syscall.c @@ -9,6 +9,7 @@ #include #include +#include #include #include #include -- GitLab From 3a57a643a851dbb1c4a1819394ca009e3bfa4813 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Fri, 8 Jan 2021 18:31:44 +0000 Subject: [PATCH 0874/2012] arm64: selftests: Fix spelling of 'Mismatch' The SVE and FPSIMD stress tests have a spelling mistake in the output, fix it. Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20210108183144.673-1-broonie@kernel.org Signed-off-by: Catalin Marinas --- tools/testing/selftests/arm64/fp/fpsimd-test.S | 2 +- tools/testing/selftests/arm64/fp/sve-test.S | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/arm64/fp/fpsimd-test.S b/tools/testing/selftests/arm64/fp/fpsimd-test.S index 1c5556bdd11d1..0dbd594c2747c 100644 --- a/tools/testing/selftests/arm64/fp/fpsimd-test.S +++ b/tools/testing/selftests/arm64/fp/fpsimd-test.S @@ -457,7 +457,7 @@ function barf mov x11, x1 // actual data mov x12, x2 // data size - puts "Mistatch: PID=" + puts "Mismatch: PID=" mov x0, x20 bl putdec puts ", iteration=" diff --git a/tools/testing/selftests/arm64/fp/sve-test.S b/tools/testing/selftests/arm64/fp/sve-test.S index f95074c9b48b7..9210691aa9985 100644 --- a/tools/testing/selftests/arm64/fp/sve-test.S +++ b/tools/testing/selftests/arm64/fp/sve-test.S @@ -625,7 +625,7 @@ function barf mov x11, x1 // actual data mov x12, x2 // data size - puts "Mistatch: PID=" + puts "Mismatch: PID=" mov x0, x20 bl putdec puts ", iteration=" -- GitLab From f84d3a1ec375e46a55cc3ba85c04272b24bd3921 Mon Sep 17 00:00:00 2001 From: Kai-Chuan Hsieh Date: Fri, 15 Jan 2021 11:15:15 +0800 Subject: [PATCH 0875/2012] ALSA: hda: Add Cometlake-R PCI ID Add HD Audio Device PCI ID for the Intel Cometlake-R platform Reviewed-by: Kai Vehmanen Signed-off-by: Kai-Chuan Hsieh Link: https://lore.kernel.org/r/20210115031515.13100-1-kaichuan.hsieh@canonical.com Signed-off-by: Takashi Iwai --- sound/pci/hda/hda_intel.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index 8d568277088a1..5a50d3a464459 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -2484,6 +2484,9 @@ static const struct pci_device_id azx_ids[] = { /* CometLake-S */ { PCI_DEVICE(0x8086, 0xa3f0), .driver_data = AZX_DRIVER_SKL | AZX_DCAPS_INTEL_SKYLAKE}, + /* CometLake-R */ + { PCI_DEVICE(0x8086, 0xf0c8), + .driver_data = AZX_DRIVER_SKL | AZX_DCAPS_INTEL_SKYLAKE}, /* Icelake */ { PCI_DEVICE(0x8086, 0x34c8), .driver_data = AZX_DRIVER_SKL | AZX_DCAPS_INTEL_SKYLAKE}, -- GitLab From 668af87f995b6d6d09595c088ad1fb5dd9ff25d2 Mon Sep 17 00:00:00 2001 From: John Ogness Date: Wed, 13 Jan 2021 15:48:34 +0106 Subject: [PATCH 0876/2012] printk: ringbuffer: fix line counting Counting text lines in a record simply involves counting the number of newline characters (+1). However, it is searching the full data block for newline characters, even though the text data can be (and often is) a subset of that area. Since the extra area in the data block was never initialized, the result is that extra newlines may be seen and counted. Restrict newline searching to the text data length. Fixes: b6cf8b3f3312 ("printk: add lockless ringbuffer") Signed-off-by: John Ogness Reviewed-by: Petr Mladek Acked-by: Sergey Senozhatsky Signed-off-by: Petr Mladek Link: https://lore.kernel.org/r/20210113144234.6545-1-john.ogness@linutronix.de --- kernel/printk/printk_ringbuffer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/printk/printk_ringbuffer.c b/kernel/printk/printk_ringbuffer.c index 74e25a1704f2b..617dd63589650 100644 --- a/kernel/printk/printk_ringbuffer.c +++ b/kernel/printk/printk_ringbuffer.c @@ -1720,7 +1720,7 @@ static bool copy_data(struct prb_data_ring *data_ring, /* Caller interested in the line count? */ if (line_count) - *line_count = count_lines(data, data_size); + *line_count = count_lines(data, len); /* Caller interested in the data content? */ if (!buf || !buf_size) -- GitLab From 89ccf18f032f26946e2ea6258120472eec6aa745 Mon Sep 17 00:00:00 2001 From: John Ogness Date: Wed, 13 Jan 2021 17:50:13 +0106 Subject: [PATCH 0877/2012] printk: fix kmsg_dump_get_buffer length calulations kmsg_dump_get_buffer() uses @syslog to determine if the syslog prefix should be written to the buffer. However, when calculating the maximum number of records that can fit into the buffer, it always counts the bytes from the syslog prefix. Use @syslog when calculating the maximum number of records that can fit into the buffer. Fixes: e2ae715d66bf ("kmsg - kmsg_dump() use iterator to receive log buffer content") Signed-off-by: John Ogness Reviewed-by: Petr Mladek Acked-by: Sergey Senozhatsky Signed-off-by: Petr Mladek Link: https://lore.kernel.org/r/20210113164413.1599-1-john.ogness@linutronix.de --- kernel/printk/printk.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index c8847ee571f0e..96e24074c9626 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -3423,7 +3423,7 @@ bool kmsg_dump_get_buffer(struct kmsg_dumper *dumper, bool syslog, while (prb_read_valid_info(prb, seq, &info, &line_count)) { if (r.info->seq >= dumper->next_seq) break; - l += get_record_print_text_size(&info, line_count, true, time); + l += get_record_print_text_size(&info, line_count, syslog, time); seq = r.info->seq + 1; } @@ -3433,7 +3433,7 @@ bool kmsg_dump_get_buffer(struct kmsg_dumper *dumper, bool syslog, &info, &line_count)) { if (r.info->seq >= dumper->next_seq) break; - l -= get_record_print_text_size(&info, line_count, true, time); + l -= get_record_print_text_size(&info, line_count, syslog, time); seq = r.info->seq + 1; } -- GitLab From b503087445ce7e45fabdee87ca9e460d5b5b5168 Mon Sep 17 00:00:00 2001 From: Peter Collingbourne Date: Thu, 14 Jan 2021 12:14:05 -0800 Subject: [PATCH 0878/2012] mmc: core: don't initialize block size from ext_csd if not present If extended CSD was not available, the eMMC driver would incorrectly set the block size to 0, as the data_sector_size field of ext_csd was never initialized. This issue was exposed by commit 817046ecddbc ("block: Align max_hw_sectors to logical blocksize") which caused max_sectors and max_hw_sectors to be set to 0 after setting the block size to 0, resulting in a kernel panic in bio_split when attempting to read from the device. Fix it by only reading the block size from ext_csd if it is available. Fixes: a5075eb94837 ("mmc: block: Allow disabling 512B sector size emulation") Signed-off-by: Peter Collingbourne Reviewed-by: Damien Le Moal Link: https://linux-review.googlesource.com/id/If244d178da4d86b52034459438fec295b02d6e60 Acked-by: Adrian Hunter Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20210114201405.2934886-1-pcc@google.com Signed-off-by: Ulf Hansson --- drivers/mmc/core/queue.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/mmc/core/queue.c b/drivers/mmc/core/queue.c index de7cb0369c308..002426e3cf76c 100644 --- a/drivers/mmc/core/queue.c +++ b/drivers/mmc/core/queue.c @@ -384,8 +384,10 @@ static void mmc_setup_queue(struct mmc_queue *mq, struct mmc_card *card) "merging was advertised but not possible"); blk_queue_max_segments(mq->queue, mmc_get_max_segments(host)); - if (mmc_card_mmc(card)) + if (mmc_card_mmc(card) && card->ext_csd.data_sector_size) { block_size = card->ext_csd.data_sector_size; + WARN_ON(block_size != 512 && block_size != 4096); + } blk_queue_logical_block_size(mq->queue, block_size); /* -- GitLab From 4d163ad79b155c71bf30366dc38f8d2502f78844 Mon Sep 17 00:00:00 2001 From: Michael Hennerich Date: Thu, 14 Jan 2021 17:42:17 +0200 Subject: [PATCH 0879/2012] spi: cadence: cache reference clock rate during probe The issue is that using SPI from a callback under the CCF lock will deadlock, since this code uses clk_get_rate(). Fixes: c474b38665463 ("spi: Add driver for Cadence SPI controller") Signed-off-by: Michael Hennerich Signed-off-by: Alexandru Ardelean Link: https://lore.kernel.org/r/20210114154217.51996-1-alexandru.ardelean@analog.com Signed-off-by: Mark Brown --- drivers/spi/spi-cadence.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/spi/spi-cadence.c b/drivers/spi/spi-cadence.c index 70467b9d61baa..a3afd1b9ac567 100644 --- a/drivers/spi/spi-cadence.c +++ b/drivers/spi/spi-cadence.c @@ -115,6 +115,7 @@ struct cdns_spi { void __iomem *regs; struct clk *ref_clk; struct clk *pclk; + unsigned int clk_rate; u32 speed_hz; const u8 *txbuf; u8 *rxbuf; @@ -250,7 +251,7 @@ static void cdns_spi_config_clock_freq(struct spi_device *spi, u32 ctrl_reg, baud_rate_val; unsigned long frequency; - frequency = clk_get_rate(xspi->ref_clk); + frequency = xspi->clk_rate; ctrl_reg = cdns_spi_read(xspi, CDNS_SPI_CR); @@ -558,8 +559,9 @@ static int cdns_spi_probe(struct platform_device *pdev) master->auto_runtime_pm = true; master->mode_bits = SPI_CPOL | SPI_CPHA | SPI_CS_HIGH; + xspi->clk_rate = clk_get_rate(xspi->ref_clk); /* Set to default valid value */ - master->max_speed_hz = clk_get_rate(xspi->ref_clk) / 4; + master->max_speed_hz = xspi->clk_rate / 4; xspi->speed_hz = master->max_speed_hz; master->bits_per_word_mask = SPI_BPW_MASK(8); -- GitLab From c28095bc99073ddda65e4f31f6ae0d908d4d5cd8 Mon Sep 17 00:00:00 2001 From: Thinh Nguyen Date: Thu, 14 Jan 2021 00:09:51 -0800 Subject: [PATCH 0880/2012] usb: udc: core: Use lock when write to soft_connect Use lock to guard against concurrent access for soft-connect/disconnect operations when writing to soft_connect sysfs. Fixes: 2ccea03a8f7e ("usb: gadget: introduce UDC Class") Cc: stable@vger.kernel.org Acked-by: Felipe Balbi Signed-off-by: Thinh Nguyen Link: https://lore.kernel.org/r/338ea01fbd69b1985ef58f0f59af02c805ddf189.1610611437.git.Thinh.Nguyen@synopsys.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/udc/core.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/usb/gadget/udc/core.c b/drivers/usb/gadget/udc/core.c index 6a62bbd01324f..ea114f922ccf6 100644 --- a/drivers/usb/gadget/udc/core.c +++ b/drivers/usb/gadget/udc/core.c @@ -1529,10 +1529,13 @@ static ssize_t soft_connect_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t n) { struct usb_udc *udc = container_of(dev, struct usb_udc, dev); + ssize_t ret; + mutex_lock(&udc_lock); if (!udc->driver) { dev_err(dev, "soft-connect without a gadget driver\n"); - return -EOPNOTSUPP; + ret = -EOPNOTSUPP; + goto out; } if (sysfs_streq(buf, "connect")) { @@ -1543,10 +1546,14 @@ static ssize_t soft_connect_store(struct device *dev, usb_gadget_udc_stop(udc); } else { dev_err(dev, "unsupported command '%s'\n", buf); - return -EINVAL; + ret = -EINVAL; + goto out; } - return n; + ret = n; +out: + mutex_unlock(&udc_lock); + return ret; } static DEVICE_ATTR_WO(soft_connect); -- GitLab From 576667bad341516edc4e18eb85acb0a2b4c9c9d9 Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Fri, 15 Jan 2021 18:19:06 +0200 Subject: [PATCH 0881/2012] xhci: make sure TRB is fully written before giving it to the controller Once the command ring doorbell is rung the xHC controller will parse all command TRBs on the command ring that have the cycle bit set properly. If the driver just started writing the next command TRB to the ring when hardware finished the previous TRB, then HW might fetch an incomplete TRB as long as its cycle bit set correctly. A command TRB is 16 bytes (128 bits) long. Driver writes the command TRB in four 32 bit chunks, with the chunk containing the cycle bit last. This does however not guarantee that chunks actually get written in that order. This was detected in stress testing when canceling URBs with several connected USB devices. Two consecutive "Set TR Dequeue pointer" commands got queued right after each other, and the second one was only partially written when the controller parsed it, causing the dequeue pointer to be set to bogus values. This was seen as error messages: "Mismatch between completed Set TR Deq Ptr command & xHCI internal state" Solution is to add a write memory barrier before writing the cycle bit. Cc: Tested-by: Ross Zwisler Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20210115161907.2875631-2-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-ring.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index 5677b81c09155..cf0c93a90200f 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -2931,6 +2931,8 @@ static void queue_trb(struct xhci_hcd *xhci, struct xhci_ring *ring, trb->field[0] = cpu_to_le32(field1); trb->field[1] = cpu_to_le32(field2); trb->field[2] = cpu_to_le32(field3); + /* make sure TRB is fully written before giving it to the controller */ + wmb(); trb->field[3] = cpu_to_le32(field4); trace_xhci_queue_trb(ring, trb); -- GitLab From da7e0c3c2909a3d9bf8acfe1db3cb213bd7febfb Mon Sep 17 00:00:00 2001 From: JC Kuo Date: Fri, 15 Jan 2021 18:19:07 +0200 Subject: [PATCH 0882/2012] xhci: tegra: Delay for disabling LFPS detector Occasionally, we are seeing some SuperSpeed devices resumes right after being directed to U3. This commits add 500us delay to ensure LFPS detector is disabled before sending ACK to firmware. [ 16.099363] tegra-xusb 70090000.usb: entering ELPG [ 16.104343] tegra-xusb 70090000.usb: 2-1 isn't suspended: 0x0c001203 [ 16.114576] tegra-xusb 70090000.usb: not all ports suspended: -16 [ 16.120789] tegra-xusb 70090000.usb: entering ELPG failed The register write passes through a few flop stages of 32KHz clock domain. NVIDIA ASIC designer reviewed RTL and suggests 500us delay. Cc: stable@vger.kernel.org Signed-off-by: JC Kuo Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20210115161907.2875631-3-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-tegra.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/usb/host/xhci-tegra.c b/drivers/usb/host/xhci-tegra.c index 934be16863523..50bb91b6a4b8d 100644 --- a/drivers/usb/host/xhci-tegra.c +++ b/drivers/usb/host/xhci-tegra.c @@ -623,6 +623,13 @@ static void tegra_xusb_mbox_handle(struct tegra_xusb *tegra, enable); if (err < 0) break; + + /* + * wait 500us for LFPS detector to be disabled before + * sending ACK + */ + if (!enable) + usleep_range(500, 1000); } if (err < 0) { -- GitLab From 1740e6736922cc1a5d061cc4240d08eacfbbaa71 Mon Sep 17 00:00:00 2001 From: Sumera Priyadarsini Date: Tue, 12 Jan 2021 10:25:49 +0100 Subject: [PATCH 0883/2012] bus: arm-integrator-lm: Add of_node_put() before return statement Every iteration of for_each_available_child_of_node() decrements the reference count of the previous node, however when control is transferred from the middle of the loop, as in the case of a return or break or goto, there is no decrement thus ultimately resulting in a memory leak. Fix a potential memory leak in arm-integrator-lm.c by inserting of_node_put() before a return statement. Issue found with Coccinelle. Signed-off-by: Sumera Priyadarsini Signed-off-by: Linus Walleij Link: https://lore.kernel.org/r/20200829174154.GA9319@Kaladin Link: https://lore.kernel.org/r/20210112092549.251548-1-linus.walleij@linaro.org' Signed-off-by: Arnd Bergmann --- drivers/bus/arm-integrator-lm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/bus/arm-integrator-lm.c b/drivers/bus/arm-integrator-lm.c index 845b6c43fef89..2344d560b1449 100644 --- a/drivers/bus/arm-integrator-lm.c +++ b/drivers/bus/arm-integrator-lm.c @@ -54,6 +54,7 @@ static int integrator_lm_populate(int num, struct device *dev) ret = of_platform_default_populate(child, NULL, dev); if (ret) { dev_err(dev, "failed to populate module\n"); + of_node_put(child); return ret; } } -- GitLab From 2004e62a2a06b9029f7d728a0b5d19a499569184 Mon Sep 17 00:00:00 2001 From: Nishanth Menon Date: Thu, 7 Jan 2021 07:22:28 -0600 Subject: [PATCH 0884/2012] arm64: defconfig: Drop unused K3 SoC specific options With [1] integrated and all users of the config symbols removed, we can safely remove the options from defconfig. [1] https://patchwork.kernel.org/project/linux-arm-kernel/patch/20201026170624.24241-1-nm@ti.com/ Signed-off-by: Nishanth Menon Link: https://lore.kernel.org/r/20210107132228.6577-1-nm@ti.com' Signed-off-by: Arnd Bergmann --- arch/arm64/configs/defconfig | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig index 838301650a794..a0bcf02012614 100644 --- a/arch/arm64/configs/defconfig +++ b/arch/arm64/configs/defconfig @@ -991,8 +991,6 @@ CONFIG_ARCH_TEGRA_210_SOC=y CONFIG_ARCH_TEGRA_186_SOC=y CONFIG_ARCH_TEGRA_194_SOC=y CONFIG_ARCH_TEGRA_234_SOC=y -CONFIG_ARCH_K3_AM6_SOC=y -CONFIG_ARCH_K3_J721E_SOC=y CONFIG_TI_SCI_PM_DOMAINS=y CONFIG_EXTCON_PTN5150=m CONFIG_EXTCON_USB_GPIO=y -- GitLab From 8a996b2d8a03beae3cb6adfc12673778c192085d Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Sun, 13 Dec 2020 23:55:17 +0100 Subject: [PATCH 0885/2012] ARM: dts: ux500: Reserve memory carveouts The Ux500 platforms have some memory carveouts set aside for communicating with the modem and for the initial secure software (ISSW). These areas are protected by the memory controller and will result in an external abort if accessed like common read/write memory. On the legacy boot loaders, these were set aside by using cmdline arguments such as this: mem=96M@0 mem_mtrace=15M@96M mem_mshared=1M@111M mem_modem=16M@112M mali.mali_mem=32M@128M mem=96M@160M hwmem=127M@256M mem_issw=1M@383M mem_ram_console=1M@384M mem=638M@385M Reserve the relevant areas in the device tree instead. The "mali", "hwmem", "mem_ram_console" and the trailing 1MB at the end of the memory reservations in the list are not relevant for the upstream kernel as these are nowadays replaced with upstream technologies such as CMA. The modem and ISSW reservations are necessary. This was manifested in a bug that surfaced in response to commit 7fef431be9c9 ("mm/page_alloc: place pages to tail in __free_pages_core()") which changes the behaviour of memory allocations in such a way that the platform will sooner run into these dangerous areas, with "Unhandled fault: imprecise external abort (0xc06) at 0xb6fd83dc" or similar: the real reason turns out to be that the PTE is pointing right into one of the reserved memory areas. We were just lucky until now. We need to augment the DB8500 and DB8520 SoCs similarly and also create a new include for the DB9500 used in the Snowball since this does not have a modem and thus does not need the modem memory reservation, albeit it needs the ISSW reservation. Signed-off-by: Linus Walleij Cc: stable@vger.kernel.org Cc: David Hildenbrand Link: https://lore.kernel.org/r/20201213225517.3838501-1-linus.walleij@linaro.org' Signed-off-by: Arnd Bergmann --- arch/arm/boot/dts/ste-db8500.dtsi | 38 ++++++++++++++++++++++++++++++ arch/arm/boot/dts/ste-db8520.dtsi | 38 ++++++++++++++++++++++++++++++ arch/arm/boot/dts/ste-db9500.dtsi | 35 +++++++++++++++++++++++++++ arch/arm/boot/dts/ste-snowball.dts | 2 +- 4 files changed, 112 insertions(+), 1 deletion(-) create mode 100644 arch/arm/boot/dts/ste-db9500.dtsi diff --git a/arch/arm/boot/dts/ste-db8500.dtsi b/arch/arm/boot/dts/ste-db8500.dtsi index d309fad32229d..344d29853bf76 100644 --- a/arch/arm/boot/dts/ste-db8500.dtsi +++ b/arch/arm/boot/dts/ste-db8500.dtsi @@ -12,4 +12,42 @@ 200000 0>; }; }; + + reserved-memory { + #address-cells = <1>; + #size-cells = <1>; + ranges; + + /* Modem trace memory */ + ram@06000000 { + reg = <0x06000000 0x00f00000>; + no-map; + }; + + /* Modem shared memory */ + ram@06f00000 { + reg = <0x06f00000 0x00100000>; + no-map; + }; + + /* Modem private memory */ + ram@07000000 { + reg = <0x07000000 0x01000000>; + no-map; + }; + + /* + * Initial Secure Software ISSW memory + * + * This is probably only used if the kernel tries + * to actually call into trustzone to run secure + * applications, which the mainline kernel probably + * will not do on this old chipset. But you can never + * be too careful, so reserve this memory anyway. + */ + ram@17f00000 { + reg = <0x17f00000 0x00100000>; + no-map; + }; + }; }; diff --git a/arch/arm/boot/dts/ste-db8520.dtsi b/arch/arm/boot/dts/ste-db8520.dtsi index 48bd8728ae27f..287804e9e1836 100644 --- a/arch/arm/boot/dts/ste-db8520.dtsi +++ b/arch/arm/boot/dts/ste-db8520.dtsi @@ -12,4 +12,42 @@ 200000 0>; }; }; + + reserved-memory { + #address-cells = <1>; + #size-cells = <1>; + ranges; + + /* Modem trace memory */ + ram@06000000 { + reg = <0x06000000 0x00f00000>; + no-map; + }; + + /* Modem shared memory */ + ram@06f00000 { + reg = <0x06f00000 0x00100000>; + no-map; + }; + + /* Modem private memory */ + ram@07000000 { + reg = <0x07000000 0x01000000>; + no-map; + }; + + /* + * Initial Secure Software ISSW memory + * + * This is probably only used if the kernel tries + * to actually call into trustzone to run secure + * applications, which the mainline kernel probably + * will not do on this old chipset. But you can never + * be too careful, so reserve this memory anyway. + */ + ram@17f00000 { + reg = <0x17f00000 0x00100000>; + no-map; + }; + }; }; diff --git a/arch/arm/boot/dts/ste-db9500.dtsi b/arch/arm/boot/dts/ste-db9500.dtsi new file mode 100644 index 0000000000000..0afff703191c6 --- /dev/null +++ b/arch/arm/boot/dts/ste-db9500.dtsi @@ -0,0 +1,35 @@ +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "ste-dbx5x0.dtsi" + +/ { + cpus { + cpu@300 { + /* cpufreq controls */ + operating-points = <1152000 0 + 800000 0 + 400000 0 + 200000 0>; + }; + }; + + reserved-memory { + #address-cells = <1>; + #size-cells = <1>; + ranges; + + /* + * Initial Secure Software ISSW memory + * + * This is probably only used if the kernel tries + * to actually call into trustzone to run secure + * applications, which the mainline kernel probably + * will not do on this old chipset. But you can never + * be too careful, so reserve this memory anyway. + */ + ram@17f00000 { + reg = <0x17f00000 0x00100000>; + no-map; + }; + }; +}; diff --git a/arch/arm/boot/dts/ste-snowball.dts b/arch/arm/boot/dts/ste-snowball.dts index be90e73c923ec..27d8a07718a00 100644 --- a/arch/arm/boot/dts/ste-snowball.dts +++ b/arch/arm/boot/dts/ste-snowball.dts @@ -4,7 +4,7 @@ */ /dts-v1/; -#include "ste-db8500.dtsi" +#include "ste-db9500.dtsi" #include "ste-href-ab8500.dtsi" #include "ste-href-family-pinctrl.dtsi" -- GitLab From f010505b78a4fa8d5b6480752566e7313fb5ca6e Mon Sep 17 00:00:00 2001 From: Marcelo Diop-Gonzalez Date: Fri, 15 Jan 2021 11:54:40 -0500 Subject: [PATCH 0886/2012] io_uring: flush timeouts that should already have expired Right now io_flush_timeouts() checks if the current number of events is equal to ->timeout.target_seq, but this will miss some timeouts if there have been more than 1 event added since the last time they were flushed (possible in io_submit_flush_completions(), for example). Fix it by recording the last sequence at which timeouts were flushed so that the number of events seen can be compared to the number of events needed without overflow. Signed-off-by: Marcelo Diop-Gonzalez Reviewed-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io_uring.c | 34 ++++++++++++++++++++++++++++++---- 1 file changed, 30 insertions(+), 4 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 372be9caf3402..06cc79d39586d 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -354,6 +354,7 @@ struct io_ring_ctx { unsigned cq_entries; unsigned cq_mask; atomic_t cq_timeouts; + unsigned cq_last_tm_flush; unsigned long cq_check_overflow; struct wait_queue_head cq_wait; struct fasync_struct *cq_fasync; @@ -1639,19 +1640,38 @@ static void __io_queue_deferred(struct io_ring_ctx *ctx) static void io_flush_timeouts(struct io_ring_ctx *ctx) { - while (!list_empty(&ctx->timeout_list)) { + u32 seq; + + if (list_empty(&ctx->timeout_list)) + return; + + seq = ctx->cached_cq_tail - atomic_read(&ctx->cq_timeouts); + + do { + u32 events_needed, events_got; struct io_kiocb *req = list_first_entry(&ctx->timeout_list, struct io_kiocb, timeout.list); if (io_is_timeout_noseq(req)) break; - if (req->timeout.target_seq != ctx->cached_cq_tail - - atomic_read(&ctx->cq_timeouts)) + + /* + * Since seq can easily wrap around over time, subtract + * the last seq at which timeouts were flushed before comparing. + * Assuming not more than 2^31-1 events have happened since, + * these subtractions won't have wrapped, so we can check if + * target is in [last_seq, current_seq] by comparing the two. + */ + events_needed = req->timeout.target_seq - ctx->cq_last_tm_flush; + events_got = seq - ctx->cq_last_tm_flush; + if (events_got < events_needed) break; list_del_init(&req->timeout.list); io_kill_timeout(req); - } + } while (!list_empty(&ctx->timeout_list)); + + ctx->cq_last_tm_flush = seq; } static void io_commit_cqring(struct io_ring_ctx *ctx) @@ -5837,6 +5857,12 @@ static int io_timeout(struct io_kiocb *req) tail = ctx->cached_cq_tail - atomic_read(&ctx->cq_timeouts); req->timeout.target_seq = tail + off; + /* Update the last seq here in case io_flush_timeouts() hasn't. + * This is safe because ->completion_lock is held, and submissions + * and completions are never mixed in the same ->completion_lock section. + */ + ctx->cq_last_tm_flush = tail; + /* * Insertion sort, ensuring the first entry in the list is always * the one we need first. -- GitLab From 9b268be3adbb410cc1a857477b638a71258891a8 Mon Sep 17 00:00:00 2001 From: Srinivas Kandagatla Date: Fri, 15 Jan 2021 16:55:19 +0000 Subject: [PATCH 0887/2012] MAINTAINERS: update maintainers of qcom audio Add myself as maintainer of qcom audio drivers, as Patrick has very little time to look at the patches. Signed-off-by: Srinivas Kandagatla Reviewed-by: Banajit Goswami Acked-by: Patrick Lai Link: https://lore.kernel.org/r/20210115165520.6023-1-srinivas.kandagatla@linaro.org Signed-off-by: Mark Brown --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index bff306838d571..3fc059ec786ac 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -14510,7 +14510,7 @@ S: Supported F: drivers/crypto/qat/ QCOM AUDIO (ASoC) DRIVERS -M: Patrick Lai +M: Srinivas Kandagatla M: Banajit Goswami L: alsa-devel@alsa-project.org (moderated for non-subscribers) S: Supported -- GitLab From 7505c06dabb5e814bda610c8d83338544f15db45 Mon Sep 17 00:00:00 2001 From: Srinivas Kandagatla Date: Fri, 15 Jan 2021 16:55:20 +0000 Subject: [PATCH 0888/2012] MAINTAINERS: update qcom ASoC drivers list Add full list of ASoC drivers that are maintained! Signed-off-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20210115165520.6023-2-srinivas.kandagatla@linaro.org Signed-off-by: Mark Brown --- MAINTAINERS | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 3fc059ec786ac..f32dcf49c27cd 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -14514,6 +14514,14 @@ M: Srinivas Kandagatla M: Banajit Goswami L: alsa-devel@alsa-project.org (moderated for non-subscribers) S: Supported +F: sound/soc/codecs/lpass-va-macro.c +F: sound/soc/codecs/lpass-wsa-macro.* +F: sound/soc/codecs/msm8916-wcd-analog.c +F: sound/soc/codecs/msm8916-wcd-digital.c +F: sound/soc/codecs/wcd9335.* +F: sound/soc/codecs/wcd934x.c +F: sound/soc/codecs/wcd-clsh-v2.* +F: sound/soc/codecs/wsa881x.c F: sound/soc/qcom/ QCOM IPA DRIVER -- GitLab From 301f0203e04293c13372c032198665bd75adf81b Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 29 Dec 2020 15:41:19 -0300 Subject: [PATCH 0889/2012] perf bpf examples: Fix bpf.h header include directive in 5sec.c example It was looking at bpf/bpf.h, which caused this problem: # perf trace -e tools/perf/examples/bpf/5sec.c /home/acme/git/perf/tools/perf/examples/bpf/5sec.c:42:10: fatal error: 'bpf/bpf.h' file not found #include ^~~~~~~~~~~ 1 error generated. ERROR: unable to compile tools/perf/examples/bpf/5sec.c Hint: Check error message shown above. Hint: You can also pre-compile it into .o using: clang -target bpf -O2 -c tools/perf/examples/bpf/5sec.c with proper -I and -D options. event syntax error: 'tools/perf/examples/bpf/5sec.c' \___ Failed to load tools/perf/examples/bpf/5sec.c from source: Error when compiling BPF scriptlet # Change that to plain bpf.h, to make it work again: # perf trace -e tools/perf/examples/bpf/5sec.c sleep 5s 0.000 perf_bpf_probe:hrtimer_nanosleep(__probe_ip: -1776891872, rqtp: 5000000000) # perf trace -e tools/perf/examples/bpf/5sec.c/max-stack=16/ sleep 5s 0.000 perf_bpf_probe:hrtimer_nanosleep(__probe_ip: -1776891872, rqtp: 5000000000) hrtimer_nanosleep ([kernel.kallsyms]) common_nsleep ([kernel.kallsyms]) __x64_sys_clock_nanosleep ([kernel.kallsyms]) do_syscall_64 ([kernel.kallsyms]) entry_SYSCALL_64_after_hwframe ([kernel.kallsyms]) __clock_nanosleep_2 (/usr/lib64/libc-2.32.so) # perf trace -e tools/perf/examples/bpf/5sec.c sleep 4s # Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/examples/bpf/5sec.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/examples/bpf/5sec.c b/tools/perf/examples/bpf/5sec.c index 65c4ff6892d96..e6b6181c6dc63 100644 --- a/tools/perf/examples/bpf/5sec.c +++ b/tools/perf/examples/bpf/5sec.c @@ -39,7 +39,7 @@ Copyright (C) 2018 Red Hat, Inc., Arnaldo Carvalho de Melo */ -#include +#include #define NSEC_PER_SEC 1000000000L -- GitLab From 38c53947a7dcb6d295769830c9085b0409921ec9 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 14 Jan 2021 12:26:08 -0300 Subject: [PATCH 0890/2012] tools headers UAPI: Sync kvm.h headers with the kernel sources To pick the changes in: 647daca25d24fb6e ("KVM: SVM: Add support for booting APs in an SEV-ES guest") That don't cause any tooling change, just silences this perf build warning: Warning: Kernel ABI header at 'tools/include/uapi/linux/kvm.h' differs from latest version at 'include/uapi/linux/kvm.h' diff -u tools/include/uapi/linux/kvm.h include/uapi/linux/kvm.h Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Cc: Paolo Bonzini Cc: Tom Lendacky Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/linux/kvm.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h index 886802b8ffba3..374c67875cdbd 100644 --- a/tools/include/uapi/linux/kvm.h +++ b/tools/include/uapi/linux/kvm.h @@ -251,6 +251,7 @@ struct kvm_hyperv_exit { #define KVM_EXIT_X86_RDMSR 29 #define KVM_EXIT_X86_WRMSR 30 #define KVM_EXIT_DIRTY_RING_FULL 31 +#define KVM_EXIT_AP_RESET_HOLD 32 /* For KVM_EXIT_INTERNAL_ERROR */ /* Emulate instruction failed. */ @@ -573,6 +574,7 @@ struct kvm_vapic_addr { #define KVM_MP_STATE_CHECK_STOP 6 #define KVM_MP_STATE_OPERATING 7 #define KVM_MP_STATE_LOAD 8 +#define KVM_MP_STATE_AP_RESET_HOLD 9 struct kvm_mp_state { __u32 mp_state; -- GitLab From addbdff24293ef772a1b8e5d127b570e70f08cdc Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 14 Jan 2021 12:35:27 -0300 Subject: [PATCH 0891/2012] tools headers: Syncronize linux/build_bug.h with the kernel sources To pick up the changes in: 3a176b94609a18f5 ("Revert "kbuild: avoid static_assert for genksyms"") And silence this perf build warning: Warning: Kernel ABI header at 'tools/include/linux/build_bug.h' differs from latest version at 'include/linux/build_bug.h' diff -u tools/include/linux/build_bug.h include/linux/build_bug.h Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Masahiro Yamada Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/linux/build_bug.h | 5 ----- 1 file changed, 5 deletions(-) diff --git a/tools/include/linux/build_bug.h b/tools/include/linux/build_bug.h index ce365d2127682..cc7070c7439ba 100644 --- a/tools/include/linux/build_bug.h +++ b/tools/include/linux/build_bug.h @@ -79,9 +79,4 @@ #define __static_assert(expr, msg, ...) _Static_assert(expr, msg) #endif // static_assert -#ifdef __GENKSYMS__ -/* genksyms gets confused by _Static_assert */ -#define _Static_assert(expr, ...) -#endif - #endif /* _LINUX_BUILD_BUG_H */ -- GitLab From a042a82ddbb3434f523c0671f5301d1fe796b4eb Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 14 Jan 2021 14:06:09 +0900 Subject: [PATCH 0892/2012] perf test: Fix shadow stat test for non-bash shells It was using some bash-specific features and failed to parse when running with a different shell like below: root@kbl-ppc:~/kbl-ws/perf-dev/lck-9077/acme.tmp/tools/perf# ./perf test 83 -vv 83: perf stat metrics (shadow stat) test : --- start --- test child forked, pid 3922 ./tests/shell/stat+shadow_stat.sh: 19: ./tests/shell/stat+shadow_stat.sh: [[: not found ./tests/shell/stat+shadow_stat.sh: 24: ./tests/shell/stat+shadow_stat.sh: [[: not found ./tests/shell/stat+shadow_stat.sh: 30: ./tests/shell/stat+shadow_stat.sh: [[: not found (standard_in) 2: syntax error ./tests/shell/stat+shadow_stat.sh: 36: ./tests/shell/stat+shadow_stat.sh: [[: not found ./tests/shell/stat+shadow_stat.sh: 19: ./tests/shell/stat+shadow_stat.sh: [[: not found ./tests/shell/stat+shadow_stat.sh: 24: ./tests/shell/stat+shadow_stat.sh: [[: not found ./tests/shell/stat+shadow_stat.sh: 30: ./tests/shell/stat+shadow_stat.sh: [[: not found (standard_in) 2: syntax error ./tests/shell/stat+shadow_stat.sh: 36: ./tests/shell/stat+shadow_stat.sh: [[: not found ./tests/shell/stat+shadow_stat.sh: 45: ./tests/shell/stat+shadow_stat.sh: declare: not found test child finished with -1 ---- end ---- perf stat metrics (shadow stat) test: FAILED! Reported-by: Jin Yao Signed-off-by: Namhyung Kim Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: David Laight Cc: Ian Rogers Cc: Mark Rutland Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20210114050609.1258820-1-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/shell/stat+shadow_stat.sh | 30 ++++++++++------------ 1 file changed, 14 insertions(+), 16 deletions(-) diff --git a/tools/perf/tests/shell/stat+shadow_stat.sh b/tools/perf/tests/shell/stat+shadow_stat.sh index 249dfe48cf6ad..ebebd3596cf99 100755 --- a/tools/perf/tests/shell/stat+shadow_stat.sh +++ b/tools/perf/tests/shell/stat+shadow_stat.sh @@ -9,31 +9,29 @@ perf stat -a true > /dev/null 2>&1 || exit 2 test_global_aggr() { - local cyc - perf stat -a --no-big-num -e cycles,instructions sleep 1 2>&1 | \ grep -e cycles -e instructions | \ while read num evt hash ipc rest do # skip not counted events - if [[ $num == "&1 | \ grep ^CPU | \ while read cpu num evt hash ipc rest do # skip not counted events - if [[ $num == " Date: Fri, 15 Jan 2021 06:10:04 -0800 Subject: [PATCH 0893/2012] octeontx2-af: Fix missing check bugs in rvu_cgx.c In rvu_mbox_handler_cgx_mac_addr_get() and rvu_mbox_handler_cgx_mac_addr_set(), the msg is expected only from PFs that are mapped to CGX LMACs. It should be checked before mapping, so we add the is_cgx_config_permitted() in the functions. Fixes: 96be2e0da85e ("octeontx2-af: Support for MAC address filters in CGX") Signed-off-by: Yingjie Wang Reviewed-by: Geetha sowjanya Link: https://lore.kernel.org/r/1610719804-35230-1-git-send-email-wangyingjie55@126.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c index d298b93571778..6c6b411e78fd8 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c @@ -469,6 +469,9 @@ int rvu_mbox_handler_cgx_mac_addr_set(struct rvu *rvu, int pf = rvu_get_pf(req->hdr.pcifunc); u8 cgx_id, lmac_id; + if (!is_cgx_config_permitted(rvu, req->hdr.pcifunc)) + return -EPERM; + rvu_get_cgx_lmac_id(rvu->pf2cgxlmac_map[pf], &cgx_id, &lmac_id); cgx_lmac_addr_set(cgx_id, lmac_id, req->mac_addr); @@ -485,6 +488,9 @@ int rvu_mbox_handler_cgx_mac_addr_get(struct rvu *rvu, int rc = 0, i; u64 cfg; + if (!is_cgx_config_permitted(rvu, req->hdr.pcifunc)) + return -EPERM; + rvu_get_cgx_lmac_id(rvu->pf2cgxlmac_map[pf], &cgx_id, &lmac_id); rsp->hdr.rc = rc; -- GitLab From 23dd561ad9eae02b4d51bb502fe4e1a0666e9567 Mon Sep 17 00:00:00 2001 From: Yi Li Date: Wed, 30 Dec 2020 11:38:27 +0800 Subject: [PATCH 0894/2012] ext4: use IS_ERR instead of IS_ERR_OR_NULL and set inode null when IS_ERR 1: ext4_iget/ext4_find_extent never returns NULL, use IS_ERR instead of IS_ERR_OR_NULL to fix this. 2: ext4_fc_replay_inode should set the inode to NULL when IS_ERR. and go to call iput properly. Fixes: 8016e29f4362 ("ext4: fast commit recovery path") Signed-off-by: Yi Li Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20201230033827.3996064-1-yili@winhong.com Signed-off-by: Theodore Ts'o Cc: stable@kernel.org --- fs/ext4/fast_commit.c | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c index 4fcc21c25e793..6b5489273c859 100644 --- a/fs/ext4/fast_commit.c +++ b/fs/ext4/fast_commit.c @@ -1318,14 +1318,14 @@ static int ext4_fc_replay_unlink(struct super_block *sb, struct ext4_fc_tl *tl) entry.len = darg.dname_len; inode = ext4_iget(sb, darg.ino, EXT4_IGET_NORMAL); - if (IS_ERR_OR_NULL(inode)) { + if (IS_ERR(inode)) { jbd_debug(1, "Inode %d not found", darg.ino); return 0; } old_parent = ext4_iget(sb, darg.parent_ino, EXT4_IGET_NORMAL); - if (IS_ERR_OR_NULL(old_parent)) { + if (IS_ERR(old_parent)) { jbd_debug(1, "Dir with inode %d not found", darg.parent_ino); iput(inode); return 0; @@ -1410,7 +1410,7 @@ static int ext4_fc_replay_link(struct super_block *sb, struct ext4_fc_tl *tl) darg.parent_ino, darg.dname_len); inode = ext4_iget(sb, darg.ino, EXT4_IGET_NORMAL); - if (IS_ERR_OR_NULL(inode)) { + if (IS_ERR(inode)) { jbd_debug(1, "Inode not found."); return 0; } @@ -1466,10 +1466,11 @@ static int ext4_fc_replay_inode(struct super_block *sb, struct ext4_fc_tl *tl) trace_ext4_fc_replay(sb, tag, ino, 0, 0); inode = ext4_iget(sb, ino, EXT4_IGET_NORMAL); - if (!IS_ERR_OR_NULL(inode)) { + if (!IS_ERR(inode)) { ext4_ext_clear_bb(inode); iput(inode); } + inode = NULL; ext4_fc_record_modified_inode(sb, ino); @@ -1512,7 +1513,7 @@ static int ext4_fc_replay_inode(struct super_block *sb, struct ext4_fc_tl *tl) /* Given that we just wrote the inode on disk, this SHOULD succeed. */ inode = ext4_iget(sb, ino, EXT4_IGET_NORMAL); - if (IS_ERR_OR_NULL(inode)) { + if (IS_ERR(inode)) { jbd_debug(1, "Inode not found."); return -EFSCORRUPTED; } @@ -1564,7 +1565,7 @@ static int ext4_fc_replay_create(struct super_block *sb, struct ext4_fc_tl *tl) goto out; inode = ext4_iget(sb, darg.ino, EXT4_IGET_NORMAL); - if (IS_ERR_OR_NULL(inode)) { + if (IS_ERR(inode)) { jbd_debug(1, "inode %d not found.", darg.ino); inode = NULL; ret = -EINVAL; @@ -1577,7 +1578,7 @@ static int ext4_fc_replay_create(struct super_block *sb, struct ext4_fc_tl *tl) * dot and dot dot dirents are setup properly. */ dir = ext4_iget(sb, darg.parent_ino, EXT4_IGET_NORMAL); - if (IS_ERR_OR_NULL(dir)) { + if (IS_ERR(dir)) { jbd_debug(1, "Dir %d not found.", darg.ino); goto out; } @@ -1653,7 +1654,7 @@ static int ext4_fc_replay_add_range(struct super_block *sb, inode = ext4_iget(sb, le32_to_cpu(fc_add_ex->fc_ino), EXT4_IGET_NORMAL); - if (IS_ERR_OR_NULL(inode)) { + if (IS_ERR(inode)) { jbd_debug(1, "Inode not found."); return 0; } @@ -1777,7 +1778,7 @@ ext4_fc_replay_del_range(struct super_block *sb, struct ext4_fc_tl *tl) le32_to_cpu(lrange->fc_ino), cur, remaining); inode = ext4_iget(sb, le32_to_cpu(lrange->fc_ino), EXT4_IGET_NORMAL); - if (IS_ERR_OR_NULL(inode)) { + if (IS_ERR(inode)) { jbd_debug(1, "Inode %d not found", le32_to_cpu(lrange->fc_ino)); return 0; } @@ -1832,7 +1833,7 @@ static void ext4_fc_set_bitmaps_and_counters(struct super_block *sb) for (i = 0; i < state->fc_modified_inodes_used; i++) { inode = ext4_iget(sb, state->fc_modified_inodes[i], EXT4_IGET_NORMAL); - if (IS_ERR_OR_NULL(inode)) { + if (IS_ERR(inode)) { jbd_debug(1, "Inode %d not found.", state->fc_modified_inodes[i]); continue; @@ -1849,7 +1850,7 @@ static void ext4_fc_set_bitmaps_and_counters(struct super_block *sb) if (ret > 0) { path = ext4_find_extent(inode, map.m_lblk, NULL, 0); - if (!IS_ERR_OR_NULL(path)) { + if (!IS_ERR(path)) { for (j = 0; j < path->p_depth; j++) ext4_mb_mark_bb(inode->i_sb, path[j].p_block, 1, 1); -- GitLab From 31e203e09f036f48e7c567c2d32df0196bbd303f Mon Sep 17 00:00:00 2001 From: Daejun Park Date: Wed, 30 Dec 2020 18:48:51 +0900 Subject: [PATCH 0895/2012] ext4: fix wrong list_splice in ext4_fc_cleanup After full/fast commit, entries in staging queue are promoted to main queue. In ext4_fs_cleanup function, it splice to staging queue to staging queue. Fixes: aa75f4d3daaeb ("ext4: main fast-commit commit path") Signed-off-by: Daejun Park Reviewed-by: Harshad Shirwadkar Link: https://lore.kernel.org/r/20201230094851epcms2p6eeead8cc984379b37b2efd21af90fd1a@epcms2p6 Signed-off-by: Theodore Ts'o Cc: stable@kernel.org --- fs/ext4/fast_commit.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c index 6b5489273c859..ec5316fbcd1d4 100644 --- a/fs/ext4/fast_commit.c +++ b/fs/ext4/fast_commit.c @@ -1268,7 +1268,7 @@ static void ext4_fc_cleanup(journal_t *journal, int full) list_splice_init(&sbi->s_fc_dentry_q[FC_Q_STAGING], &sbi->s_fc_dentry_q[FC_Q_MAIN]); list_splice_init(&sbi->s_fc_q[FC_Q_STAGING], - &sbi->s_fc_q[FC_Q_STAGING]); + &sbi->s_fc_q[FC_Q_MAIN]); ext4_clear_mount_flag(sb, EXT4_MF_FC_COMMITTING); ext4_clear_mount_flag(sb, EXT4_MF_FC_INELIGIBLE); -- GitLab From 6b4b8e6b4ad8553660421d6360678b3811d5deb9 Mon Sep 17 00:00:00 2001 From: yangerkun Date: Tue, 5 Jan 2021 14:28:57 +0800 Subject: [PATCH 0896/2012] ext4: fix bug for rename with RENAME_WHITEOUT We got a "deleted inode referenced" warning cross our fsstress test. The bug can be reproduced easily with following steps: cd /dev/shm mkdir test/ fallocate -l 128M img mkfs.ext4 -b 1024 img mount img test/ dd if=/dev/zero of=test/foo bs=1M count=128 mkdir test/dir/ && cd test/dir/ for ((i=0;i<1000;i++)); do touch file$i; done # consume all block cd ~ && renameat2(AT_FDCWD, /dev/shm/test/dir/file1, AT_FDCWD, /dev/shm/test/dir/dst_file, RENAME_WHITEOUT) # ext4_add_entry in ext4_rename will return ENOSPC!! cd /dev/shm/ && umount test/ && mount img test/ && ls -li test/dir/file1 We will get the output: "ls: cannot access 'test/dir/file1': Structure needs cleaning" and the dmesg show: "EXT4-fs error (device loop0): ext4_lookup:1626: inode #2049: comm ls: deleted inode referenced: 139" ext4_rename will create a special inode for whiteout and use this 'ino' to replace the source file's dir entry 'ino'. Once error happens latter(the error above was the ENOSPC return from ext4_add_entry in ext4_rename since all space has been consumed), the cleanup do drop the nlink for whiteout, but forget to restore 'ino' with source file. This will trigger the bug describle as above. Signed-off-by: yangerkun Reviewed-by: Jan Kara Cc: stable@vger.kernel.org Fixes: cd808deced43 ("ext4: support RENAME_WHITEOUT") Link: https://lore.kernel.org/r/20210105062857.3566-1-yangerkun@huawei.com Signed-off-by: Theodore Ts'o --- fs/ext4/namei.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index a3b28ef2455a8..fa625a247e9a3 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -3601,9 +3601,6 @@ static int ext4_setent(handle_t *handle, struct ext4_renament *ent, return retval2; } } - brelse(ent->bh); - ent->bh = NULL; - return retval; } @@ -3802,6 +3799,7 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry, } } + old_file_type = old.de->file_type; if (IS_DIRSYNC(old.dir) || IS_DIRSYNC(new.dir)) ext4_handle_sync(handle); @@ -3829,7 +3827,6 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry, force_reread = (new.dir->i_ino == old.dir->i_ino && ext4_test_inode_flag(new.dir, EXT4_INODE_INLINE_DATA)); - old_file_type = old.de->file_type; if (whiteout) { /* * Do this before adding a new entry, so the old entry is sure @@ -3927,15 +3924,19 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry, retval = 0; end_rename: - brelse(old.dir_bh); - brelse(old.bh); - brelse(new.bh); if (whiteout) { - if (retval) + if (retval) { + ext4_setent(handle, &old, + old.inode->i_ino, old_file_type); drop_nlink(whiteout); + } unlock_new_inode(whiteout); iput(whiteout); + } + brelse(old.dir_bh); + brelse(old.bh); + brelse(new.bh); if (handle) ext4_journal_stop(handle); return retval; -- GitLab From e9f53353e166a67dfe4f8295100f8ac39d6cf10b Mon Sep 17 00:00:00 2001 From: Daejun Park Date: Wed, 6 Jan 2021 10:32:42 +0900 Subject: [PATCH 0897/2012] ext4: remove expensive flush on fast commit In the fast commit, it adds REQ_FUA and REQ_PREFLUSH on each fast commit block when barrier is enabled. However, in recovery phase, ext4 compares CRC value in the tail. So it is sufficient to add REQ_FUA and REQ_PREFLUSH on the block that has tail. Signed-off-by: Daejun Park Reviewed-by: Harshad Shirwadkar Link: https://lore.kernel.org/r/20210106013242epcms2p5b6b4ed8ca86f29456fdf56aa580e74b4@epcms2p5 Signed-off-by: Theodore Ts'o --- fs/ext4/fast_commit.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c index ec5316fbcd1d4..0a14a7c87bf82 100644 --- a/fs/ext4/fast_commit.c +++ b/fs/ext4/fast_commit.c @@ -604,13 +604,13 @@ void ext4_fc_track_range(handle_t *handle, struct inode *inode, ext4_lblk_t star trace_ext4_fc_track_range(inode, start, end, ret); } -static void ext4_fc_submit_bh(struct super_block *sb) +static void ext4_fc_submit_bh(struct super_block *sb, bool is_tail) { int write_flags = REQ_SYNC; struct buffer_head *bh = EXT4_SB(sb)->s_fc_bh; - /* TODO: REQ_FUA | REQ_PREFLUSH is unnecessarily expensive. */ - if (test_opt(sb, BARRIER)) + /* Add REQ_FUA | REQ_PREFLUSH only its tail */ + if (test_opt(sb, BARRIER) && is_tail) write_flags |= REQ_FUA | REQ_PREFLUSH; lock_buffer(bh); set_buffer_dirty(bh); @@ -684,7 +684,7 @@ static u8 *ext4_fc_reserve_space(struct super_block *sb, int len, u32 *crc) *crc = ext4_chksum(sbi, *crc, tl, sizeof(*tl)); if (pad_len > 0) ext4_fc_memzero(sb, tl + 1, pad_len, crc); - ext4_fc_submit_bh(sb); + ext4_fc_submit_bh(sb, false); ret = jbd2_fc_get_buf(EXT4_SB(sb)->s_journal, &bh); if (ret) @@ -741,7 +741,7 @@ static int ext4_fc_write_tail(struct super_block *sb, u32 crc) tail.fc_crc = cpu_to_le32(crc); ext4_fc_memcpy(sb, dst, &tail.fc_crc, sizeof(tail.fc_crc), NULL); - ext4_fc_submit_bh(sb); + ext4_fc_submit_bh(sb, true); return 0; } -- GitLab From be82fddca81eefd1edbd9b290dfcb2177e24785b Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 14 Jan 2021 13:23:04 -0800 Subject: [PATCH 0898/2012] libperf tests: Avoid uninitialized variable warning The variable 'bf' is read (for a write call) without being initialized triggering a memory sanitizer warning. Use 'bf' in the read and switch the write to reading from a string. Signed-off-by: Ian Rogers Acked-by: Jiri Olsa Acked-by: Namhyung Kim Cc: Alexander Shishkin Cc: Mark Rutland Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20210114212304.4018119-1-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/perf/tests/test-evlist.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/lib/perf/tests/test-evlist.c b/tools/lib/perf/tests/test-evlist.c index 6d8ebe0c25042..1b225fe34a722 100644 --- a/tools/lib/perf/tests/test-evlist.c +++ b/tools/lib/perf/tests/test-evlist.c @@ -208,7 +208,6 @@ static int test_mmap_thread(void) char path[PATH_MAX]; int id, err, pid, go_pipe[2]; union perf_event *event; - char bf; int count = 0; snprintf(path, PATH_MAX, "%s/kernel/debug/tracing/events/syscalls/sys_enter_prctl/id", @@ -229,6 +228,7 @@ static int test_mmap_thread(void) pid = fork(); if (!pid) { int i; + char bf; read(go_pipe[0], &bf, 1); @@ -266,7 +266,7 @@ static int test_mmap_thread(void) perf_evlist__enable(evlist); /* kick the child and wait for it to finish */ - write(go_pipe[1], &bf, 1); + write(go_pipe[1], "A", 1); waitpid(pid, NULL, 0); /* -- GitLab From bba2ea17ef553aea0df80cb64399fe2f70f225dd Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 14 Jan 2021 10:02:49 -0800 Subject: [PATCH 0899/2012] libperf tests: If a test fails return non-zero If a test fails return -1 rather than 0. This is consistent with the return value in test-cpumap.c Signed-off-by: Ian Rogers Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20210114180250.3853825-1-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/perf/tests/test-cpumap.c | 2 +- tools/lib/perf/tests/test-evlist.c | 2 +- tools/lib/perf/tests/test-evsel.c | 2 +- tools/lib/perf/tests/test-threadmap.c | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/lib/perf/tests/test-cpumap.c b/tools/lib/perf/tests/test-cpumap.c index c8d45091e7c26..c70e9e03af3e9 100644 --- a/tools/lib/perf/tests/test-cpumap.c +++ b/tools/lib/perf/tests/test-cpumap.c @@ -27,5 +27,5 @@ int main(int argc, char **argv) perf_cpu_map__put(cpus); __T_END; - return 0; + return tests_failed == 0 ? 0 : -1; } diff --git a/tools/lib/perf/tests/test-evlist.c b/tools/lib/perf/tests/test-evlist.c index 1b225fe34a722..220a95be47c34 100644 --- a/tools/lib/perf/tests/test-evlist.c +++ b/tools/lib/perf/tests/test-evlist.c @@ -409,5 +409,5 @@ int main(int argc, char **argv) test_mmap_cpus(); __T_END; - return 0; + return tests_failed == 0 ? 0 : -1; } diff --git a/tools/lib/perf/tests/test-evsel.c b/tools/lib/perf/tests/test-evsel.c index 135722ac965bf..0ad82d7a2a51b 100644 --- a/tools/lib/perf/tests/test-evsel.c +++ b/tools/lib/perf/tests/test-evsel.c @@ -131,5 +131,5 @@ int main(int argc, char **argv) test_stat_thread_enable(); __T_END; - return 0; + return tests_failed == 0 ? 0 : -1; } diff --git a/tools/lib/perf/tests/test-threadmap.c b/tools/lib/perf/tests/test-threadmap.c index 7dc4d6fbeddee..384471441b484 100644 --- a/tools/lib/perf/tests/test-threadmap.c +++ b/tools/lib/perf/tests/test-threadmap.c @@ -27,5 +27,5 @@ int main(int argc, char **argv) perf_thread_map__put(threads); __T_END; - return 0; + return tests_failed == 0 ? 0 : -1; } -- GitLab From 66dd86b2a2bee129c70f7ff054d3a6a2e5f8eb20 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 14 Jan 2021 10:02:50 -0800 Subject: [PATCH 0900/2012] libperf tests: Fail when failing to get a tracepoint id Permissions are necessary to get a tracepoint id. Fail the test when the read fails. Signed-off-by: Ian Rogers Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20210114180250.3853825-2-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/perf/tests/test-evlist.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/lib/perf/tests/test-evlist.c b/tools/lib/perf/tests/test-evlist.c index 220a95be47c34..e2ac0b7f432ea 100644 --- a/tools/lib/perf/tests/test-evlist.c +++ b/tools/lib/perf/tests/test-evlist.c @@ -214,6 +214,7 @@ static int test_mmap_thread(void) sysfs__mountpoint()); if (filename__read_int(path, &id)) { + tests_failed++; fprintf(stderr, "error: failed to get tracepoint id: %s\n", path); return -1; } -- GitLab From 3ff1e7180abc7f6db413933c110df69157216715 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Fri, 15 Jan 2021 16:11:38 +0900 Subject: [PATCH 0901/2012] perf stat: Introduce struct runtime_stat_data To pass more info to the saved_value in the runtime_stat, add a new struct runtime_stat_data. Currently it only has 'ctx' field but later patch will add more. Note that we intentionally pass 0 as ctx to clock-related events for compatibility. It was already there in a few places. So move the code into the saved_value_lookup() explicitly and add a comment. Suggested-by: Andi Kleen Signed-off-by: Namhyung Kim Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ian Rogers Cc: Jin Yao Cc: Mark Rutland Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20210115071139.257042-1-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat-shadow.c | 346 +++++++++++++++++----------------- 1 file changed, 173 insertions(+), 173 deletions(-) diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c index 901265127e369..a1565b6e38f27 100644 --- a/tools/perf/util/stat-shadow.c +++ b/tools/perf/util/stat-shadow.c @@ -114,6 +114,10 @@ static struct saved_value *saved_value_lookup(struct evsel *evsel, rblist = &st->value_list; + /* don't use context info for clock events */ + if (type == STAT_NSECS) + dm.ctx = 0; + nd = rblist__find(rblist, &dm); if (nd) return container_of(nd, struct saved_value, rb_node); @@ -191,12 +195,17 @@ void perf_stat__reset_shadow_per_stat(struct runtime_stat *st) reset_stat(st); } +struct runtime_stat_data { + int ctx; +}; + static void update_runtime_stat(struct runtime_stat *st, enum stat_type type, - int ctx, int cpu, u64 count) + int cpu, u64 count, + struct runtime_stat_data *rsd) { - struct saved_value *v = saved_value_lookup(NULL, cpu, true, - type, ctx, st); + struct saved_value *v = saved_value_lookup(NULL, cpu, true, type, + rsd->ctx, st); if (v) update_stats(&v->stats, count); @@ -210,73 +219,75 @@ static void update_runtime_stat(struct runtime_stat *st, void perf_stat__update_shadow_stats(struct evsel *counter, u64 count, int cpu, struct runtime_stat *st) { - int ctx = evsel_context(counter); u64 count_ns = count; struct saved_value *v; + struct runtime_stat_data rsd = { + .ctx = evsel_context(counter), + }; count *= counter->scale; if (evsel__is_clock(counter)) - update_runtime_stat(st, STAT_NSECS, 0, cpu, count_ns); + update_runtime_stat(st, STAT_NSECS, cpu, count_ns, &rsd); else if (evsel__match(counter, HARDWARE, HW_CPU_CYCLES)) - update_runtime_stat(st, STAT_CYCLES, ctx, cpu, count); + update_runtime_stat(st, STAT_CYCLES, cpu, count, &rsd); else if (perf_stat_evsel__is(counter, CYCLES_IN_TX)) - update_runtime_stat(st, STAT_CYCLES_IN_TX, ctx, cpu, count); + update_runtime_stat(st, STAT_CYCLES_IN_TX, cpu, count, &rsd); else if (perf_stat_evsel__is(counter, TRANSACTION_START)) - update_runtime_stat(st, STAT_TRANSACTION, ctx, cpu, count); + update_runtime_stat(st, STAT_TRANSACTION, cpu, count, &rsd); else if (perf_stat_evsel__is(counter, ELISION_START)) - update_runtime_stat(st, STAT_ELISION, ctx, cpu, count); + update_runtime_stat(st, STAT_ELISION, cpu, count, &rsd); else if (perf_stat_evsel__is(counter, TOPDOWN_TOTAL_SLOTS)) update_runtime_stat(st, STAT_TOPDOWN_TOTAL_SLOTS, - ctx, cpu, count); + cpu, count, &rsd); else if (perf_stat_evsel__is(counter, TOPDOWN_SLOTS_ISSUED)) update_runtime_stat(st, STAT_TOPDOWN_SLOTS_ISSUED, - ctx, cpu, count); + cpu, count, &rsd); else if (perf_stat_evsel__is(counter, TOPDOWN_SLOTS_RETIRED)) update_runtime_stat(st, STAT_TOPDOWN_SLOTS_RETIRED, - ctx, cpu, count); + cpu, count, &rsd); else if (perf_stat_evsel__is(counter, TOPDOWN_FETCH_BUBBLES)) update_runtime_stat(st, STAT_TOPDOWN_FETCH_BUBBLES, - ctx, cpu, count); + cpu, count, &rsd); else if (perf_stat_evsel__is(counter, TOPDOWN_RECOVERY_BUBBLES)) update_runtime_stat(st, STAT_TOPDOWN_RECOVERY_BUBBLES, - ctx, cpu, count); + cpu, count, &rsd); else if (perf_stat_evsel__is(counter, TOPDOWN_RETIRING)) update_runtime_stat(st, STAT_TOPDOWN_RETIRING, - ctx, cpu, count); + cpu, count, &rsd); else if (perf_stat_evsel__is(counter, TOPDOWN_BAD_SPEC)) update_runtime_stat(st, STAT_TOPDOWN_BAD_SPEC, - ctx, cpu, count); + cpu, count, &rsd); else if (perf_stat_evsel__is(counter, TOPDOWN_FE_BOUND)) update_runtime_stat(st, STAT_TOPDOWN_FE_BOUND, - ctx, cpu, count); + cpu, count, &rsd); else if (perf_stat_evsel__is(counter, TOPDOWN_BE_BOUND)) update_runtime_stat(st, STAT_TOPDOWN_BE_BOUND, - ctx, cpu, count); + cpu, count, &rsd); else if (evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) update_runtime_stat(st, STAT_STALLED_CYCLES_FRONT, - ctx, cpu, count); + cpu, count, &rsd); else if (evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND)) update_runtime_stat(st, STAT_STALLED_CYCLES_BACK, - ctx, cpu, count); + cpu, count, &rsd); else if (evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS)) - update_runtime_stat(st, STAT_BRANCHES, ctx, cpu, count); + update_runtime_stat(st, STAT_BRANCHES, cpu, count, &rsd); else if (evsel__match(counter, HARDWARE, HW_CACHE_REFERENCES)) - update_runtime_stat(st, STAT_CACHEREFS, ctx, cpu, count); + update_runtime_stat(st, STAT_CACHEREFS, cpu, count, &rsd); else if (evsel__match(counter, HW_CACHE, HW_CACHE_L1D)) - update_runtime_stat(st, STAT_L1_DCACHE, ctx, cpu, count); + update_runtime_stat(st, STAT_L1_DCACHE, cpu, count, &rsd); else if (evsel__match(counter, HW_CACHE, HW_CACHE_L1I)) - update_runtime_stat(st, STAT_L1_ICACHE, ctx, cpu, count); + update_runtime_stat(st, STAT_L1_ICACHE, cpu, count, &rsd); else if (evsel__match(counter, HW_CACHE, HW_CACHE_LL)) - update_runtime_stat(st, STAT_LL_CACHE, ctx, cpu, count); + update_runtime_stat(st, STAT_LL_CACHE, cpu, count, &rsd); else if (evsel__match(counter, HW_CACHE, HW_CACHE_DTLB)) - update_runtime_stat(st, STAT_DTLB_CACHE, ctx, cpu, count); + update_runtime_stat(st, STAT_DTLB_CACHE, cpu, count, &rsd); else if (evsel__match(counter, HW_CACHE, HW_CACHE_ITLB)) - update_runtime_stat(st, STAT_ITLB_CACHE, ctx, cpu, count); + update_runtime_stat(st, STAT_ITLB_CACHE, cpu, count, &rsd); else if (perf_stat_evsel__is(counter, SMI_NUM)) - update_runtime_stat(st, STAT_SMI_NUM, ctx, cpu, count); + update_runtime_stat(st, STAT_SMI_NUM, cpu, count, &rsd); else if (perf_stat_evsel__is(counter, APERF)) - update_runtime_stat(st, STAT_APERF, ctx, cpu, count); + update_runtime_stat(st, STAT_APERF, cpu, count, &rsd); if (counter->collect_stat) { v = saved_value_lookup(counter, cpu, true, STAT_NONE, 0, st); @@ -422,11 +433,12 @@ void perf_stat__collect_metric_expr(struct evlist *evsel_list) } static double runtime_stat_avg(struct runtime_stat *st, - enum stat_type type, int ctx, int cpu) + enum stat_type type, int cpu, + struct runtime_stat_data *rsd) { struct saved_value *v; - v = saved_value_lookup(NULL, cpu, false, type, ctx, st); + v = saved_value_lookup(NULL, cpu, false, type, rsd->ctx, st); if (!v) return 0.0; @@ -434,11 +446,12 @@ static double runtime_stat_avg(struct runtime_stat *st, } static double runtime_stat_n(struct runtime_stat *st, - enum stat_type type, int ctx, int cpu) + enum stat_type type, int cpu, + struct runtime_stat_data *rsd) { struct saved_value *v; - v = saved_value_lookup(NULL, cpu, false, type, ctx, st); + v = saved_value_lookup(NULL, cpu, false, type, rsd->ctx, st); if (!v) return 0.0; @@ -446,16 +459,15 @@ static double runtime_stat_n(struct runtime_stat *st, } static void print_stalled_cycles_frontend(struct perf_stat_config *config, - int cpu, - struct evsel *evsel, double avg, + int cpu, double avg, struct perf_stat_output_ctx *out, - struct runtime_stat *st) + struct runtime_stat *st, + struct runtime_stat_data *rsd) { double total, ratio = 0.0; const char *color; - int ctx = evsel_context(evsel); - total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu); + total = runtime_stat_avg(st, STAT_CYCLES, cpu, rsd); if (total) ratio = avg / total * 100.0; @@ -470,16 +482,15 @@ static void print_stalled_cycles_frontend(struct perf_stat_config *config, } static void print_stalled_cycles_backend(struct perf_stat_config *config, - int cpu, - struct evsel *evsel, double avg, + int cpu, double avg, struct perf_stat_output_ctx *out, - struct runtime_stat *st) + struct runtime_stat *st, + struct runtime_stat_data *rsd) { double total, ratio = 0.0; const char *color; - int ctx = evsel_context(evsel); - total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu); + total = runtime_stat_avg(st, STAT_CYCLES, cpu, rsd); if (total) ratio = avg / total * 100.0; @@ -490,17 +501,15 @@ static void print_stalled_cycles_backend(struct perf_stat_config *config, } static void print_branch_misses(struct perf_stat_config *config, - int cpu, - struct evsel *evsel, - double avg, + int cpu, double avg, struct perf_stat_output_ctx *out, - struct runtime_stat *st) + struct runtime_stat *st, + struct runtime_stat_data *rsd) { double total, ratio = 0.0; const char *color; - int ctx = evsel_context(evsel); - total = runtime_stat_avg(st, STAT_BRANCHES, ctx, cpu); + total = runtime_stat_avg(st, STAT_BRANCHES, cpu, rsd); if (total) ratio = avg / total * 100.0; @@ -511,18 +520,15 @@ static void print_branch_misses(struct perf_stat_config *config, } static void print_l1_dcache_misses(struct perf_stat_config *config, - int cpu, - struct evsel *evsel, - double avg, + int cpu, double avg, struct perf_stat_output_ctx *out, - struct runtime_stat *st) - + struct runtime_stat *st, + struct runtime_stat_data *rsd) { double total, ratio = 0.0; const char *color; - int ctx = evsel_context(evsel); - total = runtime_stat_avg(st, STAT_L1_DCACHE, ctx, cpu); + total = runtime_stat_avg(st, STAT_L1_DCACHE, cpu, rsd); if (total) ratio = avg / total * 100.0; @@ -533,18 +539,15 @@ static void print_l1_dcache_misses(struct perf_stat_config *config, } static void print_l1_icache_misses(struct perf_stat_config *config, - int cpu, - struct evsel *evsel, - double avg, + int cpu, double avg, struct perf_stat_output_ctx *out, - struct runtime_stat *st) - + struct runtime_stat *st, + struct runtime_stat_data *rsd) { double total, ratio = 0.0; const char *color; - int ctx = evsel_context(evsel); - total = runtime_stat_avg(st, STAT_L1_ICACHE, ctx, cpu); + total = runtime_stat_avg(st, STAT_L1_ICACHE, cpu, rsd); if (total) ratio = avg / total * 100.0; @@ -554,17 +557,15 @@ static void print_l1_icache_misses(struct perf_stat_config *config, } static void print_dtlb_cache_misses(struct perf_stat_config *config, - int cpu, - struct evsel *evsel, - double avg, + int cpu, double avg, struct perf_stat_output_ctx *out, - struct runtime_stat *st) + struct runtime_stat *st, + struct runtime_stat_data *rsd) { double total, ratio = 0.0; const char *color; - int ctx = evsel_context(evsel); - total = runtime_stat_avg(st, STAT_DTLB_CACHE, ctx, cpu); + total = runtime_stat_avg(st, STAT_DTLB_CACHE, cpu, rsd); if (total) ratio = avg / total * 100.0; @@ -574,17 +575,15 @@ static void print_dtlb_cache_misses(struct perf_stat_config *config, } static void print_itlb_cache_misses(struct perf_stat_config *config, - int cpu, - struct evsel *evsel, - double avg, + int cpu, double avg, struct perf_stat_output_ctx *out, - struct runtime_stat *st) + struct runtime_stat *st, + struct runtime_stat_data *rsd) { double total, ratio = 0.0; const char *color; - int ctx = evsel_context(evsel); - total = runtime_stat_avg(st, STAT_ITLB_CACHE, ctx, cpu); + total = runtime_stat_avg(st, STAT_ITLB_CACHE, cpu, rsd); if (total) ratio = avg / total * 100.0; @@ -594,17 +593,15 @@ static void print_itlb_cache_misses(struct perf_stat_config *config, } static void print_ll_cache_misses(struct perf_stat_config *config, - int cpu, - struct evsel *evsel, - double avg, + int cpu, double avg, struct perf_stat_output_ctx *out, - struct runtime_stat *st) + struct runtime_stat *st, + struct runtime_stat_data *rsd) { double total, ratio = 0.0; const char *color; - int ctx = evsel_context(evsel); - total = runtime_stat_avg(st, STAT_LL_CACHE, ctx, cpu); + total = runtime_stat_avg(st, STAT_LL_CACHE, cpu, rsd); if (total) ratio = avg / total * 100.0; @@ -662,56 +659,61 @@ static double sanitize_val(double x) return x; } -static double td_total_slots(int ctx, int cpu, struct runtime_stat *st) +static double td_total_slots(int cpu, struct runtime_stat *st, + struct runtime_stat_data *rsd) { - return runtime_stat_avg(st, STAT_TOPDOWN_TOTAL_SLOTS, ctx, cpu); + return runtime_stat_avg(st, STAT_TOPDOWN_TOTAL_SLOTS, cpu, rsd); } -static double td_bad_spec(int ctx, int cpu, struct runtime_stat *st) +static double td_bad_spec(int cpu, struct runtime_stat *st, + struct runtime_stat_data *rsd) { double bad_spec = 0; double total_slots; double total; - total = runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_ISSUED, ctx, cpu) - - runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_RETIRED, ctx, cpu) + - runtime_stat_avg(st, STAT_TOPDOWN_RECOVERY_BUBBLES, ctx, cpu); + total = runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_ISSUED, cpu, rsd) - + runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_RETIRED, cpu, rsd) + + runtime_stat_avg(st, STAT_TOPDOWN_RECOVERY_BUBBLES, cpu, rsd); - total_slots = td_total_slots(ctx, cpu, st); + total_slots = td_total_slots(cpu, st, rsd); if (total_slots) bad_spec = total / total_slots; return sanitize_val(bad_spec); } -static double td_retiring(int ctx, int cpu, struct runtime_stat *st) +static double td_retiring(int cpu, struct runtime_stat *st, + struct runtime_stat_data *rsd) { double retiring = 0; - double total_slots = td_total_slots(ctx, cpu, st); + double total_slots = td_total_slots(cpu, st, rsd); double ret_slots = runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_RETIRED, - ctx, cpu); + cpu, rsd); if (total_slots) retiring = ret_slots / total_slots; return retiring; } -static double td_fe_bound(int ctx, int cpu, struct runtime_stat *st) +static double td_fe_bound(int cpu, struct runtime_stat *st, + struct runtime_stat_data *rsd) { double fe_bound = 0; - double total_slots = td_total_slots(ctx, cpu, st); + double total_slots = td_total_slots(cpu, st, rsd); double fetch_bub = runtime_stat_avg(st, STAT_TOPDOWN_FETCH_BUBBLES, - ctx, cpu); + cpu, rsd); if (total_slots) fe_bound = fetch_bub / total_slots; return fe_bound; } -static double td_be_bound(int ctx, int cpu, struct runtime_stat *st) +static double td_be_bound(int cpu, struct runtime_stat *st, + struct runtime_stat_data *rsd) { - double sum = (td_fe_bound(ctx, cpu, st) + - td_bad_spec(ctx, cpu, st) + - td_retiring(ctx, cpu, st)); + double sum = (td_fe_bound(cpu, st, rsd) + + td_bad_spec(cpu, st, rsd) + + td_retiring(cpu, st, rsd)); if (sum == 0) return 0; return sanitize_val(1.0 - sum); @@ -722,15 +724,15 @@ static double td_be_bound(int ctx, int cpu, struct runtime_stat *st) * the ratios we need to recreate the sum. */ -static double td_metric_ratio(int ctx, int cpu, - enum stat_type type, - struct runtime_stat *stat) +static double td_metric_ratio(int cpu, enum stat_type type, + struct runtime_stat *stat, + struct runtime_stat_data *rsd) { - double sum = runtime_stat_avg(stat, STAT_TOPDOWN_RETIRING, ctx, cpu) + - runtime_stat_avg(stat, STAT_TOPDOWN_FE_BOUND, ctx, cpu) + - runtime_stat_avg(stat, STAT_TOPDOWN_BE_BOUND, ctx, cpu) + - runtime_stat_avg(stat, STAT_TOPDOWN_BAD_SPEC, ctx, cpu); - double d = runtime_stat_avg(stat, type, ctx, cpu); + double sum = runtime_stat_avg(stat, STAT_TOPDOWN_RETIRING, cpu, rsd) + + runtime_stat_avg(stat, STAT_TOPDOWN_FE_BOUND, cpu, rsd) + + runtime_stat_avg(stat, STAT_TOPDOWN_BE_BOUND, cpu, rsd) + + runtime_stat_avg(stat, STAT_TOPDOWN_BAD_SPEC, cpu, rsd); + double d = runtime_stat_avg(stat, type, cpu, rsd); if (sum) return d / sum; @@ -742,34 +744,33 @@ static double td_metric_ratio(int ctx, int cpu, * We allow two missing. */ -static bool full_td(int ctx, int cpu, - struct runtime_stat *stat) +static bool full_td(int cpu, struct runtime_stat *stat, + struct runtime_stat_data *rsd) { int c = 0; - if (runtime_stat_avg(stat, STAT_TOPDOWN_RETIRING, ctx, cpu) > 0) + if (runtime_stat_avg(stat, STAT_TOPDOWN_RETIRING, cpu, rsd) > 0) c++; - if (runtime_stat_avg(stat, STAT_TOPDOWN_BE_BOUND, ctx, cpu) > 0) + if (runtime_stat_avg(stat, STAT_TOPDOWN_BE_BOUND, cpu, rsd) > 0) c++; - if (runtime_stat_avg(stat, STAT_TOPDOWN_FE_BOUND, ctx, cpu) > 0) + if (runtime_stat_avg(stat, STAT_TOPDOWN_FE_BOUND, cpu, rsd) > 0) c++; - if (runtime_stat_avg(stat, STAT_TOPDOWN_BAD_SPEC, ctx, cpu) > 0) + if (runtime_stat_avg(stat, STAT_TOPDOWN_BAD_SPEC, cpu, rsd) > 0) c++; return c >= 2; } -static void print_smi_cost(struct perf_stat_config *config, - int cpu, struct evsel *evsel, +static void print_smi_cost(struct perf_stat_config *config, int cpu, struct perf_stat_output_ctx *out, - struct runtime_stat *st) + struct runtime_stat *st, + struct runtime_stat_data *rsd) { double smi_num, aperf, cycles, cost = 0.0; - int ctx = evsel_context(evsel); const char *color = NULL; - smi_num = runtime_stat_avg(st, STAT_SMI_NUM, ctx, cpu); - aperf = runtime_stat_avg(st, STAT_APERF, ctx, cpu); - cycles = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu); + smi_num = runtime_stat_avg(st, STAT_SMI_NUM, cpu, rsd); + aperf = runtime_stat_avg(st, STAT_APERF, cpu, rsd); + cycles = runtime_stat_avg(st, STAT_CYCLES, cpu, rsd); if ((cycles == 0) || (aperf == 0)) return; @@ -930,12 +931,14 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, print_metric_t print_metric = out->print_metric; double total, ratio = 0.0, total2; const char *color = NULL; - int ctx = evsel_context(evsel); + struct runtime_stat_data rsd = { + .ctx = evsel_context(evsel), + }; struct metric_event *me; int num = 1; if (evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) { - total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu); + total = runtime_stat_avg(st, STAT_CYCLES, cpu, &rsd); if (total) { ratio = avg / total; @@ -945,12 +948,11 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, print_metric(config, ctxp, NULL, NULL, "insn per cycle", 0); } - total = runtime_stat_avg(st, STAT_STALLED_CYCLES_FRONT, - ctx, cpu); + total = runtime_stat_avg(st, STAT_STALLED_CYCLES_FRONT, cpu, &rsd); total = max(total, runtime_stat_avg(st, STAT_STALLED_CYCLES_BACK, - ctx, cpu)); + cpu, &rsd)); if (total && avg) { out->new_line(config, ctxp); @@ -960,8 +962,8 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, ratio); } } else if (evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES)) { - if (runtime_stat_n(st, STAT_BRANCHES, ctx, cpu) != 0) - print_branch_misses(config, cpu, evsel, avg, out, st); + if (runtime_stat_n(st, STAT_BRANCHES, cpu, &rsd) != 0) + print_branch_misses(config, cpu, avg, out, st, &rsd); else print_metric(config, ctxp, NULL, NULL, "of all branches", 0); } else if ( @@ -970,8 +972,8 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) { - if (runtime_stat_n(st, STAT_L1_DCACHE, ctx, cpu) != 0) - print_l1_dcache_misses(config, cpu, evsel, avg, out, st); + if (runtime_stat_n(st, STAT_L1_DCACHE, cpu, &rsd) != 0) + print_l1_dcache_misses(config, cpu, avg, out, st, &rsd); else print_metric(config, ctxp, NULL, NULL, "of all L1-dcache accesses", 0); } else if ( @@ -980,8 +982,8 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) { - if (runtime_stat_n(st, STAT_L1_ICACHE, ctx, cpu) != 0) - print_l1_icache_misses(config, cpu, evsel, avg, out, st); + if (runtime_stat_n(st, STAT_L1_ICACHE, cpu, &rsd) != 0) + print_l1_icache_misses(config, cpu, avg, out, st, &rsd); else print_metric(config, ctxp, NULL, NULL, "of all L1-icache accesses", 0); } else if ( @@ -990,8 +992,8 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) { - if (runtime_stat_n(st, STAT_DTLB_CACHE, ctx, cpu) != 0) - print_dtlb_cache_misses(config, cpu, evsel, avg, out, st); + if (runtime_stat_n(st, STAT_DTLB_CACHE, cpu, &rsd) != 0) + print_dtlb_cache_misses(config, cpu, avg, out, st, &rsd); else print_metric(config, ctxp, NULL, NULL, "of all dTLB cache accesses", 0); } else if ( @@ -1000,8 +1002,8 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) { - if (runtime_stat_n(st, STAT_ITLB_CACHE, ctx, cpu) != 0) - print_itlb_cache_misses(config, cpu, evsel, avg, out, st); + if (runtime_stat_n(st, STAT_ITLB_CACHE, cpu, &rsd) != 0) + print_itlb_cache_misses(config, cpu, avg, out, st, &rsd); else print_metric(config, ctxp, NULL, NULL, "of all iTLB cache accesses", 0); } else if ( @@ -1010,27 +1012,27 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) { - if (runtime_stat_n(st, STAT_LL_CACHE, ctx, cpu) != 0) - print_ll_cache_misses(config, cpu, evsel, avg, out, st); + if (runtime_stat_n(st, STAT_LL_CACHE, cpu, &rsd) != 0) + print_ll_cache_misses(config, cpu, avg, out, st, &rsd); else print_metric(config, ctxp, NULL, NULL, "of all LL-cache accesses", 0); } else if (evsel__match(evsel, HARDWARE, HW_CACHE_MISSES)) { - total = runtime_stat_avg(st, STAT_CACHEREFS, ctx, cpu); + total = runtime_stat_avg(st, STAT_CACHEREFS, cpu, &rsd); if (total) ratio = avg * 100 / total; - if (runtime_stat_n(st, STAT_CACHEREFS, ctx, cpu) != 0) + if (runtime_stat_n(st, STAT_CACHEREFS, cpu, &rsd) != 0) print_metric(config, ctxp, NULL, "%8.3f %%", "of all cache refs", ratio); else print_metric(config, ctxp, NULL, NULL, "of all cache refs", 0); } else if (evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) { - print_stalled_cycles_frontend(config, cpu, evsel, avg, out, st); + print_stalled_cycles_frontend(config, cpu, avg, out, st, &rsd); } else if (evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND)) { - print_stalled_cycles_backend(config, cpu, evsel, avg, out, st); + print_stalled_cycles_backend(config, cpu, avg, out, st, &rsd); } else if (evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) { - total = runtime_stat_avg(st, STAT_NSECS, 0, cpu); + total = runtime_stat_avg(st, STAT_NSECS, cpu, &rsd); if (total) { ratio = avg / total; @@ -1039,7 +1041,7 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, print_metric(config, ctxp, NULL, NULL, "Ghz", 0); } } else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX)) { - total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu); + total = runtime_stat_avg(st, STAT_CYCLES, cpu, &rsd); if (total) print_metric(config, ctxp, NULL, @@ -1049,8 +1051,8 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, print_metric(config, ctxp, NULL, NULL, "transactional cycles", 0); } else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX_CP)) { - total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu); - total2 = runtime_stat_avg(st, STAT_CYCLES_IN_TX, ctx, cpu); + total = runtime_stat_avg(st, STAT_CYCLES, cpu, &rsd); + total2 = runtime_stat_avg(st, STAT_CYCLES_IN_TX, cpu, &rsd); if (total2 < avg) total2 = avg; @@ -1060,21 +1062,19 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, else print_metric(config, ctxp, NULL, NULL, "aborted cycles", 0); } else if (perf_stat_evsel__is(evsel, TRANSACTION_START)) { - total = runtime_stat_avg(st, STAT_CYCLES_IN_TX, - ctx, cpu); + total = runtime_stat_avg(st, STAT_CYCLES_IN_TX, cpu, &rsd); if (avg) ratio = total / avg; - if (runtime_stat_n(st, STAT_CYCLES_IN_TX, ctx, cpu) != 0) + if (runtime_stat_n(st, STAT_CYCLES_IN_TX, cpu, &rsd) != 0) print_metric(config, ctxp, NULL, "%8.0f", "cycles / transaction", ratio); else print_metric(config, ctxp, NULL, NULL, "cycles / transaction", 0); } else if (perf_stat_evsel__is(evsel, ELISION_START)) { - total = runtime_stat_avg(st, STAT_CYCLES_IN_TX, - ctx, cpu); + total = runtime_stat_avg(st, STAT_CYCLES_IN_TX, cpu, &rsd); if (avg) ratio = total / avg; @@ -1087,28 +1087,28 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, else print_metric(config, ctxp, NULL, NULL, "CPUs utilized", 0); } else if (perf_stat_evsel__is(evsel, TOPDOWN_FETCH_BUBBLES)) { - double fe_bound = td_fe_bound(ctx, cpu, st); + double fe_bound = td_fe_bound(cpu, st, &rsd); if (fe_bound > 0.2) color = PERF_COLOR_RED; print_metric(config, ctxp, color, "%8.1f%%", "frontend bound", fe_bound * 100.); } else if (perf_stat_evsel__is(evsel, TOPDOWN_SLOTS_RETIRED)) { - double retiring = td_retiring(ctx, cpu, st); + double retiring = td_retiring(cpu, st, &rsd); if (retiring > 0.7) color = PERF_COLOR_GREEN; print_metric(config, ctxp, color, "%8.1f%%", "retiring", retiring * 100.); } else if (perf_stat_evsel__is(evsel, TOPDOWN_RECOVERY_BUBBLES)) { - double bad_spec = td_bad_spec(ctx, cpu, st); + double bad_spec = td_bad_spec(cpu, st, &rsd); if (bad_spec > 0.1) color = PERF_COLOR_RED; print_metric(config, ctxp, color, "%8.1f%%", "bad speculation", bad_spec * 100.); } else if (perf_stat_evsel__is(evsel, TOPDOWN_SLOTS_ISSUED)) { - double be_bound = td_be_bound(ctx, cpu, st); + double be_bound = td_be_bound(cpu, st, &rsd); const char *name = "backend bound"; static int have_recovery_bubbles = -1; @@ -1121,43 +1121,43 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, if (be_bound > 0.2) color = PERF_COLOR_RED; - if (td_total_slots(ctx, cpu, st) > 0) + if (td_total_slots(cpu, st, &rsd) > 0) print_metric(config, ctxp, color, "%8.1f%%", name, be_bound * 100.); else print_metric(config, ctxp, NULL, NULL, name, 0); } else if (perf_stat_evsel__is(evsel, TOPDOWN_RETIRING) && - full_td(ctx, cpu, st)) { - double retiring = td_metric_ratio(ctx, cpu, - STAT_TOPDOWN_RETIRING, st); - + full_td(cpu, st, &rsd)) { + double retiring = td_metric_ratio(cpu, + STAT_TOPDOWN_RETIRING, st, + &rsd); if (retiring > 0.7) color = PERF_COLOR_GREEN; print_metric(config, ctxp, color, "%8.1f%%", "retiring", retiring * 100.); } else if (perf_stat_evsel__is(evsel, TOPDOWN_FE_BOUND) && - full_td(ctx, cpu, st)) { - double fe_bound = td_metric_ratio(ctx, cpu, - STAT_TOPDOWN_FE_BOUND, st); - + full_td(cpu, st, &rsd)) { + double fe_bound = td_metric_ratio(cpu, + STAT_TOPDOWN_FE_BOUND, st, + &rsd); if (fe_bound > 0.2) color = PERF_COLOR_RED; print_metric(config, ctxp, color, "%8.1f%%", "frontend bound", fe_bound * 100.); } else if (perf_stat_evsel__is(evsel, TOPDOWN_BE_BOUND) && - full_td(ctx, cpu, st)) { - double be_bound = td_metric_ratio(ctx, cpu, - STAT_TOPDOWN_BE_BOUND, st); - + full_td(cpu, st, &rsd)) { + double be_bound = td_metric_ratio(cpu, + STAT_TOPDOWN_BE_BOUND, st, + &rsd); if (be_bound > 0.2) color = PERF_COLOR_RED; print_metric(config, ctxp, color, "%8.1f%%", "backend bound", be_bound * 100.); } else if (perf_stat_evsel__is(evsel, TOPDOWN_BAD_SPEC) && - full_td(ctx, cpu, st)) { - double bad_spec = td_metric_ratio(ctx, cpu, - STAT_TOPDOWN_BAD_SPEC, st); - + full_td(cpu, st, &rsd)) { + double bad_spec = td_metric_ratio(cpu, + STAT_TOPDOWN_BAD_SPEC, st, + &rsd); if (bad_spec > 0.1) color = PERF_COLOR_RED; print_metric(config, ctxp, color, "%8.1f%%", "bad speculation", @@ -1165,11 +1165,11 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, } else if (evsel->metric_expr) { generic_metric(config, evsel->metric_expr, evsel->metric_events, NULL, evsel->name, evsel->metric_name, NULL, 1, cpu, out, st); - } else if (runtime_stat_n(st, STAT_NSECS, 0, cpu) != 0) { + } else if (runtime_stat_n(st, STAT_NSECS, cpu, &rsd) != 0) { char unit = 'M'; char unit_buf[10]; - total = runtime_stat_avg(st, STAT_NSECS, 0, cpu); + total = runtime_stat_avg(st, STAT_NSECS, cpu, &rsd); if (total) ratio = 1000.0 * avg / total; @@ -1180,7 +1180,7 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, snprintf(unit_buf, sizeof(unit_buf), "%c/sec", unit); print_metric(config, ctxp, NULL, "%8.3f", unit_buf, ratio); } else if (perf_stat_evsel__is(evsel, SMI_NUM)) { - print_smi_cost(config, cpu, evsel, out, st); + print_smi_cost(config, cpu, out, st, &rsd); } else { num = 0; } -- GitLab From a1bf23052bdfe30ec3c693cf32feb2d79114ac16 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Fri, 15 Jan 2021 16:11:39 +0900 Subject: [PATCH 0902/2012] perf stat: Take cgroups into account for shadow stats As of now it doesn't consider cgroups when collecting shadow stats and metrics so counter values from different cgroups will be saved in a same slot. This resulted in incorrect numbers when those cgroups have different workloads. For example, let's look at the scenario below: cgroups A and C runs same workload which burns a cpu while cgroup B runs a light workload. $ perf stat -a -e cycles,instructions --for-each-cgroup A,B,C sleep 1 Performance counter stats for 'system wide': 3,958,116,522 cycles A 6,722,650,929 instructions A # 2.53 insn per cycle 1,132,741 cycles B 571,743 instructions B # 0.00 insn per cycle 4,007,799,935 cycles C 6,793,181,523 instructions C # 2.56 insn per cycle 1.001050869 seconds time elapsed When I run 'perf stat' with single workload, it usually shows IPC around 1.7. We can verify it (6,722,650,929.0 / 3,958,116,522 = 1.698) for cgroup A. But in this case, since cgroups are ignored, cycles are averaged so it used the lower value for IPC calculation and resulted in around 2.5. avg cycle: (3958116522 + 1132741 + 4007799935) / 3 = 2655683066 IPC (A) : 6722650929 / 2655683066 = 2.531 IPC (B) : 571743 / 2655683066 = 0.0002 IPC (C) : 6793181523 / 2655683066 = 2.557 We can simply compare cgroup pointers in the evsel and it'll be NULL when cgroups are not specified. With this patch, I can see correct numbers like below: $ perf stat -a -e cycles,instructions --for-each-cgroup A,B,C sleep 1 Performance counter stats for 'system wide': 4,171,051,687 cycles A 7,219,793,922 instructions A # 1.73 insn per cycle 1,051,189 cycles B 583,102 instructions B # 0.55 insn per cycle 4,171,124,710 cycles C 7,192,944,580 instructions C # 1.72 insn per cycle 1.007909814 seconds time elapsed Signed-off-by: Namhyung Kim Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ian Rogers Cc: Jin Yao Cc: Mark Rutland Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20210115071139.257042-2-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat-shadow.c | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c index a1565b6e38f27..12eafd12a693c 100644 --- a/tools/perf/util/stat-shadow.c +++ b/tools/perf/util/stat-shadow.c @@ -8,6 +8,7 @@ #include "evlist.h" #include "expr.h" #include "metricgroup.h" +#include "cgroup.h" #include /* @@ -28,6 +29,7 @@ struct saved_value { enum stat_type type; int ctx; int cpu; + struct cgroup *cgrp; struct runtime_stat *stat; struct stats stats; u64 metric_total; @@ -57,6 +59,9 @@ static int saved_value_cmp(struct rb_node *rb_node, const void *entry) if (a->ctx != b->ctx) return a->ctx - b->ctx; + if (a->cgrp != b->cgrp) + return (char *)a->cgrp < (char *)b->cgrp ? -1 : +1; + if (a->evsel == NULL && b->evsel == NULL) { if (a->stat == b->stat) return 0; @@ -100,7 +105,8 @@ static struct saved_value *saved_value_lookup(struct evsel *evsel, bool create, enum stat_type type, int ctx, - struct runtime_stat *st) + struct runtime_stat *st, + struct cgroup *cgrp) { struct rblist *rblist; struct rb_node *nd; @@ -110,6 +116,7 @@ static struct saved_value *saved_value_lookup(struct evsel *evsel, .type = type, .ctx = ctx, .stat = st, + .cgrp = cgrp, }; rblist = &st->value_list; @@ -197,6 +204,7 @@ void perf_stat__reset_shadow_per_stat(struct runtime_stat *st) struct runtime_stat_data { int ctx; + struct cgroup *cgrp; }; static void update_runtime_stat(struct runtime_stat *st, @@ -205,7 +213,7 @@ static void update_runtime_stat(struct runtime_stat *st, struct runtime_stat_data *rsd) { struct saved_value *v = saved_value_lookup(NULL, cpu, true, type, - rsd->ctx, st); + rsd->ctx, st, rsd->cgrp); if (v) update_stats(&v->stats, count); @@ -223,6 +231,7 @@ void perf_stat__update_shadow_stats(struct evsel *counter, u64 count, struct saved_value *v; struct runtime_stat_data rsd = { .ctx = evsel_context(counter), + .cgrp = counter->cgrp, }; count *= counter->scale; @@ -290,13 +299,14 @@ void perf_stat__update_shadow_stats(struct evsel *counter, u64 count, update_runtime_stat(st, STAT_APERF, cpu, count, &rsd); if (counter->collect_stat) { - v = saved_value_lookup(counter, cpu, true, STAT_NONE, 0, st); + v = saved_value_lookup(counter, cpu, true, STAT_NONE, 0, st, + rsd.cgrp); update_stats(&v->stats, count); if (counter->metric_leader) v->metric_total += count; } else if (counter->metric_leader) { v = saved_value_lookup(counter->metric_leader, - cpu, true, STAT_NONE, 0, st); + cpu, true, STAT_NONE, 0, st, rsd.cgrp); v->metric_total += count; v->metric_other++; } @@ -438,7 +448,7 @@ static double runtime_stat_avg(struct runtime_stat *st, { struct saved_value *v; - v = saved_value_lookup(NULL, cpu, false, type, rsd->ctx, st); + v = saved_value_lookup(NULL, cpu, false, type, rsd->ctx, st, rsd->cgrp); if (!v) return 0.0; @@ -451,7 +461,7 @@ static double runtime_stat_n(struct runtime_stat *st, { struct saved_value *v; - v = saved_value_lookup(NULL, cpu, false, type, rsd->ctx, st); + v = saved_value_lookup(NULL, cpu, false, type, rsd->ctx, st, rsd->cgrp); if (!v) return 0.0; @@ -805,7 +815,8 @@ static int prepare_metric(struct evsel **metric_events, scale = 1e-9; } else { v = saved_value_lookup(metric_events[i], cpu, false, - STAT_NONE, 0, st); + STAT_NONE, 0, st, + metric_events[i]->cgrp); if (!v) break; stats = &v->stats; @@ -933,6 +944,7 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, const char *color = NULL; struct runtime_stat_data rsd = { .ctx = evsel_context(evsel), + .cgrp = evsel->cgrp, }; struct metric_event *me; int num = 1; -- GitLab From 5501e9229a80d95a1ea68609f44c447a75d23ed5 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Thu, 7 Jan 2021 19:41:59 +0200 Subject: [PATCH 0903/2012] perf intel-pt: Fix 'CPU too large' error In some cases, the number of cpus (nr_cpus_online) is confused with the maximum cpu number (nr_cpus_avail), which results in the error in the example below: Example on system with 8 cpus: Before: # echo 0 > /sys/devices/system/cpu/cpu2/online # ./perf record --kcore -e intel_pt// taskset --cpu-list 7 uname Linux [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.147 MB perf.data ] # ./perf script --itrace=e Requested CPU 7 too large. Consider raising MAX_NR_CPUS 0x25908 [0x8]: failed to process type: 68 [Invalid argument] After: # ./perf script --itrace=e # Fixes: 8c7274691f0d ("perf machine: Replace MAX_NR_CPUS with perf_env::nr_cpus_online") Fixes: 7df4e36a4785 ("perf session: Replace MAX_NR_CPUS with perf_env::nr_cpus_online") Signed-off-by: Adrian Hunter Tested-by: Kan Liang Cc: Jiri Olsa Cc: stable@vger.kernel.org Link: http://lore.kernel.org/lkml/20210107174159.24897-1-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/machine.c | 4 ++-- tools/perf/util/session.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index f841f3503cae6..1e9d3f982b477 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -2980,7 +2980,7 @@ int machines__for_each_thread(struct machines *machines, pid_t machine__get_current_tid(struct machine *machine, int cpu) { - int nr_cpus = min(machine->env->nr_cpus_online, MAX_NR_CPUS); + int nr_cpus = min(machine->env->nr_cpus_avail, MAX_NR_CPUS); if (cpu < 0 || cpu >= nr_cpus || !machine->current_tid) return -1; @@ -2992,7 +2992,7 @@ int machine__set_current_tid(struct machine *machine, int cpu, pid_t pid, pid_t tid) { struct thread *thread; - int nr_cpus = min(machine->env->nr_cpus_online, MAX_NR_CPUS); + int nr_cpus = min(machine->env->nr_cpus_avail, MAX_NR_CPUS); if (cpu < 0) return -EINVAL; diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 50ff9795a4f11..25adbcce02814 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -2404,7 +2404,7 @@ int perf_session__cpu_bitmap(struct perf_session *session, { int i, err = -1; struct perf_cpu_map *map; - int nr_cpus = min(session->header.env.nr_cpus_online, MAX_NR_CPUS); + int nr_cpus = min(session->header.env.nr_cpus_avail, MAX_NR_CPUS); for (i = 0; i < PERF_TYPE_MAX; ++i) { struct evsel *evsel; -- GitLab From 648b054a4647cd62e13ba79f398b8b97a7c82b19 Mon Sep 17 00:00:00 2001 From: Al Grant Date: Tue, 24 Nov 2020 19:58:17 +0000 Subject: [PATCH 0904/2012] perf inject: Correct event attribute sizes When 'perf inject' reads a perf.data file from an older version of perf, it writes event attributes into the output with the original size field, but lays them out as if they had the size currently used. Readers see a corrupt file. Update the size field to match the layout. Signed-off-by: Al Grant Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20201124195818.30603-1-al.grant@arm.com Signed-off-by: Denis Nikitin Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/header.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 062383e225a3e..c4ed3dc2c8f40 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -3323,6 +3323,14 @@ int perf_session__write_header(struct perf_session *session, attr_offset = lseek(ff.fd, 0, SEEK_CUR); evlist__for_each_entry(evlist, evsel) { + if (evsel->core.attr.size < sizeof(evsel->core.attr)) { + /* + * We are likely in "perf inject" and have read + * from an older file. Update attr size so that + * reader gets the right offset to the ids. + */ + evsel->core.attr.size = sizeof(evsel->core.attr); + } f_attr = (struct perf_file_attr){ .attr = evsel->core.attr, .ids = { -- GitLab From 235ecd36c7a93e4d6c73ac71137b8f1fa31148dd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20T=C3=B6pel?= Date: Fri, 15 Jan 2021 11:43:37 +0100 Subject: [PATCH 0905/2012] MAINTAINERS: Update my email address MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit My Intel email will stop working in a not too distant future. Move my MAINTAINERS entries to my kernel.org address. Signed-off-by: Björn Töpel Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20210115104337.7751-1-bjorn.topel@gmail.com --- .mailmap | 2 ++ MAINTAINERS | 4 ++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/.mailmap b/.mailmap index 632700cee55cd..b1ab0129c7d6b 100644 --- a/.mailmap +++ b/.mailmap @@ -55,6 +55,8 @@ Bart Van Assche Ben Gardner Ben M Cahill Björn Steinbrink +Björn Töpel +Björn Töpel Boris Brezillon Boris Brezillon Boris Brezillon diff --git a/MAINTAINERS b/MAINTAINERS index b15514a770e3c..0dfd1a67d430b 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3336,7 +3336,7 @@ F: arch/riscv/net/ X: arch/riscv/net/bpf_jit_comp64.c BPF JIT for RISC-V (64-bit) -M: Björn Töpel +M: Björn Töpel L: netdev@vger.kernel.org L: bpf@vger.kernel.org S: Maintained @@ -19409,7 +19409,7 @@ F: drivers/net/ethernet/*/*/*xdp* K: (?:\b|_)xdp(?:\b|_) XDP SOCKETS (AF_XDP) -M: Björn Töpel +M: Björn Töpel M: Magnus Karlsson R: Jonathan Lemon L: netdev@vger.kernel.org -- GitLab From a8d13dbccb137c46fead2ec1a4f1fbc8cfc9ea91 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 15 Jan 2021 16:04:23 -0700 Subject: [PATCH 0906/2012] io_uring: ensure finish_wait() is always called in __io_uring_task_cancel() If we enter with requests pending and performm cancelations, we'll have a different inflight count before and after calling prepare_to_wait(). This causes the loop to restart. If we actually ended up canceling everything, or everything completed in-between, then we'll break out of the loop without calling finish_wait() on the waitqueue. This can trigger a warning on exit_signals(), as we leave the task state in TASK_UNINTERRUPTIBLE. Put a finish_wait() after the loop to catch that case. Cc: stable@vger.kernel.org # 5.9+ Signed-off-by: Jens Axboe --- fs/io_uring.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/io_uring.c b/fs/io_uring.c index 06cc79d39586d..985a9e3f976d3 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -9101,6 +9101,7 @@ void __io_uring_task_cancel(void) finish_wait(&tctx->wait, &wait); } while (1); + finish_wait(&tctx->wait, &wait); atomic_dec(&tctx->in_idle); io_uring_remove_task_files(tctx); -- GitLab From 623c13295cf4e2d6ee80a6e8bae43529c0f020c9 Mon Sep 17 00:00:00 2001 From: Russell King Date: Thu, 14 Jan 2021 10:45:44 +0000 Subject: [PATCH 0907/2012] dt: ar803x: document SmartEEE properties The SmartEEE feature of Atheros AR803x PHYs can cause the link to bounce. Add DT properties to allow SmartEEE to be disabled, and to allow the Tw parameters for 100M and 1G links to be configured. Signed-off-by: Russell King Reviewed-by: Rob Herring Signed-off-by: Jakub Kicinski --- .../devicetree/bindings/net/qca,ar803x.yaml | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/Documentation/devicetree/bindings/net/qca,ar803x.yaml b/Documentation/devicetree/bindings/net/qca,ar803x.yaml index 64b3357ade8a0..b3d4013b7ca6c 100644 --- a/Documentation/devicetree/bindings/net/qca,ar803x.yaml +++ b/Documentation/devicetree/bindings/net/qca,ar803x.yaml @@ -28,6 +28,10 @@ properties: $ref: /schemas/types.yaml#/definitions/uint32 enum: [0, 1, 2] + qca,disable-smarteee: + description: Disable Atheros SmartEEE feature. + type: boolean + qca,keep-pll-enabled: description: | If set, keep the PLL enabled even if there is no link. Useful if you @@ -36,6 +40,18 @@ properties: Only supported on the AR8031. type: boolean + qca,smarteee-tw-us-100m: + description: EEE Tw parameter for 100M links. + $ref: /schemas/types.yaml#/definitions/uint32 + minimum: 1 + maximum: 255 + + qca,smarteee-tw-us-1g: + description: EEE Tw parameter for gigabit links. + $ref: /schemas/types.yaml#/definitions/uint32 + minimum: 1 + maximum: 255 + vddio-supply: description: | RGMII I/O voltage regulator (see regulator/regulator.yaml). -- GitLab From 390b4cad81484124db2b676ed20a265adc032bae Mon Sep 17 00:00:00 2001 From: Russell King Date: Thu, 14 Jan 2021 10:45:49 +0000 Subject: [PATCH 0908/2012] net: phy: at803x: add support for configuring SmartEEE SmartEEE for the atheros phy was deemed buggy by Freescale and commits were added to disable it for their boards. In initial testing, SolidRun found that the default settings were causing disconnects but by increasing the Tw buffer time we could allow enough time for all parts of the link to come out of a low power state and function properly without causing a disconnect. This allows us to have functional power savings of between 300 and 400mW, rather than disabling the feature altogether. This commit adds support for disabling SmartEEE and configuring the Tw parameters for 1G and 100M speeds. Signed-off-by: Russell King Signed-off-by: Jakub Kicinski --- drivers/net/phy/at803x.c | 65 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 64 insertions(+), 1 deletion(-) diff --git a/drivers/net/phy/at803x.c b/drivers/net/phy/at803x.c index c8d276d96798d..d67bddc111e3f 100644 --- a/drivers/net/phy/at803x.c +++ b/drivers/net/phy/at803x.c @@ -132,6 +132,11 @@ #define AT803X_MIN_DOWNSHIFT 2 #define AT803X_MAX_DOWNSHIFT 9 +#define AT803X_MMD3_SMARTEEE_CTL1 0x805b +#define AT803X_MMD3_SMARTEEE_CTL2 0x805c +#define AT803X_MMD3_SMARTEEE_CTL3 0x805d +#define AT803X_MMD3_SMARTEEE_CTL3_LPI_EN BIT(8) + #define ATH9331_PHY_ID 0x004dd041 #define ATH8030_PHY_ID 0x004dd076 #define ATH8031_PHY_ID 0x004dd074 @@ -146,8 +151,11 @@ MODULE_LICENSE("GPL"); struct at803x_priv { int flags; #define AT803X_KEEP_PLL_ENABLED BIT(0) /* don't turn off internal PLL */ +#define AT803X_DISABLE_SMARTEEE BIT(1) u16 clk_25m_reg; u16 clk_25m_mask; + u8 smarteee_lpi_tw_1g; + u8 smarteee_lpi_tw_100m; struct regulator_dev *vddio_rdev; struct regulator_dev *vddh_rdev; struct regulator *vddio; @@ -411,13 +419,32 @@ static int at803x_parse_dt(struct phy_device *phydev) { struct device_node *node = phydev->mdio.dev.of_node; struct at803x_priv *priv = phydev->priv; - u32 freq, strength; + u32 freq, strength, tw; unsigned int sel; int ret; if (!IS_ENABLED(CONFIG_OF_MDIO)) return 0; + if (of_property_read_bool(node, "qca,disable-smarteee")) + priv->flags |= AT803X_DISABLE_SMARTEEE; + + if (!of_property_read_u32(node, "qca,smarteee-tw-us-1g", &tw)) { + if (!tw || tw > 255) { + phydev_err(phydev, "invalid qca,smarteee-tw-us-1g\n"); + return -EINVAL; + } + priv->smarteee_lpi_tw_1g = tw; + } + + if (!of_property_read_u32(node, "qca,smarteee-tw-us-100m", &tw)) { + if (!tw || tw > 255) { + phydev_err(phydev, "invalid qca,smarteee-tw-us-100m\n"); + return -EINVAL; + } + priv->smarteee_lpi_tw_100m = tw; + } + ret = of_property_read_u32(node, "qca,clk-out-frequency", &freq); if (!ret) { switch (freq) { @@ -526,6 +553,38 @@ static void at803x_remove(struct phy_device *phydev) regulator_disable(priv->vddio); } +static int at803x_smarteee_config(struct phy_device *phydev) +{ + struct at803x_priv *priv = phydev->priv; + u16 mask = 0, val = 0; + int ret; + + if (priv->flags & AT803X_DISABLE_SMARTEEE) + return phy_modify_mmd(phydev, MDIO_MMD_PCS, + AT803X_MMD3_SMARTEEE_CTL3, + AT803X_MMD3_SMARTEEE_CTL3_LPI_EN, 0); + + if (priv->smarteee_lpi_tw_1g) { + mask |= 0xff00; + val |= priv->smarteee_lpi_tw_1g << 8; + } + if (priv->smarteee_lpi_tw_100m) { + mask |= 0x00ff; + val |= priv->smarteee_lpi_tw_100m; + } + if (!mask) + return 0; + + ret = phy_modify_mmd(phydev, MDIO_MMD_PCS, AT803X_MMD3_SMARTEEE_CTL1, + mask, val); + if (ret) + return ret; + + return phy_modify_mmd(phydev, MDIO_MMD_PCS, AT803X_MMD3_SMARTEEE_CTL3, + AT803X_MMD3_SMARTEEE_CTL3_LPI_EN, + AT803X_MMD3_SMARTEEE_CTL3_LPI_EN); +} + static int at803x_clk_out_config(struct phy_device *phydev) { struct at803x_priv *priv = phydev->priv; @@ -577,6 +636,10 @@ static int at803x_config_init(struct phy_device *phydev) if (ret < 0) return ret; + ret = at803x_smarteee_config(phydev); + if (ret < 0) + return ret; + ret = at803x_clk_out_config(phydev); if (ret < 0) return ret; -- GitLab From 54a52823a2d606871c33ef9e2793299768d5d896 Mon Sep 17 00:00:00 2001 From: George McCollister Date: Thu, 14 Jan 2021 13:57:32 -0600 Subject: [PATCH 0909/2012] dsa: add support for Arrow XRS700x tag trailer Add support for Arrow SpeedChips XRS700x single byte tag trailer. This is modeled on tag_trailer.c which works in a similar way. Signed-off-by: George McCollister Reviewed-by: Andrew Lunn Reviewed-by: Florian Fainelli Reviewed-by: Vladimir Oltean Signed-off-by: Jakub Kicinski --- include/net/dsa.h | 2 ++ net/dsa/Kconfig | 6 +++++ net/dsa/Makefile | 1 + net/dsa/tag_xrs700x.c | 61 +++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 70 insertions(+) create mode 100644 net/dsa/tag_xrs700x.c diff --git a/include/net/dsa.h b/include/net/dsa.h index 06e3784ec55c8..fa537afcab532 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -46,6 +46,7 @@ struct phylink_link_state; #define DSA_TAG_PROTO_AR9331_VALUE 16 #define DSA_TAG_PROTO_RTL4_A_VALUE 17 #define DSA_TAG_PROTO_HELLCREEK_VALUE 18 +#define DSA_TAG_PROTO_XRS700X_VALUE 19 enum dsa_tag_protocol { DSA_TAG_PROTO_NONE = DSA_TAG_PROTO_NONE_VALUE, @@ -67,6 +68,7 @@ enum dsa_tag_protocol { DSA_TAG_PROTO_AR9331 = DSA_TAG_PROTO_AR9331_VALUE, DSA_TAG_PROTO_RTL4_A = DSA_TAG_PROTO_RTL4_A_VALUE, DSA_TAG_PROTO_HELLCREEK = DSA_TAG_PROTO_HELLCREEK_VALUE, + DSA_TAG_PROTO_XRS700X = DSA_TAG_PROTO_XRS700X_VALUE, }; struct packet_type; diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig index dfecd7b22fd73..2d226a5c085fb 100644 --- a/net/dsa/Kconfig +++ b/net/dsa/Kconfig @@ -139,4 +139,10 @@ config NET_DSA_TAG_TRAILER Say Y or M if you want to enable support for tagging frames at with a trailed. e.g. Marvell 88E6060. +config NET_DSA_TAG_XRS700X + tristate "Tag driver for XRS700x switches" + help + Say Y or M if you want to enable support for tagging frames for + Arrow SpeedChips XRS700x switches that use a single byte tag trailer. + endif diff --git a/net/dsa/Makefile b/net/dsa/Makefile index 0fb2b75a7ae37..92cea21322415 100644 --- a/net/dsa/Makefile +++ b/net/dsa/Makefile @@ -18,3 +18,4 @@ obj-$(CONFIG_NET_DSA_TAG_OCELOT) += tag_ocelot.o obj-$(CONFIG_NET_DSA_TAG_QCA) += tag_qca.o obj-$(CONFIG_NET_DSA_TAG_SJA1105) += tag_sja1105.o obj-$(CONFIG_NET_DSA_TAG_TRAILER) += tag_trailer.o +obj-$(CONFIG_NET_DSA_TAG_XRS700X) += tag_xrs700x.o diff --git a/net/dsa/tag_xrs700x.c b/net/dsa/tag_xrs700x.c new file mode 100644 index 0000000000000..db0ed1a5fcb7e --- /dev/null +++ b/net/dsa/tag_xrs700x.c @@ -0,0 +1,61 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * XRS700x tag format handling + * Copyright (c) 2008-2009 Marvell Semiconductor + * Copyright (c) 2020 NovaTech LLC + */ + +#include + +#include "dsa_priv.h" + +static struct sk_buff *xrs700x_xmit(struct sk_buff *skb, struct net_device *dev) +{ + struct dsa_port *dp = dsa_slave_to_port(dev); + u8 *trailer; + + trailer = skb_put(skb, 1); + trailer[0] = BIT(dp->index); + + return skb; +} + +static struct sk_buff *xrs700x_rcv(struct sk_buff *skb, struct net_device *dev, + struct packet_type *pt) +{ + int source_port; + u8 *trailer; + + trailer = skb_tail_pointer(skb) - 1; + + source_port = ffs((int)trailer[0]) - 1; + + if (source_port < 0) + return NULL; + + skb->dev = dsa_master_find_slave(dev, 0, source_port); + if (!skb->dev) + return NULL; + + if (pskb_trim_rcsum(skb, skb->len - 1)) + return NULL; + + /* Frame is forwarded by hardware, don't forward in software. */ + skb->offload_fwd_mark = 1; + + return skb; +} + +static const struct dsa_device_ops xrs700x_netdev_ops = { + .name = "xrs700x", + .proto = DSA_TAG_PROTO_XRS700X, + .xmit = xrs700x_xmit, + .rcv = xrs700x_rcv, + .overhead = 1, + .tail_tag = true, +}; + +MODULE_LICENSE("GPL"); +MODULE_ALIAS_DSA_TAG_DRIVER(DSA_TAG_PROTO_XRS700X); + +module_dsa_tag_driver(xrs700x_netdev_ops); -- GitLab From ee00b24f32eb822f55190efd1078fe572e931d5c Mon Sep 17 00:00:00 2001 From: George McCollister Date: Thu, 14 Jan 2021 13:57:33 -0600 Subject: [PATCH 0910/2012] net: dsa: add Arrow SpeedChips XRS700x driver Add a driver with initial support for the Arrow SpeedChips XRS7000 series of gigabit Ethernet switch chips which are typically used in critical networking applications. The switches have up to three RGMII ports and one RMII port. Management to the switches can be performed over i2c or mdio. Support for advanced features such as PTP and HSR/PRP (IEC 62439-3 Clause 5 & 4) is not included in this patch and may be added at a later date. Signed-off-by: George McCollister Reviewed-by: Florian Fainelli Reviewed-by: Andrew Lunn Reviewed-by: Vladimir Oltean Signed-off-by: Jakub Kicinski --- drivers/net/dsa/Kconfig | 2 + drivers/net/dsa/Makefile | 1 + drivers/net/dsa/xrs700x/Kconfig | 26 ++ drivers/net/dsa/xrs700x/Makefile | 4 + drivers/net/dsa/xrs700x/xrs700x.c | 622 +++++++++++++++++++++++++ drivers/net/dsa/xrs700x/xrs700x.h | 42 ++ drivers/net/dsa/xrs700x/xrs700x_i2c.c | 150 ++++++ drivers/net/dsa/xrs700x/xrs700x_mdio.c | 163 +++++++ drivers/net/dsa/xrs700x/xrs700x_reg.h | 203 ++++++++ 9 files changed, 1213 insertions(+) create mode 100644 drivers/net/dsa/xrs700x/Kconfig create mode 100644 drivers/net/dsa/xrs700x/Makefile create mode 100644 drivers/net/dsa/xrs700x/xrs700x.c create mode 100644 drivers/net/dsa/xrs700x/xrs700x.h create mode 100644 drivers/net/dsa/xrs700x/xrs700x_i2c.c create mode 100644 drivers/net/dsa/xrs700x/xrs700x_mdio.c create mode 100644 drivers/net/dsa/xrs700x/xrs700x_reg.h diff --git a/drivers/net/dsa/Kconfig b/drivers/net/dsa/Kconfig index f6a0488589fc4..3af373e90806f 100644 --- a/drivers/net/dsa/Kconfig +++ b/drivers/net/dsa/Kconfig @@ -60,6 +60,8 @@ source "drivers/net/dsa/qca/Kconfig" source "drivers/net/dsa/sja1105/Kconfig" +source "drivers/net/dsa/xrs700x/Kconfig" + config NET_DSA_QCA8K tristate "Qualcomm Atheros QCA8K Ethernet switch family support" depends on NET_DSA diff --git a/drivers/net/dsa/Makefile b/drivers/net/dsa/Makefile index a84adb140a049..f3598c0409945 100644 --- a/drivers/net/dsa/Makefile +++ b/drivers/net/dsa/Makefile @@ -24,3 +24,4 @@ obj-y += mv88e6xxx/ obj-y += ocelot/ obj-y += qca/ obj-y += sja1105/ +obj-y += xrs700x/ diff --git a/drivers/net/dsa/xrs700x/Kconfig b/drivers/net/dsa/xrs700x/Kconfig new file mode 100644 index 0000000000000..d10a4dce16764 --- /dev/null +++ b/drivers/net/dsa/xrs700x/Kconfig @@ -0,0 +1,26 @@ +# SPDX-License-Identifier: GPL-2.0-only +config NET_DSA_XRS700X + tristate + depends on NET_DSA + select NET_DSA_TAG_XRS700X + select REGMAP + help + This enables support for Arrow SpeedChips XRS7003/7004 gigabit + Ethernet switches. + +config NET_DSA_XRS700X_I2C + tristate "Arrow XRS7000X series switch in I2C mode" + depends on NET_DSA && I2C + select NET_DSA_XRS700X + select REGMAP_I2C + help + Enable I2C support for Arrow SpeedChips XRS7003/7004 gigabit Ethernet + switches. + +config NET_DSA_XRS700X_MDIO + tristate "Arrow XRS7000X series switch in MDIO mode" + depends on NET_DSA + select NET_DSA_XRS700X + help + Enable MDIO support for Arrow SpeedChips XRS7003/7004 gigabit Ethernet + switches. diff --git a/drivers/net/dsa/xrs700x/Makefile b/drivers/net/dsa/xrs700x/Makefile new file mode 100644 index 0000000000000..51a3a7d9296ad --- /dev/null +++ b/drivers/net/dsa/xrs700x/Makefile @@ -0,0 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only +obj-$(CONFIG_NET_DSA_XRS700X) += xrs700x.o +obj-$(CONFIG_NET_DSA_XRS700X_I2C) += xrs700x_i2c.o +obj-$(CONFIG_NET_DSA_XRS700X_MDIO) += xrs700x_mdio.o diff --git a/drivers/net/dsa/xrs700x/xrs700x.c b/drivers/net/dsa/xrs700x/xrs700x.c new file mode 100644 index 0000000000000..259f5e657c46a --- /dev/null +++ b/drivers/net/dsa/xrs700x/xrs700x.c @@ -0,0 +1,622 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2020 NovaTech LLC + * George McCollister + */ + +#include +#include +#include +#include "xrs700x.h" +#include "xrs700x_reg.h" + +#define XRS700X_MIB_INTERVAL msecs_to_jiffies(3000) + +#define XRS7003E_ID 0x100 +#define XRS7003F_ID 0x101 +#define XRS7004E_ID 0x200 +#define XRS7004F_ID 0x201 + +const struct xrs700x_info xrs7003e_info = {XRS7003E_ID, "XRS7003E", 3}; +EXPORT_SYMBOL(xrs7003e_info); + +const struct xrs700x_info xrs7003f_info = {XRS7003F_ID, "XRS7003F", 3}; +EXPORT_SYMBOL(xrs7003f_info); + +const struct xrs700x_info xrs7004e_info = {XRS7004E_ID, "XRS7004E", 4}; +EXPORT_SYMBOL(xrs7004e_info); + +const struct xrs700x_info xrs7004f_info = {XRS7004F_ID, "XRS7004F", 4}; +EXPORT_SYMBOL(xrs7004f_info); + +struct xrs700x_regfield { + struct reg_field rf; + struct regmap_field **rmf; +}; + +struct xrs700x_mib { + unsigned int offset; + const char *name; + int stats64_offset; +}; + +#define XRS700X_MIB_ETHTOOL_ONLY(o, n) {o, n, -1} +#define XRS700X_MIB(o, n, m) {o, n, offsetof(struct rtnl_link_stats64, m)} + +static const struct xrs700x_mib xrs700x_mibs[] = { + XRS700X_MIB(XRS_RX_GOOD_OCTETS_L, "rx_good_octets", rx_bytes), + XRS700X_MIB_ETHTOOL_ONLY(XRS_RX_BAD_OCTETS_L, "rx_bad_octets"), + XRS700X_MIB(XRS_RX_UNICAST_L, "rx_unicast", rx_packets), + XRS700X_MIB(XRS_RX_BROADCAST_L, "rx_broadcast", rx_packets), + XRS700X_MIB(XRS_RX_MULTICAST_L, "rx_multicast", multicast), + XRS700X_MIB(XRS_RX_UNDERSIZE_L, "rx_undersize", rx_length_errors), + XRS700X_MIB(XRS_RX_FRAGMENTS_L, "rx_fragments", rx_length_errors), + XRS700X_MIB(XRS_RX_OVERSIZE_L, "rx_oversize", rx_length_errors), + XRS700X_MIB(XRS_RX_JABBER_L, "rx_jabber", rx_length_errors), + XRS700X_MIB(XRS_RX_ERR_L, "rx_err", rx_errors), + XRS700X_MIB(XRS_RX_CRC_L, "rx_crc", rx_crc_errors), + XRS700X_MIB_ETHTOOL_ONLY(XRS_RX_64_L, "rx_64"), + XRS700X_MIB_ETHTOOL_ONLY(XRS_RX_65_127_L, "rx_65_127"), + XRS700X_MIB_ETHTOOL_ONLY(XRS_RX_128_255_L, "rx_128_255"), + XRS700X_MIB_ETHTOOL_ONLY(XRS_RX_256_511_L, "rx_256_511"), + XRS700X_MIB_ETHTOOL_ONLY(XRS_RX_512_1023_L, "rx_512_1023"), + XRS700X_MIB_ETHTOOL_ONLY(XRS_RX_1024_1536_L, "rx_1024_1536"), + XRS700X_MIB_ETHTOOL_ONLY(XRS_RX_HSR_PRP_L, "rx_hsr_prp"), + XRS700X_MIB_ETHTOOL_ONLY(XRS_RX_WRONGLAN_L, "rx_wronglan"), + XRS700X_MIB_ETHTOOL_ONLY(XRS_RX_DUPLICATE_L, "rx_duplicate"), + XRS700X_MIB(XRS_TX_OCTETS_L, "tx_octets", tx_bytes), + XRS700X_MIB(XRS_TX_UNICAST_L, "tx_unicast", tx_packets), + XRS700X_MIB(XRS_TX_BROADCAST_L, "tx_broadcast", tx_packets), + XRS700X_MIB(XRS_TX_MULTICAST_L, "tx_multicast", tx_packets), + XRS700X_MIB_ETHTOOL_ONLY(XRS_TX_HSR_PRP_L, "tx_hsr_prp"), + XRS700X_MIB(XRS_PRIQ_DROP_L, "priq_drop", tx_dropped), + XRS700X_MIB(XRS_EARLY_DROP_L, "early_drop", tx_dropped), +}; + +static void xrs700x_get_strings(struct dsa_switch *ds, int port, + u32 stringset, u8 *data) +{ + int i; + + if (stringset != ETH_SS_STATS) + return; + + for (i = 0; i < ARRAY_SIZE(xrs700x_mibs); i++) { + strscpy(data, xrs700x_mibs[i].name, ETH_GSTRING_LEN); + data += ETH_GSTRING_LEN; + } +} + +static int xrs700x_get_sset_count(struct dsa_switch *ds, int port, int sset) +{ + if (sset != ETH_SS_STATS) + return -EOPNOTSUPP; + + return ARRAY_SIZE(xrs700x_mibs); +} + +static void xrs700x_read_port_counters(struct xrs700x *priv, int port) +{ + struct xrs700x_port *p = &priv->ports[port]; + struct rtnl_link_stats64 stats; + int i; + + memset(&stats, 0, sizeof(stats)); + + mutex_lock(&p->mib_mutex); + + /* Capture counter values */ + regmap_write(priv->regmap, XRS_CNT_CTRL(port), 1); + + for (i = 0; i < ARRAY_SIZE(xrs700x_mibs); i++) { + unsigned int high = 0, low = 0, reg; + + reg = xrs700x_mibs[i].offset + XRS_PORT_OFFSET * port; + regmap_read(priv->regmap, reg, &low); + regmap_read(priv->regmap, reg + 2, &high); + + p->mib_data[i] += (high << 16) | low; + + if (xrs700x_mibs[i].stats64_offset >= 0) { + u8 *s = (u8 *)&stats + xrs700x_mibs[i].stats64_offset; + *(u64 *)s += p->mib_data[i]; + } + } + + /* multicast must be added to rx_packets (which already includes + * unicast and broadcast) + */ + stats.rx_packets += stats.multicast; + + u64_stats_update_begin(&p->syncp); + p->stats64 = stats; + u64_stats_update_end(&p->syncp); + + mutex_unlock(&p->mib_mutex); +} + +static void xrs700x_mib_work(struct work_struct *work) +{ + struct xrs700x *priv = container_of(work, struct xrs700x, + mib_work.work); + int i; + + for (i = 0; i < priv->ds->num_ports; i++) + xrs700x_read_port_counters(priv, i); + + schedule_delayed_work(&priv->mib_work, XRS700X_MIB_INTERVAL); +} + +static void xrs700x_get_ethtool_stats(struct dsa_switch *ds, int port, + u64 *data) +{ + struct xrs700x *priv = ds->priv; + struct xrs700x_port *p = &priv->ports[port]; + + xrs700x_read_port_counters(priv, port); + + mutex_lock(&p->mib_mutex); + memcpy(data, p->mib_data, sizeof(*data) * ARRAY_SIZE(xrs700x_mibs)); + mutex_unlock(&p->mib_mutex); +} + +static void xrs700x_get_stats64(struct dsa_switch *ds, int port, + struct rtnl_link_stats64 *s) +{ + struct xrs700x *priv = ds->priv; + struct xrs700x_port *p = &priv->ports[port]; + unsigned int start; + + do { + start = u64_stats_fetch_begin(&p->syncp); + *s = p->stats64; + } while (u64_stats_fetch_retry(&p->syncp, start)); +} + +static int xrs700x_setup_regmap_range(struct xrs700x *priv) +{ + struct xrs700x_regfield regfields[] = { + { + .rf = REG_FIELD_ID(XRS_PORT_STATE(0), 0, 1, + priv->ds->num_ports, + XRS_PORT_OFFSET), + .rmf = &priv->ps_forward + }, + { + .rf = REG_FIELD_ID(XRS_PORT_STATE(0), 2, 3, + priv->ds->num_ports, + XRS_PORT_OFFSET), + .rmf = &priv->ps_management + }, + { + .rf = REG_FIELD_ID(XRS_PORT_STATE(0), 4, 9, + priv->ds->num_ports, + XRS_PORT_OFFSET), + .rmf = &priv->ps_sel_speed + }, + { + .rf = REG_FIELD_ID(XRS_PORT_STATE(0), 10, 11, + priv->ds->num_ports, + XRS_PORT_OFFSET), + .rmf = &priv->ps_cur_speed + } + }; + int i = 0; + + for (; i < ARRAY_SIZE(regfields); i++) { + *regfields[i].rmf = devm_regmap_field_alloc(priv->dev, + priv->regmap, + regfields[i].rf); + if (IS_ERR(*regfields[i].rmf)) + return PTR_ERR(*regfields[i].rmf); + } + + return 0; +} + +static enum dsa_tag_protocol xrs700x_get_tag_protocol(struct dsa_switch *ds, + int port, + enum dsa_tag_protocol m) +{ + return DSA_TAG_PROTO_XRS700X; +} + +static int xrs700x_reset(struct dsa_switch *ds) +{ + struct xrs700x *priv = ds->priv; + unsigned int val; + int ret; + + ret = regmap_write(priv->regmap, XRS_GENERAL, XRS_GENERAL_RESET); + if (ret) + goto error; + + ret = regmap_read_poll_timeout(priv->regmap, XRS_GENERAL, + val, !(val & XRS_GENERAL_RESET), + 10, 1000); +error: + if (ret) { + dev_err_ratelimited(priv->dev, "error resetting switch: %d\n", + ret); + } + + return ret; +} + +static void xrs700x_port_stp_state_set(struct dsa_switch *ds, int port, + u8 state) +{ + struct xrs700x *priv = ds->priv; + unsigned int bpdus = 1; + unsigned int val; + + switch (state) { + case BR_STATE_DISABLED: + bpdus = 0; + fallthrough; + case BR_STATE_BLOCKING: + case BR_STATE_LISTENING: + val = XRS_PORT_DISABLED; + break; + case BR_STATE_LEARNING: + val = XRS_PORT_LEARNING; + break; + case BR_STATE_FORWARDING: + val = XRS_PORT_FORWARDING; + break; + default: + dev_err(ds->dev, "invalid STP state: %d\n", state); + return; + } + + regmap_fields_write(priv->ps_forward, port, val); + + /* Enable/disable inbound policy added by xrs700x_port_add_bpdu_ipf() + * which allows BPDU forwarding to the CPU port when the front facing + * port is in disabled/learning state. + */ + regmap_update_bits(priv->regmap, XRS_ETH_ADDR_CFG(port, 0), 1, bpdus); + + dev_dbg_ratelimited(priv->dev, "%s - port: %d, state: %u, val: 0x%x\n", + __func__, port, state, val); +} + +/* Add an inbound policy filter which matches the BPDU destination MAC + * and forwards to the CPU port. Leave the policy disabled, it will be + * enabled as needed. + */ +static int xrs700x_port_add_bpdu_ipf(struct dsa_switch *ds, int port) +{ + struct xrs700x *priv = ds->priv; + unsigned int val = 0; + int i = 0; + int ret; + + /* Compare all 48 bits of the destination MAC address. */ + ret = regmap_write(priv->regmap, XRS_ETH_ADDR_CFG(port, 0), 48 << 2); + if (ret) + return ret; + + /* match BPDU destination 01:80:c2:00:00:00 */ + for (i = 0; i < sizeof(eth_stp_addr); i += 2) { + ret = regmap_write(priv->regmap, XRS_ETH_ADDR_0(port, 0) + i, + eth_stp_addr[i] | + (eth_stp_addr[i + 1] << 8)); + if (ret) + return ret; + } + + /* Mirror BPDU to CPU port */ + for (i = 0; i < ds->num_ports; i++) { + if (dsa_is_cpu_port(ds, i)) + val |= BIT(i); + } + + ret = regmap_write(priv->regmap, XRS_ETH_ADDR_FWD_MIRROR(port, 0), val); + if (ret) + return ret; + + ret = regmap_write(priv->regmap, XRS_ETH_ADDR_FWD_ALLOW(port, 0), 0); + if (ret) + return ret; + + return 0; +} + +static int xrs700x_port_setup(struct dsa_switch *ds, int port) +{ + bool cpu_port = dsa_is_cpu_port(ds, port); + struct xrs700x *priv = ds->priv; + unsigned int val = 0; + int ret, i; + + xrs700x_port_stp_state_set(ds, port, BR_STATE_DISABLED); + + /* Disable forwarding to non-CPU ports */ + for (i = 0; i < ds->num_ports; i++) { + if (!dsa_is_cpu_port(ds, i)) + val |= BIT(i); + } + + /* 1 = Disable forwarding to the port */ + ret = regmap_write(priv->regmap, XRS_PORT_FWD_MASK(port), val); + if (ret) + return ret; + + val = cpu_port ? XRS_PORT_MODE_MANAGEMENT : XRS_PORT_MODE_NORMAL; + ret = regmap_fields_write(priv->ps_management, port, val); + if (ret) + return ret; + + if (!cpu_port) { + ret = xrs700x_port_add_bpdu_ipf(ds, port); + if (ret) + return ret; + } + + return 0; +} + +static int xrs700x_setup(struct dsa_switch *ds) +{ + struct xrs700x *priv = ds->priv; + int ret, i; + + ret = xrs700x_reset(ds); + if (ret) + return ret; + + for (i = 0; i < ds->num_ports; i++) { + ret = xrs700x_port_setup(ds, i); + if (ret) + return ret; + } + + schedule_delayed_work(&priv->mib_work, XRS700X_MIB_INTERVAL); + + return 0; +} + +static void xrs700x_teardown(struct dsa_switch *ds) +{ + struct xrs700x *priv = ds->priv; + + cancel_delayed_work_sync(&priv->mib_work); +} + +static void xrs700x_phylink_validate(struct dsa_switch *ds, int port, + unsigned long *supported, + struct phylink_link_state *state) +{ + __ETHTOOL_DECLARE_LINK_MODE_MASK(mask) = { 0, }; + + switch (port) { + case 0: + break; + case 1: + case 2: + case 3: + phylink_set(mask, 1000baseT_Full); + break; + default: + bitmap_zero(supported, __ETHTOOL_LINK_MODE_MASK_NBITS); + dev_err(ds->dev, "Unsupported port: %i\n", port); + return; + } + + phylink_set_port_modes(mask); + + /* The switch only supports full duplex. */ + phylink_set(mask, 10baseT_Full); + phylink_set(mask, 100baseT_Full); + + bitmap_and(supported, supported, mask, + __ETHTOOL_LINK_MODE_MASK_NBITS); + bitmap_and(state->advertising, state->advertising, mask, + __ETHTOOL_LINK_MODE_MASK_NBITS); +} + +static void xrs700x_mac_link_up(struct dsa_switch *ds, int port, + unsigned int mode, phy_interface_t interface, + struct phy_device *phydev, + int speed, int duplex, + bool tx_pause, bool rx_pause) +{ + struct xrs700x *priv = ds->priv; + unsigned int val; + + switch (speed) { + case SPEED_1000: + val = XRS_PORT_SPEED_1000; + break; + case SPEED_100: + val = XRS_PORT_SPEED_100; + break; + case SPEED_10: + val = XRS_PORT_SPEED_10; + break; + default: + return; + } + + regmap_fields_write(priv->ps_sel_speed, port, val); + + dev_dbg_ratelimited(priv->dev, "%s: port: %d mode: %u speed: %u\n", + __func__, port, mode, speed); +} + +static int xrs700x_bridge_common(struct dsa_switch *ds, int port, + struct net_device *bridge, bool join) +{ + unsigned int i, cpu_mask = 0, mask = 0; + struct xrs700x *priv = ds->priv; + int ret; + + for (i = 0; i < ds->num_ports; i++) { + if (dsa_is_cpu_port(ds, i)) + continue; + + cpu_mask |= BIT(i); + + if (dsa_to_port(ds, i)->bridge_dev == bridge) + continue; + + mask |= BIT(i); + } + + for (i = 0; i < ds->num_ports; i++) { + if (dsa_to_port(ds, i)->bridge_dev != bridge) + continue; + + /* 1 = Disable forwarding to the port */ + ret = regmap_write(priv->regmap, XRS_PORT_FWD_MASK(i), mask); + if (ret) + return ret; + } + + if (!join) { + ret = regmap_write(priv->regmap, XRS_PORT_FWD_MASK(port), + cpu_mask); + if (ret) + return ret; + } + + return 0; +} + +static int xrs700x_bridge_join(struct dsa_switch *ds, int port, + struct net_device *bridge) +{ + return xrs700x_bridge_common(ds, port, bridge, true); +} + +static void xrs700x_bridge_leave(struct dsa_switch *ds, int port, + struct net_device *bridge) +{ + xrs700x_bridge_common(ds, port, bridge, false); +} + +static const struct dsa_switch_ops xrs700x_ops = { + .get_tag_protocol = xrs700x_get_tag_protocol, + .setup = xrs700x_setup, + .teardown = xrs700x_teardown, + .port_stp_state_set = xrs700x_port_stp_state_set, + .phylink_validate = xrs700x_phylink_validate, + .phylink_mac_link_up = xrs700x_mac_link_up, + .get_strings = xrs700x_get_strings, + .get_sset_count = xrs700x_get_sset_count, + .get_ethtool_stats = xrs700x_get_ethtool_stats, + .get_stats64 = xrs700x_get_stats64, + .port_bridge_join = xrs700x_bridge_join, + .port_bridge_leave = xrs700x_bridge_leave, +}; + +static int xrs700x_detect(struct xrs700x *priv) +{ + const struct xrs700x_info *info; + unsigned int id; + int ret; + + ret = regmap_read(priv->regmap, XRS_DEV_ID0, &id); + if (ret) { + dev_err(priv->dev, "error %d while reading switch id.\n", + ret); + return ret; + } + + info = of_device_get_match_data(priv->dev); + if (!info) + return -EINVAL; + + if (info->id == id) { + priv->ds->num_ports = info->num_ports; + dev_info(priv->dev, "%s detected.\n", info->name); + return 0; + } + + dev_err(priv->dev, "expected switch id 0x%x but found 0x%x.\n", + info->id, id); + + return -ENODEV; +} + +struct xrs700x *xrs700x_switch_alloc(struct device *base, void *devpriv) +{ + struct dsa_switch *ds; + struct xrs700x *priv; + + ds = devm_kzalloc(base, sizeof(*ds), GFP_KERNEL); + if (!ds) + return NULL; + + ds->dev = base; + + priv = devm_kzalloc(base, sizeof(*priv), GFP_KERNEL); + if (!priv) + return NULL; + + INIT_DELAYED_WORK(&priv->mib_work, xrs700x_mib_work); + + ds->ops = &xrs700x_ops; + ds->priv = priv; + priv->dev = base; + + priv->ds = ds; + priv->priv = devpriv; + + return priv; +} +EXPORT_SYMBOL(xrs700x_switch_alloc); + +static int xrs700x_alloc_port_mib(struct xrs700x *priv, int port) +{ + struct xrs700x_port *p = &priv->ports[port]; + + p->mib_data = devm_kcalloc(priv->dev, ARRAY_SIZE(xrs700x_mibs), + sizeof(*p->mib_data), GFP_KERNEL); + if (!p->mib_data) + return -ENOMEM; + + mutex_init(&p->mib_mutex); + u64_stats_init(&p->syncp); + + return 0; +} + +int xrs700x_switch_register(struct xrs700x *priv) +{ + int ret; + int i; + + ret = xrs700x_detect(priv); + if (ret) + return ret; + + ret = xrs700x_setup_regmap_range(priv); + if (ret) + return ret; + + priv->ports = devm_kcalloc(priv->dev, priv->ds->num_ports, + sizeof(*priv->ports), GFP_KERNEL); + if (!priv->ports) + return -ENOMEM; + + for (i = 0; i < priv->ds->num_ports; i++) { + ret = xrs700x_alloc_port_mib(priv, i); + if (ret) + return ret; + } + + return dsa_register_switch(priv->ds); +} +EXPORT_SYMBOL(xrs700x_switch_register); + +void xrs700x_switch_remove(struct xrs700x *priv) +{ + dsa_unregister_switch(priv->ds); +} +EXPORT_SYMBOL(xrs700x_switch_remove); + +MODULE_AUTHOR("George McCollister "); +MODULE_DESCRIPTION("Arrow SpeedChips XRS700x DSA driver"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/net/dsa/xrs700x/xrs700x.h b/drivers/net/dsa/xrs700x/xrs700x.h new file mode 100644 index 0000000000000..ff62cf61b0915 --- /dev/null +++ b/drivers/net/dsa/xrs700x/xrs700x.h @@ -0,0 +1,42 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#include +#include +#include +#include +#include +#include + +struct xrs700x_info { + unsigned int id; + const char *name; + size_t num_ports; +}; + +extern const struct xrs700x_info xrs7003e_info; +extern const struct xrs700x_info xrs7003f_info; +extern const struct xrs700x_info xrs7004e_info; +extern const struct xrs700x_info xrs7004f_info; + +struct xrs700x_port { + struct mutex mib_mutex; /* protects mib_data */ + u64 *mib_data; + struct rtnl_link_stats64 stats64; + struct u64_stats_sync syncp; +}; + +struct xrs700x { + struct dsa_switch *ds; + struct device *dev; + void *priv; + struct regmap *regmap; + struct regmap_field *ps_forward; + struct regmap_field *ps_management; + struct regmap_field *ps_sel_speed; + struct regmap_field *ps_cur_speed; + struct delayed_work mib_work; + struct xrs700x_port *ports; +}; + +struct xrs700x *xrs700x_switch_alloc(struct device *base, void *devpriv); +int xrs700x_switch_register(struct xrs700x *priv); +void xrs700x_switch_remove(struct xrs700x *priv); diff --git a/drivers/net/dsa/xrs700x/xrs700x_i2c.c b/drivers/net/dsa/xrs700x/xrs700x_i2c.c new file mode 100644 index 0000000000000..a5f8883af8292 --- /dev/null +++ b/drivers/net/dsa/xrs700x/xrs700x_i2c.c @@ -0,0 +1,150 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2020 NovaTech LLC + * George McCollister + */ + +#include +#include +#include +#include "xrs700x.h" +#include "xrs700x_reg.h" + +static int xrs700x_i2c_reg_read(void *context, unsigned int reg, + unsigned int *val) +{ + struct device *dev = context; + struct i2c_client *i2c = to_i2c_client(dev); + unsigned char buf[4]; + int ret; + + buf[0] = reg >> 23 & 0xff; + buf[1] = reg >> 15 & 0xff; + buf[2] = reg >> 7 & 0xff; + buf[3] = (reg & 0x7f) << 1; + + ret = i2c_master_send(i2c, buf, sizeof(buf)); + if (ret < 0) { + dev_err(dev, "xrs i2c_master_send returned %d\n", ret); + return ret; + } + + ret = i2c_master_recv(i2c, buf, 2); + if (ret < 0) { + dev_err(dev, "xrs i2c_master_recv returned %d\n", ret); + return ret; + } + + *val = buf[0] << 8 | buf[1]; + + return 0; +} + +static int xrs700x_i2c_reg_write(void *context, unsigned int reg, + unsigned int val) +{ + struct device *dev = context; + struct i2c_client *i2c = to_i2c_client(dev); + unsigned char buf[6]; + int ret; + + buf[0] = reg >> 23 & 0xff; + buf[1] = reg >> 15 & 0xff; + buf[2] = reg >> 7 & 0xff; + buf[3] = (reg & 0x7f) << 1 | 1; + buf[4] = val >> 8 & 0xff; + buf[5] = val & 0xff; + + ret = i2c_master_send(i2c, buf, sizeof(buf)); + if (ret < 0) { + dev_err(dev, "xrs i2c_master_send returned %d\n", ret); + return ret; + } + + return 0; +} + +static const struct regmap_config xrs700x_i2c_regmap_config = { + .val_bits = 16, + .reg_stride = 2, + .reg_bits = 32, + .pad_bits = 0, + .write_flag_mask = 0, + .read_flag_mask = 0, + .reg_read = xrs700x_i2c_reg_read, + .reg_write = xrs700x_i2c_reg_write, + .max_register = 0, + .cache_type = REGCACHE_NONE, + .reg_format_endian = REGMAP_ENDIAN_BIG, + .val_format_endian = REGMAP_ENDIAN_BIG +}; + +static int xrs700x_i2c_probe(struct i2c_client *i2c, + const struct i2c_device_id *i2c_id) +{ + struct xrs700x *priv; + int ret; + + priv = xrs700x_switch_alloc(&i2c->dev, i2c); + if (!priv) + return -ENOMEM; + + priv->regmap = devm_regmap_init(&i2c->dev, NULL, &i2c->dev, + &xrs700x_i2c_regmap_config); + if (IS_ERR(priv->regmap)) { + ret = PTR_ERR(priv->regmap); + dev_err(&i2c->dev, "Failed to initialize regmap: %d\n", ret); + return ret; + } + + i2c_set_clientdata(i2c, priv); + + ret = xrs700x_switch_register(priv); + + /* Main DSA driver may not be started yet. */ + if (ret) + return ret; + + return 0; +} + +static int xrs700x_i2c_remove(struct i2c_client *i2c) +{ + struct xrs700x *priv = i2c_get_clientdata(i2c); + + xrs700x_switch_remove(priv); + + return 0; +} + +static const struct i2c_device_id xrs700x_i2c_id[] = { + { "xrs700x-switch", 0 }, + {}, +}; + +MODULE_DEVICE_TABLE(i2c, xrs700x_i2c_id); + +static const struct of_device_id xrs700x_i2c_dt_ids[] = { + { .compatible = "arrow,xrs7003e", .data = &xrs7003e_info }, + { .compatible = "arrow,xrs7003f", .data = &xrs7003f_info }, + { .compatible = "arrow,xrs7004e", .data = &xrs7004e_info }, + { .compatible = "arrow,xrs7004f", .data = &xrs7004f_info }, + {}, +}; +MODULE_DEVICE_TABLE(of, xrs700x_i2c_dt_ids); + +static struct i2c_driver xrs700x_i2c_driver = { + .driver = { + .name = "xrs700x-i2c", + .of_match_table = of_match_ptr(xrs700x_i2c_dt_ids), + }, + .probe = xrs700x_i2c_probe, + .remove = xrs700x_i2c_remove, + .id_table = xrs700x_i2c_id, +}; + +module_i2c_driver(xrs700x_i2c_driver); + +MODULE_AUTHOR("George McCollister "); +MODULE_DESCRIPTION("Arrow SpeedChips XRS700x DSA I2C driver"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/net/dsa/xrs700x/xrs700x_mdio.c b/drivers/net/dsa/xrs700x/xrs700x_mdio.c new file mode 100644 index 0000000000000..a10ee28eb86e4 --- /dev/null +++ b/drivers/net/dsa/xrs700x/xrs700x_mdio.c @@ -0,0 +1,163 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2020 NovaTech LLC + * George McCollister + */ + +#include +#include +#include +#include +#include +#include +#include "xrs700x.h" +#include "xrs700x_reg.h" + +#define XRS_MDIO_IBA0 0x10 +#define XRS_MDIO_IBA1 0x11 +#define XRS_MDIO_IBD 0x14 + +#define XRS_IB_READ 0x0 +#define XRS_IB_WRITE 0x1 + +static int xrs700x_mdio_reg_read(void *context, unsigned int reg, + unsigned int *val) +{ + struct mdio_device *mdiodev = context; + struct device *dev = &mdiodev->dev; + u16 uval; + int ret; + + uval = (u16)FIELD_GET(GENMASK(31, 16), reg); + + ret = mdiobus_write(mdiodev->bus, mdiodev->addr, XRS_MDIO_IBA1, uval); + if (ret < 0) { + dev_err(dev, "xrs mdiobus_write returned %d\n", ret); + return ret; + } + + uval = (u16)((reg & GENMASK(15, 1)) | XRS_IB_READ); + + ret = mdiobus_write(mdiodev->bus, mdiodev->addr, XRS_MDIO_IBA0, uval); + if (ret < 0) { + dev_err(dev, "xrs mdiobus_write returned %d\n", ret); + return ret; + } + + ret = mdiobus_read(mdiodev->bus, mdiodev->addr, XRS_MDIO_IBD); + if (ret < 0) { + dev_err(dev, "xrs mdiobus_read returned %d\n", ret); + return ret; + } + + *val = (unsigned int)ret; + + return 0; +} + +static int xrs700x_mdio_reg_write(void *context, unsigned int reg, + unsigned int val) +{ + struct mdio_device *mdiodev = context; + struct device *dev = &mdiodev->dev; + u16 uval; + int ret; + + ret = mdiobus_write(mdiodev->bus, mdiodev->addr, XRS_MDIO_IBD, (u16)val); + if (ret < 0) { + dev_err(dev, "xrs mdiobus_write returned %d\n", ret); + return ret; + } + + uval = (u16)FIELD_GET(GENMASK(31, 16), reg); + + ret = mdiobus_write(mdiodev->bus, mdiodev->addr, XRS_MDIO_IBA1, uval); + if (ret < 0) { + dev_err(dev, "xrs mdiobus_write returned %d\n", ret); + return ret; + } + + uval = (u16)((reg & GENMASK(15, 1)) | XRS_IB_WRITE); + + ret = mdiobus_write(mdiodev->bus, mdiodev->addr, XRS_MDIO_IBA0, uval); + if (ret < 0) { + dev_err(dev, "xrs mdiobus_write returned %d\n", ret); + return ret; + } + + return 0; +} + +static const struct regmap_config xrs700x_mdio_regmap_config = { + .val_bits = 16, + .reg_stride = 2, + .reg_bits = 32, + .pad_bits = 0, + .write_flag_mask = 0, + .read_flag_mask = 0, + .reg_read = xrs700x_mdio_reg_read, + .reg_write = xrs700x_mdio_reg_write, + .max_register = XRS_VLAN(VLAN_N_VID - 1), + .cache_type = REGCACHE_NONE, + .reg_format_endian = REGMAP_ENDIAN_BIG, + .val_format_endian = REGMAP_ENDIAN_BIG +}; + +static int xrs700x_mdio_probe(struct mdio_device *mdiodev) +{ + struct xrs700x *priv; + int ret; + + priv = xrs700x_switch_alloc(&mdiodev->dev, mdiodev); + if (!priv) + return -ENOMEM; + + priv->regmap = devm_regmap_init(&mdiodev->dev, NULL, mdiodev, + &xrs700x_mdio_regmap_config); + if (IS_ERR(priv->regmap)) { + ret = PTR_ERR(priv->regmap); + dev_err(&mdiodev->dev, "Failed to initialize regmap: %d\n", ret); + return ret; + } + + dev_set_drvdata(&mdiodev->dev, priv); + + ret = xrs700x_switch_register(priv); + + /* Main DSA driver may not be started yet. */ + if (ret) + return ret; + + return 0; +} + +static void xrs700x_mdio_remove(struct mdio_device *mdiodev) +{ + struct xrs700x *priv = dev_get_drvdata(&mdiodev->dev); + + xrs700x_switch_remove(priv); +} + +static const struct of_device_id xrs700x_mdio_dt_ids[] = { + { .compatible = "arrow,xrs7003e", .data = &xrs7003e_info }, + { .compatible = "arrow,xrs7003f", .data = &xrs7003f_info }, + { .compatible = "arrow,xrs7004e", .data = &xrs7004e_info }, + { .compatible = "arrow,xrs7004f", .data = &xrs7004f_info }, + {}, +}; +MODULE_DEVICE_TABLE(of, xrs700x_mdio_dt_ids); + +static struct mdio_driver xrs700x_mdio_driver = { + .mdiodrv.driver = { + .name = "xrs700x-mdio", + .of_match_table = xrs700x_mdio_dt_ids, + }, + .probe = xrs700x_mdio_probe, + .remove = xrs700x_mdio_remove, +}; + +mdio_module_driver(xrs700x_mdio_driver); + +MODULE_AUTHOR("George McCollister "); +MODULE_DESCRIPTION("Arrow SpeedChips XRS700x DSA MDIO driver"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/net/dsa/xrs700x/xrs700x_reg.h b/drivers/net/dsa/xrs700x/xrs700x_reg.h new file mode 100644 index 0000000000000..a135d4d92b6d0 --- /dev/null +++ b/drivers/net/dsa/xrs700x/xrs700x_reg.h @@ -0,0 +1,203 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +/* Register Base Addresses */ +#define XRS_DEVICE_ID_BASE 0x0 +#define XRS_GPIO_BASE 0x10000 +#define XRS_PORT_OFFSET 0x10000 +#define XRS_PORT_BASE(x) (0x200000 + XRS_PORT_OFFSET * (x)) +#define XRS_RTC_BASE 0x280000 +#define XRS_TS_OFFSET 0x8000 +#define XRS_TS_BASE(x) (0x290000 + XRS_TS_OFFSET * (x)) +#define XRS_SWITCH_CONF_BASE 0x300000 + +/* Device Identification Registers */ +#define XRS_DEV_ID0 (XRS_DEVICE_ID_BASE + 0) +#define XRS_DEV_ID1 (XRS_DEVICE_ID_BASE + 2) +#define XRS_INT_ID0 (XRS_DEVICE_ID_BASE + 4) +#define XRS_INT_ID1 (XRS_DEVICE_ID_BASE + 6) +#define XRS_REV_ID (XRS_DEVICE_ID_BASE + 8) + +/* GPIO Registers */ +#define XRS_CONFIG0 (XRS_GPIO_BASE + 0x1000) +#define XRS_INPUT_STATUS0 (XRS_GPIO_BASE + 0x1002) +#define XRS_CONFIG1 (XRS_GPIO_BASE + 0x1004) +#define XRS_INPUT_STATUS1 (XRS_GPIO_BASE + 0x1006) +#define XRS_CONFIG2 (XRS_GPIO_BASE + 0x1008) +#define XRS_INPUT_STATUS2 (XRS_GPIO_BASE + 0x100a) + +/* Port Configuration Registers */ +#define XRS_PORT_GEN_BASE(x) (XRS_PORT_BASE(x) + 0x0) +#define XRS_PORT_HSR_BASE(x) (XRS_PORT_BASE(x) + 0x2000) +#define XRS_PORT_PTP_BASE(x) (XRS_PORT_BASE(x) + 0x4000) +#define XRS_PORT_CNT_BASE(x) (XRS_PORT_BASE(x) + 0x6000) +#define XRS_PORT_IPO_BASE(x) (XRS_PORT_BASE(x) + 0x8000) + +/* Port Configuration Registers - General and State */ +#define XRS_PORT_STATE(x) (XRS_PORT_GEN_BASE(x) + 0x0) +#define XRS_PORT_FORWARDING 0 +#define XRS_PORT_LEARNING 1 +#define XRS_PORT_DISABLED 2 +#define XRS_PORT_MODE_NORMAL 0 +#define XRS_PORT_MODE_MANAGEMENT 1 +#define XRS_PORT_SPEED_1000 0x12 +#define XRS_PORT_SPEED_100 0x20 +#define XRS_PORT_SPEED_10 0x30 +#define XRS_PORT_VLAN(x) (XRS_PORT_GEN_BASE(x) + 0x10) +#define XRS_PORT_VLAN0_MAPPING(x) (XRS_PORT_GEN_BASE(x) + 0x12) +#define XRS_PORT_FWD_MASK(x) (XRS_PORT_GEN_BASE(x) + 0x14) +#define XRS_PORT_VLAN_PRIO(x) (XRS_PORT_GEN_BASE(x) + 0x16) + +/* Port Configuration Registers - HSR/PRP */ +#define XRS_HSR_CFG(x) (XRS_PORT_HSR_BASE(x) + 0x0) + +/* Port Configuration Registers - PTP */ +#define XRS_PTP_RX_SYNC_DELAY_NS_LO(x) (XRS_PORT_PTP_BASE(x) + 0x2) +#define XRS_PTP_RX_SYNC_DELAY_NS_HI(x) (XRS_PORT_PTP_BASE(x) + 0x4) +#define XRS_PTP_RX_EVENT_DELAY_NS(x) (XRS_PORT_PTP_BASE(x) + 0xa) +#define XRS_PTP_TX_EVENT_DELAY_NS(x) (XRS_PORT_PTP_BASE(x) + 0x12) + +/* Port Configuration Registers - Counter */ +#define XRS_CNT_CTRL(x) (XRS_PORT_CNT_BASE(x) + 0x0) +#define XRS_RX_GOOD_OCTETS_L (XRS_PORT_CNT_BASE(0) + 0x200) +#define XRS_RX_GOOD_OCTETS_H (XRS_PORT_CNT_BASE(0) + 0x202) +#define XRS_RX_BAD_OCTETS_L (XRS_PORT_CNT_BASE(0) + 0x204) +#define XRS_RX_BAD_OCTETS_H (XRS_PORT_CNT_BASE(0) + 0x206) +#define XRS_RX_UNICAST_L (XRS_PORT_CNT_BASE(0) + 0x208) +#define XRS_RX_UNICAST_H (XRS_PORT_CNT_BASE(0) + 0x20a) +#define XRS_RX_BROADCAST_L (XRS_PORT_CNT_BASE(0) + 0x20c) +#define XRS_RX_BROADCAST_H (XRS_PORT_CNT_BASE(0) + 0x20e) +#define XRS_RX_MULTICAST_L (XRS_PORT_CNT_BASE(0) + 0x210) +#define XRS_RX_MULTICAST_H (XRS_PORT_CNT_BASE(0) + 0x212) +#define XRS_RX_UNDERSIZE_L (XRS_PORT_CNT_BASE(0) + 0x214) +#define XRS_RX_UNDERSIZE_H (XRS_PORT_CNT_BASE(0) + 0x216) +#define XRS_RX_FRAGMENTS_L (XRS_PORT_CNT_BASE(0) + 0x218) +#define XRS_RX_FRAGMENTS_H (XRS_PORT_CNT_BASE(0) + 0x21a) +#define XRS_RX_OVERSIZE_L (XRS_PORT_CNT_BASE(0) + 0x21c) +#define XRS_RX_OVERSIZE_H (XRS_PORT_CNT_BASE(0) + 0x21e) +#define XRS_RX_JABBER_L (XRS_PORT_CNT_BASE(0) + 0x220) +#define XRS_RX_JABBER_H (XRS_PORT_CNT_BASE(0) + 0x222) +#define XRS_RX_ERR_L (XRS_PORT_CNT_BASE(0) + 0x224) +#define XRS_RX_ERR_H (XRS_PORT_CNT_BASE(0) + 0x226) +#define XRS_RX_CRC_L (XRS_PORT_CNT_BASE(0) + 0x228) +#define XRS_RX_CRC_H (XRS_PORT_CNT_BASE(0) + 0x22a) +#define XRS_RX_64_L (XRS_PORT_CNT_BASE(0) + 0x22c) +#define XRS_RX_64_H (XRS_PORT_CNT_BASE(0) + 0x22e) +#define XRS_RX_65_127_L (XRS_PORT_CNT_BASE(0) + 0x230) +#define XRS_RX_65_127_H (XRS_PORT_CNT_BASE(0) + 0x232) +#define XRS_RX_128_255_L (XRS_PORT_CNT_BASE(0) + 0x234) +#define XRS_RX_128_255_H (XRS_PORT_CNT_BASE(0) + 0x236) +#define XRS_RX_256_511_L (XRS_PORT_CNT_BASE(0) + 0x238) +#define XRS_RX_256_511_H (XRS_PORT_CNT_BASE(0) + 0x23a) +#define XRS_RX_512_1023_L (XRS_PORT_CNT_BASE(0) + 0x23c) +#define XRS_RX_512_1023_H (XRS_PORT_CNT_BASE(0) + 0x23e) +#define XRS_RX_1024_1536_L (XRS_PORT_CNT_BASE(0) + 0x240) +#define XRS_RX_1024_1536_H (XRS_PORT_CNT_BASE(0) + 0x242) +#define XRS_RX_HSR_PRP_L (XRS_PORT_CNT_BASE(0) + 0x244) +#define XRS_RX_HSR_PRP_H (XRS_PORT_CNT_BASE(0) + 0x246) +#define XRS_RX_WRONGLAN_L (XRS_PORT_CNT_BASE(0) + 0x248) +#define XRS_RX_WRONGLAN_H (XRS_PORT_CNT_BASE(0) + 0x24a) +#define XRS_RX_DUPLICATE_L (XRS_PORT_CNT_BASE(0) + 0x24c) +#define XRS_RX_DUPLICATE_H (XRS_PORT_CNT_BASE(0) + 0x24e) +#define XRS_TX_OCTETS_L (XRS_PORT_CNT_BASE(0) + 0x280) +#define XRS_TX_OCTETS_H (XRS_PORT_CNT_BASE(0) + 0x282) +#define XRS_TX_UNICAST_L (XRS_PORT_CNT_BASE(0) + 0x284) +#define XRS_TX_UNICAST_H (XRS_PORT_CNT_BASE(0) + 0x286) +#define XRS_TX_BROADCAST_L (XRS_PORT_CNT_BASE(0) + 0x288) +#define XRS_TX_BROADCAST_H (XRS_PORT_CNT_BASE(0) + 0x28a) +#define XRS_TX_MULTICAST_L (XRS_PORT_CNT_BASE(0) + 0x28c) +#define XRS_TX_MULTICAST_H (XRS_PORT_CNT_BASE(0) + 0x28e) +#define XRS_TX_HSR_PRP_L (XRS_PORT_CNT_BASE(0) + 0x290) +#define XRS_TX_HSR_PRP_H (XRS_PORT_CNT_BASE(0) + 0x292) +#define XRS_PRIQ_DROP_L (XRS_PORT_CNT_BASE(0) + 0x2c0) +#define XRS_PRIQ_DROP_H (XRS_PORT_CNT_BASE(0) + 0x2c2) +#define XRS_EARLY_DROP_L (XRS_PORT_CNT_BASE(0) + 0x2c4) +#define XRS_EARLY_DROP_H (XRS_PORT_CNT_BASE(0) + 0x2c6) + +/* Port Configuration Registers - Inbound Policy 0 - 15 */ +#define XRS_ETH_ADDR_CFG(x, p) (XRS_PORT_IPO_BASE(x) + \ + (p) * 0x20 + 0x0) +#define XRS_ETH_ADDR_FWD_ALLOW(x, p) (XRS_PORT_IPO_BASE(x) + \ + (p) * 0x20 + 0x2) +#define XRS_ETH_ADDR_FWD_MIRROR(x, p) (XRS_PORT_IPO_BASE(x) + \ + (p) * 0x20 + 0x4) +#define XRS_ETH_ADDR_0(x, p) (XRS_PORT_IPO_BASE(x) + \ + (p) * 0x20 + 0x8) +#define XRS_ETH_ADDR_1(x, p) (XRS_PORT_IPO_BASE(x) + \ + (p) * 0x20 + 0xa) +#define XRS_ETH_ADDR_2(x, p) (XRS_PORT_IPO_BASE(x) + \ + (p) * 0x20 + 0xc) + +/* RTC Registers */ +#define XRS_CUR_NSEC0 (XRS_RTC_BASE + 0x1004) +#define XRS_CUR_NSEC1 (XRS_RTC_BASE + 0x1006) +#define XRS_CUR_SEC0 (XRS_RTC_BASE + 0x1008) +#define XRS_CUR_SEC1 (XRS_RTC_BASE + 0x100a) +#define XRS_CUR_SEC2 (XRS_RTC_BASE + 0x100c) +#define XRS_TIME_CC0 (XRS_RTC_BASE + 0x1010) +#define XRS_TIME_CC1 (XRS_RTC_BASE + 0x1012) +#define XRS_TIME_CC2 (XRS_RTC_BASE + 0x1014) +#define XRS_STEP_SIZE0 (XRS_RTC_BASE + 0x1020) +#define XRS_STEP_SIZE1 (XRS_RTC_BASE + 0x1022) +#define XRS_STEP_SIZE2 (XRS_RTC_BASE + 0x1024) +#define XRS_ADJUST_NSEC0 (XRS_RTC_BASE + 0x1034) +#define XRS_ADJUST_NSEC1 (XRS_RTC_BASE + 0x1036) +#define XRS_ADJUST_SEC0 (XRS_RTC_BASE + 0x1038) +#define XRS_ADJUST_SEC1 (XRS_RTC_BASE + 0x103a) +#define XRS_ADJUST_SEC2 (XRS_RTC_BASE + 0x103c) +#define XRS_TIME_CMD (XRS_RTC_BASE + 0x1040) + +/* Time Stamper Registers */ +#define XRS_TS_CTRL(x) (XRS_TS_BASE(x) + 0x1000) +#define XRS_TS_INT_MASK(x) (XRS_TS_BASE(x) + 0x1008) +#define XRS_TS_INT_STATUS(x) (XRS_TS_BASE(x) + 0x1010) +#define XRS_TS_NSEC0(x) (XRS_TS_BASE(x) + 0x1104) +#define XRS_TS_NSEC1(x) (XRS_TS_BASE(x) + 0x1106) +#define XRS_TS_SEC0(x) (XRS_TS_BASE(x) + 0x1108) +#define XRS_TS_SEC1(x) (XRS_TS_BASE(x) + 0x110a) +#define XRS_TS_SEC2(x) (XRS_TS_BASE(x) + 0x110c) +#define XRS_PNCT0(x) (XRS_TS_BASE(x) + 0x1110) +#define XRS_PNCT1(x) (XRS_TS_BASE(x) + 0x1112) + +/* Switch Configuration Registers */ +#define XRS_SWITCH_GEN_BASE (XRS_SWITCH_CONF_BASE + 0x0) +#define XRS_SWITCH_TS_BASE (XRS_SWITCH_CONF_BASE + 0x2000) +#define XRS_SWITCH_VLAN_BASE (XRS_SWITCH_CONF_BASE + 0x4000) + +/* Switch Configuration Registers - General */ +#define XRS_GENERAL (XRS_SWITCH_GEN_BASE + 0x10) +#define XRS_GENERAL_TIME_TRAILER BIT(9) +#define XRS_GENERAL_MOD_SYNC BIT(10) +#define XRS_GENERAL_CUT_THRU BIT(13) +#define XRS_GENERAL_CLR_MAC_TBL BIT(14) +#define XRS_GENERAL_RESET BIT(15) +#define XRS_MT_CLEAR_MASK (XRS_SWITCH_GEN_BASE + 0x12) +#define XRS_ADDRESS_AGING (XRS_SWITCH_GEN_BASE + 0x20) +#define XRS_TS_CTRL_TX (XRS_SWITCH_GEN_BASE + 0x28) +#define XRS_TS_CTRL_RX (XRS_SWITCH_GEN_BASE + 0x2a) +#define XRS_INT_MASK (XRS_SWITCH_GEN_BASE + 0x2c) +#define XRS_INT_STATUS (XRS_SWITCH_GEN_BASE + 0x2e) +#define XRS_MAC_TABLE0 (XRS_SWITCH_GEN_BASE + 0x200) +#define XRS_MAC_TABLE1 (XRS_SWITCH_GEN_BASE + 0x202) +#define XRS_MAC_TABLE2 (XRS_SWITCH_GEN_BASE + 0x204) +#define XRS_MAC_TABLE3 (XRS_SWITCH_GEN_BASE + 0x206) + +/* Switch Configuration Registers - Frame Timestamp */ +#define XRS_TX_TS_NS_LO(t) (XRS_SWITCH_TS_BASE + 0x80 * (t) + 0x0) +#define XRS_TX_TS_NS_HI(t) (XRS_SWITCH_TS_BASE + 0x80 * (t) + 0x2) +#define XRS_TX_TS_S_LO(t) (XRS_SWITCH_TS_BASE + 0x80 * (t) + 0x4) +#define XRS_TX_TS_S_HI(t) (XRS_SWITCH_TS_BASE + 0x80 * (t) + 0x6) +#define XRS_TX_TS_HDR(t, h) (XRS_SWITCH_TS_BASE + 0x80 * (t) + \ + 0x2 * (h) + 0xe) +#define XRS_RX_TS_NS_LO(t) (XRS_SWITCH_TS_BASE + 0x80 * (t) + \ + 0x200) +#define XRS_RX_TS_NS_HI(t) (XRS_SWITCH_TS_BASE + 0x80 * (t) + \ + 0x202) +#define XRS_RX_TS_S_LO(t) (XRS_SWITCH_TS_BASE + 0x80 * (t) + \ + 0x204) +#define XRS_RX_TS_S_HI(t) (XRS_SWITCH_TS_BASE + 0x80 * (t) + \ + 0x206) +#define XRS_RX_TS_HDR(t, h) (XRS_SWITCH_TS_BASE + 0x80 * (t) + \ + 0x2 * (h) + 0xe) + +/* Switch Configuration Registers - VLAN */ +#define XRS_VLAN(v) (XRS_SWITCH_VLAN_BASE + 0x2 * (v)) -- GitLab From 8204c2b01cf998a28901af819227b46f0e4c67a8 Mon Sep 17 00:00:00 2001 From: George McCollister Date: Thu, 14 Jan 2021 13:57:34 -0600 Subject: [PATCH 0911/2012] dt-bindings: net: dsa: add bindings for xrs700x switches Add documentation and an example for Arrow SpeedChips XRS7000 Series single chip Ethernet switches. Signed-off-by: George McCollister Reviewed-by: Florian Fainelli Signed-off-by: Jakub Kicinski --- .../bindings/net/dsa/arrow,xrs700x.yaml | 73 +++++++++++++++++++ 1 file changed, 73 insertions(+) create mode 100644 Documentation/devicetree/bindings/net/dsa/arrow,xrs700x.yaml diff --git a/Documentation/devicetree/bindings/net/dsa/arrow,xrs700x.yaml b/Documentation/devicetree/bindings/net/dsa/arrow,xrs700x.yaml new file mode 100644 index 0000000000000..3f01b65f3b227 --- /dev/null +++ b/Documentation/devicetree/bindings/net/dsa/arrow,xrs700x.yaml @@ -0,0 +1,73 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/net/dsa/arrow,xrs700x.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Arrow SpeedChips XRS7000 Series Switch Device Tree Bindings + +allOf: + - $ref: dsa.yaml# + +maintainers: + - George McCollister + +description: + The Arrow SpeedChips XRS7000 Series of single chip gigabit Ethernet switches + are designed for critical networking applications. They have up to three + RGMII ports and one RMII port and are managed via i2c or mdio. + +properties: + compatible: + oneOf: + - enum: + - arrow,xrs7003e + - arrow,xrs7003f + - arrow,xrs7004e + - arrow,xrs7004f + + reg: + maxItems: 1 + +required: + - compatible + - reg + +unevaluatedProperties: false + +examples: + - | + i2c { + #address-cells = <1>; + #size-cells = <0>; + switch@8 { + compatible = "arrow,xrs7004e"; + reg = <0x8>; + + ethernet-ports { + #address-cells = <1>; + #size-cells = <0>; + ethernet-port@1 { + reg = <1>; + label = "lan0"; + phy-handle = <&swphy0>; + phy-mode = "rgmii-id"; + }; + ethernet-port@2 { + reg = <2>; + label = "lan1"; + phy-handle = <&swphy1>; + phy-mode = "rgmii-id"; + }; + ethernet-port@3 { + reg = <3>; + label = "cpu"; + ethernet = <&fec1>; + fixed-link { + speed = <1000>; + full-duplex; + }; + }; + }; + }; + }; -- GitLab From c96adff95619178e2118925578343ad54857c80c Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Fri, 15 Jan 2021 10:50:24 -0800 Subject: [PATCH 0912/2012] cls_flower: call nla_ok() before nla_next() fl_set_enc_opt() simply checks if there are still bytes left to parse, but this is not sufficent as syzbot seems to be able to generate malformatted netlink messages. nla_ok() is more strict so should be used to validate the next nlattr here. And nla_validate_nested_deprecated() has less strict check too, it is probably too late to switch to the strict version, but we can just call nla_ok() too after it. Reported-and-tested-by: syzbot+2624e3778b18fc497c92@syzkaller.appspotmail.com Fixes: 0a6e77784f49 ("net/sched: allow flower to match tunnel options") Fixes: 79b1011cb33d ("net: sched: allow flower to match erspan options") Cc: Jamal Hadi Salim Cc: Xin Long Cc: Jiri Pirko Signed-off-by: Cong Wang Link: https://lore.kernel.org/r/20210115185024.72298-1-xiyou.wangcong@gmail.com Signed-off-by: Jakub Kicinski --- net/sched/cls_flower.c | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index 1319986693fc8..84f932532db7d 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -1272,6 +1272,10 @@ static int fl_set_enc_opt(struct nlattr **tb, struct fl_flow_key *key, nla_opt_msk = nla_data(tb[TCA_FLOWER_KEY_ENC_OPTS_MASK]); msk_depth = nla_len(tb[TCA_FLOWER_KEY_ENC_OPTS_MASK]); + if (!nla_ok(nla_opt_msk, msk_depth)) { + NL_SET_ERR_MSG(extack, "Invalid nested attribute for masks"); + return -EINVAL; + } } nla_for_each_attr(nla_opt_key, nla_enc_key, @@ -1307,9 +1311,6 @@ static int fl_set_enc_opt(struct nlattr **tb, struct fl_flow_key *key, NL_SET_ERR_MSG(extack, "Key and mask miss aligned"); return -EINVAL; } - - if (msk_depth) - nla_opt_msk = nla_next(nla_opt_msk, &msk_depth); break; case TCA_FLOWER_KEY_ENC_OPTS_VXLAN: if (key->enc_opts.dst_opt_type) { @@ -1340,9 +1341,6 @@ static int fl_set_enc_opt(struct nlattr **tb, struct fl_flow_key *key, NL_SET_ERR_MSG(extack, "Key and mask miss aligned"); return -EINVAL; } - - if (msk_depth) - nla_opt_msk = nla_next(nla_opt_msk, &msk_depth); break; case TCA_FLOWER_KEY_ENC_OPTS_ERSPAN: if (key->enc_opts.dst_opt_type) { @@ -1373,14 +1371,20 @@ static int fl_set_enc_opt(struct nlattr **tb, struct fl_flow_key *key, NL_SET_ERR_MSG(extack, "Key and mask miss aligned"); return -EINVAL; } - - if (msk_depth) - nla_opt_msk = nla_next(nla_opt_msk, &msk_depth); break; default: NL_SET_ERR_MSG(extack, "Unknown tunnel option type"); return -EINVAL; } + + if (!msk_depth) + continue; + + if (!nla_ok(nla_opt_msk, msk_depth)) { + NL_SET_ERR_MSG(extack, "A mask attribute is invalid"); + return -EINVAL; + } + nla_opt_msk = nla_next(nla_opt_msk, &msk_depth); } return 0; -- GitLab From d38001d30d47051eec265d68378abca7f5109e97 Mon Sep 17 00:00:00 2001 From: Tobias Waldekranz Date: Fri, 15 Jan 2021 13:52:58 +0100 Subject: [PATCH 0913/2012] net: dsa: mv88e6xxx: Provide dummy implementations for trunk setters Support for Global 2 registers is build-time optional. In the case where it was not enabled the build would fail as no "dummy" implementation of these functions was available. Fixes: 57e661aae6a8 ("net: dsa: mv88e6xxx: Link aggregation support") Reported-by: kernel test robot Reviewed-by: Vladimir Oltean Tested-by: Vladimir Oltean Signed-off-by: Tobias Waldekranz Signed-off-by: Jakub Kicinski --- drivers/net/dsa/mv88e6xxx/global2.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/net/dsa/mv88e6xxx/global2.h b/drivers/net/dsa/mv88e6xxx/global2.h index 60febaf4da765..253a79582a1d0 100644 --- a/drivers/net/dsa/mv88e6xxx/global2.h +++ b/drivers/net/dsa/mv88e6xxx/global2.h @@ -525,6 +525,18 @@ static inline int mv88e6xxx_g2_trunk_clear(struct mv88e6xxx_chip *chip) return -EOPNOTSUPP; } +static inline int mv88e6xxx_g2_trunk_mask_write(struct mv88e6xxx_chip *chip, + int num, bool hash, u16 mask) +{ + return -EOPNOTSUPP; +} + +static inline int mv88e6xxx_g2_trunk_mapping_write(struct mv88e6xxx_chip *chip, + int id, u16 map) +{ + return -EOPNOTSUPP; +} + static inline int mv88e6xxx_g2_device_mapping_write(struct mv88e6xxx_chip *chip, int target, int port) { -- GitLab From b80dc51b72e29318e02baace1e3c4cfe6e772904 Mon Sep 17 00:00:00 2001 From: Tobias Waldekranz Date: Fri, 15 Jan 2021 13:52:59 +0100 Subject: [PATCH 0914/2012] net: dsa: mv88e6xxx: Only allow LAG offload on supported hardware There are chips that do have Global 2 registers, and therefore trunk mapping/mask tables are not available. Refuse the offload as early as possible on those devices. Fixes: 57e661aae6a8 ("net: dsa: mv88e6xxx: Link aggregation support") Signed-off-by: Tobias Waldekranz Reviewed-by: Vladimir Oltean Signed-off-by: Jakub Kicinski --- drivers/net/dsa/mv88e6xxx/chip.c | 6 +++++- drivers/net/dsa/mv88e6xxx/chip.h | 5 +++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index dcb1726b68cc8..91286d7b12c75 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -5385,9 +5385,13 @@ static bool mv88e6xxx_lag_can_offload(struct dsa_switch *ds, struct net_device *lag, struct netdev_lag_upper_info *info) { + struct mv88e6xxx_chip *chip = ds->priv; struct dsa_port *dp; int id, members = 0; + if (!mv88e6xxx_has_lag(chip)) + return false; + id = dsa_lag_id(ds->dst, lag); if (id < 0 || id >= ds->num_lag_ids) return false; @@ -5727,7 +5731,7 @@ static int mv88e6xxx_register_switch(struct mv88e6xxx_chip *chip) * 5-bit port mode, which we do not support. 640k^W16 ought to * be enough for anyone. */ - ds->num_lag_ids = 16; + ds->num_lag_ids = mv88e6xxx_has_lag(chip) ? 16 : 0; dev_set_drvdata(dev, ds); diff --git a/drivers/net/dsa/mv88e6xxx/chip.h b/drivers/net/dsa/mv88e6xxx/chip.h index 3543055bcb519..788b3f585ef35 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.h +++ b/drivers/net/dsa/mv88e6xxx/chip.h @@ -662,6 +662,11 @@ static inline bool mv88e6xxx_has_pvt(struct mv88e6xxx_chip *chip) return chip->info->pvt; } +static inline bool mv88e6xxx_has_lag(struct mv88e6xxx_chip *chip) +{ + return !!chip->info->global2_addr; +} + static inline unsigned int mv88e6xxx_num_databases(struct mv88e6xxx_chip *chip) { return chip->info->num_databases; -- GitLab From 297af515d75f5cd20b012696cee5a4279fd2835c Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Wed, 13 Jan 2021 21:25:18 +0100 Subject: [PATCH 0915/2012] netxen_nic: switch from 'pci_' to 'dma_' API The wrappers in include/linux/pci-dma-compat.h should go away. The patch has been generated with the coccinelle script below and has been hand modified to replace GFP_ with a correct flag. It has been compile tested. When memory is allocated in 'netxen_get_minidump_template()' GFP_KERNEL can be used because its only caller, ' netxen_setup_minidump(()' already uses it and no lock is acquired in the between. When memory is allocated in other function in 'netxen_nic_ctx.c' GFP_KERNEL can be used because the call chain already uses GFP_KERNEL and no lock is taken in the between. The call chain is: netxen_nic_attach() --> netxen_alloc_sw_resources() : already uses GFP_KERNEL --> netxen_alloc_hw_resources() --> nx_fw_cmd_create_rx_ctx() --> nx_fw_cmd_create_tx_ctx() When memory is allocated in 'netxen_init_dummy_dma()' GFP_KERNEL can be used because its only call chain already uses it and no lock is acquired in the between. The call chain is: --> netxen_start_firmware --> netxen_request_firmware() --> request_firmware() --> _request_firmware(() --> fw_get_filesystem_firmware() --> __getname() : already uses GFP_KERNEL --> netxen_init_dummy_dma() @@ @@ - PCI_DMA_BIDIRECTIONAL + DMA_BIDIRECTIONAL @@ @@ - PCI_DMA_TODEVICE + DMA_TO_DEVICE @@ @@ - PCI_DMA_FROMDEVICE + DMA_FROM_DEVICE @@ @@ - PCI_DMA_NONE + DMA_NONE @@ expression e1, e2, e3; @@ - pci_alloc_consistent(e1, e2, e3) + dma_alloc_coherent(&e1->dev, e2, e3, GFP_) @@ expression e1, e2, e3; @@ - pci_zalloc_consistent(e1, e2, e3) + dma_alloc_coherent(&e1->dev, e2, e3, GFP_) @@ expression e1, e2, e3, e4; @@ - pci_free_consistent(e1, e2, e3, e4) + dma_free_coherent(&e1->dev, e2, e3, e4) @@ expression e1, e2, e3, e4; @@ - pci_map_single(e1, e2, e3, e4) + dma_map_single(&e1->dev, e2, e3, e4) @@ expression e1, e2, e3, e4; @@ - pci_unmap_single(e1, e2, e3, e4) + dma_unmap_single(&e1->dev, e2, e3, e4) @@ expression e1, e2, e3, e4, e5; @@ - pci_map_page(e1, e2, e3, e4, e5) + dma_map_page(&e1->dev, e2, e3, e4, e5) @@ expression e1, e2, e3, e4; @@ - pci_unmap_page(e1, e2, e3, e4) + dma_unmap_page(&e1->dev, e2, e3, e4) @@ expression e1, e2, e3, e4; @@ - pci_map_sg(e1, e2, e3, e4) + dma_map_sg(&e1->dev, e2, e3, e4) @@ expression e1, e2, e3, e4; @@ - pci_unmap_sg(e1, e2, e3, e4) + dma_unmap_sg(&e1->dev, e2, e3, e4) @@ expression e1, e2, e3, e4; @@ - pci_dma_sync_single_for_cpu(e1, e2, e3, e4) + dma_sync_single_for_cpu(&e1->dev, e2, e3, e4) @@ expression e1, e2, e3, e4; @@ - pci_dma_sync_single_for_device(e1, e2, e3, e4) + dma_sync_single_for_device(&e1->dev, e2, e3, e4) @@ expression e1, e2, e3, e4; @@ - pci_dma_sync_sg_for_cpu(e1, e2, e3, e4) + dma_sync_sg_for_cpu(&e1->dev, e2, e3, e4) @@ expression e1, e2, e3, e4; @@ - pci_dma_sync_sg_for_device(e1, e2, e3, e4) + dma_sync_sg_for_device(&e1->dev, e2, e3, e4) @@ expression e1, e2; @@ - pci_dma_mapping_error(e1, e2) + dma_mapping_error(&e1->dev, e2) @@ expression e1, e2; @@ - pci_set_dma_mask(e1, e2) + dma_set_mask(&e1->dev, e2) @@ expression e1, e2; @@ - pci_set_consistent_dma_mask(e1, e2) + dma_set_coherent_mask(&e1->dev, e2) Signed-off-by: Christophe JAILLET Link: https://lore.kernel.org/r/20210113202519.487672-1-christophe.jaillet@wanadoo.fr Signed-off-by: Jakub Kicinski --- .../ethernet/qlogic/netxen/netxen_nic_ctx.c | 83 ++++++++++--------- .../ethernet/qlogic/netxen/netxen_nic_init.c | 49 ++++++----- .../ethernet/qlogic/netxen/netxen_nic_main.c | 22 ++--- 3 files changed, 77 insertions(+), 77 deletions(-) diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic_ctx.c b/drivers/net/ethernet/qlogic/netxen/netxen_nic_ctx.c index 5e9f8ee998000..2fcbcecb41d17 100644 --- a/drivers/net/ethernet/qlogic/netxen/netxen_nic_ctx.c +++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic_ctx.c @@ -113,7 +113,8 @@ netxen_get_minidump_template(struct netxen_adapter *adapter) return NX_RCODE_INVALID_ARGS; } - addr = pci_zalloc_consistent(adapter->pdev, size, &md_template_addr); + addr = dma_alloc_coherent(&adapter->pdev->dev, size, + &md_template_addr, GFP_KERNEL); if (!addr) { dev_err(&adapter->pdev->dev, "Unable to allocate dmable memory for template.\n"); return -ENOMEM; @@ -133,7 +134,7 @@ netxen_get_minidump_template(struct netxen_adapter *adapter) dev_err(&adapter->pdev->dev, "Failed to get minidump template, err_code : %d, requested_size : %d, actual_size : %d\n", cmd.rsp.cmd, size, cmd.rsp.arg2); } - pci_free_consistent(adapter->pdev, size, addr, md_template_addr); + dma_free_coherent(&adapter->pdev->dev, size, addr, md_template_addr); return 0; } @@ -281,14 +282,14 @@ nx_fw_cmd_create_rx_ctx(struct netxen_adapter *adapter) rsp_size = SIZEOF_CARDRSP_RX(nx_cardrsp_rx_ctx_t, nrds_rings, nsds_rings); - addr = pci_alloc_consistent(adapter->pdev, - rq_size, &hostrq_phys_addr); + addr = dma_alloc_coherent(&adapter->pdev->dev, rq_size, + &hostrq_phys_addr, GFP_KERNEL); if (addr == NULL) return -ENOMEM; prq = addr; - addr = pci_alloc_consistent(adapter->pdev, - rsp_size, &cardrsp_phys_addr); + addr = dma_alloc_coherent(&adapter->pdev->dev, rsp_size, + &cardrsp_phys_addr, GFP_KERNEL); if (addr == NULL) { err = -ENOMEM; goto out_free_rq; @@ -387,9 +388,10 @@ nx_fw_cmd_create_rx_ctx(struct netxen_adapter *adapter) recv_ctx->virt_port = prsp->virt_port; out_free_rsp: - pci_free_consistent(adapter->pdev, rsp_size, prsp, cardrsp_phys_addr); + dma_free_coherent(&adapter->pdev->dev, rsp_size, prsp, + cardrsp_phys_addr); out_free_rq: - pci_free_consistent(adapter->pdev, rq_size, prq, hostrq_phys_addr); + dma_free_coherent(&adapter->pdev->dev, rq_size, prq, hostrq_phys_addr); return err; } @@ -429,14 +431,14 @@ nx_fw_cmd_create_tx_ctx(struct netxen_adapter *adapter) struct netxen_cmd_args cmd; rq_size = SIZEOF_HOSTRQ_TX(nx_hostrq_tx_ctx_t); - rq_addr = pci_alloc_consistent(adapter->pdev, - rq_size, &rq_phys_addr); + rq_addr = dma_alloc_coherent(&adapter->pdev->dev, rq_size, + &rq_phys_addr, GFP_KERNEL); if (!rq_addr) return -ENOMEM; rsp_size = SIZEOF_CARDRSP_TX(nx_cardrsp_tx_ctx_t); - rsp_addr = pci_alloc_consistent(adapter->pdev, - rsp_size, &rsp_phys_addr); + rsp_addr = dma_alloc_coherent(&adapter->pdev->dev, rsp_size, + &rsp_phys_addr, GFP_KERNEL); if (!rsp_addr) { err = -ENOMEM; goto out_free_rq; @@ -491,10 +493,11 @@ nx_fw_cmd_create_tx_ctx(struct netxen_adapter *adapter) err = -EIO; } - pci_free_consistent(adapter->pdev, rsp_size, rsp_addr, rsp_phys_addr); + dma_free_coherent(&adapter->pdev->dev, rsp_size, rsp_addr, + rsp_phys_addr); out_free_rq: - pci_free_consistent(adapter->pdev, rq_size, rq_addr, rq_phys_addr); + dma_free_coherent(&adapter->pdev->dev, rq_size, rq_addr, rq_phys_addr); return err; } @@ -745,9 +748,9 @@ int netxen_alloc_hw_resources(struct netxen_adapter *adapter) recv_ctx = &adapter->recv_ctx; tx_ring = adapter->tx_ring; - addr = pci_alloc_consistent(pdev, - sizeof(struct netxen_ring_ctx) + sizeof(uint32_t), - &recv_ctx->phys_addr); + addr = dma_alloc_coherent(&pdev->dev, + sizeof(struct netxen_ring_ctx) + sizeof(uint32_t), + &recv_ctx->phys_addr, GFP_KERNEL); if (addr == NULL) { dev_err(&pdev->dev, "failed to allocate hw context\n"); return -ENOMEM; @@ -762,8 +765,8 @@ int netxen_alloc_hw_resources(struct netxen_adapter *adapter) (__le32 *)(((char *)addr) + sizeof(struct netxen_ring_ctx)); /* cmd desc ring */ - addr = pci_alloc_consistent(pdev, TX_DESC_RINGSIZE(tx_ring), - &tx_ring->phys_addr); + addr = dma_alloc_coherent(&pdev->dev, TX_DESC_RINGSIZE(tx_ring), + &tx_ring->phys_addr, GFP_KERNEL); if (addr == NULL) { dev_err(&pdev->dev, "%s: failed to allocate tx desc ring\n", @@ -776,9 +779,9 @@ int netxen_alloc_hw_resources(struct netxen_adapter *adapter) for (ring = 0; ring < adapter->max_rds_rings; ring++) { rds_ring = &recv_ctx->rds_rings[ring]; - addr = pci_alloc_consistent(adapter->pdev, - RCV_DESC_RINGSIZE(rds_ring), - &rds_ring->phys_addr); + addr = dma_alloc_coherent(&adapter->pdev->dev, + RCV_DESC_RINGSIZE(rds_ring), + &rds_ring->phys_addr, GFP_KERNEL); if (addr == NULL) { dev_err(&pdev->dev, "%s: failed to allocate rds ring [%d]\n", @@ -797,9 +800,9 @@ int netxen_alloc_hw_resources(struct netxen_adapter *adapter) for (ring = 0; ring < adapter->max_sds_rings; ring++) { sds_ring = &recv_ctx->sds_rings[ring]; - addr = pci_alloc_consistent(adapter->pdev, - STATUS_DESC_RINGSIZE(sds_ring), - &sds_ring->phys_addr); + addr = dma_alloc_coherent(&adapter->pdev->dev, + STATUS_DESC_RINGSIZE(sds_ring), + &sds_ring->phys_addr, GFP_KERNEL); if (addr == NULL) { dev_err(&pdev->dev, "%s: failed to allocate sds ring [%d]\n", @@ -874,19 +877,17 @@ done: recv_ctx = &adapter->recv_ctx; if (recv_ctx->hwctx != NULL) { - pci_free_consistent(adapter->pdev, - sizeof(struct netxen_ring_ctx) + - sizeof(uint32_t), - recv_ctx->hwctx, - recv_ctx->phys_addr); + dma_free_coherent(&adapter->pdev->dev, + sizeof(struct netxen_ring_ctx) + sizeof(uint32_t), + recv_ctx->hwctx, recv_ctx->phys_addr); recv_ctx->hwctx = NULL; } tx_ring = adapter->tx_ring; if (tx_ring->desc_head != NULL) { - pci_free_consistent(adapter->pdev, - TX_DESC_RINGSIZE(tx_ring), - tx_ring->desc_head, tx_ring->phys_addr); + dma_free_coherent(&adapter->pdev->dev, + TX_DESC_RINGSIZE(tx_ring), + tx_ring->desc_head, tx_ring->phys_addr); tx_ring->desc_head = NULL; } @@ -894,10 +895,10 @@ done: rds_ring = &recv_ctx->rds_rings[ring]; if (rds_ring->desc_head != NULL) { - pci_free_consistent(adapter->pdev, - RCV_DESC_RINGSIZE(rds_ring), - rds_ring->desc_head, - rds_ring->phys_addr); + dma_free_coherent(&adapter->pdev->dev, + RCV_DESC_RINGSIZE(rds_ring), + rds_ring->desc_head, + rds_ring->phys_addr); rds_ring->desc_head = NULL; } } @@ -906,10 +907,10 @@ done: sds_ring = &recv_ctx->sds_rings[ring]; if (sds_ring->desc_head != NULL) { - pci_free_consistent(adapter->pdev, - STATUS_DESC_RINGSIZE(sds_ring), - sds_ring->desc_head, - sds_ring->phys_addr); + dma_free_coherent(&adapter->pdev->dev, + STATUS_DESC_RINGSIZE(sds_ring), + sds_ring->desc_head, + sds_ring->phys_addr); sds_ring->desc_head = NULL; } } diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic_init.c b/drivers/net/ethernet/qlogic/netxen/netxen_nic_init.c index 94546ed5f8675..08f9477d2ee84 100644 --- a/drivers/net/ethernet/qlogic/netxen/netxen_nic_init.c +++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic_init.c @@ -102,10 +102,8 @@ void netxen_release_rx_buffers(struct netxen_adapter *adapter) rx_buf = &(rds_ring->rx_buf_arr[i]); if (rx_buf->state == NETXEN_BUFFER_FREE) continue; - pci_unmap_single(adapter->pdev, - rx_buf->dma, - rds_ring->dma_size, - PCI_DMA_FROMDEVICE); + dma_unmap_single(&adapter->pdev->dev, rx_buf->dma, + rds_ring->dma_size, DMA_FROM_DEVICE); if (rx_buf->skb != NULL) dev_kfree_skb_any(rx_buf->skb); } @@ -124,16 +122,16 @@ void netxen_release_tx_buffers(struct netxen_adapter *adapter) for (i = 0; i < tx_ring->num_desc; i++) { buffrag = cmd_buf->frag_array; if (buffrag->dma) { - pci_unmap_single(adapter->pdev, buffrag->dma, - buffrag->length, PCI_DMA_TODEVICE); + dma_unmap_single(&adapter->pdev->dev, buffrag->dma, + buffrag->length, DMA_TO_DEVICE); buffrag->dma = 0ULL; } for (j = 1; j < cmd_buf->frag_count; j++) { buffrag++; if (buffrag->dma) { - pci_unmap_page(adapter->pdev, buffrag->dma, - buffrag->length, - PCI_DMA_TODEVICE); + dma_unmap_page(&adapter->pdev->dev, + buffrag->dma, buffrag->length, + DMA_TO_DEVICE); buffrag->dma = 0ULL; } } @@ -1250,9 +1248,10 @@ int netxen_init_dummy_dma(struct netxen_adapter *adapter) if (!NX_IS_REVISION_P2(adapter->ahw.revision_id)) return 0; - adapter->dummy_dma.addr = pci_alloc_consistent(adapter->pdev, - NETXEN_HOST_DUMMY_DMA_SIZE, - &adapter->dummy_dma.phys_addr); + adapter->dummy_dma.addr = dma_alloc_coherent(&adapter->pdev->dev, + NETXEN_HOST_DUMMY_DMA_SIZE, + &adapter->dummy_dma.phys_addr, + GFP_KERNEL); if (adapter->dummy_dma.addr == NULL) { dev_err(&adapter->pdev->dev, "ERROR: Could not allocate dummy DMA memory\n"); @@ -1304,10 +1303,10 @@ void netxen_free_dummy_dma(struct netxen_adapter *adapter) } if (i) { - pci_free_consistent(adapter->pdev, - NETXEN_HOST_DUMMY_DMA_SIZE, - adapter->dummy_dma.addr, - adapter->dummy_dma.phys_addr); + dma_free_coherent(&adapter->pdev->dev, + NETXEN_HOST_DUMMY_DMA_SIZE, + adapter->dummy_dma.addr, + adapter->dummy_dma.phys_addr); adapter->dummy_dma.addr = NULL; } else dev_err(&adapter->pdev->dev, "dma_watchdog_shutdown failed\n"); @@ -1467,10 +1466,10 @@ netxen_alloc_rx_skb(struct netxen_adapter *adapter, if (!adapter->ahw.cut_through) skb_reserve(skb, 2); - dma = pci_map_single(pdev, skb->data, - rds_ring->dma_size, PCI_DMA_FROMDEVICE); + dma = dma_map_single(&pdev->dev, skb->data, rds_ring->dma_size, + DMA_FROM_DEVICE); - if (pci_dma_mapping_error(pdev, dma)) { + if (dma_mapping_error(&pdev->dev, dma)) { dev_kfree_skb_any(skb); buffer->skb = NULL; return 1; @@ -1491,8 +1490,8 @@ static struct sk_buff *netxen_process_rxbuf(struct netxen_adapter *adapter, buffer = &rds_ring->rx_buf_arr[index]; - pci_unmap_single(adapter->pdev, buffer->dma, rds_ring->dma_size, - PCI_DMA_FROMDEVICE); + dma_unmap_single(&adapter->pdev->dev, buffer->dma, rds_ring->dma_size, + DMA_FROM_DEVICE); skb = buffer->skb; if (!skb) @@ -1754,13 +1753,13 @@ int netxen_process_cmd_ring(struct netxen_adapter *adapter) buffer = &tx_ring->cmd_buf_arr[sw_consumer]; if (buffer->skb) { frag = &buffer->frag_array[0]; - pci_unmap_single(pdev, frag->dma, frag->length, - PCI_DMA_TODEVICE); + dma_unmap_single(&pdev->dev, frag->dma, frag->length, + DMA_TO_DEVICE); frag->dma = 0ULL; for (i = 1; i < buffer->frag_count; i++) { frag++; /* Get the next frag */ - pci_unmap_page(pdev, frag->dma, frag->length, - PCI_DMA_TODEVICE); + dma_unmap_page(&pdev->dev, frag->dma, + frag->length, DMA_TO_DEVICE); frag->dma = 0ULL; } diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c index d258e0ccf9465..7e6bac85495d3 100644 --- a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c +++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c @@ -243,8 +243,8 @@ static int nx_set_dma_mask(struct netxen_adapter *adapter) cmask = mask; } - if (pci_set_dma_mask(pdev, mask) == 0 && - pci_set_consistent_dma_mask(pdev, cmask) == 0) { + if (dma_set_mask(&pdev->dev, mask) == 0 && + dma_set_coherent_mask(&pdev->dev, cmask) == 0) { adapter->pci_using_dac = 1; return 0; } @@ -277,13 +277,13 @@ nx_update_dma_mask(struct netxen_adapter *adapter) mask = DMA_BIT_MASK(32+shift); - err = pci_set_dma_mask(pdev, mask); + err = dma_set_mask(&pdev->dev, mask); if (err) goto err_out; if (NX_IS_REVISION_P3(adapter->ahw.revision_id)) { - err = pci_set_consistent_dma_mask(pdev, mask); + err = dma_set_coherent_mask(&pdev->dev, mask); if (err) goto err_out; } @@ -293,8 +293,8 @@ nx_update_dma_mask(struct netxen_adapter *adapter) return 0; err_out: - pci_set_dma_mask(pdev, old_mask); - pci_set_consistent_dma_mask(pdev, old_cmask); + dma_set_mask(&pdev->dev, old_mask); + dma_set_coherent_mask(&pdev->dev, old_cmask); return err; } @@ -1978,9 +1978,9 @@ netxen_map_tx_skb(struct pci_dev *pdev, nr_frags = skb_shinfo(skb)->nr_frags; nf = &pbuf->frag_array[0]; - map = pci_map_single(pdev, skb->data, - skb_headlen(skb), PCI_DMA_TODEVICE); - if (pci_dma_mapping_error(pdev, map)) + map = dma_map_single(&pdev->dev, skb->data, skb_headlen(skb), + DMA_TO_DEVICE); + if (dma_mapping_error(&pdev->dev, map)) goto out_err; nf->dma = map; @@ -2004,12 +2004,12 @@ netxen_map_tx_skb(struct pci_dev *pdev, unwind: while (--i >= 0) { nf = &pbuf->frag_array[i+1]; - pci_unmap_page(pdev, nf->dma, nf->length, PCI_DMA_TODEVICE); + dma_unmap_page(&pdev->dev, nf->dma, nf->length, DMA_TO_DEVICE); nf->dma = 0ULL; } nf = &pbuf->frag_array[0]; - pci_unmap_single(pdev, nf->dma, skb_headlen(skb), PCI_DMA_TODEVICE); + dma_unmap_single(&pdev->dev, nf->dma, skb_headlen(skb), DMA_TO_DEVICE); nf->dma = 0ULL; out_err: -- GitLab From ebfd44883ab5dd9a201af2d936e1dfb93962be0b Mon Sep 17 00:00:00 2001 From: David Gow Date: Fri, 11 Dec 2020 14:32:32 -0800 Subject: [PATCH 0916/2012] kunit: tool: Fix spelling of "diagnostic" in kunit_parser Various helper functions were misspelling "diagnostic" in their names. It finally got annoying, so fix it. Signed-off-by: David Gow Reviewed-by: Brendan Higgins Tested-by: Brendan Higgins Signed-off-by: Shuah Khan --- tools/testing/kunit/kunit_parser.py | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/tools/testing/kunit/kunit_parser.py b/tools/testing/kunit/kunit_parser.py index 6614ec4d08989..1a1e1d13f1d30 100644 --- a/tools/testing/kunit/kunit_parser.py +++ b/tools/testing/kunit/kunit_parser.py @@ -97,11 +97,11 @@ def print_log(log): TAP_ENTRIES = re.compile(r'^(TAP|[\s]*ok|[\s]*not ok|[\s]*[0-9]+\.\.[0-9]+|[\s]*#).*$') -def consume_non_diagnositic(lines: List[str]) -> None: +def consume_non_diagnostic(lines: List[str]) -> None: while lines and not TAP_ENTRIES.match(lines[0]): lines.pop(0) -def save_non_diagnositic(lines: List[str], test_case: TestCase) -> None: +def save_non_diagnostic(lines: List[str], test_case: TestCase) -> None: while lines and not TAP_ENTRIES.match(lines[0]): test_case.log.append(lines[0]) lines.pop(0) @@ -113,7 +113,7 @@ OK_NOT_OK_SUBTEST = re.compile(r'^[\s]+(ok|not ok) [0-9]+ - (.*)$') OK_NOT_OK_MODULE = re.compile(r'^(ok|not ok) ([0-9]+) - (.*)$') def parse_ok_not_ok_test_case(lines: List[str], test_case: TestCase) -> bool: - save_non_diagnositic(lines, test_case) + save_non_diagnostic(lines, test_case) if not lines: test_case.status = TestStatus.TEST_CRASHED return True @@ -139,7 +139,7 @@ SUBTEST_DIAGNOSTIC = re.compile(r'^[\s]+# (.*)$') DIAGNOSTIC_CRASH_MESSAGE = re.compile(r'^[\s]+# .*?: kunit test case crashed!$') def parse_diagnostic(lines: List[str], test_case: TestCase) -> bool: - save_non_diagnositic(lines, test_case) + save_non_diagnostic(lines, test_case) if not lines: return False line = lines[0] @@ -155,7 +155,7 @@ def parse_diagnostic(lines: List[str], test_case: TestCase) -> bool: def parse_test_case(lines: List[str]) -> Optional[TestCase]: test_case = TestCase() - save_non_diagnositic(lines, test_case) + save_non_diagnostic(lines, test_case) while parse_diagnostic(lines, test_case): pass if parse_ok_not_ok_test_case(lines, test_case): @@ -166,7 +166,7 @@ def parse_test_case(lines: List[str]) -> Optional[TestCase]: SUBTEST_HEADER = re.compile(r'^[\s]+# Subtest: (.*)$') def parse_subtest_header(lines: List[str]) -> Optional[str]: - consume_non_diagnositic(lines) + consume_non_diagnostic(lines) if not lines: return None match = SUBTEST_HEADER.match(lines[0]) @@ -179,7 +179,7 @@ def parse_subtest_header(lines: List[str]) -> Optional[str]: SUBTEST_PLAN = re.compile(r'[\s]+[0-9]+\.\.([0-9]+)') def parse_subtest_plan(lines: List[str]) -> Optional[int]: - consume_non_diagnositic(lines) + consume_non_diagnostic(lines) match = SUBTEST_PLAN.match(lines[0]) if match: lines.pop(0) @@ -202,7 +202,7 @@ def max_status(left: TestStatus, right: TestStatus) -> TestStatus: def parse_ok_not_ok_test_suite(lines: List[str], test_suite: TestSuite, expected_suite_index: int) -> bool: - consume_non_diagnositic(lines) + consume_non_diagnostic(lines) if not lines: test_suite.status = TestStatus.TEST_CRASHED return False @@ -235,7 +235,7 @@ def bubble_up_test_case_errors(test_suite: TestSuite) -> TestStatus: def parse_test_suite(lines: List[str], expected_suite_index: int) -> Optional[TestSuite]: if not lines: return None - consume_non_diagnositic(lines) + consume_non_diagnostic(lines) test_suite = TestSuite() test_suite.status = TestStatus.SUCCESS name = parse_subtest_header(lines) @@ -264,7 +264,7 @@ def parse_test_suite(lines: List[str], expected_suite_index: int) -> Optional[Te TAP_HEADER = re.compile(r'^TAP version 14$') def parse_tap_header(lines: List[str]) -> bool: - consume_non_diagnositic(lines) + consume_non_diagnostic(lines) if TAP_HEADER.match(lines[0]): lines.pop(0) return True @@ -274,7 +274,7 @@ def parse_tap_header(lines: List[str]) -> bool: TEST_PLAN = re.compile(r'[0-9]+\.\.([0-9]+)') def parse_test_plan(lines: List[str]) -> Optional[int]: - consume_non_diagnositic(lines) + consume_non_diagnostic(lines) match = TEST_PLAN.match(lines[0]) if match: lines.pop(0) @@ -286,7 +286,7 @@ def bubble_up_suite_errors(test_suite_list: List[TestSuite]) -> TestStatus: return bubble_up_errors(lambda x: x.status, test_suite_list) def parse_test_result(lines: List[str]) -> TestResult: - consume_non_diagnositic(lines) + consume_non_diagnostic(lines) if not lines or not parse_tap_header(lines): return TestResult(TestStatus.NO_TESTS, [], lines) expected_test_suite_num = parse_test_plan(lines) -- GitLab From 8db50be262e9faf59fa0feb74599c29b64eb0af2 Mon Sep 17 00:00:00 2001 From: Daniel Latypov Date: Tue, 15 Dec 2020 16:22:46 -0800 Subject: [PATCH 0917/2012] Documentation: kunit: include example of a parameterized test Commit fadb08e7c750 ("kunit: Support for Parameterized Testing") introduced support but lacks documentation for how to use it. This patch builds on commit 1f0e943df68a ("Documentation: kunit: provide guidance for testing many inputs") to show a minimal example of the new feature. Signed-off-by: Daniel Latypov Reviewed-by: David Gow Tested-by: Brendan Higgins Acked-by: Brendan Higgins Signed-off-by: Shuah Khan --- Documentation/dev-tools/kunit/usage.rst | 57 +++++++++++++++++++++++++ 1 file changed, 57 insertions(+) diff --git a/Documentation/dev-tools/kunit/usage.rst b/Documentation/dev-tools/kunit/usage.rst index d9fdc14f0677b..650f99590df57 100644 --- a/Documentation/dev-tools/kunit/usage.rst +++ b/Documentation/dev-tools/kunit/usage.rst @@ -522,6 +522,63 @@ There's more boilerplate involved, but it can: * E.g. if we wanted to also test ``sha256sum``, we could add a ``sha256`` field and reuse ``cases``. +* be converted to a "parameterized test", see below. + +Parameterized Testing +~~~~~~~~~~~~~~~~~~~~~ + +The table-driven testing pattern is common enough that KUnit has special +support for it. + +Reusing the same ``cases`` array from above, we can write the test as a +"parameterized test" with the following. + +.. code-block:: c + + // This is copy-pasted from above. + struct sha1_test_case { + const char *str; + const char *sha1; + }; + struct sha1_test_case cases[] = { + { + .str = "hello world", + .sha1 = "2aae6c35c94fcfb415dbe95f408b9ce91ee846ed", + }, + { + .str = "hello world!", + .sha1 = "430ce34d020724ed75a196dfc2ad67c77772d169", + }, + }; + + // Need a helper function to generate a name for each test case. + static void case_to_desc(const struct sha1_test_case *t, char *desc) + { + strcpy(desc, t->str); + } + // Creates `sha1_gen_params()` to iterate over `cases`. + KUNIT_ARRAY_PARAM(sha1, cases, case_to_desc); + + // Looks no different from a normal test. + static void sha1_test(struct kunit *test) + { + // This function can just contain the body of the for-loop. + // The former `cases[i]` is accessible under test->param_value. + char out[40]; + struct sha1_test_case *test_param = (struct sha1_test_case *)(test->param_value); + + sha1sum(test_param->str, out); + KUNIT_EXPECT_STREQ_MSG(test, (char *)out, test_param->sha1, + "sha1sum(%s)", test_param->str); + } + + // Instead of KUNIT_CASE, we use KUNIT_CASE_PARAM and pass in the + // function declared by KUNIT_ARRAY_PARAM. + static struct kunit_case sha1_test_cases[] = { + KUNIT_CASE_PARAM(sha1_test, sha1_gen_params), + {} + }; + .. _kunit-on-non-uml: KUnit on non-UML architectures -- GitLab From 09641f7c7d8f1309fe9ad9ce4e6a1697016d73ba Mon Sep 17 00:00:00 2001 From: Daniel Latypov Date: Thu, 14 Jan 2021 16:39:11 -0800 Subject: [PATCH 0918/2012] kunit: tool: surface and address more typing issues The authors of this tool were more familiar with a different type-checker, https://github.com/google/pytype. That's open source, but mypy seems more prevalent (and runs faster). And unlike pytype, mypy doesn't try to infer types so it doesn't check unanotated functions. So annotate ~all functions in kunit tool to increase type-checking coverage. Note: per https://www.python.org/dev/peps/pep-0484/, `__init__()` should be annotated as `-> None`. Doing so makes mypy discover a number of new violations. Exclude main() since we reuse `request` for the different types of requests, which mypy isn't happy about. This commit fixes all but one error, where `TestSuite.status` might be None. Signed-off-by: Daniel Latypov Reviewed-by: David Gow Tested-by: Brendan Higgins Acked-by: Brendan Higgins Signed-off-by: Shuah Khan --- tools/testing/kunit/kunit.py | 14 ++++----- tools/testing/kunit/kunit_config.py | 7 +++-- tools/testing/kunit/kunit_json.py | 2 +- tools/testing/kunit/kunit_kernel.py | 37 ++++++++++++----------- tools/testing/kunit/kunit_parser.py | 46 ++++++++++++++--------------- 5 files changed, 54 insertions(+), 52 deletions(-) diff --git a/tools/testing/kunit/kunit.py b/tools/testing/kunit/kunit.py index 21516e293d171..5521e0a8201e3 100755 --- a/tools/testing/kunit/kunit.py +++ b/tools/testing/kunit/kunit.py @@ -43,9 +43,9 @@ class KunitStatus(Enum): BUILD_FAILURE = auto() TEST_FAILURE = auto() -def get_kernel_root_path(): - parts = sys.argv[0] if not __file__ else __file__ - parts = os.path.realpath(parts).split('tools/testing/kunit') +def get_kernel_root_path() -> str: + path = sys.argv[0] if not __file__ else __file__ + parts = os.path.realpath(path).split('tools/testing/kunit') if len(parts) != 2: sys.exit(1) return parts[0] @@ -171,7 +171,7 @@ def run_tests(linux: kunit_kernel.LinuxSourceTree, exec_result.elapsed_time)) return parse_result -def add_common_opts(parser): +def add_common_opts(parser) -> None: parser.add_argument('--build_dir', help='As in the make command, it specifies the build ' 'directory.', @@ -183,13 +183,13 @@ def add_common_opts(parser): help='Run all KUnit tests through allyesconfig', action='store_true') -def add_build_opts(parser): +def add_build_opts(parser) -> None: parser.add_argument('--jobs', help='As in the make command, "Specifies the number of ' 'jobs (commands) to run simultaneously."', type=int, default=8, metavar='jobs') -def add_exec_opts(parser): +def add_exec_opts(parser) -> None: parser.add_argument('--timeout', help='maximum number of seconds to allow for all tests ' 'to run. This does not include time taken to build the ' @@ -198,7 +198,7 @@ def add_exec_opts(parser): default=300, metavar='timeout') -def add_parse_opts(parser): +def add_parse_opts(parser) -> None: parser.add_argument('--raw_output', help='don\'t format output from kernel', action='store_true') parser.add_argument('--json', diff --git a/tools/testing/kunit/kunit_config.py b/tools/testing/kunit/kunit_config.py index 02ffc3a3e5dc7..bdd60230764b0 100644 --- a/tools/testing/kunit/kunit_config.py +++ b/tools/testing/kunit/kunit_config.py @@ -8,6 +8,7 @@ import collections import re +from typing import List, Set CONFIG_IS_NOT_SET_PATTERN = r'^# CONFIG_(\w+) is not set$' CONFIG_PATTERN = r'^CONFIG_(\w+)=(\S+|".*")$' @@ -30,10 +31,10 @@ class KconfigParseError(Exception): class Kconfig(object): """Represents defconfig or .config specified using the Kconfig language.""" - def __init__(self): - self._entries = [] + def __init__(self) -> None: + self._entries = [] # type: List[KconfigEntry] - def entries(self): + def entries(self) -> Set[KconfigEntry]: return set(self._entries) def add_entry(self, entry: KconfigEntry) -> None: diff --git a/tools/testing/kunit/kunit_json.py b/tools/testing/kunit/kunit_json.py index 624b31b2dbd62..f5cca5c38cacf 100644 --- a/tools/testing/kunit/kunit_json.py +++ b/tools/testing/kunit/kunit_json.py @@ -13,7 +13,7 @@ import kunit_parser from kunit_parser import TestStatus -def get_json_result(test_result, def_config, build_dir, json_path): +def get_json_result(test_result, def_config, build_dir, json_path) -> str: sub_groups = [] # Each test suite is mapped to a KernelCI sub_group diff --git a/tools/testing/kunit/kunit_kernel.py b/tools/testing/kunit/kunit_kernel.py index 698358c9c0d60..e77ee06aa4076 100644 --- a/tools/testing/kunit/kunit_kernel.py +++ b/tools/testing/kunit/kunit_kernel.py @@ -11,6 +11,7 @@ import subprocess import os import shutil import signal +from typing import Iterator from contextlib import ExitStack @@ -39,7 +40,7 @@ class BuildError(Exception): class LinuxSourceTreeOperations(object): """An abstraction over command line operations performed on a source tree.""" - def make_mrproper(self): + def make_mrproper(self) -> None: try: subprocess.check_output(['make', 'mrproper'], stderr=subprocess.STDOUT) except OSError as e: @@ -47,7 +48,7 @@ class LinuxSourceTreeOperations(object): except subprocess.CalledProcessError as e: raise ConfigError(e.output.decode()) - def make_olddefconfig(self, build_dir, make_options): + def make_olddefconfig(self, build_dir, make_options) -> None: command = ['make', 'ARCH=um', 'olddefconfig'] if make_options: command.extend(make_options) @@ -60,7 +61,7 @@ class LinuxSourceTreeOperations(object): except subprocess.CalledProcessError as e: raise ConfigError(e.output.decode()) - def make_allyesconfig(self, build_dir, make_options): + def make_allyesconfig(self, build_dir, make_options) -> None: kunit_parser.print_with_timestamp( 'Enabling all CONFIGs for UML...') command = ['make', 'ARCH=um', 'allyesconfig'] @@ -82,7 +83,7 @@ class LinuxSourceTreeOperations(object): kunit_parser.print_with_timestamp( 'Starting Kernel with all configs takes a few minutes...') - def make(self, jobs, build_dir, make_options): + def make(self, jobs, build_dir, make_options) -> None: command = ['make', 'ARCH=um', '--jobs=' + str(jobs)] if make_options: command.extend(make_options) @@ -100,7 +101,7 @@ class LinuxSourceTreeOperations(object): if stderr: # likely only due to build warnings print(stderr.decode()) - def linux_bin(self, params, timeout, build_dir): + def linux_bin(self, params, timeout, build_dir) -> None: """Runs the Linux UML binary. Must be named 'linux'.""" linux_bin = get_file_path(build_dir, 'linux') outfile = get_outfile_path(build_dir) @@ -110,23 +111,23 @@ class LinuxSourceTreeOperations(object): stderr=subprocess.STDOUT) process.wait(timeout) -def get_kconfig_path(build_dir): +def get_kconfig_path(build_dir) -> str: return get_file_path(build_dir, KCONFIG_PATH) -def get_kunitconfig_path(build_dir): +def get_kunitconfig_path(build_dir) -> str: return get_file_path(build_dir, KUNITCONFIG_PATH) -def get_outfile_path(build_dir): +def get_outfile_path(build_dir) -> str: return get_file_path(build_dir, OUTFILE_PATH) class LinuxSourceTree(object): """Represents a Linux kernel source tree with KUnit tests.""" - def __init__(self): + def __init__(self) -> None: self._ops = LinuxSourceTreeOperations() signal.signal(signal.SIGINT, self.signal_handler) - def clean(self): + def clean(self) -> bool: try: self._ops.make_mrproper() except ConfigError as e: @@ -134,17 +135,17 @@ class LinuxSourceTree(object): return False return True - def create_kunitconfig(self, build_dir, defconfig=DEFAULT_KUNITCONFIG_PATH): + def create_kunitconfig(self, build_dir, defconfig=DEFAULT_KUNITCONFIG_PATH) -> None: kunitconfig_path = get_kunitconfig_path(build_dir) if not os.path.exists(kunitconfig_path): shutil.copyfile(defconfig, kunitconfig_path) - def read_kunitconfig(self, build_dir): + def read_kunitconfig(self, build_dir) -> None: kunitconfig_path = get_kunitconfig_path(build_dir) self._kconfig = kunit_config.Kconfig() self._kconfig.read_from_file(kunitconfig_path) - def validate_config(self, build_dir): + def validate_config(self, build_dir) -> bool: kconfig_path = get_kconfig_path(build_dir) validated_kconfig = kunit_config.Kconfig() validated_kconfig.read_from_file(kconfig_path) @@ -158,7 +159,7 @@ class LinuxSourceTree(object): return False return True - def build_config(self, build_dir, make_options): + def build_config(self, build_dir, make_options) -> bool: kconfig_path = get_kconfig_path(build_dir) if build_dir and not os.path.exists(build_dir): os.mkdir(build_dir) @@ -170,7 +171,7 @@ class LinuxSourceTree(object): return False return self.validate_config(build_dir) - def build_reconfig(self, build_dir, make_options): + def build_reconfig(self, build_dir, make_options) -> bool: """Creates a new .config if it is not a subset of the .kunitconfig.""" kconfig_path = get_kconfig_path(build_dir) if os.path.exists(kconfig_path): @@ -186,7 +187,7 @@ class LinuxSourceTree(object): print('Generating .config ...') return self.build_config(build_dir, make_options) - def build_um_kernel(self, alltests, jobs, build_dir, make_options): + def build_um_kernel(self, alltests, jobs, build_dir, make_options) -> bool: try: if alltests: self._ops.make_allyesconfig(build_dir, make_options) @@ -197,7 +198,7 @@ class LinuxSourceTree(object): return False return self.validate_config(build_dir) - def run_kernel(self, args=[], build_dir='', timeout=None): + def run_kernel(self, args=[], build_dir='', timeout=None) -> Iterator[str]: args.extend(['mem=1G', 'console=tty']) self._ops.linux_bin(args, timeout, build_dir) outfile = get_outfile_path(build_dir) @@ -206,6 +207,6 @@ class LinuxSourceTree(object): for line in file: yield line - def signal_handler(self, sig, frame): + def signal_handler(self, sig, frame) -> None: logging.error('Build interruption occurred. Cleaning console.') subprocess.call(['stty', 'sane']) diff --git a/tools/testing/kunit/kunit_parser.py b/tools/testing/kunit/kunit_parser.py index 1a1e1d13f1d30..bd90d7b86b76c 100644 --- a/tools/testing/kunit/kunit_parser.py +++ b/tools/testing/kunit/kunit_parser.py @@ -12,32 +12,32 @@ from collections import namedtuple from datetime import datetime from enum import Enum, auto from functools import reduce -from typing import List, Optional, Tuple +from typing import Iterator, List, Optional, Tuple TestResult = namedtuple('TestResult', ['status','suites','log']) class TestSuite(object): - def __init__(self): - self.status = None - self.name = None - self.cases = [] + def __init__(self) -> None: + self.status = None # type: Optional[TestStatus] + self.name = '' + self.cases = [] # type: List[TestCase] - def __str__(self): - return 'TestSuite(' + self.status + ',' + self.name + ',' + str(self.cases) + ')' + def __str__(self) -> str: + return 'TestSuite(' + str(self.status) + ',' + self.name + ',' + str(self.cases) + ')' - def __repr__(self): + def __repr__(self) -> str: return str(self) class TestCase(object): - def __init__(self): - self.status = None + def __init__(self) -> None: + self.status = None # type: Optional[TestStatus] self.name = '' - self.log = [] + self.log = [] # type: List[str] - def __str__(self): - return 'TestCase(' + self.status + ',' + self.name + ',' + str(self.log) + ')' + def __str__(self) -> str: + return 'TestCase(' + str(self.status) + ',' + self.name + ',' + str(self.log) + ')' - def __repr__(self): + def __repr__(self) -> str: return str(self) class TestStatus(Enum): @@ -51,7 +51,7 @@ kunit_start_re = re.compile(r'TAP version [0-9]+$') kunit_end_re = re.compile('(List of all partitions:|' 'Kernel panic - not syncing: VFS:)') -def isolate_kunit_output(kernel_output): +def isolate_kunit_output(kernel_output) -> Iterator[str]: started = False for line in kernel_output: line = line.rstrip() # line always has a trailing \n @@ -64,7 +64,7 @@ def isolate_kunit_output(kernel_output): elif started: yield line[prefix_len:] if prefix_len > 0 else line -def raw_output(kernel_output): +def raw_output(kernel_output) -> None: for line in kernel_output: print(line.rstrip()) @@ -72,26 +72,26 @@ DIVIDER = '=' * 60 RESET = '\033[0;0m' -def red(text): +def red(text) -> str: return '\033[1;31m' + text + RESET -def yellow(text): +def yellow(text) -> str: return '\033[1;33m' + text + RESET -def green(text): +def green(text) -> str: return '\033[1;32m' + text + RESET -def print_with_timestamp(message): +def print_with_timestamp(message) -> None: print('[%s] %s' % (datetime.now().strftime('%H:%M:%S'), message)) -def format_suite_divider(message): +def format_suite_divider(message) -> str: return '======== ' + message + ' ========' -def print_suite_divider(message): +def print_suite_divider(message) -> None: print_with_timestamp(DIVIDER) print_with_timestamp(format_suite_divider(message)) -def print_log(log): +def print_log(log) -> None: for m in log: print_with_timestamp(m) -- GitLab From 81c60306dc588e2e6b21391c1f6dd509403e6eec Mon Sep 17 00:00:00 2001 From: Daniel Latypov Date: Thu, 14 Jan 2021 16:39:12 -0800 Subject: [PATCH 0919/2012] kunit: tool: fix minor typing issue with None status The code to handle aggregating statuses didn't check that the status actually got set to some non-None value. Default the value to SUCCESS instead of adding a bunch of `is None` checks. This sorta follows the precedent in commit 3fc48259d525 ("kunit: Don't fail test suites if one of them is empty"). Also slightly simplify the code and add type annotations. Signed-off-by: Daniel Latypov Reviewed-by: David Gow Reviewed-by: Brendan Higgins Tested-by: Brendan Higgins Signed-off-by: Shuah Khan --- tools/testing/kunit/kunit_parser.py | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/tools/testing/kunit/kunit_parser.py b/tools/testing/kunit/kunit_parser.py index bd90d7b86b76c..e8bcc139702e2 100644 --- a/tools/testing/kunit/kunit_parser.py +++ b/tools/testing/kunit/kunit_parser.py @@ -12,13 +12,13 @@ from collections import namedtuple from datetime import datetime from enum import Enum, auto from functools import reduce -from typing import Iterator, List, Optional, Tuple +from typing import Iterable, Iterator, List, Optional, Tuple TestResult = namedtuple('TestResult', ['status','suites','log']) class TestSuite(object): def __init__(self) -> None: - self.status = None # type: Optional[TestStatus] + self.status = TestStatus.SUCCESS self.name = '' self.cases = [] # type: List[TestCase] @@ -30,7 +30,7 @@ class TestSuite(object): class TestCase(object): def __init__(self) -> None: - self.status = None # type: Optional[TestStatus] + self.status = TestStatus.SUCCESS self.name = '' self.log = [] # type: List[str] @@ -224,12 +224,11 @@ def parse_ok_not_ok_test_suite(lines: List[str], else: return False -def bubble_up_errors(to_status, status_container_list) -> TestStatus: - status_list = map(to_status, status_container_list) - return reduce(max_status, status_list, TestStatus.SUCCESS) +def bubble_up_errors(statuses: Iterable[TestStatus]) -> TestStatus: + return reduce(max_status, statuses, TestStatus.SUCCESS) def bubble_up_test_case_errors(test_suite: TestSuite) -> TestStatus: - max_test_case_status = bubble_up_errors(lambda x: x.status, test_suite.cases) + max_test_case_status = bubble_up_errors(x.status for x in test_suite.cases) return max_status(max_test_case_status, test_suite.status) def parse_test_suite(lines: List[str], expected_suite_index: int) -> Optional[TestSuite]: @@ -282,8 +281,8 @@ def parse_test_plan(lines: List[str]) -> Optional[int]: else: return None -def bubble_up_suite_errors(test_suite_list: List[TestSuite]) -> TestStatus: - return bubble_up_errors(lambda x: x.status, test_suite_list) +def bubble_up_suite_errors(test_suites: Iterable[TestSuite]) -> TestStatus: + return bubble_up_errors(x.status for x in test_suites) def parse_test_result(lines: List[str]) -> TestResult: consume_non_diagnostic(lines) -- GitLab From 2b8fdbbf1c616300312f71fe5b21fe8f03129950 Mon Sep 17 00:00:00 2001 From: Daniel Latypov Date: Thu, 14 Jan 2021 16:39:13 -0800 Subject: [PATCH 0920/2012] kunit: tool: move kunitconfig parsing into __init__, make it optional LinuxSourceTree will unceremoniously crash if the user doesn't call read_kunitconfig() first in a number of functions. And currently every place we create an instance, the caller also calls create_kunitconfig() and read_kunitconfig(). Move these instead into __init__() so they can't be forgotten and to reduce copy-paste. The https://github.com/google/pytype type-checker complained that _config wasn't initialized. With this, kunit_tool now type checks under both pytype and mypy. Add an optional boolean that can be used to disable this for use cases in the future where we might not need/want to load the config. Signed-off-by: Daniel Latypov Reviewed-by: Brendan Higgins Tested-by: Brendan Higgins Signed-off-by: Shuah Khan --- tools/testing/kunit/kunit.py | 20 ++++---------------- tools/testing/kunit/kunit_kernel.py | 25 +++++++++++++------------ 2 files changed, 17 insertions(+), 28 deletions(-) diff --git a/tools/testing/kunit/kunit.py b/tools/testing/kunit/kunit.py index 5521e0a8201e3..e808a47c839bf 100755 --- a/tools/testing/kunit/kunit.py +++ b/tools/testing/kunit/kunit.py @@ -256,10 +256,7 @@ def main(argv, linux=None): os.mkdir(cli_args.build_dir) if not linux: - linux = kunit_kernel.LinuxSourceTree() - - linux.create_kunitconfig(cli_args.build_dir) - linux.read_kunitconfig(cli_args.build_dir) + linux = kunit_kernel.LinuxSourceTree(cli_args.build_dir) request = KunitRequest(cli_args.raw_output, cli_args.timeout, @@ -277,10 +274,7 @@ def main(argv, linux=None): os.mkdir(cli_args.build_dir) if not linux: - linux = kunit_kernel.LinuxSourceTree() - - linux.create_kunitconfig(cli_args.build_dir) - linux.read_kunitconfig(cli_args.build_dir) + linux = kunit_kernel.LinuxSourceTree(cli_args.build_dir) request = KunitConfigRequest(cli_args.build_dir, cli_args.make_options) @@ -292,10 +286,7 @@ def main(argv, linux=None): sys.exit(1) elif cli_args.subcommand == 'build': if not linux: - linux = kunit_kernel.LinuxSourceTree() - - linux.create_kunitconfig(cli_args.build_dir) - linux.read_kunitconfig(cli_args.build_dir) + linux = kunit_kernel.LinuxSourceTree(cli_args.build_dir) request = KunitBuildRequest(cli_args.jobs, cli_args.build_dir, @@ -309,10 +300,7 @@ def main(argv, linux=None): sys.exit(1) elif cli_args.subcommand == 'exec': if not linux: - linux = kunit_kernel.LinuxSourceTree() - - linux.create_kunitconfig(cli_args.build_dir) - linux.read_kunitconfig(cli_args.build_dir) + linux = kunit_kernel.LinuxSourceTree(cli_args.build_dir) exec_request = KunitExecRequest(cli_args.timeout, cli_args.build_dir, diff --git a/tools/testing/kunit/kunit_kernel.py b/tools/testing/kunit/kunit_kernel.py index e77ee06aa4076..2076a5a2d060a 100644 --- a/tools/testing/kunit/kunit_kernel.py +++ b/tools/testing/kunit/kunit_kernel.py @@ -123,28 +123,29 @@ def get_outfile_path(build_dir) -> str: class LinuxSourceTree(object): """Represents a Linux kernel source tree with KUnit tests.""" - def __init__(self) -> None: - self._ops = LinuxSourceTreeOperations() + def __init__(self, build_dir: str, load_config=True, defconfig=DEFAULT_KUNITCONFIG_PATH) -> None: signal.signal(signal.SIGINT, self.signal_handler) - def clean(self) -> bool: - try: - self._ops.make_mrproper() - except ConfigError as e: - logging.error(e) - return False - return True + self._ops = LinuxSourceTreeOperations() + + if not load_config: + return - def create_kunitconfig(self, build_dir, defconfig=DEFAULT_KUNITCONFIG_PATH) -> None: kunitconfig_path = get_kunitconfig_path(build_dir) if not os.path.exists(kunitconfig_path): shutil.copyfile(defconfig, kunitconfig_path) - def read_kunitconfig(self, build_dir) -> None: - kunitconfig_path = get_kunitconfig_path(build_dir) self._kconfig = kunit_config.Kconfig() self._kconfig.read_from_file(kunitconfig_path) + def clean(self) -> bool: + try: + self._ops.make_mrproper() + except ConfigError as e: + logging.error(e) + return False + return True + def validate_config(self, build_dir) -> bool: kconfig_path = get_kconfig_path(build_dir) validated_kconfig = kunit_config.Kconfig() -- GitLab From 0ee2af4ebbe3c4364429859acd571018ebfb3424 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Sat, 16 Jan 2021 01:19:19 +0200 Subject: [PATCH 0921/2012] net: dsa: set configure_vlan_while_not_filtering to true by default As explained in commit 54a0ed0df496 ("net: dsa: provide an option for drivers to always receive bridge VLANs"), DSA has historically been skipping VLAN switchdev operations when the bridge wasn't in vlan_filtering mode, but the reason why it was doing that has never been clear. So the configure_vlan_while_not_filtering option is there merely to preserve functionality for existing drivers. It isn't some behavior that drivers should opt into. Ideally, when all drivers leave this flag set, we can delete the dsa_port_skip_vlan_configuration() function. New drivers always seem to omit setting this flag, for some reason. So let's reverse the logic: the DSA core sets it by default to true before the .setup() callback, and legacy drivers can turn it off. This way, new drivers get the new behavior by default, unless they explicitly set the flag to false, which is more obvious during review. Remove the assignment from drivers which were setting it to true, and add the assignment to false for the drivers that didn't previously have it. This way, it should be easier to see how many we have left. The following drivers: lan9303, mv88e6060 were skipped from setting this flag to false, because they didn't have any VLAN offload ops in the first place. The Broadcom Starfighter 2 driver calls the common b53_switch_alloc and therefore also inherits the configure_vlan_while_not_filtering=true behavior. Also, print a message through netlink extack every time a VLAN has been skipped. This is mildly annoying on purpose, so that (a) it is at least clear that VLANs are being skipped - the legacy behavior in itself is confusing, and the extack should be much more difficult to miss, unlike kernel logs - and (b) people have one more incentive to convert to the new behavior. No behavior change except for the added prints is intended at this time. $ ip link add br0 type bridge vlan_filtering 0 $ ip link set sw0p2 master br0 [ 60.315148] br0: port 1(sw0p2) entered blocking state [ 60.320350] br0: port 1(sw0p2) entered disabled state [ 60.327839] device sw0p2 entered promiscuous mode [ 60.334905] br0: port 1(sw0p2) entered blocking state [ 60.340142] br0: port 1(sw0p2) entered forwarding state Warning: dsa_core: skipping configuration of VLAN. # This was the pvid $ bridge vlan add dev sw0p2 vid 100 Warning: dsa_core: skipping configuration of VLAN. Signed-off-by: Vladimir Oltean Reviewed-by: Kurt Kanzenbach Reviewed-by: Florian Fainelli Link: https://lore.kernel.org/r/20210115231919.43834-1-vladimir.oltean@nxp.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/b53/b53_common.c | 3 +-- drivers/net/dsa/dsa_loop.c | 1 - drivers/net/dsa/hirschmann/hellcreek.c | 5 ----- drivers/net/dsa/lantiq_gswip.c | 3 +++ drivers/net/dsa/microchip/ksz8795.c | 2 ++ drivers/net/dsa/microchip/ksz9477.c | 2 ++ drivers/net/dsa/mt7530.c | 2 -- drivers/net/dsa/mv88e6xxx/chip.c | 1 - drivers/net/dsa/ocelot/felix.c | 1 - drivers/net/dsa/qca/ar9331.c | 2 ++ drivers/net/dsa/qca8k.c | 1 - drivers/net/dsa/rtl8366rb.c | 2 ++ drivers/net/dsa/sja1105/sja1105_main.c | 2 -- net/dsa/dsa2.c | 2 ++ net/dsa/slave.c | 9 ++++++--- 15 files changed, 20 insertions(+), 18 deletions(-) diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index 5d6d9f91d40af..a238ded354c22 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -2602,9 +2602,8 @@ struct b53_device *b53_switch_alloc(struct device *base, dev->priv = priv; dev->ops = ops; ds->ops = &b53_switch_ops; - ds->configure_vlan_while_not_filtering = true; ds->untag_bridge_pvid = true; - dev->vlan_enabled = ds->configure_vlan_while_not_filtering; + dev->vlan_enabled = true; mutex_init(&dev->reg_mutex); mutex_init(&dev->stats_mutex); diff --git a/drivers/net/dsa/dsa_loop.c b/drivers/net/dsa/dsa_loop.c index be61ce93a3778..5f69216376fe2 100644 --- a/drivers/net/dsa/dsa_loop.c +++ b/drivers/net/dsa/dsa_loop.c @@ -323,7 +323,6 @@ static int dsa_loop_drv_probe(struct mdio_device *mdiodev) ds->dev = &mdiodev->dev; ds->ops = &dsa_loop_driver; ds->priv = ps; - ds->configure_vlan_while_not_filtering = true; ps->bus = mdiodev->bus; dev_set_drvdata(&mdiodev->dev, ds); diff --git a/drivers/net/dsa/hirschmann/hellcreek.c b/drivers/net/dsa/hirschmann/hellcreek.c index 205249504289d..9a1921e653e86 100644 --- a/drivers/net/dsa/hirschmann/hellcreek.c +++ b/drivers/net/dsa/hirschmann/hellcreek.c @@ -1033,11 +1033,6 @@ static int hellcreek_setup(struct dsa_switch *ds) /* Configure PCP <-> TC mapping */ hellcreek_setup_tc_identity_mapping(hellcreek); - /* Allow VLAN configurations while not filtering which is the default - * for new DSA drivers. - */ - ds->configure_vlan_while_not_filtering = true; - /* The VLAN awareness is a global switch setting. Therefore, mixed vlan * filtering setups are not supported. */ diff --git a/drivers/net/dsa/lantiq_gswip.c b/drivers/net/dsa/lantiq_gswip.c index 244729ee3bdbd..9fec97773a156 100644 --- a/drivers/net/dsa/lantiq_gswip.c +++ b/drivers/net/dsa/lantiq_gswip.c @@ -827,6 +827,9 @@ static int gswip_setup(struct dsa_switch *ds) } gswip_port_enable(ds, cpu_port, NULL); + + ds->configure_vlan_while_not_filtering = false; + return 0; } diff --git a/drivers/net/dsa/microchip/ksz8795.c b/drivers/net/dsa/microchip/ksz8795.c index d639f9476bd93..37a73421e2cc9 100644 --- a/drivers/net/dsa/microchip/ksz8795.c +++ b/drivers/net/dsa/microchip/ksz8795.c @@ -1094,6 +1094,8 @@ static int ksz8795_setup(struct dsa_switch *ds) ksz_init_mib_timer(dev); + ds->configure_vlan_while_not_filtering = false; + return 0; } diff --git a/drivers/net/dsa/microchip/ksz9477.c b/drivers/net/dsa/microchip/ksz9477.c index 71cf24f20252a..00e38c8e0d010 100644 --- a/drivers/net/dsa/microchip/ksz9477.c +++ b/drivers/net/dsa/microchip/ksz9477.c @@ -1381,6 +1381,8 @@ static int ksz9477_setup(struct dsa_switch *ds) ksz_init_mib_timer(dev); + ds->configure_vlan_while_not_filtering = false; + return 0; } diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c index 199a135125b23..d2196197d920a 100644 --- a/drivers/net/dsa/mt7530.c +++ b/drivers/net/dsa/mt7530.c @@ -1639,7 +1639,6 @@ mt7530_setup(struct dsa_switch *ds) * as two netdev instances. */ dn = dsa_to_port(ds, MT7530_CPU_PORT)->master->dev.of_node->parent; - ds->configure_vlan_while_not_filtering = true; ds->mtu_enforcement_ingress = true; if (priv->id == ID_MT7530) { @@ -1878,7 +1877,6 @@ mt7531_setup(struct dsa_switch *ds) PVC_EG_TAG(MT7530_VLAN_EG_CONSISTENT)); } - ds->configure_vlan_while_not_filtering = true; ds->mtu_enforcement_ingress = true; /* Flush the FDB table */ diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index 91286d7b12c75..2f976050a0d71 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -2875,7 +2875,6 @@ static int mv88e6xxx_setup(struct dsa_switch *ds) chip->ds = ds; ds->slave_mii_bus = mv88e6xxx_default_mdio_bus(chip); - ds->configure_vlan_while_not_filtering = true; mv88e6xxx_reg_lock(chip); diff --git a/drivers/net/dsa/ocelot/felix.c b/drivers/net/dsa/ocelot/felix.c index 768a74dc462af..8492151f81334 100644 --- a/drivers/net/dsa/ocelot/felix.c +++ b/drivers/net/dsa/ocelot/felix.c @@ -599,7 +599,6 @@ static int felix_setup(struct dsa_switch *ds) ANA_PGID_PGID, PGID_UC); ds->mtu_enforcement_ingress = true; - ds->configure_vlan_while_not_filtering = true; ds->assisted_learning_on_cpu_port = true; return 0; diff --git a/drivers/net/dsa/qca/ar9331.c b/drivers/net/dsa/qca/ar9331.c index 1e3706054ae1f..ca2ad77b71f1c 100644 --- a/drivers/net/dsa/qca/ar9331.c +++ b/drivers/net/dsa/qca/ar9331.c @@ -402,6 +402,8 @@ static int ar9331_sw_setup(struct dsa_switch *ds) if (ret) goto error; + ds->configure_vlan_while_not_filtering = false; + return 0; error: dev_err_ratelimited(priv->dev, "%s: %i\n", __func__, ret); diff --git a/drivers/net/dsa/qca8k.c b/drivers/net/dsa/qca8k.c index f54e8b6c86215..6127823d6c2e4 100644 --- a/drivers/net/dsa/qca8k.c +++ b/drivers/net/dsa/qca8k.c @@ -1431,7 +1431,6 @@ qca8k_sw_probe(struct mdio_device *mdiodev) priv->ds->dev = &mdiodev->dev; priv->ds->num_ports = QCA8K_NUM_PORTS; - priv->ds->configure_vlan_while_not_filtering = true; priv->ds->priv = priv; priv->ops = qca8k_switch_ops; priv->ds->ops = &priv->ops; diff --git a/drivers/net/dsa/rtl8366rb.c b/drivers/net/dsa/rtl8366rb.c index 8969785687167..c6cc4938897ce 100644 --- a/drivers/net/dsa/rtl8366rb.c +++ b/drivers/net/dsa/rtl8366rb.c @@ -972,6 +972,8 @@ static int rtl8366rb_setup(struct dsa_switch *ds) return -ENODEV; } + ds->configure_vlan_while_not_filtering = false; + return 0; } diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c index 050b1260f358b..282253543f3b8 100644 --- a/drivers/net/dsa/sja1105/sja1105_main.c +++ b/drivers/net/dsa/sja1105/sja1105_main.c @@ -2901,8 +2901,6 @@ static int sja1105_setup(struct dsa_switch *ds) ds->mtu_enforcement_ingress = true; - ds->configure_vlan_while_not_filtering = true; - rc = sja1105_devlink_setup(ds); if (rc < 0) return rc; diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index 6f65ea0eef9f1..f98eb521c2ec1 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -511,6 +511,8 @@ static int dsa_switch_setup(struct dsa_switch *ds) if (err) goto unregister_devlink_ports; + ds->configure_vlan_while_not_filtering = true; + err = ds->ops->setup(ds); if (err < 0) goto unregister_notifier; diff --git a/net/dsa/slave.c b/net/dsa/slave.c index c5c81cba8259d..f2fb433f38284 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -326,7 +326,8 @@ dsa_slave_vlan_check_for_8021q_uppers(struct net_device *slave, } static int dsa_slave_vlan_add(struct net_device *dev, - const struct switchdev_obj *obj) + const struct switchdev_obj *obj, + struct netlink_ext_ack *extack) { struct net_device *master = dsa_slave_to_master(dev); struct dsa_port *dp = dsa_slave_to_port(dev); @@ -336,8 +337,10 @@ static int dsa_slave_vlan_add(struct net_device *dev, if (!dsa_port_offloads_netdev(dp, obj->orig_dev)) return -EOPNOTSUPP; - if (dsa_port_skip_vlan_configuration(dp)) + if (dsa_port_skip_vlan_configuration(dp)) { + NL_SET_ERR_MSG_MOD(extack, "skipping configuration of VLAN"); return 0; + } vlan = *SWITCHDEV_OBJ_PORT_VLAN(obj); @@ -389,7 +392,7 @@ static int dsa_slave_port_obj_add(struct net_device *dev, err = dsa_port_mdb_add(dp->cpu_dp, SWITCHDEV_OBJ_PORT_MDB(obj)); break; case SWITCHDEV_OBJ_ID_PORT_VLAN: - err = dsa_slave_vlan_add(dev, obj); + err = dsa_slave_vlan_add(dev, obj, extack); break; default: err = -EOPNOTSUPP; -- GitLab From 2267c530f868d51b856acebf9dad780b696d39fb Mon Sep 17 00:00:00 2001 From: Tom Rix Date: Wed, 13 Jan 2021 13:56:03 -0800 Subject: [PATCH 0922/2012] gianfar: remove definition of DEBUG Defining DEBUG should only be done in development. So remove DEBUG. Signed-off-by: Tom Rix Link: https://lore.kernel.org/r/20210113215603.1721958-1-trix@redhat.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/freescale/gianfar.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ethernet/freescale/gianfar.c b/drivers/net/ethernet/freescale/gianfar.c index d391a45cebb66..541de32ea6622 100644 --- a/drivers/net/ethernet/freescale/gianfar.c +++ b/drivers/net/ethernet/freescale/gianfar.c @@ -58,7 +58,6 @@ */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt -#define DEBUG #include #include -- GitLab From e794e7fa1963a6a7464606b59e15c69965409947 Mon Sep 17 00:00:00 2001 From: Tom Rix Date: Thu, 14 Jan 2021 13:29:17 -0800 Subject: [PATCH 0923/2012] neighbor: remove definition of DEBUG Defining DEBUG should only be done in development. So remove DEBUG. Signed-off-by: Tom Rix Link: https://lore.kernel.org/r/20210114212917.48174-1-trix@redhat.com Signed-off-by: Jakub Kicinski --- net/core/neighbour.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 277ed854aef1c..ff073581b5b17 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -41,7 +41,6 @@ #include -#define DEBUG #define NEIGH_DEBUG 1 #define neigh_dbg(level, fmt, ...) \ do { \ -- GitLab From 3ada665b8fab46bc126b9f0660a86503781ab67c Mon Sep 17 00:00:00 2001 From: Tom Rix Date: Fri, 15 Jan 2021 07:31:28 -0800 Subject: [PATCH 0924/2012] net: ks8851: remove definition of DEBUG Defining DEBUG should only be done in development. So remove DEBUG. Signed-off-by: Tom Rix Reviewed-by: Marek Vasut Link: https://lore.kernel.org/r/20210115153128.131026-1-trix@redhat.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/micrel/ks8851_common.c | 2 -- drivers/net/ethernet/micrel/ks8851_par.c | 2 -- drivers/net/ethernet/micrel/ks8851_spi.c | 2 -- 3 files changed, 6 deletions(-) diff --git a/drivers/net/ethernet/micrel/ks8851_common.c b/drivers/net/ethernet/micrel/ks8851_common.c index 058fd99bd4838..2feed6ce19d37 100644 --- a/drivers/net/ethernet/micrel/ks8851_common.c +++ b/drivers/net/ethernet/micrel/ks8851_common.c @@ -8,8 +8,6 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt -#define DEBUG - #include #include #include diff --git a/drivers/net/ethernet/micrel/ks8851_par.c b/drivers/net/ethernet/micrel/ks8851_par.c index 3bab0cb2b1a56..2e8fcce50f9d1 100644 --- a/drivers/net/ethernet/micrel/ks8851_par.c +++ b/drivers/net/ethernet/micrel/ks8851_par.c @@ -8,8 +8,6 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt -#define DEBUG - #include #include #include diff --git a/drivers/net/ethernet/micrel/ks8851_spi.c b/drivers/net/ethernet/micrel/ks8851_spi.c index 4ec7f16159775..479406ecbaa30 100644 --- a/drivers/net/ethernet/micrel/ks8851_spi.c +++ b/drivers/net/ethernet/micrel/ks8851_spi.c @@ -8,8 +8,6 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt -#define DEBUG - #include #include #include -- GitLab From e4bedf48aaa5552bc1f49703abd17606e7e6e82a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 14 Jan 2021 08:06:37 -0800 Subject: [PATCH 0925/2012] net_sched: reject silly cell_log in qdisc_get_rtab() iproute2 probably never goes beyond 8 for the cell exponent, but stick to the max shift exponent for signed 32bit. UBSAN reported: UBSAN: shift-out-of-bounds in net/sched/sch_api.c:389:22 shift exponent 130 is too large for 32-bit type 'int' CPU: 1 PID: 8450 Comm: syz-executor586 Not tainted 5.11.0-rc3-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:79 [inline] dump_stack+0x183/0x22e lib/dump_stack.c:120 ubsan_epilogue lib/ubsan.c:148 [inline] __ubsan_handle_shift_out_of_bounds+0x432/0x4d0 lib/ubsan.c:395 __detect_linklayer+0x2a9/0x330 net/sched/sch_api.c:389 qdisc_get_rtab+0x2b5/0x410 net/sched/sch_api.c:435 cbq_init+0x28f/0x12c0 net/sched/sch_cbq.c:1180 qdisc_create+0x801/0x1470 net/sched/sch_api.c:1246 tc_modify_qdisc+0x9e3/0x1fc0 net/sched/sch_api.c:1662 rtnetlink_rcv_msg+0xb1d/0xe60 net/core/rtnetlink.c:5564 netlink_rcv_skb+0x1f0/0x460 net/netlink/af_netlink.c:2494 netlink_unicast_kernel net/netlink/af_netlink.c:1304 [inline] netlink_unicast+0x7de/0x9b0 net/netlink/af_netlink.c:1330 netlink_sendmsg+0xaa6/0xe90 net/netlink/af_netlink.c:1919 sock_sendmsg_nosec net/socket.c:652 [inline] sock_sendmsg net/socket.c:672 [inline] ____sys_sendmsg+0x5a2/0x900 net/socket.c:2345 ___sys_sendmsg net/socket.c:2399 [inline] __sys_sendmsg+0x319/0x400 net/socket.c:2432 do_syscall_64+0x2d/0x70 arch/x86/entry/common.c:46 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Eric Dumazet Reported-by: syzbot Acked-by: Cong Wang Link: https://lore.kernel.org/r/20210114160637.1660597-1-eric.dumazet@gmail.com Signed-off-by: Jakub Kicinski --- net/sched/sch_api.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 51cb553e4317a..6fe4e5cc807c9 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -412,7 +412,8 @@ struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r, { struct qdisc_rate_table *rtab; - if (tab == NULL || r->rate == 0 || r->cell_log == 0 || + if (tab == NULL || r->rate == 0 || + r->cell_log == 0 || r->cell_log >= 32 || nla_len(tab) != TC_RTAB_SIZE) { NL_SET_ERR_MSG(extack, "Invalid rate table parameters for searching"); return NULL; -- GitLab From dd5e073381f2ada3630f36be42833c6e9c78b75e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 14 Jan 2021 10:19:29 -0800 Subject: [PATCH 0926/2012] net_sched: gen_estimator: support large ewma log syzbot report reminded us that very big ewma_log were supported in the past, even if they made litle sense. tc qdisc replace dev xxx root est 1sec 131072sec ... While fixing the bug, also add boundary checks for ewma_log, in line with range supported by iproute2. UBSAN: shift-out-of-bounds in net/core/gen_estimator.c:83:38 shift exponent -1 is negative CPU: 0 PID: 0 Comm: swapper/0 Not tainted 5.10.0-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:79 [inline] dump_stack+0x107/0x163 lib/dump_stack.c:120 ubsan_epilogue+0xb/0x5a lib/ubsan.c:148 __ubsan_handle_shift_out_of_bounds.cold+0xb1/0x181 lib/ubsan.c:395 est_timer.cold+0xbb/0x12d net/core/gen_estimator.c:83 call_timer_fn+0x1a5/0x710 kernel/time/timer.c:1417 expire_timers kernel/time/timer.c:1462 [inline] __run_timers.part.0+0x692/0xa80 kernel/time/timer.c:1731 __run_timers kernel/time/timer.c:1712 [inline] run_timer_softirq+0xb3/0x1d0 kernel/time/timer.c:1744 __do_softirq+0x2bc/0xa77 kernel/softirq.c:343 asm_call_irq_on_stack+0xf/0x20 __run_on_irqstack arch/x86/include/asm/irq_stack.h:26 [inline] run_on_irqstack_cond arch/x86/include/asm/irq_stack.h:77 [inline] do_softirq_own_stack+0xaa/0xd0 arch/x86/kernel/irq_64.c:77 invoke_softirq kernel/softirq.c:226 [inline] __irq_exit_rcu+0x17f/0x200 kernel/softirq.c:420 irq_exit_rcu+0x5/0x20 kernel/softirq.c:432 sysvec_apic_timer_interrupt+0x4d/0x100 arch/x86/kernel/apic/apic.c:1096 asm_sysvec_apic_timer_interrupt+0x12/0x20 arch/x86/include/asm/idtentry.h:628 RIP: 0010:native_save_fl arch/x86/include/asm/irqflags.h:29 [inline] RIP: 0010:arch_local_save_flags arch/x86/include/asm/irqflags.h:79 [inline] RIP: 0010:arch_irqs_disabled arch/x86/include/asm/irqflags.h:169 [inline] RIP: 0010:acpi_safe_halt drivers/acpi/processor_idle.c:111 [inline] RIP: 0010:acpi_idle_do_entry+0x1c9/0x250 drivers/acpi/processor_idle.c:516 Fixes: 1c0d32fde5bd ("net_sched: gen_estimator: complete rewrite of rate estimators") Signed-off-by: Eric Dumazet Reported-by: syzbot Link: https://lore.kernel.org/r/20210114181929.1717985-1-eric.dumazet@gmail.com Signed-off-by: Jakub Kicinski --- net/core/gen_estimator.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c index 80dbf2f4016e2..8e582e29a41e3 100644 --- a/net/core/gen_estimator.c +++ b/net/core/gen_estimator.c @@ -80,11 +80,11 @@ static void est_timer(struct timer_list *t) u64 rate, brate; est_fetch_counters(est, &b); - brate = (b.bytes - est->last_bytes) << (10 - est->ewma_log - est->intvl_log); - brate -= (est->avbps >> est->ewma_log); + brate = (b.bytes - est->last_bytes) << (10 - est->intvl_log); + brate = (brate >> est->ewma_log) - (est->avbps >> est->ewma_log); - rate = (b.packets - est->last_packets) << (10 - est->ewma_log - est->intvl_log); - rate -= (est->avpps >> est->ewma_log); + rate = (b.packets - est->last_packets) << (10 - est->intvl_log); + rate = (rate >> est->ewma_log) - (est->avpps >> est->ewma_log); write_seqcount_begin(&est->seq); est->avbps += brate; @@ -143,6 +143,9 @@ int gen_new_estimator(struct gnet_stats_basic_packed *bstats, if (parm->interval < -2 || parm->interval > 3) return -EINVAL; + if (parm->ewma_log == 0 || parm->ewma_log >= 31) + return -EINVAL; + est = kzalloc(sizeof(*est), GFP_KERNEL); if (!est) return -ENOBUFS; -- GitLab From bcd0cf19ef8258ac31b9a20248b05c15a1f4b4b0 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 14 Jan 2021 10:52:29 -0800 Subject: [PATCH 0927/2012] net_sched: avoid shift-out-of-bounds in tcindex_set_parms() tc_index being 16bit wide, we need to check that TCA_TCINDEX_SHIFT attribute is not silly. UBSAN: shift-out-of-bounds in net/sched/cls_tcindex.c:260:29 shift exponent 255 is too large for 32-bit type 'int' CPU: 0 PID: 8516 Comm: syz-executor228 Not tainted 5.10.0-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:79 [inline] dump_stack+0x107/0x163 lib/dump_stack.c:120 ubsan_epilogue+0xb/0x5a lib/ubsan.c:148 __ubsan_handle_shift_out_of_bounds.cold+0xb1/0x181 lib/ubsan.c:395 valid_perfect_hash net/sched/cls_tcindex.c:260 [inline] tcindex_set_parms.cold+0x1b/0x215 net/sched/cls_tcindex.c:425 tcindex_change+0x232/0x340 net/sched/cls_tcindex.c:546 tc_new_tfilter+0x13fb/0x21b0 net/sched/cls_api.c:2127 rtnetlink_rcv_msg+0x8b6/0xb80 net/core/rtnetlink.c:5555 netlink_rcv_skb+0x153/0x420 net/netlink/af_netlink.c:2494 netlink_unicast_kernel net/netlink/af_netlink.c:1304 [inline] netlink_unicast+0x533/0x7d0 net/netlink/af_netlink.c:1330 netlink_sendmsg+0x907/0xe40 net/netlink/af_netlink.c:1919 sock_sendmsg_nosec net/socket.c:652 [inline] sock_sendmsg+0xcf/0x120 net/socket.c:672 ____sys_sendmsg+0x6e8/0x810 net/socket.c:2336 ___sys_sendmsg+0xf3/0x170 net/socket.c:2390 __sys_sendmsg+0xe5/0x1b0 net/socket.c:2423 do_syscall_64+0x2d/0x70 arch/x86/entry/common.c:46 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Eric Dumazet Reported-by: syzbot Link: https://lore.kernel.org/r/20210114185229.1742255-1-eric.dumazet@gmail.com Signed-off-by: Jakub Kicinski --- net/sched/cls_tcindex.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c index 78bec347b8b66..c4007b9cd16d6 100644 --- a/net/sched/cls_tcindex.c +++ b/net/sched/cls_tcindex.c @@ -366,9 +366,13 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, if (tb[TCA_TCINDEX_MASK]) cp->mask = nla_get_u16(tb[TCA_TCINDEX_MASK]); - if (tb[TCA_TCINDEX_SHIFT]) + if (tb[TCA_TCINDEX_SHIFT]) { cp->shift = nla_get_u32(tb[TCA_TCINDEX_SHIFT]); - + if (cp->shift > 16) { + err = -EINVAL; + goto errout; + } + } if (!cp->hash) { /* Hash not specified, use perfect hash if the upper limit * of the hashing index is below the threshold. -- GitLab From a959a9782fa87669feeed095ced5d78181a7c02d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 11 Jan 2021 18:19:26 +0100 Subject: [PATCH 0928/2012] iov_iter: fix the uaccess area in copy_compat_iovec_from_user sizeof needs to be called on the compat pointer, not the native one. Fixes: 89cd35c58bc2 ("iov_iter: transparently handle compat iovecs in import_iovec") Reported-by: David Laight Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- lib/iov_iter.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/iov_iter.c b/lib/iov_iter.c index 1635111c5bd2a..a21e6a5792c5a 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -1658,7 +1658,7 @@ static int copy_compat_iovec_from_user(struct iovec *iov, (const struct compat_iovec __user *)uvec; int ret = -EFAULT, i; - if (!user_access_begin(uvec, nr_segs * sizeof(*uvec))) + if (!user_access_begin(uiov, nr_segs * sizeof(*uiov))) return -EFAULT; for (i = 0; i < nr_segs; i++) { -- GitLab From f6fe01d6fa24dd3c89996ad82780872441e86bfa Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Fri, 15 Jan 2021 04:11:11 +0200 Subject: [PATCH 0929/2012] net: mscc: ocelot: auto-detect packet buffer size and number of frame references Instead of reading these values from the reference manual and writing them down into the driver, it appears that the hardware gives us the option of detecting them dynamically. The number of frame references corresponds to what the reference manual notes, however it seems that the frame buffers are reported as slightly less than the books would indicate. On VSC9959 (Felix), the books say it should have 128KB of packet buffer, but the registers indicate only 129840 bytes (126.79 KB). Also, the unit of measurement for FREECNT from the documentation of all these devices is incorrect (taken from an older generation). This was confirmed by Younes Leroul from Microchip support. Not having anything better to do with these values at the moment* (this will change soon), let's just print them. *The frame buffer size is, in fact, used to calculate the tail dropping watermarks. Signed-off-by: Vladimir Oltean Reviewed-by: Florian Fainelli Signed-off-by: Jakub Kicinski --- drivers/net/dsa/ocelot/felix.c | 1 - drivers/net/dsa/ocelot/felix.h | 1 - drivers/net/dsa/ocelot/felix_vsc9959.c | 1 - drivers/net/dsa/ocelot/seville_vsc9953.c | 1 - drivers/net/ethernet/mscc/ocelot.c | 22 +++++++++++++++++++++- drivers/net/ethernet/mscc/ocelot_vsc7514.c | 1 - include/soc/mscc/ocelot.h | 3 ++- include/soc/mscc/ocelot_qsys.h | 3 +++ 8 files changed, 26 insertions(+), 7 deletions(-) diff --git a/drivers/net/dsa/ocelot/felix.c b/drivers/net/dsa/ocelot/felix.c index 8492151f81334..cf9317110591e 100644 --- a/drivers/net/dsa/ocelot/felix.c +++ b/drivers/net/dsa/ocelot/felix.c @@ -422,7 +422,6 @@ static int felix_init_structs(struct felix *felix, int num_phys_ports) ocelot->map = felix->info->map; ocelot->stats_layout = felix->info->stats_layout; ocelot->num_stats = felix->info->num_stats; - ocelot->shared_queue_sz = felix->info->shared_queue_sz; ocelot->num_mact_rows = felix->info->num_mact_rows; ocelot->vcap = felix->info->vcap; ocelot->ops = felix->info->ops; diff --git a/drivers/net/dsa/ocelot/felix.h b/drivers/net/dsa/ocelot/felix.h index 4c717324ac2f7..5434fe278d2c3 100644 --- a/drivers/net/dsa/ocelot/felix.h +++ b/drivers/net/dsa/ocelot/felix.h @@ -15,7 +15,6 @@ struct felix_info { const struct reg_field *regfields; const u32 *const *map; const struct ocelot_ops *ops; - int shared_queue_sz; int num_mact_rows; const struct ocelot_stat_layout *stats_layout; unsigned int num_stats; diff --git a/drivers/net/dsa/ocelot/felix_vsc9959.c b/drivers/net/dsa/ocelot/felix_vsc9959.c index a87597eef8cf3..b68df85e01a16 100644 --- a/drivers/net/dsa/ocelot/felix_vsc9959.c +++ b/drivers/net/dsa/ocelot/felix_vsc9959.c @@ -1356,7 +1356,6 @@ static const struct felix_info felix_info_vsc9959 = { .stats_layout = vsc9959_stats_layout, .num_stats = ARRAY_SIZE(vsc9959_stats_layout), .vcap = vsc9959_vcap_props, - .shared_queue_sz = 128 * 1024, .num_mact_rows = 2048, .num_ports = 6, .num_tx_queues = FELIX_NUM_TC, diff --git a/drivers/net/dsa/ocelot/seville_vsc9953.c b/drivers/net/dsa/ocelot/seville_vsc9953.c index ebbaf6817ec86..b72813da6d9fc 100644 --- a/drivers/net/dsa/ocelot/seville_vsc9953.c +++ b/drivers/net/dsa/ocelot/seville_vsc9953.c @@ -1181,7 +1181,6 @@ static const struct felix_info seville_info_vsc9953 = { .stats_layout = vsc9953_stats_layout, .num_stats = ARRAY_SIZE(vsc9953_stats_layout), .vcap = vsc9953_vcap_props, - .shared_queue_sz = 256 * 1024, .num_mact_rows = 2048, .num_ports = 10, .mdio_bus_alloc = vsc9953_mdio_bus_alloc, diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c index af620f7ce4690..3af8e607c8a9f 100644 --- a/drivers/net/ethernet/mscc/ocelot.c +++ b/drivers/net/ethernet/mscc/ocelot.c @@ -1354,7 +1354,7 @@ void ocelot_port_set_maxlen(struct ocelot *ocelot, int port, size_t sdu) pause_stop); /* Tail dropping watermarks */ - atop_tot = (ocelot->shared_queue_sz - 9 * maxlen) / + atop_tot = (ocelot->packet_buffer_size - 9 * maxlen) / OCELOT_BUFFER_CELL_SZ; atop = (9 * maxlen) / OCELOT_BUFFER_CELL_SZ; ocelot_write_rix(ocelot, ocelot->ops->wm_enc(atop), SYS_ATOP, port); @@ -1467,6 +1467,25 @@ static void ocelot_cpu_port_init(struct ocelot *ocelot) ANA_PORT_VLAN_CFG, cpu); } +static void ocelot_detect_features(struct ocelot *ocelot) +{ + int mmgt, eq_ctrl; + + /* For Ocelot, Felix, Seville, Serval etc, SYS:MMGT:MMGT:FREECNT holds + * the number of 240-byte free memory words (aka 4-cell chunks) and not + * 192 bytes as the documentation incorrectly says. + */ + mmgt = ocelot_read(ocelot, SYS_MMGT); + ocelot->packet_buffer_size = 240 * SYS_MMGT_FREECNT(mmgt); + + eq_ctrl = ocelot_read(ocelot, QSYS_EQ_CTRL); + ocelot->num_frame_refs = QSYS_MMGT_EQ_CTRL_FP_FREE_CNT(eq_ctrl); + + dev_info(ocelot->dev, + "Detected %d bytes of packet buffer and %d frame references\n", + ocelot->packet_buffer_size, ocelot->num_frame_refs); +} + int ocelot_init(struct ocelot *ocelot) { char queue_name[32]; @@ -1509,6 +1528,7 @@ int ocelot_init(struct ocelot *ocelot) INIT_LIST_HEAD(&ocelot->multicast); INIT_LIST_HEAD(&ocelot->pgids); + ocelot_detect_features(ocelot); ocelot_mact_init(ocelot); ocelot_vlan_init(ocelot); ocelot_vcap_init(ocelot); diff --git a/drivers/net/ethernet/mscc/ocelot_vsc7514.c b/drivers/net/ethernet/mscc/ocelot_vsc7514.c index 9cf2bc5f42892..7135ad18affe2 100644 --- a/drivers/net/ethernet/mscc/ocelot_vsc7514.c +++ b/drivers/net/ethernet/mscc/ocelot_vsc7514.c @@ -517,7 +517,6 @@ static int ocelot_chip_init(struct ocelot *ocelot, const struct ocelot_ops *ops) ocelot->map = ocelot_regmap; ocelot->stats_layout = ocelot_stats_layout; ocelot->num_stats = ARRAY_SIZE(ocelot_stats_layout); - ocelot->shared_queue_sz = 224 * 1024; ocelot->num_mact_rows = 1024; ocelot->ops = ops; diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h index f3db75c0172be..c17a372335cd4 100644 --- a/include/soc/mscc/ocelot.h +++ b/include/soc/mscc/ocelot.h @@ -607,7 +607,8 @@ struct ocelot { const struct ocelot_stat_layout *stats_layout; unsigned int num_stats; - int shared_queue_sz; + int packet_buffer_size; + int num_frame_refs; int num_mact_rows; struct net_device *hw_bridge_dev; diff --git a/include/soc/mscc/ocelot_qsys.h b/include/soc/mscc/ocelot_qsys.h index a814bc2017d8d..b7b263a190688 100644 --- a/include/soc/mscc/ocelot_qsys.h +++ b/include/soc/mscc/ocelot_qsys.h @@ -77,6 +77,9 @@ #define QSYS_RES_STAT_MAXUSE(x) ((x) & GENMASK(11, 0)) #define QSYS_RES_STAT_MAXUSE_M GENMASK(11, 0) +#define QSYS_MMGT_EQ_CTRL_FP_FREE_CNT(x) ((x) & GENMASK(15, 0)) +#define QSYS_MMGT_EQ_CTRL_FP_FREE_CNT_M GENMASK(15, 0) + #define QSYS_EVENTS_CORE_EV_FDC(x) (((x) << 2) & GENMASK(4, 2)) #define QSYS_EVENTS_CORE_EV_FDC_M GENMASK(4, 2) #define QSYS_EVENTS_CORE_EV_FDC_X(x) (((x) & GENMASK(4, 2)) >> 2) -- GitLab From 703b762190e643bf46a048ebe99504b14d71449c Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Fri, 15 Jan 2021 04:11:12 +0200 Subject: [PATCH 0930/2012] net: mscc: ocelot: add ops for decoding watermark threshold and occupancy We'll need to read back the watermark thresholds and occupancy from hardware (for devlink-sb integration), not only to write them as we did so far in ocelot_port_set_maxlen. So introduce 2 new functions in struct ocelot_ops, similar to wm_enc, and implement them for the 3 supported mscc_ocelot switches. Remove the INUSE and MAXUSE unpacking helpers for the QSYS_RES_STAT register, because that doesn't scale with the number of switches that mscc_ocelot supports now. They have different bit widths for the watermarks, and we need function pointers to abstract that difference away. Signed-off-by: Vladimir Oltean Reviewed-by: Florian Fainelli Signed-off-by: Jakub Kicinski --- drivers/net/dsa/ocelot/felix_vsc9959.c | 18 ++++++++++++++++++ drivers/net/dsa/ocelot/seville_vsc9953.c | 18 ++++++++++++++++++ drivers/net/ethernet/mscc/ocelot_vsc7514.c | 16 ++++++++++++++++ include/soc/mscc/ocelot.h | 2 ++ include/soc/mscc/ocelot_qsys.h | 6 ------ 5 files changed, 54 insertions(+), 6 deletions(-) diff --git a/drivers/net/dsa/ocelot/felix_vsc9959.c b/drivers/net/dsa/ocelot/felix_vsc9959.c index b68df85e01a16..21dacb85976e6 100644 --- a/drivers/net/dsa/ocelot/felix_vsc9959.c +++ b/drivers/net/dsa/ocelot/felix_vsc9959.c @@ -1006,9 +1006,27 @@ static u16 vsc9959_wm_enc(u16 value) return value; } +static u16 vsc9959_wm_dec(u16 wm) +{ + WARN_ON(wm & ~GENMASK(8, 0)); + + if (wm & BIT(8)) + return (wm & GENMASK(7, 0)) * 16; + + return wm; +} + +static void vsc9959_wm_stat(u32 val, u32 *inuse, u32 *maxuse) +{ + *inuse = (val & GENMASK(23, 12)) >> 12; + *maxuse = val & GENMASK(11, 0); +} + static const struct ocelot_ops vsc9959_ops = { .reset = vsc9959_reset, .wm_enc = vsc9959_wm_enc, + .wm_dec = vsc9959_wm_dec, + .wm_stat = vsc9959_wm_stat, .port_to_netdev = felix_port_to_netdev, .netdev_to_port = felix_netdev_to_port, }; diff --git a/drivers/net/dsa/ocelot/seville_vsc9953.c b/drivers/net/dsa/ocelot/seville_vsc9953.c index b72813da6d9fc..8dad0c894eca1 100644 --- a/drivers/net/dsa/ocelot/seville_vsc9953.c +++ b/drivers/net/dsa/ocelot/seville_vsc9953.c @@ -1057,9 +1057,27 @@ static u16 vsc9953_wm_enc(u16 value) return value; } +static u16 vsc9953_wm_dec(u16 wm) +{ + WARN_ON(wm & ~GENMASK(9, 0)); + + if (wm & BIT(9)) + return (wm & GENMASK(8, 0)) * 16; + + return wm; +} + +static void vsc9953_wm_stat(u32 val, u32 *inuse, u32 *maxuse) +{ + *inuse = (val & GENMASK(25, 13)) >> 13; + *maxuse = val & GENMASK(12, 0); +} + static const struct ocelot_ops vsc9953_ops = { .reset = vsc9953_reset, .wm_enc = vsc9953_wm_enc, + .wm_dec = vsc9953_wm_dec, + .wm_stat = vsc9953_wm_stat, .port_to_netdev = felix_port_to_netdev, .netdev_to_port = felix_netdev_to_port, }; diff --git a/drivers/net/ethernet/mscc/ocelot_vsc7514.c b/drivers/net/ethernet/mscc/ocelot_vsc7514.c index 7135ad18affe2..ecd474476cc6c 100644 --- a/drivers/net/ethernet/mscc/ocelot_vsc7514.c +++ b/drivers/net/ethernet/mscc/ocelot_vsc7514.c @@ -763,9 +763,25 @@ static u16 ocelot_wm_enc(u16 value) return value; } +static u16 ocelot_wm_dec(u16 wm) +{ + if (wm & BIT(8)) + return (wm & GENMASK(7, 0)) * 16; + + return wm; +} + +static void ocelot_wm_stat(u32 val, u32 *inuse, u32 *maxuse) +{ + *inuse = (val & GENMASK(23, 12)) >> 12; + *maxuse = val & GENMASK(11, 0); +} + static const struct ocelot_ops ocelot_ops = { .reset = ocelot_reset, .wm_enc = ocelot_wm_enc, + .wm_dec = ocelot_wm_dec, + .wm_stat = ocelot_wm_stat, .port_to_netdev = ocelot_port_to_netdev, .netdev_to_port = ocelot_netdev_to_port, }; diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h index c17a372335cd4..e548b0f51d0c6 100644 --- a/include/soc/mscc/ocelot.h +++ b/include/soc/mscc/ocelot.h @@ -563,6 +563,8 @@ struct ocelot_ops { int (*netdev_to_port)(struct net_device *dev); int (*reset)(struct ocelot *ocelot); u16 (*wm_enc)(u16 value); + u16 (*wm_dec)(u16 value); + void (*wm_stat)(u32 val, u32 *inuse, u32 *maxuse); }; struct ocelot_vcap_block { diff --git a/include/soc/mscc/ocelot_qsys.h b/include/soc/mscc/ocelot_qsys.h index b7b263a190688..9731895be6431 100644 --- a/include/soc/mscc/ocelot_qsys.h +++ b/include/soc/mscc/ocelot_qsys.h @@ -71,12 +71,6 @@ #define QSYS_RES_STAT_GSZ 0x8 -#define QSYS_RES_STAT_INUSE(x) (((x) << 12) & GENMASK(23, 12)) -#define QSYS_RES_STAT_INUSE_M GENMASK(23, 12) -#define QSYS_RES_STAT_INUSE_X(x) (((x) & GENMASK(23, 12)) >> 12) -#define QSYS_RES_STAT_MAXUSE(x) ((x) & GENMASK(11, 0)) -#define QSYS_RES_STAT_MAXUSE_M GENMASK(11, 0) - #define QSYS_MMGT_EQ_CTRL_FP_FREE_CNT(x) ((x) & GENMASK(15, 0)) #define QSYS_MMGT_EQ_CTRL_FP_FREE_CNT_M GENMASK(15, 0) -- GitLab From 2a6ef763037238a5aa6a6505fc6693ee77c1a59b Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Fri, 15 Jan 2021 04:11:13 +0200 Subject: [PATCH 0931/2012] net: dsa: add ops for devlink-sb Switches that care about QoS might have hardware support for reserving buffer pools for individual ports or traffic classes, and configuring their sizes and thresholds. Through devlink-sb (shared buffers), this is all configurable, as well as their occupancy being viewable. Add the plumbing in DSA for these operations. Individual drivers still need to call devlink_sb_register() with the shared buffers they want to expose. A helper was not created in DSA for this purpose (unlike, say, dsa_devlink_params_register), since in my opinion it does not bring any benefit over plainly calling devlink_sb_register() directly. Signed-off-by: Vladimir Oltean Reviewed-by: Andrew Lunn Reviewed-by: Florian Fainelli Signed-off-by: Jakub Kicinski --- include/net/dsa.h | 34 ++++++++++ net/dsa/dsa2.c | 159 +++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 192 insertions(+), 1 deletion(-) diff --git a/include/net/dsa.h b/include/net/dsa.h index fa537afcab532..2f5435d3d1db5 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -699,6 +699,40 @@ struct dsa_switch_ops { int (*devlink_info_get)(struct dsa_switch *ds, struct devlink_info_req *req, struct netlink_ext_ack *extack); + int (*devlink_sb_pool_get)(struct dsa_switch *ds, + unsigned int sb_index, u16 pool_index, + struct devlink_sb_pool_info *pool_info); + int (*devlink_sb_pool_set)(struct dsa_switch *ds, unsigned int sb_index, + u16 pool_index, u32 size, + enum devlink_sb_threshold_type threshold_type, + struct netlink_ext_ack *extack); + int (*devlink_sb_port_pool_get)(struct dsa_switch *ds, int port, + unsigned int sb_index, u16 pool_index, + u32 *p_threshold); + int (*devlink_sb_port_pool_set)(struct dsa_switch *ds, int port, + unsigned int sb_index, u16 pool_index, + u32 threshold, + struct netlink_ext_ack *extack); + int (*devlink_sb_tc_pool_bind_get)(struct dsa_switch *ds, int port, + unsigned int sb_index, u16 tc_index, + enum devlink_sb_pool_type pool_type, + u16 *p_pool_index, u32 *p_threshold); + int (*devlink_sb_tc_pool_bind_set)(struct dsa_switch *ds, int port, + unsigned int sb_index, u16 tc_index, + enum devlink_sb_pool_type pool_type, + u16 pool_index, u32 threshold, + struct netlink_ext_ack *extack); + int (*devlink_sb_occ_snapshot)(struct dsa_switch *ds, + unsigned int sb_index); + int (*devlink_sb_occ_max_clear)(struct dsa_switch *ds, + unsigned int sb_index); + int (*devlink_sb_occ_port_pool_get)(struct dsa_switch *ds, int port, + unsigned int sb_index, u16 pool_index, + u32 *p_cur, u32 *p_max); + int (*devlink_sb_occ_tc_port_bind_get)(struct dsa_switch *ds, int port, + unsigned int sb_index, u16 tc_index, + enum devlink_sb_pool_type pool_type, + u32 *p_cur, u32 *p_max); /* * MTU change functionality. Switches can also adjust their MRU through diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index f98eb521c2ec1..cc13549120e5b 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -463,8 +463,165 @@ static int dsa_devlink_info_get(struct devlink *dl, return -EOPNOTSUPP; } +static int dsa_devlink_sb_pool_get(struct devlink *dl, + unsigned int sb_index, u16 pool_index, + struct devlink_sb_pool_info *pool_info) +{ + struct dsa_switch *ds = dsa_devlink_to_ds(dl); + + if (!ds->ops->devlink_sb_pool_get) + return -EOPNOTSUPP; + + return ds->ops->devlink_sb_pool_get(ds, sb_index, pool_index, + pool_info); +} + +static int dsa_devlink_sb_pool_set(struct devlink *dl, unsigned int sb_index, + u16 pool_index, u32 size, + enum devlink_sb_threshold_type threshold_type, + struct netlink_ext_ack *extack) +{ + struct dsa_switch *ds = dsa_devlink_to_ds(dl); + + if (!ds->ops->devlink_sb_pool_set) + return -EOPNOTSUPP; + + return ds->ops->devlink_sb_pool_set(ds, sb_index, pool_index, size, + threshold_type, extack); +} + +static int dsa_devlink_sb_port_pool_get(struct devlink_port *dlp, + unsigned int sb_index, u16 pool_index, + u32 *p_threshold) +{ + struct dsa_switch *ds = dsa_devlink_port_to_ds(dlp); + int port = dsa_devlink_port_to_port(dlp); + + if (!ds->ops->devlink_sb_port_pool_get) + return -EOPNOTSUPP; + + return ds->ops->devlink_sb_port_pool_get(ds, port, sb_index, + pool_index, p_threshold); +} + +static int dsa_devlink_sb_port_pool_set(struct devlink_port *dlp, + unsigned int sb_index, u16 pool_index, + u32 threshold, + struct netlink_ext_ack *extack) +{ + struct dsa_switch *ds = dsa_devlink_port_to_ds(dlp); + int port = dsa_devlink_port_to_port(dlp); + + if (!ds->ops->devlink_sb_port_pool_set) + return -EOPNOTSUPP; + + return ds->ops->devlink_sb_port_pool_set(ds, port, sb_index, + pool_index, threshold, extack); +} + +static int +dsa_devlink_sb_tc_pool_bind_get(struct devlink_port *dlp, + unsigned int sb_index, u16 tc_index, + enum devlink_sb_pool_type pool_type, + u16 *p_pool_index, u32 *p_threshold) +{ + struct dsa_switch *ds = dsa_devlink_port_to_ds(dlp); + int port = dsa_devlink_port_to_port(dlp); + + if (!ds->ops->devlink_sb_tc_pool_bind_get) + return -EOPNOTSUPP; + + return ds->ops->devlink_sb_tc_pool_bind_get(ds, port, sb_index, + tc_index, pool_type, + p_pool_index, p_threshold); +} + +static int +dsa_devlink_sb_tc_pool_bind_set(struct devlink_port *dlp, + unsigned int sb_index, u16 tc_index, + enum devlink_sb_pool_type pool_type, + u16 pool_index, u32 threshold, + struct netlink_ext_ack *extack) +{ + struct dsa_switch *ds = dsa_devlink_port_to_ds(dlp); + int port = dsa_devlink_port_to_port(dlp); + + if (!ds->ops->devlink_sb_tc_pool_bind_set) + return -EOPNOTSUPP; + + return ds->ops->devlink_sb_tc_pool_bind_set(ds, port, sb_index, + tc_index, pool_type, + pool_index, threshold, + extack); +} + +static int dsa_devlink_sb_occ_snapshot(struct devlink *dl, + unsigned int sb_index) +{ + struct dsa_switch *ds = dsa_devlink_to_ds(dl); + + if (!ds->ops->devlink_sb_occ_snapshot) + return -EOPNOTSUPP; + + return ds->ops->devlink_sb_occ_snapshot(ds, sb_index); +} + +static int dsa_devlink_sb_occ_max_clear(struct devlink *dl, + unsigned int sb_index) +{ + struct dsa_switch *ds = dsa_devlink_to_ds(dl); + + if (!ds->ops->devlink_sb_occ_max_clear) + return -EOPNOTSUPP; + + return ds->ops->devlink_sb_occ_max_clear(ds, sb_index); +} + +static int dsa_devlink_sb_occ_port_pool_get(struct devlink_port *dlp, + unsigned int sb_index, + u16 pool_index, u32 *p_cur, + u32 *p_max) +{ + struct dsa_switch *ds = dsa_devlink_port_to_ds(dlp); + int port = dsa_devlink_port_to_port(dlp); + + if (!ds->ops->devlink_sb_occ_port_pool_get) + return -EOPNOTSUPP; + + return ds->ops->devlink_sb_occ_port_pool_get(ds, port, sb_index, + pool_index, p_cur, p_max); +} + +static int +dsa_devlink_sb_occ_tc_port_bind_get(struct devlink_port *dlp, + unsigned int sb_index, u16 tc_index, + enum devlink_sb_pool_type pool_type, + u32 *p_cur, u32 *p_max) +{ + struct dsa_switch *ds = dsa_devlink_port_to_ds(dlp); + int port = dsa_devlink_port_to_port(dlp); + + if (!ds->ops->devlink_sb_occ_tc_port_bind_get) + return -EOPNOTSUPP; + + return ds->ops->devlink_sb_occ_tc_port_bind_get(ds, port, + sb_index, tc_index, + pool_type, p_cur, + p_max); +} + static const struct devlink_ops dsa_devlink_ops = { - .info_get = dsa_devlink_info_get, + .info_get = dsa_devlink_info_get, + .sb_pool_get = dsa_devlink_sb_pool_get, + .sb_pool_set = dsa_devlink_sb_pool_set, + .sb_port_pool_get = dsa_devlink_sb_port_pool_get, + .sb_port_pool_set = dsa_devlink_sb_port_pool_set, + .sb_tc_pool_bind_get = dsa_devlink_sb_tc_pool_bind_get, + .sb_tc_pool_bind_set = dsa_devlink_sb_tc_pool_bind_set, + .sb_occ_snapshot = dsa_devlink_sb_occ_snapshot, + .sb_occ_max_clear = dsa_devlink_sb_occ_max_clear, + .sb_occ_port_pool_get = dsa_devlink_sb_occ_port_pool_get, + .sb_occ_tc_port_bind_get = dsa_devlink_sb_occ_tc_port_bind_get, }; static int dsa_switch_setup(struct dsa_switch *ds) -- GitLab From a7096915e4276fff6905a8eff89986ef9704bbe7 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Fri, 15 Jan 2021 04:11:14 +0200 Subject: [PATCH 0932/2012] net: dsa: felix: reindent struct dsa_switch_ops The devlink function pointer names are super long, and they would break the alignment. So reindent the existing ops now by adding one tab. Signed-off-by: Vladimir Oltean Reviewed-by: Florian Fainelli Signed-off-by: Jakub Kicinski --- drivers/net/dsa/ocelot/felix.c | 74 +++++++++++++++++----------------- 1 file changed, 37 insertions(+), 37 deletions(-) diff --git a/drivers/net/dsa/ocelot/felix.c b/drivers/net/dsa/ocelot/felix.c index cf9317110591e..6144f71f4651b 100644 --- a/drivers/net/dsa/ocelot/felix.c +++ b/drivers/net/dsa/ocelot/felix.c @@ -751,43 +751,43 @@ static int felix_port_setup_tc(struct dsa_switch *ds, int port, } const struct dsa_switch_ops felix_switch_ops = { - .get_tag_protocol = felix_get_tag_protocol, - .setup = felix_setup, - .teardown = felix_teardown, - .set_ageing_time = felix_set_ageing_time, - .get_strings = felix_get_strings, - .get_ethtool_stats = felix_get_ethtool_stats, - .get_sset_count = felix_get_sset_count, - .get_ts_info = felix_get_ts_info, - .phylink_validate = felix_phylink_validate, - .phylink_mac_config = felix_phylink_mac_config, - .phylink_mac_link_down = felix_phylink_mac_link_down, - .phylink_mac_link_up = felix_phylink_mac_link_up, - .port_enable = felix_port_enable, - .port_disable = felix_port_disable, - .port_fdb_dump = felix_fdb_dump, - .port_fdb_add = felix_fdb_add, - .port_fdb_del = felix_fdb_del, - .port_mdb_add = felix_mdb_add, - .port_mdb_del = felix_mdb_del, - .port_bridge_join = felix_bridge_join, - .port_bridge_leave = felix_bridge_leave, - .port_stp_state_set = felix_bridge_stp_state_set, - .port_vlan_filtering = felix_vlan_filtering, - .port_vlan_add = felix_vlan_add, - .port_vlan_del = felix_vlan_del, - .port_hwtstamp_get = felix_hwtstamp_get, - .port_hwtstamp_set = felix_hwtstamp_set, - .port_rxtstamp = felix_rxtstamp, - .port_txtstamp = felix_txtstamp, - .port_change_mtu = felix_change_mtu, - .port_max_mtu = felix_get_max_mtu, - .port_policer_add = felix_port_policer_add, - .port_policer_del = felix_port_policer_del, - .cls_flower_add = felix_cls_flower_add, - .cls_flower_del = felix_cls_flower_del, - .cls_flower_stats = felix_cls_flower_stats, - .port_setup_tc = felix_port_setup_tc, + .get_tag_protocol = felix_get_tag_protocol, + .setup = felix_setup, + .teardown = felix_teardown, + .set_ageing_time = felix_set_ageing_time, + .get_strings = felix_get_strings, + .get_ethtool_stats = felix_get_ethtool_stats, + .get_sset_count = felix_get_sset_count, + .get_ts_info = felix_get_ts_info, + .phylink_validate = felix_phylink_validate, + .phylink_mac_config = felix_phylink_mac_config, + .phylink_mac_link_down = felix_phylink_mac_link_down, + .phylink_mac_link_up = felix_phylink_mac_link_up, + .port_enable = felix_port_enable, + .port_disable = felix_port_disable, + .port_fdb_dump = felix_fdb_dump, + .port_fdb_add = felix_fdb_add, + .port_fdb_del = felix_fdb_del, + .port_mdb_add = felix_mdb_add, + .port_mdb_del = felix_mdb_del, + .port_bridge_join = felix_bridge_join, + .port_bridge_leave = felix_bridge_leave, + .port_stp_state_set = felix_bridge_stp_state_set, + .port_vlan_filtering = felix_vlan_filtering, + .port_vlan_add = felix_vlan_add, + .port_vlan_del = felix_vlan_del, + .port_hwtstamp_get = felix_hwtstamp_get, + .port_hwtstamp_set = felix_hwtstamp_set, + .port_rxtstamp = felix_rxtstamp, + .port_txtstamp = felix_txtstamp, + .port_change_mtu = felix_change_mtu, + .port_max_mtu = felix_get_max_mtu, + .port_policer_add = felix_port_policer_add, + .port_policer_del = felix_port_policer_del, + .cls_flower_add = felix_cls_flower_add, + .cls_flower_del = felix_cls_flower_del, + .cls_flower_stats = felix_cls_flower_stats, + .port_setup_tc = felix_port_setup_tc, }; struct net_device *felix_port_to_netdev(struct ocelot *ocelot, int port) -- GitLab From d19741b0f54487cf3a11307900f8633935cd2849 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Fri, 15 Jan 2021 04:11:15 +0200 Subject: [PATCH 0933/2012] net: dsa: felix: perform teardown in reverse order of setup In general it is desirable that cleanup is the reverse process of setup. In this case I am not seeing any particular issue, but with the introduction of devlink-sb for felix, a non-obvious decision had to be made as to where to put its cleanup method. When there's a convention in place, that decision becomes obvious. Signed-off-by: Vladimir Oltean Reviewed-by: Florian Fainelli Signed-off-by: Jakub Kicinski --- drivers/net/dsa/ocelot/felix.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/dsa/ocelot/felix.c b/drivers/net/dsa/ocelot/felix.c index 6144f71f4651b..178be0d736bdf 100644 --- a/drivers/net/dsa/ocelot/felix.c +++ b/drivers/net/dsa/ocelot/felix.c @@ -609,14 +609,14 @@ static void felix_teardown(struct dsa_switch *ds) struct felix *felix = ocelot_to_felix(ocelot); int port; - if (felix->info->mdio_bus_free) - felix->info->mdio_bus_free(ocelot); + ocelot_deinit_timestamp(ocelot); + ocelot_deinit(ocelot); for (port = 0; port < ocelot->num_phys_ports; port++) ocelot_deinit_port(ocelot, port); - ocelot_deinit_timestamp(ocelot); - /* stop workqueue thread */ - ocelot_deinit(ocelot); + + if (felix->info->mdio_bus_free) + felix->info->mdio_bus_free(ocelot); } static int felix_hwtstamp_get(struct dsa_switch *ds, int port, -- GitLab From 70d39a6e62d31a4a7372a712ccc6f8063bbb1550 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Fri, 15 Jan 2021 04:11:16 +0200 Subject: [PATCH 0934/2012] net: mscc: ocelot: export NUM_TC constant from felix to common switch lib We should be moving anything that isn't DSA-specific or SoC-specific out of the felix DSA driver, and into the common mscc_ocelot switch library. The number of traffic classes is one of the aspects that is common between all ocelot switches, so it belongs in the library. This patch also makes seville use 8 TX queues, and therefore enables prioritization via the QOS_CLASS field in the NPI injection header. Signed-off-by: Vladimir Oltean Reviewed-by: Florian Fainelli Signed-off-by: Jakub Kicinski --- drivers/net/dsa/ocelot/felix.c | 2 +- drivers/net/dsa/ocelot/felix.h | 1 - drivers/net/dsa/ocelot/felix_vsc9959.c | 4 ++-- drivers/net/dsa/ocelot/seville_vsc9953.c | 1 + include/soc/mscc/ocelot.h | 1 + 5 files changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/net/dsa/ocelot/felix.c b/drivers/net/dsa/ocelot/felix.c index 178be0d736bdf..0c6a7de033318 100644 --- a/drivers/net/dsa/ocelot/felix.c +++ b/drivers/net/dsa/ocelot/felix.c @@ -299,7 +299,7 @@ static void felix_port_qos_map_init(struct ocelot *ocelot, int port) ANA_PORT_QOS_CFG, port); - for (i = 0; i < FELIX_NUM_TC * 2; i++) { + for (i = 0; i < OCELOT_NUM_TC * 2; i++) { ocelot_rmw_ix(ocelot, (ANA_PORT_PCP_DEI_MAP_DP_PCP_DEI_VAL & i) | ANA_PORT_PCP_DEI_MAP_QOS_PCP_DEI_VAL(i), diff --git a/drivers/net/dsa/ocelot/felix.h b/drivers/net/dsa/ocelot/felix.h index 5434fe278d2c3..994835cb9307f 100644 --- a/drivers/net/dsa/ocelot/felix.h +++ b/drivers/net/dsa/ocelot/felix.h @@ -5,7 +5,6 @@ #define _MSCC_FELIX_H #define ocelot_to_felix(o) container_of((o), struct felix, ocelot) -#define FELIX_NUM_TC 8 /* Platform-specific information */ struct felix_info { diff --git a/drivers/net/dsa/ocelot/felix_vsc9959.c b/drivers/net/dsa/ocelot/felix_vsc9959.c index 21dacb85976e6..f9711e69b8d54 100644 --- a/drivers/net/dsa/ocelot/felix_vsc9959.c +++ b/drivers/net/dsa/ocelot/felix_vsc9959.c @@ -1376,7 +1376,7 @@ static const struct felix_info felix_info_vsc9959 = { .vcap = vsc9959_vcap_props, .num_mact_rows = 2048, .num_ports = 6, - .num_tx_queues = FELIX_NUM_TC, + .num_tx_queues = OCELOT_NUM_TC, .switch_pci_bar = 4, .imdio_pci_bar = 0, .ptp_caps = &vsc9959_ptp_caps, @@ -1435,7 +1435,7 @@ static int felix_pci_probe(struct pci_dev *pdev, pci_set_drvdata(pdev, felix); ocelot = &felix->ocelot; ocelot->dev = &pdev->dev; - ocelot->num_flooding_pgids = FELIX_NUM_TC; + ocelot->num_flooding_pgids = OCELOT_NUM_TC; felix->info = &felix_info_vsc9959; felix->switch_base = pci_resource_start(pdev, felix->info->switch_pci_bar); diff --git a/drivers/net/dsa/ocelot/seville_vsc9953.c b/drivers/net/dsa/ocelot/seville_vsc9953.c index 8dad0c894eca1..5e9bfdea50be5 100644 --- a/drivers/net/dsa/ocelot/seville_vsc9953.c +++ b/drivers/net/dsa/ocelot/seville_vsc9953.c @@ -1201,6 +1201,7 @@ static const struct felix_info seville_info_vsc9953 = { .vcap = vsc9953_vcap_props, .num_mact_rows = 2048, .num_ports = 10, + .num_tx_queues = OCELOT_NUM_TC, .mdio_bus_alloc = vsc9953_mdio_bus_alloc, .mdio_bus_free = vsc9953_mdio_bus_free, .phylink_validate = vsc9953_phylink_validate, diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h index e548b0f51d0c6..1dc0c6d0671a4 100644 --- a/include/soc/mscc/ocelot.h +++ b/include/soc/mscc/ocelot.h @@ -98,6 +98,7 @@ #define IFH_REW_OP_TWO_STEP_PTP 0x3 #define IFH_REW_OP_ORIGIN_PTP 0x5 +#define OCELOT_NUM_TC 8 #define OCELOT_TAG_LEN 16 #define OCELOT_SHORT_PREFIX_LEN 4 #define OCELOT_LONG_PREFIX_LEN 16 -- GitLab From c6c65d47ddebe82cf32f98ea56f10daf82dab16c Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Fri, 15 Jan 2021 04:11:17 +0200 Subject: [PATCH 0935/2012] net: mscc: ocelot: delete unused ocelot_set_cpu_port prototype This is a leftover of commit 69df578c5f4b ("net: mscc: ocelot: eliminate confusion between CPU and NPI port") which renamed that function. Signed-off-by: Vladimir Oltean Reviewed-by: Florian Fainelli Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mscc/ocelot.h | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/net/ethernet/mscc/ocelot.h b/drivers/net/ethernet/mscc/ocelot.h index 291d39d49c4e0..519335676c242 100644 --- a/drivers/net/ethernet/mscc/ocelot.h +++ b/drivers/net/ethernet/mscc/ocelot.h @@ -122,10 +122,6 @@ void ocelot_port_writel(struct ocelot_port *port, u32 val, u32 reg); int ocelot_probe_port(struct ocelot *ocelot, int port, struct regmap *target, struct phy_device *phy); -void ocelot_set_cpu_port(struct ocelot *ocelot, int cpu, - enum ocelot_tag_prefix injection, - enum ocelot_tag_prefix extraction); - extern struct notifier_block ocelot_netdevice_nb; extern struct notifier_block ocelot_switchdev_nb; extern struct notifier_block ocelot_switchdev_blocking_nb; -- GitLab From 6c30384eb1dec96b678ff9c01c15134b1a0e81f4 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Fri, 15 Jan 2021 04:11:18 +0200 Subject: [PATCH 0936/2012] net: mscc: ocelot: register devlink ports Add devlink integration into the mscc_ocelot switchdev driver. All physical ports (i.e. the unused ones as well) except the CPU port module at ocelot->num_phys_ports are registered with devlink, and that requires keeping the devlink_port structure outside struct ocelot_port_private, since the latter has a 1:1 mapping with a struct net_device (which does not exist for unused ports). Since we use devlink_port_type_eth_set to link the devlink port to the net_device, we can as well remove the .ndo_get_phys_port_name and .ndo_get_port_parent_id implementations, since devlink takes care of retrieving the port name and number automatically, once .ndo_get_devlink_port is implemented. Note that the felix DSA driver is already integrated with devlink by default, since that is a thing that the DSA core takes care of. This is the reason why these devlink stubs were put in ocelot_net.c and not in the common library. It is also the reason why ocelot::devlink is a pointer and not a full structure embedded inside struct ocelot: because the mscc_ocelot driver allocates that by itself (as the container of struct ocelot, in fact), but in the case of felix, it is DSA who allocates the devlink, and felix just propagates the pointer towards struct ocelot. Signed-off-by: Vladimir Oltean Reviewed-by: Florian Fainelli Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mscc/ocelot.h | 6 ++ drivers/net/ethernet/mscc/ocelot_net.c | 66 +++++++----- drivers/net/ethernet/mscc/ocelot_vsc7514.c | 118 ++++++++++++++++++--- include/soc/mscc/ocelot.h | 2 + 4 files changed, 148 insertions(+), 44 deletions(-) diff --git a/drivers/net/ethernet/mscc/ocelot.h b/drivers/net/ethernet/mscc/ocelot.h index 519335676c242..e8621dbc14f73 100644 --- a/drivers/net/ethernet/mscc/ocelot.h +++ b/drivers/net/ethernet/mscc/ocelot.h @@ -121,9 +121,15 @@ void ocelot_port_writel(struct ocelot_port *port, u32 val, u32 reg); int ocelot_probe_port(struct ocelot *ocelot, int port, struct regmap *target, struct phy_device *phy); +int ocelot_devlink_init(struct ocelot *ocelot); +void ocelot_devlink_teardown(struct ocelot *ocelot); +int ocelot_port_devlink_init(struct ocelot *ocelot, int port, + enum devlink_port_flavour flavour); +void ocelot_port_devlink_teardown(struct ocelot *ocelot, int port); extern struct notifier_block ocelot_netdevice_nb; extern struct notifier_block ocelot_switchdev_nb; extern struct notifier_block ocelot_switchdev_blocking_nb; +extern const struct devlink_ops ocelot_devlink_ops; #endif diff --git a/drivers/net/ethernet/mscc/ocelot_net.c b/drivers/net/ethernet/mscc/ocelot_net.c index 4fb9095be3eab..4485faefc2b16 100644 --- a/drivers/net/ethernet/mscc/ocelot_net.c +++ b/drivers/net/ethernet/mscc/ocelot_net.c @@ -8,6 +8,43 @@ #include "ocelot.h" #include "ocelot_vcap.h" +const struct devlink_ops ocelot_devlink_ops = { +}; + +int ocelot_port_devlink_init(struct ocelot *ocelot, int port, + enum devlink_port_flavour flavour) +{ + struct devlink_port *dlp = &ocelot->devlink_ports[port]; + int id_len = sizeof(ocelot->base_mac); + struct devlink *dl = ocelot->devlink; + struct devlink_port_attrs attrs = {}; + + memcpy(attrs.switch_id.id, &ocelot->base_mac, id_len); + attrs.switch_id.id_len = id_len; + attrs.phys.port_number = port; + attrs.flavour = flavour; + + devlink_port_attrs_set(dlp, &attrs); + + return devlink_port_register(dl, dlp, port); +} + +void ocelot_port_devlink_teardown(struct ocelot *ocelot, int port) +{ + struct devlink_port *dlp = &ocelot->devlink_ports[port]; + + devlink_port_unregister(dlp); +} + +static struct devlink_port *ocelot_get_devlink_port(struct net_device *dev) +{ + struct ocelot_port_private *priv = netdev_priv(dev); + struct ocelot *ocelot = priv->port.ocelot; + int port = priv->chip_port; + + return &ocelot->devlink_ports[port]; +} + int ocelot_setup_tc_cls_flower(struct ocelot_port_private *priv, struct flow_cls_offload *f, bool ingress) @@ -525,20 +562,6 @@ static void ocelot_set_rx_mode(struct net_device *dev) __dev_mc_sync(dev, ocelot_mc_sync, ocelot_mc_unsync); } -static int ocelot_port_get_phys_port_name(struct net_device *dev, - char *buf, size_t len) -{ - struct ocelot_port_private *priv = netdev_priv(dev); - int port = priv->chip_port; - int ret; - - ret = snprintf(buf, len, "p%d", port); - if (ret >= len) - return -EINVAL; - - return 0; -} - static int ocelot_port_set_mac_address(struct net_device *dev, void *p) { struct ocelot_port_private *priv = netdev_priv(dev); @@ -689,18 +712,6 @@ static int ocelot_set_features(struct net_device *dev, return 0; } -static int ocelot_get_port_parent_id(struct net_device *dev, - struct netdev_phys_item_id *ppid) -{ - struct ocelot_port_private *priv = netdev_priv(dev); - struct ocelot *ocelot = priv->port.ocelot; - - ppid->id_len = sizeof(ocelot->base_mac); - memcpy(&ppid->id, &ocelot->base_mac, ppid->id_len); - - return 0; -} - static int ocelot_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) { struct ocelot_port_private *priv = netdev_priv(dev); @@ -727,7 +738,6 @@ static const struct net_device_ops ocelot_port_netdev_ops = { .ndo_stop = ocelot_port_stop, .ndo_start_xmit = ocelot_port_xmit, .ndo_set_rx_mode = ocelot_set_rx_mode, - .ndo_get_phys_port_name = ocelot_port_get_phys_port_name, .ndo_set_mac_address = ocelot_port_set_mac_address, .ndo_get_stats64 = ocelot_get_stats64, .ndo_fdb_add = ocelot_port_fdb_add, @@ -736,9 +746,9 @@ static const struct net_device_ops ocelot_port_netdev_ops = { .ndo_vlan_rx_add_vid = ocelot_vlan_rx_add_vid, .ndo_vlan_rx_kill_vid = ocelot_vlan_rx_kill_vid, .ndo_set_features = ocelot_set_features, - .ndo_get_port_parent_id = ocelot_get_port_parent_id, .ndo_setup_tc = ocelot_setup_tc, .ndo_do_ioctl = ocelot_ioctl, + .ndo_get_devlink_port = ocelot_get_devlink_port, }; struct net_device *ocelot_port_to_netdev(struct ocelot *ocelot, int port) diff --git a/drivers/net/ethernet/mscc/ocelot_vsc7514.c b/drivers/net/ethernet/mscc/ocelot_vsc7514.c index ecd474476cc6c..28617023cd85c 100644 --- a/drivers/net/ethernet/mscc/ocelot_vsc7514.c +++ b/drivers/net/ethernet/mscc/ocelot_vsc7514.c @@ -1051,6 +1051,14 @@ static struct ptp_clock_info ocelot_ptp_clock_info = { .enable = ocelot_ptp_enable, }; +static void mscc_ocelot_teardown_devlink_ports(struct ocelot *ocelot) +{ + int port; + + for (port = 0; port < ocelot->num_phys_ports; port++) + ocelot_port_devlink_teardown(ocelot, port); +} + static void mscc_ocelot_release_ports(struct ocelot *ocelot) { int port; @@ -1078,28 +1086,44 @@ static int mscc_ocelot_init_ports(struct platform_device *pdev, { struct ocelot *ocelot = platform_get_drvdata(pdev); struct device_node *portnp; - int err; + bool *registered_ports; + int port, err; + u32 reg; ocelot->ports = devm_kcalloc(ocelot->dev, ocelot->num_phys_ports, sizeof(struct ocelot_port *), GFP_KERNEL); if (!ocelot->ports) return -ENOMEM; + ocelot->devlink_ports = devm_kcalloc(ocelot->dev, + ocelot->num_phys_ports, + sizeof(*ocelot->devlink_ports), + GFP_KERNEL); + if (!ocelot->devlink_ports) + return -ENOMEM; + + registered_ports = kcalloc(ocelot->num_phys_ports, sizeof(bool), + GFP_KERNEL); + if (!registered_ports) + return -ENOMEM; + for_each_available_child_of_node(ports, portnp) { struct ocelot_port_private *priv; struct ocelot_port *ocelot_port; struct device_node *phy_node; + struct devlink_port *dlp; phy_interface_t phy_mode; struct phy_device *phy; struct regmap *target; struct resource *res; struct phy *serdes; char res_name[8]; - u32 port; - if (of_property_read_u32(portnp, "reg", &port)) + if (of_property_read_u32(portnp, "reg", ®)) continue; + port = reg; + snprintf(res_name, sizeof(res_name), "port%d", port); res = platform_get_resource_byname(pdev, IORESOURCE_MEM, @@ -1117,15 +1141,26 @@ static int mscc_ocelot_init_ports(struct platform_device *pdev, if (!phy) continue; + err = ocelot_port_devlink_init(ocelot, port, + DEVLINK_PORT_FLAVOUR_PHYSICAL); + if (err) { + of_node_put(portnp); + goto out_teardown; + } + err = ocelot_probe_port(ocelot, port, target, phy); if (err) { of_node_put(portnp); - return err; + goto out_teardown; } + registered_ports[port] = true; + ocelot_port = ocelot->ports[port]; priv = container_of(ocelot_port, struct ocelot_port_private, port); + dlp = &ocelot->devlink_ports[port]; + devlink_port_type_eth_set(dlp, priv->dev); of_get_phy_mode(portnp, &phy_mode); @@ -1150,7 +1185,8 @@ static int mscc_ocelot_init_ports(struct platform_device *pdev, "invalid phy mode for port%d, (Q)SGMII only\n", port); of_node_put(portnp); - return -EINVAL; + err = -EINVAL; + goto out_teardown; } serdes = devm_of_phy_get(ocelot->dev, portnp, NULL); @@ -1164,13 +1200,46 @@ static int mscc_ocelot_init_ports(struct platform_device *pdev, port); of_node_put(portnp); - return err; + goto out_teardown; } priv->serdes = serdes; } + /* Initialize unused devlink ports at the end */ + for (port = 0; port < ocelot->num_phys_ports; port++) { + if (registered_ports[port]) + continue; + + err = ocelot_port_devlink_init(ocelot, port, + DEVLINK_PORT_FLAVOUR_UNUSED); + if (err) { + while (port-- >= 0) { + if (!registered_ports[port]) + continue; + ocelot_port_devlink_teardown(ocelot, port); + } + + goto out_teardown; + } + } + + kfree(registered_ports); + return 0; + +out_teardown: + /* Unregister the network interfaces */ + mscc_ocelot_release_ports(ocelot); + /* Tear down devlink ports for the registered network interfaces */ + for (port = 0; port < ocelot->num_phys_ports; port++) { + if (!registered_ports[port]) + continue; + + ocelot_port_devlink_teardown(ocelot, port); + } + kfree(registered_ports); + return err; } static int mscc_ocelot_probe(struct platform_device *pdev) @@ -1178,6 +1247,7 @@ static int mscc_ocelot_probe(struct platform_device *pdev) struct device_node *np = pdev->dev.of_node; int err, irq_xtr, irq_ptp_rdy; struct device_node *ports; + struct devlink *devlink; struct ocelot *ocelot; struct regmap *hsio; unsigned int i; @@ -1201,10 +1271,12 @@ static int mscc_ocelot_probe(struct platform_device *pdev) if (!np && !pdev->dev.platform_data) return -ENODEV; - ocelot = devm_kzalloc(&pdev->dev, sizeof(*ocelot), GFP_KERNEL); - if (!ocelot) + devlink = devlink_alloc(&ocelot_devlink_ops, sizeof(*ocelot)); + if (!devlink) return -ENOMEM; + ocelot = devlink_priv(devlink); + ocelot->devlink = priv_to_devlink(ocelot); platform_set_drvdata(pdev, ocelot); ocelot->dev = &pdev->dev; @@ -1221,7 +1293,8 @@ static int mscc_ocelot_probe(struct platform_device *pdev) ocelot->targets[io_target[i].id] = NULL; continue; } - return PTR_ERR(target); + err = PTR_ERR(target); + goto out_free_devlink; } ocelot->targets[io_target[i].id] = target; @@ -1230,24 +1303,25 @@ static int mscc_ocelot_probe(struct platform_device *pdev) hsio = syscon_regmap_lookup_by_compatible("mscc,ocelot-hsio"); if (IS_ERR(hsio)) { dev_err(&pdev->dev, "missing hsio syscon\n"); - return PTR_ERR(hsio); + err = PTR_ERR(hsio); + goto out_free_devlink; } ocelot->targets[HSIO] = hsio; err = ocelot_chip_init(ocelot, &ocelot_ops); if (err) - return err; + goto out_free_devlink; irq_xtr = platform_get_irq_byname(pdev, "xtr"); if (irq_xtr < 0) - return -ENODEV; + goto out_free_devlink; err = devm_request_threaded_irq(&pdev->dev, irq_xtr, NULL, ocelot_xtr_irq_handler, IRQF_ONESHOT, "frame extraction", ocelot); if (err) - return err; + goto out_free_devlink; irq_ptp_rdy = platform_get_irq_byname(pdev, "ptp_rdy"); if (irq_ptp_rdy > 0 && ocelot->targets[PTP]) { @@ -1256,7 +1330,7 @@ static int mscc_ocelot_probe(struct platform_device *pdev) IRQF_ONESHOT, "ptp ready", ocelot); if (err) - return err; + goto out_free_devlink; /* Both the PTP interrupt and the PTP bank are available */ ocelot->ptp = 1; @@ -1265,7 +1339,8 @@ static int mscc_ocelot_probe(struct platform_device *pdev) ports = of_get_child_by_name(np, "ethernet-ports"); if (!ports) { dev_err(ocelot->dev, "no ethernet-ports child node found\n"); - return -ENODEV; + err = -ENODEV; + goto out_free_devlink; } ocelot->num_phys_ports = of_get_child_count(ports); @@ -1280,10 +1355,14 @@ static int mscc_ocelot_probe(struct platform_device *pdev) if (err) goto out_put_ports; - err = mscc_ocelot_init_ports(pdev, ports); + err = devlink_register(devlink, ocelot->dev); if (err) goto out_ocelot_deinit; + err = mscc_ocelot_init_ports(pdev, ports); + if (err) + goto out_ocelot_devlink_unregister; + if (ocelot->ptp) { err = ocelot_init_timestamp(ocelot, &ocelot_ptp_clock_info); if (err) { @@ -1303,10 +1382,14 @@ static int mscc_ocelot_probe(struct platform_device *pdev) return 0; +out_ocelot_devlink_unregister: + devlink_unregister(devlink); out_ocelot_deinit: ocelot_deinit(ocelot); out_put_ports: of_node_put(ports); +out_free_devlink: + devlink_free(devlink); return err; } @@ -1316,10 +1399,13 @@ static int mscc_ocelot_remove(struct platform_device *pdev) ocelot_deinit_timestamp(ocelot); mscc_ocelot_release_ports(ocelot); + mscc_ocelot_teardown_devlink_ports(ocelot); + devlink_unregister(ocelot->devlink); ocelot_deinit(ocelot); unregister_switchdev_blocking_notifier(&ocelot_switchdev_blocking_nb); unregister_switchdev_notifier(&ocelot_switchdev_nb); unregister_netdevice_notifier(&ocelot_netdevice_nb); + devlink_free(ocelot->devlink); return 0; } diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h index 1dc0c6d0671a4..fc7dc66797399 100644 --- a/include/soc/mscc/ocelot.h +++ b/include/soc/mscc/ocelot.h @@ -602,6 +602,8 @@ struct ocelot_port { struct ocelot { struct device *dev; + struct devlink *devlink; + struct devlink_port *devlink_ports; const struct ocelot_ops *ops; struct regmap *targets[TARGET_MAX]; -- GitLab From a4ae997adcbdf8ead133bafa5e9e2d6925c576b6 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Fri, 15 Jan 2021 04:11:19 +0200 Subject: [PATCH 0937/2012] net: mscc: ocelot: initialize watermarks to sane defaults This is meant to be a gentle introduction into the world of watermarks on ocelot. The code is placed in ocelot_devlink.c because it will be integrated with devlink, even if it isn't right now. My first step was intended to be to replicate the default configuration of the congestion watermarks programatically, since they are now going to be tuned by the user. But after studying and understanding through trial and error how they work, I now believe that the configuration used out of reset does not do justice to the word "reservation", since the sum of all reservations exceeds the total amount of resources (otherwise said, all reservations cannot be fulfilled at the same time, which means that, contrary to the reference manual, they don't guarantee anything). As an example, here's a dump of the reservation watermarks for frame buffers, for port 0 (for brevity, the ports 1-6 were omitted, but they have the same configuration): BUF_Q_RSRV_I(port 0, prio 0) = max 3000 bytes BUF_Q_RSRV_I(port 0, prio 1) = max 3000 bytes BUF_Q_RSRV_I(port 0, prio 2) = max 3000 bytes BUF_Q_RSRV_I(port 0, prio 3) = max 3000 bytes BUF_Q_RSRV_I(port 0, prio 4) = max 3000 bytes BUF_Q_RSRV_I(port 0, prio 5) = max 3000 bytes BUF_Q_RSRV_I(port 0, prio 6) = max 3000 bytes BUF_Q_RSRV_I(port 0, prio 7) = max 3000 bytes Otherwise said, every port-tc has an ingress reservation of 3000 bytes, and there are 7 ports in VSC9959 Felix (6 user ports and 1 CPU port). Concentrating only on the ingress reservations, there are, in total, 8 [traffic classes] x 7 [ports] x 3000 [bytes] = 168,000 bytes of memory reserved on ingress. But, surprise, Felix only has 128 KB of packet buffer in total... A similar thing happens with Seville, which has a larger packet buffer, but also more ports, and the default configuration is also overcommitted. This patch disables the (apparently) bogus reservations and moves all resources to the shared area. This way, real reservations can be set up by the user, using devlink-sb. Signed-off-by: Vladimir Oltean Reviewed-by: Florian Fainelli Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mscc/Makefile | 3 +- drivers/net/ethernet/mscc/ocelot.c | 1 + drivers/net/ethernet/mscc/ocelot.h | 1 + drivers/net/ethernet/mscc/ocelot_devlink.c | 442 +++++++++++++++++++++ 4 files changed, 446 insertions(+), 1 deletion(-) create mode 100644 drivers/net/ethernet/mscc/ocelot_devlink.c diff --git a/drivers/net/ethernet/mscc/Makefile b/drivers/net/ethernet/mscc/Makefile index 58f94c3d80f91..346bba2730adb 100644 --- a/drivers/net/ethernet/mscc/Makefile +++ b/drivers/net/ethernet/mscc/Makefile @@ -6,7 +6,8 @@ mscc_ocelot_switch_lib-y := \ ocelot_police.o \ ocelot_vcap.o \ ocelot_flower.o \ - ocelot_ptp.o + ocelot_ptp.o \ + ocelot_devlink.o obj-$(CONFIG_MSCC_OCELOT_SWITCH) += mscc_ocelot.o mscc_ocelot-y := \ ocelot_vsc7514.o \ diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c index 3af8e607c8a9f..474ea5e59f89e 100644 --- a/drivers/net/ethernet/mscc/ocelot.c +++ b/drivers/net/ethernet/mscc/ocelot.c @@ -1624,6 +1624,7 @@ int ocelot_init(struct ocelot *ocelot) INIT_DELAYED_WORK(&ocelot->stats_work, ocelot_check_stats_work); queue_delayed_work(ocelot->stats_queue, &ocelot->stats_work, OCELOT_STATS_CHECK_DELAY); + ocelot_watermark_init(ocelot); return 0; } diff --git a/drivers/net/ethernet/mscc/ocelot.h b/drivers/net/ethernet/mscc/ocelot.h index e8621dbc14f73..48faa4dc893c7 100644 --- a/drivers/net/ethernet/mscc/ocelot.h +++ b/drivers/net/ethernet/mscc/ocelot.h @@ -126,6 +126,7 @@ void ocelot_devlink_teardown(struct ocelot *ocelot); int ocelot_port_devlink_init(struct ocelot *ocelot, int port, enum devlink_port_flavour flavour); void ocelot_port_devlink_teardown(struct ocelot *ocelot, int port); +void ocelot_watermark_init(struct ocelot *ocelot); extern struct notifier_block ocelot_netdevice_nb; extern struct notifier_block ocelot_switchdev_nb; diff --git a/drivers/net/ethernet/mscc/ocelot_devlink.c b/drivers/net/ethernet/mscc/ocelot_devlink.c new file mode 100644 index 0000000000000..c58315b628414 --- /dev/null +++ b/drivers/net/ethernet/mscc/ocelot_devlink.c @@ -0,0 +1,442 @@ +// SPDX-License-Identifier: (GPL-2.0 OR MIT) +/* Copyright 2020-2021 NXP Semiconductors + */ +#include +#include "ocelot.h" + +/* The queue system tracks four resource consumptions: + * Resource 0: Memory tracked per source port + * Resource 1: Frame references tracked per source port + * Resource 2: Memory tracked per destination port + * Resource 3: Frame references tracked per destination port + */ +#define OCELOT_RESOURCE_SZ 256 +#define OCELOT_NUM_RESOURCES 4 + +#define BUF_xxxx_I (0 * OCELOT_RESOURCE_SZ) +#define REF_xxxx_I (1 * OCELOT_RESOURCE_SZ) +#define BUF_xxxx_E (2 * OCELOT_RESOURCE_SZ) +#define REF_xxxx_E (3 * OCELOT_RESOURCE_SZ) + +/* For each resource type there are 4 types of watermarks: + * Q_RSRV: reservation per QoS class per port + * PRIO_SHR: sharing watermark per QoS class across all ports + * P_RSRV: reservation per port + * COL_SHR: sharing watermark per color (drop precedence) across all ports + */ +#define xxx_Q_RSRV_x 0 +#define xxx_PRIO_SHR_x 216 +#define xxx_P_RSRV_x 224 +#define xxx_COL_SHR_x 254 + +/* Reservation Watermarks + * ---------------------- + * + * For setting up the reserved areas, egress watermarks exist per port and per + * QoS class for both ingress and egress. + */ + +/* Amount of packet buffer + * | per QoS class + * | | reserved + * | | | per egress port + * | | | | + * V V v v + * BUF_Q_RSRV_E + */ +#define BUF_Q_RSRV_E(port, prio) \ + (BUF_xxxx_E + xxx_Q_RSRV_x + OCELOT_NUM_TC * (port) + (prio)) + +/* Amount of packet buffer + * | for all port's traffic classes + * | | reserved + * | | | per egress port + * | | | | + * V V v v + * BUF_P_RSRV_E + */ +#define BUF_P_RSRV_E(port) \ + (BUF_xxxx_E + xxx_P_RSRV_x + (port)) + +/* Amount of packet buffer + * | per QoS class + * | | reserved + * | | | per ingress port + * | | | | + * V V v v + * BUF_Q_RSRV_I + */ +#define BUF_Q_RSRV_I(port, prio) \ + (BUF_xxxx_I + xxx_Q_RSRV_x + OCELOT_NUM_TC * (port) + (prio)) + +/* Amount of packet buffer + * | for all port's traffic classes + * | | reserved + * | | | per ingress port + * | | | | + * V V v v + * BUF_P_RSRV_I + */ +#define BUF_P_RSRV_I(port) \ + (BUF_xxxx_I + xxx_P_RSRV_x + (port)) + +/* Amount of frame references + * | per QoS class + * | | reserved + * | | | per egress port + * | | | | + * V V v v + * REF_Q_RSRV_E + */ +#define REF_Q_RSRV_E(port, prio) \ + (REF_xxxx_E + xxx_Q_RSRV_x + OCELOT_NUM_TC * (port) + (prio)) + +/* Amount of frame references + * | for all port's traffic classes + * | | reserved + * | | | per egress port + * | | | | + * V V v v + * REF_P_RSRV_E + */ +#define REF_P_RSRV_E(port) \ + (REF_xxxx_E + xxx_P_RSRV_x + (port)) + +/* Amount of frame references + * | per QoS class + * | | reserved + * | | | per ingress port + * | | | | + * V V v v + * REF_Q_RSRV_I + */ +#define REF_Q_RSRV_I(port, prio) \ + (REF_xxxx_I + xxx_Q_RSRV_x + OCELOT_NUM_TC * (port) + (prio)) + +/* Amount of frame references + * | for all port's traffic classes + * | | reserved + * | | | per ingress port + * | | | | + * V V v v + * REF_P_RSRV_I + */ +#define REF_P_RSRV_I(port) \ + (REF_xxxx_I + xxx_P_RSRV_x + (port)) + +/* Sharing Watermarks + * ------------------ + * + * The shared memory area is shared between all ports. + */ + +/* Amount of buffer + * | per QoS class + * | | from the shared memory area + * | | | for egress traffic + * | | | | + * V V v v + * BUF_PRIO_SHR_E + */ +#define BUF_PRIO_SHR_E(prio) \ + (BUF_xxxx_E + xxx_PRIO_SHR_x + (prio)) + +/* Amount of buffer + * | per color (drop precedence level) + * | | from the shared memory area + * | | | for egress traffic + * | | | | + * V V v v + * BUF_COL_SHR_E + */ +#define BUF_COL_SHR_E(dp) \ + (BUF_xxxx_E + xxx_COL_SHR_x + (1 - (dp))) + +/* Amount of buffer + * | per QoS class + * | | from the shared memory area + * | | | for ingress traffic + * | | | | + * V V v v + * BUF_PRIO_SHR_I + */ +#define BUF_PRIO_SHR_I(prio) \ + (BUF_xxxx_I + xxx_PRIO_SHR_x + (prio)) + +/* Amount of buffer + * | per color (drop precedence level) + * | | from the shared memory area + * | | | for ingress traffic + * | | | | + * V V v v + * BUF_COL_SHR_I + */ +#define BUF_COL_SHR_I(dp) \ + (BUF_xxxx_I + xxx_COL_SHR_x + (1 - (dp))) + +/* Amount of frame references + * | per QoS class + * | | from the shared area + * | | | for egress traffic + * | | | | + * V V v v + * REF_PRIO_SHR_E + */ +#define REF_PRIO_SHR_E(prio) \ + (REF_xxxx_E + xxx_PRIO_SHR_x + (prio)) + +/* Amount of frame references + * | per color (drop precedence level) + * | | from the shared area + * | | | for egress traffic + * | | | | + * V V v v + * REF_COL_SHR_E + */ +#define REF_COL_SHR_E(dp) \ + (REF_xxxx_E + xxx_COL_SHR_x + (1 - (dp))) + +/* Amount of frame references + * | per QoS class + * | | from the shared area + * | | | for ingress traffic + * | | | | + * V V v v + * REF_PRIO_SHR_I + */ +#define REF_PRIO_SHR_I(prio) \ + (REF_xxxx_I + xxx_PRIO_SHR_x + (prio)) + +/* Amount of frame references + * | per color (drop precedence level) + * | | from the shared area + * | | | for ingress traffic + * | | | | + * V V v v + * REF_COL_SHR_I + */ +#define REF_COL_SHR_I(dp) \ + (REF_xxxx_I + xxx_COL_SHR_x + (1 - (dp))) + +static u32 ocelot_wm_read(struct ocelot *ocelot, int index) +{ + int wm = ocelot_read_gix(ocelot, QSYS_RES_CFG, index); + + return ocelot->ops->wm_dec(wm); +} + +static void ocelot_wm_write(struct ocelot *ocelot, int index, u32 val) +{ + u32 wm = ocelot->ops->wm_enc(val); + + ocelot_write_gix(ocelot, wm, QSYS_RES_CFG, index); +} + +/* The hardware comes out of reset with strange defaults: the sum of all + * reservations for frame memory is larger than the total buffer size. + * One has to wonder how can the reservation watermarks still guarantee + * anything under congestion. + * Bring some sense into the hardware by changing the defaults to disable all + * reservations and rely only on the sharing watermark for frames with drop + * precedence 0. The user can still explicitly request reservations per port + * and per port-tc through devlink-sb. + */ +static void ocelot_disable_reservation_watermarks(struct ocelot *ocelot, + int port) +{ + int prio; + + for (prio = 0; prio < OCELOT_NUM_TC; prio++) { + ocelot_wm_write(ocelot, BUF_Q_RSRV_I(port, prio), 0); + ocelot_wm_write(ocelot, BUF_Q_RSRV_E(port, prio), 0); + ocelot_wm_write(ocelot, REF_Q_RSRV_I(port, prio), 0); + ocelot_wm_write(ocelot, REF_Q_RSRV_E(port, prio), 0); + } + + ocelot_wm_write(ocelot, BUF_P_RSRV_I(port), 0); + ocelot_wm_write(ocelot, BUF_P_RSRV_E(port), 0); + ocelot_wm_write(ocelot, REF_P_RSRV_I(port), 0); + ocelot_wm_write(ocelot, REF_P_RSRV_E(port), 0); +} + +/* We want the sharing watermarks to consume all nonreserved resources, for + * efficient resource utilization (a single traffic flow should be able to use + * up the entire buffer space and frame resources as long as there's no + * interference). + * The switch has 10 sharing watermarks per lookup: 8 per traffic class and 2 + * per color (drop precedence). + * The trouble with configuring these sharing watermarks is that: + * (1) There's a risk that we overcommit the resources if we configure + * (a) all 8 per-TC sharing watermarks to the max + * (b) all 2 per-color sharing watermarks to the max + * (2) There's a risk that we undercommit the resources if we configure + * (a) all 8 per-TC sharing watermarks to "max / 8" + * (b) all 2 per-color sharing watermarks to "max / 2" + * So for Linux, let's just disable the sharing watermarks per traffic class + * (setting them to 0 will make them always exceeded), and rely only on the + * sharing watermark for drop priority 0. So frames with drop priority set to 1 + * by QoS classification or policing will still be allowed, but only as long as + * the port and port-TC reservations are not exceeded. + */ +static void ocelot_disable_tc_sharing_watermarks(struct ocelot *ocelot) +{ + int prio; + + for (prio = 0; prio < OCELOT_NUM_TC; prio++) { + ocelot_wm_write(ocelot, BUF_PRIO_SHR_I(prio), 0); + ocelot_wm_write(ocelot, BUF_PRIO_SHR_E(prio), 0); + ocelot_wm_write(ocelot, REF_PRIO_SHR_I(prio), 0); + ocelot_wm_write(ocelot, REF_PRIO_SHR_E(prio), 0); + } +} + +static void ocelot_get_buf_rsrv(struct ocelot *ocelot, u32 *buf_rsrv_i, + u32 *buf_rsrv_e) +{ + int port, prio; + + *buf_rsrv_i = 0; + *buf_rsrv_e = 0; + + for (port = 0; port <= ocelot->num_phys_ports; port++) { + for (prio = 0; prio < OCELOT_NUM_TC; prio++) { + *buf_rsrv_i += ocelot_wm_read(ocelot, + BUF_Q_RSRV_I(port, prio)); + *buf_rsrv_e += ocelot_wm_read(ocelot, + BUF_Q_RSRV_E(port, prio)); + } + + *buf_rsrv_i += ocelot_wm_read(ocelot, BUF_P_RSRV_I(port)); + *buf_rsrv_e += ocelot_wm_read(ocelot, BUF_P_RSRV_E(port)); + } + + *buf_rsrv_i *= OCELOT_BUFFER_CELL_SZ; + *buf_rsrv_e *= OCELOT_BUFFER_CELL_SZ; +} + +static void ocelot_get_ref_rsrv(struct ocelot *ocelot, u32 *ref_rsrv_i, + u32 *ref_rsrv_e) +{ + int port, prio; + + *ref_rsrv_i = 0; + *ref_rsrv_e = 0; + + for (port = 0; port <= ocelot->num_phys_ports; port++) { + for (prio = 0; prio < OCELOT_NUM_TC; prio++) { + *ref_rsrv_i += ocelot_wm_read(ocelot, + REF_Q_RSRV_I(port, prio)); + *ref_rsrv_e += ocelot_wm_read(ocelot, + REF_Q_RSRV_E(port, prio)); + } + + *ref_rsrv_i += ocelot_wm_read(ocelot, REF_P_RSRV_I(port)); + *ref_rsrv_e += ocelot_wm_read(ocelot, REF_P_RSRV_E(port)); + } +} + +/* Calculate all reservations, then set up the sharing watermark for DP=0 to + * consume the remaining resources up to the pool's configured size. + */ +static void ocelot_setup_sharing_watermarks(struct ocelot *ocelot) +{ + u32 buf_rsrv_i, buf_rsrv_e; + u32 ref_rsrv_i, ref_rsrv_e; + u32 buf_shr_i, buf_shr_e; + u32 ref_shr_i, ref_shr_e; + + ocelot_get_buf_rsrv(ocelot, &buf_rsrv_i, &buf_rsrv_e); + ocelot_get_ref_rsrv(ocelot, &ref_rsrv_i, &ref_rsrv_e); + + buf_shr_i = ocelot->packet_buffer_size - buf_rsrv_i; + buf_shr_e = ocelot->packet_buffer_size - buf_rsrv_e; + ref_shr_i = ocelot->num_frame_refs - ref_rsrv_i; + ref_shr_e = ocelot->num_frame_refs - ref_rsrv_e; + + buf_shr_i /= OCELOT_BUFFER_CELL_SZ; + buf_shr_e /= OCELOT_BUFFER_CELL_SZ; + + ocelot_wm_write(ocelot, BUF_COL_SHR_I(0), buf_shr_i); + ocelot_wm_write(ocelot, BUF_COL_SHR_E(0), buf_shr_e); + ocelot_wm_write(ocelot, REF_COL_SHR_E(0), ref_shr_e); + ocelot_wm_write(ocelot, REF_COL_SHR_I(0), ref_shr_i); + ocelot_wm_write(ocelot, BUF_COL_SHR_I(1), 0); + ocelot_wm_write(ocelot, BUF_COL_SHR_E(1), 0); + ocelot_wm_write(ocelot, REF_COL_SHR_E(1), 0); + ocelot_wm_write(ocelot, REF_COL_SHR_I(1), 0); +} + +/* The hardware works like this: + * + * Frame forwarding decision taken + * | + * v + * +--------------------+--------------------+--------------------+ + * | | | | + * v v v v + * Ingress memory Egress memory Ingress frame Egress frame + * check check reference check reference check + * | | | | + * v v v v + * BUF_Q_RSRV_I ok BUF_Q_RSRV_E ok REF_Q_RSRV_I ok REF_Q_RSRV_E ok + *(src port, prio) -+ (dst port, prio) -+ (src port, prio) -+ (dst port, prio) -+ + * | | | | | | | | + * |exceeded | |exceeded | |exceeded | |exceeded | + * v | v | v | v | + * BUF_P_RSRV_I ok| BUF_P_RSRV_E ok| REF_P_RSRV_I ok| REF_P_RSRV_E ok| + * (src port) ----+ (dst port) ----+ (src port) ----+ (dst port) -----+ + * | | | | | | | | + * |exceeded | |exceeded | |exceeded | |exceeded | + * v | v | v | v | + * BUF_PRIO_SHR_I ok| BUF_PRIO_SHR_E ok| REF_PRIO_SHR_I ok| REF_PRIO_SHR_E ok| + * (prio) ------+ (prio) ------+ (prio) ------+ (prio) -------+ + * | | | | | | | | + * |exceeded | |exceeded | |exceeded | |exceeded | + * v | v | v | v | + * BUF_COL_SHR_I ok| BUF_COL_SHR_E ok| REF_COL_SHR_I ok| REF_COL_SHR_E ok| + * (dp) -------+ (dp) -------+ (dp) -------+ (dp) --------+ + * | | | | | | | | + * |exceeded | |exceeded | |exceeded | |exceeded | + * v v v v v v v v + * fail success fail success fail success fail success + * | | | | | | | | + * v v v v v v v v + * +-----+----+ +-----+----+ +-----+----+ +-----+-----+ + * | | | | + * +-------> OR <-------+ +-------> OR <-------+ + * | | + * v v + * +----------------> AND <-----------------+ + * | + * v + * FIFO drop / accept + * + * We are modeling each of the 4 parallel lookups as a devlink-sb pool. + * At least one (ingress or egress) memory pool and one (ingress or egress) + * frame reference pool need to have resources for frame acceptance to succeed. + * + * The following watermarks are controlled explicitly through devlink-sb: + * BUF_Q_RSRV_I, BUF_Q_RSRV_E, REF_Q_RSRV_I, REF_Q_RSRV_E + * BUF_P_RSRV_I, BUF_P_RSRV_E, REF_P_RSRV_I, REF_P_RSRV_E + * The following watermarks are controlled implicitly through devlink-sb: + * BUF_COL_SHR_I, BUF_COL_SHR_E, REF_COL_SHR_I, REF_COL_SHR_E + * The following watermarks are unused and disabled: + * BUF_PRIO_SHR_I, BUF_PRIO_SHR_E, REF_PRIO_SHR_I, REF_PRIO_SHR_E + * + * This function overrides the hardware defaults with more sane ones (no + * reservations by default, let sharing use all resources) and disables the + * unused watermarks. + */ +void ocelot_watermark_init(struct ocelot *ocelot) +{ + int all_tcs = GENMASK(OCELOT_NUM_TC - 1, 0); + int port; + + ocelot_write(ocelot, all_tcs, QSYS_RES_QOS_MODE); + + for (port = 0; port <= ocelot->num_phys_ports; port++) + ocelot_disable_reservation_watermarks(ocelot, port); + + ocelot_disable_tc_sharing_watermarks(ocelot); + ocelot_setup_sharing_watermarks(ocelot); +} -- GitLab From f59fd9cab7305266f4148776c3b66329551a2a3a Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Fri, 15 Jan 2021 04:11:20 +0200 Subject: [PATCH 0938/2012] net: mscc: ocelot: configure watermarks using devlink-sb Using devlink-sb, we can configure 12/16 (the important 75%) of the switch's controlling watermarks for congestion drops, and we can monitor 50% of the watermark occupancies (we can monitor the reservation watermarks, but not the sharing watermarks, which are exposed as pool sizes). The following definitions can be made: SB_BUF=0 # The devlink-sb for frame buffers SB_REF=1 # The devlink-sb for frame references POOL_ING=0 # The pool for ingress traffic. Both devlink-sb instances # have one of these. POOL_EGR=1 # The pool for egress traffic. Both devlink-sb instances # have one of these. Editing the hardware watermarks is done in the following way: BUF_xxxx_I is accessed when sb=$SB_BUF and pool=$POOL_ING REF_xxxx_I is accessed when sb=$SB_REF and pool=$POOL_ING BUF_xxxx_E is accessed when sb=$SB_BUF and pool=$POOL_EGR REF_xxxx_E is accessed when sb=$SB_REF and pool=$POOL_EGR Configuring the sharing watermarks for COL_SHR(dp=0) is done implicitly by modifying the corresponding pool size. By default, the pool size has maximum size, so this can be skipped. devlink sb pool set pci/0000:00:00.5 sb $SB_BUF pool $POOL_ING \ size 129840 thtype static Since by default there is no buffer reservation, the above command has maxed out BUF_COL_SHR_I(dp=0). Configuring the per-port reservation watermark (P_RSRV) is done in the following way: devlink sb port pool set pci/0000:00:00.5/0 sb $SB_BUF \ pool $POOL_ING th 1000 The above command sets BUF_P_RSRV_I(port 0) to 1000 bytes. After this command, the sharing watermarks are internally reconfigured with 1000 bytes less, i.e. from 129840 bytes to 128840 bytes. Configuring the per-port-tc reservation watermarks (Q_RSRV) is done in the following way: for tc in {0..7}; do devlink sb tc bind set pci/0000:00:00.5/0 sb 0 tc $tc \ type ingress pool $POOL_ING \ th 3000 done The above command sets BUF_Q_RSRV_I(port 0, tc 0..7) to 3000 bytes. The sharing watermarks are again reconfigured with 24000 bytes less. Signed-off-by: Vladimir Oltean Signed-off-by: Jakub Kicinski --- drivers/net/dsa/ocelot/felix.c | 118 ++++++ drivers/net/ethernet/mscc/ocelot.c | 5 - drivers/net/ethernet/mscc/ocelot.h | 1 - drivers/net/ethernet/mscc/ocelot_devlink.c | 453 ++++++++++++++++++++- drivers/net/ethernet/mscc/ocelot_net.c | 140 +++++++ drivers/net/ethernet/mscc/ocelot_vsc7514.c | 8 + include/soc/mscc/ocelot.h | 47 +++ 7 files changed, 761 insertions(+), 11 deletions(-) diff --git a/drivers/net/dsa/ocelot/felix.c b/drivers/net/dsa/ocelot/felix.c index 0c6a7de033318..767cbdccdb3e1 100644 --- a/drivers/net/dsa/ocelot/felix.c +++ b/drivers/net/dsa/ocelot/felix.c @@ -427,6 +427,7 @@ static int felix_init_structs(struct felix *felix, int num_phys_ports) ocelot->ops = felix->info->ops; ocelot->inj_prefix = OCELOT_TAG_PREFIX_SHORT; ocelot->xtr_prefix = OCELOT_TAG_PREFIX_SHORT; + ocelot->devlink = felix->ds->devlink; port_phy_modes = kcalloc(num_phys_ports, sizeof(phy_interface_t), GFP_KERNEL); @@ -588,6 +589,10 @@ static int felix_setup(struct dsa_switch *ds) felix_port_qos_map_init(ocelot, port); } + err = ocelot_devlink_sb_register(ocelot); + if (err) + return err; + /* Include the CPU port module in the forwarding mask for unknown * unicast - the hardware default value for ANA_FLOODING_FLD_UNICAST * excludes BIT(ocelot->num_phys_ports), and so does ocelot_init, since @@ -609,6 +614,7 @@ static void felix_teardown(struct dsa_switch *ds) struct felix *felix = ocelot_to_felix(ocelot); int port; + ocelot_devlink_sb_unregister(ocelot); ocelot_deinit_timestamp(ocelot); ocelot_deinit(ocelot); @@ -750,6 +756,108 @@ static int felix_port_setup_tc(struct dsa_switch *ds, int port, return -EOPNOTSUPP; } +static int felix_sb_pool_get(struct dsa_switch *ds, unsigned int sb_index, + u16 pool_index, + struct devlink_sb_pool_info *pool_info) +{ + struct ocelot *ocelot = ds->priv; + + return ocelot_sb_pool_get(ocelot, sb_index, pool_index, pool_info); +} + +static int felix_sb_pool_set(struct dsa_switch *ds, unsigned int sb_index, + u16 pool_index, u32 size, + enum devlink_sb_threshold_type threshold_type, + struct netlink_ext_ack *extack) +{ + struct ocelot *ocelot = ds->priv; + + return ocelot_sb_pool_set(ocelot, sb_index, pool_index, size, + threshold_type, extack); +} + +static int felix_sb_port_pool_get(struct dsa_switch *ds, int port, + unsigned int sb_index, u16 pool_index, + u32 *p_threshold) +{ + struct ocelot *ocelot = ds->priv; + + return ocelot_sb_port_pool_get(ocelot, port, sb_index, pool_index, + p_threshold); +} + +static int felix_sb_port_pool_set(struct dsa_switch *ds, int port, + unsigned int sb_index, u16 pool_index, + u32 threshold, struct netlink_ext_ack *extack) +{ + struct ocelot *ocelot = ds->priv; + + return ocelot_sb_port_pool_set(ocelot, port, sb_index, pool_index, + threshold, extack); +} + +static int felix_sb_tc_pool_bind_get(struct dsa_switch *ds, int port, + unsigned int sb_index, u16 tc_index, + enum devlink_sb_pool_type pool_type, + u16 *p_pool_index, u32 *p_threshold) +{ + struct ocelot *ocelot = ds->priv; + + return ocelot_sb_tc_pool_bind_get(ocelot, port, sb_index, tc_index, + pool_type, p_pool_index, + p_threshold); +} + +static int felix_sb_tc_pool_bind_set(struct dsa_switch *ds, int port, + unsigned int sb_index, u16 tc_index, + enum devlink_sb_pool_type pool_type, + u16 pool_index, u32 threshold, + struct netlink_ext_ack *extack) +{ + struct ocelot *ocelot = ds->priv; + + return ocelot_sb_tc_pool_bind_set(ocelot, port, sb_index, tc_index, + pool_type, pool_index, threshold, + extack); +} + +static int felix_sb_occ_snapshot(struct dsa_switch *ds, + unsigned int sb_index) +{ + struct ocelot *ocelot = ds->priv; + + return ocelot_sb_occ_snapshot(ocelot, sb_index); +} + +static int felix_sb_occ_max_clear(struct dsa_switch *ds, + unsigned int sb_index) +{ + struct ocelot *ocelot = ds->priv; + + return ocelot_sb_occ_max_clear(ocelot, sb_index); +} + +static int felix_sb_occ_port_pool_get(struct dsa_switch *ds, int port, + unsigned int sb_index, u16 pool_index, + u32 *p_cur, u32 *p_max) +{ + struct ocelot *ocelot = ds->priv; + + return ocelot_sb_occ_port_pool_get(ocelot, port, sb_index, pool_index, + p_cur, p_max); +} + +static int felix_sb_occ_tc_port_bind_get(struct dsa_switch *ds, int port, + unsigned int sb_index, u16 tc_index, + enum devlink_sb_pool_type pool_type, + u32 *p_cur, u32 *p_max) +{ + struct ocelot *ocelot = ds->priv; + + return ocelot_sb_occ_tc_port_bind_get(ocelot, port, sb_index, tc_index, + pool_type, p_cur, p_max); +} + const struct dsa_switch_ops felix_switch_ops = { .get_tag_protocol = felix_get_tag_protocol, .setup = felix_setup, @@ -788,6 +896,16 @@ const struct dsa_switch_ops felix_switch_ops = { .cls_flower_del = felix_cls_flower_del, .cls_flower_stats = felix_cls_flower_stats, .port_setup_tc = felix_port_setup_tc, + .devlink_sb_pool_get = felix_sb_pool_get, + .devlink_sb_pool_set = felix_sb_pool_set, + .devlink_sb_port_pool_get = felix_sb_port_pool_get, + .devlink_sb_port_pool_set = felix_sb_port_pool_set, + .devlink_sb_tc_pool_bind_get = felix_sb_tc_pool_bind_get, + .devlink_sb_tc_pool_bind_set = felix_sb_tc_pool_bind_set, + .devlink_sb_occ_snapshot = felix_sb_occ_snapshot, + .devlink_sb_occ_max_clear = felix_sb_occ_max_clear, + .devlink_sb_occ_port_pool_get = felix_sb_occ_port_pool_get, + .devlink_sb_occ_tc_port_bind_get= felix_sb_occ_tc_port_bind_get, }; struct net_device *felix_port_to_netdev(struct ocelot *ocelot, int port) diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c index 474ea5e59f89e..a560d6be2a445 100644 --- a/drivers/net/ethernet/mscc/ocelot.c +++ b/drivers/net/ethernet/mscc/ocelot.c @@ -1480,10 +1480,6 @@ static void ocelot_detect_features(struct ocelot *ocelot) eq_ctrl = ocelot_read(ocelot, QSYS_EQ_CTRL); ocelot->num_frame_refs = QSYS_MMGT_EQ_CTRL_FP_FREE_CNT(eq_ctrl); - - dev_info(ocelot->dev, - "Detected %d bytes of packet buffer and %d frame references\n", - ocelot->packet_buffer_size, ocelot->num_frame_refs); } int ocelot_init(struct ocelot *ocelot) @@ -1624,7 +1620,6 @@ int ocelot_init(struct ocelot *ocelot) INIT_DELAYED_WORK(&ocelot->stats_work, ocelot_check_stats_work); queue_delayed_work(ocelot->stats_queue, &ocelot->stats_work, OCELOT_STATS_CHECK_DELAY); - ocelot_watermark_init(ocelot); return 0; } diff --git a/drivers/net/ethernet/mscc/ocelot.h b/drivers/net/ethernet/mscc/ocelot.h index 48faa4dc893c7..e8621dbc14f73 100644 --- a/drivers/net/ethernet/mscc/ocelot.h +++ b/drivers/net/ethernet/mscc/ocelot.h @@ -126,7 +126,6 @@ void ocelot_devlink_teardown(struct ocelot *ocelot); int ocelot_port_devlink_init(struct ocelot *ocelot, int port, enum devlink_port_flavour flavour); void ocelot_port_devlink_teardown(struct ocelot *ocelot, int port); -void ocelot_watermark_init(struct ocelot *ocelot); extern struct notifier_block ocelot_netdevice_nb; extern struct notifier_block ocelot_switchdev_nb; diff --git a/drivers/net/ethernet/mscc/ocelot_devlink.c b/drivers/net/ethernet/mscc/ocelot_devlink.c index c58315b628414..edafbd37d12cb 100644 --- a/drivers/net/ethernet/mscc/ocelot_devlink.c +++ b/drivers/net/ethernet/mscc/ocelot_devlink.c @@ -232,6 +232,14 @@ static void ocelot_wm_write(struct ocelot *ocelot, int index, u32 val) ocelot_write_gix(ocelot, wm, QSYS_RES_CFG, index); } +static void ocelot_wm_status(struct ocelot *ocelot, int index, u32 *inuse, + u32 *maxuse) +{ + int res_stat = ocelot_read_gix(ocelot, QSYS_RES_STAT, index); + + return ocelot->ops->wm_stat(res_stat, inuse, maxuse); +} + /* The hardware comes out of reset with strange defaults: the sum of all * reservations for frame memory is larger than the total buffer size. * One has to wonder how can the reservation watermarks still guarantee @@ -348,10 +356,14 @@ static void ocelot_setup_sharing_watermarks(struct ocelot *ocelot) ocelot_get_buf_rsrv(ocelot, &buf_rsrv_i, &buf_rsrv_e); ocelot_get_ref_rsrv(ocelot, &ref_rsrv_i, &ref_rsrv_e); - buf_shr_i = ocelot->packet_buffer_size - buf_rsrv_i; - buf_shr_e = ocelot->packet_buffer_size - buf_rsrv_e; - ref_shr_i = ocelot->num_frame_refs - ref_rsrv_i; - ref_shr_e = ocelot->num_frame_refs - ref_rsrv_e; + buf_shr_i = ocelot->pool_size[OCELOT_SB_BUF][OCELOT_SB_POOL_ING] - + buf_rsrv_i; + buf_shr_e = ocelot->pool_size[OCELOT_SB_BUF][OCELOT_SB_POOL_EGR] - + buf_rsrv_e; + ref_shr_i = ocelot->pool_size[OCELOT_SB_REF][OCELOT_SB_POOL_ING] - + ref_rsrv_i; + ref_shr_e = ocelot->pool_size[OCELOT_SB_REF][OCELOT_SB_POOL_EGR] - + ref_rsrv_e; buf_shr_i /= OCELOT_BUFFER_CELL_SZ; buf_shr_e /= OCELOT_BUFFER_CELL_SZ; @@ -366,6 +378,40 @@ static void ocelot_setup_sharing_watermarks(struct ocelot *ocelot) ocelot_wm_write(ocelot, REF_COL_SHR_I(1), 0); } +/* Ensure that all reservations can be enforced */ +static int ocelot_watermark_validate(struct ocelot *ocelot, + struct netlink_ext_ack *extack) +{ + u32 buf_rsrv_i, buf_rsrv_e; + u32 ref_rsrv_i, ref_rsrv_e; + + ocelot_get_buf_rsrv(ocelot, &buf_rsrv_i, &buf_rsrv_e); + ocelot_get_ref_rsrv(ocelot, &ref_rsrv_i, &ref_rsrv_e); + + if (buf_rsrv_i > ocelot->pool_size[OCELOT_SB_BUF][OCELOT_SB_POOL_ING]) { + NL_SET_ERR_MSG_MOD(extack, + "Ingress frame reservations exceed pool size"); + return -ERANGE; + } + if (buf_rsrv_e > ocelot->pool_size[OCELOT_SB_BUF][OCELOT_SB_POOL_EGR]) { + NL_SET_ERR_MSG_MOD(extack, + "Egress frame reservations exceed pool size"); + return -ERANGE; + } + if (ref_rsrv_i > ocelot->pool_size[OCELOT_SB_REF][OCELOT_SB_POOL_ING]) { + NL_SET_ERR_MSG_MOD(extack, + "Ingress reference reservations exceed pool size"); + return -ERANGE; + } + if (ref_rsrv_e > ocelot->pool_size[OCELOT_SB_REF][OCELOT_SB_POOL_EGR]) { + NL_SET_ERR_MSG_MOD(extack, + "Egress reference reservations exceed pool size"); + return -ERANGE; + } + + return 0; +} + /* The hardware works like this: * * Frame forwarding decision taken @@ -427,7 +473,7 @@ static void ocelot_setup_sharing_watermarks(struct ocelot *ocelot) * reservations by default, let sharing use all resources) and disables the * unused watermarks. */ -void ocelot_watermark_init(struct ocelot *ocelot) +static void ocelot_watermark_init(struct ocelot *ocelot) { int all_tcs = GENMASK(OCELOT_NUM_TC - 1, 0); int port; @@ -440,3 +486,400 @@ void ocelot_watermark_init(struct ocelot *ocelot) ocelot_disable_tc_sharing_watermarks(ocelot); ocelot_setup_sharing_watermarks(ocelot); } + +/* Pool size and type are fixed up at runtime. Keeping this structure to + * look up the cell size multipliers. + */ +static const struct devlink_sb_pool_info ocelot_sb_pool[] = { + [OCELOT_SB_BUF] = { + .cell_size = OCELOT_BUFFER_CELL_SZ, + .threshold_type = DEVLINK_SB_THRESHOLD_TYPE_STATIC, + }, + [OCELOT_SB_REF] = { + .cell_size = 1, + .threshold_type = DEVLINK_SB_THRESHOLD_TYPE_STATIC, + }, +}; + +/* Returns the pool size configured through ocelot_sb_pool_set */ +int ocelot_sb_pool_get(struct ocelot *ocelot, unsigned int sb_index, + u16 pool_index, + struct devlink_sb_pool_info *pool_info) +{ + if (sb_index >= OCELOT_SB_NUM) + return -ENODEV; + if (pool_index >= OCELOT_SB_POOL_NUM) + return -ENODEV; + + *pool_info = ocelot_sb_pool[sb_index]; + pool_info->size = ocelot->pool_size[sb_index][pool_index]; + if (pool_index) + pool_info->pool_type = DEVLINK_SB_POOL_TYPE_INGRESS; + else + pool_info->pool_type = DEVLINK_SB_POOL_TYPE_EGRESS; + + return 0; +} +EXPORT_SYMBOL(ocelot_sb_pool_get); + +/* The pool size received here configures the total amount of resources used on + * ingress (or on egress, depending upon the pool index). The pool size, minus + * the values for the port and port-tc reservations, is written into the + * COL_SHR(dp=0) sharing watermark. + */ +int ocelot_sb_pool_set(struct ocelot *ocelot, unsigned int sb_index, + u16 pool_index, u32 size, + enum devlink_sb_threshold_type threshold_type, + struct netlink_ext_ack *extack) +{ + u32 old_pool_size; + int err; + + if (sb_index >= OCELOT_SB_NUM) { + NL_SET_ERR_MSG_MOD(extack, + "Invalid sb, use 0 for buffers and 1 for frame references"); + return -ENODEV; + } + if (pool_index >= OCELOT_SB_POOL_NUM) { + NL_SET_ERR_MSG_MOD(extack, + "Invalid pool, use 0 for ingress and 1 for egress"); + return -ENODEV; + } + if (threshold_type != DEVLINK_SB_THRESHOLD_TYPE_STATIC) { + NL_SET_ERR_MSG_MOD(extack, + "Only static threshold supported"); + return -EOPNOTSUPP; + } + + old_pool_size = ocelot->pool_size[sb_index][pool_index]; + ocelot->pool_size[sb_index][pool_index] = size; + + err = ocelot_watermark_validate(ocelot, extack); + if (err) { + ocelot->pool_size[sb_index][pool_index] = old_pool_size; + return err; + } + + ocelot_setup_sharing_watermarks(ocelot); + + return 0; +} +EXPORT_SYMBOL(ocelot_sb_pool_set); + +/* This retrieves the configuration made with ocelot_sb_port_pool_set */ +int ocelot_sb_port_pool_get(struct ocelot *ocelot, int port, + unsigned int sb_index, u16 pool_index, + u32 *p_threshold) +{ + int wm_index; + + switch (sb_index) { + case OCELOT_SB_BUF: + if (pool_index == OCELOT_SB_POOL_ING) + wm_index = BUF_P_RSRV_I(port); + else + wm_index = BUF_P_RSRV_E(port); + break; + case OCELOT_SB_REF: + if (pool_index == OCELOT_SB_POOL_ING) + wm_index = REF_P_RSRV_I(port); + else + wm_index = REF_P_RSRV_E(port); + break; + default: + return -ENODEV; + } + + *p_threshold = ocelot_wm_read(ocelot, wm_index); + *p_threshold *= ocelot_sb_pool[sb_index].cell_size; + + return 0; +} +EXPORT_SYMBOL(ocelot_sb_port_pool_get); + +/* This configures the P_RSRV per-port reserved resource watermark */ +int ocelot_sb_port_pool_set(struct ocelot *ocelot, int port, + unsigned int sb_index, u16 pool_index, + u32 threshold, struct netlink_ext_ack *extack) +{ + int wm_index, err; + u32 old_thr; + + switch (sb_index) { + case OCELOT_SB_BUF: + if (pool_index == OCELOT_SB_POOL_ING) + wm_index = BUF_P_RSRV_I(port); + else + wm_index = BUF_P_RSRV_E(port); + break; + case OCELOT_SB_REF: + if (pool_index == OCELOT_SB_POOL_ING) + wm_index = REF_P_RSRV_I(port); + else + wm_index = REF_P_RSRV_E(port); + break; + default: + NL_SET_ERR_MSG_MOD(extack, "Invalid shared buffer"); + return -ENODEV; + } + + threshold /= ocelot_sb_pool[sb_index].cell_size; + + old_thr = ocelot_wm_read(ocelot, wm_index); + ocelot_wm_write(ocelot, wm_index, threshold); + + err = ocelot_watermark_validate(ocelot, extack); + if (err) { + ocelot_wm_write(ocelot, wm_index, old_thr); + return err; + } + + ocelot_setup_sharing_watermarks(ocelot); + + return 0; +} +EXPORT_SYMBOL(ocelot_sb_port_pool_set); + +/* This retrieves the configuration done by ocelot_sb_tc_pool_bind_set */ +int ocelot_sb_tc_pool_bind_get(struct ocelot *ocelot, int port, + unsigned int sb_index, u16 tc_index, + enum devlink_sb_pool_type pool_type, + u16 *p_pool_index, u32 *p_threshold) +{ + int wm_index; + + switch (sb_index) { + case OCELOT_SB_BUF: + if (pool_type == DEVLINK_SB_POOL_TYPE_INGRESS) + wm_index = BUF_Q_RSRV_I(port, tc_index); + else + wm_index = BUF_Q_RSRV_E(port, tc_index); + break; + case OCELOT_SB_REF: + if (pool_type == DEVLINK_SB_POOL_TYPE_INGRESS) + wm_index = REF_Q_RSRV_I(port, tc_index); + else + wm_index = REF_Q_RSRV_E(port, tc_index); + break; + default: + return -ENODEV; + } + + *p_threshold = ocelot_wm_read(ocelot, wm_index); + *p_threshold *= ocelot_sb_pool[sb_index].cell_size; + + if (pool_type == DEVLINK_SB_POOL_TYPE_INGRESS) + *p_pool_index = 0; + else + *p_pool_index = 1; + + return 0; +} +EXPORT_SYMBOL(ocelot_sb_tc_pool_bind_get); + +/* This configures the Q_RSRV per-port-tc reserved resource watermark */ +int ocelot_sb_tc_pool_bind_set(struct ocelot *ocelot, int port, + unsigned int sb_index, u16 tc_index, + enum devlink_sb_pool_type pool_type, + u16 pool_index, u32 threshold, + struct netlink_ext_ack *extack) +{ + int wm_index, err; + u32 old_thr; + + /* Paranoid check? */ + if (pool_index == OCELOT_SB_POOL_ING && + pool_type != DEVLINK_SB_POOL_TYPE_INGRESS) + return -EINVAL; + if (pool_index == OCELOT_SB_POOL_EGR && + pool_type != DEVLINK_SB_POOL_TYPE_EGRESS) + return -EINVAL; + + switch (sb_index) { + case OCELOT_SB_BUF: + if (pool_type == DEVLINK_SB_POOL_TYPE_INGRESS) + wm_index = BUF_Q_RSRV_I(port, tc_index); + else + wm_index = BUF_Q_RSRV_E(port, tc_index); + break; + case OCELOT_SB_REF: + if (pool_type == DEVLINK_SB_POOL_TYPE_INGRESS) + wm_index = REF_Q_RSRV_I(port, tc_index); + else + wm_index = REF_Q_RSRV_E(port, tc_index); + break; + default: + NL_SET_ERR_MSG_MOD(extack, "Invalid shared buffer"); + return -ENODEV; + } + + threshold /= ocelot_sb_pool[sb_index].cell_size; + + old_thr = ocelot_wm_read(ocelot, wm_index); + ocelot_wm_write(ocelot, wm_index, threshold); + err = ocelot_watermark_validate(ocelot, extack); + if (err) { + ocelot_wm_write(ocelot, wm_index, old_thr); + return err; + } + + ocelot_setup_sharing_watermarks(ocelot); + + return 0; +} +EXPORT_SYMBOL(ocelot_sb_tc_pool_bind_set); + +/* The hardware does not support atomic snapshots, we'll read out the + * occupancy registers individually and have this as just a stub. + */ +int ocelot_sb_occ_snapshot(struct ocelot *ocelot, unsigned int sb_index) +{ + return 0; +} +EXPORT_SYMBOL(ocelot_sb_occ_snapshot); + +/* The watermark occupancy registers are cleared upon read, + * so let's read them. + */ +int ocelot_sb_occ_max_clear(struct ocelot *ocelot, unsigned int sb_index) +{ + u32 inuse, maxuse; + int port, prio; + + switch (sb_index) { + case OCELOT_SB_BUF: + for (port = 0; port <= ocelot->num_phys_ports; port++) { + for (prio = 0; prio < OCELOT_NUM_TC; prio++) { + ocelot_wm_status(ocelot, BUF_Q_RSRV_I(port, prio), + &inuse, &maxuse); + ocelot_wm_status(ocelot, BUF_Q_RSRV_E(port, prio), + &inuse, &maxuse); + } + ocelot_wm_status(ocelot, BUF_P_RSRV_I(port), + &inuse, &maxuse); + ocelot_wm_status(ocelot, BUF_P_RSRV_E(port), + &inuse, &maxuse); + } + break; + case OCELOT_SB_REF: + for (port = 0; port <= ocelot->num_phys_ports; port++) { + for (prio = 0; prio < OCELOT_NUM_TC; prio++) { + ocelot_wm_status(ocelot, REF_Q_RSRV_I(port, prio), + &inuse, &maxuse); + ocelot_wm_status(ocelot, REF_Q_RSRV_E(port, prio), + &inuse, &maxuse); + } + ocelot_wm_status(ocelot, REF_P_RSRV_I(port), + &inuse, &maxuse); + ocelot_wm_status(ocelot, REF_P_RSRV_E(port), + &inuse, &maxuse); + } + break; + default: + return -ENODEV; + } + + return 0; +} +EXPORT_SYMBOL(ocelot_sb_occ_max_clear); + +/* This retrieves the watermark occupancy for per-port P_RSRV watermarks */ +int ocelot_sb_occ_port_pool_get(struct ocelot *ocelot, int port, + unsigned int sb_index, u16 pool_index, + u32 *p_cur, u32 *p_max) +{ + int wm_index; + + switch (sb_index) { + case OCELOT_SB_BUF: + if (pool_index == OCELOT_SB_POOL_ING) + wm_index = BUF_P_RSRV_I(port); + else + wm_index = BUF_P_RSRV_E(port); + break; + case OCELOT_SB_REF: + if (pool_index == OCELOT_SB_POOL_ING) + wm_index = REF_P_RSRV_I(port); + else + wm_index = REF_P_RSRV_E(port); + break; + default: + return -ENODEV; + } + + ocelot_wm_status(ocelot, wm_index, p_cur, p_max); + *p_cur *= ocelot_sb_pool[sb_index].cell_size; + *p_max *= ocelot_sb_pool[sb_index].cell_size; + + return 0; +} +EXPORT_SYMBOL(ocelot_sb_occ_port_pool_get); + +/* This retrieves the watermark occupancy for per-port-tc Q_RSRV watermarks */ +int ocelot_sb_occ_tc_port_bind_get(struct ocelot *ocelot, int port, + unsigned int sb_index, u16 tc_index, + enum devlink_sb_pool_type pool_type, + u32 *p_cur, u32 *p_max) +{ + int wm_index; + + switch (sb_index) { + case OCELOT_SB_BUF: + if (pool_type == DEVLINK_SB_POOL_TYPE_INGRESS) + wm_index = BUF_Q_RSRV_I(port, tc_index); + else + wm_index = BUF_Q_RSRV_E(port, tc_index); + break; + case OCELOT_SB_REF: + if (pool_type == DEVLINK_SB_POOL_TYPE_INGRESS) + wm_index = REF_Q_RSRV_I(port, tc_index); + else + wm_index = REF_Q_RSRV_E(port, tc_index); + break; + default: + return -ENODEV; + } + + ocelot_wm_status(ocelot, wm_index, p_cur, p_max); + *p_cur *= ocelot_sb_pool[sb_index].cell_size; + *p_max *= ocelot_sb_pool[sb_index].cell_size; + + return 0; +} +EXPORT_SYMBOL(ocelot_sb_occ_tc_port_bind_get); + +int ocelot_devlink_sb_register(struct ocelot *ocelot) +{ + int err; + + err = devlink_sb_register(ocelot->devlink, OCELOT_SB_BUF, + ocelot->packet_buffer_size, 1, 1, + OCELOT_NUM_TC, OCELOT_NUM_TC); + if (err) + return err; + + err = devlink_sb_register(ocelot->devlink, OCELOT_SB_REF, + ocelot->num_frame_refs, 1, 1, + OCELOT_NUM_TC, OCELOT_NUM_TC); + if (err) { + devlink_sb_unregister(ocelot->devlink, OCELOT_SB_BUF); + return err; + } + + ocelot->pool_size[OCELOT_SB_BUF][OCELOT_SB_POOL_ING] = ocelot->packet_buffer_size; + ocelot->pool_size[OCELOT_SB_BUF][OCELOT_SB_POOL_EGR] = ocelot->packet_buffer_size; + ocelot->pool_size[OCELOT_SB_REF][OCELOT_SB_POOL_ING] = ocelot->num_frame_refs; + ocelot->pool_size[OCELOT_SB_REF][OCELOT_SB_POOL_EGR] = ocelot->num_frame_refs; + + ocelot_watermark_init(ocelot); + + return 0; +} +EXPORT_SYMBOL(ocelot_devlink_sb_register); + +void ocelot_devlink_sb_unregister(struct ocelot *ocelot) +{ + devlink_sb_unregister(ocelot->devlink, OCELOT_SB_BUF); + devlink_sb_unregister(ocelot->devlink, OCELOT_SB_REF); +} +EXPORT_SYMBOL(ocelot_devlink_sb_unregister); diff --git a/drivers/net/ethernet/mscc/ocelot_net.c b/drivers/net/ethernet/mscc/ocelot_net.c index 4485faefc2b16..a520fd485912d 100644 --- a/drivers/net/ethernet/mscc/ocelot_net.c +++ b/drivers/net/ethernet/mscc/ocelot_net.c @@ -1,14 +1,154 @@ // SPDX-License-Identifier: (GPL-2.0 OR MIT) /* Microsemi Ocelot Switch driver + * + * This contains glue logic between the switchdev driver operations and the + * mscc_ocelot_switch_lib. * * Copyright (c) 2017, 2019 Microsemi Corporation + * Copyright 2020-2021 NXP Semiconductors */ #include #include "ocelot.h" #include "ocelot_vcap.h" +static struct ocelot *devlink_port_to_ocelot(struct devlink_port *dlp) +{ + return devlink_priv(dlp->devlink); +} + +static int devlink_port_to_port(struct devlink_port *dlp) +{ + struct ocelot *ocelot = devlink_port_to_ocelot(dlp); + + return dlp - ocelot->devlink_ports; +} + +static int ocelot_devlink_sb_pool_get(struct devlink *dl, + unsigned int sb_index, u16 pool_index, + struct devlink_sb_pool_info *pool_info) +{ + struct ocelot *ocelot = devlink_priv(dl); + + return ocelot_sb_pool_get(ocelot, sb_index, pool_index, pool_info); +} + +static int ocelot_devlink_sb_pool_set(struct devlink *dl, unsigned int sb_index, + u16 pool_index, u32 size, + enum devlink_sb_threshold_type threshold_type, + struct netlink_ext_ack *extack) +{ + struct ocelot *ocelot = devlink_priv(dl); + + return ocelot_sb_pool_set(ocelot, sb_index, pool_index, size, + threshold_type, extack); +} + +static int ocelot_devlink_sb_port_pool_get(struct devlink_port *dlp, + unsigned int sb_index, u16 pool_index, + u32 *p_threshold) +{ + struct ocelot *ocelot = devlink_port_to_ocelot(dlp); + int port = devlink_port_to_port(dlp); + + return ocelot_sb_port_pool_get(ocelot, port, sb_index, pool_index, + p_threshold); +} + +static int ocelot_devlink_sb_port_pool_set(struct devlink_port *dlp, + unsigned int sb_index, u16 pool_index, + u32 threshold, + struct netlink_ext_ack *extack) +{ + struct ocelot *ocelot = devlink_port_to_ocelot(dlp); + int port = devlink_port_to_port(dlp); + + return ocelot_sb_port_pool_set(ocelot, port, sb_index, pool_index, + threshold, extack); +} + +static int +ocelot_devlink_sb_tc_pool_bind_get(struct devlink_port *dlp, + unsigned int sb_index, u16 tc_index, + enum devlink_sb_pool_type pool_type, + u16 *p_pool_index, u32 *p_threshold) +{ + struct ocelot *ocelot = devlink_port_to_ocelot(dlp); + int port = devlink_port_to_port(dlp); + + return ocelot_sb_tc_pool_bind_get(ocelot, port, sb_index, tc_index, + pool_type, p_pool_index, + p_threshold); +} + +static int +ocelot_devlink_sb_tc_pool_bind_set(struct devlink_port *dlp, + unsigned int sb_index, u16 tc_index, + enum devlink_sb_pool_type pool_type, + u16 pool_index, u32 threshold, + struct netlink_ext_ack *extack) +{ + struct ocelot *ocelot = devlink_port_to_ocelot(dlp); + int port = devlink_port_to_port(dlp); + + return ocelot_sb_tc_pool_bind_set(ocelot, port, sb_index, tc_index, + pool_type, pool_index, threshold, + extack); +} + +static int ocelot_devlink_sb_occ_snapshot(struct devlink *dl, + unsigned int sb_index) +{ + struct ocelot *ocelot = devlink_priv(dl); + + return ocelot_sb_occ_snapshot(ocelot, sb_index); +} + +static int ocelot_devlink_sb_occ_max_clear(struct devlink *dl, + unsigned int sb_index) +{ + struct ocelot *ocelot = devlink_priv(dl); + + return ocelot_sb_occ_max_clear(ocelot, sb_index); +} + +static int ocelot_devlink_sb_occ_port_pool_get(struct devlink_port *dlp, + unsigned int sb_index, + u16 pool_index, u32 *p_cur, + u32 *p_max) +{ + struct ocelot *ocelot = devlink_port_to_ocelot(dlp); + int port = devlink_port_to_port(dlp); + + return ocelot_sb_occ_port_pool_get(ocelot, port, sb_index, pool_index, + p_cur, p_max); +} + +static int +ocelot_devlink_sb_occ_tc_port_bind_get(struct devlink_port *dlp, + unsigned int sb_index, u16 tc_index, + enum devlink_sb_pool_type pool_type, + u32 *p_cur, u32 *p_max) +{ + struct ocelot *ocelot = devlink_port_to_ocelot(dlp); + int port = devlink_port_to_port(dlp); + + return ocelot_sb_occ_tc_port_bind_get(ocelot, port, sb_index, + tc_index, pool_type, + p_cur, p_max); +} + const struct devlink_ops ocelot_devlink_ops = { + .sb_pool_get = ocelot_devlink_sb_pool_get, + .sb_pool_set = ocelot_devlink_sb_pool_set, + .sb_port_pool_get = ocelot_devlink_sb_port_pool_get, + .sb_port_pool_set = ocelot_devlink_sb_port_pool_set, + .sb_tc_pool_bind_get = ocelot_devlink_sb_tc_pool_bind_get, + .sb_tc_pool_bind_set = ocelot_devlink_sb_tc_pool_bind_set, + .sb_occ_snapshot = ocelot_devlink_sb_occ_snapshot, + .sb_occ_max_clear = ocelot_devlink_sb_occ_max_clear, + .sb_occ_port_pool_get = ocelot_devlink_sb_occ_port_pool_get, + .sb_occ_tc_port_bind_get = ocelot_devlink_sb_occ_tc_port_bind_get, }; int ocelot_port_devlink_init(struct ocelot *ocelot, int port, diff --git a/drivers/net/ethernet/mscc/ocelot_vsc7514.c b/drivers/net/ethernet/mscc/ocelot_vsc7514.c index 28617023cd85c..30a38df08a219 100644 --- a/drivers/net/ethernet/mscc/ocelot_vsc7514.c +++ b/drivers/net/ethernet/mscc/ocelot_vsc7514.c @@ -1363,6 +1363,10 @@ static int mscc_ocelot_probe(struct platform_device *pdev) if (err) goto out_ocelot_devlink_unregister; + err = ocelot_devlink_sb_register(ocelot); + if (err) + goto out_ocelot_release_ports; + if (ocelot->ptp) { err = ocelot_init_timestamp(ocelot, &ocelot_ptp_clock_info); if (err) { @@ -1382,6 +1386,9 @@ static int mscc_ocelot_probe(struct platform_device *pdev) return 0; +out_ocelot_release_ports: + mscc_ocelot_release_ports(ocelot); + mscc_ocelot_teardown_devlink_ports(ocelot); out_ocelot_devlink_unregister: devlink_unregister(devlink); out_ocelot_deinit: @@ -1398,6 +1405,7 @@ static int mscc_ocelot_remove(struct platform_device *pdev) struct ocelot *ocelot = platform_get_drvdata(pdev); ocelot_deinit_timestamp(ocelot); + ocelot_devlink_sb_unregister(ocelot); mscc_ocelot_release_ports(ocelot); mscc_ocelot_teardown_devlink_ports(ocelot); devlink_unregister(ocelot->devlink); diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h index fc7dc66797399..cdc33fa05660b 100644 --- a/include/soc/mscc/ocelot.h +++ b/include/soc/mscc/ocelot.h @@ -579,6 +579,18 @@ struct ocelot_vlan { u16 vid; }; +enum ocelot_sb { + OCELOT_SB_BUF, + OCELOT_SB_REF, + OCELOT_SB_NUM, +}; + +enum ocelot_sb_pool { + OCELOT_SB_POOL_ING, + OCELOT_SB_POOL_EGR, + OCELOT_SB_POOL_NUM, +}; + struct ocelot_port { struct ocelot *ocelot; @@ -612,6 +624,7 @@ struct ocelot { const struct ocelot_stat_layout *stats_layout; unsigned int num_stats; + u32 pool_size[OCELOT_SB_NUM][OCELOT_SB_POOL_NUM]; int packet_buffer_size; int num_frame_refs; int num_mact_rows; @@ -783,4 +796,38 @@ int ocelot_port_mdb_add(struct ocelot *ocelot, int port, int ocelot_port_mdb_del(struct ocelot *ocelot, int port, const struct switchdev_obj_port_mdb *mdb); +int ocelot_devlink_sb_register(struct ocelot *ocelot); +void ocelot_devlink_sb_unregister(struct ocelot *ocelot); +int ocelot_sb_pool_get(struct ocelot *ocelot, unsigned int sb_index, + u16 pool_index, + struct devlink_sb_pool_info *pool_info); +int ocelot_sb_pool_set(struct ocelot *ocelot, unsigned int sb_index, + u16 pool_index, u32 size, + enum devlink_sb_threshold_type threshold_type, + struct netlink_ext_ack *extack); +int ocelot_sb_port_pool_get(struct ocelot *ocelot, int port, + unsigned int sb_index, u16 pool_index, + u32 *p_threshold); +int ocelot_sb_port_pool_set(struct ocelot *ocelot, int port, + unsigned int sb_index, u16 pool_index, + u32 threshold, struct netlink_ext_ack *extack); +int ocelot_sb_tc_pool_bind_get(struct ocelot *ocelot, int port, + unsigned int sb_index, u16 tc_index, + enum devlink_sb_pool_type pool_type, + u16 *p_pool_index, u32 *p_threshold); +int ocelot_sb_tc_pool_bind_set(struct ocelot *ocelot, int port, + unsigned int sb_index, u16 tc_index, + enum devlink_sb_pool_type pool_type, + u16 pool_index, u32 threshold, + struct netlink_ext_ack *extack); +int ocelot_sb_occ_snapshot(struct ocelot *ocelot, unsigned int sb_index); +int ocelot_sb_occ_max_clear(struct ocelot *ocelot, unsigned int sb_index); +int ocelot_sb_occ_port_pool_get(struct ocelot *ocelot, int port, + unsigned int sb_index, u16 pool_index, + u32 *p_cur, u32 *p_max); +int ocelot_sb_occ_tc_port_bind_get(struct ocelot *ocelot, int port, + unsigned int sb_index, u16 tc_index, + enum devlink_sb_pool_type pool_type, + u32 *p_cur, u32 *p_max); + #endif -- GitLab From 32d91b4af35312cf559a45a07aad1f3bde996dfb Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Fri, 15 Jan 2021 12:14:53 +0800 Subject: [PATCH 0939/2012] nfc: netlink: use &w->w in nfc_genl_rcv_nl_event Use the struct member w of the struct urelease_work directly instead of casting it. Signed-off-by: Geliang Tang Link: https://lore.kernel.org/r/f0ed86d6d54ac0834bd2e161d172bf7bb5647cf7.1610683862.git.geliangtang@gmail.com Signed-off-by: Jakub Kicinski --- net/nfc/netlink.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c index 573b38ad2f8ed..640906359c22b 100644 --- a/net/nfc/netlink.c +++ b/net/nfc/netlink.c @@ -1819,9 +1819,9 @@ static int nfc_genl_rcv_nl_event(struct notifier_block *this, w = kmalloc(sizeof(*w), GFP_ATOMIC); if (w) { - INIT_WORK((struct work_struct *) w, nfc_urelease_event_work); + INIT_WORK(&w->w, nfc_urelease_event_work); w->portid = n->portid; - schedule_work((struct work_struct *) w); + schedule_work(&w->w); } out: -- GitLab From b69df2608281b71575fbb3b9f426dbcc4be8a700 Mon Sep 17 00:00:00 2001 From: Menglong Dong Date: Thu, 14 Jan 2021 18:32:38 -0800 Subject: [PATCH 0940/2012] net: tap: check vlan with eth_type_vlan() method Replace some checks for ETH_P_8021Q and ETH_P_8021AD in drivers/net/tap.c with eth_type_vlan. Signed-off-by: Menglong Dong Link: https://lore.kernel.org/r/20210115023238.4681-1-dong.menglong@zte.com.cn Signed-off-by: Jakub Kicinski --- drivers/net/tap.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/net/tap.c b/drivers/net/tap.c index 3c652c8ac5ba7..ff4aa35979a19 100644 --- a/drivers/net/tap.c +++ b/drivers/net/tap.c @@ -713,8 +713,7 @@ static ssize_t tap_get_user(struct tap_queue *q, void *msg_control, skb_probe_transport_header(skb); /* Move network header to the right position for VLAN tagged packets */ - if ((skb->protocol == htons(ETH_P_8021Q) || - skb->protocol == htons(ETH_P_8021AD)) && + if (eth_type_vlan(skb->protocol) && __vlan_get_protocol(skb, skb->protocol, &depth) != 0) skb_set_network_header(skb, depth); @@ -1164,8 +1163,7 @@ static int tap_get_user_xdp(struct tap_queue *q, struct xdp_buff *xdp) } /* Move network header to the right position for VLAN tagged packets */ - if ((skb->protocol == htons(ETH_P_8021Q) || - skb->protocol == htons(ETH_P_8021AD)) && + if (eth_type_vlan(skb->protocol) && __vlan_get_protocol(skb, skb->protocol, &depth) != 0) skb_set_network_header(skb, depth); -- GitLab From f4d133d86af7f39a0f5bdaf7a888ec7b84733b5e Mon Sep 17 00:00:00 2001 From: Yejune Deng Date: Thu, 14 Jan 2021 12:14:56 +0800 Subject: [PATCH 0941/2012] tcp_cubic: use memset and offsetof init In bictcp_reset(), use memset and offsetof instead of = 0. Signed-off-by: Yejune Deng Link: https://lore.kernel.org/r/1610597696-128610-1-git-send-email-yejune.deng@gmail.com Signed-off-by: Jakub Kicinski --- net/ipv4/tcp_cubic.c | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c index c7bf5b26bf0c2..ffcbe46dacdb8 100644 --- a/net/ipv4/tcp_cubic.c +++ b/net/ipv4/tcp_cubic.c @@ -104,16 +104,7 @@ struct bictcp { static inline void bictcp_reset(struct bictcp *ca) { - ca->cnt = 0; - ca->last_max_cwnd = 0; - ca->last_cwnd = 0; - ca->last_time = 0; - ca->bic_origin_point = 0; - ca->bic_K = 0; - ca->delay_min = 0; - ca->epoch_start = 0; - ca->ack_cnt = 0; - ca->tcp_cwnd = 0; + memset(ca, 0, offsetof(struct bictcp, unused)); ca->found = 0; } -- GitLab From 9ab7e76aefc97a9aa664accb59d6e8dc5e52514a Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Sat, 9 Jan 2021 23:00:21 -0800 Subject: [PATCH 0942/2012] GTP: add support for flow based tunneling API Following patch add support for flow based tunneling API to send and recv GTP tunnel packet over tunnel metadata API. This would allow this device integration with OVS or eBPF using flow based tunneling APIs. Signed-off-by: Pravin B Shelar Link: https://lore.kernel.org/r/20210110070021.26822-1-pbshelar@fb.com Signed-off-by: Jakub Kicinski --- drivers/net/gtp.c | 527 +++++++++++++++++++++-------- include/uapi/linux/gtp.h | 12 + include/uapi/linux/if_link.h | 1 + include/uapi/linux/if_tunnel.h | 1 + tools/include/uapi/linux/if_link.h | 1 + 5 files changed, 398 insertions(+), 144 deletions(-) diff --git a/drivers/net/gtp.c b/drivers/net/gtp.c index 4c04e271f1844..851364314ecc4 100644 --- a/drivers/net/gtp.c +++ b/drivers/net/gtp.c @@ -21,6 +21,7 @@ #include #include +#include #include #include #include @@ -73,6 +74,9 @@ struct gtp_dev { unsigned int hash_size; struct hlist_head *tid_hash; struct hlist_head *addr_hash; + /* Used by LWT tunnel. */ + bool collect_md; + struct socket *collect_md_sock; }; static unsigned int gtp_net_id __read_mostly; @@ -179,33 +183,121 @@ static bool gtp_check_ms(struct sk_buff *skb, struct pdp_ctx *pctx, return false; } -static int gtp_rx(struct pdp_ctx *pctx, struct sk_buff *skb, - unsigned int hdrlen, unsigned int role) +static int gtp_set_tun_dst(struct gtp_dev *gtp, struct sk_buff *skb, + unsigned int hdrlen, u8 gtp_version, + __be64 tid, u8 flags) { - if (!gtp_check_ms(skb, pctx, hdrlen, role)) { - netdev_dbg(pctx->dev, "No PDP ctx for this MS\n"); - return 1; + struct metadata_dst *tun_dst; + int opts_len = 0; + + if (unlikely(flags & GTP1_F_MASK)) + opts_len = sizeof(struct gtpu_metadata); + + tun_dst = udp_tun_rx_dst(skb, gtp->sk1u->sk_family, TUNNEL_KEY, tid, opts_len); + if (!tun_dst) { + netdev_dbg(gtp->dev, "Failed to allocate tun_dst"); + goto err; } + netdev_dbg(gtp->dev, "attaching metadata_dst to skb, gtp ver %d hdrlen %d\n", + gtp_version, hdrlen); + if (unlikely(opts_len)) { + struct gtpu_metadata *opts; + struct gtp1_header *gtp1; + + opts = ip_tunnel_info_opts(&tun_dst->u.tun_info); + gtp1 = (struct gtp1_header *)(skb->data + sizeof(struct udphdr)); + opts->ver = GTP_METADATA_V1; + opts->flags = gtp1->flags; + opts->type = gtp1->type; + netdev_dbg(gtp->dev, "recved control pkt: flag %x type: %d\n", + opts->flags, opts->type); + tun_dst->u.tun_info.key.tun_flags |= TUNNEL_GTPU_OPT; + tun_dst->u.tun_info.options_len = opts_len; + skb->protocol = htons(0xffff); /* Unknown */ + } /* Get rid of the GTP + UDP headers. */ if (iptunnel_pull_header(skb, hdrlen, skb->protocol, - !net_eq(sock_net(pctx->sk), dev_net(pctx->dev)))) - return -1; + !net_eq(sock_net(gtp->sk1u), dev_net(gtp->dev)))) { + gtp->dev->stats.rx_length_errors++; + goto err; + } + + skb_dst_set(skb, &tun_dst->dst); + return 0; +err: + return -1; +} + +static int gtp_rx(struct gtp_dev *gtp, struct sk_buff *skb, + unsigned int hdrlen, u8 gtp_version, unsigned int role, + __be64 tid, u8 flags, u8 type) +{ + if (ip_tunnel_collect_metadata() || gtp->collect_md) { + int err; + + err = gtp_set_tun_dst(gtp, skb, hdrlen, gtp_version, tid, flags); + if (err) + goto err; + } else { + struct pdp_ctx *pctx; + + if (flags & GTP1_F_MASK) + hdrlen += 4; - netdev_dbg(pctx->dev, "forwarding packet from GGSN to uplink\n"); + if (type != GTP_TPDU) + return 1; + + if (gtp_version == GTP_V0) + pctx = gtp0_pdp_find(gtp, be64_to_cpu(tid)); + else + pctx = gtp1_pdp_find(gtp, be64_to_cpu(tid)); + if (!pctx) { + netdev_dbg(gtp->dev, "No PDP ctx to decap skb=%p\n", skb); + return 1; + } + + if (!gtp_check_ms(skb, pctx, hdrlen, role)) { + netdev_dbg(pctx->dev, "No PDP ctx for this MS\n"); + return 1; + } + /* Get rid of the GTP + UDP headers. */ + if (iptunnel_pull_header(skb, hdrlen, skb->protocol, + !net_eq(sock_net(pctx->sk), dev_net(gtp->dev)))) { + gtp->dev->stats.rx_length_errors++; + goto err; + } + } + netdev_dbg(gtp->dev, "forwarding packet from GGSN to uplink\n"); /* Now that the UDP and the GTP header have been removed, set up the * new network header. This is required by the upper layer to * calculate the transport header. */ skb_reset_network_header(skb); + if (pskb_may_pull(skb, sizeof(struct iphdr))) { + struct iphdr *iph; + + iph = ip_hdr(skb); + if (iph->version == 4) { + netdev_dbg(gtp->dev, "inner pkt: ipv4"); + skb->protocol = htons(ETH_P_IP); + } else if (iph->version == 6) { + netdev_dbg(gtp->dev, "inner pkt: ipv6"); + skb->protocol = htons(ETH_P_IPV6); + } else { + netdev_dbg(gtp->dev, "inner pkt error: Unknown type"); + } + } - skb->dev = pctx->dev; - - dev_sw_netstats_rx_add(pctx->dev, skb->len); - + skb->dev = gtp->dev; + dev_sw_netstats_rx_add(gtp->dev, skb->len); netif_rx(skb); return 0; + +err: + gtp->dev->stats.rx_dropped++; + return -1; } /* 1 means pass up to the stack, -1 means drop and 0 means decapsulated. */ @@ -214,7 +306,6 @@ static int gtp0_udp_encap_recv(struct gtp_dev *gtp, struct sk_buff *skb) unsigned int hdrlen = sizeof(struct udphdr) + sizeof(struct gtp0_header); struct gtp0_header *gtp0; - struct pdp_ctx *pctx; if (!pskb_may_pull(skb, hdrlen)) return -1; @@ -224,16 +315,7 @@ static int gtp0_udp_encap_recv(struct gtp_dev *gtp, struct sk_buff *skb) if ((gtp0->flags >> 5) != GTP_V0) return 1; - if (gtp0->type != GTP_TPDU) - return 1; - - pctx = gtp0_pdp_find(gtp, be64_to_cpu(gtp0->tid)); - if (!pctx) { - netdev_dbg(gtp->dev, "No PDP ctx to decap skb=%p\n", skb); - return 1; - } - - return gtp_rx(pctx, skb, hdrlen, gtp->role); + return gtp_rx(gtp, skb, hdrlen, GTP_V0, gtp->role, gtp0->tid, gtp0->flags, gtp0->type); } static int gtp1u_udp_encap_recv(struct gtp_dev *gtp, struct sk_buff *skb) @@ -241,41 +323,30 @@ static int gtp1u_udp_encap_recv(struct gtp_dev *gtp, struct sk_buff *skb) unsigned int hdrlen = sizeof(struct udphdr) + sizeof(struct gtp1_header); struct gtp1_header *gtp1; - struct pdp_ctx *pctx; if (!pskb_may_pull(skb, hdrlen)) return -1; gtp1 = (struct gtp1_header *)(skb->data + sizeof(struct udphdr)); + netdev_dbg(gtp->dev, "GTPv1 recv: flags %x\n", gtp1->flags); if ((gtp1->flags >> 5) != GTP_V1) return 1; - if (gtp1->type != GTP_TPDU) - return 1; - /* From 29.060: "This field shall be present if and only if any one or * more of the S, PN and E flags are set.". * * If any of the bit is set, then the remaining ones also have to be * set. */ - if (gtp1->flags & GTP1_F_MASK) - hdrlen += 4; - /* Make sure the header is larger enough, including extensions. */ if (!pskb_may_pull(skb, hdrlen)) return -1; gtp1 = (struct gtp1_header *)(skb->data + sizeof(struct udphdr)); - pctx = gtp1_pdp_find(gtp, ntohl(gtp1->tid)); - if (!pctx) { - netdev_dbg(gtp->dev, "No PDP ctx to decap skb=%p\n", skb); - return 1; - } - - return gtp_rx(pctx, skb, hdrlen, gtp->role); + return gtp_rx(gtp, skb, hdrlen, GTP_V1, gtp->role, + key32_to_tunnel_id(gtp1->tid), gtp1->flags, gtp1->type); } static void __gtp_encap_destroy(struct sock *sk) @@ -315,6 +386,11 @@ static void gtp_encap_disable(struct gtp_dev *gtp) { gtp_encap_disable_sock(gtp->sk0); gtp_encap_disable_sock(gtp->sk1u); + if (gtp->collect_md_sock) { + udp_tunnel_sock_release(gtp->collect_md_sock); + gtp->collect_md_sock = NULL; + netdev_dbg(gtp->dev, "GTP socket released.\n"); + } } /* UDP encapsulation receive handler. See net/ipv4/udp.c. @@ -329,7 +405,8 @@ static int gtp_encap_recv(struct sock *sk, struct sk_buff *skb) if (!gtp) return 1; - netdev_dbg(gtp->dev, "encap_recv sk=%p\n", sk); + netdev_dbg(gtp->dev, "encap_recv sk=%p type %d\n", + sk, udp_sk(sk)->encap_type); switch (udp_sk(sk)->encap_type) { case UDP_ENCAP_GTP0: @@ -383,12 +460,13 @@ static void gtp_dev_uninit(struct net_device *dev) static struct rtable *ip4_route_output_gtp(struct flowi4 *fl4, const struct sock *sk, - __be32 daddr) + __be32 daddr, + __be32 saddr) { memset(fl4, 0, sizeof(*fl4)); fl4->flowi4_oif = sk->sk_bound_dev_if; fl4->daddr = daddr; - fl4->saddr = inet_sk(sk)->inet_saddr; + fl4->saddr = saddr; fl4->flowi4_tos = RT_CONN_FLAGS(sk); fl4->flowi4_proto = sk->sk_protocol; @@ -412,7 +490,7 @@ static inline void gtp0_push_header(struct sk_buff *skb, struct pdp_ctx *pctx) gtp0->tid = cpu_to_be64(pctx->u.v0.tid); } -static inline void gtp1_push_header(struct sk_buff *skb, struct pdp_ctx *pctx) +static inline void gtp1_push_header(struct sk_buff *skb, __be32 tid) { int payload_len = skb->len; struct gtp1_header *gtp1; @@ -428,46 +506,63 @@ static inline void gtp1_push_header(struct sk_buff *skb, struct pdp_ctx *pctx) gtp1->flags = 0x30; /* v1, GTP-non-prime. */ gtp1->type = GTP_TPDU; gtp1->length = htons(payload_len); - gtp1->tid = htonl(pctx->u.v1.o_tei); + gtp1->tid = tid; /* TODO: Suppport for extension header, sequence number and N-PDU. * Update the length field if any of them is available. */ } -struct gtp_pktinfo { - struct sock *sk; - struct iphdr *iph; - struct flowi4 fl4; - struct rtable *rt; - struct pdp_ctx *pctx; - struct net_device *dev; - __be16 gtph_port; -}; - -static void gtp_push_header(struct sk_buff *skb, struct gtp_pktinfo *pktinfo) +static inline int gtp1_push_control_header(struct sk_buff *skb, + __be32 tid, + struct gtpu_metadata *opts, + struct net_device *dev) { - switch (pktinfo->pctx->gtp_version) { - case GTP_V0: - pktinfo->gtph_port = htons(GTP0_PORT); - gtp0_push_header(skb, pktinfo->pctx); - break; - case GTP_V1: - pktinfo->gtph_port = htons(GTP1U_PORT); - gtp1_push_header(skb, pktinfo->pctx); - break; + struct gtp1_header *gtp1c; + int payload_len; + + if (opts->ver != GTP_METADATA_V1) + return -ENOENT; + + if (opts->type == 0xFE) { + /* for end marker ignore skb data. */ + netdev_dbg(dev, "xmit pkt with null data"); + pskb_trim(skb, 0); } + if (skb_cow_head(skb, sizeof(*gtp1c)) < 0) + return -ENOMEM; + + payload_len = skb->len; + + gtp1c = skb_push(skb, sizeof(*gtp1c)); + + gtp1c->flags = opts->flags; + gtp1c->type = opts->type; + gtp1c->length = htons(payload_len); + gtp1c->tid = tid; + netdev_dbg(dev, "xmit control pkt: ver %d flags %x type %x pkt len %d tid %x", + opts->ver, opts->flags, opts->type, skb->len, tid); + return 0; } +struct gtp_pktinfo { + struct sock *sk; + __u8 tos; + struct flowi4 fl4; + struct rtable *rt; + struct net_device *dev; + __be16 gtph_port; +}; + static inline void gtp_set_pktinfo_ipv4(struct gtp_pktinfo *pktinfo, - struct sock *sk, struct iphdr *iph, - struct pdp_ctx *pctx, struct rtable *rt, + struct sock *sk, + __u8 tos, + struct rtable *rt, struct flowi4 *fl4, struct net_device *dev) { pktinfo->sk = sk; - pktinfo->iph = iph; - pktinfo->pctx = pctx; + pktinfo->tos = tos; pktinfo->rt = rt; pktinfo->fl4 = *fl4; pktinfo->dev = dev; @@ -477,40 +572,99 @@ static int gtp_build_skb_ip4(struct sk_buff *skb, struct net_device *dev, struct gtp_pktinfo *pktinfo) { struct gtp_dev *gtp = netdev_priv(dev); + struct gtpu_metadata *opts = NULL; + struct sock *sk = NULL; struct pdp_ctx *pctx; struct rtable *rt; struct flowi4 fl4; - struct iphdr *iph; - __be16 df; + u8 gtp_version; + __be16 df = 0; + __be32 tun_id; + __be32 daddr; + __be32 saddr; + __u8 tos; int mtu; - /* Read the IP destination address and resolve the PDP context. - * Prepend PDP header with TEI/TID from PDP ctx. - */ - iph = ip_hdr(skb); - if (gtp->role == GTP_ROLE_SGSN) - pctx = ipv4_pdp_find(gtp, iph->saddr); - else - pctx = ipv4_pdp_find(gtp, iph->daddr); + if (gtp->collect_md) { + /* LWT GTP1U encap */ + struct ip_tunnel_info *info = NULL; - if (!pctx) { - netdev_dbg(dev, "no PDP ctx found for %pI4, skip\n", - &iph->daddr); - return -ENOENT; + info = skb_tunnel_info(skb); + if (!info) { + netdev_dbg(dev, "missing tunnel info"); + return -ENOENT; + } + if (info->key.tp_dst && ntohs(info->key.tp_dst) != GTP1U_PORT) { + netdev_dbg(dev, "unexpected GTP dst port: %d", ntohs(info->key.tp_dst)); + return -EOPNOTSUPP; + } + pctx = NULL; + gtp_version = GTP_V1; + tun_id = tunnel_id_to_key32(info->key.tun_id); + daddr = info->key.u.ipv4.dst; + saddr = info->key.u.ipv4.src; + sk = gtp->sk1u; + if (!sk) { + netdev_dbg(dev, "missing tunnel sock"); + return -EOPNOTSUPP; + } + tos = info->key.tos; + if (info->key.tun_flags & TUNNEL_DONT_FRAGMENT) + df = htons(IP_DF); + + if (info->options_len != 0) { + if (info->key.tun_flags & TUNNEL_GTPU_OPT) { + opts = ip_tunnel_info_opts(info); + } else { + netdev_dbg(dev, "missing tunnel metadata for control pkt"); + return -EOPNOTSUPP; + } + } + netdev_dbg(dev, "flow-based GTP1U encap: tunnel id %d\n", + be32_to_cpu(tun_id)); + } else { + struct iphdr *iph; + + if (ntohs(skb->protocol) != ETH_P_IP) + return -EOPNOTSUPP; + + iph = ip_hdr(skb); + + /* Read the IP destination address and resolve the PDP context. + * Prepend PDP header with TEI/TID from PDP ctx. + */ + if (gtp->role == GTP_ROLE_SGSN) + pctx = ipv4_pdp_find(gtp, iph->saddr); + else + pctx = ipv4_pdp_find(gtp, iph->daddr); + + if (!pctx) { + netdev_dbg(dev, "no PDP ctx found for %pI4, skip\n", + &iph->daddr); + return -ENOENT; + } + sk = pctx->sk; + netdev_dbg(dev, "found PDP context %p\n", pctx); + + gtp_version = pctx->gtp_version; + tun_id = htonl(pctx->u.v1.o_tei); + daddr = pctx->peer_addr_ip4.s_addr; + saddr = inet_sk(sk)->inet_saddr; + tos = iph->tos; + df = iph->frag_off; + netdev_dbg(dev, "gtp -> IP src: %pI4 dst: %pI4\n", + &iph->saddr, &iph->daddr); } - netdev_dbg(dev, "found PDP context %p\n", pctx); - rt = ip4_route_output_gtp(&fl4, pctx->sk, pctx->peer_addr_ip4.s_addr); + rt = ip4_route_output_gtp(&fl4, sk, daddr, saddr); if (IS_ERR(rt)) { - netdev_dbg(dev, "no route to SSGN %pI4\n", - &pctx->peer_addr_ip4.s_addr); + netdev_dbg(dev, "no route to SSGN %pI4\n", &daddr); dev->stats.tx_carrier_errors++; goto err; } if (rt->dst.dev == dev) { - netdev_dbg(dev, "circular route to SSGN %pI4\n", - &pctx->peer_addr_ip4.s_addr); + netdev_dbg(dev, "circular route to SSGN %pI4\n", &daddr); dev->stats.collisions++; goto err_rt; } @@ -518,11 +672,10 @@ static int gtp_build_skb_ip4(struct sk_buff *skb, struct net_device *dev, skb_dst_drop(skb); /* This is similar to tnl_update_pmtu(). */ - df = iph->frag_off; if (df) { mtu = dst_mtu(&rt->dst) - dev->hard_header_len - sizeof(struct iphdr) - sizeof(struct udphdr); - switch (pctx->gtp_version) { + switch (gtp_version) { case GTP_V0: mtu -= sizeof(struct gtp0_header); break; @@ -536,17 +689,38 @@ static int gtp_build_skb_ip4(struct sk_buff *skb, struct net_device *dev, rt->dst.ops->update_pmtu(&rt->dst, NULL, skb, mtu, false); - if (!skb_is_gso(skb) && (iph->frag_off & htons(IP_DF)) && - mtu < ntohs(iph->tot_len)) { - netdev_dbg(dev, "packet too big, fragmentation needed\n"); + if (!skb_is_gso(skb) && (df & htons(IP_DF)) && mtu < skb->len) { + netdev_dbg(dev, "packet too big, fragmentation needed"); memset(IPCB(skb), 0, sizeof(*IPCB(skb))); icmp_ndo_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); goto err_rt; } - gtp_set_pktinfo_ipv4(pktinfo, pctx->sk, iph, pctx, rt, &fl4, dev); - gtp_push_header(skb, pktinfo); + gtp_set_pktinfo_ipv4(pktinfo, sk, tos, rt, &fl4, dev); + + if (unlikely(opts)) { + int err; + + pktinfo->gtph_port = htons(GTP1U_PORT); + err = gtp1_push_control_header(skb, tun_id, opts, dev); + if (err) { + netdev_info(dev, "cntr pkt error %d", err); + goto err_rt; + } + return 0; + } + + switch (gtp_version) { + case GTP_V0: + pktinfo->gtph_port = htons(GTP0_PORT); + gtp0_push_header(skb, pctx); + break; + case GTP_V1: + pktinfo->gtph_port = htons(GTP1U_PORT); + gtp1_push_header(skb, tun_id); + break; + } return 0; err_rt: @@ -557,7 +731,6 @@ err: static netdev_tx_t gtp_dev_xmit(struct sk_buff *skb, struct net_device *dev) { - unsigned int proto = ntohs(skb->protocol); struct gtp_pktinfo pktinfo; int err; @@ -569,32 +742,22 @@ static netdev_tx_t gtp_dev_xmit(struct sk_buff *skb, struct net_device *dev) /* PDP context lookups in gtp_build_skb_*() need rcu read-side lock. */ rcu_read_lock(); - switch (proto) { - case ETH_P_IP: - err = gtp_build_skb_ip4(skb, dev, &pktinfo); - break; - default: - err = -EOPNOTSUPP; - break; - } + err = gtp_build_skb_ip4(skb, dev, &pktinfo); rcu_read_unlock(); if (err < 0) goto tx_err; - switch (proto) { - case ETH_P_IP: - netdev_dbg(pktinfo.dev, "gtp -> IP src: %pI4 dst: %pI4\n", - &pktinfo.iph->saddr, &pktinfo.iph->daddr); - udp_tunnel_xmit_skb(pktinfo.rt, pktinfo.sk, skb, - pktinfo.fl4.saddr, pktinfo.fl4.daddr, - pktinfo.iph->tos, - ip4_dst_hoplimit(&pktinfo.rt->dst), - 0, - pktinfo.gtph_port, pktinfo.gtph_port, - true, false); - break; - } + udp_tunnel_xmit_skb(pktinfo.rt, pktinfo.sk, skb, + pktinfo.fl4.saddr, + pktinfo.fl4.daddr, + pktinfo.tos, + ip4_dst_hoplimit(&pktinfo.rt->dst), + 0, + pktinfo.gtph_port, + pktinfo.gtph_port, + true, + false); return NETDEV_TX_OK; tx_err: @@ -610,6 +773,19 @@ static const struct net_device_ops gtp_netdev_ops = { .ndo_get_stats64 = dev_get_tstats64, }; +static struct gtp_dev *gtp_find_flow_based_dev(struct net *net) +{ + struct gtp_net *gn = net_generic(net, gtp_net_id); + struct gtp_dev *gtp; + + list_for_each_entry(gtp, &gn->gtp_dev_list, list) { + if (gtp->collect_md) + return gtp; + } + + return NULL; +} + static void gtp_link_setup(struct net_device *dev) { dev->netdev_ops = >p_netdev_ops; @@ -634,7 +810,7 @@ static void gtp_link_setup(struct net_device *dev) } static int gtp_hashtable_new(struct gtp_dev *gtp, int hsize); -static int gtp_encap_enable(struct gtp_dev *gtp, struct nlattr *data[]); +static int gtp_encap_enable(struct gtp_dev *gtp, struct net_device *dev, struct nlattr *data[]); static void gtp_destructor(struct net_device *dev) { @@ -652,11 +828,24 @@ static int gtp_newlink(struct net *src_net, struct net_device *dev, struct gtp_net *gn; int hashsize, err; - if (!data[IFLA_GTP_FD0] && !data[IFLA_GTP_FD1]) + if (!data[IFLA_GTP_FD0] && !data[IFLA_GTP_FD1] && + !data[IFLA_GTP_COLLECT_METADATA]) return -EINVAL; gtp = netdev_priv(dev); + if (data[IFLA_GTP_COLLECT_METADATA]) { + if (data[IFLA_GTP_FD0]) { + netdev_dbg(dev, "LWT device does not support setting v0 socket"); + return -EINVAL; + } + if (gtp_find_flow_based_dev(src_net)) { + netdev_dbg(dev, "LWT device already exist"); + return -EBUSY; + } + gtp->collect_md = true; + } + if (!data[IFLA_GTP_PDP_HASHSIZE]) { hashsize = 1024; } else { @@ -669,7 +858,7 @@ static int gtp_newlink(struct net *src_net, struct net_device *dev, if (err < 0) return err; - err = gtp_encap_enable(gtp, data); + err = gtp_encap_enable(gtp, dev, data); if (err < 0) goto out_hashtable; @@ -683,7 +872,7 @@ static int gtp_newlink(struct net *src_net, struct net_device *dev, list_add_rcu(>p->list, &gn->gtp_dev_list); dev->priv_destructor = gtp_destructor; - netdev_dbg(dev, "registered new GTP interface\n"); + netdev_dbg(dev, "registered new GTP interface %s\n", dev->name); return 0; @@ -714,6 +903,7 @@ static const struct nla_policy gtp_policy[IFLA_GTP_MAX + 1] = { [IFLA_GTP_FD1] = { .type = NLA_U32 }, [IFLA_GTP_PDP_HASHSIZE] = { .type = NLA_U32 }, [IFLA_GTP_ROLE] = { .type = NLA_U32 }, + [IFLA_GTP_COLLECT_METADATA] = { .type = NLA_FLAG }, }; static int gtp_validate(struct nlattr *tb[], struct nlattr *data[], @@ -737,6 +927,9 @@ static int gtp_fill_info(struct sk_buff *skb, const struct net_device *dev) if (nla_put_u32(skb, IFLA_GTP_PDP_HASHSIZE, gtp->hash_size)) goto nla_put_failure; + if (gtp->collect_md && nla_put_flag(skb, IFLA_GTP_COLLECT_METADATA)) + goto nla_put_failure; + return 0; nla_put_failure: @@ -782,35 +975,24 @@ err1: return -ENOMEM; } -static struct sock *gtp_encap_enable_socket(int fd, int type, - struct gtp_dev *gtp) +static int __gtp_encap_enable_socket(struct socket *sock, int type, + struct gtp_dev *gtp) { struct udp_tunnel_sock_cfg tuncfg = {NULL}; - struct socket *sock; struct sock *sk; - int err; - - pr_debug("enable gtp on %d, %d\n", fd, type); - - sock = sockfd_lookup(fd, &err); - if (!sock) { - pr_debug("gtp socket fd=%d not found\n", fd); - return NULL; - } sk = sock->sk; if (sk->sk_protocol != IPPROTO_UDP || sk->sk_type != SOCK_DGRAM || (sk->sk_family != AF_INET && sk->sk_family != AF_INET6)) { - pr_debug("socket fd=%d not UDP\n", fd); - sk = ERR_PTR(-EINVAL); - goto out_sock; + pr_debug("socket not UDP\n"); + return -EINVAL; } lock_sock(sk); if (sk->sk_user_data) { - sk = ERR_PTR(-EBUSY); - goto out_rel_sock; + release_sock(sock->sk); + return -EBUSY; } sock_hold(sk); @@ -821,15 +1003,58 @@ static struct sock *gtp_encap_enable_socket(int fd, int type, tuncfg.encap_destroy = gtp_encap_destroy; setup_udp_tunnel_sock(sock_net(sock->sk), sock, &tuncfg); - -out_rel_sock: release_sock(sock->sk); -out_sock: + return 0; +} + +static struct sock *gtp_encap_enable_socket(int fd, int type, + struct gtp_dev *gtp) +{ + struct socket *sock; + int err; + + pr_debug("enable gtp on %d, %d\n", fd, type); + + sock = sockfd_lookup(fd, &err); + if (!sock) { + pr_debug("gtp socket fd=%d not found\n", fd); + return NULL; + } + err = __gtp_encap_enable_socket(sock, type, gtp); sockfd_put(sock); - return sk; + if (err) + return ERR_PTR(err); + + return sock->sk; } -static int gtp_encap_enable(struct gtp_dev *gtp, struct nlattr *data[]) +static struct socket *gtp_create_gtp_socket(struct gtp_dev *gtp, struct net_device *dev) +{ + struct udp_port_cfg udp_conf; + struct socket *sock; + int err; + + memset(&udp_conf, 0, sizeof(udp_conf)); + udp_conf.family = AF_INET; + udp_conf.local_ip.s_addr = htonl(INADDR_ANY); + udp_conf.local_udp_port = htons(GTP1U_PORT); + + err = udp_sock_create(dev_net(dev), &udp_conf, &sock); + if (err < 0) { + pr_debug("create gtp sock failed: %d\n", err); + return ERR_PTR(err); + } + err = __gtp_encap_enable_socket(sock, UDP_ENCAP_GTP1U, gtp); + if (err) { + pr_debug("enable gtp sock encap failed: %d\n", err); + udp_tunnel_sock_release(sock); + return ERR_PTR(err); + } + pr_debug("create gtp sock done\n"); + return sock; +} + +static int gtp_encap_enable(struct gtp_dev *gtp, struct net_device *dev, struct nlattr *data[]) { struct sock *sk1u = NULL; struct sock *sk0 = NULL; @@ -853,11 +1078,25 @@ static int gtp_encap_enable(struct gtp_dev *gtp, struct nlattr *data[]) } } + if (data[IFLA_GTP_COLLECT_METADATA]) { + struct socket *sock; + + if (!sk1u) { + sock = gtp_create_gtp_socket(gtp, dev); + if (IS_ERR(sock)) + return PTR_ERR(sock); + + gtp->collect_md_sock = sock; + sk1u = sock->sk; + } else { + gtp->collect_md_sock = NULL; + } + } + if (data[IFLA_GTP_ROLE]) { role = nla_get_u32(data[IFLA_GTP_ROLE]); if (role > GTP_ROLE_SGSN) { - gtp_encap_disable_sock(sk0); - gtp_encap_disable_sock(sk1u); + gtp_encap_disable(gtp); return -EINVAL; } } @@ -1416,7 +1655,7 @@ static int __init gtp_init(void) if (err < 0) goto unreg_genl_family; - pr_info("GTP module loaded (pdp ctx size %zd bytes)\n", + pr_info("GTP module loaded (pdp ctx size %zd bytes) with tnl-md support\n", sizeof(struct pdp_ctx)); return 0; diff --git a/include/uapi/linux/gtp.h b/include/uapi/linux/gtp.h index 79f9191bbb24c..62aff78b7c569 100644 --- a/include/uapi/linux/gtp.h +++ b/include/uapi/linux/gtp.h @@ -2,6 +2,8 @@ #ifndef _UAPI_LINUX_GTP_H_ #define _UAPI_LINUX_GTP_H_ +#include + #define GTP_GENL_MCGRP_NAME "gtp" enum gtp_genl_cmds { @@ -34,4 +36,14 @@ enum gtp_attrs { }; #define GTPA_MAX (__GTPA_MAX + 1) +enum { + GTP_METADATA_V1 +}; + +struct gtpu_metadata { + __u8 ver; + __u8 flags; + __u8 type; +}; + #endif /* _UAPI_LINUX_GTP_H_ */ diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 82708c6db4322..2bd0d8bbcdb25 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -809,6 +809,7 @@ enum { IFLA_GTP_FD1, IFLA_GTP_PDP_HASHSIZE, IFLA_GTP_ROLE, + IFLA_GTP_COLLECT_METADATA, __IFLA_GTP_MAX, }; #define IFLA_GTP_MAX (__IFLA_GTP_MAX - 1) diff --git a/include/uapi/linux/if_tunnel.h b/include/uapi/linux/if_tunnel.h index 7d9105533c7b9..802da679fab1c 100644 --- a/include/uapi/linux/if_tunnel.h +++ b/include/uapi/linux/if_tunnel.h @@ -176,6 +176,7 @@ enum { #define TUNNEL_VXLAN_OPT __cpu_to_be16(0x1000) #define TUNNEL_NOCACHE __cpu_to_be16(0x2000) #define TUNNEL_ERSPAN_OPT __cpu_to_be16(0x4000) +#define TUNNEL_GTPU_OPT __cpu_to_be16(0x8000) #define TUNNEL_OPTIONS_PRESENT \ (TUNNEL_GENEVE_OPT | TUNNEL_VXLAN_OPT | TUNNEL_ERSPAN_OPT) diff --git a/tools/include/uapi/linux/if_link.h b/tools/include/uapi/linux/if_link.h index d208b2af697fd..28d649bda686a 100644 --- a/tools/include/uapi/linux/if_link.h +++ b/tools/include/uapi/linux/if_link.h @@ -617,6 +617,7 @@ enum { IFLA_GTP_FD1, IFLA_GTP_PDP_HASHSIZE, IFLA_GTP_ROLE, + IFLA_GTP_COLLECT_METADATA, __IFLA_GTP_MAX, }; #define IFLA_GTP_MAX (__IFLA_GTP_MAX - 1) -- GitLab From 797f0375dd2ef5cdc68ac23450cbae9a5c67a74e Mon Sep 17 00:00:00 2001 From: Atish Patra Date: Mon, 11 Jan 2021 15:45:01 -0800 Subject: [PATCH 0943/2012] RISC-V: Do not allocate memblock while iterating reserved memblocks Currently, resource tree allocates memory blocks while iterating on the list. It leads to following kernel warning because memblock allocation also invokes memory block reservation API. [ 0.000000] ------------[ cut here ]------------ [ 0.000000] WARNING: CPU: 0 PID: 0 at kernel/resource.c:795 __insert_resource+0x8e/0xd0 [ 0.000000] Modules linked in: [ 0.000000] CPU: 0 PID: 0 Comm: swapper Not tainted 5.10.0-00022-ge20097fb37e2-dirty #549 [ 0.000000] epc: c00125c2 ra : c001262c sp : c1c01f50 [ 0.000000] gp : c1d456e0 tp : c1c0a980 t0 : ffffcf20 [ 0.000000] t1 : 00000000 t2 : 00000000 s0 : c1c01f60 [ 0.000000] s1 : ffffcf00 a0 : ffffff00 a1 : c1c0c0c4 [ 0.000000] a2 : 80c12b15 a3 : 80402000 a4 : 80402000 [ 0.000000] a5 : c1c0c0c4 a6 : 80c12b15 a7 : f5faf600 [ 0.000000] s2 : c1c0c0c4 s3 : c1c0e000 s4 : c1009a80 [ 0.000000] s5 : c1c0c000 s6 : c1d48000 s7 : c1613b4c [ 0.000000] s8 : 00000fff s9 : 80000200 s10: c1613b40 [ 0.000000] s11: 00000000 t3 : c1d4a000 t4 : ffffffff This is also unnecessary as we can pre-compute the total memblocks required for each memory region and allocate it before the loop. It save precious boot time not going through memblock allocation code every time. Fixes: 00ab027a3b82 ("RISC-V: Add kernel image sections to the resource tree") Reviewed-by: Anup Patel Tested-by: Geert Uytterhoeven Signed-off-by: Atish Patra Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/setup.c | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c index 1d85e9bf783cf..3fa3f26dde856 100644 --- a/arch/riscv/kernel/setup.c +++ b/arch/riscv/kernel/setup.c @@ -127,7 +127,9 @@ static void __init init_resources(void) { struct memblock_region *region = NULL; struct resource *res = NULL; - int ret = 0; + struct resource *mem_res = NULL; + size_t mem_res_sz = 0; + int ret = 0, i = 0; code_res.start = __pa_symbol(_text); code_res.end = __pa_symbol(_etext) - 1; @@ -145,16 +147,17 @@ static void __init init_resources(void) bss_res.end = __pa_symbol(__bss_stop) - 1; bss_res.flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; + mem_res_sz = (memblock.memory.cnt + memblock.reserved.cnt) * sizeof(*mem_res); + mem_res = memblock_alloc(mem_res_sz, SMP_CACHE_BYTES); + if (!mem_res) + panic("%s: Failed to allocate %zu bytes\n", __func__, mem_res_sz); /* * Start by adding the reserved regions, if they overlap * with /memory regions, insert_resource later on will take * care of it. */ for_each_reserved_mem_region(region) { - res = memblock_alloc(sizeof(struct resource), SMP_CACHE_BYTES); - if (!res) - panic("%s: Failed to allocate %zu bytes\n", __func__, - sizeof(struct resource)); + res = &mem_res[i++]; res->name = "Reserved"; res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; @@ -171,8 +174,10 @@ static void __init init_resources(void) * Ignore any other reserved regions within * system memory. */ - if (memblock_is_memory(res->start)) + if (memblock_is_memory(res->start)) { + memblock_free((phys_addr_t) res, sizeof(struct resource)); continue; + } ret = add_resource(&iomem_resource, res); if (ret < 0) @@ -181,10 +186,7 @@ static void __init init_resources(void) /* Add /memory regions to the resource tree */ for_each_mem_region(region) { - res = memblock_alloc(sizeof(struct resource), SMP_CACHE_BYTES); - if (!res) - panic("%s: Failed to allocate %zu bytes\n", __func__, - sizeof(struct resource)); + res = &mem_res[i++]; if (unlikely(memblock_is_nomap(region))) { res->name = "Reserved"; @@ -205,9 +207,9 @@ static void __init init_resources(void) return; error: - memblock_free((phys_addr_t) res, sizeof(struct resource)); /* Better an empty resource tree than an inconsistent one */ release_child_resources(&iomem_resource); + memblock_free((phys_addr_t) mem_res, mem_res_sz); } -- GitLab From abb8e86b269604e906a6a4af7a09f04b72dbb862 Mon Sep 17 00:00:00 2001 From: Atish Patra Date: Mon, 11 Jan 2021 15:45:02 -0800 Subject: [PATCH 0944/2012] RISC-V: Set current memblock limit Currently, linux kernel can not use last 4k bytes of addressable space because IS_ERR_VALUE macro treats those as an error. This will be an issue for RV32 as any memblock allocator potentially allocate chunk of memory from the end of DRAM (2GB) leading bad address error even though the address was technically valid. Fix this issue by limiting the memblock if available memory spans the entire address space. Reviewed-by: Anup Patel Signed-off-by: Atish Patra Signed-off-by: Palmer Dabbelt --- arch/riscv/mm/init.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index bf5379135e39b..7cd4993f4ff21 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -157,9 +157,10 @@ disable: void __init setup_bootmem(void) { phys_addr_t mem_start = 0; - phys_addr_t start, end = 0; + phys_addr_t start, dram_end, end = 0; phys_addr_t vmlinux_end = __pa_symbol(&_end); phys_addr_t vmlinux_start = __pa_symbol(&_start); + phys_addr_t max_mapped_addr = __pa(~(ulong)0); u64 i; /* Find the memory region containing the kernel */ @@ -181,7 +182,18 @@ void __init setup_bootmem(void) /* Reserve from the start of the kernel to the end of the kernel */ memblock_reserve(vmlinux_start, vmlinux_end - vmlinux_start); - max_pfn = PFN_DOWN(memblock_end_of_DRAM()); + dram_end = memblock_end_of_DRAM(); + + /* + * memblock allocator is not aware of the fact that last 4K bytes of + * the addressable memory can not be mapped because of IS_ERR_VALUE + * macro. Make sure that last 4k bytes are not usable by memblock + * if end of dram is equal to maximum addressable memory. + */ + if (max_mapped_addr == (dram_end - 1)) + memblock_set_current_limit(max_mapped_addr - 4096); + + max_pfn = PFN_DOWN(dram_end); max_low_pfn = max_pfn; dma32_phys_limit = min(4UL * SZ_1G, (unsigned long)PFN_PHYS(max_low_pfn)); set_max_mapnr(max_low_pfn); -- GitLab From e557793799c5a8406afb08aa170509619f7eac36 Mon Sep 17 00:00:00 2001 From: Atish Patra Date: Mon, 11 Jan 2021 15:45:04 -0800 Subject: [PATCH 0945/2012] RISC-V: Fix maximum allowed phsyical memory for RV32 Linux kernel can only map 1GB of address space for RV32 as the page offset is set to 0xC0000000. The current description in the Kconfig is confusing as it indicates that RV32 can support 2GB of physical memory. That is simply not true for current kernel. In future, a 2GB split support can be added to allow 2GB physical address space. Reviewed-by: Anup Patel Signed-off-by: Atish Patra Signed-off-by: Palmer Dabbelt --- arch/riscv/Kconfig | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 81b76d44725d7..e9e2c1f0a6905 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -137,7 +137,7 @@ config PA_BITS config PAGE_OFFSET hex - default 0xC0000000 if 32BIT && MAXPHYSMEM_2GB + default 0xC0000000 if 32BIT && MAXPHYSMEM_1GB default 0x80000000 if 64BIT && !MMU default 0xffffffff80000000 if 64BIT && MAXPHYSMEM_2GB default 0xffffffe000000000 if 64BIT && MAXPHYSMEM_128GB @@ -247,10 +247,12 @@ config MODULE_SECTIONS choice prompt "Maximum Physical Memory" - default MAXPHYSMEM_2GB if 32BIT + default MAXPHYSMEM_1GB if 32BIT default MAXPHYSMEM_2GB if 64BIT && CMODEL_MEDLOW default MAXPHYSMEM_128GB if 64BIT && CMODEL_MEDANY + config MAXPHYSMEM_1GB + bool "1GiB" config MAXPHYSMEM_2GB bool "2GiB" config MAXPHYSMEM_128GB -- GitLab From fca05d4d61e65fa573a3768f9019a42143c03349 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sat, 16 Jan 2021 12:26:46 +0100 Subject: [PATCH 0946/2012] netfilter: nft_dynset: honor stateful expressions in set definition If the set definition contains stateful expressions, allocate them for the newly added entries from the packet path. Fixes: 65038428b2c6 ("netfilter: nf_tables: allow to specify stateful expression in set definition") Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 2 ++ net/netfilter/nf_tables_api.c | 5 ++--- net/netfilter/nft_dynset.c | 6 ++++++ 3 files changed, 10 insertions(+), 3 deletions(-) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index f4af8362d2348..4b6ecf5326238 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -721,6 +721,8 @@ void *nft_set_elem_init(const struct nft_set *set, const struct nft_set_ext_tmpl *tmpl, const u32 *key, const u32 *key_end, const u32 *data, u64 timeout, u64 expiration, gfp_t gfp); +int nft_set_elem_expr_clone(const struct nft_ctx *ctx, struct nft_set *set, + struct nft_expr *expr_array[]); void nft_set_elem_destroy(const struct nft_set *set, void *elem, bool destroy_expr); diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 15c467f1a9dd9..8d3aa97b52e71 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -5235,9 +5235,8 @@ static void nf_tables_set_elem_destroy(const struct nft_ctx *ctx, kfree(elem); } -static int nft_set_elem_expr_clone(const struct nft_ctx *ctx, - struct nft_set *set, - struct nft_expr *expr_array[]) +int nft_set_elem_expr_clone(const struct nft_ctx *ctx, struct nft_set *set, + struct nft_expr *expr_array[]) { struct nft_expr *expr; int err, i, k; diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c index 0b053f75cd604..86204740f6c71 100644 --- a/net/netfilter/nft_dynset.c +++ b/net/netfilter/nft_dynset.c @@ -295,6 +295,12 @@ static int nft_dynset_init(const struct nft_ctx *ctx, err = -EOPNOTSUPP; goto err_expr_free; } + } else if (set->num_exprs > 0) { + err = nft_set_elem_expr_clone(ctx, set, priv->expr_array); + if (err < 0) + return err; + + priv->num_exprs = set->num_exprs; } nft_set_ext_prepare(&priv->tmpl); -- GitLab From 0c5b7a501e7400869ee905b4f7af3d6717802bcb Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sat, 16 Jan 2021 19:20:15 +0100 Subject: [PATCH 0947/2012] netfilter: nft_dynset: add timeout extension to template Otherwise, the newly create element shows no timeout when listing the ruleset. If the set definition does not specify a default timeout, then the set element only shows the expiration time, but not the timeout. This is a problem when restoring a stateful ruleset listing since it skips the timeout policy entirely. Fixes: 22fe54d5fefc ("netfilter: nf_tables: add support for dynamic set updates") Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_dynset.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c index 86204740f6c71..218c09e4fdddd 100644 --- a/net/netfilter/nft_dynset.c +++ b/net/netfilter/nft_dynset.c @@ -312,8 +312,10 @@ static int nft_dynset_init(const struct nft_ctx *ctx, nft_dynset_ext_add_expr(priv); if (set->flags & NFT_SET_TIMEOUT) { - if (timeout || set->timeout) + if (timeout || set->timeout) { + nft_set_ext_add(&priv->tmpl, NFT_SET_EXT_TIMEOUT); nft_set_ext_add(&priv->tmpl, NFT_SET_EXT_EXPIRATION); + } } priv->timeout = timeout; -- GitLab From 29a951dfb3c3263c3a0f3bd9f7f2c2cfde4baedb Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 8 Jan 2021 13:13:41 -0800 Subject: [PATCH 0948/2012] mm: fix clear_refs_write locking Turning page table entries read-only requires the mmap_sem held for writing. So stop doing the odd games with turning things from read locks to write locks and back. Just get the write lock. Signed-off-by: Linus Torvalds --- fs/proc/task_mmu.c | 32 +++++++++----------------------- 1 file changed, 9 insertions(+), 23 deletions(-) diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index ee5a235b30562..ab7d700b2caa4 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -1215,41 +1215,26 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf, .type = type, }; + if (mmap_write_lock_killable(mm)) { + count = -EINTR; + goto out_mm; + } if (type == CLEAR_REFS_MM_HIWATER_RSS) { - if (mmap_write_lock_killable(mm)) { - count = -EINTR; - goto out_mm; - } - /* * Writing 5 to /proc/pid/clear_refs resets the peak * resident set size to this mm's current rss value. */ reset_mm_hiwater_rss(mm); - mmap_write_unlock(mm); - goto out_mm; + goto out_unlock; } - if (mmap_read_lock_killable(mm)) { - count = -EINTR; - goto out_mm; - } tlb_gather_mmu(&tlb, mm, 0, -1); if (type == CLEAR_REFS_SOFT_DIRTY) { for (vma = mm->mmap; vma; vma = vma->vm_next) { if (!(vma->vm_flags & VM_SOFTDIRTY)) continue; - mmap_read_unlock(mm); - if (mmap_write_lock_killable(mm)) { - count = -EINTR; - goto out_mm; - } - for (vma = mm->mmap; vma; vma = vma->vm_next) { - vma->vm_flags &= ~VM_SOFTDIRTY; - vma_set_page_prot(vma); - } - mmap_write_downgrade(mm); - break; + vma->vm_flags &= ~VM_SOFTDIRTY; + vma_set_page_prot(vma); } mmu_notifier_range_init(&range, MMU_NOTIFY_SOFT_DIRTY, @@ -1261,7 +1246,8 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf, if (type == CLEAR_REFS_SOFT_DIRTY) mmu_notifier_invalidate_range_end(&range); tlb_finish_mmu(&tlb, 0, -1); - mmap_read_unlock(mm); +out_unlock: + mmap_write_unlock(mm); out_mm: mmput(mm); } -- GitLab From 9348b73c2e1bfea74ccd4a44fb4ccc7276ab9623 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sat, 9 Jan 2021 17:09:10 -0800 Subject: [PATCH 0949/2012] mm: don't play games with pinned pages in clear_page_refs Turning a pinned page read-only breaks the pinning after COW. Don't do it. The whole "track page soft dirty" state doesn't work with pinned pages anyway, since the page might be dirtied by the pinning entity without ever being noticed in the page tables. Signed-off-by: Linus Torvalds --- fs/proc/task_mmu.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index ab7d700b2caa4..602e3a52884d8 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -1035,6 +1035,25 @@ struct clear_refs_private { }; #ifdef CONFIG_MEM_SOFT_DIRTY + +#define is_cow_mapping(flags) (((flags) & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE) + +static inline bool pte_is_pinned(struct vm_area_struct *vma, unsigned long addr, pte_t pte) +{ + struct page *page; + + if (!pte_write(pte)) + return false; + if (!is_cow_mapping(vma->vm_flags)) + return false; + if (likely(!atomic_read(&vma->vm_mm->has_pinned))) + return false; + page = vm_normal_page(vma, addr, pte); + if (!page) + return false; + return page_maybe_dma_pinned(page); +} + static inline void clear_soft_dirty(struct vm_area_struct *vma, unsigned long addr, pte_t *pte) { @@ -1049,6 +1068,8 @@ static inline void clear_soft_dirty(struct vm_area_struct *vma, if (pte_present(ptent)) { pte_t old_pte; + if (pte_is_pinned(vma, addr, ptent)) + return; old_pte = ptep_modify_prot_start(vma, addr, pte); ptent = pte_wrprotect(old_pte); ptent = pte_clear_soft_dirty(ptent); -- GitLab From ce5379963b2884e9d23bea0c5674a7251414c84b Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sat, 16 Jan 2021 19:50:34 +0100 Subject: [PATCH 0950/2012] netfilter: nft_dynset: dump expressions when set definition contains no expressions If the set definition provides no stateful expressions, then include the stateful expression in the ruleset listing. Without this fix, the dynset rule listing shows the stateful expressions provided by the set definition. Fixes: 65038428b2c6 ("netfilter: nf_tables: allow to specify stateful expression in set definition") Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_dynset.c | 31 +++++++++++++++++-------------- 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c index 218c09e4fdddd..d164ef9e68439 100644 --- a/net/netfilter/nft_dynset.c +++ b/net/netfilter/nft_dynset.c @@ -384,22 +384,25 @@ static int nft_dynset_dump(struct sk_buff *skb, const struct nft_expr *expr) nf_jiffies64_to_msecs(priv->timeout), NFTA_DYNSET_PAD)) goto nla_put_failure; - if (priv->num_exprs == 1) { - if (nft_expr_dump(skb, NFTA_DYNSET_EXPR, priv->expr_array[0])) - goto nla_put_failure; - } else if (priv->num_exprs > 1) { - struct nlattr *nest; - - nest = nla_nest_start_noflag(skb, NFTA_DYNSET_EXPRESSIONS); - if (!nest) - goto nla_put_failure; - - for (i = 0; i < priv->num_exprs; i++) { - if (nft_expr_dump(skb, NFTA_LIST_ELEM, - priv->expr_array[i])) + if (priv->set->num_exprs == 0) { + if (priv->num_exprs == 1) { + if (nft_expr_dump(skb, NFTA_DYNSET_EXPR, + priv->expr_array[0])) goto nla_put_failure; + } else if (priv->num_exprs > 1) { + struct nlattr *nest; + + nest = nla_nest_start_noflag(skb, NFTA_DYNSET_EXPRESSIONS); + if (!nest) + goto nla_put_failure; + + for (i = 0; i < priv->num_exprs; i++) { + if (nft_expr_dump(skb, NFTA_LIST_ELEM, + priv->expr_array[i])) + goto nla_put_failure; + } + nla_nest_end(skb, nest); } - nla_nest_end(skb, nest); } if (nla_put_be32(skb, NFTA_DYNSET_FLAGS, htonl(flags))) goto nla_put_failure; -- GitLab From c93cc9e16d88e0f5ea95d2d65d58a8a4dab258bc Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sat, 16 Jan 2021 11:52:11 -0700 Subject: [PATCH 0951/2012] io_uring: iopoll requests should also wake task ->in_idle state If we're freeing/finishing iopoll requests, ensure we check if the task is in idling in terms of cancelation. Otherwise we could end up waiting forever in __io_uring_task_cancel() if the task has active iopoll requests that need cancelation. Cc: stable@vger.kernel.org # 5.9+ Signed-off-by: Jens Axboe --- fs/io_uring.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/io_uring.c b/fs/io_uring.c index 985a9e3f976d3..5cda878b69cfd 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -2270,6 +2270,8 @@ static void io_req_free_batch_finish(struct io_ring_ctx *ctx, struct io_uring_task *tctx = rb->task->io_uring; percpu_counter_sub(&tctx->inflight, rb->task_refs); + if (atomic_read(&tctx->in_idle)) + wake_up(&tctx->wait); put_task_struct_many(rb->task, rb->task_refs); rb->task = NULL; } @@ -2288,6 +2290,8 @@ static void io_req_free_batch(struct req_batch *rb, struct io_kiocb *req) struct io_uring_task *tctx = rb->task->io_uring; percpu_counter_sub(&tctx->inflight, rb->task_refs); + if (atomic_read(&tctx->in_idle)) + wake_up(&tctx->wait); put_task_struct_many(rb->task, rb->task_refs); } rb->task = req->task; -- GitLab From 6b393a1ff1746a1c91bd95cbb2d79b104d8f15ac Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Sat, 16 Jan 2021 05:32:29 +0000 Subject: [PATCH 0952/2012] io_uring: fix false positive sqo warning on flush WARNING: CPU: 1 PID: 9094 at fs/io_uring.c:8884 io_disable_sqo_submit+0x106/0x130 fs/io_uring.c:8884 Call Trace: io_uring_flush+0x28b/0x3a0 fs/io_uring.c:9099 filp_close+0xb4/0x170 fs/open.c:1280 close_fd+0x5c/0x80 fs/file.c:626 __do_sys_close fs/open.c:1299 [inline] __se_sys_close fs/open.c:1297 [inline] __x64_sys_close+0x2f/0xa0 fs/open.c:1297 do_syscall_64+0x2d/0x70 arch/x86/entry/common.c:46 entry_SYSCALL_64_after_hwframe+0x44/0xa9 io_uring's final close() may be triggered by any task not only the creator. It's well handled by io_uring_flush() including SQPOLL case, though a warning in io_disable_sqo_submit() will fallaciously fire by moving this warning out to the only call site that matters. Reported-by: syzbot+2f5d1785dc624932da78@syzkaller.appspotmail.com Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io_uring.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 5cda878b69cfd..616c5f732a269 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -8918,8 +8918,6 @@ static void __io_uring_cancel_task_requests(struct io_ring_ctx *ctx, static void io_disable_sqo_submit(struct io_ring_ctx *ctx) { - WARN_ON_ONCE(ctx->sqo_task != current); - mutex_lock(&ctx->uring_lock); ctx->sqo_dead = 1; mutex_unlock(&ctx->uring_lock); @@ -8941,6 +8939,7 @@ static void io_uring_cancel_task_requests(struct io_ring_ctx *ctx, if ((ctx->flags & IORING_SETUP_SQPOLL) && ctx->sq_data) { /* for SQPOLL only sqo_task has task notes */ + WARN_ON_ONCE(ctx->sqo_task != current); io_disable_sqo_submit(ctx); task = ctx->sq_data->thread; atomic_inc(&task->io_uring->in_idle); -- GitLab From 4325cb498cb743dacaa3edbec398c5255f476ef6 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Sat, 16 Jan 2021 05:32:30 +0000 Subject: [PATCH 0953/2012] io_uring: fix uring_flush in exit_files() warning WARNING: CPU: 1 PID: 11100 at fs/io_uring.c:9096 io_uring_flush+0x326/0x3a0 fs/io_uring.c:9096 RIP: 0010:io_uring_flush+0x326/0x3a0 fs/io_uring.c:9096 Call Trace: filp_close+0xb4/0x170 fs/open.c:1280 close_files fs/file.c:401 [inline] put_files_struct fs/file.c:416 [inline] put_files_struct+0x1cc/0x350 fs/file.c:413 exit_files+0x7e/0xa0 fs/file.c:433 do_exit+0xc22/0x2ae0 kernel/exit.c:820 do_group_exit+0x125/0x310 kernel/exit.c:922 get_signal+0x3e9/0x20a0 kernel/signal.c:2770 arch_do_signal_or_restart+0x2a8/0x1eb0 arch/x86/kernel/signal.c:811 handle_signal_work kernel/entry/common.c:147 [inline] exit_to_user_mode_loop kernel/entry/common.c:171 [inline] exit_to_user_mode_prepare+0x148/0x250 kernel/entry/common.c:201 __syscall_exit_to_user_mode_work kernel/entry/common.c:291 [inline] syscall_exit_to_user_mode+0x19/0x50 kernel/entry/common.c:302 entry_SYSCALL_64_after_hwframe+0x44/0xa9 An SQPOLL ring creator task may have gotten rid of its file note during exit and called io_disable_sqo_submit(), but the io_uring is still left referenced through fdtable, which will be put during close_files() and cause a false positive warning. First split the warning into two for more clarity when is hit, and the add sqo_dead check to handle the described case. Reported-by: syzbot+a32b546d58dde07875a1@syzkaller.appspotmail.com Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io_uring.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 616c5f732a269..d494c4269fc5e 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -9131,7 +9131,10 @@ static int io_uring_flush(struct file *file, void *data) if (ctx->flags & IORING_SETUP_SQPOLL) { /* there is only one file note, which is owned by sqo_task */ - WARN_ON_ONCE((ctx->sqo_task == current) == + WARN_ON_ONCE(ctx->sqo_task != current && + xa_load(&tctx->xa, (unsigned long)file)); + /* sqo_dead check is for when this happens after cancellation */ + WARN_ON_ONCE(ctx->sqo_task == current && !ctx->sqo_dead && !xa_load(&tctx->xa, (unsigned long)file)); io_disable_sqo_submit(ctx); -- GitLab From d36a1dd9f77ae1e72da48f4123ed35627848507d Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 5 Jan 2021 14:43:46 -0500 Subject: [PATCH 0954/2012] dump_common_audit_data(): fix racy accesses to ->d_name We are not guaranteed the locking environment that would prevent dentry getting renamed right under us. And it's possible for old long name to be freed after rename, leading to UAF here. Cc: stable@kernel.org # v2.6.2+ Signed-off-by: Al Viro --- security/lsm_audit.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/security/lsm_audit.c b/security/lsm_audit.c index 7d8026f3f3772..a0cd28cd31a85 100644 --- a/security/lsm_audit.c +++ b/security/lsm_audit.c @@ -275,7 +275,9 @@ static void dump_common_audit_data(struct audit_buffer *ab, struct inode *inode; audit_log_format(ab, " name="); + spin_lock(&a->u.dentry->d_lock); audit_log_untrustedstring(ab, a->u.dentry->d_name.name); + spin_unlock(&a->u.dentry->d_lock); inode = d_backing_inode(a->u.dentry); if (inode) { @@ -293,8 +295,9 @@ static void dump_common_audit_data(struct audit_buffer *ab, dentry = d_find_alias(inode); if (dentry) { audit_log_format(ab, " name="); - audit_log_untrustedstring(ab, - dentry->d_name.name); + spin_lock(&dentry->d_lock); + audit_log_untrustedstring(ab, dentry->d_name.name); + spin_unlock(&dentry->d_lock); dput(dentry); } audit_log_format(ab, " dev="); -- GitLab From 7d2a92445e3f4e56b62c7dd1403feb93c6f9a680 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Fri, 15 Jan 2021 20:08:59 +0000 Subject: [PATCH 0955/2012] net: ethernet: smsc: smc91x: Fix function name in kernel-doc header Fixes the following W=1 kernel build warning(s): drivers/net/ethernet/smsc/smc91x.c:2200: warning: Function parameter or member 'dev' not described in 'try_toggle_control_gpio' drivers/net/ethernet/smsc/smc91x.c:2200: warning: Function parameter or member 'desc' not described in 'try_toggle_control_gpio' drivers/net/ethernet/smsc/smc91x.c:2200: warning: Function parameter or member 'name' not described in 'try_toggle_control_gpio' drivers/net/ethernet/smsc/smc91x.c:2200: warning: Function parameter or member 'index' not described in 'try_toggle_control_gpio' drivers/net/ethernet/smsc/smc91x.c:2200: warning: Function parameter or member 'value' not described in 'try_toggle_control_gpio' drivers/net/ethernet/smsc/smc91x.c:2200: warning: Function parameter or member 'nsdelay' not described in 'try_toggle_control_gpio' Signed-off-by: Lee Jones Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/smsc/smc91x.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/smsc/smc91x.c b/drivers/net/ethernet/smsc/smc91x.c index 742a1f7a838c9..891b49281bc64 100644 --- a/drivers/net/ethernet/smsc/smc91x.c +++ b/drivers/net/ethernet/smsc/smc91x.c @@ -2191,7 +2191,7 @@ static const struct of_device_id smc91x_match[] = { MODULE_DEVICE_TABLE(of, smc91x_match); /** - * of_try_set_control_gpio - configure a gpio if it exists + * try_toggle_control_gpio - configure a gpio if it exists * @dev: net device * @desc: where to store the GPIO descriptor, if it exists * @name: name of the GPIO in DT -- GitLab From 090c7ae8e0d0a983b59c8a96628dd698d2ca8e1e Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Fri, 15 Jan 2021 20:09:00 +0000 Subject: [PATCH 0956/2012] net: xen-netback: xenbus: Demote nonconformant kernel-doc headers Fixes the following W=1 kernel build warning(s): drivers/net/xen-netback/xenbus.c:419: warning: Function parameter or member 'dev' not described in 'frontend_changed' drivers/net/xen-netback/xenbus.c:419: warning: Function parameter or member 'frontend_state' not described in 'frontend_changed' drivers/net/xen-netback/xenbus.c:1001: warning: Function parameter or member 'dev' not described in 'netback_probe' drivers/net/xen-netback/xenbus.c:1001: warning: Function parameter or member 'id' not described in 'netback_probe' Reviewed-by: Andrew Lunn Signed-off-by: Lee Jones Signed-off-by: Jakub Kicinski --- drivers/net/xen-netback/xenbus.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/xen-netback/xenbus.c b/drivers/net/xen-netback/xenbus.c index 6f10e0998f1ce..a5439c130130f 100644 --- a/drivers/net/xen-netback/xenbus.c +++ b/drivers/net/xen-netback/xenbus.c @@ -411,7 +411,7 @@ static void read_xenbus_frontend_xdp(struct backend_info *be, vif->xdp_headroom = headroom; } -/** +/* * Callback received when the frontend's state changes. */ static void frontend_changed(struct xenbus_device *dev, @@ -996,7 +996,7 @@ static int netback_remove(struct xenbus_device *dev) return 0; } -/** +/* * Entry point to this code when a new device is created. Allocate the basic * structures and switch to InitWait. */ -- GitLab From 935888cda8209f4dde65bcbf9e7d059f0375399d Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Fri, 15 Jan 2021 20:09:01 +0000 Subject: [PATCH 0957/2012] net: ethernet: ti: am65-cpsw-qos: Demote non-conformant function header Fixes the following W=1 kernel build warning(s): drivers/net/ethernet/ti/am65-cpsw-qos.c:364: warning: Function parameter or member 'ndev' not described in 'am65_cpsw_timer_set' drivers/net/ethernet/ti/am65-cpsw-qos.c:364: warning: Function parameter or member 'est_new' not described in 'am65_cpsw_timer_set' Reviewed-by: Andrew Lunn Signed-off-by: Lee Jones Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/ti/am65-cpsw-qos.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/ti/am65-cpsw-qos.c b/drivers/net/ethernet/ti/am65-cpsw-qos.c index 3bdd4dbcd2ff1..ebcc6386cc34a 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-qos.c +++ b/drivers/net/ethernet/ti/am65-cpsw-qos.c @@ -356,7 +356,7 @@ static void am65_cpsw_est_set_sched_list(struct net_device *ndev, writel(~all_fetch_allow & AM65_CPSW_FETCH_ALLOW_MSK, ram_addr); } -/** +/* * Enable ESTf periodic output, set cycle start time and interval. */ static int am65_cpsw_timer_set(struct net_device *ndev, -- GitLab From e49e4647f3e2915305207d379bdd8cfba88b8b0f Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Fri, 15 Jan 2021 20:09:02 +0000 Subject: [PATCH 0958/2012] net: ethernet: ti: am65-cpts: Document am65_cpts_rx_enable()'s 'en' parameter Fixes the following W=1 kernel build warning(s): drivers/net/ethernet/ti/am65-cpts.c:736: warning: Function parameter or member 'en' not described in 'am65_cpts_rx_enable' drivers/net/ethernet/ti/am65-cpts.c:736: warning: Excess function parameter 'skb' description in 'am65_cpts_rx_enable' Reviewed-by: Andrew Lunn Signed-off-by: Lee Jones Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/ti/am65-cpts.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/ti/am65-cpts.c b/drivers/net/ethernet/ti/am65-cpts.c index 5dc60ecabe561..9caaae79fc957 100644 --- a/drivers/net/ethernet/ti/am65-cpts.c +++ b/drivers/net/ethernet/ti/am65-cpts.c @@ -727,7 +727,7 @@ static long am65_cpts_ts_work(struct ptp_clock_info *ptp) /** * am65_cpts_rx_enable - enable rx timestamping * @cpts: cpts handle - * @skb: packet + * @en: enable * * This functions enables rx packets timestamping. The CPTS can timestamp all * rx packets. -- GitLab From 807086021bf510ba03bd69a80a54e1de4a0fda30 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Fri, 15 Jan 2021 20:09:03 +0000 Subject: [PATCH 0959/2012] net: ethernet: ibm: ibmvnic: Fix some kernel-doc misdemeanours MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes the following W=1 kernel build warning(s): from drivers/net/ethernet/ibm/ibmvnic.c:35: inlined from ‘handle_vpd_rsp’ at drivers/net/ethernet/ibm/ibmvnic.c:4124:3: drivers/net/ethernet/ibm/ibmvnic.c:1362: warning: Function parameter or member 'hdr_field' not described in 'build_hdr_data' drivers/net/ethernet/ibm/ibmvnic.c:1362: warning: Function parameter or member 'skb' not described in 'build_hdr_data' drivers/net/ethernet/ibm/ibmvnic.c:1362: warning: Function parameter or member 'hdr_len' not described in 'build_hdr_data' drivers/net/ethernet/ibm/ibmvnic.c:1362: warning: Function parameter or member 'hdr_data' not described in 'build_hdr_data' drivers/net/ethernet/ibm/ibmvnic.c:1423: warning: Function parameter or member 'hdr_field' not described in 'create_hdr_descs' drivers/net/ethernet/ibm/ibmvnic.c:1423: warning: Function parameter or member 'hdr_data' not described in 'create_hdr_descs' drivers/net/ethernet/ibm/ibmvnic.c:1423: warning: Function parameter or member 'len' not described in 'create_hdr_descs' drivers/net/ethernet/ibm/ibmvnic.c:1423: warning: Function parameter or member 'hdr_len' not described in 'create_hdr_descs' drivers/net/ethernet/ibm/ibmvnic.c:1423: warning: Function parameter or member 'scrq_arr' not described in 'create_hdr_descs' drivers/net/ethernet/ibm/ibmvnic.c:1474: warning: Function parameter or member 'txbuff' not described in 'build_hdr_descs_arr' drivers/net/ethernet/ibm/ibmvnic.c:1474: warning: Function parameter or member 'num_entries' not described in 'build_hdr_descs_arr' drivers/net/ethernet/ibm/ibmvnic.c:1474: warning: Function parameter or member 'hdr_field' not described in 'build_hdr_descs_arr' drivers/net/ethernet/ibm/ibmvnic.c:1832: warning: Function parameter or member 'adapter' not described in 'do_change_param_reset' drivers/net/ethernet/ibm/ibmvnic.c:1832: warning: Function parameter or member 'rwi' not described in 'do_change_param_reset' drivers/net/ethernet/ibm/ibmvnic.c:1832: warning: Function parameter or member 'reset_state' not described in 'do_change_param_reset' drivers/net/ethernet/ibm/ibmvnic.c:1911: warning: Function parameter or member 'adapter' not described in 'do_reset' drivers/net/ethernet/ibm/ibmvnic.c:1911: warning: Function parameter or member 'rwi' not described in 'do_reset' drivers/net/ethernet/ibm/ibmvnic.c:1911: warning: Function parameter or member 'reset_state' not described in 'do_reset' Signed-off-by: Lee Jones Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/ibm/ibmvnic.c | 27 +++++++++++++-------------- drivers/net/xen-netfront.c | 6 +++--- 2 files changed, 16 insertions(+), 17 deletions(-) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index aed985e08e8ad..4c4252e68de5a 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -1384,10 +1384,10 @@ static int ibmvnic_close(struct net_device *netdev) /** * build_hdr_data - creates L2/L3/L4 header data buffer - * @hdr_field - bitfield determining needed headers - * @skb - socket buffer - * @hdr_len - array of header lengths - * @tot_len - total length of data + * @hdr_field: bitfield determining needed headers + * @skb: socket buffer + * @hdr_len: array of header lengths + * @hdr_data: buffer to write the header to * * Reads hdr_field to determine which headers are needed by firmware. * Builds a buffer containing these headers. Saves individual header @@ -1444,11 +1444,11 @@ static int build_hdr_data(u8 hdr_field, struct sk_buff *skb, /** * create_hdr_descs - create header and header extension descriptors - * @hdr_field - bitfield determining needed headers - * @data - buffer containing header data - * @len - length of data buffer - * @hdr_len - array of individual header lengths - * @scrq_arr - descriptor array + * @hdr_field: bitfield determining needed headers + * @hdr_data: buffer containing header data + * @len: length of data buffer + * @hdr_len: array of individual header lengths + * @scrq_arr: descriptor array * * Creates header and, if needed, header extension descriptors and * places them in a descriptor array, scrq_arr @@ -1496,10 +1496,9 @@ static int create_hdr_descs(u8 hdr_field, u8 *hdr_data, int len, int *hdr_len, /** * build_hdr_descs_arr - build a header descriptor array - * @skb - socket buffer - * @num_entries - number of descriptors to be sent - * @subcrq - first TX descriptor - * @hdr_field - bit field determining which headers will be sent + * @txbuff: tx buffer + * @num_entries: number of descriptors to be sent + * @hdr_field: bit field determining which headers will be sent * * This function will build a TX descriptor array with applicable * L2/L3/L4 packet header descriptors to be sent by send_subcrq_indirect. @@ -1925,7 +1924,7 @@ static int ibmvnic_set_mac(struct net_device *netdev, void *p) return rc; } -/** +/* * do_reset returns zero if we are able to keep processing reset events, or * non-zero if we hit a fatal error and must halt. */ diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index c20b78120bb42..6ef2adbd283a3 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -1580,7 +1580,7 @@ static struct net_device *xennet_create_dev(struct xenbus_device *dev) return ERR_PTR(err); } -/** +/* * Entry point to this code when a new device is created. Allocate the basic * structures and the ring buffers for communication with the backend, and * inform the backend of the appropriate details for those. @@ -1657,7 +1657,7 @@ static void xennet_disconnect_backend(struct netfront_info *info) } } -/** +/* * We are reconnecting to the backend, due to a suspend/resume, or a backend * driver restart. We tear down our netif structure and recreate it, but * leave the device-layer structures intact so that this is transparent to the @@ -2303,7 +2303,7 @@ static int xennet_connect(struct net_device *dev) return 0; } -/** +/* * Callback received when the backend's state changes. */ static void netback_changed(struct xenbus_device *dev, -- GitLab From b51036321461e73ba2d52651bc47045ceb814101 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Fri, 15 Jan 2021 20:09:04 +0000 Subject: [PATCH 0960/2012] net: ethernet: toshiba: ps3_gelic_net: Fix some kernel-doc misdemeanours Fixes the following W=1 kernel build warning(s): drivers/net/ethernet/toshiba/ps3_gelic_net.c:1107: warning: Function parameter or member 'irq' not described in 'gelic_card_interrupt' drivers/net/ethernet/toshiba/ps3_gelic_net.c:1107: warning: Function parameter or member 'ptr' not described in 'gelic_card_interrupt' drivers/net/ethernet/toshiba/ps3_gelic_net.c:1407: warning: Function parameter or member 'txqueue' not described in 'gelic_net_tx_timeout' drivers/net/ethernet/toshiba/ps3_gelic_net.c:1439: warning: Function parameter or member 'napi' not described in 'gelic_ether_setup_netdev_ops' drivers/net/ethernet/toshiba/ps3_gelic_net.c:1639: warning: Function parameter or member 'dev' not described in 'ps3_gelic_driver_probe' drivers/net/ethernet/toshiba/ps3_gelic_net.c:1795: warning: Function parameter or member 'dev' not described in 'ps3_gelic_driver_remove' Reviewed-by: Andrew Lunn Signed-off-by: Lee Jones Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/toshiba/ps3_gelic_net.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/toshiba/ps3_gelic_net.c b/drivers/net/ethernet/toshiba/ps3_gelic_net.c index 3d1fc8d2ca667..55e652624bd76 100644 --- a/drivers/net/ethernet/toshiba/ps3_gelic_net.c +++ b/drivers/net/ethernet/toshiba/ps3_gelic_net.c @@ -1100,7 +1100,7 @@ static int gelic_net_poll(struct napi_struct *napi, int budget) return packets_done; } -/** +/* * gelic_card_interrupt - event handler for gelic_net */ static irqreturn_t gelic_card_interrupt(int irq, void *ptr) @@ -1400,6 +1400,7 @@ out: /** * gelic_net_tx_timeout - called when the tx timeout watchdog kicks in. * @netdev: interface device structure + * @txqueue: unused * * called, if tx hangs. Schedules a task that resets the interface */ @@ -1431,6 +1432,7 @@ static const struct net_device_ops gelic_netdevice_ops = { /** * gelic_ether_setup_netdev_ops - initialization of net_device operations * @netdev: net_device structure + * @napi: napi structure * * fills out function pointers in the net_device structure */ @@ -1632,7 +1634,7 @@ static void gelic_card_get_vlan_info(struct gelic_card *card) dev_info(ctodev(card), "internal vlan %s\n", card->vlan_required? "enabled" : "disabled"); } -/** +/* * ps3_gelic_driver_probe - add a device to the control of this driver */ static int ps3_gelic_driver_probe(struct ps3_system_bus_device *dev) @@ -1787,7 +1789,7 @@ fail_open: return result; } -/** +/* * ps3_gelic_driver_remove - remove a device from the control of this driver */ -- GitLab From e242d598996506c228cb0ee418b4de31608d39c1 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Fri, 15 Jan 2021 20:09:05 +0000 Subject: [PATCH 0961/2012] net: ethernet: toshiba: spider_net: Document a whole bunch of function parameters Fixes the following W=1 kernel build warning(s): drivers/net/ethernet/toshiba/spider_net.c:263: warning: Function parameter or member 'hwdescr' not described in 'spider_net_get_descr_status' drivers/net/ethernet/toshiba/spider_net.c:263: warning: Excess function parameter 'descr' description in 'spider_net_get_descr_status' drivers/net/ethernet/toshiba/spider_net.c:554: warning: Function parameter or member 'netdev' not described in 'spider_net_get_multicast_hash' drivers/net/ethernet/toshiba/spider_net.c:902: warning: Function parameter or member 't' not described in 'spider_net_cleanup_tx_ring' drivers/net/ethernet/toshiba/spider_net.c:902: warning: Excess function parameter 'card' description in 'spider_net_cleanup_tx_ring' drivers/net/ethernet/toshiba/spider_net.c:1074: warning: Function parameter or member 'card' not described in 'spider_net_resync_head_ptr' drivers/net/ethernet/toshiba/spider_net.c:1234: warning: Function parameter or member 'napi' not described in 'spider_net_poll' drivers/net/ethernet/toshiba/spider_net.c:1234: warning: Excess function parameter 'netdev' description in 'spider_net_poll' drivers/net/ethernet/toshiba/spider_net.c:1278: warning: Function parameter or member 'p' not described in 'spider_net_set_mac' drivers/net/ethernet/toshiba/spider_net.c:1278: warning: Excess function parameter 'ptr' description in 'spider_net_set_mac' drivers/net/ethernet/toshiba/spider_net.c:1350: warning: Function parameter or member 'error_reg1' not described in 'spider_net_handle_error_irq' drivers/net/ethernet/toshiba/spider_net.c:1350: warning: Function parameter or member 'error_reg2' not described in 'spider_net_handle_error_irq' drivers/net/ethernet/toshiba/spider_net.c:1968: warning: Function parameter or member 't' not described in 'spider_net_link_phy' drivers/net/ethernet/toshiba/spider_net.c:1968: warning: Excess function parameter 'data' description in 'spider_net_link_phy' drivers/net/ethernet/toshiba/spider_net.c:2149: warning: Function parameter or member 'work' not described in 'spider_net_tx_timeout_task' drivers/net/ethernet/toshiba/spider_net.c:2149: warning: Excess function parameter 'data' description in 'spider_net_tx_timeout_task' drivers/net/ethernet/toshiba/spider_net.c:2182: warning: Function parameter or member 'txqueue' not described in 'spider_net_tx_timeout' Reviewed-by: Andrew Lunn Signed-off-by: Lee Jones Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/toshiba/spider_net.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/toshiba/spider_net.c b/drivers/net/ethernet/toshiba/spider_net.c index 5f5b33e6653b2..d5a75ef7e3ca9 100644 --- a/drivers/net/ethernet/toshiba/spider_net.c +++ b/drivers/net/ethernet/toshiba/spider_net.c @@ -254,7 +254,7 @@ spider_net_set_promisc(struct spider_net_card *card) /** * spider_net_get_descr_status -- returns the status of a descriptor - * @descr: descriptor to look at + * @hwdescr: descriptor to look at * * returns the status as in the dmac_cmd_status field of the descriptor */ @@ -542,6 +542,7 @@ error: /** * spider_net_get_multicast_hash - generates hash for multicast filter table + * @netdev: interface device structure * @addr: multicast address * * returns the hash value. @@ -890,7 +891,7 @@ spider_net_xmit(struct sk_buff *skb, struct net_device *netdev) /** * spider_net_cleanup_tx_ring - cleans up the TX ring - * @card: card structure + * @t: timer context used to obtain the pointer to net card data structure * * spider_net_cleanup_tx_ring is called by either the tx_timer * or from the NAPI polling routine. @@ -1063,6 +1064,7 @@ static void show_rx_chain(struct spider_net_card *card) /** * spider_net_resync_head_ptr - Advance head ptr past empty descrs + * @card: card structure * * If the driver fails to keep up and empty the queue, then the * hardware wil run out of room to put incoming packets. This @@ -1220,7 +1222,7 @@ bad_desc: /** * spider_net_poll - NAPI poll function called by the stack to return packets - * @netdev: interface device structure + * @napi: napi device structure * @budget: number of packets we can pass to the stack at most * * returns 0 if no more packets available to the driver/stack. Returns 1, @@ -1268,7 +1270,7 @@ static int spider_net_poll(struct napi_struct *napi, int budget) /** * spider_net_set_mac - sets the MAC of an interface * @netdev: interface device structure - * @ptr: pointer to new MAC address + * @p: pointer to new MAC address * * Returns 0 on success, <0 on failure. Currently, we don't support this * and will always return EOPNOTSUPP. @@ -1340,6 +1342,8 @@ spider_net_link_reset(struct net_device *netdev) * spider_net_handle_error_irq - handles errors raised by an interrupt * @card: card structure * @status_reg: interrupt status register 0 (GHIINT0STS) + * @error_reg1: interrupt status register 1 (GHIINT1STS) + * @error_reg2: interrupt status register 2 (GHIINT2STS) * * spider_net_handle_error_irq treats or ignores all error conditions * found when an interrupt is presented @@ -1961,8 +1965,7 @@ init_firmware_failed: /** * spider_net_link_phy - * @data: used for pointer to card structure - * + * @t: timer context used to obtain the pointer to net card data structure */ static void spider_net_link_phy(struct timer_list *t) { @@ -2140,7 +2143,7 @@ spider_net_stop(struct net_device *netdev) /** * spider_net_tx_timeout_task - task scheduled by the watchdog timeout * function (to be called not under interrupt status) - * @data: data, is interface device structure + * @work: work context used to obtain the pointer to net card data structure * * called as task when tx hangs, resets interface (if interface is up) */ @@ -2174,6 +2177,7 @@ out: /** * spider_net_tx_timeout - called when the tx timeout watchdog kicks in. * @netdev: interface device structure + * @txqueue: unused * * called, if tx hangs. Schedules a task that resets the interface */ -- GitLab From b660bccbc345b001a13e0df29a723d2612419d91 Mon Sep 17 00:00:00 2001 From: Raju Rangoju Date: Fri, 15 Jan 2021 15:50:59 +0530 Subject: [PATCH 0962/2012] cxgb4: enable interrupt based Tx completions for T5 Enable interrupt based Tx completions to improve latency for T5. The consumer index (CIDX) will now come via interrupts so that Tx SKBs can be freed up sooner in Rx path. Also, enforce CIDX flush threshold override (CIDXFTHRESHO) to improve latency for slow traffic. This ensures that the interrupt is generated immediately whenever hardware catches up with driver (i.e. CIDX == PIDX is reached), which is often the case for slow traffic. Signed-off-by: Raju Rangoju Link: https://lore.kernel.org/r/20210115102059.6846-1-rajur@chelsio.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/chelsio/cxgb4/sge.c | 38 ++++++++++++++++++++---- 1 file changed, 33 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/sge.c b/drivers/net/ethernet/chelsio/cxgb4/sge.c index 196652a114c5f..550cc065649fc 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/sge.c +++ b/drivers/net/ethernet/chelsio/cxgb4/sge.c @@ -1600,7 +1600,8 @@ static netdev_tx_t cxgb4_eth_xmit(struct sk_buff *skb, struct net_device *dev) * has opened up. */ eth_txq_stop(q); - wr_mid |= FW_WR_EQUEQ_F | FW_WR_EQUIQ_F; + if (chip_ver > CHELSIO_T5) + wr_mid |= FW_WR_EQUEQ_F | FW_WR_EQUIQ_F; } wr = (void *)&q->q.desc[q->q.pidx]; @@ -1832,6 +1833,7 @@ static netdev_tx_t cxgb4_vf_eth_xmit(struct sk_buff *skb, struct adapter *adapter; int qidx, credits, ret; size_t fw_hdr_copy_len; + unsigned int chip_ver; u64 cntrl, *end; u32 wr_mid; @@ -1896,6 +1898,7 @@ static netdev_tx_t cxgb4_vf_eth_xmit(struct sk_buff *skb, goto out_free; } + chip_ver = CHELSIO_CHIP_VERSION(adapter->params.chip); wr_mid = FW_WR_LEN16_V(DIV_ROUND_UP(flits, 2)); if (unlikely(credits < ETHTXQ_STOP_THRES)) { /* After we're done injecting the Work Request for this @@ -1907,7 +1910,8 @@ static netdev_tx_t cxgb4_vf_eth_xmit(struct sk_buff *skb, * has opened up. */ eth_txq_stop(txq); - wr_mid |= FW_WR_EQUEQ_F | FW_WR_EQUIQ_F; + if (chip_ver > CHELSIO_T5) + wr_mid |= FW_WR_EQUEQ_F | FW_WR_EQUIQ_F; } /* Start filling in our Work Request. Note that we do _not_ handle @@ -1960,7 +1964,7 @@ static netdev_tx_t cxgb4_vf_eth_xmit(struct sk_buff *skb, */ cpl = (void *)(lso + 1); - if (CHELSIO_CHIP_VERSION(adapter->params.chip) <= CHELSIO_T5) + if (chip_ver <= CHELSIO_T5) cntrl = TXPKT_ETHHDR_LEN_V(eth_xtra_len); else cntrl = T6_TXPKT_ETHHDR_LEN_V(eth_xtra_len); @@ -3598,6 +3602,25 @@ static void t4_tx_completion_handler(struct sge_rspq *rspq, } txq = &s->ethtxq[pi->first_qset + rspq->idx]; + + /* We've got the Hardware Consumer Index Update in the Egress Update + * message. These Egress Update messages will be our sole CIDX Updates + * we get since we don't want to chew up PCIe bandwidth for both Ingress + * Messages and Status Page writes. However, The code which manages + * reclaiming successfully DMA'ed TX Work Requests uses the CIDX value + * stored in the Status Page at the end of the TX Queue. It's easiest + * to simply copy the CIDX Update value from the Egress Update message + * to the Status Page. Also note that no Endian issues need to be + * considered here since both are Big Endian and we're just copying + * bytes consistently ... + */ + if (CHELSIO_CHIP_VERSION(adapter->params.chip) <= CHELSIO_T5) { + struct cpl_sge_egr_update *egr; + + egr = (struct cpl_sge_egr_update *)rsp; + WRITE_ONCE(txq->q.stat->cidx, egr->cidx); + } + t4_sge_eth_txq_egress_update(adapter, txq, -1); } @@ -4583,11 +4606,15 @@ int t4_sge_alloc_eth_txq(struct adapter *adap, struct sge_eth_txq *txq, * write the CIDX Updates into the Status Page at the end of the * TX Queue. */ - c.autoequiqe_to_viid = htonl(FW_EQ_ETH_CMD_AUTOEQUEQE_F | + c.autoequiqe_to_viid = htonl(((chip_ver <= CHELSIO_T5) ? + FW_EQ_ETH_CMD_AUTOEQUIQE_F : + FW_EQ_ETH_CMD_AUTOEQUEQE_F) | FW_EQ_ETH_CMD_VIID_V(pi->viid)); c.fetchszm_to_iqid = - htonl(FW_EQ_ETH_CMD_HOSTFCMODE_V(HOSTFCMODE_STATUS_PAGE_X) | + htonl(FW_EQ_ETH_CMD_HOSTFCMODE_V((chip_ver <= CHELSIO_T5) ? + HOSTFCMODE_INGRESS_QUEUE_X : + HOSTFCMODE_STATUS_PAGE_X) | FW_EQ_ETH_CMD_PCIECHN_V(pi->tx_chan) | FW_EQ_ETH_CMD_FETCHRO_F | FW_EQ_ETH_CMD_IQID_V(iqid)); @@ -4598,6 +4625,7 @@ int t4_sge_alloc_eth_txq(struct adapter *adap, struct sge_eth_txq *txq, : FETCHBURSTMIN_64B_T6_X) | FW_EQ_ETH_CMD_FBMAX_V(FETCHBURSTMAX_512B_X) | FW_EQ_ETH_CMD_CIDXFTHRESH_V(CIDXFLUSHTHRESH_32_X) | + FW_EQ_ETH_CMD_CIDXFTHRESHO_V(chip_ver == CHELSIO_T5) | FW_EQ_ETH_CMD_EQSIZE_V(nentries)); c.eqaddr = cpu_to_be64(txq->q.phys_addr); -- GitLab From 20efd2c79afbd8f0a3929edb9b8ab5ce4c83fb6f Mon Sep 17 00:00:00 2001 From: Xu Wang Date: Fri, 15 Jan 2021 09:55:44 +0000 Subject: [PATCH 0963/2012] net: mscc: ocelot: Remove unneeded semicolon fix semicolon.cocci warnings: drivers/net/ethernet/mscc/ocelot_net.c:460:2-3: Unneeded semicolon Signed-off-by: Xu Wang Link: https://lore.kernel.org/r/20210115095544.33164-1-vulab@iscas.ac.cn Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mscc/ocelot_net.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mscc/ocelot_net.c b/drivers/net/ethernet/mscc/ocelot_net.c index a520fd485912d..4585c35c24e52 100644 --- a/drivers/net/ethernet/mscc/ocelot_net.c +++ b/drivers/net/ethernet/mscc/ocelot_net.c @@ -634,7 +634,7 @@ static void ocelot_mact_work(struct work_struct *work) break; default: break; - }; + } kfree(w); } -- GitLab From 66c556025d687dbdd0f748c5e1df89c977b6c02a Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Fri, 15 Jan 2021 15:04:40 +0000 Subject: [PATCH 0964/2012] skbuff: back tiny skbs with kmalloc() in __netdev_alloc_skb() too Commit 3226b158e67c ("net: avoid 32 x truesize under-estimation for tiny skbs") ensured that skbs with data size lower than 1025 bytes will be kmalloc'ed to avoid excessive page cache fragmentation and memory consumption. However, the fix adressed only __napi_alloc_skb() (primarily for virtio_net and napi_get_frags()), but the issue can still be achieved through __netdev_alloc_skb(), which is still used by several drivers. Drivers often allocate a tiny skb for headers and place the rest of the frame to frags (so-called copybreak). Mirror the condition to __netdev_alloc_skb() to handle this case too. Since v1 [0]: - fix "Fixes:" tag; - refine commit message (mention copybreak usecase). [0] https://lore.kernel.org/netdev/20210114235423.232737-1-alobakin@pm.me Fixes: a1c7fff7e18f ("net: netdev_alloc_skb() use build_skb()") Signed-off-by: Alexander Lobakin Link: https://lore.kernel.org/r/20210115150354.85967-1-alobakin@pm.me Signed-off-by: Jakub Kicinski --- net/core/skbuff.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index c1a6f262636a1..785daff48030d 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -437,7 +437,11 @@ struct sk_buff *__netdev_alloc_skb(struct net_device *dev, unsigned int len, len += NET_SKB_PAD; - if ((len > SKB_WITH_OVERHEAD(PAGE_SIZE)) || + /* If requested length is either too small or too big, + * we use kmalloc() for skb->head allocation. + */ + if (len <= SKB_WITH_OVERHEAD(1024) || + len > SKB_WITH_OVERHEAD(PAGE_SIZE) || (gfp_mask & (__GFP_DIRECT_RECLAIM | GFP_DMA))) { skb = __alloc_skb(len, gfp_mask, SKB_ALLOC_RX, NUMA_NO_NODE); if (!skb) -- GitLab From dbd50f238decfe58d2eac4980681a1e62a35c5b5 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Fri, 15 Jan 2021 17:36:38 +0800 Subject: [PATCH 0965/2012] net: move the hsize check to the else block in skb_segment After commit 89319d3801d1 ("net: Add frag_list support to skb_segment"), it goes to process frag_list when !hsize in skb_segment(). However, when using skb frag_list, sg normally should not be set. In this case, hsize will be set with len right before !hsize check, then it won't go to frag_list processing code. So the right thing to do is move the hsize check to the else block, so that it won't affect the !hsize check for frag_list processing. v1->v2: - change to do "hsize <= 0" check instead of "!hsize", and also move "hsize < 0" into else block, to save some cycles, as Alex suggested. Signed-off-by: Xin Long Reviewed-by: Alexander Duyck Signed-off-by: Jakub Kicinski --- net/core/skbuff.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 60390696e184e..e835193cabcc3 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -3894,12 +3894,8 @@ normal: } hsize = skb_headlen(head_skb) - offset; - if (hsize < 0) - hsize = 0; - if (hsize > len || !sg) - hsize = len; - if (!hsize && i >= nfrags && skb_headlen(list_skb) && + if (hsize <= 0 && i >= nfrags && skb_headlen(list_skb) && (skb_headlen(list_skb) == len || sg)) { BUG_ON(skb_headlen(list_skb) > len); @@ -3942,6 +3938,11 @@ normal: skb_release_head_state(nskb); __skb_push(nskb, doffset); } else { + if (hsize > len || !sg) + hsize = len; + else if (hsize < 0) + hsize = 0; + nskb = __alloc_skb(hsize + doffset + headroom, GFP_ATOMIC, skb_alloc_rx_flag(head_skb), NUMA_NO_NODE); -- GitLab From 1fef8544bf41fb77d6603feb9a31cd76b4578489 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Fri, 15 Jan 2021 17:36:39 +0800 Subject: [PATCH 0966/2012] sctp: remove the NETIF_F_SG flag before calling skb_segment It makes more sense to clear NETIF_F_SG instead of set it when calling skb_segment() in sctp_gso_segment(), since SCTP GSO is using head_skb's fraglist, of which all frags are linear skbs. This will make SCTP GSO code more understandable. Suggested-by: Alexander Duyck Signed-off-by: Xin Long Signed-off-by: Jakub Kicinski --- net/sctp/offload.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sctp/offload.c b/net/sctp/offload.c index ce281a9a28752..eb874e3c399a5 100644 --- a/net/sctp/offload.c +++ b/net/sctp/offload.c @@ -68,7 +68,7 @@ static struct sk_buff *sctp_gso_segment(struct sk_buff *skb, goto out; } - segs = skb_segment(skb, features | NETIF_F_HW_CSUM | NETIF_F_SG); + segs = skb_segment(skb, (features | NETIF_F_HW_CSUM) & ~NETIF_F_SG); if (IS_ERR(segs)) goto out; -- GitLab From 0b5cd6c32b14413bf87e10ee62be3162588dcbe6 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Sun, 17 Jan 2021 02:29:56 +0000 Subject: [PATCH 0967/2012] io_uring: fix skipping disabling sqo on exec If there are no requests at the time __io_uring_task_cancel() is called, tctx_inflight() returns zero and and it terminates not getting a chance to go through __io_uring_files_cancel() and do io_disable_sqo_submit(). And we absolutely want them disabled by the time cancellation ends. Reported-by: Jens Axboe Signed-off-by: Pavel Begunkov Fixes: d9d05217cb69 ("io_uring: stop SQPOLL submit on creator's death") Signed-off-by: Jens Axboe --- fs/io_uring.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/io_uring.c b/fs/io_uring.c index d494c4269fc5e..383ff6ed37342 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -9085,6 +9085,10 @@ void __io_uring_task_cancel(void) /* make sure overflow events are dropped */ atomic_inc(&tctx->in_idle); + /* trigger io_disable_sqo_submit() */ + if (tctx->sqpoll) + __io_uring_files_cancel(NULL); + do { /* read completions before cancelations */ inflight = tctx_inflight(tctx); -- GitLab From bc1c2048abbe3c3074b4de91d213595c57741a6b Mon Sep 17 00:00:00 2001 From: Mikko Perttunen Date: Tue, 12 Jan 2021 12:22:25 +0200 Subject: [PATCH 0968/2012] i2c: bpmp-tegra: Ignore unknown I2C_M flags In order to not to start returning errors when new I2C_M flags are added, change behavior to just ignore all flags that we don't know about. This includes the I2C_M_DMA_SAFE flag that already exists but causes -EINVAL to be returned for valid transactions. Cc: stable@vger.kernel.org # v4.19+ Signed-off-by: Mikko Perttunen Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-tegra-bpmp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-tegra-bpmp.c b/drivers/i2c/busses/i2c-tegra-bpmp.c index ec7a7e917eddb..c0c7d01473f2b 100644 --- a/drivers/i2c/busses/i2c-tegra-bpmp.c +++ b/drivers/i2c/busses/i2c-tegra-bpmp.c @@ -80,7 +80,7 @@ static int tegra_bpmp_xlate_flags(u16 flags, u16 *out) flags &= ~I2C_M_RECV_LEN; } - return (flags != 0) ? -EINVAL : 0; + return 0; } /** -- GitLab From 2f3a0828d46166d4e7df227479ed31766ee67e4a Mon Sep 17 00:00:00 2001 From: Sowjanya Komatineni Date: Tue, 12 Jan 2021 11:02:41 -0800 Subject: [PATCH 0969/2012] i2c: tegra: Create i2c_writesl_vi() to use with VI I2C for filling TX FIFO VI I2C controller has known hardware bug where immediate multiple writes to TX_FIFO register gets stuck. Recommended software work around is to read I2C register after each write to TX_FIFO register to flush out the data. This patch implements this work around for VI I2C controller. Signed-off-by: Sowjanya Komatineni Reviewed-by: Dmitry Osipenko Acked-by: Thierry Reding Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-tegra.c | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-tegra.c b/drivers/i2c/busses/i2c-tegra.c index 0727383f49402..8b113ae32dc71 100644 --- a/drivers/i2c/busses/i2c-tegra.c +++ b/drivers/i2c/busses/i2c-tegra.c @@ -326,6 +326,8 @@ static void i2c_writel(struct tegra_i2c_dev *i2c_dev, u32 val, unsigned int reg) /* read back register to make sure that register writes completed */ if (reg != I2C_TX_FIFO) readl_relaxed(i2c_dev->base + tegra_i2c_reg_addr(i2c_dev, reg)); + else if (i2c_dev->is_vi) + readl_relaxed(i2c_dev->base + tegra_i2c_reg_addr(i2c_dev, I2C_INT_STATUS)); } static u32 i2c_readl(struct tegra_i2c_dev *i2c_dev, unsigned int reg) @@ -339,6 +341,21 @@ static void i2c_writesl(struct tegra_i2c_dev *i2c_dev, void *data, writesl(i2c_dev->base + tegra_i2c_reg_addr(i2c_dev, reg), data, len); } +static void i2c_writesl_vi(struct tegra_i2c_dev *i2c_dev, void *data, + unsigned int reg, unsigned int len) +{ + u32 *data32 = data; + + /* + * VI I2C controller has known hardware bug where writes get stuck + * when immediate multiple writes happen to TX_FIFO register. + * Recommended software work around is to read I2C register after + * each write to TX_FIFO register to flush out the data. + */ + while (len--) + i2c_writel(i2c_dev, *data32++, reg); +} + static void i2c_readsl(struct tegra_i2c_dev *i2c_dev, void *data, unsigned int reg, unsigned int len) { @@ -811,7 +828,10 @@ static int tegra_i2c_fill_tx_fifo(struct tegra_i2c_dev *i2c_dev) i2c_dev->msg_buf_remaining = buf_remaining; i2c_dev->msg_buf = buf + words_to_transfer * BYTES_PER_FIFO_WORD; - i2c_writesl(i2c_dev, buf, I2C_TX_FIFO, words_to_transfer); + if (i2c_dev->is_vi) + i2c_writesl_vi(i2c_dev, buf, I2C_TX_FIFO, words_to_transfer); + else + i2c_writesl(i2c_dev, buf, I2C_TX_FIFO, words_to_transfer); buf += words_to_transfer * BYTES_PER_FIFO_WORD; } -- GitLab From 1b2cfa2d1dbdcc3b6dba1ecb7026a537a1d7277f Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Sat, 9 Jan 2021 13:43:08 +0100 Subject: [PATCH 0970/2012] i2c: octeon: check correct size of maximum RECV_LEN packet I2C_SMBUS_BLOCK_MAX defines already the maximum number as defined in the SMBus 2.0 specs. No reason to add one to it. Fixes: 886f6f8337dd ("i2c: octeon: Support I2C_M_RECV_LEN") Signed-off-by: Wolfram Sang Reviewed-by: Robert Richter Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-octeon-core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-octeon-core.c b/drivers/i2c/busses/i2c-octeon-core.c index d9607905dc2f1..845eda70b8cab 100644 --- a/drivers/i2c/busses/i2c-octeon-core.c +++ b/drivers/i2c/busses/i2c-octeon-core.c @@ -347,7 +347,7 @@ static int octeon_i2c_read(struct octeon_i2c *i2c, int target, if (result) return result; if (recv_len && i == 0) { - if (data[i] > I2C_SMBUS_BLOCK_MAX + 1) + if (data[i] > I2C_SMBUS_BLOCK_MAX) return -EPROTO; length += data[i]; } -- GitLab From 32c2bc8f2d855d4415c9a05b727e34649397bfbe Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Fri, 18 Dec 2020 18:35:50 +0200 Subject: [PATCH 0971/2012] ia64: fix build failure caused by memory model changes The change of ia64's default memory model to SPARSEMEM causes defconfig build to fail: CC kernel/async.o In file included from include/linux/numa.h:25, from include/linux/async.h:13, from kernel/async.c:47: arch/ia64/include/asm/sparsemem.h:14:40: warning: "PAGE_SHIFT" is not defined, evaluates to 0 [-Wundef] 14 | #if ((CONFIG_FORCE_MAX_ZONEORDER - 1 + PAGE_SHIFT) > SECTION_SIZE_BITS) | ^~~~~~~~~~ In file included from include/linux/gfp.h:6, from include/linux/xarray.h:14, from include/linux/radix-tree.h:19, from include/linux/idr.h:15, from include/linux/kernfs.h:13, from include/linux/sysfs.h:16, from include/linux/kobject.h:20, from include/linux/energy_model.h:7, from include/linux/device.h:16, from include/linux/async.h:14, from kernel/async.c:47: include/linux/mmzone.h:1156:2: error: #error Allocator MAX_ORDER exceeds SECTION_SIZE 1156 | #error Allocator MAX_ORDER exceeds SECTION_SIZE | ^~~~~ The error cause is the missing definition of PAGE_SHIFT in the calculation of SECTION_SIZE_BITS. Add include of to arch/ia64/include/asm/sparsemem.h to solve the problem. Fixes: 214496cb1870 ("ia64: make SPARSEMEM default and disable DISCONTIGMEM") Reported-by: Guenter Roeck Tested-by: Guenter Roeck Signed-off-by: Mike Rapoport --- arch/ia64/include/asm/sparsemem.h | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/ia64/include/asm/sparsemem.h b/arch/ia64/include/asm/sparsemem.h index dd8c166ffd7b5..42ed5248fae98 100644 --- a/arch/ia64/include/asm/sparsemem.h +++ b/arch/ia64/include/asm/sparsemem.h @@ -3,6 +3,7 @@ #define _ASM_IA64_SPARSEMEM_H #ifdef CONFIG_SPARSEMEM +#include /* * SECTION_SIZE_BITS 2^N: how big each section will be * MAX_PHYSMEM_BITS 2^N: how much memory we can have in that space -- GitLab From fff7b5e6ee63c5d20406a131b260c619cdd24fd1 Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Sat, 16 Jan 2021 14:31:36 -0800 Subject: [PATCH 0972/2012] x86/hyperv: Initialize clockevents after LAPIC is initialized With commit 4df4cb9e99f8, the Hyper-V direct-mode STIMER is actually initialized before LAPIC is initialized: see apic_intr_mode_init() x86_platform.apic_post_init() hyperv_init() hv_stimer_alloc() apic_bsp_setup() setup_local_APIC() setup_local_APIC() temporarily disables LAPIC, initializes it and re-eanble it. The direct-mode STIMER depends on LAPIC, and when it's registered, it can be programmed immediately and the timer can fire very soon: hv_stimer_init clockevents_config_and_register clockevents_register_device tick_check_new_device tick_setup_device tick_setup_periodic(), tick_setup_oneshot() clockevents_program_event When the timer fires in the hypervisor, if the LAPIC is in the disabled state, new versions of Hyper-V ignore the event and don't inject the timer interrupt into the VM, and hence the VM hangs when it boots. Note: when the VM starts/reboots, the LAPIC is pre-enabled by the firmware, so the window of LAPIC being temporarily disabled is pretty small, and the issue can only happen once out of 100~200 reboots for a 40-vCPU VM on one dev host, and on another host the issue doesn't reproduce after 2000 reboots. The issue is more noticeable for kdump/kexec, because the LAPIC is disabled by the first kernel, and stays disabled until the kdump/kexec kernel enables it. This is especially an issue to a Generation-2 VM (for which Hyper-V doesn't emulate the PIT timer) when CONFIG_HZ=1000 (rather than CONFIG_HZ=250) is used. Fix the issue by moving hv_stimer_alloc() to a later place where the LAPIC timer is initialized. Fixes: 4df4cb9e99f8 ("x86/hyperv: Initialize clockevents earlier in CPU onlining") Signed-off-by: Dexuan Cui Reviewed-by: Michael Kelley Link: https://lore.kernel.org/r/20210116223136.13892-1-decui@microsoft.com Signed-off-by: Wei Liu --- arch/x86/hyperv/hv_init.c | 29 ++++++++++++++++++++++++++--- 1 file changed, 26 insertions(+), 3 deletions(-) diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index 4638a52d8eaea..6375967a8244d 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -315,6 +315,25 @@ static struct syscore_ops hv_syscore_ops = { .resume = hv_resume, }; +static void (* __initdata old_setup_percpu_clockev)(void); + +static void __init hv_stimer_setup_percpu_clockev(void) +{ + /* + * Ignore any errors in setting up stimer clockevents + * as we can run with the LAPIC timer as a fallback. + */ + (void)hv_stimer_alloc(); + + /* + * Still register the LAPIC timer, because the direct-mode STIMER is + * not supported by old versions of Hyper-V. This also allows users + * to switch to LAPIC timer via /sys, if they want to. + */ + if (old_setup_percpu_clockev) + old_setup_percpu_clockev(); +} + /* * This function is to be invoked early in the boot sequence after the * hypervisor has been detected. @@ -393,10 +412,14 @@ void __init hyperv_init(void) wrmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64); /* - * Ignore any errors in setting up stimer clockevents - * as we can run with the LAPIC timer as a fallback. + * hyperv_init() is called before LAPIC is initialized: see + * apic_intr_mode_init() -> x86_platform.apic_post_init() and + * apic_bsp_setup() -> setup_local_APIC(). The direct-mode STIMER + * depends on LAPIC, so hv_stimer_alloc() should be called from + * x86_init.timers.setup_percpu_clockev. */ - (void)hv_stimer_alloc(); + old_setup_percpu_clockev = x86_init.timers.setup_percpu_clockev; + x86_init.timers.setup_percpu_clockev = hv_stimer_setup_percpu_clockev; hv_apic_init(); -- GitLab From feb889fb40fafc6933339cf1cca8f770126819fb Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sat, 16 Jan 2021 15:34:57 -0800 Subject: [PATCH 0973/2012] mm: don't put pinned pages into the swap cache So technically there is nothing wrong with adding a pinned page to the swap cache, but the pinning obviously means that the page can't actually be free'd right now anyway, so it's a bit pointless. However, the real problem is not with it being a bit pointless: the real issue is that after we've added it to the swap cache, we'll try to unmap the page. That will succeed, because the code in mm/rmap.c doesn't know or care about pinned pages. Even the unmapping isn't fatal per se, since the page will stay around in memory due to the pinning, and we do hold the connection to it using the swap cache. But when we then touch it next and take a page fault, the logic in do_swap_page() will map it back into the process as a possibly read-only page, and we'll then break the page association on the next COW fault. Honestly, this issue could have been fixed in any of those other places: (a) we could refuse to unmap a pinned page (which makes conceptual sense), or (b) we could make sure to re-map a pinned page writably in do_swap_page(), or (c) we could just make do_wp_page() not COW the pinned page (which was what we historically did before that "mm: do_wp_page() simplification" commit). But while all of them are equally valid models for breaking this chain, not putting pinned pages into the swap cache in the first place is the simplest one by far. It's also the safest one: the reason why do_wp_page() was changed in the first place was that getting the "can I re-use this page" wrong is so fraught with errors. If you do it wrong, you end up with an incorrectly shared page. As a result, using "page_maybe_dma_pinned()" in either do_wp_page() or do_swap_page() would be a serious bug since it is only a (very good) heuristic. Re-using the page requires a hard black-and-white rule with no room for ambiguity. In contrast, saying "this page is very likely dma pinned, so let's not add it to the swap cache and try to unmap it" is an obviously safe thing to do, and if the heuristic might very rarely be a false positive, no harm is done. Fixes: 09854ba94c6a ("mm: do_wp_page() simplification") Reported-and-tested-by: Martin Raiber Cc: Pavel Begunkov Cc: Jens Axboe Cc: Peter Xu Signed-off-by: Linus Torvalds --- mm/vmscan.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/mm/vmscan.c b/mm/vmscan.c index 257cba79a96dd..b1b574ad199d2 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -1238,6 +1238,8 @@ static unsigned int shrink_page_list(struct list_head *page_list, if (!PageSwapCache(page)) { if (!(sc->gfp_mask & __GFP_IO)) goto keep_locked; + if (page_maybe_dma_pinned(page)) + goto keep_locked; if (PageTransHuge(page)) { /* cannot split THP, skip it */ if (!can_split_huge_page(page, NULL)) -- GitLab From 19c329f6808995b142b3966301f217c831e7cf31 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 17 Jan 2021 16:37:05 -0800 Subject: [PATCH 0974/2012] Linux 5.11-rc4 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 9e73f82e0d863..b0e4767735dca 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 5 PATCHLEVEL = 11 SUBLEVEL = 0 -EXTRAVERSION = -rc3 +EXTRAVERSION = -rc4 NAME = Kleptomaniac Octopus # *DOCUMENTATION* -- GitLab From 2be449fcf38ff7e44cf76a2bba1376e923637eb1 Mon Sep 17 00:00:00 2001 From: Jiapeng Zhong Date: Thu, 14 Jan 2021 17:09:20 +0800 Subject: [PATCH 0975/2012] fs/cifs: Assign boolean values to a bool variable Fix the following coccicheck warnings: ./fs/cifs/connect.c:3386:2-21: WARNING: Assignment of 0/1 to bool variable. Reported-by: Abaci Robot Signed-off-by: Jiapeng Zhong Signed-off-by: Steve French --- fs/cifs/connect.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 5d39129406ea6..7c3325c0fadc8 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -2195,7 +2195,7 @@ cifs_get_tcon(struct cifs_ses *ses, struct smb3_fs_context *ctx) if (ses->server->capabilities & SMB2_GLOBAL_CAP_DIRECTORY_LEASING) tcon->nohandlecache = ctx->nohandlecache; else - tcon->nohandlecache = 1; + tcon->nohandlecache = true; tcon->nodelete = ctx->nodelete; tcon->local_lease = ctx->local_lease; INIT_LIST_HEAD(&tcon->pending_opens); -- GitLab From 16a78851e1f52eaed7034b75707d3662b4b13b77 Mon Sep 17 00:00:00 2001 From: Jiapeng Zhong Date: Thu, 14 Jan 2021 18:02:23 +0800 Subject: [PATCH 0976/2012] fs/cifs: Simplify bool comparison. Fix the follow warnings: ./fs/cifs/connect.c: WARNING: Comparison of 0/1 to bool variable Reported-by: Abaci Robot Signed-off-by: Jiapeng Zhong Signed-off-by: Steve French --- fs/cifs/connect.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 7c3325c0fadc8..c8ef24bac94fc 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -2628,7 +2628,7 @@ void reset_cifs_unix_caps(unsigned int xid, struct cifs_tcon *tcon, } else if (ctx) tcon->unix_ext = 1; /* Unix Extensions supported */ - if (tcon->unix_ext == 0) { + if (!tcon->unix_ext) { cifs_dbg(FYI, "Unix extensions disabled so not set on reconnect\n"); return; } -- GitLab From bd9dcef67ffcae2de49e319fba349df76472fd10 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 15 Jan 2021 11:11:23 -0800 Subject: [PATCH 0977/2012] x86/xen: fix 'nopvspin' build error MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix build error in x86/xen/ when PARAVIRT_SPINLOCKS is not enabled. Fixes this build error: ../arch/x86/xen/smp_hvm.c: In function ‘xen_hvm_smp_init’: ../arch/x86/xen/smp_hvm.c:77:3: error: ‘nopvspin’ undeclared (first use in this function) nopvspin = true; Fixes: 3d7746bea925 ("x86/xen: Fix xen_hvm_smp_init() when vector callback not available") Signed-off-by: Randy Dunlap Reviewed-by: Juergen Gross Cc: David Woodhouse Cc: Juergen Gross Link: https://lore.kernel.org/r/20210115191123.27572-1-rdunlap@infradead.org Signed-off-by: Juergen Gross --- arch/x86/xen/smp_hvm.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/xen/smp_hvm.c b/arch/x86/xen/smp_hvm.c index 056430a1080bb..6ff3c887e0b99 100644 --- a/arch/x86/xen/smp_hvm.c +++ b/arch/x86/xen/smp_hvm.c @@ -74,7 +74,9 @@ void __init xen_hvm_smp_init(void) smp_ops.cpu_die = xen_hvm_cpu_die; if (!xen_have_vector_callback) { +#ifdef CONFIG_PARAVIRT_SPINLOCKS nopvspin = true; +#endif return; } -- GitLab From eb363edace688898956b99e48daa8d7e05cee795 Mon Sep 17 00:00:00 2001 From: Bruno Thomsen Date: Mon, 11 Jan 2021 16:15:37 +0100 Subject: [PATCH 0978/2012] ARM: dts: imx7d-flex-concentrator: fix pcf2127 reset MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit RTC pcf2127 device driver has changed default behaviour of the watchdog feature in v5.11-rc1. Now you need to explicitly enable it with a device tree property, "reset-source", when used in the board design. Fixes: 71ac13457d9d ("rtc: pcf2127: only use watchdog when explicitly available") Signed-off-by: Bruno Thomsen Cc: Bruno Thomsen Cc: Uwe Kleine-König Cc: Rasmus Villemoes Reviewed-by: Alexandre Belloni Signed-off-by: Shawn Guo --- arch/arm/boot/dts/imx7d-flex-concentrator.dts | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/boot/dts/imx7d-flex-concentrator.dts b/arch/arm/boot/dts/imx7d-flex-concentrator.dts index 84b095279e656..bd6b5285aa8d2 100644 --- a/arch/arm/boot/dts/imx7d-flex-concentrator.dts +++ b/arch/arm/boot/dts/imx7d-flex-concentrator.dts @@ -115,6 +115,7 @@ compatible = "nxp,pcf2127"; reg = <0>; spi-max-frequency = <2000000>; + reset-source; }; }; -- GitLab From a88afa46b86ff461c89cc33fc3a45267fff053e8 Mon Sep 17 00:00:00 2001 From: Max Krummenacher Date: Mon, 11 Jan 2021 16:17:04 +0100 Subject: [PATCH 0979/2012] ARM: imx: build suspend-imx6.S with arm instruction set When the kernel is configured to use the Thumb-2 instruction set "suspend-to-memory" fails to resume. Observed on a Colibri iMX6ULL (i.MX 6ULL) and Apalis iMX6 (i.MX 6Q). It looks like the CPU resumes unconditionally in ARM instruction mode and then chokes on the presented Thumb-2 code it should execute. Fix this by using the arm instruction set for all code in suspend-imx6.S. Signed-off-by: Max Krummenacher Fixes: df595746fa69 ("ARM: imx: add suspend in ocram support for i.mx6q") Acked-by: Oleksandr Suvorov Signed-off-by: Shawn Guo --- arch/arm/mach-imx/suspend-imx6.S | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/mach-imx/suspend-imx6.S b/arch/arm/mach-imx/suspend-imx6.S index 1eabf2d2834be..e06f946b75b96 100644 --- a/arch/arm/mach-imx/suspend-imx6.S +++ b/arch/arm/mach-imx/suspend-imx6.S @@ -67,6 +67,7 @@ #define MX6Q_CCM_CCR 0x0 .align 3 + .arm .macro sync_l2_cache -- GitLab From 2cc0bfc9c12784188482a8f3d751d44af45b0d97 Mon Sep 17 00:00:00 2001 From: Russell King Date: Thu, 14 Jan 2021 10:53:06 +0000 Subject: [PATCH 0980/2012] ARM: dts: imx6qdl-sr-som: fix some cubox-i platforms The PHY address bit 2 is configured by the LED pin. Attaching a LED to this pin is not sufficient to guarantee this configuration pin is correctly read. This leads to some platforms having their PHY at address 0 and others at address 4. If there is no phy-handle specified, the FEC driver will scan the PHY bus for a PHY and use that. Consequently, adding the DT configuration of the PHY and the phy properties to the FEC driver broke some boards. Fix this by removing the phy-handle property, and listing two PHY entries for both possible PHY addresses, so that the DT configuration for the PHY can be found by the PHY driver. Fixes: 86b08bd5b994 ("ARM: dts: imx6-sr-som: add ethernet PHY configuration") Reported-by: Christoph Mattheis Signed-off-by: Russell King Signed-off-by: Shawn Guo --- arch/arm/boot/dts/imx6qdl-sr-som.dtsi | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/imx6qdl-sr-som.dtsi b/arch/arm/boot/dts/imx6qdl-sr-som.dtsi index b06577808ff4e..7e4e5fd0143a1 100644 --- a/arch/arm/boot/dts/imx6qdl-sr-som.dtsi +++ b/arch/arm/boot/dts/imx6qdl-sr-som.dtsi @@ -53,7 +53,6 @@ &fec { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_microsom_enet_ar8035>; - phy-handle = <&phy>; phy-mode = "rgmii-id"; phy-reset-duration = <2>; phy-reset-gpios = <&gpio4 15 GPIO_ACTIVE_LOW>; @@ -63,10 +62,19 @@ #address-cells = <1>; #size-cells = <0>; - phy: ethernet-phy@0 { + /* + * The PHY can appear at either address 0 or 4 due to the + * configuration (LED) pin not being pulled sufficiently. + */ + ethernet-phy@0 { reg = <0>; qca,clk-out-frequency = <125000000>; }; + + ethernet-phy@4 { + reg = <4>; + qca,clk-out-frequency = <125000000>; + }; }; }; -- GitLab From b764eb65e1c932f0500b30fcc06417cd9bc3e583 Mon Sep 17 00:00:00 2001 From: Jacky Bai Date: Fri, 15 Jan 2021 09:18:05 +0800 Subject: [PATCH 0981/2012] arm64: dts: imx8mp: Correct the gpio ranges of gpio3 On i.MX8MP, The GPIO3's secondary gpio-ranges's 'gpio controller offset' cell value should be 26, so correct it. Signed-off-by: Jacky Bai Fixes: 6d9b8d20431f ("arm64: dts: freescale: Add i.MX8MP dtsi support") Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/imx8mp.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/freescale/imx8mp.dtsi b/arch/arm64/boot/dts/freescale/imx8mp.dtsi index ecccfbb4f5ad6..23f5a5e37167b 100644 --- a/arch/arm64/boot/dts/freescale/imx8mp.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mp.dtsi @@ -266,7 +266,7 @@ #gpio-cells = <2>; interrupt-controller; #interrupt-cells = <2>; - gpio-ranges = <&iomuxc 0 56 26>, <&iomuxc 0 144 4>; + gpio-ranges = <&iomuxc 0 56 26>, <&iomuxc 26 144 4>; }; gpio4: gpio@30230000 { -- GitLab From 82c082784e03a9a9c043345f9bc04bc8254cf6da Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 15 Jan 2021 19:32:59 -0800 Subject: [PATCH 0982/2012] firmware: imx: select SOC_BUS to fix firmware build Fix build error in firmware/imx/ selecting SOC_BUS. riscv32-linux-ld: drivers/firmware/imx/imx-scu-soc.o: in function `.L9': imx-scu-soc.c:(.text+0x1b0): undefined reference to `soc_device_register' Fixes: edbee095fafb ("firmware: imx: add SCU firmware driver support") Signed-off-by: Randy Dunlap Reported-by: kernel test robot Cc: Atish Patra Cc: Palmer Dabbelt Cc: Ard Biesheuvel Cc: Anson Huang Cc: Daniel Baluta Reviewed-by: Dong Aisheng Signed-off-by: Shawn Guo --- drivers/firmware/imx/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/firmware/imx/Kconfig b/drivers/firmware/imx/Kconfig index 1d2e5b85d7ca8..c027d99f2a599 100644 --- a/drivers/firmware/imx/Kconfig +++ b/drivers/firmware/imx/Kconfig @@ -13,6 +13,7 @@ config IMX_DSP config IMX_SCU bool "IMX SCU Protocol driver" depends on IMX_MBOX + select SOC_BUS help The System Controller Firmware (SCFW) is a low-level system function which runs on a dedicated Cortex-M core to provide power, clock, and -- GitLab From b3c95d0bdb0855b1f28370629e9eebec6bceac17 Mon Sep 17 00:00:00 2001 From: Anshuman Gupta Date: Mon, 11 Jan 2021 13:41:02 +0530 Subject: [PATCH 0983/2012] drm/i915/hdcp: Update CP property in update_pipe When crtc state need_modeset is true it is not necessary it is going to be a real modeset, it can turns to be a fastset instead of modeset. This turns content protection property to be DESIRED and hdcp update_pipe left with property to be in DESIRED state but actual hdcp->value was ENABLED. This issue is caught with DP MST setup, where we have multiple connector in same DP_MST topology. When disabling HDCP on one of DP MST connector leads to set the crtc state need_modeset to true for all other crtc driving the other DP-MST topology connectors. This turns up other DP MST connectors CP property to be DESIRED despite the actual hdcp->value is ENABLED. Above scenario fails the DP MST HDCP IGT test, disabling HDCP on one MST stream should not cause to disable HDCP on another MST stream on same DP MST topology. v2: - Fixed connector->base.registration_state == DRM_CONNECTOR_REGISTERED WARN_ON. v3: - Commit log improvement. [Uma] - Added a comment before scheduling prop_work. [Uma] Fixes: 33f9a623bfc6 ("drm/i915/hdcp: Update CP as per the kernel internal state") Cc: Ramalingam C Reviewed-by: Uma Shankar Reviewed-by: Ramalingam C Tested-by: Karthik B S Signed-off-by: Anshuman Gupta Link: https://patchwork.freedesktop.org/patch/msgid/20210111081120.28417-2-anshuman.gupta@intel.com (cherry picked from commit d276e16702e2d634094f75f69df3b493f359fe31) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_hdcp.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/gpu/drm/i915/display/intel_hdcp.c b/drivers/gpu/drm/i915/display/intel_hdcp.c index b2a4bbcfdcd25..eee8263405b9f 100644 --- a/drivers/gpu/drm/i915/display/intel_hdcp.c +++ b/drivers/gpu/drm/i915/display/intel_hdcp.c @@ -2221,6 +2221,14 @@ void intel_hdcp_update_pipe(struct intel_atomic_state *state, desired_and_not_enabled = hdcp->value != DRM_MODE_CONTENT_PROTECTION_ENABLED; mutex_unlock(&hdcp->mutex); + /* + * If HDCP already ENABLED and CP property is DESIRED, schedule + * prop_work to update correct CP property to user space. + */ + if (!desired_and_not_enabled && !content_protection_type_changed) { + drm_connector_get(&connector->base); + schedule_work(&hdcp->prop_work); + } } if (desired_and_not_enabled || content_protection_type_changed) -- GitLab From 8662e1119a7d1baa1b2001689b2923e9050754bd Mon Sep 17 00:00:00 2001 From: Anshuman Gupta Date: Mon, 11 Jan 2021 13:41:03 +0530 Subject: [PATCH 0984/2012] drm/i915/hdcp: Get conn while content_type changed Get DRM connector reference count while scheduling a prop work to avoid any possible destroy of DRM connector when it is in DRM_CONNECTOR_REGISTERED state. Fixes: a6597faa2d59 ("drm/i915: Protect workers against disappearing connectors") Cc: Sean Paul Cc: Ramalingam C Reviewed-by: Uma Shankar Reviewed-by: Ramalingam C Tested-by: Karthik B S Signed-off-by: Anshuman Gupta Link: https://patchwork.freedesktop.org/patch/msgid/20210111081120.28417-3-anshuman.gupta@intel.com (cherry picked from commit b3c6661aad979ec3d4f5675cf3e6a35828607d6a) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_hdcp.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/i915/display/intel_hdcp.c b/drivers/gpu/drm/i915/display/intel_hdcp.c index eee8263405b9f..b9d8825e2bb12 100644 --- a/drivers/gpu/drm/i915/display/intel_hdcp.c +++ b/drivers/gpu/drm/i915/display/intel_hdcp.c @@ -2210,6 +2210,7 @@ void intel_hdcp_update_pipe(struct intel_atomic_state *state, if (content_protection_type_changed) { mutex_lock(&hdcp->mutex); hdcp->value = DRM_MODE_CONTENT_PROTECTION_DESIRED; + drm_connector_get(&connector->base); schedule_work(&hdcp->prop_work); mutex_unlock(&hdcp->mutex); } -- GitLab From bf9eee249ac2032521677dd74e31ede5429afbc0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Wed, 13 Jan 2021 14:02:04 +0100 Subject: [PATCH 0985/2012] drm/ttm: stop using GFP_TRANSHUGE_LIGHT MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The only flag we really need is __GFP_NOMEMALLOC, highmem depends on dma32 and moveable/compound should never be set in the first place. Signed-off-by: Christian König Link: https://patchwork.freedesktop.org/patch/413812/ Link: https://patchwork.freedesktop.org/patch/413964/ Fixes: d099fc8f540a ("drm/ttm: new TT backend allocation pool v3") Reported-by: Hans de Goede Reviewed-by: Daniel Vetter --- drivers/gpu/drm/ttm/ttm_pool.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c index 8cd776adc592b..11e0313db0ea6 100644 --- a/drivers/gpu/drm/ttm/ttm_pool.c +++ b/drivers/gpu/drm/ttm/ttm_pool.c @@ -79,12 +79,13 @@ static struct page *ttm_pool_alloc_page(struct ttm_pool *pool, gfp_t gfp_flags, struct page *p; void *vaddr; - if (order) { - gfp_flags |= GFP_TRANSHUGE_LIGHT | __GFP_NORETRY | + /* Don't set the __GFP_COMP flag for higher order allocations. + * Mapping pages directly into an userspace process and calling + * put_page() on a TTM allocated page is illegal. + */ + if (order) + gfp_flags |= __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_KSWAPD_RECLAIM; - gfp_flags &= ~__GFP_MOVABLE; - gfp_flags &= ~__GFP_COMP; - } if (!pool->use_dma_alloc) { p = alloc_pages(gfp_flags, order); -- GitLab From 87cb9af9f8a2b242cea7f828206d619e8cbb6a1a Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 18 Jan 2021 08:58:14 +0100 Subject: [PATCH 0986/2012] ALSA: usb-audio: Fix UAC1 rate setup for secondary endpoints The current sample rate setup function for UAC1 assumes only the first endpoint retrieved from the interface:altset pair, but the rate set up may be needed also for the secondary endpoint. Also, retrieving the endpoint number from the interface descriptor is redundant; we have already the target endpoint in the given audioformat object. This patch simplifies the code and corrects the target endpoint as described in the above. It simply refers to fmt->endpoint directly. Also, this patch drops the pioneer_djm_set_format_quirk() that is caleld from snd_usb_set_format_quirk(); this function does the sample rate setup but for the capture endpoint (0x82), and that's exactly what the change above fixes. Link: https://lore.kernel.org/r/20210118075816.25068-2-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/usb/clock.c | 21 ++++++--------------- sound/usb/quirks.c | 28 ---------------------------- 2 files changed, 6 insertions(+), 43 deletions(-) diff --git a/sound/usb/clock.c b/sound/usb/clock.c index 31051f2be46da..dc68ed65e4787 100644 --- a/sound/usb/clock.c +++ b/sound/usb/clock.c @@ -485,18 +485,9 @@ static int set_sample_rate_v1(struct snd_usb_audio *chip, const struct audioformat *fmt, int rate) { struct usb_device *dev = chip->dev; - struct usb_host_interface *alts; - unsigned int ep; unsigned char data[3]; int err, crate; - alts = snd_usb_get_host_interface(chip, fmt->iface, fmt->altsetting); - if (!alts) - return -EINVAL; - if (get_iface_desc(alts)->bNumEndpoints < 1) - return -EINVAL; - ep = get_endpoint(alts, 0)->bEndpointAddress; - /* if endpoint doesn't have sampling rate control, bail out */ if (!(fmt->attributes & UAC_EP_CS_ATTR_SAMPLE_RATE)) return 0; @@ -506,11 +497,11 @@ static int set_sample_rate_v1(struct snd_usb_audio *chip, data[2] = rate >> 16; err = snd_usb_ctl_msg(dev, usb_sndctrlpipe(dev, 0), UAC_SET_CUR, USB_TYPE_CLASS | USB_RECIP_ENDPOINT | USB_DIR_OUT, - UAC_EP_CS_ATTR_SAMPLE_RATE << 8, ep, - data, sizeof(data)); + UAC_EP_CS_ATTR_SAMPLE_RATE << 8, + fmt->endpoint, data, sizeof(data)); if (err < 0) { dev_err(&dev->dev, "%d:%d: cannot set freq %d to ep %#x\n", - fmt->iface, fmt->altsetting, rate, ep); + fmt->iface, fmt->altsetting, rate, fmt->endpoint); return err; } @@ -524,11 +515,11 @@ static int set_sample_rate_v1(struct snd_usb_audio *chip, err = snd_usb_ctl_msg(dev, usb_rcvctrlpipe(dev, 0), UAC_GET_CUR, USB_TYPE_CLASS | USB_RECIP_ENDPOINT | USB_DIR_IN, - UAC_EP_CS_ATTR_SAMPLE_RATE << 8, ep, - data, sizeof(data)); + UAC_EP_CS_ATTR_SAMPLE_RATE << 8, + fmt->endpoint, data, sizeof(data)); if (err < 0) { dev_err(&dev->dev, "%d:%d: cannot get freq at ep %#x\n", - fmt->iface, fmt->altsetting, ep); + fmt->iface, fmt->altsetting, fmt->endpoint); chip->sample_rate_read_error++; return 0; /* some devices don't support reading */ } diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index 89e172642d98b..e196e364cef19 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -1470,30 +1470,6 @@ static void set_format_emu_quirk(struct snd_usb_substream *subs, subs->pkt_offset_adj = (emu_samplerate_id >= EMU_QUIRK_SR_176400HZ) ? 4 : 0; } - -/* - * Pioneer DJ DJM-900NXS2 - * Device needs to know the sample rate each time substream is started - */ -static int pioneer_djm_set_format_quirk(struct snd_usb_substream *subs) -{ - unsigned int cur_rate = subs->data_endpoint->cur_rate; - /* Convert sample rate value to little endian */ - u8 sr[3]; - - sr[0] = cur_rate & 0xff; - sr[1] = (cur_rate >> 8) & 0xff; - sr[2] = (cur_rate >> 16) & 0xff; - - /* Configure device */ - usb_set_interface(subs->dev, 0, 1); - snd_usb_ctl_msg(subs->stream->chip->dev, - usb_rcvctrlpipe(subs->stream->chip->dev, 0), - 0x01, 0x22, 0x0100, 0x0082, &sr, 0x0003); - - return 0; -} - void snd_usb_set_format_quirk(struct snd_usb_substream *subs, const struct audioformat *fmt) { @@ -1504,10 +1480,6 @@ void snd_usb_set_format_quirk(struct snd_usb_substream *subs, case USB_ID(0x041e, 0x3f19): /* E-Mu 0204 USB */ set_format_emu_quirk(subs, fmt); break; - case USB_ID(0x2b73, 0x000a): /* Pioneer DJ DJM-900NXS2 */ - case USB_ID(0x2b73, 0x0017): /* Pioneer DJ DJM-250MK2 */ - pioneer_djm_set_format_quirk(subs); - break; case USB_ID(0x534d, 0x2109): /* MacroSilicon MS2109 */ subs->stream_offset_adj = 2; break; -- GitLab From 3784d449d795ba11a92681bd22d183329f976421 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 18 Jan 2021 08:58:15 +0100 Subject: [PATCH 0987/2012] ALSA: usb-audio: Set sample rate for all sharing EPs on UAC1 The UAC2/3 sample rate setup is based on the clock node, which is usually shared in the interface, and can't be re-setup without deselecting the interface once, and that's how the current code behaves. OTOH, the sample rate setup of UAC1 is per endpoint, hence we basically need to call for each endpoint usage even if those share the same interface. This patch fixes the behavior of UAC1 to call always snd_usb_init_sample_rate() in snd_usb_endpoint_configure(). Fixes: bf6313a0ff76 ("ALSA: usb-audio: Refactor endpoint management") Link: https://lore.kernel.org/r/20210118075816.25068-3-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/usb/endpoint.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/sound/usb/endpoint.c b/sound/usb/endpoint.c index fe73fe3ff2bca..8e568823c9924 100644 --- a/sound/usb/endpoint.c +++ b/sound/usb/endpoint.c @@ -1252,6 +1252,15 @@ int snd_usb_endpoint_configure(struct snd_usb_audio *chip, /* If the interface has been already set up, just set EP parameters */ if (!ep->iface_ref->need_setup) { + /* sample rate setup of UAC1 is per endpoint, and we need + * to update at each EP configuration + */ + if (ep->cur_audiofmt->protocol == UAC_VERSION_1) { + err = snd_usb_init_sample_rate(chip, ep->cur_audiofmt, + ep->cur_rate); + if (err < 0) + goto unlock; + } err = snd_usb_endpoint_set_params(chip, ep); if (err < 0) goto unlock; -- GitLab From 532a208ad61018b586cebfca8431291fe9c10ce7 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 18 Jan 2021 08:58:16 +0100 Subject: [PATCH 0988/2012] ALSA: usb-audio: Avoid implicit feedback on Pioneer devices For addressing the regression on Pioneer devices, we recently corrected the quirk code to enable the implicit feedback mode on those devices properly. However, the devices still showed problems with the full duplex operations with JACK, and after debug sessions, we figured out that the older kernels that had worked with JACK also didn't use the implicit feedback mode at all although they had the quirk code to enable it; instead, the old code worked just to skip the normal sync endpoint setup that would have been detected without it. IOW, what broke without the implicit-fb quirk in the past was the application of the normal sync endpoint that is actually the capture data endpoint on these devices. This patch covers the overseen piece: it modifies the quirk code again not to enable the implicit feedback mode but just to make the driver skipping the sync endpoint detection. This made the driver working with JACK full-duplex mode again. Still it's not quite clear why the implicit feedback doesn't work on those devices yet; maybe it's about some issues in the URB setup. But at least, with this patch, the driver should work in the level of the older kernels again. Fixes: 167c9dc84ec3 ("ALSA: usb-audio: Fix implicit feedback sync setup for Pioneer devices") Link: https://lore.kernel.org/r/20210118075816.25068-4-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/usb/implicit.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/sound/usb/implicit.c b/sound/usb/implicit.c index 1ac2cc6c33fb1..521cc846d9d9f 100644 --- a/sound/usb/implicit.c +++ b/sound/usb/implicit.c @@ -175,11 +175,13 @@ static int add_roland_implicit_fb(struct snd_usb_audio *chip, ifnum, alts); } -/* Pioneer devices: playback and capture streams sharing the same iface/altset +/* Playback and capture EPs on Pioneer devices share the same iface/altset, + * but they don't seem working with the implicit fb mode well, hence we + * just return as if the sync were already set up. */ -static int add_pioneer_implicit_fb(struct snd_usb_audio *chip, - struct audioformat *fmt, - struct usb_host_interface *alts) +static int skip_pioneer_sync_ep(struct snd_usb_audio *chip, + struct audioformat *fmt, + struct usb_host_interface *alts) { struct usb_endpoint_descriptor *epd; @@ -194,8 +196,7 @@ static int add_pioneer_implicit_fb(struct snd_usb_audio *chip, (epd->bmAttributes & USB_ENDPOINT_USAGE_MASK) != USB_ENDPOINT_USAGE_IMPLICIT_FB)) return 0; - return add_implicit_fb_sync_ep(chip, fmt, epd->bEndpointAddress, 1, - alts->desc.bInterfaceNumber, alts); + return 1; /* don't handle with the implicit fb, just skip sync EP */ } static int __add_generic_implicit_fb(struct snd_usb_audio *chip, @@ -298,11 +299,11 @@ static int audioformat_implicit_fb_quirk(struct snd_usb_audio *chip, return 1; } - /* Pioneer devices implicit feedback with vendor spec class */ + /* Pioneer devices with vendor spec class */ if (attr == USB_ENDPOINT_SYNC_ASYNC && alts->desc.bInterfaceClass == USB_CLASS_VENDOR_SPEC && USB_ID_VENDOR(chip->usb_id) == 0x2b73 /* Pioneer */) { - if (add_pioneer_implicit_fb(chip, fmt, alts)) + if (skip_pioneer_sync_ep(chip, fmt, alts)) return 1; } -- GitLab From 2fe7c2f99440d52613e1cf845c96e8e463c28111 Mon Sep 17 00:00:00 2001 From: Kent Gibson Date: Thu, 7 Jan 2021 12:00:19 +0800 Subject: [PATCH 0989/2012] tools: gpio: fix %llu warning in gpio-event-mon.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some platforms, such as mips64, don't map __u64 to long long unsigned int so using %llu produces a warning: gpio-event-mon.c:110:37: warning: format ‘%llu’ expects argument of type ‘long long unsigned int’, but argument 3 has type ‘__u64’ {aka ‘long unsigned int’} [-Wformat=] 110 | fprintf(stdout, "GPIO EVENT at %llu on line %d (%d|%d) ", | ~~~^ | | | long long unsigned int | %lu 111 | event.timestamp_ns, event.offset, event.line_seqno, | ~~~~~~~~~~~~~~~~~~ | | | __u64 {aka long unsigned int} Replace the %llu with PRIu64 and cast the argument to uint64_t. Fixes: 03fd11b03362 ("tools/gpio/gpio-event-mon: fix warning") Signed-off-by: Kent Gibson Signed-off-by: Bartosz Golaszewski --- tools/gpio/gpio-event-mon.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/gpio/gpio-event-mon.c b/tools/gpio/gpio-event-mon.c index cacd66ad79261..a2b233fdb572e 100644 --- a/tools/gpio/gpio-event-mon.c +++ b/tools/gpio/gpio-event-mon.c @@ -107,8 +107,8 @@ int monitor_device(const char *device_name, ret = -EIO; break; } - fprintf(stdout, "GPIO EVENT at %llu on line %d (%d|%d) ", - event.timestamp_ns, event.offset, event.line_seqno, + fprintf(stdout, "GPIO EVENT at %" PRIu64 " on line %d (%d|%d) ", + (uint64_t)event.timestamp_ns, event.offset, event.line_seqno, event.seqno); switch (event.id) { case GPIO_V2_LINE_EVENT_RISING_EDGE: -- GitLab From 1fc7c1ef37f86f207b4db40aba57084bb2f6a69a Mon Sep 17 00:00:00 2001 From: Kent Gibson Date: Thu, 7 Jan 2021 12:00:20 +0800 Subject: [PATCH 0990/2012] tools: gpio: fix %llu warning in gpio-watch.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some platforms, such as mips64, don't map __u64 to long long unsigned int so using %llu produces a warning: gpio-watch.c: In function ‘main’: gpio-watch.c:89:30: warning: format ‘%llu’ expects argument of type ‘long long unsigned int’, but argument 4 has type ‘__u64’ {aka ‘long unsigned int’} [-Wformat=] 89 | printf("line %u: %s at %llu\n", | ~~~^ | | | long long unsigned int | %lu 90 | chg.info.offset, event, chg.timestamp_ns); | ~~~~~~~~~~~~~~~~ | | | __u64 {aka long unsigned int} Replace the %llu with PRIu64 and cast the argument to uint64_t. Fixes: 33f0c47b8fb4 ("tools: gpio: implement gpio-watch") Signed-off-by: Kent Gibson Signed-off-by: Bartosz Golaszewski --- tools/gpio/gpio-watch.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tools/gpio/gpio-watch.c b/tools/gpio/gpio-watch.c index f229ec62301b7..41e76d2441922 100644 --- a/tools/gpio/gpio-watch.c +++ b/tools/gpio/gpio-watch.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -86,8 +87,8 @@ int main(int argc, char **argv) return EXIT_FAILURE; } - printf("line %u: %s at %llu\n", - chg.info.offset, event, chg.timestamp_ns); + printf("line %u: %s at %" PRIu64 "\n", + chg.info.offset, event, (uint64_t)chg.timestamp_ns); } } -- GitLab From 33c74535b03ecf11359de14bc88302595b1de44f Mon Sep 17 00:00:00 2001 From: Nicolas Saenz Julienne Date: Fri, 15 Jan 2021 20:12:09 +0100 Subject: [PATCH 0991/2012] drm/vc4: Unify PCM card's driver_name User-space ALSA matches a card's driver name against an internal list of aliases in order to select the correct configuration for the system. When the driver name isn't defined, the match is performed against the card's name. With the introduction of RPi4 we now have two HDMI ports with two distinct audio cards. This is reflected in their names, making them different from previous RPi versions. With this, ALSA ultimately misses the board's configuration on RPi4. In order to avoid this, set "card->driver_name" to "vc4-hdmi" unanimously. Signed-off-by: Nicolas Saenz Julienne Fixes: f437bc1ec731 ("drm/vc4: drv: Support BCM2711") Reviewed-by: Takashi Iwai Signed-off-by: Maxime Ripard Link: https://patchwork.freedesktop.org/patch/msgid/20210115191209.12852-1-nsaenzjulienne@suse.de --- drivers/gpu/drm/vc4/vc4_hdmi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/vc4/vc4_hdmi.c b/drivers/gpu/drm/vc4/vc4_hdmi.c index 5551062205782..98cab0bbe92d8 100644 --- a/drivers/gpu/drm/vc4/vc4_hdmi.c +++ b/drivers/gpu/drm/vc4/vc4_hdmi.c @@ -1267,6 +1267,7 @@ static int vc4_hdmi_audio_init(struct vc4_hdmi *vc4_hdmi) card->dai_link = dai_link; card->num_links = 1; card->name = vc4_hdmi->variant->card_name; + card->driver_name = "vc4-hdmi"; card->dev = dev; card->owner = THIS_MODULE; -- GitLab From 488751a0ef9b5ce572c47301ce62d54fc6b5a74d Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 18 Jan 2021 09:53:32 +0000 Subject: [PATCH 0992/2012] drm/i915/gt: Prevent use of engine->wa_ctx after error On error we unpin and free the wa_ctx.vma, but do not clear any of the derived flags. During lrc_init, we look at the flags and attempt to dereference the wa_ctx.vma if they are set. To protect the error path where we try to limp along without the wa_ctx, make sure we clear those flags! Reported-by: Matt Roper Fixes: 604a8f6f1e33 ("drm/i915/lrc: Only enable per-context and per-bb buffers if set") Signed-off-by: Chris Wilson Cc: Matt Roper Cc: Tvrtko Ursulin Cc: Mika Kuoppala Cc: # v4.15+ Reviewed-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20210108204026.20682-1-chris@chris-wilson.co.uk (cherry-picked from 5b4dc95cf7f573e927fbbd406ebe54225d41b9b2) Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20210118095332.458813-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/intel_lrc.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 7614a3d24fca5..26c7d0a50585a 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -3988,6 +3988,9 @@ err: static void lrc_destroy_wa_ctx(struct intel_engine_cs *engine) { i915_vma_unpin_and_release(&engine->wa_ctx.vma, 0); + + /* Called on error unwind, clear all flags to prevent further use */ + memset(&engine->wa_ctx, 0, sizeof(engine->wa_ctx)); } typedef u32 *(*wa_bb_func_t)(struct intel_engine_cs *engine, u32 *batch); -- GitLab From 171a8e99828144050015672016dd63494c6d200a Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Mon, 18 Jan 2021 10:07:24 +0000 Subject: [PATCH 0993/2012] drm/i915/pmu: Don't grab wakeref when enabling events Chris found a CI report which points out calling intel_runtime_pm_get from inside i915_pmu_enable hook is not allowed since it can be invoked from hard irq context. This is something we knew but forgot, so lets fix it once again. We do this by syncing the internal book keeping with hardware rc6 counter on driver load. v2: * Always sync on parking and fully sync on init. Signed-off-by: Tvrtko Ursulin Fixes: f4e9894b6952 ("drm/i915/pmu: Correct the rc6 offset upon enabling") Cc: Chris Wilson Reviewed-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20201214094349.3563876-1-tvrtko.ursulin@linux.intel.com (cherry picked from commit dbe13ae1d6abaab417edf3c37601c6a56594a4cd) Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20210118100724.465555-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_pmu.c | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c index d76685ce03998..9856479b56d8b 100644 --- a/drivers/gpu/drm/i915/i915_pmu.c +++ b/drivers/gpu/drm/i915/i915_pmu.c @@ -184,13 +184,24 @@ static u64 get_rc6(struct intel_gt *gt) return val; } -static void park_rc6(struct drm_i915_private *i915) +static void init_rc6(struct i915_pmu *pmu) { - struct i915_pmu *pmu = &i915->pmu; + struct drm_i915_private *i915 = container_of(pmu, typeof(*i915), pmu); + intel_wakeref_t wakeref; - if (pmu->enable & config_enabled_mask(I915_PMU_RC6_RESIDENCY)) + with_intel_runtime_pm(i915->gt.uncore->rpm, wakeref) { pmu->sample[__I915_SAMPLE_RC6].cur = __get_rc6(&i915->gt); + pmu->sample[__I915_SAMPLE_RC6_LAST_REPORTED].cur = + pmu->sample[__I915_SAMPLE_RC6].cur; + pmu->sleep_last = ktime_get(); + } +} +static void park_rc6(struct drm_i915_private *i915) +{ + struct i915_pmu *pmu = &i915->pmu; + + pmu->sample[__I915_SAMPLE_RC6].cur = __get_rc6(&i915->gt); pmu->sleep_last = ktime_get(); } @@ -201,6 +212,7 @@ static u64 get_rc6(struct intel_gt *gt) return __get_rc6(gt); } +static void init_rc6(struct i915_pmu *pmu) { } static void park_rc6(struct drm_i915_private *i915) {} #endif @@ -612,10 +624,8 @@ static void i915_pmu_enable(struct perf_event *event) container_of(event->pmu, typeof(*i915), pmu.base); unsigned int bit = event_enabled_bit(event); struct i915_pmu *pmu = &i915->pmu; - intel_wakeref_t wakeref; unsigned long flags; - wakeref = intel_runtime_pm_get(&i915->runtime_pm); spin_lock_irqsave(&pmu->lock, flags); /* @@ -626,13 +636,6 @@ static void i915_pmu_enable(struct perf_event *event) GEM_BUG_ON(bit >= ARRAY_SIZE(pmu->enable_count)); GEM_BUG_ON(pmu->enable_count[bit] == ~0); - if (pmu->enable_count[bit] == 0 && - config_enabled_mask(I915_PMU_RC6_RESIDENCY) & BIT_ULL(bit)) { - pmu->sample[__I915_SAMPLE_RC6_LAST_REPORTED].cur = 0; - pmu->sample[__I915_SAMPLE_RC6].cur = __get_rc6(&i915->gt); - pmu->sleep_last = ktime_get(); - } - pmu->enable |= BIT_ULL(bit); pmu->enable_count[bit]++; @@ -673,8 +676,6 @@ static void i915_pmu_enable(struct perf_event *event) * an existing non-zero value. */ local64_set(&event->hw.prev_count, __i915_pmu_event_read(event)); - - intel_runtime_pm_put(&i915->runtime_pm, wakeref); } static void i915_pmu_disable(struct perf_event *event) @@ -1130,6 +1131,7 @@ void i915_pmu_register(struct drm_i915_private *i915) hrtimer_init(&pmu->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); pmu->timer.function = i915_sample; pmu->cpuhp.cpu = -1; + init_rc6(pmu); if (!is_igp(i915)) { pmu->name = kasprintf(GFP_KERNEL, -- GitLab From 45db630e5f7ec83817c57c8ae387fe219bd42adf Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 18 Jan 2021 10:17:55 +0000 Subject: [PATCH 0994/2012] drm/i915: Check for rq->hwsp validity after acquiring RCU lock Since we allow removing the timeline map at runtime, there is a risk that rq->hwsp points into a stale page. To control that risk, we hold the RCU read lock while reading *rq->hwsp, but we missed a couple of important barriers. First, the unpinning / removal of the timeline map must be after all RCU readers into that map are complete, i.e. after an rcu barrier (in this case courtesy of call_rcu()). Secondly, we must make sure that the rq->hwsp we are about to dereference under the RCU lock is valid. In this case, we make the rq->hwsp pointer safe during i915_request_retire() and so we know that rq->hwsp may become invalid only after the request has been signaled. Therefore is the request is not yet signaled when we acquire rq->hwsp under the RCU, we know that rq->hwsp will remain valid for the duration of the RCU read lock. This is a very small window that may lead to either considering the request not completed (causing a delay until the request is checked again, any wait for the request is not affected) or dereferencing an invalid pointer. Fixes: 3adac4689f58 ("drm/i915: Introduce concept of per-timeline (context) HWSP") Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: # v5.1+ Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20201218122421.18344-1-chris@chris-wilson.co.uk (cherry picked from commit 9bb36cf66091ddf2d8840e5aa705ad3c93a6279b) Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20210118101755.476744-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/intel_breadcrumbs.c | 9 ++--- drivers/gpu/drm/i915/gt/intel_timeline.c | 10 +++--- drivers/gpu/drm/i915/i915_request.h | 37 ++++++++++++++++++--- 3 files changed, 38 insertions(+), 18 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c index a24cc1ff08a0c..0625cbb3b4312 100644 --- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c @@ -134,11 +134,6 @@ static bool remove_signaling_context(struct intel_breadcrumbs *b, return true; } -static inline bool __request_completed(const struct i915_request *rq) -{ - return i915_seqno_passed(__hwsp_seqno(rq), rq->fence.seqno); -} - __maybe_unused static bool check_signal_order(struct intel_context *ce, struct i915_request *rq) { @@ -257,7 +252,7 @@ static void signal_irq_work(struct irq_work *work) list_for_each_entry_rcu(rq, &ce->signals, signal_link) { bool release; - if (!__request_completed(rq)) + if (!__i915_request_is_complete(rq)) break; if (!test_and_clear_bit(I915_FENCE_FLAG_SIGNAL, @@ -379,7 +374,7 @@ static void insert_breadcrumb(struct i915_request *rq) * straight onto a signaled list, and queue the irq worker for * its signal completion. */ - if (__request_completed(rq)) { + if (__i915_request_is_complete(rq)) { if (__signal_request(rq) && llist_add(&rq->signal_node, &b->signaled_requests)) irq_work_queue(&b->irq_work); diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.c b/drivers/gpu/drm/i915/gt/intel_timeline.c index 7ea94d201fe6f..8015964043eb7 100644 --- a/drivers/gpu/drm/i915/gt/intel_timeline.c +++ b/drivers/gpu/drm/i915/gt/intel_timeline.c @@ -126,6 +126,10 @@ static void __rcu_cacheline_free(struct rcu_head *rcu) struct intel_timeline_cacheline *cl = container_of(rcu, typeof(*cl), rcu); + /* Must wait until after all *rq->hwsp are complete before removing */ + i915_gem_object_unpin_map(cl->hwsp->vma->obj); + __idle_hwsp_free(cl->hwsp, ptr_unmask_bits(cl->vaddr, CACHELINE_BITS)); + i915_active_fini(&cl->active); kfree(cl); } @@ -133,11 +137,6 @@ static void __rcu_cacheline_free(struct rcu_head *rcu) static void __idle_cacheline_free(struct intel_timeline_cacheline *cl) { GEM_BUG_ON(!i915_active_is_idle(&cl->active)); - - i915_gem_object_unpin_map(cl->hwsp->vma->obj); - i915_vma_put(cl->hwsp->vma); - __idle_hwsp_free(cl->hwsp, ptr_unmask_bits(cl->vaddr, CACHELINE_BITS)); - call_rcu(&cl->rcu, __rcu_cacheline_free); } @@ -179,7 +178,6 @@ cacheline_alloc(struct intel_timeline_hwsp *hwsp, unsigned int cacheline) return ERR_CAST(vaddr); } - i915_vma_get(hwsp->vma); cl->hwsp = hwsp; cl->vaddr = page_pack_bits(vaddr, cacheline); diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h index 620b6fab2c5cf..92adfee30c7c0 100644 --- a/drivers/gpu/drm/i915/i915_request.h +++ b/drivers/gpu/drm/i915/i915_request.h @@ -434,7 +434,7 @@ static inline u32 hwsp_seqno(const struct i915_request *rq) static inline bool __i915_request_has_started(const struct i915_request *rq) { - return i915_seqno_passed(hwsp_seqno(rq), rq->fence.seqno - 1); + return i915_seqno_passed(__hwsp_seqno(rq), rq->fence.seqno - 1); } /** @@ -465,11 +465,19 @@ static inline bool __i915_request_has_started(const struct i915_request *rq) */ static inline bool i915_request_started(const struct i915_request *rq) { + bool result; + if (i915_request_signaled(rq)) return true; - /* Remember: started but may have since been preempted! */ - return __i915_request_has_started(rq); + result = true; + rcu_read_lock(); /* the HWSP may be freed at runtime */ + if (likely(!i915_request_signaled(rq))) + /* Remember: started but may have since been preempted! */ + result = __i915_request_has_started(rq); + rcu_read_unlock(); + + return result; } /** @@ -482,10 +490,16 @@ static inline bool i915_request_started(const struct i915_request *rq) */ static inline bool i915_request_is_running(const struct i915_request *rq) { + bool result; + if (!i915_request_is_active(rq)) return false; - return __i915_request_has_started(rq); + rcu_read_lock(); + result = __i915_request_has_started(rq) && i915_request_is_active(rq); + rcu_read_unlock(); + + return result; } /** @@ -509,12 +523,25 @@ static inline bool i915_request_is_ready(const struct i915_request *rq) return !list_empty(&rq->sched.link); } +static inline bool __i915_request_is_complete(const struct i915_request *rq) +{ + return i915_seqno_passed(__hwsp_seqno(rq), rq->fence.seqno); +} + static inline bool i915_request_completed(const struct i915_request *rq) { + bool result; + if (i915_request_signaled(rq)) return true; - return i915_seqno_passed(hwsp_seqno(rq), rq->fence.seqno); + result = true; + rcu_read_lock(); /* the HWSP may be freed at runtime */ + if (likely(!i915_request_signaled(rq))) + result = __i915_request_is_complete(rq); + rcu_read_unlock(); + + return result; } static inline void i915_request_mark_complete(struct i915_request *rq) -- GitLab From 5cdc4a6950a883594e9640b1decb3fcf6222a594 Mon Sep 17 00:00:00 2001 From: lianzhi chang Date: Thu, 14 Jan 2021 15:57:41 +0800 Subject: [PATCH 0995/2012] udf: fix the problem that the disc content is not displayed When the capacity of the disc is too large (assuming the 4.7G specification), the disc (UDF file system) will be burned multiple times in the windows (Multisession Usage). When the remaining capacity of the CD is less than 300M (estimated value, for reference only), open the CD in the Linux system, the content of the CD is displayed as blank (the kernel will say "No VRS found"). Windows can display the contents of the CD normally. Through analysis, in the "fs/udf/super.c": udf_check_vsd function, the actual value of VSD_MAX_SECTOR_OFFSET may be much larger than 0x800000. According to the current code logic, it is found that the type of sbi->s_session is "__s32", when the remaining capacity of the disc is less than 300M (take a set of test values: sector=3154903040, sbi->s_session=1540464, sb->s_blocksize_bits=11 ), the calculation result of "sbi->s_session << sb->s_blocksize_bits" will overflow. Therefore, it is necessary to convert the type of s_session to "loff_t" (when udf_check_vsd starts, assign a value to _sector, which is also converted in this way), so that the result will not overflow, and then the content of the disc can be displayed normally. Link: https://lore.kernel.org/r/20210114075741.30448-1-changlianzhi@uniontech.com Signed-off-by: lianzhi chang Signed-off-by: Jan Kara --- fs/udf/super.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/fs/udf/super.c b/fs/udf/super.c index 5bef3a68395d8..d0df217f4712a 100644 --- a/fs/udf/super.c +++ b/fs/udf/super.c @@ -705,6 +705,7 @@ static int udf_check_vsd(struct super_block *sb) struct buffer_head *bh = NULL; int nsr = 0; struct udf_sb_info *sbi; + loff_t session_offset; sbi = UDF_SB(sb); if (sb->s_blocksize < sizeof(struct volStructDesc)) @@ -712,7 +713,8 @@ static int udf_check_vsd(struct super_block *sb) else sectorsize = sb->s_blocksize; - sector += (((loff_t)sbi->s_session) << sb->s_blocksize_bits); + session_offset = (loff_t)sbi->s_session << sb->s_blocksize_bits; + sector += session_offset; udf_debug("Starting at sector %u (%lu byte sectors)\n", (unsigned int)(sector >> sb->s_blocksize_bits), @@ -757,8 +759,7 @@ static int udf_check_vsd(struct super_block *sb) if (nsr > 0) return 1; - else if (!bh && sector - (sbi->s_session << sb->s_blocksize_bits) == - VSD_FIRST_SECTOR_OFFSET) + else if (!bh && sector - session_offset == VSD_FIRST_SECTOR_OFFSET) return -1; else return 0; -- GitLab From a8939f2e138e418c2b059056ff5b501eaf2eae54 Mon Sep 17 00:00:00 2001 From: James Schulman Date: Fri, 15 Jan 2021 14:11:05 -0600 Subject: [PATCH 0996/2012] ASoC: wm_adsp: Fix control name parsing for multi-fw When switching between firmware types, the wrong control can be selected when requesting control in kernel API. Use the currently selected DSP firwmare type to select the proper mixer control. Signed-off-by: James Schulman Acked-by: Charles Keepax Link: https://lore.kernel.org/r/20210115201105.14075-1-james.schulman@cirrus.com Signed-off-by: Mark Brown --- sound/soc/codecs/wm_adsp.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sound/soc/codecs/wm_adsp.c b/sound/soc/codecs/wm_adsp.c index dec8716aa8ef5..985b2dcecf138 100644 --- a/sound/soc/codecs/wm_adsp.c +++ b/sound/soc/codecs/wm_adsp.c @@ -2031,11 +2031,14 @@ static struct wm_coeff_ctl *wm_adsp_get_ctl(struct wm_adsp *dsp, unsigned int alg) { struct wm_coeff_ctl *pos, *rslt = NULL; + const char *fw_txt = wm_adsp_fw_text[dsp->fw]; list_for_each_entry(pos, &dsp->ctl_list, list) { if (!pos->subname) continue; if (strncmp(pos->subname, name, pos->subname_len) == 0 && + strncmp(pos->fw_name, fw_txt, + SNDRV_CTL_ELEM_ID_NAME_MAXLEN) == 0 && pos->alg_region.alg == alg && pos->alg_region.type == type) { rslt = pos; -- GitLab From e36626bb099e5159a7868dbfad6957ff6b0e4102 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonathan=20Neusch=C3=A4fer?= Date: Sat, 16 Jan 2021 02:34:03 +0100 Subject: [PATCH 0997/2012] ASoC: dt-bindings: mt8192-mt6359: Fix indentation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The items of the 'maintainers' list are indented with three spaces. Use the usual two spaces instead, for consistency and to silence yamllint. Signed-off-by: Jonathan Neuschäfer Link: https://lore.kernel.org/r/20210116013403.3490518-1-j.neuschaefer@gmx.net Signed-off-by: Mark Brown --- .../bindings/sound/mt8192-mt6359-rt1015-rt5682.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/devicetree/bindings/sound/mt8192-mt6359-rt1015-rt5682.yaml b/Documentation/devicetree/bindings/sound/mt8192-mt6359-rt1015-rt5682.yaml index bf8c8ba25009d..54650823b29a4 100644 --- a/Documentation/devicetree/bindings/sound/mt8192-mt6359-rt1015-rt5682.yaml +++ b/Documentation/devicetree/bindings/sound/mt8192-mt6359-rt1015-rt5682.yaml @@ -7,8 +7,8 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Mediatek MT8192 with MT6359, RT1015 and RT5682 ASoC sound card driver maintainers: - - Jiaxin Yu - - Shane Chien + - Jiaxin Yu + - Shane Chien description: This binding describes the MT8192 sound card. -- GitLab From 18d3bff411c8d46d40537483bdc0b61b33ce0371 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 16 Dec 2020 11:22:05 -0500 Subject: [PATCH 0998/2012] btrfs: don't get an EINTR during drop_snapshot for reloc This was partially fixed by f3e3d9cc3525 ("btrfs: avoid possible signal interruption of btrfs_drop_snapshot() on relocation tree"), however it missed a spot when we restart a trans handle because we need to end the transaction. The fix is the same, simply use btrfs_join_transaction() instead of btrfs_start_transaction() when deleting reloc roots. Fixes: f3e3d9cc3525 ("btrfs: avoid possible signal interruption of btrfs_drop_snapshot() on relocation tree") CC: stable@vger.kernel.org # 5.4+ Signed-off-by: Josef Bacik Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/extent-tree.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index d79b8369e6aaf..30b1a630dc2f8 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -5549,7 +5549,15 @@ int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref, int for_reloc) goto out_free; } - trans = btrfs_start_transaction(tree_root, 0); + /* + * Use join to avoid potential EINTR from transaction + * start. See wait_reserve_ticket and the whole + * reservation callchain. + */ + if (for_reloc) + trans = btrfs_join_transaction(tree_root); + else + trans = btrfs_start_transaction(tree_root, 0); if (IS_ERR(trans)) { err = PTR_ERR(trans); goto out_free; -- GitLab From 49ecc679ab48b40ca799bf94b327d5284eac9e46 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 16 Dec 2020 11:22:11 -0500 Subject: [PATCH 0999/2012] btrfs: do not double free backref nodes on error Zygo reported the following KASAN splat: BUG: KASAN: use-after-free in btrfs_backref_cleanup_node+0x18a/0x420 Read of size 8 at addr ffff888112402950 by task btrfs/28836 CPU: 0 PID: 28836 Comm: btrfs Tainted: G W 5.10.0-e35f27394290-for-next+ #23 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.12.0-1 04/01/2014 Call Trace: dump_stack+0xbc/0xf9 ? btrfs_backref_cleanup_node+0x18a/0x420 print_address_description.constprop.8+0x21/0x210 ? record_print_text.cold.34+0x11/0x11 ? btrfs_backref_cleanup_node+0x18a/0x420 ? btrfs_backref_cleanup_node+0x18a/0x420 kasan_report.cold.10+0x20/0x37 ? btrfs_backref_cleanup_node+0x18a/0x420 __asan_load8+0x69/0x90 btrfs_backref_cleanup_node+0x18a/0x420 btrfs_backref_release_cache+0x83/0x1b0 relocate_block_group+0x394/0x780 ? merge_reloc_roots+0x4a0/0x4a0 btrfs_relocate_block_group+0x26e/0x4c0 btrfs_relocate_chunk+0x52/0x120 btrfs_balance+0xe2e/0x1900 ? check_flags.part.50+0x6c/0x1e0 ? btrfs_relocate_chunk+0x120/0x120 ? kmem_cache_alloc_trace+0xa06/0xcb0 ? _copy_from_user+0x83/0xc0 btrfs_ioctl_balance+0x3a7/0x460 btrfs_ioctl+0x24c8/0x4360 ? __kasan_check_read+0x11/0x20 ? check_chain_key+0x1f4/0x2f0 ? __asan_loadN+0xf/0x20 ? btrfs_ioctl_get_supported_features+0x30/0x30 ? kvm_sched_clock_read+0x18/0x30 ? check_chain_key+0x1f4/0x2f0 ? lock_downgrade+0x3f0/0x3f0 ? handle_mm_fault+0xad6/0x2150 ? do_vfs_ioctl+0xfc/0x9d0 ? ioctl_file_clone+0xe0/0xe0 ? check_flags.part.50+0x6c/0x1e0 ? check_flags.part.50+0x6c/0x1e0 ? check_flags+0x26/0x30 ? lock_is_held_type+0xc3/0xf0 ? syscall_enter_from_user_mode+0x1b/0x60 ? do_syscall_64+0x13/0x80 ? rcu_read_lock_sched_held+0xa1/0xd0 ? __kasan_check_read+0x11/0x20 ? __fget_light+0xae/0x110 __x64_sys_ioctl+0xc3/0x100 do_syscall_64+0x37/0x80 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x7f4c4bdfe427 Allocated by task 28836: kasan_save_stack+0x21/0x50 __kasan_kmalloc.constprop.18+0xbe/0xd0 kasan_kmalloc+0x9/0x10 kmem_cache_alloc_trace+0x410/0xcb0 btrfs_backref_alloc_node+0x46/0xf0 btrfs_backref_add_tree_node+0x60d/0x11d0 build_backref_tree+0xc5/0x700 relocate_tree_blocks+0x2be/0xb90 relocate_block_group+0x2eb/0x780 btrfs_relocate_block_group+0x26e/0x4c0 btrfs_relocate_chunk+0x52/0x120 btrfs_balance+0xe2e/0x1900 btrfs_ioctl_balance+0x3a7/0x460 btrfs_ioctl+0x24c8/0x4360 __x64_sys_ioctl+0xc3/0x100 do_syscall_64+0x37/0x80 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Freed by task 28836: kasan_save_stack+0x21/0x50 kasan_set_track+0x20/0x30 kasan_set_free_info+0x1f/0x30 __kasan_slab_free+0xf3/0x140 kasan_slab_free+0xe/0x10 kfree+0xde/0x200 btrfs_backref_error_cleanup+0x452/0x530 build_backref_tree+0x1a5/0x700 relocate_tree_blocks+0x2be/0xb90 relocate_block_group+0x2eb/0x780 btrfs_relocate_block_group+0x26e/0x4c0 btrfs_relocate_chunk+0x52/0x120 btrfs_balance+0xe2e/0x1900 btrfs_ioctl_balance+0x3a7/0x460 btrfs_ioctl+0x24c8/0x4360 __x64_sys_ioctl+0xc3/0x100 do_syscall_64+0x37/0x80 entry_SYSCALL_64_after_hwframe+0x44/0xa9 This occurred because we freed our backref node in btrfs_backref_error_cleanup(), but then tried to free it again in btrfs_backref_release_cache(). This is because btrfs_backref_release_cache() will cycle through all of the cache->leaves nodes and free them up. However btrfs_backref_error_cleanup() freed the backref node with btrfs_backref_free_node(), which simply kfree()d the backref node without unlinking it from the cache. Change this to a btrfs_backref_drop_node(), which does the appropriate cleanup and removes the node from the cache->leaves list, so when we go to free the remaining cache we don't trip over items we've already dropped. Fixes: 75bfb9aff45e ("Btrfs: cleanup error handling in build_backref_tree") CC: stable@vger.kernel.org # 4.4+ Signed-off-by: Josef Bacik Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/backref.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index 02d7d7b2563b5..9cadacf3ec275 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -3117,7 +3117,7 @@ void btrfs_backref_error_cleanup(struct btrfs_backref_cache *cache, list_del_init(&lower->list); if (lower == node) node = NULL; - btrfs_backref_free_node(cache, lower); + btrfs_backref_drop_node(cache, lower); } btrfs_backref_cleanup_node(cache, node); -- GitLab From fb286100974e7239af243bc2255a52f29442f9c8 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 16 Dec 2020 11:22:14 -0500 Subject: [PATCH 1000/2012] btrfs: fix lockdep splat in btrfs_recover_relocation While testing the error paths of relocation I hit the following lockdep splat: ====================================================== WARNING: possible circular locking dependency detected 5.10.0-rc6+ #217 Not tainted ------------------------------------------------------ mount/779 is trying to acquire lock: ffffa0e676945418 (&fs_info->balance_mutex){+.+.}-{3:3}, at: btrfs_recover_balance+0x2f0/0x340 but task is already holding lock: ffffa0e60ee31da8 (btrfs-root-00){++++}-{3:3}, at: __btrfs_tree_read_lock+0x27/0x100 which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #2 (btrfs-root-00){++++}-{3:3}: down_read_nested+0x43/0x130 __btrfs_tree_read_lock+0x27/0x100 btrfs_read_lock_root_node+0x31/0x40 btrfs_search_slot+0x462/0x8f0 btrfs_update_root+0x55/0x2b0 btrfs_drop_snapshot+0x398/0x750 clean_dirty_subvols+0xdf/0x120 btrfs_recover_relocation+0x534/0x5a0 btrfs_start_pre_rw_mount+0xcb/0x170 open_ctree+0x151f/0x1726 btrfs_mount_root.cold+0x12/0xea legacy_get_tree+0x30/0x50 vfs_get_tree+0x28/0xc0 vfs_kern_mount.part.0+0x71/0xb0 btrfs_mount+0x10d/0x380 legacy_get_tree+0x30/0x50 vfs_get_tree+0x28/0xc0 path_mount+0x433/0xc10 __x64_sys_mount+0xe3/0x120 do_syscall_64+0x33/0x40 entry_SYSCALL_64_after_hwframe+0x44/0xa9 -> #1 (sb_internal#2){.+.+}-{0:0}: start_transaction+0x444/0x700 insert_balance_item.isra.0+0x37/0x320 btrfs_balance+0x354/0xf40 btrfs_ioctl_balance+0x2cf/0x380 __x64_sys_ioctl+0x83/0xb0 do_syscall_64+0x33/0x40 entry_SYSCALL_64_after_hwframe+0x44/0xa9 -> #0 (&fs_info->balance_mutex){+.+.}-{3:3}: __lock_acquire+0x1120/0x1e10 lock_acquire+0x116/0x370 __mutex_lock+0x7e/0x7b0 btrfs_recover_balance+0x2f0/0x340 open_ctree+0x1095/0x1726 btrfs_mount_root.cold+0x12/0xea legacy_get_tree+0x30/0x50 vfs_get_tree+0x28/0xc0 vfs_kern_mount.part.0+0x71/0xb0 btrfs_mount+0x10d/0x380 legacy_get_tree+0x30/0x50 vfs_get_tree+0x28/0xc0 path_mount+0x433/0xc10 __x64_sys_mount+0xe3/0x120 do_syscall_64+0x33/0x40 entry_SYSCALL_64_after_hwframe+0x44/0xa9 other info that might help us debug this: Chain exists of: &fs_info->balance_mutex --> sb_internal#2 --> btrfs-root-00 Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(btrfs-root-00); lock(sb_internal#2); lock(btrfs-root-00); lock(&fs_info->balance_mutex); *** DEADLOCK *** 2 locks held by mount/779: #0: ffffa0e60dc040e0 (&type->s_umount_key#47/1){+.+.}-{3:3}, at: alloc_super+0xb5/0x380 #1: ffffa0e60ee31da8 (btrfs-root-00){++++}-{3:3}, at: __btrfs_tree_read_lock+0x27/0x100 stack backtrace: CPU: 0 PID: 779 Comm: mount Not tainted 5.10.0-rc6+ #217 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.13.0-2.fc32 04/01/2014 Call Trace: dump_stack+0x8b/0xb0 check_noncircular+0xcf/0xf0 ? trace_call_bpf+0x139/0x260 __lock_acquire+0x1120/0x1e10 lock_acquire+0x116/0x370 ? btrfs_recover_balance+0x2f0/0x340 __mutex_lock+0x7e/0x7b0 ? btrfs_recover_balance+0x2f0/0x340 ? btrfs_recover_balance+0x2f0/0x340 ? rcu_read_lock_sched_held+0x3f/0x80 ? kmem_cache_alloc_trace+0x2c4/0x2f0 ? btrfs_get_64+0x5e/0x100 btrfs_recover_balance+0x2f0/0x340 open_ctree+0x1095/0x1726 btrfs_mount_root.cold+0x12/0xea ? rcu_read_lock_sched_held+0x3f/0x80 legacy_get_tree+0x30/0x50 vfs_get_tree+0x28/0xc0 vfs_kern_mount.part.0+0x71/0xb0 btrfs_mount+0x10d/0x380 ? __kmalloc_track_caller+0x2f2/0x320 legacy_get_tree+0x30/0x50 vfs_get_tree+0x28/0xc0 ? capable+0x3a/0x60 path_mount+0x433/0xc10 __x64_sys_mount+0xe3/0x120 do_syscall_64+0x33/0x40 entry_SYSCALL_64_after_hwframe+0x44/0xa9 This is straightforward to fix, simply release the path before we setup the balance_ctl. CC: stable@vger.kernel.org # 4.4+ Reviewed-by: Qu Wenruo Reviewed-by: Johannes Thumshirn Signed-off-by: Josef Bacik Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/volumes.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 2c0aa03b64376..0c7f4f6237e8c 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -4318,6 +4318,8 @@ int btrfs_recover_balance(struct btrfs_fs_info *fs_info) btrfs_warn(fs_info, "balance: cannot set exclusive op status, resume manually"); + btrfs_release_path(path); + mutex_lock(&fs_info->balance_mutex); BUG_ON(fs_info->balance_ctl); spin_lock(&fs_info->balance_lock); -- GitLab From 34d1eb0e599875064955a74712f08ff14c8e3d5f Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 16 Dec 2020 11:22:17 -0500 Subject: [PATCH 1001/2012] btrfs: don't clear ret in btrfs_start_dirty_block_groups If we fail to update a block group item in the loop we'll break, however we'll do btrfs_run_delayed_refs and lose our error value in ret, and thus not clean up properly. Fix this by only running the delayed refs if there was no failure. CC: stable@vger.kernel.org # 4.4+ Reviewed-by: Qu Wenruo Reviewed-by: Johannes Thumshirn Signed-off-by: Josef Bacik Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/block-group.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c index 52f2198d44c95..0886e81e55402 100644 --- a/fs/btrfs/block-group.c +++ b/fs/btrfs/block-group.c @@ -2669,7 +2669,8 @@ again: * Go through delayed refs for all the stuff we've just kicked off * and then loop back (just once) */ - ret = btrfs_run_delayed_refs(trans, 0); + if (!ret) + ret = btrfs_run_delayed_refs(trans, 0); if (!ret && loops == 0) { loops++; spin_lock(&cur_trans->dirty_bgs_lock); -- GitLab From a82e537807d5c85706cd4c16fd2de77a8495dc8d Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Thu, 14 Jan 2021 19:16:21 -0800 Subject: [PATCH 1002/2012] pinctrl: qcom: Allow SoCs to specify a GPIO function that's not 0 There's currently a comment in the code saying function 0 is GPIO. Instead of hardcoding it, let's add a member where an SoC can specify it. No known SoCs use a number other than 0, but this just makes the code clearer. NOTE: no SoC code needs to be updated since we can rely on zero-initialization. Signed-off-by: Douglas Anderson Reviewed-by: Stephen Boyd Reviewed-by: Maulik Shah Tested-by: Maulik Shah Reviewed-by: Bjorn Andersson Link: https://lore.kernel.org/r/20210114191601.v7.1.I3ad184e3423d8e479bc3e86f5b393abb1704a1d1@changeid Signed-off-by: Linus Walleij --- drivers/pinctrl/qcom/pinctrl-msm.c | 3 +-- drivers/pinctrl/qcom/pinctrl-msm.h | 2 ++ 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/pinctrl/qcom/pinctrl-msm.c b/drivers/pinctrl/qcom/pinctrl-msm.c index e051aecf95c4e..d1261188fb6ee 100644 --- a/drivers/pinctrl/qcom/pinctrl-msm.c +++ b/drivers/pinctrl/qcom/pinctrl-msm.c @@ -210,8 +210,7 @@ static int msm_pinmux_request_gpio(struct pinctrl_dev *pctldev, if (!g->nfuncs) return 0; - /* For now assume function 0 is GPIO because it always is */ - return msm_pinmux_set_mux(pctldev, g->funcs[0], offset); + return msm_pinmux_set_mux(pctldev, g->funcs[pctrl->soc->gpio_func], offset); } static const struct pinmux_ops msm_pinmux_ops = { diff --git a/drivers/pinctrl/qcom/pinctrl-msm.h b/drivers/pinctrl/qcom/pinctrl-msm.h index 333f99243c43a..e31a5167c91ec 100644 --- a/drivers/pinctrl/qcom/pinctrl-msm.h +++ b/drivers/pinctrl/qcom/pinctrl-msm.h @@ -118,6 +118,7 @@ struct msm_gpio_wakeirq_map { * @wakeirq_dual_edge_errata: If true then GPIOs using the wakeirq_map need * to be aware that their parent can't handle dual * edge interrupts. + * @gpio_func: Which function number is GPIO (usually 0). */ struct msm_pinctrl_soc_data { const struct pinctrl_pin_desc *pins; @@ -134,6 +135,7 @@ struct msm_pinctrl_soc_data { const struct msm_gpio_wakeirq_map *wakeirq_map; unsigned int nwakeirq_map; bool wakeirq_dual_edge_errata; + unsigned int gpio_func; }; extern const struct dev_pm_ops msm_pinctrl_dev_pm_ops; -- GitLab From 4079d35fa4fca4ee0ffd66968312fc86a5e8c290 Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Thu, 14 Jan 2021 19:16:22 -0800 Subject: [PATCH 1003/2012] pinctrl: qcom: No need to read-modify-write the interrupt status When the Qualcomm pinctrl driver wants to Ack an interrupt, it does a read-modify-write on the interrupt status register. On some SoCs it makes sure that the status bit is 1 to "Ack" and on others it makes sure that the bit is 0 to "Ack". Presumably the first type of interrupt controller is a "write 1 to clear" type register and the second just let you directly set the interrupt status register. As far as I can tell from scanning structure definitions, the interrupt status bit is always in a register by itself. Thus with both types of interrupt controllers it is safe to "Ack" interrupts without doing a read-modify-write. We can do a simple write. It should be noted that if the interrupt status bit _was_ ever in a register with other things (like maybe status bits for other GPIOs): a) For "write 1 clear" type controllers then read-modify-write would be totally wrong because we'd accidentally end up clearing interrupts we weren't looking at. b) For "direct set" type controllers then read-modify-write would also be wrong because someone setting one of the other bits in the register might accidentally clear (or set) our interrupt. I say this simply to show that the current read-modify-write doesn't provide any sort of "future proofing" of the code. In fact (for "write 1 clear" controllers) the new code is slightly more "future proof" since it would allow more than one interrupt status bits to share a register. NOTE: this code fixes no bugs--it simply avoids an extra register read. Signed-off-by: Douglas Anderson Reviewed-by: Maulik Shah Tested-by: Maulik Shah Reviewed-by: Stephen Boyd Reviewed-by: Bjorn Andersson Link: https://lore.kernel.org/r/20210114191601.v7.2.I3635de080604e1feda770591c5563bd6e63dd39d@changeid Signed-off-by: Linus Walleij --- drivers/pinctrl/qcom/pinctrl-msm.c | 23 ++++++++--------------- 1 file changed, 8 insertions(+), 15 deletions(-) diff --git a/drivers/pinctrl/qcom/pinctrl-msm.c b/drivers/pinctrl/qcom/pinctrl-msm.c index d1261188fb6ee..2f363c28d9d98 100644 --- a/drivers/pinctrl/qcom/pinctrl-msm.c +++ b/drivers/pinctrl/qcom/pinctrl-msm.c @@ -791,16 +791,13 @@ static void msm_gpio_irq_clear_unmask(struct irq_data *d, bool status_clear) raw_spin_lock_irqsave(&pctrl->lock, flags); - if (status_clear) { - /* - * clear the interrupt status bit before unmask to avoid - * any erroneous interrupts that would have got latched - * when the interrupt is not in use. - */ - val = msm_readl_intr_status(pctrl, g); - val &= ~BIT(g->intr_status_bit); - msm_writel_intr_status(val, pctrl, g); - } + /* + * clear the interrupt status bit before unmask to avoid + * any erroneous interrupts that would have got latched + * when the interrupt is not in use. + */ + if (status_clear) + msm_writel_intr_status(0, pctrl, g); val = msm_readl_intr_cfg(pctrl, g); val |= BIT(g->intr_raw_status_bit); @@ -905,11 +902,7 @@ static void msm_gpio_irq_ack(struct irq_data *d) raw_spin_lock_irqsave(&pctrl->lock, flags); - val = msm_readl_intr_status(pctrl, g); - if (g->intr_ack_high) - val |= BIT(g->intr_status_bit); - else - val &= ~BIT(g->intr_status_bit); + val = g->intr_ack_high ? BIT(g->intr_status_bit) : 0; msm_writel_intr_status(val, pctrl, g); if (test_bit(d->hwirq, pctrl->dual_edge_irqs)) -- GitLab From a95881d6aa2c000e3649f27a1a7329cf356e6bb3 Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Thu, 14 Jan 2021 19:16:23 -0800 Subject: [PATCH 1004/2012] pinctrl: qcom: Properly clear "intr_ack_high" interrupts when unmasking In commit 4b7618fdc7e6 ("pinctrl: qcom: Add irq_enable callback for msm gpio") we tried to Ack interrupts during unmask. However, that patch forgot to check "intr_ack_high" so, presumably, it only worked for a certain subset of SoCs. Let's add a small accessor so we don't need to open-code the logic in both places. This was found by code inspection. I don't have any access to the hardware in question nor software that needs the Ack during unmask. Fixes: 4b7618fdc7e6 ("pinctrl: qcom: Add irq_enable callback for msm gpio") Signed-off-by: Douglas Anderson Reviewed-by: Maulik Shah Tested-by: Maulik Shah Reviewed-by: Stephen Boyd Reviewed-by: Bjorn Andersson Link: https://lore.kernel.org/r/20210114191601.v7.3.I32d0f4e174d45363b49ab611a13c3da8f1e87d0f@changeid Signed-off-by: Linus Walleij --- drivers/pinctrl/qcom/pinctrl-msm.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/drivers/pinctrl/qcom/pinctrl-msm.c b/drivers/pinctrl/qcom/pinctrl-msm.c index 2f363c28d9d98..192ed31eabf48 100644 --- a/drivers/pinctrl/qcom/pinctrl-msm.c +++ b/drivers/pinctrl/qcom/pinctrl-msm.c @@ -96,6 +96,14 @@ MSM_ACCESSOR(intr_cfg) MSM_ACCESSOR(intr_status) MSM_ACCESSOR(intr_target) +static void msm_ack_intr_status(struct msm_pinctrl *pctrl, + const struct msm_pingroup *g) +{ + u32 val = g->intr_ack_high ? BIT(g->intr_status_bit) : 0; + + msm_writel_intr_status(val, pctrl, g); +} + static int msm_get_groups_count(struct pinctrl_dev *pctldev) { struct msm_pinctrl *pctrl = pinctrl_dev_get_drvdata(pctldev); @@ -797,7 +805,7 @@ static void msm_gpio_irq_clear_unmask(struct irq_data *d, bool status_clear) * when the interrupt is not in use. */ if (status_clear) - msm_writel_intr_status(0, pctrl, g); + msm_ack_intr_status(pctrl, g); val = msm_readl_intr_cfg(pctrl, g); val |= BIT(g->intr_raw_status_bit); @@ -890,7 +898,6 @@ static void msm_gpio_irq_ack(struct irq_data *d) struct msm_pinctrl *pctrl = gpiochip_get_data(gc); const struct msm_pingroup *g; unsigned long flags; - u32 val; if (test_bit(d->hwirq, pctrl->skip_wake_irqs)) { if (test_bit(d->hwirq, pctrl->dual_edge_irqs)) @@ -902,8 +909,7 @@ static void msm_gpio_irq_ack(struct irq_data *d) raw_spin_lock_irqsave(&pctrl->lock, flags); - val = g->intr_ack_high ? BIT(g->intr_status_bit) : 0; - msm_writel_intr_status(val, pctrl, g); + msm_ack_intr_status(pctrl, g); if (test_bit(d->hwirq, pctrl->dual_edge_irqs)) msm_gpio_update_dual_edge_pos(pctrl, g, d); -- GitLab From cf9d052aa6005f1e8dfaf491d83bf37f368af69e Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Thu, 14 Jan 2021 19:16:24 -0800 Subject: [PATCH 1005/2012] pinctrl: qcom: Don't clear pending interrupts when enabling In Linux, if a driver does disable_irq() and later does enable_irq() on its interrupt, I believe it's expecting these properties: * If an interrupt was pending when the driver disabled then it will still be pending after the driver re-enables. * If an edge-triggered interrupt comes in while an interrupt is disabled it should assert when the interrupt is re-enabled. If you think that the above sounds a lot like the disable_irq() and enable_irq() are supposed to be masking/unmasking the interrupt instead of disabling/enabling it then you've made an astute observation. Specifically when talking about interrupts, "mask" usually means to stop posting interrupts but keep tracking them and "disable" means to fully shut off interrupt detection. It's unfortunate that this is so confusing, but presumably this is all the way it is for historical reasons. Perhaps more confusing than the above is that, even though clients of IRQs themselves don't have a way to request mask/unmask vs. disable/enable calls, IRQ chips themselves can implement both. ...and yet more confusing is that if an IRQ chip implements disable/enable then they will be called when a client driver calls disable_irq() / enable_irq(). It does feel like some of the above could be cleared up. However, without any other core interrupt changes it should be clear that when an IRQ chip gets a request to "disable" an IRQ that it has to treat it like a mask of that IRQ. In any case, after that long interlude you can see that the "unmask and clear" can break things. Maulik tried to fix it so that we no longer did "unmask and clear" in commit 71266d9d3936 ("pinctrl: qcom: Move clearing pending IRQ to .irq_request_resources callback"), but it only handled the PDC case and it had problems (it caused sc7180-trogdor devices to fail to suspend). Let's fix. >From my understanding the source of the phantom interrupt in the were these two things: 1. One that could have been introduced in msm_gpio_irq_set_type() (only for the non-PDC case). 2. Edges could have been detected when a GPIO was muxed away. Fixing case #1 is easy. We can just add a clear in msm_gpio_irq_set_type(). Fixing case #2 is harder. Let's use a concrete example. In sc7180-trogdor.dtsi we configure the uart3 to have two pinctrl states, sleep and default, and mux between the two during runtime PM and system suspend (see geni_se_resources_{on,off}() for more details). The difference between the sleep and default state is that the RX pin is muxed to a GPIO during sleep and muxed to the UART otherwise. As per Qualcomm, when we mux the pin over to the UART function the PDC (or the non-PDC interrupt detection logic) is still watching it / latching edges. These edges don't cause interrupts because the current code masks the interrupt unless we're entering suspend. However, as soon as we enter suspend we unmask the interrupt and it's counted as a wakeup. Let's deal with the problem like this: * When we mux away, we'll mask our interrupt. This isn't necessary in the above case since the client already masked us, but it's a good idea in general. * When we mux back will clear any interrupts and unmask. Fixes: 4b7618fdc7e6 ("pinctrl: qcom: Add irq_enable callback for msm gpio") Fixes: 71266d9d3936 ("pinctrl: qcom: Move clearing pending IRQ to .irq_request_resources callback") Signed-off-by: Douglas Anderson Reviewed-by: Maulik Shah Tested-by: Maulik Shah Reviewed-by: Stephen Boyd Link: https://lore.kernel.org/r/20210114191601.v7.4.I7cf3019783720feb57b958c95c2b684940264cd1@changeid Signed-off-by: Linus Walleij --- drivers/pinctrl/qcom/pinctrl-msm.c | 74 ++++++++++++++++++++---------- 1 file changed, 50 insertions(+), 24 deletions(-) diff --git a/drivers/pinctrl/qcom/pinctrl-msm.c b/drivers/pinctrl/qcom/pinctrl-msm.c index 192ed31eabf48..d70caecd21d25 100644 --- a/drivers/pinctrl/qcom/pinctrl-msm.c +++ b/drivers/pinctrl/qcom/pinctrl-msm.c @@ -51,6 +51,7 @@ * @dual_edge_irqs: Bitmap of irqs that need sw emulated dual edge * detection. * @skip_wake_irqs: Skip IRQs that are handled by wakeup interrupt controller + * @disabled_for_mux: These IRQs were disabled because we muxed away. * @soc: Reference to soc_data of platform specific data. * @regs: Base addresses for the TLMM tiles. * @phys_base: Physical base address @@ -72,6 +73,7 @@ struct msm_pinctrl { DECLARE_BITMAP(dual_edge_irqs, MAX_NR_GPIO); DECLARE_BITMAP(enabled_irqs, MAX_NR_GPIO); DECLARE_BITMAP(skip_wake_irqs, MAX_NR_GPIO); + DECLARE_BITMAP(disabled_for_mux, MAX_NR_GPIO); const struct msm_pinctrl_soc_data *soc; void __iomem *regs[MAX_NR_TILES]; @@ -179,6 +181,10 @@ static int msm_pinmux_set_mux(struct pinctrl_dev *pctldev, unsigned group) { struct msm_pinctrl *pctrl = pinctrl_dev_get_drvdata(pctldev); + struct gpio_chip *gc = &pctrl->chip; + unsigned int irq = irq_find_mapping(gc->irq.domain, group); + struct irq_data *d = irq_get_irq_data(irq); + unsigned int gpio_func = pctrl->soc->gpio_func; const struct msm_pingroup *g; unsigned long flags; u32 val, mask; @@ -195,6 +201,20 @@ static int msm_pinmux_set_mux(struct pinctrl_dev *pctldev, if (WARN_ON(i == g->nfuncs)) return -EINVAL; + /* + * If an GPIO interrupt is setup on this pin then we need special + * handling. Specifically interrupt detection logic will still see + * the pin twiddle even when we're muxed away. + * + * When we see a pin with an interrupt setup on it then we'll disable + * (mask) interrupts on it when we mux away until we mux back. Note + * that disable_irq() refcounts and interrupts are disabled as long as + * at least one disable_irq() has been called. + */ + if (d && i != gpio_func && + !test_and_set_bit(d->hwirq, pctrl->disabled_for_mux)) + disable_irq(irq); + raw_spin_lock_irqsave(&pctrl->lock, flags); val = msm_readl_ctl(pctrl, g); @@ -204,6 +224,20 @@ static int msm_pinmux_set_mux(struct pinctrl_dev *pctldev, raw_spin_unlock_irqrestore(&pctrl->lock, flags); + if (d && i == gpio_func && + test_and_clear_bit(d->hwirq, pctrl->disabled_for_mux)) { + /* + * Clear interrupts detected while not GPIO since we only + * masked things. + */ + if (d->parent_data && test_bit(d->hwirq, pctrl->skip_wake_irqs)) + irq_chip_set_parent_state(d, IRQCHIP_STATE_PENDING, false); + else + msm_ack_intr_status(pctrl, g); + + enable_irq(irq); + } + return 0; } @@ -781,7 +815,7 @@ static void msm_gpio_irq_mask(struct irq_data *d) raw_spin_unlock_irqrestore(&pctrl->lock, flags); } -static void msm_gpio_irq_clear_unmask(struct irq_data *d, bool status_clear) +static void msm_gpio_irq_unmask(struct irq_data *d) { struct gpio_chip *gc = irq_data_get_irq_chip_data(d); struct msm_pinctrl *pctrl = gpiochip_get_data(gc); @@ -799,14 +833,6 @@ static void msm_gpio_irq_clear_unmask(struct irq_data *d, bool status_clear) raw_spin_lock_irqsave(&pctrl->lock, flags); - /* - * clear the interrupt status bit before unmask to avoid - * any erroneous interrupts that would have got latched - * when the interrupt is not in use. - */ - if (status_clear) - msm_ack_intr_status(pctrl, g); - val = msm_readl_intr_cfg(pctrl, g); val |= BIT(g->intr_raw_status_bit); val |= BIT(g->intr_enable_bit); @@ -826,7 +852,7 @@ static void msm_gpio_irq_enable(struct irq_data *d) irq_chip_enable_parent(d); if (!test_bit(d->hwirq, pctrl->skip_wake_irqs)) - msm_gpio_irq_clear_unmask(d, true); + msm_gpio_irq_unmask(d); } static void msm_gpio_irq_disable(struct irq_data *d) @@ -841,11 +867,6 @@ static void msm_gpio_irq_disable(struct irq_data *d) msm_gpio_irq_mask(d); } -static void msm_gpio_irq_unmask(struct irq_data *d) -{ - msm_gpio_irq_clear_unmask(d, false); -} - /** * msm_gpio_update_dual_edge_parent() - Prime next edge for IRQs handled by parent. * @d: The irq dta. @@ -934,6 +955,7 @@ static int msm_gpio_irq_set_type(struct irq_data *d, unsigned int type) struct msm_pinctrl *pctrl = gpiochip_get_data(gc); const struct msm_pingroup *g; unsigned long flags; + bool was_enabled; u32 val; if (msm_gpio_needs_dual_edge_parent_workaround(d, type)) { @@ -995,6 +1017,7 @@ static int msm_gpio_irq_set_type(struct irq_data *d, unsigned int type) * could cause the INTR_STATUS to be set for EDGE interrupts. */ val = msm_readl_intr_cfg(pctrl, g); + was_enabled = val & BIT(g->intr_raw_status_bit); val |= BIT(g->intr_raw_status_bit); if (g->intr_detection_width == 2) { val &= ~(3 << g->intr_detection_bit); @@ -1044,6 +1067,14 @@ static int msm_gpio_irq_set_type(struct irq_data *d, unsigned int type) } msm_writel_intr_cfg(val, pctrl, g); + /* + * The first time we set RAW_STATUS_EN it could trigger an interrupt. + * Clear the interrupt. This is safe because we have + * IRQCHIP_SET_TYPE_MASKED. + */ + if (!was_enabled) + msm_ack_intr_status(pctrl, g); + if (test_bit(d->hwirq, pctrl->dual_edge_irqs)) msm_gpio_update_dual_edge_pos(pctrl, g, d); @@ -1097,16 +1128,11 @@ static int msm_gpio_irq_reqres(struct irq_data *d) } /* - * Clear the interrupt that may be pending before we enable - * the line. - * This is especially a problem with the GPIOs routed to the - * PDC. These GPIOs are direct-connect interrupts to the GIC. - * Disabling the interrupt line at the PDC does not prevent - * the interrupt from being latched at the GIC. The state at - * GIC needs to be cleared before enabling. + * The disable / clear-enable workaround we do in msm_pinmux_set_mux() + * only works if disable is not lazy since we only clear any bogus + * interrupt in hardware. Explicitly mark the interrupt as UNLAZY. */ - if (d->parent_data && test_bit(d->hwirq, pctrl->skip_wake_irqs)) - irq_chip_set_parent_state(d, IRQCHIP_STATE_PENDING, 0); + irq_set_status_flags(d->irq, IRQ_DISABLE_UNLAZY); return 0; out: -- GitLab From d24c790577ef01bfa01da2b131313a38c843a634 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Thu, 14 Jan 2021 18:10:52 +0100 Subject: [PATCH 1006/2012] mt7601u: fix rx buffer refcounting Fix the following crash due to erroneous page refcounting: [ 32.445919] BUG: Bad page state in process swapper/1 pfn:11f65a [ 32.447409] page:00000000938f0632 refcount:0 mapcount:-128 mapping:0000000000000000 index:0x0 pfn:0x11f65a [ 32.449605] flags: 0x8000000000000000() [ 32.450421] raw: 8000000000000000 ffffffff825b0148 ffffea00045ae988 0000000000000000 [ 32.451795] raw: 0000000000000000 0000000000000001 00000000ffffff7f 0000000000000000 [ 32.452999] page dumped because: nonzero mapcount [ 32.453888] Modules linked in: [ 32.454492] CPU: 1 PID: 0 Comm: swapper/1 Not tainted 5.11.0-rc2+ #1976 [ 32.455695] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.14.0-1.fc33 04/01/2014 [ 32.457157] Call Trace: [ 32.457636] [ 32.457993] dump_stack+0x77/0x97 [ 32.458576] bad_page.cold+0x65/0x96 [ 32.459198] get_page_from_freelist+0x46a/0x11f0 [ 32.460008] __alloc_pages_nodemask+0x10a/0x2b0 [ 32.460794] mt7601u_rx_tasklet+0x651/0x720 [ 32.461505] tasklet_action_common.constprop.0+0x6b/0xd0 [ 32.462343] __do_softirq+0x152/0x46c [ 32.462928] asm_call_irq_on_stack+0x12/0x20 [ 32.463610] [ 32.463953] do_softirq_own_stack+0x5b/0x70 [ 32.464582] irq_exit_rcu+0x9f/0xe0 [ 32.465028] common_interrupt+0xae/0x1a0 [ 32.465536] asm_common_interrupt+0x1e/0x40 [ 32.466071] RIP: 0010:default_idle+0x18/0x20 [ 32.468981] RSP: 0018:ffffc90000077f00 EFLAGS: 00000246 [ 32.469648] RAX: 0000000000000000 RBX: 0000000000000001 RCX: 0000000000000000 [ 32.470550] RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffffffff81aac3dd [ 32.471463] RBP: ffff88810022ab00 R08: 0000000000000001 R09: 0000000000000001 [ 32.472335] R10: 0000000000000046 R11: 0000000000005aa0 R12: 0000000000000000 [ 32.473235] R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000 [ 32.474139] ? default_idle_call+0x4d/0x200 [ 32.474681] default_idle_call+0x74/0x200 [ 32.475192] do_idle+0x1d5/0x250 [ 32.475612] cpu_startup_entry+0x19/0x20 [ 32.476114] secondary_startup_64_no_verify+0xb0/0xbb [ 32.476765] Disabling lock debugging due to kernel taint Fixes: c869f77d6abb ("add mt7601u driver") Co-developed-by: Felix Fietkau Signed-off-by: Felix Fietkau Signed-off-by: Lorenzo Bianconi Acked-by: Jakub Kicinski Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/62b2380c8c2091834cfad05e1059b55f945bd114.1610643952.git.lorenzo@kernel.org --- drivers/net/wireless/mediatek/mt7601u/dma.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt7601u/dma.c b/drivers/net/wireless/mediatek/mt7601u/dma.c index 5f99054f535b4..47710da5b2a58 100644 --- a/drivers/net/wireless/mediatek/mt7601u/dma.c +++ b/drivers/net/wireless/mediatek/mt7601u/dma.c @@ -152,8 +152,7 @@ mt7601u_rx_process_entry(struct mt7601u_dev *dev, struct mt7601u_dma_buf_rx *e) if (new_p) { /* we have one extra ref from the allocator */ - __free_pages(e->p, MT_RX_ORDER); - + put_page(e->p); e->p = new_p; } } -- GitLab From 952de419b6179ad1424f512d52ec7122662fdf63 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Thu, 14 Jan 2021 18:26:47 +0100 Subject: [PATCH 1007/2012] mt76: mt7663s: fix rx buffer refcounting Similar to mt7601u driver, fix erroneous rx page refcounting Fixes: a66cbdd6573d ("mt76: mt7615: introduce mt7663s support") Signed-off-by: Lorenzo Bianconi Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/dca19c9d445156201bc41f7cbb6e894bbc9a678c.1610644945.git.lorenzo@kernel.org --- drivers/net/wireless/mediatek/mt76/mt7615/sdio_txrx.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/sdio_txrx.c b/drivers/net/wireless/mediatek/mt76/mt7615/sdio_txrx.c index 13d77f8fca866..9fb506f2ace6d 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7615/sdio_txrx.c +++ b/drivers/net/wireless/mediatek/mt76/mt7615/sdio_txrx.c @@ -83,7 +83,7 @@ static int mt7663s_rx_run_queue(struct mt76_dev *dev, enum mt76_rxq_id qid, { struct mt76_queue *q = &dev->q_rx[qid]; struct mt76_sdio *sdio = &dev->sdio; - int len = 0, err, i, order; + int len = 0, err, i; struct page *page; u8 *buf; @@ -96,8 +96,7 @@ static int mt7663s_rx_run_queue(struct mt76_dev *dev, enum mt76_rxq_id qid, if (len > sdio->func->cur_blksize) len = roundup(len, sdio->func->cur_blksize); - order = get_order(len); - page = __dev_alloc_pages(GFP_KERNEL, order); + page = __dev_alloc_pages(GFP_KERNEL, get_order(len)); if (!page) return -ENOMEM; @@ -106,7 +105,7 @@ static int mt7663s_rx_run_queue(struct mt76_dev *dev, enum mt76_rxq_id qid, err = sdio_readsb(sdio->func, buf, MCR_WRDR(qid), len); if (err < 0) { dev_err(dev->dev, "sdio read data failed:%d\n", err); - __free_pages(page, order); + put_page(page); return err; } @@ -123,7 +122,7 @@ static int mt7663s_rx_run_queue(struct mt76_dev *dev, enum mt76_rxq_id qid, if (q->queued + i + 1 == q->ndesc) break; } - __free_pages(page, order); + put_page(page); spin_lock_bh(&q->lock); q->head = (q->head + i) % q->ndesc; -- GitLab From 4d6b1c95b974761c01cbad92321b82232b66d2a2 Mon Sep 17 00:00:00 2001 From: Revanth Rajashekar Date: Thu, 14 Jan 2021 18:55:07 -0700 Subject: [PATCH 1008/2012] nvme: check the PRINFO bit before deciding the host buffer length According to NVMe spec v1.4, section 8.3.1, the PRINFO bit and the metadata size play a vital role in deteriming the host buffer size. If PRIFNO bit is set and MS==8, the host doesn't add the metadata buffer, instead the controller adds it. Signed-off-by: Revanth Rajashekar Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 200bdd672c281..8caf9b34734dc 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1543,8 +1543,21 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio) } length = (io.nblocks + 1) << ns->lba_shift; - meta_len = (io.nblocks + 1) * ns->ms; - metadata = nvme_to_user_ptr(io.metadata); + + if ((io.control & NVME_RW_PRINFO_PRACT) && + ns->ms == sizeof(struct t10_pi_tuple)) { + /* + * Protection information is stripped/inserted by the + * controller. + */ + if (nvme_to_user_ptr(io.metadata)) + return -EINVAL; + meta_len = 0; + metadata = NULL; + } else { + meta_len = (io.nblocks + 1) * ns->ms; + metadata = nvme_to_user_ptr(io.metadata); + } if (ns->features & NVME_NS_EXT_LBAS) { length += meta_len; -- GitLab From 7674073b2ed35ac951a49c425dec6b39d5a57140 Mon Sep 17 00:00:00 2001 From: Chao Leng Date: Thu, 14 Jan 2021 17:09:25 +0800 Subject: [PATCH 1009/2012] nvme-rdma: avoid request double completion for concurrent nvme_rdma_timeout A crash happens when inject completing request long time(nearly 30s). Each name space has a request queue, when inject completing request long time, multi request queues may have time out requests at the same time, nvme_rdma_timeout will execute concurrently. Multi requests in different request queues may be queued in the same rdma queue, multi nvme_rdma_timeout may call nvme_rdma_stop_queue at the same time. The first nvme_rdma_timeout will clear NVME_RDMA_Q_LIVE and continue stopping the rdma queue(drain qp), but the others check NVME_RDMA_Q_LIVE is already cleared, and then directly complete the requests, complete request before the qp is fully drained may lead to a use-after-free condition. Add a multex lock to serialize nvme_rdma_stop_queue. Signed-off-by: Chao Leng Tested-by: Israel Rukshin Reviewed-by: Israel Rukshin Signed-off-by: Christoph Hellwig --- drivers/nvme/host/rdma.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index cf6c49d09c820..b7ce4f221d990 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -97,6 +97,7 @@ struct nvme_rdma_queue { struct completion cm_done; bool pi_support; int cq_size; + struct mutex queue_lock; }; struct nvme_rdma_ctrl { @@ -579,6 +580,7 @@ static int nvme_rdma_alloc_queue(struct nvme_rdma_ctrl *ctrl, int ret; queue = &ctrl->queues[idx]; + mutex_init(&queue->queue_lock); queue->ctrl = ctrl; if (idx && ctrl->ctrl.max_integrity_segments) queue->pi_support = true; @@ -598,7 +600,8 @@ static int nvme_rdma_alloc_queue(struct nvme_rdma_ctrl *ctrl, if (IS_ERR(queue->cm_id)) { dev_info(ctrl->ctrl.device, "failed to create CM ID: %ld\n", PTR_ERR(queue->cm_id)); - return PTR_ERR(queue->cm_id); + ret = PTR_ERR(queue->cm_id); + goto out_destroy_mutex; } if (ctrl->ctrl.opts->mask & NVMF_OPT_HOST_TRADDR) @@ -628,6 +631,8 @@ static int nvme_rdma_alloc_queue(struct nvme_rdma_ctrl *ctrl, out_destroy_cm_id: rdma_destroy_id(queue->cm_id); nvme_rdma_destroy_queue_ib(queue); +out_destroy_mutex: + mutex_destroy(&queue->queue_lock); return ret; } @@ -639,9 +644,10 @@ static void __nvme_rdma_stop_queue(struct nvme_rdma_queue *queue) static void nvme_rdma_stop_queue(struct nvme_rdma_queue *queue) { - if (!test_and_clear_bit(NVME_RDMA_Q_LIVE, &queue->flags)) - return; - __nvme_rdma_stop_queue(queue); + mutex_lock(&queue->queue_lock); + if (test_and_clear_bit(NVME_RDMA_Q_LIVE, &queue->flags)) + __nvme_rdma_stop_queue(queue); + mutex_unlock(&queue->queue_lock); } static void nvme_rdma_free_queue(struct nvme_rdma_queue *queue) @@ -651,6 +657,7 @@ static void nvme_rdma_free_queue(struct nvme_rdma_queue *queue) nvme_rdma_destroy_queue_ib(queue); rdma_destroy_id(queue->cm_id); + mutex_destroy(&queue->queue_lock); } static void nvme_rdma_free_io_queues(struct nvme_rdma_ctrl *ctrl) -- GitLab From 9ebbfe495ecd2e51bc92ac21ed5817c3b9e223ce Mon Sep 17 00:00:00 2001 From: Chao Leng Date: Thu, 14 Jan 2021 17:09:26 +0800 Subject: [PATCH 1010/2012] nvme-tcp: avoid request double completion for concurrent nvme_tcp_timeout Each name space has a request queue, if complete request long time, multi request queues may have time out requests at the same time, nvme_tcp_timeout will execute concurrently. Multi requests in different request queues may be queued in the same tcp queue, multi nvme_tcp_timeout may call nvme_tcp_stop_queue at the same time. The first nvme_tcp_stop_queue will clear NVME_TCP_Q_LIVE and continue stopping the tcp queue(cancel io_work), but the others check NVME_TCP_Q_LIVE is already cleared, and then directly complete the requests, complete request before the io work is completely canceled may lead to a use-after-free condition. Add a multex lock to serialize nvme_tcp_stop_queue. Signed-off-by: Chao Leng Signed-off-by: Christoph Hellwig --- drivers/nvme/host/tcp.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 216619926563e..881d28eb15e9d 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -76,6 +76,7 @@ struct nvme_tcp_queue { struct work_struct io_work; int io_cpu; + struct mutex queue_lock; struct mutex send_mutex; struct llist_head req_list; struct list_head send_list; @@ -1219,6 +1220,7 @@ static void nvme_tcp_free_queue(struct nvme_ctrl *nctrl, int qid) sock_release(queue->sock); kfree(queue->pdu); + mutex_destroy(&queue->queue_lock); } static int nvme_tcp_init_connection(struct nvme_tcp_queue *queue) @@ -1380,6 +1382,7 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl, struct nvme_tcp_queue *queue = &ctrl->queues[qid]; int ret, rcv_pdu_size; + mutex_init(&queue->queue_lock); queue->ctrl = ctrl; init_llist_head(&queue->req_list); INIT_LIST_HEAD(&queue->send_list); @@ -1398,7 +1401,7 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl, if (ret) { dev_err(nctrl->device, "failed to create socket: %d\n", ret); - return ret; + goto err_destroy_mutex; } /* Single syn retry */ @@ -1507,6 +1510,8 @@ err_crypto: err_sock: sock_release(queue->sock); queue->sock = NULL; +err_destroy_mutex: + mutex_destroy(&queue->queue_lock); return ret; } @@ -1534,9 +1539,10 @@ static void nvme_tcp_stop_queue(struct nvme_ctrl *nctrl, int qid) struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl); struct nvme_tcp_queue *queue = &ctrl->queues[qid]; - if (!test_and_clear_bit(NVME_TCP_Q_LIVE, &queue->flags)) - return; - __nvme_tcp_stop_queue(queue); + mutex_lock(&queue->queue_lock); + if (test_and_clear_bit(NVME_TCP_Q_LIVE, &queue->flags)) + __nvme_tcp_stop_queue(queue); + mutex_unlock(&queue->queue_lock); } static int nvme_tcp_start_queue(struct nvme_ctrl *nctrl, int idx) -- GitLab From 20d3bb92e84d417b0494a3b6867f0c86713db257 Mon Sep 17 00:00:00 2001 From: Klaus Jensen Date: Fri, 15 Jan 2021 07:30:46 +0100 Subject: [PATCH 1011/2012] nvme-pci: allow use of cmb on v1.4 controllers Since NVMe v1.4 the Controller Memory Buffer must be explicitly enabled by the host. Signed-off-by: Klaus Jensen [hch: avoid a local variable and add a comment] Signed-off-by: Christoph Hellwig --- drivers/nvme/host/pci.c | 14 ++++++++++++++ include/linux/nvme.h | 6 ++++++ 2 files changed, 20 insertions(+) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 50d9a20568a28..25456d02eddb8 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include @@ -1795,6 +1796,9 @@ static void nvme_map_cmb(struct nvme_dev *dev) if (dev->cmb_size) return; + if (NVME_CAP_CMBS(dev->ctrl.cap)) + writel(NVME_CMBMSC_CRE, dev->bar + NVME_REG_CMBMSC); + dev->cmbsz = readl(dev->bar + NVME_REG_CMBSZ); if (!dev->cmbsz) return; @@ -1808,6 +1812,16 @@ static void nvme_map_cmb(struct nvme_dev *dev) if (offset > bar_size) return; + /* + * Tell the controller about the host side address mapping the CMB, + * and enable CMB decoding for the NVMe 1.4+ scheme: + */ + if (NVME_CAP_CMBS(dev->ctrl.cap)) { + hi_lo_writeq(NVME_CMBMSC_CRE | NVME_CMBMSC_CMSE | + (pci_bus_address(pdev, bar) + offset), + dev->bar + NVME_REG_CMBMSC); + } + /* * Controllers may support a CMB size larger than their BAR, * for example, due to being behind a bridge. Reduce the CMB to diff --git a/include/linux/nvme.h b/include/linux/nvme.h index d925359976873..bfed36e342ccb 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -116,6 +116,9 @@ enum { NVME_REG_BPMBL = 0x0048, /* Boot Partition Memory Buffer * Location */ + NVME_REG_CMBMSC = 0x0050, /* Controller Memory Buffer Memory + * Space Control + */ NVME_REG_PMRCAP = 0x0e00, /* Persistent Memory Capabilities */ NVME_REG_PMRCTL = 0x0e04, /* Persistent Memory Region Control */ NVME_REG_PMRSTS = 0x0e08, /* Persistent Memory Region Status */ @@ -135,6 +138,7 @@ enum { #define NVME_CAP_CSS(cap) (((cap) >> 37) & 0xff) #define NVME_CAP_MPSMIN(cap) (((cap) >> 48) & 0xf) #define NVME_CAP_MPSMAX(cap) (((cap) >> 52) & 0xf) +#define NVME_CAP_CMBS(cap) (((cap) >> 57) & 0x1) #define NVME_CMB_BIR(cmbloc) ((cmbloc) & 0x7) #define NVME_CMB_OFST(cmbloc) (((cmbloc) >> 12) & 0xfffff) @@ -192,6 +196,8 @@ enum { NVME_CSTS_SHST_OCCUR = 1 << 2, NVME_CSTS_SHST_CMPLT = 2 << 2, NVME_CSTS_SHST_MASK = 3 << 2, + NVME_CMBMSC_CRE = 1 << 0, + NVME_CMBMSC_CMSE = 1 << 1, }; struct nvme_id_power_state { -- GitLab From bffcd507780ea614b5543c66f2e37ce0d55cd449 Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: Wed, 13 Jan 2021 17:33:51 -0800 Subject: [PATCH 1012/2012] nvmet: set right status on error in id-ns handler The function nvmet_execute_identify_ns() doesn't set the status if call to nvmet_find_namespace() fails. In that case we set the status of the request to the value return by the nvmet_copy_sgl(). Set the status to NVME_SC_INVALID_NS and adjust the code such that request will have the right status on nvmet_find_namespace() failure. Without this patch :- NVME Identify Namespace 3: nsze : 0 ncap : 0 nuse : 0 nsfeat : 0 nlbaf : 0 flbas : 0 mc : 0 dpc : 0 dps : 0 nmic : 0 rescap : 0 fpi : 0 dlfeat : 0 nawun : 0 nawupf : 0 nacwu : 0 nabsn : 0 nabo : 0 nabspf : 0 noiob : 0 nvmcap : 0 mssrl : 0 mcl : 0 msrc : 0 nsattr : 0 nvmsetid: 0 anagrpid: 0 endgid : 0 nguid : 00000000000000000000000000000000 eui64 : 0000000000000000 lbaf 0 : ms:0 lbads:0 rp:0 (in use) With this patch-series :- feb3b88b501e (HEAD -> nvme-5.11) nvmet: remove extra variable in identify ns 6302aa67210a nvmet: remove extra variable in id-desclist ed57951da453 nvmet: remove extra variable in smart log nsid be384b8c24dc nvmet: set right status on error in id-ns handler NVMe status: INVALID_NS: The namespace or the format of that namespace is invalid(0xb) Signed-off-by: Chaitanya Kulkarni Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/target/admin-cmd.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c index 8d90235e4fcc5..dc1ea468b182b 100644 --- a/drivers/nvme/target/admin-cmd.c +++ b/drivers/nvme/target/admin-cmd.c @@ -487,8 +487,10 @@ static void nvmet_execute_identify_ns(struct nvmet_req *req) /* return an all zeroed buffer if we can't find an active namespace */ ns = nvmet_find_namespace(ctrl, req->cmd->identify.nsid); - if (!ns) + if (!ns) { + status = NVME_SC_INVALID_NS; goto done; + } nvmet_ns_revalidate(ns); @@ -541,7 +543,9 @@ static void nvmet_execute_identify_ns(struct nvmet_req *req) id->nsattr |= (1 << 0); nvmet_put_namespace(ns); done: - status = nvmet_copy_to_sgl(req, 0, id, sizeof(*id)); + if (!status) + status = nvmet_copy_to_sgl(req, 0, id, sizeof(*id)); + kfree(id); out: nvmet_req_complete(req, status); -- GitLab From 3ed86b9a7140bb9b5ff0669778e56bf9b0e582a5 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Fri, 15 Jan 2021 18:41:53 +0100 Subject: [PATCH 1013/2012] kasan, arm64: fix pointer tags in KASAN reports As of the "arm64: expose FAR_EL1 tag bits in siginfo" patch, the address that is passed to report_tag_fault has pointer tags in the format of 0x0X, while KASAN uses 0xFX format (note the difference in the top 4 bits). Fix up the pointer tag for kernel pointers in do_tag_check_fault by setting them to the same value as bit 55. Explicitly use __untagged_addr() instead of untagged_addr(), as the latter doesn't affect TTBR1 addresses. Fixes: dceec3ff7807 ("arm64: expose FAR_EL1 tag bits in siginfo") Fixes: 4291e9ee6189 ("kasan, arm64: print report from tag fault handler") Signed-off-by: Andrey Konovalov Reviewed-by: Catalin Marinas Reviewed-by: Vincenzo Frascino Link: https://linux-review.googlesource.com/id/I9ced973866036d8679e8f4ae325de547eb969649 Link: https://lore.kernel.org/r/ff30b0afe6005fd046f9ac72bfb71822aedccd89.1610731872.git.andreyknvl@google.com Signed-off-by: Catalin Marinas --- arch/arm64/mm/fault.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 3c40da479899d..35d75c60e2b8d 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -709,10 +709,11 @@ static int do_tag_check_fault(unsigned long far, unsigned int esr, struct pt_regs *regs) { /* - * The architecture specifies that bits 63:60 of FAR_EL1 are UNKNOWN for tag - * check faults. Mask them out now so that userspace doesn't see them. + * The architecture specifies that bits 63:60 of FAR_EL1 are UNKNOWN + * for tag check faults. Set them to corresponding bits in the untagged + * address. */ - far &= (1UL << 60) - 1; + far = (__untagged_addr(far) & ~MTE_TAG_MASK) | (far & MTE_TAG_MASK); do_bad_area(far, esr, regs); return 0; } -- GitLab From 070222731be52d741e55d8967b1764482b81e54c Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Thu, 14 Jan 2021 15:34:32 +0100 Subject: [PATCH 1014/2012] platform/x86: intel-vbtn: Drop HP Stream x360 Convertible PC 11 from allow-list THe HP Stream x360 Convertible PC 11 DSDT has the following VGBS function: Method (VGBS, 0, Serialized) { If ((^^PCI0.LPCB.EC0.ROLS == Zero)) { VBDS = Zero } Else { VBDS = Zero } Return (VBDS) /* \_SB_.VGBI.VBDS */ } Which is obviously wrong, because it always returns 0 independent of the 2-in-1 being in laptop or tablet mode. This causes the intel-vbtn driver to initially report SW_TABLET_MODE = 1 to userspace, which is known to cause problems when the 2-in-1 is actually in laptop mode. During earlier testing this turned out to not be a problem because the 2-in-1 would do a Notify(..., 0xCC) or Notify(..., 0xCD) soon after the intel-vbtn driver loaded, correcting the SW_TABLET_MODE state. Further testing however has shown that this Notify() soon after the intel-vbtn driver loads, does not always happen. When the Notify does not happen, then intel-vbtn reports SW_TABLET_MODE = 1 resulting in a non-working touchpad. IOW the tablet-mode reporting is not reliable on this device, so it should be dropped from the allow-list, fixing the touchpad sometimes not working. Fixes: 8169bd3e6e19 ("platform/x86: intel-vbtn: Switch to an allow-list for SW_TABLET_MODE reporting") Link: https://lore.kernel.org/r/20210114143432.31750-1-hdegoede@redhat.com Signed-off-by: Hans de Goede --- drivers/platform/x86/intel-vbtn.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/platform/x86/intel-vbtn.c b/drivers/platform/x86/intel-vbtn.c index 9bbdb26d43054..30a9062d2b4b8 100644 --- a/drivers/platform/x86/intel-vbtn.c +++ b/drivers/platform/x86/intel-vbtn.c @@ -204,12 +204,6 @@ static const struct dmi_system_id dmi_switches_allow_list[] = { DMI_MATCH(DMI_PRODUCT_NAME, "Venue 11 Pro 7130"), }, }, - { - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"), - DMI_MATCH(DMI_PRODUCT_NAME, "HP Stream x360 Convertible PC 11"), - }, - }, { .matches = { DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"), -- GitLab From 79267ae22615496655feee2db0848f6786bcf67a Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Mon, 18 Jan 2021 15:52:10 +0200 Subject: [PATCH 1015/2012] net: mscc: ocelot: allow offloading of bridge on top of LAG The blamed commit was too aggressive, and it made ocelot_netdevice_event react only to network interface events emitted for the ocelot switch ports. In fact, only the PRECHANGEUPPER should have had that check. When we ignore all events that are not for us, we miss the fact that the upper of the LAG changes, and the bonding interface gets enslaved to a bridge. This is an operation we could offload under certain conditions. Fixes: 7afb3e575e5a ("net: mscc: ocelot: don't handle netdev events for other netdevs") Signed-off-by: Vladimir Oltean Reviewed-by: Alexandre Belloni Link: https://lore.kernel.org/r/20210118135210.2666246-1-olteanv@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mscc/ocelot_net.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mscc/ocelot_net.c b/drivers/net/ethernet/mscc/ocelot_net.c index 2bd2840d88bdc..42230f92ca9c8 100644 --- a/drivers/net/ethernet/mscc/ocelot_net.c +++ b/drivers/net/ethernet/mscc/ocelot_net.c @@ -1042,10 +1042,8 @@ static int ocelot_netdevice_event(struct notifier_block *unused, struct net_device *dev = netdev_notifier_info_to_dev(ptr); int ret = 0; - if (!ocelot_netdevice_dev_check(dev)) - return 0; - if (event == NETDEV_PRECHANGEUPPER && + ocelot_netdevice_dev_check(dev) && netif_is_lag_master(info->upper_dev)) { struct netdev_lag_upper_info *lag_upper_info = info->upper_info; struct netlink_ext_ack *extack; -- GitLab From bf52e27bb35377d3582370732fa1e99cb446c670 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Fri, 15 Jan 2021 06:50:44 -0600 Subject: [PATCH 1016/2012] net: ipa: rename interconnect settings Use "bandwidth" rather than "rate" in describing the average and peak values to use for IPA interconnects. They should have been named that way to begin with. Signed-off-by: Alex Elder Signed-off-by: Jakub Kicinski --- drivers/net/ipa/ipa_clock.c | 20 ++++++++++---------- drivers/net/ipa/ipa_data-sc7180.c | 16 ++++++++-------- drivers/net/ipa/ipa_data-sdm845.c | 16 ++++++++-------- drivers/net/ipa/ipa_data.h | 10 +++++----- 4 files changed, 31 insertions(+), 31 deletions(-) diff --git a/drivers/net/ipa/ipa_clock.c b/drivers/net/ipa/ipa_clock.c index 135c393437f12..459c357e09678 100644 --- a/drivers/net/ipa/ipa_clock.c +++ b/drivers/net/ipa/ipa_clock.c @@ -109,20 +109,20 @@ static int ipa_interconnect_enable(struct ipa *ipa) int ret; data = &clock->interconnect_data[IPA_INTERCONNECT_MEMORY]; - ret = icc_set_bw(clock->memory_path, data->average_rate, - data->peak_rate); + ret = icc_set_bw(clock->memory_path, data->average_bandwidth, + data->peak_bandwidth); if (ret) return ret; data = &clock->interconnect_data[IPA_INTERCONNECT_IMEM]; - ret = icc_set_bw(clock->imem_path, data->average_rate, - data->peak_rate); + ret = icc_set_bw(clock->imem_path, data->average_bandwidth, + data->peak_bandwidth); if (ret) goto err_memory_path_disable; data = &clock->interconnect_data[IPA_INTERCONNECT_CONFIG]; - ret = icc_set_bw(clock->config_path, data->average_rate, - data->peak_rate); + ret = icc_set_bw(clock->config_path, data->average_bandwidth, + data->peak_bandwidth); if (ret) goto err_imem_path_disable; @@ -159,12 +159,12 @@ static int ipa_interconnect_disable(struct ipa *ipa) err_imem_path_reenable: data = &clock->interconnect_data[IPA_INTERCONNECT_IMEM]; - (void)icc_set_bw(clock->imem_path, data->average_rate, - data->peak_rate); + (void)icc_set_bw(clock->imem_path, data->average_bandwidth, + data->peak_bandwidth); err_memory_path_reenable: data = &clock->interconnect_data[IPA_INTERCONNECT_MEMORY]; - (void)icc_set_bw(clock->memory_path, data->average_rate, - data->peak_rate); + (void)icc_set_bw(clock->memory_path, data->average_bandwidth, + data->peak_bandwidth); return ret; } diff --git a/drivers/net/ipa/ipa_data-sc7180.c b/drivers/net/ipa/ipa_data-sc7180.c index 5cc0ed77edb9c..491572c0a34dc 100644 --- a/drivers/net/ipa/ipa_data-sc7180.c +++ b/drivers/net/ipa/ipa_data-sc7180.c @@ -311,20 +311,20 @@ static struct ipa_mem_data ipa_mem_data = { static struct ipa_clock_data ipa_clock_data = { .core_clock_rate = 100 * 1000 * 1000, /* Hz */ - /* Interconnect rates are in 1000 byte/second units */ + /* Interconnect bandwidths are in 1000 byte/second units */ .interconnect = { [IPA_INTERCONNECT_MEMORY] = { - .peak_rate = 465000, /* 465 MBps */ - .average_rate = 80000, /* 80 MBps */ + .peak_bandwidth = 465000, /* 465 MBps */ + .average_bandwidth = 80000, /* 80 MBps */ }, - /* Average rate is unused for the next two interconnects */ + /* Average bandwidth unused for the next two interconnects */ [IPA_INTERCONNECT_IMEM] = { - .peak_rate = 68570, /* 68.570 MBps */ - .average_rate = 0, /* unused */ + .peak_bandwidth = 68570, /* 68.57 MBps */ + .average_bandwidth = 0, /* unused */ }, [IPA_INTERCONNECT_CONFIG] = { - .peak_rate = 30000, /* 30 MBps */ - .average_rate = 0, /* unused */ + .peak_bandwidth = 30000, /* 30 MBps */ + .average_bandwidth = 0, /* unused */ }, }, }; diff --git a/drivers/net/ipa/ipa_data-sdm845.c b/drivers/net/ipa/ipa_data-sdm845.c index f8fee8d3ca42a..c62b86171b929 100644 --- a/drivers/net/ipa/ipa_data-sdm845.c +++ b/drivers/net/ipa/ipa_data-sdm845.c @@ -331,20 +331,20 @@ static struct ipa_mem_data ipa_mem_data = { static struct ipa_clock_data ipa_clock_data = { .core_clock_rate = 75 * 1000 * 1000, /* Hz */ - /* Interconnect rates are in 1000 byte/second units */ + /* Interconnect bandwidths are in 1000 byte/second units */ .interconnect = { [IPA_INTERCONNECT_MEMORY] = { - .peak_rate = 600000, /* 600 MBps */ - .average_rate = 80000, /* 80 MBps */ + .peak_bandwidth = 600000, /* 600 MBps */ + .average_bandwidth = 80000, /* 80 MBps */ }, - /* Average rate is unused for the next two interconnects */ + /* Average bandwidth unused for the next two interconnects */ [IPA_INTERCONNECT_IMEM] = { - .peak_rate = 350000, /* 350 MBps */ - .average_rate = 0, /* unused */ + .peak_bandwidth = 350000, /* 350 MBps */ + .average_bandwidth = 0, /* unused */ }, [IPA_INTERCONNECT_CONFIG] = { - .peak_rate = 40000, /* 40 MBps */ - .average_rate = 0, /* unused */ + .peak_bandwidth = 40000, /* 40 MBps */ + .average_bandwidth = 0, /* unused */ }, }, }; diff --git a/drivers/net/ipa/ipa_data.h b/drivers/net/ipa/ipa_data.h index 0ed5ffe2b8da0..96a9771a6cc05 100644 --- a/drivers/net/ipa/ipa_data.h +++ b/drivers/net/ipa/ipa_data.h @@ -267,13 +267,13 @@ enum ipa_interconnect_id { }; /** - * struct ipa_interconnect_data - description of IPA interconnect rates - * @peak_rate: Peak interconnect bandwidth (in 1000 byte/sec units) - * @average_rate: Average interconnect bandwidth (in 1000 byte/sec units) + * struct ipa_interconnect_data - description of IPA interconnect bandwidths + * @peak_bandwidth: Peak interconnect bandwidth (in 1000 byte/sec units) + * @average_bandwidth: Average interconnect bandwidth (in 1000 byte/sec units) */ struct ipa_interconnect_data { - u32 peak_rate; - u32 average_rate; + u32 peak_bandwidth; + u32 average_bandwidth; }; /** -- GitLab From ec0ef6d3c8c2887724e24b6337f48e893e191d08 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Fri, 15 Jan 2021 06:50:45 -0600 Subject: [PATCH 1017/2012] net: ipa: don't return an error from ipa_interconnect_disable() If disabling interconnects fails there's not a lot we can do. The only two callers of ipa_interconnect_disable() ignore the return value, so just give the function a void return type. Print an error message if disabling any of the interconnects is not successful. Return (and print) only the first error seen. Signed-off-by: Alex Elder Signed-off-by: Jakub Kicinski --- drivers/net/ipa/ipa_clock.c | 31 +++++++++++-------------------- 1 file changed, 11 insertions(+), 20 deletions(-) diff --git a/drivers/net/ipa/ipa_clock.c b/drivers/net/ipa/ipa_clock.c index 459c357e09678..baedb481fe824 100644 --- a/drivers/net/ipa/ipa_clock.c +++ b/drivers/net/ipa/ipa_clock.c @@ -137,36 +137,27 @@ err_memory_path_disable: } /* To disable an interconnect, we just its bandwidth to 0 */ -static int ipa_interconnect_disable(struct ipa *ipa) +static void ipa_interconnect_disable(struct ipa *ipa) { - const struct ipa_interconnect_data *data; struct ipa_clock *clock = ipa->clock; + int result = 0; int ret; ret = icc_set_bw(clock->memory_path, 0, 0); if (ret) - return ret; + result = ret; ret = icc_set_bw(clock->imem_path, 0, 0); - if (ret) - goto err_memory_path_reenable; + if (ret && !result) + result = ret; ret = icc_set_bw(clock->config_path, 0, 0); - if (ret) - goto err_imem_path_reenable; - - return 0; + if (ret && !result) + result = ret; -err_imem_path_reenable: - data = &clock->interconnect_data[IPA_INTERCONNECT_IMEM]; - (void)icc_set_bw(clock->imem_path, data->average_bandwidth, - data->peak_bandwidth); -err_memory_path_reenable: - data = &clock->interconnect_data[IPA_INTERCONNECT_MEMORY]; - (void)icc_set_bw(clock->memory_path, data->average_bandwidth, - data->peak_bandwidth); - - return ret; + if (result) + dev_err(&ipa->pdev->dev, + "error %d disabling IPA interconnects\n", ret); } /* Turn on IPA clocks, including interconnects */ @@ -189,7 +180,7 @@ static int ipa_clock_enable(struct ipa *ipa) static void ipa_clock_disable(struct ipa *ipa) { clk_disable_unprepare(ipa->clock->core); - (void)ipa_interconnect_disable(ipa); + ipa_interconnect_disable(ipa); } /* Get an IPA clock reference, but only if the reference count is -- GitLab From 5b40810b19db072b74e2fea7e927a908c1b7c5c5 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Fri, 15 Jan 2021 06:50:46 -0600 Subject: [PATCH 1018/2012] net: ipa: introduce an IPA interconnect structure Rather than having separate pointers for the memory, imem, and config interconnect paths, maintain an array of ipa_interconnect structures each of which contains a pointer to a path. Signed-off-by: Alex Elder Signed-off-by: Jakub Kicinski --- drivers/net/ipa/ipa_clock.c | 59 +++++++++++++++++++++---------------- 1 file changed, 34 insertions(+), 25 deletions(-) diff --git a/drivers/net/ipa/ipa_clock.c b/drivers/net/ipa/ipa_clock.c index baedb481fe824..2bf5af6823d8c 100644 --- a/drivers/net/ipa/ipa_clock.c +++ b/drivers/net/ipa/ipa_clock.c @@ -30,23 +30,27 @@ * An IPA clock reference must be held for any access to IPA hardware. */ +/** + * struct ipa_interconnect - IPA interconnect information + * @path: Interconnect path + */ +struct ipa_interconnect { + struct icc_path *path; +}; + /** * struct ipa_clock - IPA clocking information * @count: Clocking reference count * @mutex: Protects clock enable/disable * @core: IPA core clock - * @memory_path: Memory interconnect - * @imem_path: Internal memory interconnect - * @config_path: Configuration space interconnect + * @interconnect: Interconnect array * @interconnect_data: Interconnect configuration data */ struct ipa_clock { refcount_t count; struct mutex mutex; /* protects clock enable/disable */ struct clk *core; - struct icc_path *memory_path; - struct icc_path *imem_path; - struct icc_path *config_path; + struct ipa_interconnect *interconnect[IPA_INTERCONNECT_COUNT]; const struct ipa_interconnect_data *interconnect_data; }; @@ -71,24 +75,24 @@ static int ipa_interconnect_init(struct ipa_clock *clock, struct device *dev) path = ipa_interconnect_init_one(dev, "memory"); if (IS_ERR(path)) goto err_return; - clock->memory_path = path; + clock->interconnect[IPA_INTERCONNECT_MEMORY]->path = path; path = ipa_interconnect_init_one(dev, "imem"); if (IS_ERR(path)) goto err_memory_path_put; - clock->imem_path = path; + clock->interconnect[IPA_INTERCONNECT_IMEM]->path = path; path = ipa_interconnect_init_one(dev, "config"); if (IS_ERR(path)) goto err_imem_path_put; - clock->config_path = path; + clock->interconnect[IPA_INTERCONNECT_CONFIG]->path = path; return 0; err_imem_path_put: - icc_put(clock->imem_path); + icc_put(clock->interconnect[IPA_INTERCONNECT_IMEM]->path); err_memory_path_put: - icc_put(clock->memory_path); + icc_put(clock->interconnect[IPA_INTERCONNECT_MEMORY]->path); err_return: return PTR_ERR(path); } @@ -96,9 +100,9 @@ err_return: /* Inverse of ipa_interconnect_init() */ static void ipa_interconnect_exit(struct ipa_clock *clock) { - icc_put(clock->config_path); - icc_put(clock->imem_path); - icc_put(clock->memory_path); + icc_put(clock->interconnect[IPA_INTERCONNECT_CONFIG]->path); + icc_put(clock->interconnect[IPA_INTERCONNECT_IMEM]->path); + icc_put(clock->interconnect[IPA_INTERCONNECT_MEMORY]->path); } /* Currently we only use one bandwidth level, so just "enable" interconnects */ @@ -109,29 +113,31 @@ static int ipa_interconnect_enable(struct ipa *ipa) int ret; data = &clock->interconnect_data[IPA_INTERCONNECT_MEMORY]; - ret = icc_set_bw(clock->memory_path, data->average_bandwidth, - data->peak_bandwidth); + ret = icc_set_bw(clock->interconnect[IPA_INTERCONNECT_MEMORY]->path, + data->average_bandwidth, data->peak_bandwidth); if (ret) return ret; data = &clock->interconnect_data[IPA_INTERCONNECT_IMEM]; - ret = icc_set_bw(clock->imem_path, data->average_bandwidth, - data->peak_bandwidth); + ret = icc_set_bw(clock->interconnect[IPA_INTERCONNECT_IMEM]->path, + data->average_bandwidth, data->peak_bandwidth); if (ret) goto err_memory_path_disable; data = &clock->interconnect_data[IPA_INTERCONNECT_CONFIG]; - ret = icc_set_bw(clock->config_path, data->average_bandwidth, - data->peak_bandwidth); + ret = icc_set_bw(clock->interconnect[IPA_INTERCONNECT_CONFIG]->path, + data->average_bandwidth, data->peak_bandwidth); if (ret) goto err_imem_path_disable; return 0; err_imem_path_disable: - (void)icc_set_bw(clock->imem_path, 0, 0); + (void)icc_set_bw(clock->interconnect[IPA_INTERCONNECT_IMEM]->path, + 0, 0); err_memory_path_disable: - (void)icc_set_bw(clock->memory_path, 0, 0); + (void)icc_set_bw(clock->interconnect[IPA_INTERCONNECT_MEMORY]->path, + 0, 0); return ret; } @@ -143,15 +149,18 @@ static void ipa_interconnect_disable(struct ipa *ipa) int result = 0; int ret; - ret = icc_set_bw(clock->memory_path, 0, 0); + ret = icc_set_bw(clock->interconnect[IPA_INTERCONNECT_MEMORY]->path, + 0, 0); if (ret) result = ret; - ret = icc_set_bw(clock->imem_path, 0, 0); + ret = icc_set_bw(clock->interconnect[IPA_INTERCONNECT_IMEM]->path, + 0, 0); if (ret && !result) result = ret; - ret = icc_set_bw(clock->config_path, 0, 0); + ret = icc_set_bw(clock->interconnect[IPA_INTERCONNECT_CONFIG]->path, + 0, 0); if (ret && !result) result = ret; -- GitLab From db6cd5148724b07617cf43eb2cb916a853d52bc3 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Fri, 15 Jan 2021 06:50:47 -0600 Subject: [PATCH 1019/2012] net: ipa: store average and peak interconnect bandwidth Add fields in the ipa_interconnect structure to hold the average and peak bandwidth values for the interconnect. Pass the configuring data for interconnects to ipa_interconnect_init() so these values can be recorded, and use them when enabling the interconnects. There's no longer any need to keep a copy of the interconnect data after initialization. Signed-off-by: Alex Elder Signed-off-by: Jakub Kicinski --- drivers/net/ipa/ipa_clock.c | 88 ++++++++++++++++++++++--------------- 1 file changed, 52 insertions(+), 36 deletions(-) diff --git a/drivers/net/ipa/ipa_clock.c b/drivers/net/ipa/ipa_clock.c index 2bf5af6823d8c..537c72b5267f6 100644 --- a/drivers/net/ipa/ipa_clock.c +++ b/drivers/net/ipa/ipa_clock.c @@ -33,9 +33,13 @@ /** * struct ipa_interconnect - IPA interconnect information * @path: Interconnect path + * @average_bandwidth: Average interconnect bandwidth (KB/second) + * @peak_bandwidth: Peak interconnect bandwidth (KB/second) */ struct ipa_interconnect { struct icc_path *path; + u32 average_bandwidth; + u32 peak_bandwidth; }; /** @@ -44,14 +48,12 @@ struct ipa_interconnect { * @mutex: Protects clock enable/disable * @core: IPA core clock * @interconnect: Interconnect array - * @interconnect_data: Interconnect configuration data */ struct ipa_clock { refcount_t count; struct mutex mutex; /* protects clock enable/disable */ struct clk *core; - struct ipa_interconnect *interconnect[IPA_INTERCONNECT_COUNT]; - const struct ipa_interconnect_data *interconnect_data; + struct ipa_interconnect interconnect[IPA_INTERCONNECT_COUNT]; }; static struct icc_path * @@ -61,38 +63,52 @@ ipa_interconnect_init_one(struct device *dev, const char *name) path = of_icc_get(dev, name); if (IS_ERR(path)) - dev_err(dev, "error %ld getting %s interconnect\n", - PTR_ERR(path), name); + dev_err(dev, "error %d getting %s interconnect\n", + (int)PTR_ERR(path), name); return path; } /* Initialize interconnects required for IPA operation */ -static int ipa_interconnect_init(struct ipa_clock *clock, struct device *dev) +static int ipa_interconnect_init(struct ipa_clock *clock, struct device *dev, + const struct ipa_interconnect_data *data) { + struct ipa_interconnect *interconnect; struct icc_path *path; path = ipa_interconnect_init_one(dev, "memory"); if (IS_ERR(path)) goto err_return; - clock->interconnect[IPA_INTERCONNECT_MEMORY]->path = path; + interconnect = &clock->interconnect[IPA_INTERCONNECT_MEMORY]; + interconnect->path = path; + interconnect->average_bandwidth = data->average_bandwidth; + interconnect->peak_bandwidth = data->peak_bandwidth; + data++; path = ipa_interconnect_init_one(dev, "imem"); if (IS_ERR(path)) goto err_memory_path_put; - clock->interconnect[IPA_INTERCONNECT_IMEM]->path = path; + interconnect = &clock->interconnect[IPA_INTERCONNECT_IMEM]; + interconnect->path = path; + interconnect->average_bandwidth = data->average_bandwidth; + interconnect->peak_bandwidth = data->peak_bandwidth; + data++; path = ipa_interconnect_init_one(dev, "config"); if (IS_ERR(path)) goto err_imem_path_put; - clock->interconnect[IPA_INTERCONNECT_CONFIG]->path = path; + interconnect = &clock->interconnect[IPA_INTERCONNECT_CONFIG]; + interconnect->path = path; + interconnect->average_bandwidth = data->average_bandwidth; + interconnect->peak_bandwidth = data->peak_bandwidth; + data++; return 0; err_imem_path_put: - icc_put(clock->interconnect[IPA_INTERCONNECT_IMEM]->path); + icc_put(clock->interconnect[IPA_INTERCONNECT_IMEM].path); err_memory_path_put: - icc_put(clock->interconnect[IPA_INTERCONNECT_MEMORY]->path); + icc_put(clock->interconnect[IPA_INTERCONNECT_MEMORY].path); err_return: return PTR_ERR(path); } @@ -100,44 +116,44 @@ err_return: /* Inverse of ipa_interconnect_init() */ static void ipa_interconnect_exit(struct ipa_clock *clock) { - icc_put(clock->interconnect[IPA_INTERCONNECT_CONFIG]->path); - icc_put(clock->interconnect[IPA_INTERCONNECT_IMEM]->path); - icc_put(clock->interconnect[IPA_INTERCONNECT_MEMORY]->path); + icc_put(clock->interconnect[IPA_INTERCONNECT_CONFIG].path); + icc_put(clock->interconnect[IPA_INTERCONNECT_IMEM].path); + icc_put(clock->interconnect[IPA_INTERCONNECT_MEMORY].path); } /* Currently we only use one bandwidth level, so just "enable" interconnects */ static int ipa_interconnect_enable(struct ipa *ipa) { - const struct ipa_interconnect_data *data; + struct ipa_interconnect *interconnect; struct ipa_clock *clock = ipa->clock; int ret; - data = &clock->interconnect_data[IPA_INTERCONNECT_MEMORY]; - ret = icc_set_bw(clock->interconnect[IPA_INTERCONNECT_MEMORY]->path, - data->average_bandwidth, data->peak_bandwidth); + interconnect = &clock->interconnect[IPA_INTERCONNECT_MEMORY]; + ret = icc_set_bw(interconnect->path, interconnect->average_bandwidth, + interconnect->peak_bandwidth); if (ret) return ret; - data = &clock->interconnect_data[IPA_INTERCONNECT_IMEM]; - ret = icc_set_bw(clock->interconnect[IPA_INTERCONNECT_IMEM]->path, - data->average_bandwidth, data->peak_bandwidth); + interconnect = &clock->interconnect[IPA_INTERCONNECT_IMEM]; + ret = icc_set_bw(interconnect->path, interconnect->average_bandwidth, + interconnect->peak_bandwidth); if (ret) goto err_memory_path_disable; - data = &clock->interconnect_data[IPA_INTERCONNECT_CONFIG]; - ret = icc_set_bw(clock->interconnect[IPA_INTERCONNECT_CONFIG]->path, - data->average_bandwidth, data->peak_bandwidth); + interconnect = &clock->interconnect[IPA_INTERCONNECT_CONFIG]; + ret = icc_set_bw(interconnect->path, interconnect->average_bandwidth, + interconnect->peak_bandwidth); if (ret) goto err_imem_path_disable; return 0; err_imem_path_disable: - (void)icc_set_bw(clock->interconnect[IPA_INTERCONNECT_IMEM]->path, - 0, 0); + interconnect = &clock->interconnect[IPA_INTERCONNECT_IMEM]; + (void)icc_set_bw(interconnect->path, 0, 0); err_memory_path_disable: - (void)icc_set_bw(clock->interconnect[IPA_INTERCONNECT_MEMORY]->path, - 0, 0); + interconnect = &clock->interconnect[IPA_INTERCONNECT_MEMORY]; + (void)icc_set_bw(interconnect->path, 0, 0); return ret; } @@ -145,22 +161,23 @@ err_memory_path_disable: /* To disable an interconnect, we just its bandwidth to 0 */ static void ipa_interconnect_disable(struct ipa *ipa) { + struct ipa_interconnect *interconnect; struct ipa_clock *clock = ipa->clock; int result = 0; int ret; - ret = icc_set_bw(clock->interconnect[IPA_INTERCONNECT_MEMORY]->path, - 0, 0); + interconnect = &clock->interconnect[IPA_INTERCONNECT_MEMORY]; + ret = icc_set_bw(interconnect->path, 0, 0); if (ret) result = ret; - ret = icc_set_bw(clock->interconnect[IPA_INTERCONNECT_IMEM]->path, - 0, 0); + interconnect = &clock->interconnect[IPA_INTERCONNECT_IMEM]; + ret = icc_set_bw(interconnect->path, 0, 0); if (ret && !result) result = ret; - ret = icc_set_bw(clock->interconnect[IPA_INTERCONNECT_CONFIG]->path, - 0, 0); + interconnect = &clock->interconnect[IPA_INTERCONNECT_IMEM]; + ret = icc_set_bw(interconnect->path, 0, 0); if (ret && !result) result = ret; @@ -286,9 +303,8 @@ ipa_clock_init(struct device *dev, const struct ipa_clock_data *data) goto err_clk_put; } clock->core = clk; - clock->interconnect_data = data->interconnect; - ret = ipa_interconnect_init(clock, dev); + ret = ipa_interconnect_init(clock, dev, data->interconnect); if (ret) goto err_kfree; -- GitLab From e938d7ef92c38f43bbb6de03e8399f6f821d217b Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Fri, 15 Jan 2021 06:50:48 -0600 Subject: [PATCH 1020/2012] net: ipa: add interconnect name to configuration data Add the name to the configuration data for each interconnect. Use this information rather than a constant string during initialization. Signed-off-by: Alex Elder Signed-off-by: Jakub Kicinski --- drivers/net/ipa/ipa_clock.c | 6 +++--- drivers/net/ipa/ipa_data-sc7180.c | 3 +++ drivers/net/ipa/ipa_data-sdm845.c | 3 +++ drivers/net/ipa/ipa_data.h | 2 ++ 4 files changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/net/ipa/ipa_clock.c b/drivers/net/ipa/ipa_clock.c index 537c72b5267f6..07069dbc6d033 100644 --- a/drivers/net/ipa/ipa_clock.c +++ b/drivers/net/ipa/ipa_clock.c @@ -76,7 +76,7 @@ static int ipa_interconnect_init(struct ipa_clock *clock, struct device *dev, struct ipa_interconnect *interconnect; struct icc_path *path; - path = ipa_interconnect_init_one(dev, "memory"); + path = ipa_interconnect_init_one(dev, data->name); if (IS_ERR(path)) goto err_return; interconnect = &clock->interconnect[IPA_INTERCONNECT_MEMORY]; @@ -85,7 +85,7 @@ static int ipa_interconnect_init(struct ipa_clock *clock, struct device *dev, interconnect->peak_bandwidth = data->peak_bandwidth; data++; - path = ipa_interconnect_init_one(dev, "imem"); + path = ipa_interconnect_init_one(dev, data->name); if (IS_ERR(path)) goto err_memory_path_put; interconnect = &clock->interconnect[IPA_INTERCONNECT_IMEM]; @@ -94,7 +94,7 @@ static int ipa_interconnect_init(struct ipa_clock *clock, struct device *dev, interconnect->peak_bandwidth = data->peak_bandwidth; data++; - path = ipa_interconnect_init_one(dev, "config"); + path = ipa_interconnect_init_one(dev, data->name); if (IS_ERR(path)) goto err_imem_path_put; interconnect = &clock->interconnect[IPA_INTERCONNECT_CONFIG]; diff --git a/drivers/net/ipa/ipa_data-sc7180.c b/drivers/net/ipa/ipa_data-sc7180.c index 491572c0a34dc..1936ecb4c1104 100644 --- a/drivers/net/ipa/ipa_data-sc7180.c +++ b/drivers/net/ipa/ipa_data-sc7180.c @@ -314,15 +314,18 @@ static struct ipa_clock_data ipa_clock_data = { /* Interconnect bandwidths are in 1000 byte/second units */ .interconnect = { [IPA_INTERCONNECT_MEMORY] = { + .name = "memory", .peak_bandwidth = 465000, /* 465 MBps */ .average_bandwidth = 80000, /* 80 MBps */ }, /* Average bandwidth unused for the next two interconnects */ [IPA_INTERCONNECT_IMEM] = { + .name = "imem", .peak_bandwidth = 68570, /* 68.57 MBps */ .average_bandwidth = 0, /* unused */ }, [IPA_INTERCONNECT_CONFIG] = { + .name = "config", .peak_bandwidth = 30000, /* 30 MBps */ .average_bandwidth = 0, /* unused */ }, diff --git a/drivers/net/ipa/ipa_data-sdm845.c b/drivers/net/ipa/ipa_data-sdm845.c index c62b86171b929..3b556b5a63406 100644 --- a/drivers/net/ipa/ipa_data-sdm845.c +++ b/drivers/net/ipa/ipa_data-sdm845.c @@ -334,15 +334,18 @@ static struct ipa_clock_data ipa_clock_data = { /* Interconnect bandwidths are in 1000 byte/second units */ .interconnect = { [IPA_INTERCONNECT_MEMORY] = { + .name = "memory", .peak_bandwidth = 600000, /* 600 MBps */ .average_bandwidth = 80000, /* 80 MBps */ }, /* Average bandwidth unused for the next two interconnects */ [IPA_INTERCONNECT_IMEM] = { + .name = "imem", .peak_bandwidth = 350000, /* 350 MBps */ .average_bandwidth = 0, /* unused */ }, [IPA_INTERCONNECT_CONFIG] = { + .name = "config", .peak_bandwidth = 40000, /* 40 MBps */ .average_bandwidth = 0, /* unused */ }, diff --git a/drivers/net/ipa/ipa_data.h b/drivers/net/ipa/ipa_data.h index 96a9771a6cc05..d8ea6266dc6a1 100644 --- a/drivers/net/ipa/ipa_data.h +++ b/drivers/net/ipa/ipa_data.h @@ -268,10 +268,12 @@ enum ipa_interconnect_id { /** * struct ipa_interconnect_data - description of IPA interconnect bandwidths + * @name: Interconnect name (matches interconnect-name in DT) * @peak_bandwidth: Peak interconnect bandwidth (in 1000 byte/sec units) * @average_bandwidth: Average interconnect bandwidth (in 1000 byte/sec units) */ struct ipa_interconnect_data { + const char *name; u32 peak_bandwidth; u32 average_bandwidth; }; -- GitLab From 10d0d3970187551645c7ab363e8c9d29e2122088 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Fri, 15 Jan 2021 06:50:49 -0600 Subject: [PATCH 1021/2012] net: ipa: clean up interconnect initialization Pass an the address of an IPA interconnect structure and its configuration data to ipa_interconnect_init_one() and have that function initialize all the structure's fields. Change the function to simply return an error code. Introduce ipa_interconnect_exit_one() to encapsulate the cleanup of an IPA interconnect structure. Signed-off-by: Alex Elder Signed-off-by: Jakub Kicinski --- drivers/net/ipa/ipa_clock.c | 83 +++++++++++++++++++++---------------- 1 file changed, 47 insertions(+), 36 deletions(-) diff --git a/drivers/net/ipa/ipa_clock.c b/drivers/net/ipa/ipa_clock.c index 07069dbc6d033..fbe42106fc2a8 100644 --- a/drivers/net/ipa/ipa_clock.c +++ b/drivers/net/ipa/ipa_clock.c @@ -56,17 +56,33 @@ struct ipa_clock { struct ipa_interconnect interconnect[IPA_INTERCONNECT_COUNT]; }; -static struct icc_path * -ipa_interconnect_init_one(struct device *dev, const char *name) +static int ipa_interconnect_init_one(struct device *dev, + struct ipa_interconnect *interconnect, + const struct ipa_interconnect_data *data) { struct icc_path *path; - path = of_icc_get(dev, name); - if (IS_ERR(path)) - dev_err(dev, "error %d getting %s interconnect\n", - (int)PTR_ERR(path), name); + path = of_icc_get(dev, data->name); + if (IS_ERR(path)) { + int ret = PTR_ERR(path); - return path; + dev_err(dev, "error %d getting %s interconnect\n", ret, + data->name); + + return ret; + } + + interconnect->path = path; + interconnect->average_bandwidth = data->average_bandwidth; + interconnect->peak_bandwidth = data->peak_bandwidth; + + return 0; +} + +static void ipa_interconnect_exit_one(struct ipa_interconnect *interconnect) +{ + icc_put(interconnect->path); + memset(interconnect, 0, sizeof(*interconnect)); } /* Initialize interconnects required for IPA operation */ @@ -74,51 +90,46 @@ static int ipa_interconnect_init(struct ipa_clock *clock, struct device *dev, const struct ipa_interconnect_data *data) { struct ipa_interconnect *interconnect; - struct icc_path *path; + int ret; - path = ipa_interconnect_init_one(dev, data->name); - if (IS_ERR(path)) - goto err_return; interconnect = &clock->interconnect[IPA_INTERCONNECT_MEMORY]; - interconnect->path = path; - interconnect->average_bandwidth = data->average_bandwidth; - interconnect->peak_bandwidth = data->peak_bandwidth; - data++; + ret = ipa_interconnect_init_one(dev, interconnect, data++); + if (ret) + return ret; - path = ipa_interconnect_init_one(dev, data->name); - if (IS_ERR(path)) - goto err_memory_path_put; interconnect = &clock->interconnect[IPA_INTERCONNECT_IMEM]; - interconnect->path = path; - interconnect->average_bandwidth = data->average_bandwidth; - interconnect->peak_bandwidth = data->peak_bandwidth; - data++; + ret = ipa_interconnect_init_one(dev, interconnect, data++); + if (ret) + goto err_memory_path_put; - path = ipa_interconnect_init_one(dev, data->name); - if (IS_ERR(path)) + interconnect = &clock->interconnect[IPA_INTERCONNECT_IMEM]; + ret = ipa_interconnect_init_one(dev, interconnect, data++); + if (ret) goto err_imem_path_put; - interconnect = &clock->interconnect[IPA_INTERCONNECT_CONFIG]; - interconnect->path = path; - interconnect->average_bandwidth = data->average_bandwidth; - interconnect->peak_bandwidth = data->peak_bandwidth; - data++; return 0; err_imem_path_put: - icc_put(clock->interconnect[IPA_INTERCONNECT_IMEM].path); + interconnect = &clock->interconnect[IPA_INTERCONNECT_IMEM]; + ipa_interconnect_exit_one(interconnect); err_memory_path_put: - icc_put(clock->interconnect[IPA_INTERCONNECT_MEMORY].path); -err_return: - return PTR_ERR(path); + interconnect = &clock->interconnect[IPA_INTERCONNECT_MEMORY]; + ipa_interconnect_exit_one(interconnect); + + return ret; } /* Inverse of ipa_interconnect_init() */ static void ipa_interconnect_exit(struct ipa_clock *clock) { - icc_put(clock->interconnect[IPA_INTERCONNECT_CONFIG].path); - icc_put(clock->interconnect[IPA_INTERCONNECT_IMEM].path); - icc_put(clock->interconnect[IPA_INTERCONNECT_MEMORY].path); + struct ipa_interconnect *interconnect; + + interconnect = &clock->interconnect[IPA_INTERCONNECT_CONFIG]; + ipa_interconnect_exit_one(interconnect); + interconnect = &clock->interconnect[IPA_INTERCONNECT_IMEM]; + ipa_interconnect_exit_one(interconnect); + interconnect = &clock->interconnect[IPA_INTERCONNECT_MEMORY]; + ipa_interconnect_exit_one(interconnect); } /* Currently we only use one bandwidth level, so just "enable" interconnects */ -- GitLab From ea151e1915ebef316893f1fdae6c2cd89ae3371b Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Fri, 15 Jan 2021 06:50:50 -0600 Subject: [PATCH 1022/2012] net: ipa: allow arbitrary number of interconnects Currently we assume that the IPA hardware has exactly three interconnects. But that won't be guaranteed for all platforms, so allow any number of interconnects to be specified in the configuration data. For each platform, define an array of interconnect data entries (still associated with the IPA clock structure), and record the number of entries initialized in that array. Loop over all entries in this array when initializing, enabling, disabling, or tearing down the set of interconnects. With this change we no longer need the ipa_interconnect_id enumerated type, so get rid of it. Signed-off-by: Alex Elder Signed-off-by: Jakub Kicinski --- drivers/net/ipa/ipa_clock.c | 117 +++++++++++++----------------- drivers/net/ipa/ipa_data-sc7180.c | 41 ++++++----- drivers/net/ipa/ipa_data-sdm845.c | 41 ++++++----- drivers/net/ipa/ipa_data.h | 14 +--- 4 files changed, 99 insertions(+), 114 deletions(-) diff --git a/drivers/net/ipa/ipa_clock.c b/drivers/net/ipa/ipa_clock.c index fbe42106fc2a8..354675a643db5 100644 --- a/drivers/net/ipa/ipa_clock.c +++ b/drivers/net/ipa/ipa_clock.c @@ -47,13 +47,15 @@ struct ipa_interconnect { * @count: Clocking reference count * @mutex: Protects clock enable/disable * @core: IPA core clock + * @interconnect_count: Number of elements in interconnect[] * @interconnect: Interconnect array */ struct ipa_clock { refcount_t count; struct mutex mutex; /* protects clock enable/disable */ struct clk *core; - struct ipa_interconnect interconnect[IPA_INTERCONNECT_COUNT]; + u32 interconnect_count; + struct ipa_interconnect *interconnect; }; static int ipa_interconnect_init_one(struct device *dev, @@ -90,31 +92,29 @@ static int ipa_interconnect_init(struct ipa_clock *clock, struct device *dev, const struct ipa_interconnect_data *data) { struct ipa_interconnect *interconnect; + u32 count; int ret; - interconnect = &clock->interconnect[IPA_INTERCONNECT_MEMORY]; - ret = ipa_interconnect_init_one(dev, interconnect, data++); - if (ret) - return ret; - - interconnect = &clock->interconnect[IPA_INTERCONNECT_IMEM]; - ret = ipa_interconnect_init_one(dev, interconnect, data++); - if (ret) - goto err_memory_path_put; - - interconnect = &clock->interconnect[IPA_INTERCONNECT_IMEM]; - ret = ipa_interconnect_init_one(dev, interconnect, data++); - if (ret) - goto err_imem_path_put; + count = clock->interconnect_count; + interconnect = kcalloc(count, sizeof(*interconnect), GFP_KERNEL); + if (!interconnect) + return -ENOMEM; + clock->interconnect = interconnect; + + while (count--) { + ret = ipa_interconnect_init_one(dev, interconnect, data++); + if (ret) + goto out_unwind; + interconnect++; + } return 0; -err_imem_path_put: - interconnect = &clock->interconnect[IPA_INTERCONNECT_IMEM]; - ipa_interconnect_exit_one(interconnect); -err_memory_path_put: - interconnect = &clock->interconnect[IPA_INTERCONNECT_MEMORY]; - ipa_interconnect_exit_one(interconnect); +out_unwind: + while (interconnect-- > clock->interconnect) + ipa_interconnect_exit_one(interconnect); + kfree(clock->interconnect); + clock->interconnect = NULL; return ret; } @@ -124,12 +124,11 @@ static void ipa_interconnect_exit(struct ipa_clock *clock) { struct ipa_interconnect *interconnect; - interconnect = &clock->interconnect[IPA_INTERCONNECT_CONFIG]; - ipa_interconnect_exit_one(interconnect); - interconnect = &clock->interconnect[IPA_INTERCONNECT_IMEM]; - ipa_interconnect_exit_one(interconnect); - interconnect = &clock->interconnect[IPA_INTERCONNECT_MEMORY]; - ipa_interconnect_exit_one(interconnect); + interconnect = clock->interconnect + clock->interconnect_count; + while (interconnect-- > clock->interconnect) + ipa_interconnect_exit_one(interconnect); + kfree(clock->interconnect); + clock->interconnect = NULL; } /* Currently we only use one bandwidth level, so just "enable" interconnects */ @@ -138,33 +137,23 @@ static int ipa_interconnect_enable(struct ipa *ipa) struct ipa_interconnect *interconnect; struct ipa_clock *clock = ipa->clock; int ret; - - interconnect = &clock->interconnect[IPA_INTERCONNECT_MEMORY]; - ret = icc_set_bw(interconnect->path, interconnect->average_bandwidth, - interconnect->peak_bandwidth); - if (ret) - return ret; - - interconnect = &clock->interconnect[IPA_INTERCONNECT_IMEM]; - ret = icc_set_bw(interconnect->path, interconnect->average_bandwidth, - interconnect->peak_bandwidth); - if (ret) - goto err_memory_path_disable; - - interconnect = &clock->interconnect[IPA_INTERCONNECT_CONFIG]; - ret = icc_set_bw(interconnect->path, interconnect->average_bandwidth, - interconnect->peak_bandwidth); - if (ret) - goto err_imem_path_disable; + u32 i; + + interconnect = clock->interconnect; + for (i = 0; i < clock->interconnect_count; i++) { + ret = icc_set_bw(interconnect->path, + interconnect->average_bandwidth, + interconnect->peak_bandwidth); + if (ret) + goto out_unwind; + interconnect++; + } return 0; -err_imem_path_disable: - interconnect = &clock->interconnect[IPA_INTERCONNECT_IMEM]; - (void)icc_set_bw(interconnect->path, 0, 0); -err_memory_path_disable: - interconnect = &clock->interconnect[IPA_INTERCONNECT_MEMORY]; - (void)icc_set_bw(interconnect->path, 0, 0); +out_unwind: + while (interconnect-- > clock->interconnect) + (void)icc_set_bw(interconnect->path, 0, 0); return ret; } @@ -175,22 +164,17 @@ static void ipa_interconnect_disable(struct ipa *ipa) struct ipa_interconnect *interconnect; struct ipa_clock *clock = ipa->clock; int result = 0; + u32 count; int ret; - interconnect = &clock->interconnect[IPA_INTERCONNECT_MEMORY]; - ret = icc_set_bw(interconnect->path, 0, 0); - if (ret) - result = ret; - - interconnect = &clock->interconnect[IPA_INTERCONNECT_IMEM]; - ret = icc_set_bw(interconnect->path, 0, 0); - if (ret && !result) - result = ret; - - interconnect = &clock->interconnect[IPA_INTERCONNECT_IMEM]; - ret = icc_set_bw(interconnect->path, 0, 0); - if (ret && !result) - result = ret; + count = clock->interconnect_count; + interconnect = clock->interconnect + count; + while (count--) { + interconnect--; + ret = icc_set_bw(interconnect->path, 0, 0); + if (ret && !result) + result = ret; + } if (result) dev_err(&ipa->pdev->dev, @@ -314,8 +298,9 @@ ipa_clock_init(struct device *dev, const struct ipa_clock_data *data) goto err_clk_put; } clock->core = clk; + clock->interconnect_count = data->interconnect_count; - ret = ipa_interconnect_init(clock, dev, data->interconnect); + ret = ipa_interconnect_init(clock, dev, data->interconnect_data); if (ret) goto err_kfree; diff --git a/drivers/net/ipa/ipa_data-sc7180.c b/drivers/net/ipa/ipa_data-sc7180.c index 1936ecb4c1104..997b51ceb7d76 100644 --- a/drivers/net/ipa/ipa_data-sc7180.c +++ b/drivers/net/ipa/ipa_data-sc7180.c @@ -309,27 +309,30 @@ static struct ipa_mem_data ipa_mem_data = { .smem_size = 0x00002000, }; +/* Interconnect bandwidths are in 1000 byte/second units */ +static struct ipa_interconnect_data ipa_interconnect_data[] = { + { + .name = "memory", + .peak_bandwidth = 465000, /* 465 MBps */ + .average_bandwidth = 80000, /* 80 MBps */ + }, + /* Average bandwidth is unused for the next two interconnects */ + { + .name = "imem", + .peak_bandwidth = 68570, /* 68.570 MBps */ + .average_bandwidth = 0, /* unused */ + }, + { + .name = "config", + .peak_bandwidth = 30000, /* 30 MBps */ + .average_bandwidth = 0, /* unused */ + }, +}; + static struct ipa_clock_data ipa_clock_data = { .core_clock_rate = 100 * 1000 * 1000, /* Hz */ - /* Interconnect bandwidths are in 1000 byte/second units */ - .interconnect = { - [IPA_INTERCONNECT_MEMORY] = { - .name = "memory", - .peak_bandwidth = 465000, /* 465 MBps */ - .average_bandwidth = 80000, /* 80 MBps */ - }, - /* Average bandwidth unused for the next two interconnects */ - [IPA_INTERCONNECT_IMEM] = { - .name = "imem", - .peak_bandwidth = 68570, /* 68.57 MBps */ - .average_bandwidth = 0, /* unused */ - }, - [IPA_INTERCONNECT_CONFIG] = { - .name = "config", - .peak_bandwidth = 30000, /* 30 MBps */ - .average_bandwidth = 0, /* unused */ - }, - }, + .interconnect_count = ARRAY_SIZE(ipa_interconnect_data), + .interconnect_data = ipa_interconnect_data, }; /* Configuration data for the SC7180 SoC. */ diff --git a/drivers/net/ipa/ipa_data-sdm845.c b/drivers/net/ipa/ipa_data-sdm845.c index 3b556b5a63406..88c9c3562ab79 100644 --- a/drivers/net/ipa/ipa_data-sdm845.c +++ b/drivers/net/ipa/ipa_data-sdm845.c @@ -329,27 +329,30 @@ static struct ipa_mem_data ipa_mem_data = { .smem_size = 0x00002000, }; +/* Interconnect bandwidths are in 1000 byte/second units */ +static struct ipa_interconnect_data ipa_interconnect_data[] = { + { + .name = "memory", + .peak_bandwidth = 600000, /* 600 MBps */ + .average_bandwidth = 80000, /* 80 MBps */ + }, + /* Average bandwidth is unused for the next two interconnects */ + { + .name = "imem", + .peak_bandwidth = 350000, /* 350 MBps */ + .average_bandwidth = 0, /* unused */ + }, + { + .name = "config", + .peak_bandwidth = 40000, /* 40 MBps */ + .average_bandwidth = 0, /* unused */ + }, +}; + static struct ipa_clock_data ipa_clock_data = { .core_clock_rate = 75 * 1000 * 1000, /* Hz */ - /* Interconnect bandwidths are in 1000 byte/second units */ - .interconnect = { - [IPA_INTERCONNECT_MEMORY] = { - .name = "memory", - .peak_bandwidth = 600000, /* 600 MBps */ - .average_bandwidth = 80000, /* 80 MBps */ - }, - /* Average bandwidth unused for the next two interconnects */ - [IPA_INTERCONNECT_IMEM] = { - .name = "imem", - .peak_bandwidth = 350000, /* 350 MBps */ - .average_bandwidth = 0, /* unused */ - }, - [IPA_INTERCONNECT_CONFIG] = { - .name = "config", - .peak_bandwidth = 40000, /* 40 MBps */ - .average_bandwidth = 0, /* unused */ - }, - }, + .interconnect_count = ARRAY_SIZE(ipa_interconnect_data), + .interconnect_data = ipa_interconnect_data, }; /* Configuration data for the SDM845 SoC. */ diff --git a/drivers/net/ipa/ipa_data.h b/drivers/net/ipa/ipa_data.h index d8ea6266dc6a1..b476fc373f7fe 100644 --- a/drivers/net/ipa/ipa_data.h +++ b/drivers/net/ipa/ipa_data.h @@ -258,14 +258,6 @@ struct ipa_mem_data { u32 smem_size; }; -/** enum ipa_interconnect_id - IPA interconnect identifier */ -enum ipa_interconnect_id { - IPA_INTERCONNECT_MEMORY, - IPA_INTERCONNECT_IMEM, - IPA_INTERCONNECT_CONFIG, - IPA_INTERCONNECT_COUNT, /* Last; not an interconnect */ -}; - /** * struct ipa_interconnect_data - description of IPA interconnect bandwidths * @name: Interconnect name (matches interconnect-name in DT) @@ -281,11 +273,13 @@ struct ipa_interconnect_data { /** * struct ipa_clock_data - description of IPA clock and interconnect rates * @core_clock_rate: Core clock rate (Hz) - * @interconnect: Array of interconnect bandwidth parameters + * @interconnect_count: Number of entries in the interconnect_data array + * @interconnect_data: IPA interconnect configuration data */ struct ipa_clock_data { u32 core_clock_rate; - struct ipa_interconnect_data interconnect[IPA_INTERCONNECT_COUNT]; + u32 interconnect_count; /* # entries in interconnect_data[] */ + const struct ipa_interconnect_data *interconnect_data; }; /** -- GitLab From d35c9a029a73e84d84337403d20b060494890570 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Fri, 15 Jan 2021 00:27:44 +0100 Subject: [PATCH 1023/2012] platform/x86: hp-wmi: Don't log a warning on HPWMI_RET_UNKNOWN_COMMAND errors The recently added thermal policy support makes a hp_wmi_perform_query(0x4c, ...) call on older devices which do not support thermal policies this causes the following warning to be logged (seen on a HP Stream x360 Convertible PC 11): [ 26.805305] hp_wmi: query 0x4c returned error 0x3 Error 0x3 is HPWMI_RET_UNKNOWN_COMMAND error. This commit silences the warning for unknown-command errors, silencing the new warning. Cc: Elia Devito Fixes: 81c93798ef3e ("platform/x86: hp-wmi: add support for thermal policy") Link: https://lore.kernel.org/r/20210114232744.154886-1-hdegoede@redhat.com Signed-off-by: Hans de Goede --- drivers/platform/x86/hp-wmi.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/platform/x86/hp-wmi.c b/drivers/platform/x86/hp-wmi.c index ecd477964d117..18bf8aeb5f870 100644 --- a/drivers/platform/x86/hp-wmi.c +++ b/drivers/platform/x86/hp-wmi.c @@ -247,7 +247,8 @@ static int hp_wmi_perform_query(int query, enum hp_wmi_command command, ret = bios_return->return_code; if (ret) { - if (ret != HPWMI_RET_UNKNOWN_CMDTYPE) + if (ret != HPWMI_RET_UNKNOWN_COMMAND && + ret != HPWMI_RET_UNKNOWN_CMDTYPE) pr_warn("query 0x%x returned error 0x%x\n", query, ret); goto out_free; } -- GitLab From 173aac2fef96972e42d33c0e1189e6f756a0d719 Mon Sep 17 00:00:00 2001 From: Jeannie Stevenson Date: Fri, 15 Jan 2021 16:06:30 +0000 Subject: [PATCH 1024/2012] platform/x86: thinkpad_acpi: Add P53/73 firmware to fan_quirk_table for dual fan control This commit enables dual fan control for the new Lenovo P53 and P73 laptop models. Signed-off-by: Jeannie Stevenson Link: https://lore.kernel.org/r/Pn_Xii4XYpQRFtgkf4PbNgieE89BAkHgLI1kWIq-zFudwh2A1DY5J_DJVHK06rMW_hGPHx_mPE33gd8mg9-8BxqJTaSC6hhPqAsfZlcNGH0=@protonmail.com Signed-off-by: Hans de Goede --- drivers/platform/x86/thinkpad_acpi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c index c102657b3eb32..f3e8eca8d86d6 100644 --- a/drivers/platform/x86/thinkpad_acpi.c +++ b/drivers/platform/x86/thinkpad_acpi.c @@ -8783,6 +8783,7 @@ static const struct tpacpi_quirk fan_quirk_table[] __initconst = { TPACPI_Q_LNV3('N', '1', 'T', TPACPI_FAN_2CTL), /* P71 */ TPACPI_Q_LNV3('N', '1', 'U', TPACPI_FAN_2CTL), /* P51 */ TPACPI_Q_LNV3('N', '2', 'C', TPACPI_FAN_2CTL), /* P52 / P72 */ + TPACPI_Q_LNV3('N', '2', 'N', TPACPI_FAN_2CTL), /* P53 / P73 */ TPACPI_Q_LNV3('N', '2', 'E', TPACPI_FAN_2CTL), /* P1 / X1 Extreme (1st gen) */ TPACPI_Q_LNV3('N', '2', 'O', TPACPI_FAN_2CTL), /* P1 / X1 Extreme (2nd gen) */ TPACPI_Q_LNV3('N', '2', 'V', TPACPI_FAN_2CTL), /* P1 / X1 Extreme (3nd gen) */ -- GitLab From 87fe04367d842c4d97a77303242d4dd4ac351e46 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Sat, 16 Jan 2021 03:39:35 +0100 Subject: [PATCH 1025/2012] net: dsa: mv88e6xxx: also read STU state in mv88e6250_g1_vtu_getnext mv88e6xxx_port_vlan_join checks whether the VTU already contains an entry for the given vid (via mv88e6xxx_vtu_getnext), and if so, merely changes the relevant .member[] element and loads the updated entry into the VTU. However, at least for the mv88e6250, the on-stack struct mv88e6xxx_vtu_entry vlan never has its .state[] array explicitly initialized, neither in mv88e6xxx_port_vlan_join() nor inside the getnext implementation. So the new entry has random garbage for the STU bits, breaking VLAN filtering. When the VTU entry is initially created, those bits are all zero, and we should make sure to keep them that way when the entry is updated. Fixes: 92307069a96c (net: dsa: mv88e6xxx: Avoid VTU corruption on 6097) Signed-off-by: Rasmus Villemoes Reviewed-by: Florian Fainelli Reviewed-by: Tobias Waldekranz Tested-by: Tobias Waldekranz Signed-off-by: Jakub Kicinski --- drivers/net/dsa/mv88e6xxx/global1_vtu.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/dsa/mv88e6xxx/global1_vtu.c b/drivers/net/dsa/mv88e6xxx/global1_vtu.c index 66ddf67b87371..7b96396be609e 100644 --- a/drivers/net/dsa/mv88e6xxx/global1_vtu.c +++ b/drivers/net/dsa/mv88e6xxx/global1_vtu.c @@ -351,6 +351,10 @@ int mv88e6250_g1_vtu_getnext(struct mv88e6xxx_chip *chip, if (err) return err; + err = mv88e6185_g1_stu_data_read(chip, entry); + if (err) + return err; + /* VTU DBNum[3:0] are located in VTU Operation 3:0 * VTU DBNum[5:4] are located in VTU Operation 9:8 */ -- GitLab From 92a5e1fdb286851d5bd0eb966b8d075be27cf5ee Mon Sep 17 00:00:00 2001 From: Sandipan Das Date: Mon, 18 Jan 2021 15:01:45 +0530 Subject: [PATCH 1026/2012] selftests/powerpc: Fix exit status of pkey tests Since main() does not return a value explicitly, the return values from FAIL_IF() conditions are ignored and the tests can still pass irrespective of failures. This makes sure that we always explicitly return the correct test exit status. Fixes: 1addb6444791 ("selftests/powerpc: Add test for execute-disabled pkeys") Fixes: c27f2fd1705a ("selftests/powerpc: Add test for pkey siginfo verification") Reported-by: Eirik Fuller Signed-off-by: Sandipan Das Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210118093145.10134-1-sandipan@linux.ibm.com --- tools/testing/selftests/powerpc/mm/pkey_exec_prot.c | 2 +- tools/testing/selftests/powerpc/mm/pkey_siginfo.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/powerpc/mm/pkey_exec_prot.c b/tools/testing/selftests/powerpc/mm/pkey_exec_prot.c index 9e5c7f3f498a7..0af4f02669a11 100644 --- a/tools/testing/selftests/powerpc/mm/pkey_exec_prot.c +++ b/tools/testing/selftests/powerpc/mm/pkey_exec_prot.c @@ -290,5 +290,5 @@ static int test(void) int main(void) { - test_harness(test, "pkey_exec_prot"); + return test_harness(test, "pkey_exec_prot"); } diff --git a/tools/testing/selftests/powerpc/mm/pkey_siginfo.c b/tools/testing/selftests/powerpc/mm/pkey_siginfo.c index 4f815d7c12145..2db76e56d4cb9 100644 --- a/tools/testing/selftests/powerpc/mm/pkey_siginfo.c +++ b/tools/testing/selftests/powerpc/mm/pkey_siginfo.c @@ -329,5 +329,5 @@ static int test(void) int main(void) { - test_harness(test, "pkey_siginfo"); + return test_harness(test, "pkey_siginfo"); } -- GitLab From a372173bf314d374da4dd1155549d8ca7fc44709 Mon Sep 17 00:00:00 2001 From: Kamal Heib Date: Thu, 14 Jan 2021 21:14:23 +0200 Subject: [PATCH 1027/2012] RDMA/cxgb4: Fix the reported max_recv_sge value The max_recv_sge value is wrongly reported when calling query_qp, This is happening due to a typo when assigning the max_recv_sge value, the value of sq_max_sges was assigned instead of rq_max_sges. Fixes: 3e5c02c9ef9a ("iw_cxgb4: Support query_qp() verb") Link: https://lore.kernel.org/r/20210114191423.423529-1-kamalheib1@gmail.com Signed-off-by: Kamal Heib Reviewed-by: Potnuri Bharat Teja Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/cxgb4/qp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c index a7401398cb344..d109bb3822a5f 100644 --- a/drivers/infiniband/hw/cxgb4/qp.c +++ b/drivers/infiniband/hw/cxgb4/qp.c @@ -2474,7 +2474,7 @@ int c4iw_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, init_attr->cap.max_send_wr = qhp->attr.sq_num_entries; init_attr->cap.max_recv_wr = qhp->attr.rq_num_entries; init_attr->cap.max_send_sge = qhp->attr.sq_max_sges; - init_attr->cap.max_recv_sge = qhp->attr.sq_max_sges; + init_attr->cap.max_recv_sge = qhp->attr.rq_max_sges; init_attr->cap.max_inline_data = T4_MAX_SEND_INLINE; init_attr->sq_sig_type = qhp->sq_sig_all ? IB_SIGNAL_ALL_WR : 0; return 0; -- GitLab From a98c0c47420412ef94d6f45f9ae607258929aa10 Mon Sep 17 00:00:00 2001 From: Menglong Dong Date: Sun, 17 Jan 2021 16:09:50 +0800 Subject: [PATCH 1028/2012] net: bridge: check vlan with eth_type_vlan() method Replace some checks for ETH_P_8021Q and ETH_P_8021AD with eth_type_vlan(). Signed-off-by: Menglong Dong Acked-by: Nikolay Aleksandrov Link: https://lore.kernel.org/r/20210117080950.122761-1-dong.menglong@zte.com.cn Signed-off-by: Jakub Kicinski --- net/bridge/br_forward.c | 3 +-- net/bridge/br_netlink.c | 12 +++--------- net/bridge/br_vlan.c | 2 +- 3 files changed, 5 insertions(+), 12 deletions(-) diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c index e28ffadd13719..6e9b049ae5218 100644 --- a/net/bridge/br_forward.c +++ b/net/bridge/br_forward.c @@ -39,8 +39,7 @@ int br_dev_queue_push_xmit(struct net *net, struct sock *sk, struct sk_buff *skb br_drop_fake_rtable(skb); if (skb->ip_summed == CHECKSUM_PARTIAL && - (skb->protocol == htons(ETH_P_8021Q) || - skb->protocol == htons(ETH_P_8021AD))) { + eth_type_vlan(skb->protocol)) { int depth; if (!__vlan_get_protocol(skb, skb->protocol, &depth)) diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 49700ce0e919e..762f273802cda 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -1096,15 +1096,9 @@ static int br_validate(struct nlattr *tb[], struct nlattr *data[], return 0; #ifdef CONFIG_BRIDGE_VLAN_FILTERING - if (data[IFLA_BR_VLAN_PROTOCOL]) { - switch (nla_get_be16(data[IFLA_BR_VLAN_PROTOCOL])) { - case htons(ETH_P_8021Q): - case htons(ETH_P_8021AD): - break; - default: - return -EPROTONOSUPPORT; - } - } + if (data[IFLA_BR_VLAN_PROTOCOL] && + !eth_type_vlan(nla_get_be16(data[IFLA_BR_VLAN_PROTOCOL]))) + return -EPROTONOSUPPORT; if (data[IFLA_BR_VLAN_DEFAULT_PVID]) { __u16 defpvid = nla_get_u16(data[IFLA_BR_VLAN_DEFAULT_PVID]); diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c index 701cad646b20c..bb29097385187 100644 --- a/net/bridge/br_vlan.c +++ b/net/bridge/br_vlan.c @@ -917,7 +917,7 @@ err_filt: int br_vlan_set_proto(struct net_bridge *br, unsigned long val) { - if (val != ETH_P_8021Q && val != ETH_P_8021AD) + if (!eth_type_vlan(htons(val))) return -EPROTONOSUPPORT; return __br_vlan_set_proto(br, htons(val)); -- GitLab From 505e3f00c3f3648cb6260deb35e87fae1f64f5d8 Mon Sep 17 00:00:00 2001 From: "Andrea Parri (Microsoft)" Date: Thu, 14 Jan 2021 21:26:28 +0100 Subject: [PATCH 1029/2012] hv_netvsc: Add (more) validation for untrusted Hyper-V values For additional robustness in the face of Hyper-V errors or malicious behavior, validate all values that originate from packets that Hyper-V has sent to the guest. Ensure that invalid values cannot cause indexing off the end of an array, or subvert an existing validation via integer overflow. Ensure that outgoing packets do not have any leftover guest memory that has not been zeroed out. Reported-by: Juan Vazquez Signed-off-by: Andrea Parri (Microsoft) Link: https://lore.kernel.org/r/20210114202628.119541-1-parri.andrea@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/hyperv/netvsc.c | 3 +- drivers/net/hyperv/netvsc_bpf.c | 6 ++ drivers/net/hyperv/netvsc_drv.c | 18 +++- drivers/net/hyperv/rndis_filter.c | 171 +++++++++++++++++++----------- 4 files changed, 136 insertions(+), 62 deletions(-) diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index 3a3db2f0134d8..6184e99c7f31f 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -918,6 +918,7 @@ static inline int netvsc_send_pkt( int ret; u32 ring_avail = hv_get_avail_to_write_percent(&out_channel->outbound); + memset(&nvmsg, 0, sizeof(struct nvsp_message)); nvmsg.hdr.msg_type = NVSP_MSG1_TYPE_SEND_RNDIS_PKT; if (skb) rpkt->channel_type = 0; /* 0 is RMC_DATA */ @@ -1337,7 +1338,7 @@ static void netvsc_send_table(struct net_device *ndev, sizeof(union nvsp_6_message_uber); /* Boundary check for all versions */ - if (offset > msglen - count * sizeof(u32)) { + if (msglen < count * sizeof(u32) || offset > msglen - count * sizeof(u32)) { netdev_err(ndev, "Received send-table offset too big:%u\n", offset); return; diff --git a/drivers/net/hyperv/netvsc_bpf.c b/drivers/net/hyperv/netvsc_bpf.c index d60dcf6c9829e..aa877da113f8e 100644 --- a/drivers/net/hyperv/netvsc_bpf.c +++ b/drivers/net/hyperv/netvsc_bpf.c @@ -37,6 +37,12 @@ u32 netvsc_run_xdp(struct net_device *ndev, struct netvsc_channel *nvchan, if (!prog) goto out; + /* Ensure that the below memcpy() won't overflow the page buffer. */ + if (len > ndev->mtu + ETH_HLEN) { + act = XDP_DROP; + goto out; + } + /* allocate page buffer for data */ page = alloc_page(GFP_ATOMIC); if (!page) { diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index 75b4d6703cf1e..ac20c432d4d8f 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -761,6 +761,16 @@ void netvsc_linkstatus_callback(struct net_device *net, if (indicate->status == RNDIS_STATUS_LINK_SPEED_CHANGE) { u32 speed; + /* Validate status_buf_offset */ + if (indicate->status_buflen < sizeof(speed) || + indicate->status_buf_offset < sizeof(*indicate) || + resp->msg_len - RNDIS_HEADER_SIZE < indicate->status_buf_offset || + resp->msg_len - RNDIS_HEADER_SIZE - indicate->status_buf_offset + < indicate->status_buflen) { + netdev_err(net, "invalid rndis_indicate_status packet\n"); + return; + } + speed = *(u32 *)((void *)indicate + indicate->status_buf_offset) / 10000; ndev_ctx->speed = speed; @@ -866,8 +876,14 @@ static struct sk_buff *netvsc_alloc_recv_skb(struct net_device *net, */ if (csum_info && csum_info->receive.ip_checksum_value_invalid && csum_info->receive.ip_checksum_succeeded && - skb->protocol == htons(ETH_P_IP)) + skb->protocol == htons(ETH_P_IP)) { + /* Check that there is enough space to hold the IP header. */ + if (skb_headlen(skb) < sizeof(struct iphdr)) { + kfree_skb(skb); + return NULL; + } netvsc_comp_ipcsum(skb); + } /* Do L4 checksum offload if enabled and present. */ if (csum_info && (net->features & NETIF_F_RXCSUM)) { diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c index 598713c0d5a87..c8534b6619b8d 100644 --- a/drivers/net/hyperv/rndis_filter.c +++ b/drivers/net/hyperv/rndis_filter.c @@ -131,66 +131,84 @@ static void dump_rndis_message(struct net_device *netdev, { switch (rndis_msg->ndis_msg_type) { case RNDIS_MSG_PACKET: - netdev_dbg(netdev, "RNDIS_MSG_PACKET (len %u, " - "data offset %u data len %u, # oob %u, " - "oob offset %u, oob len %u, pkt offset %u, " - "pkt len %u\n", - rndis_msg->msg_len, - rndis_msg->msg.pkt.data_offset, - rndis_msg->msg.pkt.data_len, - rndis_msg->msg.pkt.num_oob_data_elements, - rndis_msg->msg.pkt.oob_data_offset, - rndis_msg->msg.pkt.oob_data_len, - rndis_msg->msg.pkt.per_pkt_info_offset, - rndis_msg->msg.pkt.per_pkt_info_len); + if (rndis_msg->msg_len - RNDIS_HEADER_SIZE >= sizeof(struct rndis_packet)) { + const struct rndis_packet *pkt = &rndis_msg->msg.pkt; + netdev_dbg(netdev, "RNDIS_MSG_PACKET (len %u, " + "data offset %u data len %u, # oob %u, " + "oob offset %u, oob len %u, pkt offset %u, " + "pkt len %u\n", + rndis_msg->msg_len, + pkt->data_offset, + pkt->data_len, + pkt->num_oob_data_elements, + pkt->oob_data_offset, + pkt->oob_data_len, + pkt->per_pkt_info_offset, + pkt->per_pkt_info_len); + } break; case RNDIS_MSG_INIT_C: - netdev_dbg(netdev, "RNDIS_MSG_INIT_C " - "(len %u, id 0x%x, status 0x%x, major %d, minor %d, " - "device flags %d, max xfer size 0x%x, max pkts %u, " - "pkt aligned %u)\n", - rndis_msg->msg_len, - rndis_msg->msg.init_complete.req_id, - rndis_msg->msg.init_complete.status, - rndis_msg->msg.init_complete.major_ver, - rndis_msg->msg.init_complete.minor_ver, - rndis_msg->msg.init_complete.dev_flags, - rndis_msg->msg.init_complete.max_xfer_size, - rndis_msg->msg.init_complete. - max_pkt_per_msg, - rndis_msg->msg.init_complete. - pkt_alignment_factor); + if (rndis_msg->msg_len - RNDIS_HEADER_SIZE >= + sizeof(struct rndis_initialize_complete)) { + const struct rndis_initialize_complete *init_complete = + &rndis_msg->msg.init_complete; + netdev_dbg(netdev, "RNDIS_MSG_INIT_C " + "(len %u, id 0x%x, status 0x%x, major %d, minor %d, " + "device flags %d, max xfer size 0x%x, max pkts %u, " + "pkt aligned %u)\n", + rndis_msg->msg_len, + init_complete->req_id, + init_complete->status, + init_complete->major_ver, + init_complete->minor_ver, + init_complete->dev_flags, + init_complete->max_xfer_size, + init_complete->max_pkt_per_msg, + init_complete->pkt_alignment_factor); + } break; case RNDIS_MSG_QUERY_C: - netdev_dbg(netdev, "RNDIS_MSG_QUERY_C " - "(len %u, id 0x%x, status 0x%x, buf len %u, " - "buf offset %u)\n", - rndis_msg->msg_len, - rndis_msg->msg.query_complete.req_id, - rndis_msg->msg.query_complete.status, - rndis_msg->msg.query_complete. - info_buflen, - rndis_msg->msg.query_complete. - info_buf_offset); + if (rndis_msg->msg_len - RNDIS_HEADER_SIZE >= + sizeof(struct rndis_query_complete)) { + const struct rndis_query_complete *query_complete = + &rndis_msg->msg.query_complete; + netdev_dbg(netdev, "RNDIS_MSG_QUERY_C " + "(len %u, id 0x%x, status 0x%x, buf len %u, " + "buf offset %u)\n", + rndis_msg->msg_len, + query_complete->req_id, + query_complete->status, + query_complete->info_buflen, + query_complete->info_buf_offset); + } break; case RNDIS_MSG_SET_C: - netdev_dbg(netdev, - "RNDIS_MSG_SET_C (len %u, id 0x%x, status 0x%x)\n", - rndis_msg->msg_len, - rndis_msg->msg.set_complete.req_id, - rndis_msg->msg.set_complete.status); + if (rndis_msg->msg_len - RNDIS_HEADER_SIZE + sizeof(struct rndis_set_complete)) { + const struct rndis_set_complete *set_complete = + &rndis_msg->msg.set_complete; + netdev_dbg(netdev, + "RNDIS_MSG_SET_C (len %u, id 0x%x, status 0x%x)\n", + rndis_msg->msg_len, + set_complete->req_id, + set_complete->status); + } break; case RNDIS_MSG_INDICATE: - netdev_dbg(netdev, "RNDIS_MSG_INDICATE " - "(len %u, status 0x%x, buf len %u, buf offset %u)\n", - rndis_msg->msg_len, - rndis_msg->msg.indicate_status.status, - rndis_msg->msg.indicate_status.status_buflen, - rndis_msg->msg.indicate_status.status_buf_offset); + if (rndis_msg->msg_len - RNDIS_HEADER_SIZE >= + sizeof(struct rndis_indicate_status)) { + const struct rndis_indicate_status *indicate_status = + &rndis_msg->msg.indicate_status; + netdev_dbg(netdev, "RNDIS_MSG_INDICATE " + "(len %u, status 0x%x, buf len %u, buf offset %u)\n", + rndis_msg->msg_len, + indicate_status->status, + indicate_status->status_buflen, + indicate_status->status_buf_offset); + } break; default: @@ -246,11 +264,20 @@ static void rndis_set_link_state(struct rndis_device *rdev, { u32 link_status; struct rndis_query_complete *query_complete; + u32 msg_len = request->response_msg.msg_len; + + /* Ensure the packet is big enough to access its fields */ + if (msg_len - RNDIS_HEADER_SIZE < sizeof(struct rndis_query_complete)) + return; query_complete = &request->response_msg.msg.query_complete; if (query_complete->status == RNDIS_STATUS_SUCCESS && - query_complete->info_buflen == sizeof(u32)) { + query_complete->info_buflen >= sizeof(u32) && + query_complete->info_buf_offset >= sizeof(*query_complete) && + msg_len - RNDIS_HEADER_SIZE >= query_complete->info_buf_offset && + msg_len - RNDIS_HEADER_SIZE - query_complete->info_buf_offset + >= query_complete->info_buflen) { memcpy(&link_status, (void *)((unsigned long)query_complete + query_complete->info_buf_offset), sizeof(u32)); rdev->link_state = link_status != 0; @@ -343,7 +370,8 @@ static void rndis_filter_receive_response(struct net_device *ndev, */ static inline void *rndis_get_ppi(struct net_device *ndev, struct rndis_packet *rpkt, - u32 rpkt_len, u32 type, u8 internal) + u32 rpkt_len, u32 type, u8 internal, + u32 ppi_size) { struct rndis_per_packet_info *ppi; int len; @@ -359,7 +387,8 @@ static inline void *rndis_get_ppi(struct net_device *ndev, return NULL; } - if (rpkt->per_pkt_info_len > rpkt_len - rpkt->per_pkt_info_offset) { + if (rpkt->per_pkt_info_len < sizeof(*ppi) || + rpkt->per_pkt_info_len > rpkt_len - rpkt->per_pkt_info_offset) { netdev_err(ndev, "Invalid per_pkt_info_len: %u\n", rpkt->per_pkt_info_len); return NULL; @@ -381,8 +410,15 @@ static inline void *rndis_get_ppi(struct net_device *ndev, continue; } - if (ppi->type == type && ppi->internal == internal) + if (ppi->type == type && ppi->internal == internal) { + /* ppi->size should be big enough to hold the returned object. */ + if (ppi->size - ppi->ppi_offset < ppi_size) { + netdev_err(ndev, "Invalid ppi: size %u ppi_offset %u\n", + ppi->size, ppi->ppi_offset); + continue; + } return (void *)((ulong)ppi + ppi->ppi_offset); + } len -= ppi->size; ppi = (struct rndis_per_packet_info *)((ulong)ppi + ppi->size); } @@ -461,13 +497,16 @@ static int rndis_filter_receive_data(struct net_device *ndev, return NVSP_STAT_FAIL; } - vlan = rndis_get_ppi(ndev, rndis_pkt, rpkt_len, IEEE_8021Q_INFO, 0); + vlan = rndis_get_ppi(ndev, rndis_pkt, rpkt_len, IEEE_8021Q_INFO, 0, sizeof(*vlan)); - csum_info = rndis_get_ppi(ndev, rndis_pkt, rpkt_len, TCPIP_CHKSUM_PKTINFO, 0); + csum_info = rndis_get_ppi(ndev, rndis_pkt, rpkt_len, TCPIP_CHKSUM_PKTINFO, 0, + sizeof(*csum_info)); - hash_info = rndis_get_ppi(ndev, rndis_pkt, rpkt_len, NBL_HASH_VALUE, 0); + hash_info = rndis_get_ppi(ndev, rndis_pkt, rpkt_len, NBL_HASH_VALUE, 0, + sizeof(*hash_info)); - pktinfo_id = rndis_get_ppi(ndev, rndis_pkt, rpkt_len, RNDIS_PKTINFO_ID, 1); + pktinfo_id = rndis_get_ppi(ndev, rndis_pkt, rpkt_len, RNDIS_PKTINFO_ID, 1, + sizeof(*pktinfo_id)); data = (void *)msg + data_offset; @@ -522,9 +561,6 @@ int rndis_filter_receive(struct net_device *ndev, struct net_device_context *net_device_ctx = netdev_priv(ndev); struct rndis_message *rndis_msg = data; - if (netif_msg_rx_status(net_device_ctx)) - dump_rndis_message(ndev, rndis_msg); - /* Validate incoming rndis_message packet */ if (buflen < RNDIS_HEADER_SIZE || rndis_msg->msg_len < RNDIS_HEADER_SIZE || buflen < rndis_msg->msg_len) { @@ -533,6 +569,9 @@ int rndis_filter_receive(struct net_device *ndev, return NVSP_STAT_FAIL; } + if (netif_msg_rx_status(net_device_ctx)) + dump_rndis_message(ndev, rndis_msg); + switch (rndis_msg->ndis_msg_type) { case RNDIS_MSG_PACKET: return rndis_filter_receive_data(ndev, net_dev, nvchan, @@ -567,6 +606,7 @@ static int rndis_filter_query_device(struct rndis_device *dev, u32 inresult_size = *result_size; struct rndis_query_request *query; struct rndis_query_complete *query_complete; + u32 msg_len; int ret = 0; if (!result) @@ -634,8 +674,19 @@ static int rndis_filter_query_device(struct rndis_device *dev, /* Copy the response back */ query_complete = &request->response_msg.msg.query_complete; + msg_len = request->response_msg.msg_len; + + /* Ensure the packet is big enough to access its fields */ + if (msg_len - RNDIS_HEADER_SIZE < sizeof(struct rndis_query_complete)) { + ret = -1; + goto cleanup; + } - if (query_complete->info_buflen > inresult_size) { + if (query_complete->info_buflen > inresult_size || + query_complete->info_buf_offset < sizeof(*query_complete) || + msg_len - RNDIS_HEADER_SIZE < query_complete->info_buf_offset || + msg_len - RNDIS_HEADER_SIZE - query_complete->info_buf_offset + < query_complete->info_buflen) { ret = -1; goto cleanup; } -- GitLab From a826b04303a40d52439aa141035fca5654ccaccd Mon Sep 17 00:00:00 2001 From: Matteo Croce Date: Fri, 15 Jan 2021 19:42:08 +0100 Subject: [PATCH 1030/2012] ipv6: create multicast route with RTPROT_KERNEL The ff00::/8 multicast route is created without specifying the fc_protocol field, so the default RTPROT_BOOT value is used: $ ip -6 -d route unicast ::1 dev lo proto kernel scope global metric 256 pref medium unicast fe80::/64 dev eth0 proto kernel scope global metric 256 pref medium unicast ff00::/8 dev eth0 proto boot scope global metric 256 pref medium As the documentation says, this value identifies routes installed during boot, but the route is created when interface is set up. Change the value to RTPROT_KERNEL which is a better value. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Matteo Croce Signed-off-by: Jakub Kicinski --- net/ipv6/addrconf.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index eff2cacd52093..19bf6822911cd 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -2469,6 +2469,7 @@ static void addrconf_add_mroute(struct net_device *dev) .fc_flags = RTF_UP, .fc_type = RTN_UNICAST, .fc_nlinfo.nl_net = dev_net(dev), + .fc_protocol = RTPROT_KERNEL, }; ipv6_addr_set(&cfg.fc_dst, htonl(0xFF000000), 0, 0, 0); -- GitLab From ceed9038b2783d14e0422bdc6fd04f70580efb4c Mon Sep 17 00:00:00 2001 From: Matteo Croce Date: Fri, 15 Jan 2021 19:42:09 +0100 Subject: [PATCH 1031/2012] ipv6: set multicast flag on the multicast route The multicast route ff00::/8 is created with type RTN_UNICAST: $ ip -6 -d route unicast ::1 dev lo proto kernel scope global metric 256 pref medium unicast fe80::/64 dev eth0 proto kernel scope global metric 256 pref medium unicast ff00::/8 dev eth0 proto kernel scope global metric 256 pref medium Set the type to RTN_MULTICAST which is more appropriate. Fixes: e8478e80e5a7 ("net/ipv6: Save route type in rt6_info") Signed-off-by: Matteo Croce Signed-off-by: Jakub Kicinski --- net/ipv6/addrconf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 19bf6822911cd..9edc5bb2d531a 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -2467,7 +2467,7 @@ static void addrconf_add_mroute(struct net_device *dev) .fc_ifindex = dev->ifindex, .fc_dst_len = 8, .fc_flags = RTF_UP, - .fc_type = RTN_UNICAST, + .fc_type = RTN_MULTICAST, .fc_nlinfo.nl_net = dev_net(dev), .fc_protocol = RTPROT_KERNEL, }; -- GitLab From cb2c57112432d5c77f97ea6320973d02beebf3ee Mon Sep 17 00:00:00 2001 From: Xin Long Date: Fri, 15 Jan 2021 17:47:45 +0800 Subject: [PATCH 1032/2012] vxlan: add NETIF_F_FRAGLIST flag for dev features Some protocol HW GSO requires fraglist supported by the device, like SCTP. Without NETIF_F_FRAGLIST set in the dev features of vxlan, it would have to do SW GSO before the packets enter the driver, even when the vxlan dev and lower dev (like veth) both have the feature of NETIF_F_GSO_SCTP. So this patch is to add it for vxlan. Signed-off-by: Xin Long Signed-off-by: Jakub Kicinski --- drivers/net/vxlan.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index b9364433de8f9..3929e437382b9 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -3283,12 +3283,13 @@ static void vxlan_setup(struct net_device *dev) SET_NETDEV_DEVTYPE(dev, &vxlan_type); dev->features |= NETIF_F_LLTX; - dev->features |= NETIF_F_SG | NETIF_F_HW_CSUM; + dev->features |= NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_FRAGLIST; dev->features |= NETIF_F_RXCSUM; dev->features |= NETIF_F_GSO_SOFTWARE; dev->vlan_features = dev->features; - dev->hw_features |= NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_RXCSUM; + dev->hw_features |= NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_FRAGLIST; + dev->hw_features |= NETIF_F_RXCSUM; dev->hw_features |= NETIF_F_GSO_SOFTWARE; netif_keep_dst(dev); dev->priv_flags |= IFF_NO_QUEUE; -- GitLab From 18423e1a9d7d72c84f04e7f5fa31070855966ea7 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Fri, 15 Jan 2021 17:47:46 +0800 Subject: [PATCH 1033/2012] geneve: add NETIF_F_FRAGLIST flag for dev features Some protocol HW GSO requires fraglist supported by the device, like SCTP. Without NETIF_F_FRAGLIST set in the dev features of geneve, it would have to do SW GSO before the packets enter the driver, even when the geneve dev and lower dev (like veth) both have the feature of NETIF_F_GSO_SCTP. So this patch is to add it for geneve. Signed-off-by: Xin Long Signed-off-by: Jakub Kicinski --- drivers/net/geneve.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index 6aa775d60c575..4ac0373326efd 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -1197,11 +1197,12 @@ static void geneve_setup(struct net_device *dev) SET_NETDEV_DEVTYPE(dev, &geneve_type); dev->features |= NETIF_F_LLTX; - dev->features |= NETIF_F_SG | NETIF_F_HW_CSUM; + dev->features |= NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_FRAGLIST; dev->features |= NETIF_F_RXCSUM; dev->features |= NETIF_F_GSO_SOFTWARE; - dev->hw_features |= NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_RXCSUM; + dev->hw_features |= NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_FRAGLIST; + dev->hw_features |= NETIF_F_RXCSUM; dev->hw_features |= NETIF_F_GSO_SOFTWARE; /* MTU range: 68 - (something less than 65535) */ -- GitLab From 3224dcfd850fccf92e20e55ff74a7bd079458ba8 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Fri, 15 Jan 2021 17:47:47 +0800 Subject: [PATCH 1034/2012] bareudp: add NETIF_F_FRAGLIST flag for dev features Like vxlan and geneve, bareudp also needs this dev feature to support some protocol's HW GSO. Signed-off-by: Xin Long Signed-off-by: Jakub Kicinski --- drivers/net/bareudp.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/bareudp.c b/drivers/net/bareudp.c index 0965d136def3a..1b8f59713fc7d 100644 --- a/drivers/net/bareudp.c +++ b/drivers/net/bareudp.c @@ -532,11 +532,12 @@ static void bareudp_setup(struct net_device *dev) dev->netdev_ops = &bareudp_netdev_ops; dev->needs_free_netdev = true; SET_NETDEV_DEVTYPE(dev, &bareudp_type); - dev->features |= NETIF_F_SG | NETIF_F_HW_CSUM; + dev->features |= NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_FRAGLIST; dev->features |= NETIF_F_RXCSUM; dev->features |= NETIF_F_LLTX; dev->features |= NETIF_F_GSO_SOFTWARE; - dev->hw_features |= NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_RXCSUM; + dev->hw_features |= NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_FRAGLIST; + dev->hw_features |= NETIF_F_RXCSUM; dev->hw_features |= NETIF_F_GSO_SOFTWARE; dev->hard_header_len = 0; dev->addr_len = 0; -- GitLab From 9d9b1ee0b2d1c9e02b2338c4a4b0a062d2d3edac Mon Sep 17 00:00:00 2001 From: Enke Chen Date: Fri, 15 Jan 2021 14:30:58 -0800 Subject: [PATCH 1035/2012] tcp: fix TCP_USER_TIMEOUT with zero window The TCP session does not terminate with TCP_USER_TIMEOUT when data remain untransmitted due to zero window. The number of unanswered zero-window probes (tcp_probes_out) is reset to zero with incoming acks irrespective of the window size, as described in tcp_probe_timer(): RFC 1122 4.2.2.17 requires the sender to stay open indefinitely as long as the receiver continues to respond probes. We support this by default and reset icsk_probes_out with incoming ACKs. This counter, however, is the wrong one to be used in calculating the duration that the window remains closed and data remain untransmitted. Thanks to Jonathan Maxwell for diagnosing the actual issue. In this patch a new timestamp is introduced for the socket in order to track the elapsed time for the zero-window probes that have not been answered with any non-zero window ack. Fixes: 9721e709fa68 ("tcp: simplify window probe aborting on USER_TIMEOUT") Reported-by: William McCall Co-developed-by: Neal Cardwell Signed-off-by: Neal Cardwell Signed-off-by: Enke Chen Reviewed-by: Yuchung Cheng Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/20210115223058.GA39267@localhost.localdomain Signed-off-by: Jakub Kicinski --- include/net/inet_connection_sock.h | 3 +++ net/ipv4/inet_connection_sock.c | 1 + net/ipv4/tcp.c | 1 + net/ipv4/tcp_input.c | 1 + net/ipv4/tcp_output.c | 1 + net/ipv4/tcp_timer.c | 14 +++++++------- 6 files changed, 14 insertions(+), 7 deletions(-) diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index 7338b3865a2a3..111d7771b2081 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -76,6 +76,8 @@ struct inet_connection_sock_af_ops { * @icsk_ext_hdr_len: Network protocol overhead (IP/IPv6 options) * @icsk_ack: Delayed ACK control data * @icsk_mtup; MTU probing control data + * @icsk_probes_tstamp: Probe timestamp (cleared by non-zero window ack) + * @icsk_user_timeout: TCP_USER_TIMEOUT value */ struct inet_connection_sock { /* inet_sock has to be the first member! */ @@ -129,6 +131,7 @@ struct inet_connection_sock { u32 probe_timestamp; } icsk_mtup; + u32 icsk_probes_tstamp; u32 icsk_user_timeout; u64 icsk_ca_priv[104 / sizeof(u64)]; diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index fd8b8800a2c30..6bd7ca09af03d 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -851,6 +851,7 @@ struct sock *inet_csk_clone_lock(const struct sock *sk, newicsk->icsk_retransmits = 0; newicsk->icsk_backoff = 0; newicsk->icsk_probes_out = 0; + newicsk->icsk_probes_tstamp = 0; /* Deinitialize accept_queue to trap illegal accesses. */ memset(&newicsk->icsk_accept_queue, 0, sizeof(newicsk->icsk_accept_queue)); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index ed42d2193c5c7..32545ecf2ab10 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2937,6 +2937,7 @@ int tcp_disconnect(struct sock *sk, int flags) icsk->icsk_backoff = 0; icsk->icsk_probes_out = 0; + icsk->icsk_probes_tstamp = 0; icsk->icsk_rto = TCP_TIMEOUT_INIT; icsk->icsk_rto_min = TCP_RTO_MIN; icsk->icsk_delack_max = TCP_DELACK_MAX; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index c7e16b0ed791f..bafcab75f4256 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3384,6 +3384,7 @@ static void tcp_ack_probe(struct sock *sk) return; if (!after(TCP_SKB_CB(head)->end_seq, tcp_wnd_end(tp))) { icsk->icsk_backoff = 0; + icsk->icsk_probes_tstamp = 0; inet_csk_clear_xmit_timer(sk, ICSK_TIME_PROBE0); /* Socket must be waked up by subsequent tcp_data_snd_check(). * This function is not for random using! diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index f322e798a3519..ab458697881ed 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -4084,6 +4084,7 @@ void tcp_send_probe0(struct sock *sk) /* Cancel probe timer, if it is not required. */ icsk->icsk_probes_out = 0; icsk->icsk_backoff = 0; + icsk->icsk_probes_tstamp = 0; return; } diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 6c62b9ea1320d..454732ecc8f33 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -349,6 +349,7 @@ static void tcp_probe_timer(struct sock *sk) if (tp->packets_out || !skb) { icsk->icsk_probes_out = 0; + icsk->icsk_probes_tstamp = 0; return; } @@ -360,13 +361,12 @@ static void tcp_probe_timer(struct sock *sk) * corresponding system limit. We also implement similar policy when * we use RTO to probe window in tcp_retransmit_timer(). */ - if (icsk->icsk_user_timeout) { - u32 elapsed = tcp_model_timeout(sk, icsk->icsk_probes_out, - tcp_probe0_base(sk)); - - if (elapsed >= icsk->icsk_user_timeout) - goto abort; - } + if (!icsk->icsk_probes_tstamp) + icsk->icsk_probes_tstamp = tcp_jiffies32; + else if (icsk->icsk_user_timeout && + (s32)(tcp_jiffies32 - icsk->icsk_probes_tstamp) >= + msecs_to_jiffies(icsk->icsk_user_timeout)) + goto abort; max_probes = sock_net(sk)->ipv4.sysctl_tcp_retries2; if (sock_flag(sk, SOCK_DEAD)) { -- GitLab From 6ea9309acc2840f8f3fd4d9706f228af6fc45700 Mon Sep 17 00:00:00 2001 From: Tom Rix Date: Fri, 15 Jan 2021 15:53:46 -0800 Subject: [PATCH 1036/2012] net: phy: national: remove definition of DEBUG Defining DEBUG should only be done in development. So remove DEBUG. Signed-off-by: Tom Rix Link: https://lore.kernel.org/r/20210115235346.289611-1-trix@redhat.com Signed-off-by: Jakub Kicinski --- drivers/net/phy/national.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/net/phy/national.c b/drivers/net/phy/national.c index 5a8c8eb185826..46160baaafe3a 100644 --- a/drivers/net/phy/national.c +++ b/drivers/net/phy/national.c @@ -19,8 +19,6 @@ #include #include -#define DEBUG - /* DP83865 phy identifier values */ #define DP83865_PHY_ID 0x20005c7a -- GitLab From d349f997686887906b1183b5be96933c5452362a Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Sat, 16 Jan 2021 16:56:57 -0800 Subject: [PATCH 1037/2012] net_sched: fix RTNL deadlock again caused by request_module() tcf_action_init_1() loads tc action modules automatically with request_module() after parsing the tc action names, and it drops RTNL lock and re-holds it before and after request_module(). This causes a lot of troubles, as discovered by syzbot, because we can be in the middle of batch initializations when we create an array of tc actions. One of the problem is deadlock: CPU 0 CPU 1 rtnl_lock(); for (...) { tcf_action_init_1(); -> rtnl_unlock(); -> request_module(); rtnl_lock(); for (...) { tcf_action_init_1(); -> tcf_idr_check_alloc(); // Insert one action into idr, // but it is not committed until // tcf_idr_insert_many(), then drop // the RTNL lock in the _next_ // iteration -> rtnl_unlock(); -> rtnl_lock(); -> a_o->init(); -> tcf_idr_check_alloc(); // Now waiting for the same index // to be committed -> request_module(); -> rtnl_lock() // Now waiting for RTNL lock } rtnl_unlock(); } rtnl_unlock(); This is not easy to solve, we can move the request_module() before this loop and pre-load all the modules we need for this netlink message and then do the rest initializations. So the loop breaks down to two now: for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) { struct tc_action_ops *a_o; a_o = tc_action_load_ops(name, tb[i]...); ops[i - 1] = a_o; } for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) { act = tcf_action_init_1(ops[i - 1]...); } Although this looks serious, it only has been reported by syzbot, so it seems hard to trigger this by humans. And given the size of this patch, I'd suggest to make it to net-next and not to backport to stable. This patch has been tested by syzbot and tested with tdc.py by me. Fixes: 0fedc63fadf0 ("net_sched: commit action insertions together") Reported-and-tested-by: syzbot+82752bc5331601cf4899@syzkaller.appspotmail.com Reported-and-tested-by: syzbot+b3b63b6bff456bd95294@syzkaller.appspotmail.com Reported-by: syzbot+ba67b12b1ca729912834@syzkaller.appspotmail.com Cc: Jiri Pirko Signed-off-by: Cong Wang Tested-by: Jamal Hadi Salim Acked-by: Jamal Hadi Salim Link: https://lore.kernel.org/r/20210117005657.14810-1-xiyou.wangcong@gmail.com Signed-off-by: Jakub Kicinski --- include/net/act_api.h | 5 +- net/sched/act_api.c | 104 +++++++++++++++++++++++++++--------------- net/sched/cls_api.c | 11 ++++- 3 files changed, 79 insertions(+), 41 deletions(-) diff --git a/include/net/act_api.h b/include/net/act_api.h index 55dab604861fe..761c0e331915f 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -186,10 +186,13 @@ int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla, struct nlattr *est, char *name, int ovr, int bind, struct tc_action *actions[], size_t *attr_size, bool rtnl_held, struct netlink_ext_ack *extack); +struct tc_action_ops *tc_action_load_ops(char *name, struct nlattr *nla, + bool rtnl_held, + struct netlink_ext_ack *extack); struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp, struct nlattr *nla, struct nlattr *est, char *name, int ovr, int bind, - bool rtnl_held, + struct tc_action_ops *ops, bool rtnl_held, struct netlink_ext_ack *extack); int tcf_action_dump(struct sk_buff *skb, struct tc_action *actions[], int bind, int ref, bool terse); diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 2e85b636b27bd..4dd235ce9a072 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -928,19 +928,13 @@ static void tcf_idr_insert_many(struct tc_action *actions[]) } } -struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp, - struct nlattr *nla, struct nlattr *est, - char *name, int ovr, int bind, - bool rtnl_held, - struct netlink_ext_ack *extack) +struct tc_action_ops *tc_action_load_ops(char *name, struct nlattr *nla, + bool rtnl_held, + struct netlink_ext_ack *extack) { - struct nla_bitfield32 flags = { 0, 0 }; - u8 hw_stats = TCA_ACT_HW_STATS_ANY; - struct tc_action *a; + struct nlattr *tb[TCA_ACT_MAX + 1]; struct tc_action_ops *a_o; - struct tc_cookie *cookie = NULL; char act_name[IFNAMSIZ]; - struct nlattr *tb[TCA_ACT_MAX + 1]; struct nlattr *kind; int err; @@ -948,33 +942,21 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp, err = nla_parse_nested_deprecated(tb, TCA_ACT_MAX, nla, tcf_action_policy, extack); if (err < 0) - goto err_out; + return ERR_PTR(err); err = -EINVAL; kind = tb[TCA_ACT_KIND]; if (!kind) { NL_SET_ERR_MSG(extack, "TC action kind must be specified"); - goto err_out; + return ERR_PTR(err); } if (nla_strscpy(act_name, kind, IFNAMSIZ) < 0) { NL_SET_ERR_MSG(extack, "TC action name too long"); - goto err_out; - } - if (tb[TCA_ACT_COOKIE]) { - cookie = nla_memdup_cookie(tb); - if (!cookie) { - NL_SET_ERR_MSG(extack, "No memory to generate TC cookie"); - err = -ENOMEM; - goto err_out; - } + return ERR_PTR(err); } - hw_stats = tcf_action_hw_stats_get(tb[TCA_ACT_HW_STATS]); - if (tb[TCA_ACT_FLAGS]) - flags = nla_get_bitfield32(tb[TCA_ACT_FLAGS]); } else { if (strlcpy(act_name, name, IFNAMSIZ) >= IFNAMSIZ) { NL_SET_ERR_MSG(extack, "TC action name too long"); - err = -EINVAL; - goto err_out; + return ERR_PTR(-EINVAL); } } @@ -996,24 +978,56 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp, * indicate this using -EAGAIN. */ if (a_o != NULL) { - err = -EAGAIN; - goto err_mod; + module_put(a_o->owner); + return ERR_PTR(-EAGAIN); } #endif NL_SET_ERR_MSG(extack, "Failed to load TC action module"); - err = -ENOENT; - goto err_free; + return ERR_PTR(-ENOENT); } + return a_o; +} + +struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp, + struct nlattr *nla, struct nlattr *est, + char *name, int ovr, int bind, + struct tc_action_ops *a_o, bool rtnl_held, + struct netlink_ext_ack *extack) +{ + struct nla_bitfield32 flags = { 0, 0 }; + u8 hw_stats = TCA_ACT_HW_STATS_ANY; + struct nlattr *tb[TCA_ACT_MAX + 1]; + struct tc_cookie *cookie = NULL; + struct tc_action *a; + int err; + /* backward compatibility for policer */ - if (name == NULL) + if (name == NULL) { + err = nla_parse_nested_deprecated(tb, TCA_ACT_MAX, nla, + tcf_action_policy, extack); + if (err < 0) + return ERR_PTR(err); + if (tb[TCA_ACT_COOKIE]) { + cookie = nla_memdup_cookie(tb); + if (!cookie) { + NL_SET_ERR_MSG(extack, "No memory to generate TC cookie"); + err = -ENOMEM; + goto err_out; + } + } + hw_stats = tcf_action_hw_stats_get(tb[TCA_ACT_HW_STATS]); + if (tb[TCA_ACT_FLAGS]) + flags = nla_get_bitfield32(tb[TCA_ACT_FLAGS]); + err = a_o->init(net, tb[TCA_ACT_OPTIONS], est, &a, ovr, bind, rtnl_held, tp, flags.value, extack); - else + } else { err = a_o->init(net, nla, est, &a, ovr, bind, rtnl_held, tp, flags.value, extack); + } if (err < 0) - goto err_mod; + goto err_out; if (!name && tb[TCA_ACT_COOKIE]) tcf_set_action_cookie(&a->act_cookie, cookie); @@ -1030,14 +1044,11 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp, return a; -err_mod: - module_put(a_o->owner); -err_free: +err_out: if (cookie) { kfree(cookie->data); kfree(cookie); } -err_out: return ERR_PTR(err); } @@ -1048,6 +1059,7 @@ int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla, struct tc_action *actions[], size_t *attr_size, bool rtnl_held, struct netlink_ext_ack *extack) { + struct tc_action_ops *ops[TCA_ACT_MAX_PRIO] = {}; struct nlattr *tb[TCA_ACT_MAX_PRIO + 1]; struct tc_action *act; size_t sz = 0; @@ -1059,9 +1071,20 @@ int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla, if (err < 0) return err; + for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) { + struct tc_action_ops *a_o; + + a_o = tc_action_load_ops(name, tb[i], rtnl_held, extack); + if (IS_ERR(a_o)) { + err = PTR_ERR(a_o); + goto err_mod; + } + ops[i - 1] = a_o; + } + for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) { act = tcf_action_init_1(net, tp, tb[i], est, name, ovr, bind, - rtnl_held, extack); + ops[i - 1], rtnl_held, extack); if (IS_ERR(act)) { err = PTR_ERR(act); goto err; @@ -1081,6 +1104,11 @@ int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla, err: tcf_action_destroy(actions, bind); +err_mod: + for (i = 0; i < TCA_ACT_MAX_PRIO; i++) { + if (ops[i]) + module_put(ops[i]->owner); + } return err; } diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 37b77bd309746..a67c66a512a49 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -3043,12 +3043,19 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb, size_t attr_size = 0; if (exts->police && tb[exts->police]) { + struct tc_action_ops *a_o; + + a_o = tc_action_load_ops("police", tb[exts->police], rtnl_held, extack); + if (IS_ERR(a_o)) + return PTR_ERR(a_o); act = tcf_action_init_1(net, tp, tb[exts->police], rate_tlv, "police", ovr, - TCA_ACT_BIND, rtnl_held, + TCA_ACT_BIND, a_o, rtnl_held, extack); - if (IS_ERR(act)) + if (IS_ERR(act)) { + module_put(a_o->owner); return PTR_ERR(act); + } act->type = exts->type = TCA_OLD_COMPAT; exts->actions[0] = act; -- GitLab From 41fb4c1ba7478fe34c7e094e124e4ee4513b9763 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 17 Jan 2021 09:15:42 +0100 Subject: [PATCH 1038/2012] net/qla3xxx: switch from 'pci_' to 'dma_' API The wrappers in include/linux/pci-dma-compat.h should go away. The patch has been generated with the coccinelle script below and has been hand modified to replace GFP_ with a correct flag. It has been compile tested. When memory is allocated in 'ql_alloc_net_req_rsp_queues()' GFP_KERNEL can be used because it is only called from 'ql_alloc_mem_resources()' which already calls 'ql_alloc_buffer_queues()' which uses GFP_KERNEL. (see below) When memory is allocated in 'ql_alloc_buffer_queues()' GFP_KERNEL can be used because this flag is already used just a few line above. When memory is allocated in 'ql_alloc_small_buffers()' GFP_KERNEL can be used because it is only called from 'ql_alloc_mem_resources()' which already calls 'ql_alloc_buffer_queues()' which uses GFP_KERNEL. (see above) When memory is allocated in 'ql_alloc_mem_resources()' GFP_KERNEL can be used because this function already calls 'ql_alloc_buffer_queues()' which uses GFP_KERNEL. (see above) While at it, use 'dma_set_mask_and_coherent()' instead of 'dma_set_mask()/ dma_set_coherent_mask()' in order to slightly simplify code. @@ @@ - PCI_DMA_BIDIRECTIONAL + DMA_BIDIRECTIONAL @@ @@ - PCI_DMA_TODEVICE + DMA_TO_DEVICE @@ @@ - PCI_DMA_FROMDEVICE + DMA_FROM_DEVICE @@ @@ - PCI_DMA_NONE + DMA_NONE @@ expression e1, e2, e3; @@ - pci_alloc_consistent(e1, e2, e3) + dma_alloc_coherent(&e1->dev, e2, e3, GFP_) @@ expression e1, e2, e3; @@ - pci_zalloc_consistent(e1, e2, e3) + dma_alloc_coherent(&e1->dev, e2, e3, GFP_) @@ expression e1, e2, e3, e4; @@ - pci_free_consistent(e1, e2, e3, e4) + dma_free_coherent(&e1->dev, e2, e3, e4) @@ expression e1, e2, e3, e4; @@ - pci_map_single(e1, e2, e3, e4) + dma_map_single(&e1->dev, e2, e3, e4) @@ expression e1, e2, e3, e4; @@ - pci_unmap_single(e1, e2, e3, e4) + dma_unmap_single(&e1->dev, e2, e3, e4) @@ expression e1, e2, e3, e4, e5; @@ - pci_map_page(e1, e2, e3, e4, e5) + dma_map_page(&e1->dev, e2, e3, e4, e5) @@ expression e1, e2, e3, e4; @@ - pci_unmap_page(e1, e2, e3, e4) + dma_unmap_page(&e1->dev, e2, e3, e4) @@ expression e1, e2, e3, e4; @@ - pci_map_sg(e1, e2, e3, e4) + dma_map_sg(&e1->dev, e2, e3, e4) @@ expression e1, e2, e3, e4; @@ - pci_unmap_sg(e1, e2, e3, e4) + dma_unmap_sg(&e1->dev, e2, e3, e4) @@ expression e1, e2, e3, e4; @@ - pci_dma_sync_single_for_cpu(e1, e2, e3, e4) + dma_sync_single_for_cpu(&e1->dev, e2, e3, e4) @@ expression e1, e2, e3, e4; @@ - pci_dma_sync_single_for_device(e1, e2, e3, e4) + dma_sync_single_for_device(&e1->dev, e2, e3, e4) @@ expression e1, e2, e3, e4; @@ - pci_dma_sync_sg_for_cpu(e1, e2, e3, e4) + dma_sync_sg_for_cpu(&e1->dev, e2, e3, e4) @@ expression e1, e2, e3, e4; @@ - pci_dma_sync_sg_for_device(e1, e2, e3, e4) + dma_sync_sg_for_device(&e1->dev, e2, e3, e4) @@ expression e1, e2; @@ - pci_dma_mapping_error(e1, e2) + dma_mapping_error(&e1->dev, e2) @@ expression e1, e2; @@ - pci_set_dma_mask(e1, e2) + dma_set_mask(&e1->dev, e2) @@ expression e1, e2; @@ - pci_set_consistent_dma_mask(e1, e2) + dma_set_coherent_mask(&e1->dev, e2) Signed-off-by: Christophe JAILLET Link: https://lore.kernel.org/r/20210117081542.560021-1-christophe.jaillet@wanadoo.fr Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/qlogic/qla3xxx.c | 196 ++++++++++++-------------- 1 file changed, 87 insertions(+), 109 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qla3xxx.c b/drivers/net/ethernet/qlogic/qla3xxx.c index 27740c027681b..214e347097a7a 100644 --- a/drivers/net/ethernet/qlogic/qla3xxx.c +++ b/drivers/net/ethernet/qlogic/qla3xxx.c @@ -315,12 +315,11 @@ static void ql_release_to_lrg_buf_free_list(struct ql3_adapter *qdev, * buffer */ skb_reserve(lrg_buf_cb->skb, QL_HEADER_SPACE); - map = pci_map_single(qdev->pdev, + map = dma_map_single(&qdev->pdev->dev, lrg_buf_cb->skb->data, - qdev->lrg_buffer_len - - QL_HEADER_SPACE, - PCI_DMA_FROMDEVICE); - err = pci_dma_mapping_error(qdev->pdev, map); + qdev->lrg_buffer_len - QL_HEADER_SPACE, + DMA_FROM_DEVICE); + err = dma_mapping_error(&qdev->pdev->dev, map); if (err) { netdev_err(qdev->ndev, "PCI mapping failed with error: %d\n", @@ -1802,13 +1801,12 @@ static int ql_populate_free_queue(struct ql3_adapter *qdev) * first buffer */ skb_reserve(lrg_buf_cb->skb, QL_HEADER_SPACE); - map = pci_map_single(qdev->pdev, + map = dma_map_single(&qdev->pdev->dev, lrg_buf_cb->skb->data, - qdev->lrg_buffer_len - - QL_HEADER_SPACE, - PCI_DMA_FROMDEVICE); + qdev->lrg_buffer_len - QL_HEADER_SPACE, + DMA_FROM_DEVICE); - err = pci_dma_mapping_error(qdev->pdev, map); + err = dma_mapping_error(&qdev->pdev->dev, map); if (err) { netdev_err(qdev->ndev, "PCI mapping failed with error: %d\n", @@ -1943,18 +1941,16 @@ static void ql_process_mac_tx_intr(struct ql3_adapter *qdev, goto invalid_seg_count; } - pci_unmap_single(qdev->pdev, + dma_unmap_single(&qdev->pdev->dev, dma_unmap_addr(&tx_cb->map[0], mapaddr), - dma_unmap_len(&tx_cb->map[0], maplen), - PCI_DMA_TODEVICE); + dma_unmap_len(&tx_cb->map[0], maplen), DMA_TO_DEVICE); tx_cb->seg_count--; if (tx_cb->seg_count) { for (i = 1; i < tx_cb->seg_count; i++) { - pci_unmap_page(qdev->pdev, - dma_unmap_addr(&tx_cb->map[i], - mapaddr), + dma_unmap_page(&qdev->pdev->dev, + dma_unmap_addr(&tx_cb->map[i], mapaddr), dma_unmap_len(&tx_cb->map[i], maplen), - PCI_DMA_TODEVICE); + DMA_TO_DEVICE); } } qdev->ndev->stats.tx_packets++; @@ -2021,10 +2017,9 @@ static void ql_process_mac_rx_intr(struct ql3_adapter *qdev, qdev->ndev->stats.rx_bytes += length; skb_put(skb, length); - pci_unmap_single(qdev->pdev, + dma_unmap_single(&qdev->pdev->dev, dma_unmap_addr(lrg_buf_cb2, mapaddr), - dma_unmap_len(lrg_buf_cb2, maplen), - PCI_DMA_FROMDEVICE); + dma_unmap_len(lrg_buf_cb2, maplen), DMA_FROM_DEVICE); prefetch(skb->data); skb_checksum_none_assert(skb); skb->protocol = eth_type_trans(skb, qdev->ndev); @@ -2067,10 +2062,9 @@ static void ql_process_macip_rx_intr(struct ql3_adapter *qdev, skb2 = lrg_buf_cb2->skb; skb_put(skb2, length); /* Just the second buffer length here. */ - pci_unmap_single(qdev->pdev, + dma_unmap_single(&qdev->pdev->dev, dma_unmap_addr(lrg_buf_cb2, mapaddr), - dma_unmap_len(lrg_buf_cb2, maplen), - PCI_DMA_FROMDEVICE); + dma_unmap_len(lrg_buf_cb2, maplen), DMA_FROM_DEVICE); prefetch(skb2->data); skb_checksum_none_assert(skb2); @@ -2319,9 +2313,9 @@ static int ql_send_map(struct ql3_adapter *qdev, /* * Map the skb buffer first. */ - map = pci_map_single(qdev->pdev, skb->data, len, PCI_DMA_TODEVICE); + map = dma_map_single(&qdev->pdev->dev, skb->data, len, DMA_TO_DEVICE); - err = pci_dma_mapping_error(qdev->pdev, map); + err = dma_mapping_error(&qdev->pdev->dev, map); if (err) { netdev_err(qdev->ndev, "PCI mapping failed with error: %d\n", err); @@ -2357,11 +2351,11 @@ static int ql_send_map(struct ql3_adapter *qdev, (seg == 7 && seg_cnt > 8) || (seg == 12 && seg_cnt > 13) || (seg == 17 && seg_cnt > 18)) { - map = pci_map_single(qdev->pdev, oal, + map = dma_map_single(&qdev->pdev->dev, oal, sizeof(struct oal), - PCI_DMA_TODEVICE); + DMA_TO_DEVICE); - err = pci_dma_mapping_error(qdev->pdev, map); + err = dma_mapping_error(&qdev->pdev->dev, map); if (err) { netdev_err(qdev->ndev, "PCI mapping outbound address list with error: %d\n", @@ -2423,24 +2417,24 @@ map_error: (seg == 7 && seg_cnt > 8) || (seg == 12 && seg_cnt > 13) || (seg == 17 && seg_cnt > 18)) { - pci_unmap_single(qdev->pdev, - dma_unmap_addr(&tx_cb->map[seg], mapaddr), - dma_unmap_len(&tx_cb->map[seg], maplen), - PCI_DMA_TODEVICE); + dma_unmap_single(&qdev->pdev->dev, + dma_unmap_addr(&tx_cb->map[seg], mapaddr), + dma_unmap_len(&tx_cb->map[seg], maplen), + DMA_TO_DEVICE); oal++; seg++; } - pci_unmap_page(qdev->pdev, + dma_unmap_page(&qdev->pdev->dev, dma_unmap_addr(&tx_cb->map[seg], mapaddr), dma_unmap_len(&tx_cb->map[seg], maplen), - PCI_DMA_TODEVICE); + DMA_TO_DEVICE); } - pci_unmap_single(qdev->pdev, + dma_unmap_single(&qdev->pdev->dev, dma_unmap_addr(&tx_cb->map[0], mapaddr), dma_unmap_addr(&tx_cb->map[0], maplen), - PCI_DMA_TODEVICE); + DMA_TO_DEVICE); return NETDEV_TX_BUSY; @@ -2525,9 +2519,8 @@ static int ql_alloc_net_req_rsp_queues(struct ql3_adapter *qdev) wmb(); qdev->req_q_virt_addr = - pci_alloc_consistent(qdev->pdev, - (size_t) qdev->req_q_size, - &qdev->req_q_phy_addr); + dma_alloc_coherent(&qdev->pdev->dev, (size_t)qdev->req_q_size, + &qdev->req_q_phy_addr, GFP_KERNEL); if ((qdev->req_q_virt_addr == NULL) || LS_64BITS(qdev->req_q_phy_addr) & (qdev->req_q_size - 1)) { @@ -2536,16 +2529,14 @@ static int ql_alloc_net_req_rsp_queues(struct ql3_adapter *qdev) } qdev->rsp_q_virt_addr = - pci_alloc_consistent(qdev->pdev, - (size_t) qdev->rsp_q_size, - &qdev->rsp_q_phy_addr); + dma_alloc_coherent(&qdev->pdev->dev, (size_t)qdev->rsp_q_size, + &qdev->rsp_q_phy_addr, GFP_KERNEL); if ((qdev->rsp_q_virt_addr == NULL) || LS_64BITS(qdev->rsp_q_phy_addr) & (qdev->rsp_q_size - 1)) { netdev_err(qdev->ndev, "rspQ allocation failed\n"); - pci_free_consistent(qdev->pdev, (size_t) qdev->req_q_size, - qdev->req_q_virt_addr, - qdev->req_q_phy_addr); + dma_free_coherent(&qdev->pdev->dev, (size_t)qdev->req_q_size, + qdev->req_q_virt_addr, qdev->req_q_phy_addr); return -ENOMEM; } @@ -2561,15 +2552,13 @@ static void ql_free_net_req_rsp_queues(struct ql3_adapter *qdev) return; } - pci_free_consistent(qdev->pdev, - qdev->req_q_size, - qdev->req_q_virt_addr, qdev->req_q_phy_addr); + dma_free_coherent(&qdev->pdev->dev, qdev->req_q_size, + qdev->req_q_virt_addr, qdev->req_q_phy_addr); qdev->req_q_virt_addr = NULL; - pci_free_consistent(qdev->pdev, - qdev->rsp_q_size, - qdev->rsp_q_virt_addr, qdev->rsp_q_phy_addr); + dma_free_coherent(&qdev->pdev->dev, qdev->rsp_q_size, + qdev->rsp_q_virt_addr, qdev->rsp_q_phy_addr); qdev->rsp_q_virt_addr = NULL; @@ -2593,9 +2582,9 @@ static int ql_alloc_buffer_queues(struct ql3_adapter *qdev) return -ENOMEM; qdev->lrg_buf_q_alloc_virt_addr = - pci_alloc_consistent(qdev->pdev, - qdev->lrg_buf_q_alloc_size, - &qdev->lrg_buf_q_alloc_phy_addr); + dma_alloc_coherent(&qdev->pdev->dev, + qdev->lrg_buf_q_alloc_size, + &qdev->lrg_buf_q_alloc_phy_addr, GFP_KERNEL); if (qdev->lrg_buf_q_alloc_virt_addr == NULL) { netdev_err(qdev->ndev, "lBufQ failed\n"); @@ -2613,15 +2602,16 @@ static int ql_alloc_buffer_queues(struct ql3_adapter *qdev) qdev->small_buf_q_alloc_size = qdev->small_buf_q_size * 2; qdev->small_buf_q_alloc_virt_addr = - pci_alloc_consistent(qdev->pdev, - qdev->small_buf_q_alloc_size, - &qdev->small_buf_q_alloc_phy_addr); + dma_alloc_coherent(&qdev->pdev->dev, + qdev->small_buf_q_alloc_size, + &qdev->small_buf_q_alloc_phy_addr, GFP_KERNEL); if (qdev->small_buf_q_alloc_virt_addr == NULL) { netdev_err(qdev->ndev, "Small Buffer Queue allocation failed\n"); - pci_free_consistent(qdev->pdev, qdev->lrg_buf_q_alloc_size, - qdev->lrg_buf_q_alloc_virt_addr, - qdev->lrg_buf_q_alloc_phy_addr); + dma_free_coherent(&qdev->pdev->dev, + qdev->lrg_buf_q_alloc_size, + qdev->lrg_buf_q_alloc_virt_addr, + qdev->lrg_buf_q_alloc_phy_addr); return -ENOMEM; } @@ -2638,17 +2628,15 @@ static void ql_free_buffer_queues(struct ql3_adapter *qdev) return; } kfree(qdev->lrg_buf); - pci_free_consistent(qdev->pdev, - qdev->lrg_buf_q_alloc_size, - qdev->lrg_buf_q_alloc_virt_addr, - qdev->lrg_buf_q_alloc_phy_addr); + dma_free_coherent(&qdev->pdev->dev, qdev->lrg_buf_q_alloc_size, + qdev->lrg_buf_q_alloc_virt_addr, + qdev->lrg_buf_q_alloc_phy_addr); qdev->lrg_buf_q_virt_addr = NULL; - pci_free_consistent(qdev->pdev, - qdev->small_buf_q_alloc_size, - qdev->small_buf_q_alloc_virt_addr, - qdev->small_buf_q_alloc_phy_addr); + dma_free_coherent(&qdev->pdev->dev, qdev->small_buf_q_alloc_size, + qdev->small_buf_q_alloc_virt_addr, + qdev->small_buf_q_alloc_phy_addr); qdev->small_buf_q_virt_addr = NULL; @@ -2666,9 +2654,9 @@ static int ql_alloc_small_buffers(struct ql3_adapter *qdev) QL_SMALL_BUFFER_SIZE); qdev->small_buf_virt_addr = - pci_alloc_consistent(qdev->pdev, - qdev->small_buf_total_size, - &qdev->small_buf_phy_addr); + dma_alloc_coherent(&qdev->pdev->dev, + qdev->small_buf_total_size, + &qdev->small_buf_phy_addr, GFP_KERNEL); if (qdev->small_buf_virt_addr == NULL) { netdev_err(qdev->ndev, "Failed to get small buffer memory\n"); @@ -2701,10 +2689,10 @@ static void ql_free_small_buffers(struct ql3_adapter *qdev) return; } if (qdev->small_buf_virt_addr != NULL) { - pci_free_consistent(qdev->pdev, - qdev->small_buf_total_size, - qdev->small_buf_virt_addr, - qdev->small_buf_phy_addr); + dma_free_coherent(&qdev->pdev->dev, + qdev->small_buf_total_size, + qdev->small_buf_virt_addr, + qdev->small_buf_phy_addr); qdev->small_buf_virt_addr = NULL; } @@ -2719,10 +2707,10 @@ static void ql_free_large_buffers(struct ql3_adapter *qdev) lrg_buf_cb = &qdev->lrg_buf[i]; if (lrg_buf_cb->skb) { dev_kfree_skb(lrg_buf_cb->skb); - pci_unmap_single(qdev->pdev, + dma_unmap_single(&qdev->pdev->dev, dma_unmap_addr(lrg_buf_cb, mapaddr), dma_unmap_len(lrg_buf_cb, maplen), - PCI_DMA_FROMDEVICE); + DMA_FROM_DEVICE); memset(lrg_buf_cb, 0, sizeof(struct ql_rcv_buf_cb)); } else { break; @@ -2774,13 +2762,11 @@ static int ql_alloc_large_buffers(struct ql3_adapter *qdev) * buffer */ skb_reserve(skb, QL_HEADER_SPACE); - map = pci_map_single(qdev->pdev, - skb->data, - qdev->lrg_buffer_len - - QL_HEADER_SPACE, - PCI_DMA_FROMDEVICE); + map = dma_map_single(&qdev->pdev->dev, skb->data, + qdev->lrg_buffer_len - QL_HEADER_SPACE, + DMA_FROM_DEVICE); - err = pci_dma_mapping_error(qdev->pdev, map); + err = dma_mapping_error(&qdev->pdev->dev, map); if (err) { netdev_err(qdev->ndev, "PCI mapping failed with error: %d\n", @@ -2865,8 +2851,8 @@ static int ql_alloc_mem_resources(struct ql3_adapter *qdev) * Network Completion Queue Producer Index Register */ qdev->shadow_reg_virt_addr = - pci_alloc_consistent(qdev->pdev, - PAGE_SIZE, &qdev->shadow_reg_phy_addr); + dma_alloc_coherent(&qdev->pdev->dev, PAGE_SIZE, + &qdev->shadow_reg_phy_addr, GFP_KERNEL); if (qdev->shadow_reg_virt_addr != NULL) { qdev->preq_consumer_index = qdev->shadow_reg_virt_addr; @@ -2921,10 +2907,9 @@ err_small_buffers: err_buffer_queues: ql_free_net_req_rsp_queues(qdev); err_req_rsp: - pci_free_consistent(qdev->pdev, - PAGE_SIZE, - qdev->shadow_reg_virt_addr, - qdev->shadow_reg_phy_addr); + dma_free_coherent(&qdev->pdev->dev, PAGE_SIZE, + qdev->shadow_reg_virt_addr, + qdev->shadow_reg_phy_addr); return -ENOMEM; } @@ -2937,10 +2922,9 @@ static void ql_free_mem_resources(struct ql3_adapter *qdev) ql_free_buffer_queues(qdev); ql_free_net_req_rsp_queues(qdev); if (qdev->shadow_reg_virt_addr != NULL) { - pci_free_consistent(qdev->pdev, - PAGE_SIZE, - qdev->shadow_reg_virt_addr, - qdev->shadow_reg_phy_addr); + dma_free_coherent(&qdev->pdev->dev, PAGE_SIZE, + qdev->shadow_reg_virt_addr, + qdev->shadow_reg_phy_addr); qdev->shadow_reg_virt_addr = NULL; } } @@ -3641,18 +3625,15 @@ static void ql_reset_work(struct work_struct *work) if (tx_cb->skb) { netdev_printk(KERN_DEBUG, ndev, "Freeing lost SKB\n"); - pci_unmap_single(qdev->pdev, - dma_unmap_addr(&tx_cb->map[0], - mapaddr), - dma_unmap_len(&tx_cb->map[0], maplen), - PCI_DMA_TODEVICE); + dma_unmap_single(&qdev->pdev->dev, + dma_unmap_addr(&tx_cb->map[0], mapaddr), + dma_unmap_len(&tx_cb->map[0], maplen), + DMA_TO_DEVICE); for (j = 1; j < tx_cb->seg_count; j++) { - pci_unmap_page(qdev->pdev, - dma_unmap_addr(&tx_cb->map[j], - mapaddr), - dma_unmap_len(&tx_cb->map[j], - maplen), - PCI_DMA_TODEVICE); + dma_unmap_page(&qdev->pdev->dev, + dma_unmap_addr(&tx_cb->map[j], mapaddr), + dma_unmap_len(&tx_cb->map[j], maplen), + DMA_TO_DEVICE); } dev_kfree_skb(tx_cb->skb); tx_cb->skb = NULL; @@ -3784,13 +3765,10 @@ static int ql3xxx_probe(struct pci_dev *pdev, pci_set_master(pdev); - if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(64))) { + if (!dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64))) pci_using_dac = 1; - err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)); - } else if (!(err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32)))) { + else if (!(err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)))) pci_using_dac = 0; - err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32)); - } if (err) { pr_err("%s no usable DMA configuration\n", pci_name(pdev)); -- GitLab From 719a402cf60311b1cdff3f6320abaecdcc5e46b7 Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Sun, 17 Jan 2021 16:59:42 +0200 Subject: [PATCH 1039/2012] net: netdevice: Add operation ndo_sk_get_lower_dev ndo_sk_get_lower_dev returns the lower netdev that corresponds to a given socket. Additionally, we implement a helper netdev_sk_get_lowest_dev() to get the lowest one in chain. Signed-off-by: Tariq Toukan Reviewed-by: Boris Pismenny Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 4 ++++ net/core/dev.c | 33 +++++++++++++++++++++++++++++++++ 2 files changed, 37 insertions(+) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 5b949076ed231..02dcef4d66e2e 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1398,6 +1398,8 @@ struct net_device_ops { struct net_device* (*ndo_get_xmit_slave)(struct net_device *dev, struct sk_buff *skb, bool all_slaves); + struct net_device* (*ndo_sk_get_lower_dev)(struct net_device *dev, + struct sock *sk); netdev_features_t (*ndo_fix_features)(struct net_device *dev, netdev_features_t features); int (*ndo_set_features)(struct net_device *dev, @@ -2858,6 +2860,8 @@ int init_dummy_netdev(struct net_device *dev); struct net_device *netdev_get_xmit_slave(struct net_device *dev, struct sk_buff *skb, bool all_slaves); +struct net_device *netdev_sk_get_lowest_dev(struct net_device *dev, + struct sock *sk); struct net_device *dev_get_by_index(struct net *net, int ifindex); struct net_device *__dev_get_by_index(struct net *net, int ifindex); struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex); diff --git a/net/core/dev.c b/net/core/dev.c index bae35c1ae1928..6b90520a01b19 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -8105,6 +8105,39 @@ struct net_device *netdev_get_xmit_slave(struct net_device *dev, } EXPORT_SYMBOL(netdev_get_xmit_slave); +static struct net_device *netdev_sk_get_lower_dev(struct net_device *dev, + struct sock *sk) +{ + const struct net_device_ops *ops = dev->netdev_ops; + + if (!ops->ndo_sk_get_lower_dev) + return NULL; + return ops->ndo_sk_get_lower_dev(dev, sk); +} + +/** + * netdev_sk_get_lowest_dev - Get the lowest device in chain given device and socket + * @dev: device + * @sk: the socket + * + * %NULL is returned if no lower device is found. + */ + +struct net_device *netdev_sk_get_lowest_dev(struct net_device *dev, + struct sock *sk) +{ + struct net_device *lower; + + lower = netdev_sk_get_lower_dev(dev, sk); + while (lower) { + dev = lower; + lower = netdev_sk_get_lower_dev(dev, sk); + } + + return dev; +} +EXPORT_SYMBOL(netdev_sk_get_lowest_dev); + static void netdev_adjacent_add_links(struct net_device *dev) { struct netdev_adjacent *iter; -- GitLab From 5b99854540e35c2c6a226bcdb4bafbae1bccad5a Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Sun, 17 Jan 2021 16:59:43 +0200 Subject: [PATCH 1040/2012] net/bonding: Take IP hash logic into a helper Hash logic on L3 will be used in a downstream patch for one more use case. Take it to a function for a better code reuse. Signed-off-by: Tariq Toukan Reviewed-by: Boris Pismenny Signed-off-by: Jakub Kicinski --- drivers/net/bonding/bond_main.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index ad5192ee18459..759ad22b72790 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -3541,6 +3541,16 @@ static bool bond_flow_dissect(struct bonding *bond, struct sk_buff *skb, return true; } +static u32 bond_ip_hash(u32 hash, struct flow_keys *flow) +{ + hash ^= (__force u32)flow_get_u32_dst(flow) ^ + (__force u32)flow_get_u32_src(flow); + hash ^= (hash >> 16); + hash ^= (hash >> 8); + /* discard lowest hash bit to deal with the common even ports pattern */ + return hash >> 1; +} + /** * bond_xmit_hash - generate a hash value based on the xmit policy * @bond: bonding device @@ -3571,12 +3581,8 @@ u32 bond_xmit_hash(struct bonding *bond, struct sk_buff *skb) else memcpy(&hash, &flow.ports.ports, sizeof(hash)); } - hash ^= (__force u32)flow_get_u32_dst(&flow) ^ - (__force u32)flow_get_u32_src(&flow); - hash ^= (hash >> 16); - hash ^= (hash >> 8); - return hash >> 1; + return bond_ip_hash(hash, &flow); } /*-------------------------- Device entry points ----------------------------*/ -- GitLab From 007feb87fb15933b5de7135e6bdf57c219b3fbec Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Sun, 17 Jan 2021 16:59:44 +0200 Subject: [PATCH 1041/2012] net/bonding: Implement ndo_sk_get_lower_dev Add ndo_sk_get_lower_dev() implementation for bond interfaces. Support only for the cases where the socket's and SKBs' hash yields identical value for the whole connection lifetime. Here we restrict it to L3+4 sockets only, with xmit_hash_policy==LAYER34 and bond modes xor/802.3ad. Signed-off-by: Tariq Toukan Reviewed-by: Boris Pismenny Signed-off-by: Jakub Kicinski --- drivers/net/bonding/bond_main.c | 93 +++++++++++++++++++++++++++++++++ include/net/bonding.h | 2 + 2 files changed, 95 insertions(+) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 759ad22b72790..09524f99c7534 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -301,6 +301,19 @@ netdev_tx_t bond_dev_queue_xmit(struct bonding *bond, struct sk_buff *skb, return dev_queue_xmit(skb); } +bool bond_sk_check(struct bonding *bond) +{ + switch (BOND_MODE(bond)) { + case BOND_MODE_8023AD: + case BOND_MODE_XOR: + if (bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER34) + return true; + fallthrough; + default: + return false; + } +} + /*---------------------------------- VLAN -----------------------------------*/ /* In the following 2 functions, bond_vlan_rx_add_vid and bond_vlan_rx_kill_vid, @@ -4555,6 +4568,85 @@ static struct net_device *bond_xmit_get_slave(struct net_device *master_dev, return NULL; } +static void bond_sk_to_flow(struct sock *sk, struct flow_keys *flow) +{ + switch (sk->sk_family) { +#if IS_ENABLED(CONFIG_IPV6) + case AF_INET6: + if (sk->sk_ipv6only || + ipv6_addr_type(&sk->sk_v6_daddr) != IPV6_ADDR_MAPPED) { + flow->control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; + flow->addrs.v6addrs.src = inet6_sk(sk)->saddr; + flow->addrs.v6addrs.dst = sk->sk_v6_daddr; + break; + } + fallthrough; +#endif + default: /* AF_INET */ + flow->control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; + flow->addrs.v4addrs.src = inet_sk(sk)->inet_rcv_saddr; + flow->addrs.v4addrs.dst = inet_sk(sk)->inet_daddr; + break; + } + + flow->ports.src = inet_sk(sk)->inet_sport; + flow->ports.dst = inet_sk(sk)->inet_dport; +} + +/** + * bond_sk_hash_l34 - generate a hash value based on the socket's L3 and L4 fields + * @sk: socket to use for headers + * + * This function will extract the necessary field from the socket and use + * them to generate a hash based on the LAYER34 xmit_policy. + * Assumes that sk is a TCP or UDP socket. + */ +static u32 bond_sk_hash_l34(struct sock *sk) +{ + struct flow_keys flow; + u32 hash; + + bond_sk_to_flow(sk, &flow); + + /* L4 */ + memcpy(&hash, &flow.ports.ports, sizeof(hash)); + /* L3 */ + return bond_ip_hash(hash, &flow); +} + +static struct net_device *__bond_sk_get_lower_dev(struct bonding *bond, + struct sock *sk) +{ + struct bond_up_slave *slaves; + struct slave *slave; + unsigned int count; + u32 hash; + + slaves = rcu_dereference(bond->usable_slaves); + count = slaves ? READ_ONCE(slaves->count) : 0; + if (unlikely(!count)) + return NULL; + + hash = bond_sk_hash_l34(sk); + slave = slaves->arr[hash % count]; + + return slave->dev; +} + +static struct net_device *bond_sk_get_lower_dev(struct net_device *dev, + struct sock *sk) +{ + struct bonding *bond = netdev_priv(dev); + struct net_device *lower = NULL; + + rcu_read_lock(); + if (bond_sk_check(bond)) + lower = __bond_sk_get_lower_dev(bond, sk); + rcu_read_unlock(); + + return lower; +} + static netdev_tx_t __bond_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct bonding *bond = netdev_priv(dev); @@ -4691,6 +4783,7 @@ static const struct net_device_ops bond_netdev_ops = { .ndo_fix_features = bond_fix_features, .ndo_features_check = passthru_features_check, .ndo_get_xmit_slave = bond_xmit_get_slave, + .ndo_sk_get_lower_dev = bond_sk_get_lower_dev, }; static const struct device_type bond_type = { diff --git a/include/net/bonding.h b/include/net/bonding.h index adc3da7769700..21497193c4a47 100644 --- a/include/net/bonding.h +++ b/include/net/bonding.h @@ -265,6 +265,8 @@ struct bond_vlan_tag { unsigned short vlan_id; }; +bool bond_sk_check(struct bonding *bond); + /** * Returns NULL if the net_device does not belong to any of the bond's slaves * -- GitLab From f45583de361db2160fbca4a99c20a0c44b34f36a Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Sun, 17 Jan 2021 16:59:45 +0200 Subject: [PATCH 1042/2012] net/bonding: Take update_features call out of XFRM funciton In preparation for more cases that call netdev_update_features(). While here, move the features logic to the stage where struct bond is already updated, and pass it as the only parameter to function bond_set_xfrm_features(). Signed-off-by: Tariq Toukan Reviewed-by: Boris Pismenny Signed-off-by: Jakub Kicinski --- drivers/net/bonding/bond_options.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/drivers/net/bonding/bond_options.c b/drivers/net/bonding/bond_options.c index a4e4e15f574df..7f0ad97926dee 100644 --- a/drivers/net/bonding/bond_options.c +++ b/drivers/net/bonding/bond_options.c @@ -745,17 +745,17 @@ const struct bond_option *bond_opt_get(unsigned int option) return &bond_opts[option]; } -static void bond_set_xfrm_features(struct net_device *bond_dev, u64 mode) +static bool bond_set_xfrm_features(struct bonding *bond) { if (!IS_ENABLED(CONFIG_XFRM_OFFLOAD)) - return; + return false; - if (mode == BOND_MODE_ACTIVEBACKUP) - bond_dev->wanted_features |= BOND_XFRM_FEATURES; + if (BOND_MODE(bond) == BOND_MODE_ACTIVEBACKUP) + bond->dev->wanted_features |= BOND_XFRM_FEATURES; else - bond_dev->wanted_features &= ~BOND_XFRM_FEATURES; + bond->dev->wanted_features &= ~BOND_XFRM_FEATURES; - netdev_update_features(bond_dev); + return true; } static int bond_option_mode_set(struct bonding *bond, @@ -780,13 +780,14 @@ static int bond_option_mode_set(struct bonding *bond, if (newval->value == BOND_MODE_ALB) bond->params.tlb_dynamic_lb = 1; - if (bond->dev->reg_state == NETREG_REGISTERED) - bond_set_xfrm_features(bond->dev, newval->value); - /* don't cache arp_validate between modes */ bond->params.arp_validate = BOND_ARP_VALIDATE_NONE; bond->params.mode = newval->value; + if (bond->dev->reg_state == NETREG_REGISTERED) + if (bond_set_xfrm_features(bond)) + netdev_update_features(bond->dev); + return 0; } -- GitLab From 89df6a8104706f94800ed527ad73d07465ea4d12 Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Sun, 17 Jan 2021 16:59:46 +0200 Subject: [PATCH 1043/2012] net/bonding: Implement TLS TX device offload Implement TLS TX device offload for bonding interfaces. This allows kTLS sockets running on a bond to benefit from the device offload on capable lower devices. To allow a simple and fast maintenance of the TLS context in SW and lower devices, we bind the TLS socket to a specific lower dev. To achieve a behavior similar to SW kTLS, we support only balance-xor and 802.3ad modes, with xmit_hash_policy=layer3+4. This is enforced in bond_sk_check(), done in a previous patch. For the above configuration, the SW implementation keeps picking the same exact lower dev for all the socket's SKBs. The device offload behaves similarly, making the decision once at the connection creation. Per socket, the TLS module should work directly with the lowest netdev in chain, to call the tls_dev_ops operations. As the bond interface is being bypassed by the TLS module, interacting directly against the lower devs, there is no way for the bond interface to disable its device offload capabilities, as long as the mode/policy config allows it. Hence, the feature flag is not directly controllable, but just reflects the current offload status based on the logic under bond_sk_check(). Signed-off-by: Tariq Toukan Reviewed-by: Boris Pismenny Signed-off-by: Jakub Kicinski --- drivers/net/bonding/bond_main.c | 29 +++++++++++++++++++++++++++++ drivers/net/bonding/bond_options.c | 27 +++++++++++++++++++++++++-- include/net/bonding.h | 2 ++ 3 files changed, 56 insertions(+), 2 deletions(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 09524f99c7534..539c6bc218df4 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -83,6 +83,9 @@ #include #include #include +#if IS_ENABLED(CONFIG_TLS_DEVICE) +#include +#endif #include "bonding_priv.h" @@ -1225,6 +1228,13 @@ static netdev_features_t bond_fix_features(struct net_device *dev, netdev_features_t mask; struct slave *slave; +#if IS_ENABLED(CONFIG_TLS_DEVICE) + if (bond_sk_check(bond)) + features |= BOND_TLS_FEATURES; + else + features &= ~BOND_TLS_FEATURES; +#endif + mask = features; features &= ~NETIF_F_ONE_FOR_ALL; @@ -4647,6 +4657,16 @@ static struct net_device *bond_sk_get_lower_dev(struct net_device *dev, return lower; } +#if IS_ENABLED(CONFIG_TLS_DEVICE) +static netdev_tx_t bond_tls_device_xmit(struct bonding *bond, struct sk_buff *skb, + struct net_device *dev) +{ + if (likely(bond_get_slave_by_dev(bond, tls_get_ctx(skb->sk)->netdev))) + return bond_dev_queue_xmit(bond, skb, tls_get_ctx(skb->sk)->netdev); + return bond_tx_drop(dev, skb); +} +#endif + static netdev_tx_t __bond_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct bonding *bond = netdev_priv(dev); @@ -4655,6 +4675,11 @@ static netdev_tx_t __bond_start_xmit(struct sk_buff *skb, struct net_device *dev !bond_slave_override(bond, skb)) return NETDEV_TX_OK; +#if IS_ENABLED(CONFIG_TLS_DEVICE) + if (skb->sk && tls_is_sk_tx_device_offloaded(skb->sk)) + return bond_tls_device_xmit(bond, skb, dev); +#endif + switch (BOND_MODE(bond)) { case BOND_MODE_ROUNDROBIN: return bond_xmit_roundrobin(skb, dev); @@ -4855,6 +4880,10 @@ void bond_setup(struct net_device *bond_dev) if (BOND_MODE(bond) == BOND_MODE_ACTIVEBACKUP) bond_dev->features |= BOND_XFRM_FEATURES; #endif /* CONFIG_XFRM_OFFLOAD */ +#if IS_ENABLED(CONFIG_TLS_DEVICE) + if (bond_sk_check(bond)) + bond_dev->features |= BOND_TLS_FEATURES; +#endif } /* Destroy a bonding device. diff --git a/drivers/net/bonding/bond_options.c b/drivers/net/bonding/bond_options.c index 7f0ad97926dee..8fcbf7f9c7b2a 100644 --- a/drivers/net/bonding/bond_options.c +++ b/drivers/net/bonding/bond_options.c @@ -758,6 +758,19 @@ static bool bond_set_xfrm_features(struct bonding *bond) return true; } +static bool bond_set_tls_features(struct bonding *bond) +{ + if (!IS_ENABLED(CONFIG_TLS_DEVICE)) + return false; + + if (bond_sk_check(bond)) + bond->dev->wanted_features |= BOND_TLS_FEATURES; + else + bond->dev->wanted_features &= ~BOND_TLS_FEATURES; + + return true; +} + static int bond_option_mode_set(struct bonding *bond, const struct bond_opt_value *newval) { @@ -784,9 +797,15 @@ static int bond_option_mode_set(struct bonding *bond, bond->params.arp_validate = BOND_ARP_VALIDATE_NONE; bond->params.mode = newval->value; - if (bond->dev->reg_state == NETREG_REGISTERED) - if (bond_set_xfrm_features(bond)) + if (bond->dev->reg_state == NETREG_REGISTERED) { + bool update = false; + + update |= bond_set_xfrm_features(bond); + update |= bond_set_tls_features(bond); + + if (update) netdev_update_features(bond->dev); + } return 0; } @@ -1220,6 +1239,10 @@ static int bond_option_xmit_hash_policy_set(struct bonding *bond, newval->string, newval->value); bond->params.xmit_policy = newval->value; + if (bond->dev->reg_state == NETREG_REGISTERED) + if (bond_set_tls_features(bond)) + netdev_update_features(bond->dev); + return 0; } diff --git a/include/net/bonding.h b/include/net/bonding.h index 21497193c4a47..97fbec02df2d7 100644 --- a/include/net/bonding.h +++ b/include/net/bonding.h @@ -89,6 +89,8 @@ #define BOND_XFRM_FEATURES (NETIF_F_HW_ESP | NETIF_F_HW_ESP_TX_CSUM | \ NETIF_F_GSO_ESP) +#define BOND_TLS_FEATURES (NETIF_F_HW_TLS_TX) + #ifdef CONFIG_NET_POLL_CONTROLLER extern atomic_t netpoll_block_tx; -- GitLab From dc5809f9e2b674a489723bd8d0131c97e565ca8d Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Sun, 17 Jan 2021 16:59:47 +0200 Subject: [PATCH 1044/2012] net/bonding: Declare TLS RX device offload support Following the description in previous patch (for TX): As the bond interface is being bypassed by the TLS module, interacting directly against the lower devs, there is no way for the bond interface to disable its device offload capabilities, as long as the mode/policy config allows it. Hence, the feature flag is not directly controllable, but just reflects the offload status based on the logic under bond_sk_check(). Here we just declare RX device offload support, and expose it via the NETIF_F_HW_TLS_RX flag. Signed-off-by: Tariq Toukan Reviewed-by: Boris Pismenny Signed-off-by: Jakub Kicinski --- include/net/bonding.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/net/bonding.h b/include/net/bonding.h index 97fbec02df2d7..019e998d944ad 100644 --- a/include/net/bonding.h +++ b/include/net/bonding.h @@ -89,7 +89,7 @@ #define BOND_XFRM_FEATURES (NETIF_F_HW_ESP | NETIF_F_HW_ESP_TX_CSUM | \ NETIF_F_GSO_ESP) -#define BOND_TLS_FEATURES (NETIF_F_HW_TLS_TX) +#define BOND_TLS_FEATURES (NETIF_F_HW_TLS_TX | NETIF_F_HW_TLS_RX) #ifdef CONFIG_NET_POLL_CONTROLLER extern atomic_t netpoll_block_tx; -- GitLab From 153cbd137f0ad9ee334fa805155b983e25a432e7 Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Sun, 17 Jan 2021 16:59:48 +0200 Subject: [PATCH 1045/2012] net/tls: Device offload to use lowest netdevice in chain Do not call the tls_dev_ops of upper devices. Instead, ask them for the proper lowest device and communicate with it directly. Signed-off-by: Tariq Toukan Reviewed-by: Boris Pismenny Signed-off-by: Jakub Kicinski --- net/tls/tls_device.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c index f7fb7d2c1de1f..75ceea0a41bf3 100644 --- a/net/tls/tls_device.c +++ b/net/tls/tls_device.c @@ -113,7 +113,7 @@ static struct net_device *get_netdev_for_sock(struct sock *sk) struct net_device *netdev = NULL; if (likely(dst)) { - netdev = dst->dev; + netdev = netdev_sk_get_lowest_dev(dst->dev, sk); dev_hold(netdev); } -- GitLab From 4e5a73329051e5b24fb1d715a5417ef3f95b08a6 Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Sun, 17 Jan 2021 16:59:49 +0200 Subject: [PATCH 1046/2012] net/tls: Except bond interface from some TLS checks In the tls_dev_event handler, ignore tlsdev_ops requirement for bond interfaces, they do not exist as the interaction is done directly with the lower device. Also, make the validate function pass when it's called with the upper bond interface. Signed-off-by: Tariq Toukan Reviewed-by: Boris Pismenny Signed-off-by: Jakub Kicinski --- net/tls/tls_device.c | 2 ++ net/tls/tls_device_fallback.c | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c index 75ceea0a41bf3..d9cd229aa111b 100644 --- a/net/tls/tls_device.c +++ b/net/tls/tls_device.c @@ -1329,6 +1329,8 @@ static int tls_dev_event(struct notifier_block *this, unsigned long event, switch (event) { case NETDEV_REGISTER: case NETDEV_FEAT_CHANGE: + if (netif_is_bond_master(dev)) + return NOTIFY_DONE; if ((dev->features & NETIF_F_HW_TLS_RX) && !dev->tlsdev_ops->tls_dev_resync) return NOTIFY_BAD; diff --git a/net/tls/tls_device_fallback.c b/net/tls/tls_device_fallback.c index d946817ed0652..cacf040872c74 100644 --- a/net/tls/tls_device_fallback.c +++ b/net/tls/tls_device_fallback.c @@ -424,7 +424,7 @@ struct sk_buff *tls_validate_xmit_skb(struct sock *sk, struct net_device *dev, struct sk_buff *skb) { - if (dev == tls_get_ctx(sk)->netdev) + if (dev == tls_get_ctx(sk)->netdev || netif_is_bond_master(dev)) return skb; return tls_sw_fallback(sk, skb); -- GitLab From 7cfabe4f85a52a06943236a7747f1d868f3cac4b Mon Sep 17 00:00:00 2001 From: Tom Rix Date: Sun, 17 Jan 2021 10:15:19 -0800 Subject: [PATCH 1047/2012] arcnet: fix macro name when DEBUG is defined MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When DEBUG is defined this error occurs drivers/net/arcnet/com20020_cs.c:70:15: error: ‘com20020_REG_W_ADDR_HI’ undeclared (first use in this function); did you mean ‘COM20020_REG_W_ADDR_HI’? ioaddr, com20020_REG_W_ADDR_HI); ^~~~~~~~~~~~~~~~~~~~~~ From reviewing the context, the suggestion is what is meant. Signed-off-by: Tom Rix Acked-by: Joe Perches Link: https://lore.kernel.org/r/20210117181519.527625-1-trix@redhat.com Signed-off-by: Jakub Kicinski --- drivers/net/arcnet/com20020_cs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/arcnet/com20020_cs.c b/drivers/net/arcnet/com20020_cs.c index cf607ffcf358e..81223f6bebcc1 100644 --- a/drivers/net/arcnet/com20020_cs.c +++ b/drivers/net/arcnet/com20020_cs.c @@ -67,7 +67,7 @@ static void regdump(struct net_device *dev) /* set up the address register */ count = 0; arcnet_outb((count >> 8) | RDDATAflag | AUTOINCflag, - ioaddr, com20020_REG_W_ADDR_HI); + ioaddr, COM20020_REG_W_ADDR_HI); arcnet_outb(count & 0xff, ioaddr, COM20020_REG_W_ADDR_LO); for (count = 0; count < 256 + 32; count++) { -- GitLab From 99d518970c5a1901e83cdd4a0a6ff5a41ba56a56 Mon Sep 17 00:00:00 2001 From: Tom Rix Date: Sun, 17 Jan 2021 11:10:44 -0800 Subject: [PATCH 1048/2012] net: hns: fix variable used when DEBUG is defined MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When DEBUG is defined this error occurs drivers/net/ethernet/hisilicon/hns/hns_enet.c:1505:36: error: ‘struct net_device’ has no member named ‘ae_handle’; did you mean ‘rx_handler’? assert(skb->queue_mapping < ndev->ae_handle->q_num); ^~~~~~~~~ ae_handle is an element of struct hns_nic_priv, so change ndev to priv. Signed-off-by: Tom Rix Link: https://lore.kernel.org/r/20210117191044.533725-1-trix@redhat.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/hisilicon/hns/hns_enet.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/hisilicon/hns/hns_enet.c b/drivers/net/ethernet/hisilicon/hns/hns_enet.c index 858cb293152a9..5d7824d2b4d47 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_enet.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_enet.c @@ -1502,7 +1502,7 @@ static netdev_tx_t hns_nic_net_xmit(struct sk_buff *skb, { struct hns_nic_priv *priv = netdev_priv(ndev); - assert(skb->queue_mapping < ndev->ae_handle->q_num); + assert(skb->queue_mapping < priv->ae_handle->q_num); return hns_nic_net_xmit_hw(ndev, skb, &tx_ring_data(priv, skb->queue_mapping)); -- GitLab From d502297008142645edf5c791af424ed321e5da84 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Tue, 19 Jan 2021 15:53:35 +1000 Subject: [PATCH 1049/2012] drm/nouveau/nvif: fix method count when pushing an array Reported-by: Lyude Paul Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/include/nvif/push.h | 216 ++++++++++---------- 1 file changed, 108 insertions(+), 108 deletions(-) diff --git a/drivers/gpu/drm/nouveau/include/nvif/push.h b/drivers/gpu/drm/nouveau/include/nvif/push.h index 168d7694ede5c..6d3a8a3d2087b 100644 --- a/drivers/gpu/drm/nouveau/include/nvif/push.h +++ b/drivers/gpu/drm/nouveau/include/nvif/push.h @@ -123,131 +123,131 @@ PUSH_KICK(struct nvif_push *push) } while(0) #endif -#define PUSH_1(X,f,ds,n,c,o,p,s,mA,dA) do { \ - PUSH_##o##_HDR((p), s, mA, (c)+(n)); \ - PUSH_##f(X, (p), X##mA, 1, o, (dA), ds, ""); \ +#define PUSH_1(X,f,ds,n,o,p,s,mA,dA) do { \ + PUSH_##o##_HDR((p), s, mA, (ds)+(n)); \ + PUSH_##f(X, (p), X##mA, 1, o, (dA), ds, ""); \ } while(0) -#define PUSH_2(X,f,ds,n,c,o,p,s,mB,dB,mA,dA,a...) do { \ - PUSH_ASSERT((mB) - (mA) == (1?PUSH_##o##_INC), "mthd1"); \ - PUSH_1(X, DATA_, 1, ds, (c)+(n), o, (p), s, X##mA, (dA), ##a); \ - PUSH_##f(X, (p), X##mB, 0, o, (dB), ds, ""); \ +#define PUSH_2(X,f,ds,n,o,p,s,mB,dB,mA,dA,a...) do { \ + PUSH_ASSERT((mB) - (mA) == (1?PUSH_##o##_INC), "mthd1"); \ + PUSH_1(X, DATA_, 1, (ds) + (n), o, (p), s, X##mA, (dA), ##a); \ + PUSH_##f(X, (p), X##mB, 0, o, (dB), ds, ""); \ } while(0) -#define PUSH_3(X,f,ds,n,c,o,p,s,mB,dB,mA,dA,a...) do { \ - PUSH_ASSERT((mB) - (mA) == (0?PUSH_##o##_INC), "mthd2"); \ - PUSH_2(X, DATA_, 1, ds, (c)+(n), o, (p), s, X##mA, (dA), ##a); \ - PUSH_##f(X, (p), X##mB, 0, o, (dB), ds, ""); \ +#define PUSH_3(X,f,ds,n,o,p,s,mB,dB,mA,dA,a...) do { \ + PUSH_ASSERT((mB) - (mA) == (0?PUSH_##o##_INC), "mthd2"); \ + PUSH_2(X, DATA_, 1, (ds) + (n), o, (p), s, X##mA, (dA), ##a); \ + PUSH_##f(X, (p), X##mB, 0, o, (dB), ds, ""); \ } while(0) -#define PUSH_4(X,f,ds,n,c,o,p,s,mB,dB,mA,dA,a...) do { \ - PUSH_ASSERT((mB) - (mA) == (0?PUSH_##o##_INC), "mthd3"); \ - PUSH_3(X, DATA_, 1, ds, (c)+(n), o, (p), s, X##mA, (dA), ##a); \ - PUSH_##f(X, (p), X##mB, 0, o, (dB), ds, ""); \ +#define PUSH_4(X,f,ds,n,o,p,s,mB,dB,mA,dA,a...) do { \ + PUSH_ASSERT((mB) - (mA) == (0?PUSH_##o##_INC), "mthd3"); \ + PUSH_3(X, DATA_, 1, (ds) + (n), o, (p), s, X##mA, (dA), ##a); \ + PUSH_##f(X, (p), X##mB, 0, o, (dB), ds, ""); \ } while(0) -#define PUSH_5(X,f,ds,n,c,o,p,s,mB,dB,mA,dA,a...) do { \ - PUSH_ASSERT((mB) - (mA) == (0?PUSH_##o##_INC), "mthd4"); \ - PUSH_4(X, DATA_, 1, ds, (c)+(n), o, (p), s, X##mA, (dA), ##a); \ - PUSH_##f(X, (p), X##mB, 0, o, (dB), ds, ""); \ +#define PUSH_5(X,f,ds,n,o,p,s,mB,dB,mA,dA,a...) do { \ + PUSH_ASSERT((mB) - (mA) == (0?PUSH_##o##_INC), "mthd4"); \ + PUSH_4(X, DATA_, 1, (ds) + (n), o, (p), s, X##mA, (dA), ##a); \ + PUSH_##f(X, (p), X##mB, 0, o, (dB), ds, ""); \ } while(0) -#define PUSH_6(X,f,ds,n,c,o,p,s,mB,dB,mA,dA,a...) do { \ - PUSH_ASSERT((mB) - (mA) == (0?PUSH_##o##_INC), "mthd5"); \ - PUSH_5(X, DATA_, 1, ds, (c)+(n), o, (p), s, X##mA, (dA), ##a); \ - PUSH_##f(X, (p), X##mB, 0, o, (dB), ds, ""); \ +#define PUSH_6(X,f,ds,n,o,p,s,mB,dB,mA,dA,a...) do { \ + PUSH_ASSERT((mB) - (mA) == (0?PUSH_##o##_INC), "mthd5"); \ + PUSH_5(X, DATA_, 1, (ds) + (n), o, (p), s, X##mA, (dA), ##a); \ + PUSH_##f(X, (p), X##mB, 0, o, (dB), ds, ""); \ } while(0) -#define PUSH_7(X,f,ds,n,c,o,p,s,mB,dB,mA,dA,a...) do { \ - PUSH_ASSERT((mB) - (mA) == (0?PUSH_##o##_INC), "mthd6"); \ - PUSH_6(X, DATA_, 1, ds, (c)+(n), o, (p), s, X##mA, (dA), ##a); \ - PUSH_##f(X, (p), X##mB, 0, o, (dB), ds, ""); \ +#define PUSH_7(X,f,ds,n,o,p,s,mB,dB,mA,dA,a...) do { \ + PUSH_ASSERT((mB) - (mA) == (0?PUSH_##o##_INC), "mthd6"); \ + PUSH_6(X, DATA_, 1, (ds) + (n), o, (p), s, X##mA, (dA), ##a); \ + PUSH_##f(X, (p), X##mB, 0, o, (dB), ds, ""); \ } while(0) -#define PUSH_8(X,f,ds,n,c,o,p,s,mB,dB,mA,dA,a...) do { \ - PUSH_ASSERT((mB) - (mA) == (0?PUSH_##o##_INC), "mthd7"); \ - PUSH_7(X, DATA_, 1, ds, (c)+(n), o, (p), s, X##mA, (dA), ##a); \ - PUSH_##f(X, (p), X##mB, 0, o, (dB), ds, ""); \ +#define PUSH_8(X,f,ds,n,o,p,s,mB,dB,mA,dA,a...) do { \ + PUSH_ASSERT((mB) - (mA) == (0?PUSH_##o##_INC), "mthd7"); \ + PUSH_7(X, DATA_, 1, (ds) + (n), o, (p), s, X##mA, (dA), ##a); \ + PUSH_##f(X, (p), X##mB, 0, o, (dB), ds, ""); \ } while(0) -#define PUSH_9(X,f,ds,n,c,o,p,s,mB,dB,mA,dA,a...) do { \ - PUSH_ASSERT((mB) - (mA) == (0?PUSH_##o##_INC), "mthd8"); \ - PUSH_8(X, DATA_, 1, ds, (c)+(n), o, (p), s, X##mA, (dA), ##a); \ - PUSH_##f(X, (p), X##mB, 0, o, (dB), ds, ""); \ +#define PUSH_9(X,f,ds,n,o,p,s,mB,dB,mA,dA,a...) do { \ + PUSH_ASSERT((mB) - (mA) == (0?PUSH_##o##_INC), "mthd8"); \ + PUSH_8(X, DATA_, 1, (ds) + (n), o, (p), s, X##mA, (dA), ##a); \ + PUSH_##f(X, (p), X##mB, 0, o, (dB), ds, ""); \ } while(0) -#define PUSH_10(X,f,ds,n,c,o,p,s,mB,dB,mA,dA,a...) do { \ - PUSH_ASSERT((mB) - (mA) == (0?PUSH_##o##_INC), "mthd9"); \ - PUSH_9(X, DATA_, 1, ds, (c)+(n), o, (p), s, X##mA, (dA), ##a); \ - PUSH_##f(X, (p), X##mB, 0, o, (dB), ds, ""); \ +#define PUSH_10(X,f,ds,n,o,p,s,mB,dB,mA,dA,a...) do { \ + PUSH_ASSERT((mB) - (mA) == (0?PUSH_##o##_INC), "mthd9"); \ + PUSH_9(X, DATA_, 1, (ds) + (n), o, (p), s, X##mA, (dA), ##a); \ + PUSH_##f(X, (p), X##mB, 0, o, (dB), ds, ""); \ } while(0) -#define PUSH_1D(X,o,p,s,mA,dA) \ - PUSH_1(X, DATA_, 1, 1, 0, o, (p), s, X##mA, (dA)) -#define PUSH_2D(X,o,p,s,mA,dA,mB,dB) \ - PUSH_2(X, DATA_, 1, 1, 0, o, (p), s, X##mB, (dB), \ - X##mA, (dA)) -#define PUSH_3D(X,o,p,s,mA,dA,mB,dB,mC,dC) \ - PUSH_3(X, DATA_, 1, 1, 0, o, (p), s, X##mC, (dC), \ - X##mB, (dB), \ - X##mA, (dA)) -#define PUSH_4D(X,o,p,s,mA,dA,mB,dB,mC,dC,mD,dD) \ - PUSH_4(X, DATA_, 1, 1, 0, o, (p), s, X##mD, (dD), \ - X##mC, (dC), \ - X##mB, (dB), \ - X##mA, (dA)) -#define PUSH_5D(X,o,p,s,mA,dA,mB,dB,mC,dC,mD,dD,mE,dE) \ - PUSH_5(X, DATA_, 1, 1, 0, o, (p), s, X##mE, (dE), \ - X##mD, (dD), \ - X##mC, (dC), \ - X##mB, (dB), \ - X##mA, (dA)) +#define PUSH_1D(X,o,p,s,mA,dA) \ + PUSH_1(X, DATA_, 1, 0, o, (p), s, X##mA, (dA)) +#define PUSH_2D(X,o,p,s,mA,dA,mB,dB) \ + PUSH_2(X, DATA_, 1, 0, o, (p), s, X##mB, (dB), \ + X##mA, (dA)) +#define PUSH_3D(X,o,p,s,mA,dA,mB,dB,mC,dC) \ + PUSH_3(X, DATA_, 1, 0, o, (p), s, X##mC, (dC), \ + X##mB, (dB), \ + X##mA, (dA)) +#define PUSH_4D(X,o,p,s,mA,dA,mB,dB,mC,dC,mD,dD) \ + PUSH_4(X, DATA_, 1, 0, o, (p), s, X##mD, (dD), \ + X##mC, (dC), \ + X##mB, (dB), \ + X##mA, (dA)) +#define PUSH_5D(X,o,p,s,mA,dA,mB,dB,mC,dC,mD,dD,mE,dE) \ + PUSH_5(X, DATA_, 1, 0, o, (p), s, X##mE, (dE), \ + X##mD, (dD), \ + X##mC, (dC), \ + X##mB, (dB), \ + X##mA, (dA)) #define PUSH_6D(X,o,p,s,mA,dA,mB,dB,mC,dC,mD,dD,mE,dE,mF,dF) \ - PUSH_6(X, DATA_, 1, 1, 0, o, (p), s, X##mF, (dF), \ - X##mE, (dE), \ - X##mD, (dD), \ - X##mC, (dC), \ - X##mB, (dB), \ - X##mA, (dA)) + PUSH_6(X, DATA_, 1, 0, o, (p), s, X##mF, (dF), \ + X##mE, (dE), \ + X##mD, (dD), \ + X##mC, (dC), \ + X##mB, (dB), \ + X##mA, (dA)) #define PUSH_7D(X,o,p,s,mA,dA,mB,dB,mC,dC,mD,dD,mE,dE,mF,dF,mG,dG) \ - PUSH_7(X, DATA_, 1, 1, 0, o, (p), s, X##mG, (dG), \ - X##mF, (dF), \ - X##mE, (dE), \ - X##mD, (dD), \ - X##mC, (dC), \ - X##mB, (dB), \ - X##mA, (dA)) + PUSH_7(X, DATA_, 1, 0, o, (p), s, X##mG, (dG), \ + X##mF, (dF), \ + X##mE, (dE), \ + X##mD, (dD), \ + X##mC, (dC), \ + X##mB, (dB), \ + X##mA, (dA)) #define PUSH_8D(X,o,p,s,mA,dA,mB,dB,mC,dC,mD,dD,mE,dE,mF,dF,mG,dG,mH,dH) \ - PUSH_8(X, DATA_, 1, 1, 0, o, (p), s, X##mH, (dH), \ - X##mG, (dG), \ - X##mF, (dF), \ - X##mE, (dE), \ - X##mD, (dD), \ - X##mC, (dC), \ - X##mB, (dB), \ - X##mA, (dA)) + PUSH_8(X, DATA_, 1, 0, o, (p), s, X##mH, (dH), \ + X##mG, (dG), \ + X##mF, (dF), \ + X##mE, (dE), \ + X##mD, (dD), \ + X##mC, (dC), \ + X##mB, (dB), \ + X##mA, (dA)) #define PUSH_9D(X,o,p,s,mA,dA,mB,dB,mC,dC,mD,dD,mE,dE,mF,dF,mG,dG,mH,dH,mI,dI) \ - PUSH_9(X, DATA_, 1, 1, 0, o, (p), s, X##mI, (dI), \ - X##mH, (dH), \ - X##mG, (dG), \ - X##mF, (dF), \ - X##mE, (dE), \ - X##mD, (dD), \ - X##mC, (dC), \ - X##mB, (dB), \ - X##mA, (dA)) + PUSH_9(X, DATA_, 1, 0, o, (p), s, X##mI, (dI), \ + X##mH, (dH), \ + X##mG, (dG), \ + X##mF, (dF), \ + X##mE, (dE), \ + X##mD, (dD), \ + X##mC, (dC), \ + X##mB, (dB), \ + X##mA, (dA)) #define PUSH_10D(X,o,p,s,mA,dA,mB,dB,mC,dC,mD,dD,mE,dE,mF,dF,mG,dG,mH,dH,mI,dI,mJ,dJ) \ - PUSH_10(X, DATA_, 1, 1, 0, o, (p), s, X##mJ, (dJ), \ - X##mI, (dI), \ - X##mH, (dH), \ - X##mG, (dG), \ - X##mF, (dF), \ - X##mE, (dE), \ - X##mD, (dD), \ - X##mC, (dC), \ - X##mB, (dB), \ - X##mA, (dA)) + PUSH_10(X, DATA_, 1, 0, o, (p), s, X##mJ, (dJ), \ + X##mI, (dI), \ + X##mH, (dH), \ + X##mG, (dG), \ + X##mF, (dF), \ + X##mE, (dE), \ + X##mD, (dD), \ + X##mC, (dC), \ + X##mB, (dB), \ + X##mA, (dA)) -#define PUSH_1P(X,o,p,s,mA,dp,ds) \ - PUSH_1(X, DATAp, ds, ds, 0, o, (p), s, X##mA, (dp)) -#define PUSH_2P(X,o,p,s,mA,dA,mB,dp,ds) \ - PUSH_2(X, DATAp, ds, ds, 0, o, (p), s, X##mB, (dp), \ - X##mA, (dA)) -#define PUSH_3P(X,o,p,s,mA,dA,mB,dB,mC,dp,ds) \ - PUSH_3(X, DATAp, ds, ds, 0, o, (p), s, X##mC, (dp), \ - X##mB, (dB), \ - X##mA, (dA)) +#define PUSH_1P(X,o,p,s,mA,dp,ds) \ + PUSH_1(X, DATAp, ds, 0, o, (p), s, X##mA, (dp)) +#define PUSH_2P(X,o,p,s,mA,dA,mB,dp,ds) \ + PUSH_2(X, DATAp, ds, 0, o, (p), s, X##mB, (dp), \ + X##mA, (dA)) +#define PUSH_3P(X,o,p,s,mA,dA,mB,dB,mC,dp,ds) \ + PUSH_3(X, DATAp, ds, 0, o, (p), s, X##mC, (dp), \ + X##mB, (dB), \ + X##mA, (dA)) #define PUSH_(A,B,C,D,E,F,G,H,I,J,K,L,M,N,O,P,Q,R,S,T,U,V,W,X,IMPL,...) IMPL #define PUSH(A...) PUSH_(A, PUSH_10P, PUSH_10D, \ -- GitLab From 1c4995b0a576d24bb7ead991fb037c8b47ab6e32 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Mon, 18 Jan 2021 17:43:55 +0200 Subject: [PATCH 1050/2012] drm/i915: Only enable DFP 4:4:4->4:2:0 conversion when outputting YCbCr 4:4:4 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Let's not enable the 4:4:4->4:2:0 conversion bit in the DFP unless we're actually outputting YCbCr 4:4:4. It would appear some protocol converters blindy consult this bit even when the source is outputting RGB, resulting in a visual mess. Cc: stable@vger.kernel.org Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/2914 Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20210111164111.13302-1-ville.syrjala@linux.intel.com Fixes: 181567aa9f0d ("drm/i915: Do YCbCr 444->420 conversion via DP protocol converters") Reviewed-by: Jani Nikula (cherry picked from commit 3170a21f7059c4660c469f59bf529f372a57da5f) Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20210118154355.24453-1-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/display/intel_ddi.c | 2 +- drivers/gpu/drm/i915/display/intel_dp.c | 9 +++++---- drivers/gpu/drm/i915/display/intel_dp.h | 3 ++- 3 files changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c index 92940a0c5ef8f..d5ace48b1acee 100644 --- a/drivers/gpu/drm/i915/display/intel_ddi.c +++ b/drivers/gpu/drm/i915/display/intel_ddi.c @@ -3725,7 +3725,7 @@ static void hsw_ddi_pre_enable_dp(struct intel_atomic_state *state, intel_ddi_init_dp_buf_reg(encoder, crtc_state); if (!is_mst) intel_dp_set_power(intel_dp, DP_SET_POWER_D0); - intel_dp_configure_protocol_converter(intel_dp); + intel_dp_configure_protocol_converter(intel_dp, crtc_state); intel_dp_sink_set_decompression_state(intel_dp, crtc_state, true); intel_dp_sink_set_fec_ready(intel_dp, crtc_state); diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index 37f1a10fd0217..09123e8625c49 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -4014,7 +4014,8 @@ static void intel_dp_enable_port(struct intel_dp *intel_dp, intel_de_posting_read(dev_priv, intel_dp->output_reg); } -void intel_dp_configure_protocol_converter(struct intel_dp *intel_dp) +void intel_dp_configure_protocol_converter(struct intel_dp *intel_dp, + const struct intel_crtc_state *crtc_state) { struct drm_i915_private *i915 = dp_to_i915(intel_dp); u8 tmp; @@ -4033,8 +4034,8 @@ void intel_dp_configure_protocol_converter(struct intel_dp *intel_dp) drm_dbg_kms(&i915->drm, "Failed to set protocol converter HDMI mode to %s\n", enableddisabled(intel_dp->has_hdmi_sink)); - tmp = intel_dp->dfp.ycbcr_444_to_420 ? - DP_CONVERSION_TO_YCBCR420_ENABLE : 0; + tmp = crtc_state->output_format == INTEL_OUTPUT_FORMAT_YCBCR444 && + intel_dp->dfp.ycbcr_444_to_420 ? DP_CONVERSION_TO_YCBCR420_ENABLE : 0; if (drm_dp_dpcd_writeb(&intel_dp->aux, DP_PROTOCOL_CONVERTER_CONTROL_1, tmp) != 1) @@ -4088,7 +4089,7 @@ static void intel_enable_dp(struct intel_atomic_state *state, } intel_dp_set_power(intel_dp, DP_SET_POWER_D0); - intel_dp_configure_protocol_converter(intel_dp); + intel_dp_configure_protocol_converter(intel_dp, pipe_config); intel_dp_start_link_train(intel_dp, pipe_config); intel_dp_stop_link_train(intel_dp, pipe_config); diff --git a/drivers/gpu/drm/i915/display/intel_dp.h b/drivers/gpu/drm/i915/display/intel_dp.h index b871a09b69013..05f7ddf7a7952 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.h +++ b/drivers/gpu/drm/i915/display/intel_dp.h @@ -51,7 +51,8 @@ int intel_dp_get_link_train_fallback_values(struct intel_dp *intel_dp, int intel_dp_retrain_link(struct intel_encoder *encoder, struct drm_modeset_acquire_ctx *ctx); void intel_dp_set_power(struct intel_dp *intel_dp, u8 mode); -void intel_dp_configure_protocol_converter(struct intel_dp *intel_dp); +void intel_dp_configure_protocol_converter(struct intel_dp *intel_dp, + const struct intel_crtc_state *crtc_state); void intel_dp_sink_set_decompression_state(struct intel_dp *intel_dp, const struct intel_crtc_state *crtc_state, bool enable); -- GitLab From 0bab9cb2d980d7c075cffb9216155f7835237f98 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Thu, 14 Jan 2021 14:25:35 +0100 Subject: [PATCH 1051/2012] x86/entry: Remove put_ret_addr_in_rdi THUNK macro argument That logic is unused since 320100a5ffe5 ("x86/entry: Remove the TRACE_IRQS cruft") Remove it. Suggested-by: Peter Zijlstra (Intel) Acked-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/YAAszZJ2GcIYZmB5@hirez.programming.kicks-ass.net --- arch/x86/entry/thunk_64.S | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/arch/x86/entry/thunk_64.S b/arch/x86/entry/thunk_64.S index c9a9fbf1655f3..496b11ec469de 100644 --- a/arch/x86/entry/thunk_64.S +++ b/arch/x86/entry/thunk_64.S @@ -10,7 +10,7 @@ #include /* rdi: arg1 ... normal C conventions. rax is saved/restored. */ - .macro THUNK name, func, put_ret_addr_in_rdi=0 + .macro THUNK name, func SYM_FUNC_START_NOALIGN(\name) pushq %rbp movq %rsp, %rbp @@ -25,11 +25,6 @@ SYM_FUNC_START_NOALIGN(\name) pushq %r10 pushq %r11 - .if \put_ret_addr_in_rdi - /* 8(%rbp) is return addr on stack */ - movq 8(%rbp), %rdi - .endif - call \func jmp __thunk_restore SYM_FUNC_END(\name) -- GitLab From f0e386ee0c0b71ea6f7238506a4d0965a2dbef11 Mon Sep 17 00:00:00 2001 From: John Ogness Date: Thu, 14 Jan 2021 18:10:12 +0106 Subject: [PATCH 1052/2012] printk: fix buffer overflow potential for print_text() Before the commit 896fbe20b4e2333fb55 ("printk: use the lockless ringbuffer"), msg_print_text() would only write up to size-1 bytes into the provided buffer. Some callers expect this behavior and append a terminator to returned string. In particular: arch/powerpc/xmon/xmon.c:dump_log_buf() arch/um/kernel/kmsg_dump.c:kmsg_dumper_stdout() msg_print_text() has been replaced by record_print_text(), which currently fills the full size of the buffer. This causes a buffer overflow for the above callers. Change record_print_text() so that it will only use size-1 bytes for text data. Also, for paranoia sakes, add a terminator after the text data. And finally, document this behavior so that it is clear that only size-1 bytes are used and a terminator is added. Fixes: 896fbe20b4e2333fb55 ("printk: use the lockless ringbuffer") Cc: stable@vger.kernel.org # 5.10+ Signed-off-by: John Ogness Reviewed-by: Petr Mladek Acked-by: Sergey Senozhatsky Signed-off-by: Petr Mladek Link: https://lore.kernel.org/r/20210114170412.4819-1-john.ogness@linutronix.de --- kernel/printk/printk.c | 36 +++++++++++++++++++++++++++--------- 1 file changed, 27 insertions(+), 9 deletions(-) diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 96e24074c9626..17fa6dc77053b 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -1292,11 +1292,16 @@ static size_t info_print_prefix(const struct printk_info *info, bool syslog, * done: * * - Add prefix for each line. + * - Drop truncated lines that no longer fit into the buffer. * - Add the trailing newline that has been removed in vprintk_store(). - * - Drop truncated lines that do not longer fit into the buffer. + * - Add a string terminator. + * + * Since the produced string is always terminated, the maximum possible + * return value is @r->text_buf_size - 1; * * Return: The length of the updated/prepared text, including the added - * prefixes and the newline. The dropped line(s) are not counted. + * prefixes and the newline. The terminator is not counted. The dropped + * line(s) are not counted. */ static size_t record_print_text(struct printk_record *r, bool syslog, bool time) @@ -1339,26 +1344,31 @@ static size_t record_print_text(struct printk_record *r, bool syslog, /* * Truncate the text if there is not enough space to add the - * prefix and a trailing newline. + * prefix and a trailing newline and a terminator. */ - if (len + prefix_len + text_len + 1 > buf_size) { + if (len + prefix_len + text_len + 1 + 1 > buf_size) { /* Drop even the current line if no space. */ - if (len + prefix_len + line_len + 1 > buf_size) + if (len + prefix_len + line_len + 1 + 1 > buf_size) break; - text_len = buf_size - len - prefix_len - 1; + text_len = buf_size - len - prefix_len - 1 - 1; truncated = true; } memmove(text + prefix_len, text, text_len); memcpy(text, prefix, prefix_len); + /* + * Increment the prepared length to include the text and + * prefix that were just moved+copied. Also increment for the + * newline at the end of this line. If this is the last line, + * there is no newline, but it will be added immediately below. + */ len += prefix_len + line_len + 1; - if (text_len == line_len) { /* - * Add the trailing newline removed in - * vprintk_store(). + * This is the last line. Add the trailing newline + * removed in vprintk_store(). */ text[prefix_len + line_len] = '\n'; break; @@ -1383,6 +1393,14 @@ static size_t record_print_text(struct printk_record *r, bool syslog, text_len -= line_len + 1; } + /* + * If a buffer was provided, it will be terminated. Space for the + * string terminator is guaranteed to be available. The terminator is + * not counted in the return value. + */ + if (buf_size > 0) + text[len] = 0; + return len; } -- GitLab From ef38237444ce952daf041ed2885918f9f7d1e997 Mon Sep 17 00:00:00 2001 From: Nikita Shubin Date: Mon, 18 Jan 2021 12:05:08 +0300 Subject: [PATCH 1053/2012] gpiolib: add a warning on gpiochip->to_irq defined gpiochip->to_irq method is redefined in gpiochip_add_irqchip. A lot of gpiod driver's still define ->to_irq method, let's give a gentle warning that they can no longer rely on it, so they can remove it on ocassion. Fixes: e0d8972898139 ("gpio: Implement tighter IRQ chip integration") Signed-off-by: Nikita Shubin Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpiolib.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index b02cc2abd3b68..b78a634cca240 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -1489,6 +1489,9 @@ static int gpiochip_add_irqchip(struct gpio_chip *gc, type = IRQ_TYPE_NONE; } + if (gc->to_irq) + chip_warn(gc, "to_irq is redefined in %s and you shouldn't rely on it\n", __func__); + gc->to_irq = gpiochip_to_irq; gc->irq.default_type = type; gc->irq.lock_key = lock_key; -- GitLab From e73b0101ae5124bf7cd3fb5d250302ad2f16a416 Mon Sep 17 00:00:00 2001 From: Baruch Siach Date: Sun, 17 Jan 2021 15:17:02 +0200 Subject: [PATCH 1054/2012] gpio: mvebu: fix pwm .get_state period calculation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The period is the sum of on and off values. That is, calculate period as ($on + $off) / clkrate instead of $off / clkrate - $on / clkrate that makes no sense. Reported-by: Russell King Reviewed-by: Uwe Kleine-König Fixes: 757642f9a584e ("gpio: mvebu: Add limited PWM support") Signed-off-by: Baruch Siach Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-mvebu.c | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/drivers/gpio/gpio-mvebu.c b/drivers/gpio/gpio-mvebu.c index 672681a976f50..a912a8fed197a 100644 --- a/drivers/gpio/gpio-mvebu.c +++ b/drivers/gpio/gpio-mvebu.c @@ -676,20 +676,17 @@ static void mvebu_pwm_get_state(struct pwm_chip *chip, else state->duty_cycle = 1; + val = (unsigned long long) u; /* on duration */ regmap_read(mvpwm->regs, mvebu_pwmreg_blink_off_duration(mvpwm), &u); - val = (unsigned long long) u * NSEC_PER_SEC; + val += (unsigned long long) u; /* period = on + off duration */ + val *= NSEC_PER_SEC; do_div(val, mvpwm->clk_rate); - if (val < state->duty_cycle) { + if (val > UINT_MAX) + state->period = UINT_MAX; + else if (val) + state->period = val; + else state->period = 1; - } else { - val -= state->duty_cycle; - if (val > UINT_MAX) - state->period = UINT_MAX; - else if (val) - state->period = val; - else - state->period = 1; - } regmap_read(mvchip->regs, GPIO_BLINK_EN_OFF + mvchip->offset, &u); if (u) -- GitLab From 18eedf2b5ec7c8ce2bb23d9148cfd63949207414 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 18 Jan 2021 19:18:13 -0800 Subject: [PATCH 1055/2012] gpio: sifive: select IRQ_DOMAIN_HIERARCHY rather than depend on it This is the only driver in the kernel source tree that depends on IRQ_DOMAIN_HIERARCHY instead of selecting it. Since it is not a visible Kconfig symbol, depending on it (expecting a user to set/enable it) doesn't make much sense, so change it to select instead of "depends on". Fixes: 96868dce644d ("gpio/sifive: Add GPIO driver for SiFive SoCs") Signed-off-by: Randy Dunlap Cc: Linus Walleij Cc: Bartosz Golaszewski Cc: linux-gpio@vger.kernel.org Cc: Thierry Reding Cc: Greentime Hu Cc: Yash Shah Signed-off-by: Bartosz Golaszewski --- drivers/gpio/Kconfig | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig index c70f46e80a3b7..64291ce454c3f 100644 --- a/drivers/gpio/Kconfig +++ b/drivers/gpio/Kconfig @@ -521,7 +521,8 @@ config GPIO_SAMA5D2_PIOBU config GPIO_SIFIVE bool "SiFive GPIO support" - depends on OF_GPIO && IRQ_DOMAIN_HIERARCHY + depends on OF_GPIO + select IRQ_DOMAIN_HIERARCHY select GPIO_GENERIC select GPIOLIB_IRQCHIP select REGMAP_MMIO -- GitLab From 298d75c9b18875d2d582dcd5145a45cac8d2bae2 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Mon, 18 Jan 2021 20:49:25 +0100 Subject: [PATCH 1056/2012] gpio: tegra: Add missing dependencies Commit efcdca286eef ("gpio: tegra: Convert to gpio_irq_chip") moved the Tegra GPIO driver to the generic GPIO IRQ chip infrastructure and made the IRQ domain hierarchical, so the driver needs to pull in the support infrastructure via the GPIOLIB_IRQCHIP and IRQ_DOMAIN_HIERARCHY Kconfig options. Fixes: efcdca286eef ("gpio: tegra: Convert to gpio_irq_chip") Reported-by: kernel test robot Signed-off-by: Thierry Reding Signed-off-by: Bartosz Golaszewski --- drivers/gpio/Kconfig | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig index 64291ce454c3f..dea65d85594fc 100644 --- a/drivers/gpio/Kconfig +++ b/drivers/gpio/Kconfig @@ -598,6 +598,8 @@ config GPIO_TEGRA default ARCH_TEGRA depends on ARCH_TEGRA || COMPILE_TEST depends on OF_GPIO + select GPIOLIB_IRQCHIP + select IRQ_DOMAIN_HIERARCHY help Say yes here to support GPIO pins on NVIDIA Tegra SoCs. -- GitLab From ef02684c4e67d8c35ac83083564135bc7b1d3445 Mon Sep 17 00:00:00 2001 From: Patrik Jakobsson Date: Mon, 18 Jan 2021 21:36:15 +0100 Subject: [PATCH 1057/2012] usb: bdc: Make bdc pci driver depend on BROKEN The bdc pci driver is going to be removed due to it not existing in the wild. This patch turns off compilation of the driver so that stable kernels can also pick up the change. This helps the out-of-tree facetimehd webcam driver as the pci id conflicts with bdc. Cc: Al Cooper Cc: Acked-by: Felipe Balbi Signed-off-by: Patrik Jakobsson Link: https://lore.kernel.org/r/20210118203615.13995-1-patrik.r.jakobsson@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/udc/bdc/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/gadget/udc/bdc/Kconfig b/drivers/usb/gadget/udc/bdc/Kconfig index 3e88c7670b2ed..fb01ff47b64cf 100644 --- a/drivers/usb/gadget/udc/bdc/Kconfig +++ b/drivers/usb/gadget/udc/bdc/Kconfig @@ -17,7 +17,7 @@ if USB_BDC_UDC comment "Platform Support" config USB_BDC_PCI tristate "BDC support for PCIe based platforms" - depends on USB_PCI + depends on USB_PCI && BROKEN default USB_BDC_UDC help Enable support for platforms which have BDC connected through PCIe, such as Lego3 FPGA platform. -- GitLab From 43b67309b6b2a3c08396cc9b3f83f21aa529d273 Mon Sep 17 00:00:00 2001 From: Pan Bian Date: Tue, 19 Jan 2021 04:11:27 -0800 Subject: [PATCH 1058/2012] drm/atomic: put state on error path Put the state before returning error code. Fixes: 44596b8c4750 ("drm/atomic: Unify conflicting encoder handling.") Signed-off-by: Pan Bian Cc: stable@vger.kernel.org Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20210119121127.84127-1-bianpan2016@163.com --- drivers/gpu/drm/drm_atomic_helper.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/drm_atomic_helper.c b/drivers/gpu/drm/drm_atomic_helper.c index ba1507036f265..4a8cbec832bc4 100644 --- a/drivers/gpu/drm/drm_atomic_helper.c +++ b/drivers/gpu/drm/drm_atomic_helper.c @@ -3021,7 +3021,7 @@ int drm_atomic_helper_set_config(struct drm_mode_set *set, ret = handle_conflicting_encoders(state, true); if (ret) - return ret; + goto fail; ret = drm_atomic_commit(state); -- GitLab From abbc4d6ecd07fa246fd597b5d8fced28f0bcc606 Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Mon, 18 Jan 2021 15:46:39 +0100 Subject: [PATCH 1059/2012] drm/vram-helper: Reuse existing page mappings in vmap MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For performance, BO page mappings can stay in place even if the map counter has returned to 0. In these cases, the existing page mapping has to be reused by the next vmap operation. Otherwise a new mapping would be installed and the old mapping's pages leak. Fix the issue by reusing existing page mappings for vmap operations. Signed-off-by: Thomas Zimmermann Fixes: 1086db71a1db ("drm/vram-helper: Remove invariant parameters from internal kmap function") Acked-by: Christian König Tested-by: Eli Cohen Reported-by: Eli Cohen Reported-by: kernel test robot Cc: Daniel Vetter Cc: Christian König Cc: Maarten Lankhorst Cc: Maxime Ripard Cc: David Airlie Cc: Daniel Vetter Cc: dri-devel@lists.freedesktop.org Link: https://patchwork.freedesktop.org/patch/msgid/20210118144639.27307-1-tzimmermann@suse.de --- drivers/gpu/drm/drm_gem_vram_helper.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/drm_gem_vram_helper.c b/drivers/gpu/drm/drm_gem_vram_helper.c index 02ca22e90290d..0b232a73c1b74 100644 --- a/drivers/gpu/drm/drm_gem_vram_helper.c +++ b/drivers/gpu/drm/drm_gem_vram_helper.c @@ -387,9 +387,16 @@ static int drm_gem_vram_kmap_locked(struct drm_gem_vram_object *gbo, if (gbo->vmap_use_count > 0) goto out; - ret = ttm_bo_vmap(&gbo->bo, &gbo->map); - if (ret) - return ret; + /* + * VRAM helpers unmap the BO only on demand. So the previous + * page mapping might still be around. Only vmap if the there's + * no mapping present. + */ + if (dma_buf_map_is_null(&gbo->map)) { + ret = ttm_bo_vmap(&gbo->bo, &gbo->map); + if (ret) + return ret; + } out: ++gbo->vmap_use_count; @@ -577,6 +584,7 @@ static void drm_gem_vram_bo_driver_move_notify(struct drm_gem_vram_object *gbo, return; ttm_bo_vunmap(bo, &gbo->map); + dma_buf_map_clear(&gbo->map); /* explicitly clear mapping for next vmap call */ } static int drm_gem_vram_bo_driver_move(struct drm_gem_vram_object *gbo, -- GitLab From 2b73649cee65b8e33c75c66348cb1bfe0ff9d766 Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Tue, 19 Jan 2021 23:21:43 +0800 Subject: [PATCH 1060/2012] ALSA: hda: Balance runtime/system PM if direct-complete is disabled After hibernation, HDA controller can't be runtime-suspended after commit 215a22ed31a1 ("ALSA: hda: Refactor codjc PM to use direct-complete optimization"), which enables direct-complete for HDA codec. The HDA codec driver didn't expect direct-complete will be disabled after it returns a positive value from prepare() callback. However, there are some places that PM core can disable direct-complete. For instance, system hibernation or when codec has subordinates like LEDs. So if the codec is prepared for direct-complete but PM core still calls codec's suspend or freeze callback, partially revert the commit and take the original approach, which uses pm_runtime_force_*() helpers to ensure PM refcount are balanced. Meanwhile, still keep prepare() and complete() callbacks to enable direct-complete and request a resume for jack detection, respectively. Reported-by: Kenneth R. Crudup Fixes: 215a22ed31a1 ("ALSA: hda: Refactor codec PM to use direct-complete optimization") Signed-off-by: Kai-Heng Feng Link: https://lore.kernel.org/r/20210119152145.346558-1-kai.heng.feng@canonical.com Signed-off-by: Takashi Iwai --- sound/pci/hda/hda_codec.c | 24 +++++++----------------- 1 file changed, 7 insertions(+), 17 deletions(-) diff --git a/sound/pci/hda/hda_codec.c b/sound/pci/hda/hda_codec.c index 687216e745267..eec1775dfffe9 100644 --- a/sound/pci/hda/hda_codec.c +++ b/sound/pci/hda/hda_codec.c @@ -2934,7 +2934,7 @@ static void hda_call_codec_resume(struct hda_codec *codec) snd_hdac_leave_pm(&codec->core); } -static int hda_codec_suspend(struct device *dev) +static int hda_codec_runtime_suspend(struct device *dev) { struct hda_codec *codec = dev_to_hda_codec(dev); unsigned int state; @@ -2953,7 +2953,7 @@ static int hda_codec_suspend(struct device *dev) return 0; } -static int hda_codec_resume(struct device *dev) +static int hda_codec_runtime_resume(struct device *dev) { struct hda_codec *codec = dev_to_hda_codec(dev); @@ -2968,16 +2968,6 @@ static int hda_codec_resume(struct device *dev) return 0; } -static int hda_codec_runtime_suspend(struct device *dev) -{ - return hda_codec_suspend(dev); -} - -static int hda_codec_runtime_resume(struct device *dev) -{ - return hda_codec_resume(dev); -} - #endif /* CONFIG_PM */ #ifdef CONFIG_PM_SLEEP @@ -2998,31 +2988,31 @@ static void hda_codec_pm_complete(struct device *dev) static int hda_codec_pm_suspend(struct device *dev) { dev->power.power_state = PMSG_SUSPEND; - return hda_codec_suspend(dev); + return pm_runtime_force_suspend(dev); } static int hda_codec_pm_resume(struct device *dev) { dev->power.power_state = PMSG_RESUME; - return hda_codec_resume(dev); + return pm_runtime_force_resume(dev); } static int hda_codec_pm_freeze(struct device *dev) { dev->power.power_state = PMSG_FREEZE; - return hda_codec_suspend(dev); + return pm_runtime_force_suspend(dev); } static int hda_codec_pm_thaw(struct device *dev) { dev->power.power_state = PMSG_THAW; - return hda_codec_resume(dev); + return pm_runtime_force_resume(dev); } static int hda_codec_pm_restore(struct device *dev) { dev->power.power_state = PMSG_RESTORE; - return hda_codec_resume(dev); + return pm_runtime_force_resume(dev); } #endif /* CONFIG_PM_SLEEP */ -- GitLab From 9c7d9017a49fb8516c13b7bff59b7da2abed23e1 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 8 Jan 2021 19:05:59 +0100 Subject: [PATCH 1061/2012] x86: PM: Register syscore_ops for scale invariance On x86 scale invariace tends to be disabled during resume from suspend-to-RAM, because the MPERF or APERF MSR values are not as expected then due to updates taking place after the platform firmware has been invoked to complete the suspend transition. That, of course, is not desirable, especially if the schedutil scaling governor is in use, because the lack of scale invariance causes it to be less reliable. To counter that effect, modify init_freq_invariance() to register a syscore_ops object for scale invariance with the ->resume callback pointing to init_counter_refs() which will run on the CPU starting the resume transition (the other CPUs will be taken care of the "online" operations taking place later). Fixes: e2b0d619b400 ("x86, sched: check for counters overflow in frequency invariant accounting") Signed-off-by: Rafael J. Wysocki Signed-off-by: Peter Zijlstra (Intel) Acked-by: Giovanni Gherdovich Link: https://lkml.kernel.org/r/1803209.Mvru99baaF@kreacher --- arch/x86/kernel/smpboot.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 8ca66af96a547..117e24fbfd8a0 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -56,6 +56,7 @@ #include #include #include +#include #include #include @@ -2083,6 +2084,23 @@ static void init_counter_refs(void) this_cpu_write(arch_prev_mperf, mperf); } +#ifdef CONFIG_PM_SLEEP +static struct syscore_ops freq_invariance_syscore_ops = { + .resume = init_counter_refs, +}; + +static void register_freq_invariance_syscore_ops(void) +{ + /* Bail out if registered already. */ + if (freq_invariance_syscore_ops.node.prev) + return; + + register_syscore_ops(&freq_invariance_syscore_ops); +} +#else +static inline void register_freq_invariance_syscore_ops(void) {} +#endif + static void init_freq_invariance(bool secondary, bool cppc_ready) { bool ret = false; @@ -2109,6 +2127,7 @@ static void init_freq_invariance(bool secondary, bool cppc_ready) if (ret) { init_counter_refs(); static_branch_enable(&arch_scale_freq_key); + register_freq_invariance_syscore_ops(); pr_info("Estimated ratio of average max frequency by base frequency (times 1024): %llu\n", arch_max_freq_ratio); } else { pr_debug("Couldn't determine max cpu frequency, necessary for scale-invariant accounting.\n"); -- GitLab From dc9c9e72ff3ba01ae63e6263ac26234ba1869cd7 Mon Sep 17 00:00:00 2001 From: Yunjian Wang Date: Fri, 15 Jan 2021 12:46:20 +0800 Subject: [PATCH 1062/2012] vhost_net: avoid tx queue stuck when sendmsg fails Currently the driver doesn't drop a packet which can't be sent by tun (e.g bad packet). In this case, the driver will always process the same packet lead to the tx queue stuck. To fix this issue: 1. in the case of persistent failure (e.g bad packet), the driver can skip this descriptor by ignoring the error. 2. in the case of transient failure (e.g -ENOBUFS, -EAGAIN and -ENOMEM), the driver schedules the worker to try again. Signed-off-by: Yunjian Wang Acked-by: Jason Wang Acked-by: Willem de Bruijn Acked-by: Michael S. Tsirkin Link: https://lore.kernel.org/r/1610685980-38608-1-git-send-email-wangyunjian@huawei.com Signed-off-by: Jakub Kicinski --- drivers/vhost/net.c | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index 3b744031ec8f2..df82b124170ec 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -828,14 +828,15 @@ static void handle_tx_copy(struct vhost_net *net, struct socket *sock) msg.msg_flags &= ~MSG_MORE; } - /* TODO: Check specific error and bomb out unless ENOBUFS? */ err = sock->ops->sendmsg(sock, &msg, len); if (unlikely(err < 0)) { - vhost_discard_vq_desc(vq, 1); - vhost_net_enable_vq(net, vq); - break; - } - if (err != len) + if (err == -EAGAIN || err == -ENOMEM || err == -ENOBUFS) { + vhost_discard_vq_desc(vq, 1); + vhost_net_enable_vq(net, vq); + break; + } + pr_debug("Fail to send packet: err %d", err); + } else if (unlikely(err != len)) pr_debug("Truncated TX packet: len %d != %zd\n", err, len); done: @@ -924,7 +925,6 @@ static void handle_tx_zerocopy(struct vhost_net *net, struct socket *sock) msg.msg_flags &= ~MSG_MORE; } - /* TODO: Check specific error and bomb out unless ENOBUFS? */ err = sock->ops->sendmsg(sock, &msg, len); if (unlikely(err < 0)) { if (zcopy_used) { @@ -933,11 +933,13 @@ static void handle_tx_zerocopy(struct vhost_net *net, struct socket *sock) nvq->upend_idx = ((unsigned)nvq->upend_idx - 1) % UIO_MAXIOV; } - vhost_discard_vq_desc(vq, 1); - vhost_net_enable_vq(net, vq); - break; - } - if (err != len) + if (err == -EAGAIN || err == -ENOMEM || err == -ENOBUFS) { + vhost_discard_vq_desc(vq, 1); + vhost_net_enable_vq(net, vq); + break; + } + pr_debug("Fail to send packet: err %d", err); + } else if (unlikely(err != len)) pr_debug("Truncated TX packet: " " len %d != %zd\n", err, len); if (!zcopy_used) -- GitLab From 7e238de8283acd32c26c2bc2a50672d0ea862ff7 Mon Sep 17 00:00:00 2001 From: Oleksandr Mazur Date: Tue, 19 Jan 2021 10:53:33 +0200 Subject: [PATCH 1063/2012] net: core: devlink: use right genl user_ptr when handling port param get/set Fix incorrect user_ptr dereferencing when handling port param get/set: idx [0] stores the 'struct devlink' pointer; idx [1] stores the 'struct devlink_port' pointer; Fixes: 637989b5d77e ("devlink: Always use user_ptr[0] for devlink and simplify post_doit") CC: Parav Pandit Signed-off-by: Oleksandr Mazur Signed-off-by: Vadym Kochan Link: https://lore.kernel.org/r/20210119085333.16833-1-vadym.kochan@plvision.eu Signed-off-by: Jakub Kicinski --- net/core/devlink.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/core/devlink.c b/net/core/devlink.c index ee828e4b1007e..738d4344d6799 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -4146,7 +4146,7 @@ out: static int devlink_nl_cmd_port_param_get_doit(struct sk_buff *skb, struct genl_info *info) { - struct devlink_port *devlink_port = info->user_ptr[0]; + struct devlink_port *devlink_port = info->user_ptr[1]; struct devlink_param_item *param_item; struct sk_buff *msg; int err; @@ -4175,7 +4175,7 @@ static int devlink_nl_cmd_port_param_get_doit(struct sk_buff *skb, static int devlink_nl_cmd_port_param_set_doit(struct sk_buff *skb, struct genl_info *info) { - struct devlink_port *devlink_port = info->user_ptr[0]; + struct devlink_port *devlink_port = info->user_ptr[1]; return __devlink_nl_cmd_param_set_doit(devlink_port->devlink, devlink_port->index, -- GitLab From 7eab14de73a8028f770e703962c5437a2b0dda82 Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Sat, 16 Jan 2021 16:13:22 +0000 Subject: [PATCH 1064/2012] mdio, phy: fix -Wshadow warnings triggered by nested container_of() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit container_of() macro hides a local variable '__mptr' inside. This becomes a problem when several container_of() are nested in each other within single line or plain macros. As C preprocessor doesn't support generating random variable names, the sole solution is to avoid defining macros that consist only of container_of() calls, or they will self-shadow '__mptr' each time: In file included from ./include/linux/bitmap.h:10, from drivers/net/phy/phy_device.c:12: drivers/net/phy/phy_device.c: In function ‘phy_device_release’: ./include/linux/kernel.h:693:8: warning: declaration of ‘__mptr’ shadows a previous local [-Wshadow] 693 | void *__mptr = (void *)(ptr); \ | ^~~~~~ ./include/linux/phy.h:647:26: note: in expansion of macro ‘container_of’ 647 | #define to_phy_device(d) container_of(to_mdio_device(d), \ | ^~~~~~~~~~~~ ./include/linux/mdio.h:52:27: note: in expansion of macro ‘container_of’ 52 | #define to_mdio_device(d) container_of(d, struct mdio_device, dev) | ^~~~~~~~~~~~ ./include/linux/phy.h:647:39: note: in expansion of macro ‘to_mdio_device’ 647 | #define to_phy_device(d) container_of(to_mdio_device(d), \ | ^~~~~~~~~~~~~~ drivers/net/phy/phy_device.c:217:8: note: in expansion of macro ‘to_phy_device’ 217 | kfree(to_phy_device(dev)); | ^~~~~~~~~~~~~ ./include/linux/kernel.h:693:8: note: shadowed declaration is here 693 | void *__mptr = (void *)(ptr); \ | ^~~~~~ ./include/linux/phy.h:647:26: note: in expansion of macro ‘container_of’ 647 | #define to_phy_device(d) container_of(to_mdio_device(d), \ | ^~~~~~~~~~~~ drivers/net/phy/phy_device.c:217:8: note: in expansion of macro ‘to_phy_device’ 217 | kfree(to_phy_device(dev)); | ^~~~~~~~~~~~~ As they are declared in header files, these warnings are highly repetitive and very annoying (along with the one from linux/pci.h). Convert the related macros from linux/{mdio,phy}.h to static inlines to avoid self-shadowing and potentially improve bug-catching. No functional changes implied. Signed-off-by: Alexander Lobakin Reviewed-by: Andrew Lunn Link: https://lore.kernel.org/r/20210116161246.67075-1-alobakin@pm.me Signed-off-by: Jakub Kicinski --- include/linux/mdio.h | 23 ++++++++++++++++++----- include/linux/phy.h | 7 +++++-- 2 files changed, 23 insertions(+), 7 deletions(-) diff --git a/include/linux/mdio.h b/include/linux/mdio.h index dbd69b3d170b4..ffb787d5ebde3 100644 --- a/include/linux/mdio.h +++ b/include/linux/mdio.h @@ -49,7 +49,11 @@ struct mdio_device { unsigned int reset_assert_delay; unsigned int reset_deassert_delay; }; -#define to_mdio_device(d) container_of(d, struct mdio_device, dev) + +static inline struct mdio_device *to_mdio_device(const struct device *dev) +{ + return container_of(dev, struct mdio_device, dev); +} /* struct mdio_driver_common: Common to all MDIO drivers */ struct mdio_driver_common { @@ -57,8 +61,12 @@ struct mdio_driver_common { int flags; }; #define MDIO_DEVICE_FLAG_PHY 1 -#define to_mdio_common_driver(d) \ - container_of(d, struct mdio_driver_common, driver) + +static inline struct mdio_driver_common * +to_mdio_common_driver(const struct device_driver *driver) +{ + return container_of(driver, struct mdio_driver_common, driver); +} /* struct mdio_driver: Generic MDIO driver */ struct mdio_driver { @@ -73,8 +81,13 @@ struct mdio_driver { /* Clears up any memory if needed */ void (*remove)(struct mdio_device *mdiodev); }; -#define to_mdio_driver(d) \ - container_of(to_mdio_common_driver(d), struct mdio_driver, mdiodrv) + +static inline struct mdio_driver * +to_mdio_driver(const struct device_driver *driver) +{ + return container_of(to_mdio_common_driver(driver), struct mdio_driver, + mdiodrv); +} /* device driver data */ static inline void mdiodev_set_drvdata(struct mdio_device *mdio, void *data) diff --git a/include/linux/phy.h b/include/linux/phy.h index 24fcc6456a9e9..bc323fbdd21ea 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -648,8 +648,11 @@ struct phy_device { const struct macsec_ops *macsec_ops; #endif }; -#define to_phy_device(d) container_of(to_mdio_device(d), \ - struct phy_device, mdio) + +static inline struct phy_device *to_phy_device(const struct device *dev) +{ + return container_of(to_mdio_device(dev), struct phy_device, mdio); +} /** * struct phy_tdr_config - Configuration of a TDR raw test -- GitLab From 8eed01b5ca9c1deff329ad44f08e2041ca14842c Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 18 Jan 2021 16:06:55 +0100 Subject: [PATCH 1065/2012] mdio-bitbang: Export mdiobb_{read,write}() Export mdiobb_read() and mdiobb_write(), so Ethernet controller drivers can call them from their MDIO read/write wrappers. Signed-off-by: Geert Uytterhoeven Tested-by: Wolfram Sang Reviewed-by: Florian Fainelli Signed-off-by: Jakub Kicinski --- drivers/net/mdio/mdio-bitbang.c | 6 ++++-- include/linux/mdio-bitbang.h | 3 +++ 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/net/mdio/mdio-bitbang.c b/drivers/net/mdio/mdio-bitbang.c index 5136275c8e739..d3915f8318543 100644 --- a/drivers/net/mdio/mdio-bitbang.c +++ b/drivers/net/mdio/mdio-bitbang.c @@ -149,7 +149,7 @@ static int mdiobb_cmd_addr(struct mdiobb_ctrl *ctrl, int phy, u32 addr) return dev_addr; } -static int mdiobb_read(struct mii_bus *bus, int phy, int reg) +int mdiobb_read(struct mii_bus *bus, int phy, int reg) { struct mdiobb_ctrl *ctrl = bus->priv; int ret, i; @@ -180,8 +180,9 @@ static int mdiobb_read(struct mii_bus *bus, int phy, int reg) mdiobb_get_bit(ctrl); return ret; } +EXPORT_SYMBOL(mdiobb_read); -static int mdiobb_write(struct mii_bus *bus, int phy, int reg, u16 val) +int mdiobb_write(struct mii_bus *bus, int phy, int reg, u16 val) { struct mdiobb_ctrl *ctrl = bus->priv; @@ -201,6 +202,7 @@ static int mdiobb_write(struct mii_bus *bus, int phy, int reg, u16 val) mdiobb_get_bit(ctrl); return 0; } +EXPORT_SYMBOL(mdiobb_write); struct mii_bus *alloc_mdio_bitbang(struct mdiobb_ctrl *ctrl) { diff --git a/include/linux/mdio-bitbang.h b/include/linux/mdio-bitbang.h index 5d71e8a8500f5..aca4dc037b70b 100644 --- a/include/linux/mdio-bitbang.h +++ b/include/linux/mdio-bitbang.h @@ -35,6 +35,9 @@ struct mdiobb_ctrl { const struct mdiobb_ops *ops; }; +int mdiobb_read(struct mii_bus *bus, int phy, int reg); +int mdiobb_write(struct mii_bus *bus, int phy, int reg, u16 val); + /* The returned bus is not yet registered with the phy layer. */ struct mii_bus *alloc_mdio_bitbang(struct mdiobb_ctrl *ctrl); -- GitLab From 02cae02a7de1484095e4ba984bfee7a75843ec26 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 18 Jan 2021 16:06:56 +0100 Subject: [PATCH 1066/2012] sh_eth: Make PHY access aware of Runtime PM to fix reboot crash Wolfram reports that his R-Car H2-based Lager board can no longer be rebooted in v5.11-rc1, as it crashes with an imprecise external abort. The issue can be reproduced on other boards (e.g. Koelsch with R-Car M2-W) too, if CONFIG_IP_PNP is disabled, and the Ethernet interface is down at reboot time: Unhandled fault: imprecise external abort (0x1406) at 0x00000000 pgd = (ptrval) [00000000] *pgd=422b6835, *pte=00000000, *ppte=00000000 Internal error: : 1406 [#1] ARM Modules linked in: CPU: 0 PID: 1105 Comm: init Tainted: G W 5.10.0-rc1-00402-ge2f016cf7751 #1048 Hardware name: Generic R-Car Gen2 (Flattened Device Tree) PC is at sh_mdio_ctrl+0x44/0x60 LR is at sh_mmd_ctrl+0x20/0x24 ... Backtrace: [] (sh_mdio_ctrl) from [] (sh_mmd_ctrl+0x20/0x24) r7:0000001f r6:00000020 r5:00000002 r4:c22a1dc4 [] (sh_mmd_ctrl) from [] (mdiobb_cmd+0x38/0xa8) [] (mdiobb_cmd) from [] (mdiobb_read+0x58/0xdc) r9:c229f844 r8:c0c329dc r7:c221e000 r6:00000001 r5:c22a1dc4 r4:00000001 [] (mdiobb_read) from [] (__mdiobus_read+0x74/0xe0) r7:0000001f r6:00000001 r5:c221e000 r4:c221e000 [] (__mdiobus_read) from [] (mdiobus_read+0x40/0x54) r7:0000001f r6:00000001 r5:c221e000 r4:c221e458 [] (mdiobus_read) from [] (phy_read+0x1c/0x20) r7:ffffe000 r6:c221e470 r5:00000200 r4:c229f800 [] (phy_read) from [] (kszphy_config_intr+0x44/0x80) [] (kszphy_config_intr) from [] (phy_disable_interrupts+0x44/0x50) r5:c229f800 r4:c229f800 [] (phy_disable_interrupts) from [] (phy_shutdown+0x18/0x1c) r5:c229f800 r4:c229f804 [] (phy_shutdown) from [] (device_shutdown+0x168/0x1f8) [] (device_shutdown) from [] (kernel_restart_prepare+0x3c/0x48) r9:c22d2000 r8:c0100264 r7:c0b0d034 r6:00000000 r5:4321fedc r4:00000000 [] (kernel_restart_prepare) from [] (kernel_restart+0x1c/0x60) [] (kernel_restart) from [] (__do_sys_reboot+0x168/0x208) r5:4321fedc r4:01234567 [] (__do_sys_reboot) from [] (sys_reboot+0x18/0x1c) r7:00000058 r6:00000000 r5:00000000 r4:00000000 [] (sys_reboot) from [] (ret_fast_syscall+0x0/0x54) As of commit e2f016cf775129c0 ("net: phy: add a shutdown procedure"), system reboot calls phy_disable_interrupts() during shutdown. As this happens unconditionally, the PHY registers may be accessed while the device is suspended, causing undefined behavior, which may crash the system. Fix this by wrapping the PHY bitbang accessors in the sh_eth driver by wrappers that take care of Runtime PM, to resume the device when needed. Reported-by: Wolfram Sang Suggested-by: Andrew Lunn Signed-off-by: Geert Uytterhoeven Tested-by: Wolfram Sang Reviewed-by: Florian Fainelli Reviewed-by: Andrew Lunn Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/renesas/sh_eth.c | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c index c633046329352..9b52d350e21a9 100644 --- a/drivers/net/ethernet/renesas/sh_eth.c +++ b/drivers/net/ethernet/renesas/sh_eth.c @@ -3034,6 +3034,28 @@ static int sh_mdio_release(struct sh_eth_private *mdp) return 0; } +static int sh_mdiobb_read(struct mii_bus *bus, int phy, int reg) +{ + int res; + + pm_runtime_get_sync(bus->parent); + res = mdiobb_read(bus, phy, reg); + pm_runtime_put(bus->parent); + + return res; +} + +static int sh_mdiobb_write(struct mii_bus *bus, int phy, int reg, u16 val) +{ + int res; + + pm_runtime_get_sync(bus->parent); + res = mdiobb_write(bus, phy, reg, val); + pm_runtime_put(bus->parent); + + return res; +} + /* MDIO bus init function */ static int sh_mdio_init(struct sh_eth_private *mdp, struct sh_eth_plat_data *pd) @@ -3058,6 +3080,10 @@ static int sh_mdio_init(struct sh_eth_private *mdp, if (!mdp->mii_bus) return -ENOMEM; + /* Wrap accessors with Runtime PM-aware ops */ + mdp->mii_bus->read = sh_mdiobb_read; + mdp->mii_bus->write = sh_mdiobb_write; + /* Hook up MII support for ethtool */ mdp->mii_bus->name = "sh_mii"; mdp->mii_bus->parent = dev; -- GitLab From 301a33d51880619d0c5a581b5a48d3a5248fa84b Mon Sep 17 00:00:00 2001 From: Mircea Cirjaliu Date: Tue, 19 Jan 2021 21:53:18 +0100 Subject: [PATCH 1067/2012] bpf: Fix helper bpf_map_peek_elem_proto pointing to wrong callback I assume this was obtained by copy/paste. Point it to bpf_map_peek_elem() instead of bpf_map_pop_elem(). In practice it may have been less likely hit when under JIT given shielded via 84430d4232c3 ("bpf, verifier: avoid retpoline for map push/pop/peek operation"). Fixes: f1a2e44a3aec ("bpf: add queue and stack maps") Signed-off-by: Mircea Cirjaliu Signed-off-by: Daniel Borkmann Cc: Mauricio Vasquez Link: https://lore.kernel.org/bpf/AM7PR02MB6082663DFDCCE8DA7A6DD6B1BBA30@AM7PR02MB6082.eurprd02.prod.outlook.com --- kernel/bpf/helpers.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index bd8a3183d0302..41ca280b1dc19 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -108,7 +108,7 @@ BPF_CALL_2(bpf_map_peek_elem, struct bpf_map *, map, void *, value) } const struct bpf_func_proto bpf_map_peek_elem_proto = { - .func = bpf_map_pop_elem, + .func = bpf_map_peek_elem, .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_CONST_MAP_PTR, -- GitLab From 031c7a8cd6fc565e90320bf08f22ee6e70f9d969 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 28 Dec 2020 09:33:28 +0100 Subject: [PATCH 1068/2012] openrisc: io: Add missing __iomem annotation to iounmap() With C=1: drivers/soc/renesas/rmobile-sysc.c:330:33: sparse: sparse: incorrect type in argument 1 (different address spaces) @@ expected void *addr @@ got void [noderef] __iomem *[assigned] base @@ drivers/soc/renesas/rmobile-sysc.c:330:33: sparse: expected void *addr drivers/soc/renesas/rmobile-sysc.c:330:33: sparse: got void [noderef] __iomem *[assigned] base Fix this by adding the missing __iomem annotation to iounmap(). Reported-by: kernel test robot Signed-off-by: Geert Uytterhoeven Signed-off-by: Stafford Horne --- arch/openrisc/include/asm/io.h | 2 +- arch/openrisc/mm/ioremap.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/openrisc/include/asm/io.h b/arch/openrisc/include/asm/io.h index 7d6b4a77b379d..c298061c70a7e 100644 --- a/arch/openrisc/include/asm/io.h +++ b/arch/openrisc/include/asm/io.h @@ -31,7 +31,7 @@ void __iomem *ioremap(phys_addr_t offset, unsigned long size); #define iounmap iounmap -extern void iounmap(void *addr); +extern void iounmap(void __iomem *addr); #include diff --git a/arch/openrisc/mm/ioremap.c b/arch/openrisc/mm/ioremap.c index 5aed97a18bac9..daae13a76743b 100644 --- a/arch/openrisc/mm/ioremap.c +++ b/arch/openrisc/mm/ioremap.c @@ -77,7 +77,7 @@ void __iomem *__ref ioremap(phys_addr_t addr, unsigned long size) } EXPORT_SYMBOL(ioremap); -void iounmap(void *addr) +void iounmap(void __iomem *addr) { /* If the page is from the fixmap pool then we just clear out * the fixmap mapping. -- GitLab From b425e24a934e21a502d25089c6c7443d799c5594 Mon Sep 17 00:00:00 2001 From: Maxim Mikityanskiy Date: Mon, 18 Jan 2021 18:03:33 +0200 Subject: [PATCH 1069/2012] xsk: Clear pool even for inactive queues MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The number of queues can change by other means, rather than ethtool. For example, attaching an mqprio qdisc with num_tc > 1 leads to creating multiple sets of TX queues, which may be then destroyed when mqprio is deleted. If an AF_XDP socket is created while mqprio is active, dev->_tx[queue_id].pool will be filled, but then real_num_tx_queues may decrease with deletion of mqprio, which will mean that the pool won't be NULLed, and a further increase of the number of TX queues may expose a dangling pointer. To avoid any potential misbehavior, this commit clears pool for RX and TX queues, regardless of real_num_*_queues, still taking into consideration num_*_queues to avoid overflows. Fixes: 1c1efc2af158 ("xsk: Create and free buffer pool independently from umem") Fixes: a41b4f3c58dd ("xsk: simplify xdp_clear_umem_at_qid implementation") Signed-off-by: Maxim Mikityanskiy Signed-off-by: Daniel Borkmann Acked-by: Björn Töpel Link: https://lore.kernel.org/bpf/20210118160333.333439-1-maximmi@mellanox.com --- net/xdp/xsk.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index 8037b04a9edd1..4a83117507f5a 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -108,9 +108,9 @@ EXPORT_SYMBOL(xsk_get_pool_from_qid); void xsk_clear_pool_at_qid(struct net_device *dev, u16 queue_id) { - if (queue_id < dev->real_num_rx_queues) + if (queue_id < dev->num_rx_queues) dev->_rx[queue_id].pool = NULL; - if (queue_id < dev->real_num_tx_queues) + if (queue_id < dev->num_tx_queues) dev->_tx[queue_id].pool = NULL; } -- GitLab From 8d2b51b008c25240914984208b2ced57d1dd25a5 Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Sat, 16 Jan 2021 11:44:22 +0100 Subject: [PATCH 1070/2012] udp: mask TOS bits in udp_v4_early_demux() udp_v4_early_demux() is the only function that calls ip_mc_validate_source() with a TOS that hasn't been masked with IPTOS_RT_MASK. This results in different behaviours for incoming multicast UDPv4 packets, depending on if ip_mc_validate_source() is called from the early-demux path (udp_v4_early_demux) or from the regular input path (ip_route_input_noref). ECN would normally not be used with UDP multicast packets, so the practical consequences should be limited on that side. However, IPTOS_RT_MASK is used to also masks the TOS' high order bits, to align with the non-early-demux path behaviour. Reproducer: Setup two netns, connected with veth: $ ip netns add ns0 $ ip netns add ns1 $ ip -netns ns0 link set dev lo up $ ip -netns ns1 link set dev lo up $ ip link add name veth01 netns ns0 type veth peer name veth10 netns ns1 $ ip -netns ns0 link set dev veth01 up $ ip -netns ns1 link set dev veth10 up $ ip -netns ns0 address add 192.0.2.10 peer 192.0.2.11/32 dev veth01 $ ip -netns ns1 address add 192.0.2.11 peer 192.0.2.10/32 dev veth10 In ns0, add route to multicast address 224.0.2.0/24 using source address 198.51.100.10: $ ip -netns ns0 address add 198.51.100.10/32 dev lo $ ip -netns ns0 route add 224.0.2.0/24 dev veth01 src 198.51.100.10 In ns1, define route to 198.51.100.10, only for packets with TOS 4: $ ip -netns ns1 route add 198.51.100.10/32 tos 4 dev veth10 Also activate rp_filter in ns1, so that incoming packets not matching the above route get dropped: $ ip netns exec ns1 sysctl -wq net.ipv4.conf.veth10.rp_filter=1 Now try to receive packets on 224.0.2.11: $ ip netns exec ns1 socat UDP-RECVFROM:1111,ip-add-membership=224.0.2.11:veth10,ignoreeof - In ns0, send packet to 224.0.2.11 with TOS 4 and ECT(0) (that is, tos 6 for socat): $ echo test0 | ip netns exec ns0 socat - UDP-DATAGRAM:224.0.2.11:1111,bind=:1111,tos=6 The "test0" message is properly received by socat in ns1, because early-demux has no cached dst to use, so source address validation is done by ip_route_input_mc(), which receives a TOS that has the ECN bits masked. Now send another packet to 224.0.2.11, still with TOS 4 and ECT(0): $ echo test1 | ip netns exec ns0 socat - UDP-DATAGRAM:224.0.2.11:1111,bind=:1111,tos=6 The "test1" message isn't received by socat in ns1, because, now, early-demux has a cached dst to use and calls ip_mc_validate_source() immediately, without masking the ECN bits. Fixes: bc044e8db796 ("udp: perform source validation for mcast early demux") Signed-off-by: Guillaume Nault Signed-off-by: Jakub Kicinski --- net/ipv4/udp.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 7103b0a89756e..69ea76578abb9 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -2555,7 +2555,8 @@ int udp_v4_early_demux(struct sk_buff *skb) */ if (!inet_sk(sk)->inet_daddr && in_dev) return ip_mc_validate_source(skb, iph->daddr, - iph->saddr, iph->tos, + iph->saddr, + iph->tos & IPTOS_RT_MASK, skb->dev, in_dev, &itag); } return 0; -- GitLab From 2e5a6266fbb11ae93c468dfecab169aca9c27b43 Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Sat, 16 Jan 2021 11:44:26 +0100 Subject: [PATCH 1071/2012] netfilter: rpfilter: mask ecn bits before fib lookup RT_TOS() only masks one of the two ECN bits. Therefore rpfilter_mt() treats Not-ECT or ECT(1) packets in a different way than those with ECT(0) or CE. Reproducer: Create two netns, connected with a veth: $ ip netns add ns0 $ ip netns add ns1 $ ip link add name veth01 netns ns0 type veth peer name veth10 netns ns1 $ ip -netns ns0 link set dev veth01 up $ ip -netns ns1 link set dev veth10 up $ ip -netns ns0 address add 192.0.2.10/32 dev veth01 $ ip -netns ns1 address add 192.0.2.11/32 dev veth10 Add a route to ns1 in ns0: $ ip -netns ns0 route add 192.0.2.11/32 dev veth01 In ns1, only packets with TOS 4 can be routed to ns0: $ ip -netns ns1 route add 192.0.2.10/32 tos 4 dev veth10 Ping from ns0 to ns1 works regardless of the ECN bits, as long as TOS is 4: $ ip netns exec ns0 ping -Q 4 192.0.2.11 # TOS 4, Not-ECT ... 0% packet loss ... $ ip netns exec ns0 ping -Q 5 192.0.2.11 # TOS 4, ECT(1) ... 0% packet loss ... $ ip netns exec ns0 ping -Q 6 192.0.2.11 # TOS 4, ECT(0) ... 0% packet loss ... $ ip netns exec ns0 ping -Q 7 192.0.2.11 # TOS 4, CE ... 0% packet loss ... Now use iptable's rpfilter module in ns1: $ ip netns exec ns1 iptables-legacy -t raw -A PREROUTING -m rpfilter --invert -j DROP Not-ECT and ECT(1) packets still pass: $ ip netns exec ns0 ping -Q 4 192.0.2.11 # TOS 4, Not-ECT ... 0% packet loss ... $ ip netns exec ns0 ping -Q 5 192.0.2.11 # TOS 4, ECT(1) ... 0% packet loss ... But ECT(0) and ECN packets are dropped: $ ip netns exec ns0 ping -Q 6 192.0.2.11 # TOS 4, ECT(0) ... 100% packet loss ... $ ip netns exec ns0 ping -Q 7 192.0.2.11 # TOS 4, CE ... 100% packet loss ... After this patch, rpfilter doesn't drop ECT(0) and CE packets anymore. Fixes: 8f97339d3feb ("netfilter: add ipv4 reverse path filter match") Signed-off-by: Guillaume Nault Signed-off-by: Jakub Kicinski --- net/ipv4/netfilter/ipt_rpfilter.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/netfilter/ipt_rpfilter.c b/net/ipv4/netfilter/ipt_rpfilter.c index cc23f1ce239c2..8cd3224d913e0 100644 --- a/net/ipv4/netfilter/ipt_rpfilter.c +++ b/net/ipv4/netfilter/ipt_rpfilter.c @@ -76,7 +76,7 @@ static bool rpfilter_mt(const struct sk_buff *skb, struct xt_action_param *par) flow.daddr = iph->saddr; flow.saddr = rpfilter_get_saddr(iph->daddr); flow.flowi4_mark = info->flags & XT_RPFILTER_VALID_MARK ? skb->mark : 0; - flow.flowi4_tos = RT_TOS(iph->tos); + flow.flowi4_tos = iph->tos & IPTOS_RT_MASK; flow.flowi4_scope = RT_SCOPE_UNIVERSE; flow.flowi4_oif = l3mdev_master_ifindex_rcu(xt_in(par)); -- GitLab From fa82117010430aff2ce86400f7328f55a31b48a6 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sat, 16 Jan 2021 14:13:37 +0800 Subject: [PATCH 1072/2012] net: add inline function skb_csum_is_sctp This patch is to define a inline function skb_csum_is_sctp(), and also replace all places where it checks if it's a SCTP CSUM skb. This function would be used later in many networking drivers in the following patches. Suggested-by: Alexander Duyck Signed-off-by: Xin Long Reviewed-by: Alexander Duyck Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/pensando/ionic/ionic_txrx.c | 2 +- include/linux/skbuff.h | 5 +++++ net/core/dev.c | 2 +- 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c index ac4cd5d82e696..162a1ff1e9d24 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c @@ -979,7 +979,7 @@ static int ionic_tx_calc_csum(struct ionic_queue *q, struct sk_buff *skb) stats->vlan_inserted++; } - if (skb->csum_not_inet) + if (skb_csum_is_sctp(skb)) stats->crc32_csum++; else stats->csum++; diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index c9568cf60c2a6..46f901adf1a80 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -4621,6 +4621,11 @@ static inline void skb_reset_redirect(struct sk_buff *skb) #endif } +static inline bool skb_csum_is_sctp(struct sk_buff *skb) +{ + return skb->csum_not_inet; +} + static inline void skb_set_kcov_handle(struct sk_buff *skb, const u64 kcov_handle) { diff --git a/net/core/dev.c b/net/core/dev.c index 6b90520a01b19..0332f2e8f7da0 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3617,7 +3617,7 @@ static struct sk_buff *validate_xmit_vlan(struct sk_buff *skb, int skb_csum_hwoffload_help(struct sk_buff *skb, const netdev_features_t features) { - if (unlikely(skb->csum_not_inet)) + if (unlikely(skb_csum_is_sctp(skb))) return !!(features & NETIF_F_SCTP_CRC) ? 0 : skb_crc32c_csum_help(skb); -- GitLab From 8bcf02035bd5ab5f22110d16a1aaee1794aa8d3c Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sat, 16 Jan 2021 14:13:38 +0800 Subject: [PATCH 1073/2012] net: igb: use skb_csum_is_sctp instead of protocol check Using skb_csum_is_sctp is a easier way to validate it's a SCTP CRC checksum offload packet, and there is no need to parse the packet to check its proto field, especially when it's a UDP or GRE encapped packet. So this patch also makes igb support SCTP CRC checksum offload for UDP and GRE encapped packets. Signed-off-by: Xin Long Reviewed-by: Alexander Duyck Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/igb/igb_main.c | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index 6b5adbd9660b7..84d4284b8b326 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -5959,15 +5959,6 @@ static int igb_tso(struct igb_ring *tx_ring, return 1; } -static inline bool igb_ipv6_csum_is_sctp(struct sk_buff *skb) -{ - unsigned int offset = 0; - - ipv6_find_hdr(skb, &offset, IPPROTO_SCTP, NULL, NULL); - - return offset == skb_checksum_start_offset(skb); -} - static void igb_tx_csum(struct igb_ring *tx_ring, struct igb_tx_buffer *first) { struct sk_buff *skb = first->skb; @@ -5990,10 +5981,7 @@ csum_failed: break; case offsetof(struct sctphdr, checksum): /* validate that this is actually an SCTP request */ - if (((first->protocol == htons(ETH_P_IP)) && - (ip_hdr(skb)->protocol == IPPROTO_SCTP)) || - ((first->protocol == htons(ETH_P_IPV6)) && - igb_ipv6_csum_is_sctp(skb))) { + if (skb_csum_is_sctp(skb)) { type_tucmd = E1000_ADVTXD_TUCMD_L4T_SCTP; break; } -- GitLab From d2de44443cafa16f6c8c6e724632d57097991f55 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sat, 16 Jan 2021 14:13:39 +0800 Subject: [PATCH 1074/2012] net: igbvf: use skb_csum_is_sctp instead of protocol check Using skb_csum_is_sctp is a easier way to validate it's a SCTP CRC checksum offload packet, and yet it also makes igbvf support SCTP CRC checksum offload for UDP and GRE encapped packets, just as it does in igb driver. Signed-off-by: Xin Long Reviewed-by: Alexander Duyck Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/igbvf/netdev.c | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) diff --git a/drivers/net/ethernet/intel/igbvf/netdev.c b/drivers/net/ethernet/intel/igbvf/netdev.c index 30fdea24e94ae..fb3fbcb133310 100644 --- a/drivers/net/ethernet/intel/igbvf/netdev.c +++ b/drivers/net/ethernet/intel/igbvf/netdev.c @@ -2072,15 +2072,6 @@ static int igbvf_tso(struct igbvf_ring *tx_ring, return 1; } -static inline bool igbvf_ipv6_csum_is_sctp(struct sk_buff *skb) -{ - unsigned int offset = 0; - - ipv6_find_hdr(skb, &offset, IPPROTO_SCTP, NULL, NULL); - - return offset == skb_checksum_start_offset(skb); -} - static bool igbvf_tx_csum(struct igbvf_ring *tx_ring, struct sk_buff *skb, u32 tx_flags, __be16 protocol) { @@ -2102,10 +2093,7 @@ csum_failed: break; case offsetof(struct sctphdr, checksum): /* validate that this is actually an SCTP request */ - if (((protocol == htons(ETH_P_IP)) && - (ip_hdr(skb)->protocol == IPPROTO_SCTP)) || - ((protocol == htons(ETH_P_IPV6)) && - igbvf_ipv6_csum_is_sctp(skb))) { + if (skb_csum_is_sctp(skb)) { type_tucmd = E1000_ADVTXD_TUCMD_L4T_SCTP; break; } -- GitLab From 609d29a9d2429a840a2f1f44e77b71d58e3e9a33 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sat, 16 Jan 2021 14:13:40 +0800 Subject: [PATCH 1075/2012] net: igc: use skb_csum_is_sctp instead of protocol check Using skb_csum_is_sctp is a easier way to validate it's a SCTP CRC checksum offload packet, and yet it also makes igc support SCTP CRC checksum offload for UDP and GRE encapped packets, just as it does in igb driver. Signed-off-by: Xin Long Reviewed-by: Alexander Duyck Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/igc/igc_main.c | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index afd6a62da29dd..43aec42e6d9d4 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -949,15 +949,6 @@ static void igc_tx_ctxtdesc(struct igc_ring *tx_ring, } } -static inline bool igc_ipv6_csum_is_sctp(struct sk_buff *skb) -{ - unsigned int offset = 0; - - ipv6_find_hdr(skb, &offset, IPPROTO_SCTP, NULL, NULL); - - return offset == skb_checksum_start_offset(skb); -} - static void igc_tx_csum(struct igc_ring *tx_ring, struct igc_tx_buffer *first) { struct sk_buff *skb = first->skb; @@ -980,10 +971,7 @@ csum_failed: break; case offsetof(struct sctphdr, checksum): /* validate that this is actually an SCTP request */ - if ((first->protocol == htons(ETH_P_IP) && - (ip_hdr(skb)->protocol == IPPROTO_SCTP)) || - (first->protocol == htons(ETH_P_IPV6) && - igc_ipv6_csum_is_sctp(skb))) { + if (skb_csum_is_sctp(skb)) { type_tucmd = IGC_ADVTXD_TUCMD_L4T_SCTP; break; } -- GitLab From f8c4b01d3a680de2144dd274df03ffaf69cfb881 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sat, 16 Jan 2021 14:13:41 +0800 Subject: [PATCH 1076/2012] net: ixgbe: use skb_csum_is_sctp instead of protocol check Using skb_csum_is_sctp is a easier way to validate it's a SCTP CRC checksum offload packet, and yet it also makes ixgbe support SCTP CRC checksum offload for UDP and GRE encapped packets, just as it does in igb driver. Signed-off-by: Xin Long Reviewed-by: Alexander Duyck Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 56dca73d158ee..e08c01525fd26 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -8039,15 +8039,6 @@ static int ixgbe_tso(struct ixgbe_ring *tx_ring, return 1; } -static inline bool ixgbe_ipv6_csum_is_sctp(struct sk_buff *skb) -{ - unsigned int offset = 0; - - ipv6_find_hdr(skb, &offset, IPPROTO_SCTP, NULL, NULL); - - return offset == skb_checksum_start_offset(skb); -} - static void ixgbe_tx_csum(struct ixgbe_ring *tx_ring, struct ixgbe_tx_buffer *first, struct ixgbe_ipsec_tx_data *itd) @@ -8073,10 +8064,7 @@ csum_failed: break; case offsetof(struct sctphdr, checksum): /* validate that this is actually an SCTP request */ - if (((first->protocol == htons(ETH_P_IP)) && - (ip_hdr(skb)->protocol == IPPROTO_SCTP)) || - ((first->protocol == htons(ETH_P_IPV6)) && - ixgbe_ipv6_csum_is_sctp(skb))) { + if (skb_csum_is_sctp(skb)) { type_tucmd = IXGBE_ADVTXD_TUCMD_L4T_SCTP; break; } -- GitLab From fc186d0a4ef8cc493a04895e620c7d55052a9d93 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sat, 16 Jan 2021 14:13:42 +0800 Subject: [PATCH 1077/2012] net: ixgbevf: use skb_csum_is_sctp instead of protocol check Using skb_csum_is_sctp is a easier way to validate it's a SCTP CRC checksum offload packet, and yet it also makes ixgbevf support SCTP CRC checksum offload for UDP and GRE encapped packets, just as it does in igb driver. Signed-off-by: Xin Long Reviewed-by: Alexander Duyck Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index a534a3fb392ed..a14e55e7fce88 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -3843,15 +3843,6 @@ static int ixgbevf_tso(struct ixgbevf_ring *tx_ring, return 1; } -static inline bool ixgbevf_ipv6_csum_is_sctp(struct sk_buff *skb) -{ - unsigned int offset = 0; - - ipv6_find_hdr(skb, &offset, IPPROTO_SCTP, NULL, NULL); - - return offset == skb_checksum_start_offset(skb); -} - static void ixgbevf_tx_csum(struct ixgbevf_ring *tx_ring, struct ixgbevf_tx_buffer *first, struct ixgbevf_ipsec_tx_data *itd) @@ -3872,10 +3863,7 @@ static void ixgbevf_tx_csum(struct ixgbevf_ring *tx_ring, break; case offsetof(struct sctphdr, checksum): /* validate that this is actually an SCTP request */ - if (((first->protocol == htons(ETH_P_IP)) && - (ip_hdr(skb)->protocol == IPPROTO_SCTP)) || - ((first->protocol == htons(ETH_P_IPV6)) && - ixgbevf_ipv6_csum_is_sctp(skb))) { + if (skb_csum_is_sctp(skb)) { type_tucmd = IXGBE_ADVTXD_TUCMD_L4T_SCTP; break; } -- GitLab From a3eb4e9d4c9218476d05c52dfd2be3d6fdce6b91 Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Sun, 17 Jan 2021 17:15:38 +0200 Subject: [PATCH 1078/2012] net: Disable NETIF_F_HW_TLS_RX when RXCSUM is disabled With NETIF_F_HW_TLS_RX packets are decrypted in HW. This cannot be logically done when RXCSUM offload is off. Fixes: 14136564c8ee ("net: Add TLS RX offload feature") Signed-off-by: Tariq Toukan Reviewed-by: Boris Pismenny Link: https://lore.kernel.org/r/20210117151538.9411-1-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- Documentation/networking/tls-offload.rst | 3 +++ net/core/dev.c | 5 +++++ 2 files changed, 8 insertions(+) diff --git a/Documentation/networking/tls-offload.rst b/Documentation/networking/tls-offload.rst index 9af3334d9ad08..5f0dea3d571e3 100644 --- a/Documentation/networking/tls-offload.rst +++ b/Documentation/networking/tls-offload.rst @@ -534,3 +534,6 @@ offload. Hence, TLS TX device feature flag requires TX csum offload being set. Disabling the latter implies clearing the former. Disabling TX checksum offload should not affect old connections, and drivers should make sure checksum calculation does not break for them. +Similarly, device-offloaded TLS decryption implies doing RXCSUM. If the user +does not want to enable RX csum offload, TLS RX device feature is disabled +as well. diff --git a/net/core/dev.c b/net/core/dev.c index c360bb5367e24..a979b86dbacda 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -9672,6 +9672,11 @@ static netdev_features_t netdev_fix_features(struct net_device *dev, } } + if ((features & NETIF_F_HW_TLS_RX) && !(features & NETIF_F_RXCSUM)) { + netdev_dbg(dev, "Dropping TLS RX HW offload feature since no RXCSUM feature.\n"); + features &= ~NETIF_F_HW_TLS_RX; + } + return features; } -- GitLab From 9f206f7398f6f6ec7dd0198c045c2459b4f720b6 Mon Sep 17 00:00:00 2001 From: Bryan Tan Date: Mon, 18 Jan 2021 19:16:29 -0800 Subject: [PATCH 1079/2012] RDMA/vmw_pvrdma: Fix network_hdr_type reported in WC The PVRDMA device HW interface defines network_hdr_type according to an old definition of the internal kernel rdma_network_type enum that has since changed, resulting in the wrong rdma_network_type being reported. Fix this by explicitly defining the enum used by the PVRDMA device and adding a function to convert the pvrdma_network_type to rdma_network_type enum. Cc: stable@vger.kernel.org # 5.10+ Fixes: 1c15b4f2a42f ("RDMA/core: Modify enum ib_gid_type and enum rdma_network_type") Link: https://lore.kernel.org/r/1611026189-17943-1-git-send-email-bryantan@vmware.com Reviewed-by: Adit Ranadive Signed-off-by: Bryan Tan Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/vmw_pvrdma/pvrdma.h | 14 ++++++++++++++ drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c | 2 +- include/uapi/rdma/vmw_pvrdma-abi.h | 7 +++++++ 3 files changed, 22 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h b/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h index c142f5e7f25f8..de57f2fed7437 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h @@ -509,6 +509,20 @@ static inline int ib_send_flags_to_pvrdma(int flags) return flags & PVRDMA_MASK(PVRDMA_SEND_FLAGS_MAX); } +static inline int pvrdma_network_type_to_ib(enum pvrdma_network_type type) +{ + switch (type) { + case PVRDMA_NETWORK_ROCE_V1: + return RDMA_NETWORK_ROCE_V1; + case PVRDMA_NETWORK_IPV4: + return RDMA_NETWORK_IPV4; + case PVRDMA_NETWORK_IPV6: + return RDMA_NETWORK_IPV6; + default: + return RDMA_NETWORK_IPV6; + } +} + void pvrdma_qp_cap_to_ib(struct ib_qp_cap *dst, const struct pvrdma_qp_cap *src); void ib_qp_cap_to_pvrdma(struct pvrdma_qp_cap *dst, diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c index a119ac3e103c8..6aa40bd2fd52d 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c @@ -367,7 +367,7 @@ retry: wc->dlid_path_bits = cqe->dlid_path_bits; wc->port_num = cqe->port_num; wc->vendor_err = cqe->vendor_err; - wc->network_hdr_type = cqe->network_hdr_type; + wc->network_hdr_type = pvrdma_network_type_to_ib(cqe->network_hdr_type); /* Update shared ring state */ pvrdma_idx_ring_inc(&cq->ring_state->rx.cons_head, cq->ibcq.cqe); diff --git a/include/uapi/rdma/vmw_pvrdma-abi.h b/include/uapi/rdma/vmw_pvrdma-abi.h index f8b638c73371d..901a4fd72c09f 100644 --- a/include/uapi/rdma/vmw_pvrdma-abi.h +++ b/include/uapi/rdma/vmw_pvrdma-abi.h @@ -133,6 +133,13 @@ enum pvrdma_wc_flags { PVRDMA_WC_FLAGS_MAX = PVRDMA_WC_WITH_NETWORK_HDR_TYPE, }; +enum pvrdma_network_type { + PVRDMA_NETWORK_IB, + PVRDMA_NETWORK_ROCE_V1 = PVRDMA_NETWORK_IB, + PVRDMA_NETWORK_IPV4, + PVRDMA_NETWORK_IPV6 +}; + struct pvrdma_alloc_ucontext_resp { __u32 qp_tab_size; __u32 reserved; -- GitLab From 9293d3fcb70583f2c786f04ca788af026b7c4c5c Mon Sep 17 00:00:00 2001 From: Yangyang Li Date: Tue, 19 Jan 2021 17:28:33 +0800 Subject: [PATCH 1080/2012] RDMA/hns: Use mutex instead of spinlock for ida allocation GFP_KERNEL may cause ida_alloc_range() to sleep, but the spinlock covering this function is not allowed to sleep, so the spinlock needs to be changed to mutex. As there is a certain chance of memory allocation failure, GFP_ATOMIC is not suitable for QP allocation scenarios. Fixes: 71586dd20010 ("RDMA/hns: Create QP with selected QPN for bank load balance") Link: https://lore.kernel.org/r/1611048513-28663-1-git-send-email-liweihang@huawei.com Signed-off-by: Yangyang Li Signed-off-by: Weihang Li Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_device.h | 2 +- drivers/infiniband/hw/hns/hns_roce_qp.c | 11 ++++++----- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index 55d538625e361..ad8253245a85f 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -532,7 +532,7 @@ struct hns_roce_qp_table { struct hns_roce_hem_table sccc_table; struct mutex scc_mutex; struct hns_roce_bank bank[HNS_ROCE_QP_BANK_NUM]; - spinlock_t bank_lock; + struct mutex bank_mutex; }; struct hns_roce_cq_table { diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index d8e2fe5558d29..1116371adf74f 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -209,7 +209,7 @@ static int alloc_qpn(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp) hr_qp->doorbell_qpn = 1; } else { - spin_lock(&qp_table->bank_lock); + mutex_lock(&qp_table->bank_mutex); bankid = get_least_load_bankid_for_qp(qp_table->bank); ret = alloc_qpn_with_bankid(&qp_table->bank[bankid], bankid, @@ -217,12 +217,12 @@ static int alloc_qpn(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp) if (ret) { ibdev_err(&hr_dev->ib_dev, "failed to alloc QPN, ret = %d\n", ret); - spin_unlock(&qp_table->bank_lock); + mutex_unlock(&qp_table->bank_mutex); return ret; } qp_table->bank[bankid].inuse++; - spin_unlock(&qp_table->bank_lock); + mutex_unlock(&qp_table->bank_mutex); hr_qp->doorbell_qpn = (u32)num; } @@ -408,9 +408,9 @@ static void free_qpn(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp) ida_free(&hr_dev->qp_table.bank[bankid].ida, hr_qp->qpn >> 3); - spin_lock(&hr_dev->qp_table.bank_lock); + mutex_lock(&hr_dev->qp_table.bank_mutex); hr_dev->qp_table.bank[bankid].inuse--; - spin_unlock(&hr_dev->qp_table.bank_lock); + mutex_unlock(&hr_dev->qp_table.bank_mutex); } static int set_rq_size(struct hns_roce_dev *hr_dev, struct ib_qp_cap *cap, @@ -1371,6 +1371,7 @@ int hns_roce_init_qp_table(struct hns_roce_dev *hr_dev) unsigned int i; mutex_init(&qp_table->scc_mutex); + mutex_init(&qp_table->bank_mutex); xa_init(&hr_dev->qp_table_xa); reserved_from_bot = hr_dev->caps.reserved_qps; -- GitLab From de641d74fb00f5b32f054ee154e31fb037e0db88 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Sun, 17 Jan 2021 11:26:33 +0200 Subject: [PATCH 1081/2012] Revert "RDMA/mlx5: Fix devlink deadlock on net namespace deletion" This reverts commit fbdd0049d98d44914fc57d4b91f867f4996c787b. Due to commit in fixes tag, netdevice events were received only in one net namespace of mlx5_core_dev. Due to this when netdevice events arrive in net namespace other than net namespace of mlx5_core_dev, they are missed. This results in empty GID table due to RDMA device being detached from its net device. Hence, revert back to receive netdevice events in all net namespaces to restore back RDMA functionality in non init_net net namespace. The deadlock will have to be addressed in another patch. Fixes: fbdd0049d98d ("RDMA/mlx5: Fix devlink deadlock on net namespace deletion") Link: https://lore.kernel.org/r/20210117092633.10690-1-leon@kernel.org Signed-off-by: Parav Pandit Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/main.c | 6 ++---- .../net/ethernet/mellanox/mlx5/core/lib/mlx5.h | 5 +++++ include/linux/mlx5/driver.h | 18 ------------------ 3 files changed, 7 insertions(+), 22 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index d26f3f3e0462a..aabdc07e47537 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -3311,8 +3311,7 @@ static int mlx5_add_netdev_notifier(struct mlx5_ib_dev *dev, u8 port_num) int err; dev->port[port_num].roce.nb.notifier_call = mlx5_netdev_event; - err = register_netdevice_notifier_net(mlx5_core_net(dev->mdev), - &dev->port[port_num].roce.nb); + err = register_netdevice_notifier(&dev->port[port_num].roce.nb); if (err) { dev->port[port_num].roce.nb.notifier_call = NULL; return err; @@ -3324,8 +3323,7 @@ static int mlx5_add_netdev_notifier(struct mlx5_ib_dev *dev, u8 port_num) static void mlx5_remove_netdev_notifier(struct mlx5_ib_dev *dev, u8 port_num) { if (dev->port[port_num].roce.nb.notifier_call) { - unregister_netdevice_notifier_net(mlx5_core_net(dev->mdev), - &dev->port[port_num].roce.nb); + unregister_netdevice_notifier(&dev->port[port_num].roce.nb); dev->port[port_num].roce.nb.notifier_call = NULL; } } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h index 3a9fa629503f0..d046db7bb047d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h @@ -90,4 +90,9 @@ int mlx5_create_encryption_key(struct mlx5_core_dev *mdev, u32 key_type, u32 *p_key_id); void mlx5_destroy_encryption_key(struct mlx5_core_dev *mdev, u32 key_id); +static inline struct net *mlx5_core_net(struct mlx5_core_dev *dev) +{ + return devlink_net(priv_to_devlink(dev)); +} + #endif diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index f93bfe7473aa7..4672e12f1aa52 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -1209,22 +1209,4 @@ static inline bool mlx5_is_roce_enabled(struct mlx5_core_dev *dev) return val.vbool; } -/** - * mlx5_core_net - Provide net namespace of the mlx5_core_dev - * @dev: mlx5 core device - * - * mlx5_core_net() returns the net namespace of mlx5 core device. - * This can be called only in below described limited context. - * (a) When a devlink instance for mlx5_core is registered and - * when devlink reload operation is disabled. - * or - * (b) during devlink reload reload_down() and reload_up callbacks - * where it is ensured that devlink instance's net namespace is - * stable. - */ -static inline struct net *mlx5_core_net(struct mlx5_core_dev *dev) -{ - return devlink_net(priv_to_devlink(dev)); -} - #endif /* MLX5_DRIVER_H */ -- GitLab From f068cb1db2cb40c9782874df7b08c684106cf609 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Fri, 15 Jan 2021 16:36:50 -0800 Subject: [PATCH 1082/2012] RDMA/usnic: Fix misuse of sysfs_emit_at In commit e28bf1f03b01 ("RDMA: Convert various random sprintf sysfs _show uses to sysfs_emit") I mistakenly used len = sysfs_emit_at to overwrite the last trailing space of potentially multiple entry output. Instead use a more common style by removing the trailing space from the output formats and adding a prefixing space to the contination formats and converting the final terminating output newline from the defective len = sysfs_emit_at(buf, len, "\n"); to the now appropriate and typical len += sysfs_emit_at(buf, len, "\n"); Fixes: e28bf1f03b01 ("RDMA: Convert various random sprintf sysfs _show uses to sysfs_emit") Link: https://lore.kernel.org/r/5eb794b9c9bca0494d94b2b209f1627fa4e7b555.camel@perches.com Reported-by: James Bottomley Signed-off-by: Joe Perches Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/usnic/usnic_ib_sysfs.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/hw/usnic/usnic_ib_sysfs.c b/drivers/infiniband/hw/usnic/usnic_ib_sysfs.c index e59615a4c9d98..586b0e52ba7f8 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_sysfs.c +++ b/drivers/infiniband/hw/usnic/usnic_ib_sysfs.c @@ -214,7 +214,7 @@ static ssize_t summary_show(struct usnic_ib_qp_grp *qp_grp, char *buf) struct usnic_vnic_res *vnic_res; int len; - len = sysfs_emit(buf, "QPN: %d State: (%s) PID: %u VF Idx: %hu ", + len = sysfs_emit(buf, "QPN: %d State: (%s) PID: %u VF Idx: %hu", qp_grp->ibqp.qp_num, usnic_ib_qp_grp_state_to_string(qp_grp->state), qp_grp->owner_pid, @@ -224,14 +224,13 @@ static ssize_t summary_show(struct usnic_ib_qp_grp *qp_grp, char *buf) res_chunk = qp_grp->res_chunk_list[i]; for (j = 0; j < res_chunk->cnt; j++) { vnic_res = res_chunk->res[j]; - len += sysfs_emit_at( - buf, len, "%s[%d] ", + len += sysfs_emit_at(buf, len, " %s[%d]", usnic_vnic_res_type_to_str(vnic_res->type), vnic_res->vnic_idx); } } - len = sysfs_emit_at(buf, len, "\n"); + len += sysfs_emit_at(buf, len, "\n"); return len; } -- GitLab From f6a2e94b3f9d89cb40771ff746b16b5687650cbb Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 18 Jan 2021 16:08:12 +0100 Subject: [PATCH 1083/2012] sh_eth: Fix power down vs. is_opened flag ordering MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit sh_eth_close() does a synchronous power down of the device before marking it closed. Revert the order, to make sure the device is never marked opened while suspended. While at it, use pm_runtime_put() instead of pm_runtime_put_sync(), as there is no reason to do a synchronous power down. Fixes: 7fa2955ff70ce453 ("sh_eth: Fix sleeping function called from invalid context") Signed-off-by: Geert Uytterhoeven Reviewed-by: Sergei Shtylyov Reviewed-by: Niklas Söderlund Link: https://lore.kernel.org/r/20210118150812.796791-1-geert+renesas@glider.be Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/renesas/sh_eth.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c index 9b52d350e21a9..590b088bc4c7f 100644 --- a/drivers/net/ethernet/renesas/sh_eth.c +++ b/drivers/net/ethernet/renesas/sh_eth.c @@ -2606,10 +2606,10 @@ static int sh_eth_close(struct net_device *ndev) /* Free all the skbuffs in the Rx queue and the DMA buffer. */ sh_eth_ring_free(ndev); - pm_runtime_put_sync(&mdp->pdev->dev); - mdp->is_opened = 0; + pm_runtime_put(&mdp->pdev->dev); + return 0; } -- GitLab From 4964e5a1e080f785f5518b402a9e48c527fe6cbd Mon Sep 17 00:00:00 2001 From: Bongsu Jeon Date: Tue, 19 Jan 2021 05:55:22 +0900 Subject: [PATCH 1084/2012] net: nfc: nci: fix the wrong NCI_CORE_INIT parameters Fix the code because NCI_CORE_INIT_CMD includes two parameters in NCI2.0 but there is no parameters in NCI1.x. Fixes: bcd684aace34 ("net/nfc/nci: Support NCI 2.x initial sequence") Signed-off-by: Bongsu Jeon Link: https://lore.kernel.org/r/20210118205522.317087-1-bongsu.jeon@samsung.com Signed-off-by: Jakub Kicinski --- net/nfc/nci/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c index e64727e1a72f9..02a1f13f07980 100644 --- a/net/nfc/nci/core.c +++ b/net/nfc/nci/core.c @@ -508,7 +508,7 @@ static int nci_open_device(struct nci_dev *ndev) }; unsigned long opt = 0; - if (!(ndev->nci_ver & NCI_VER_2_MASK)) + if (ndev->nci_ver & NCI_VER_2_MASK) opt = (unsigned long)&nci_init_v2_cmd; rc = __nci_request(ndev, nci_init_req, opt, -- GitLab From dd3a44c06f7b4f14e90065bf05d62c255b20005f Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 19 Jan 2021 15:18:00 +1100 Subject: [PATCH 1085/2012] selftests/powerpc: Only test lwm/stmw on big endian Newer binutils (>= 2.36) refuse to assemble lmw/stmw when building in little endian mode. That breaks compilation of our alignment handler test: /tmp/cco4l14N.s: Assembler messages: /tmp/cco4l14N.s:1440: Error: `lmw' invalid when little-endian /tmp/cco4l14N.s:1814: Error: `stmw' invalid when little-endian make[2]: *** [../../lib.mk:139: /output/kselftest/powerpc/alignment/alignment_handler] Error 1 These tests do pass on little endian machines, as the kernel will still emulate those instructions even when running little endian (which is arguably a kernel bug). But we don't really need to test that case, so ifdef those instructions out to get the alignment test building again. Reported-by: Libor Pechacek Signed-off-by: Michael Ellerman Tested-by: Libor Pechacek Link: https://lore.kernel.org/r/20210119041800.3093047-1-mpe@ellerman.id.au --- .../testing/selftests/powerpc/alignment/alignment_handler.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/powerpc/alignment/alignment_handler.c b/tools/testing/selftests/powerpc/alignment/alignment_handler.c index cb53a8b777e68..c25cf7cd45e9f 100644 --- a/tools/testing/selftests/powerpc/alignment/alignment_handler.c +++ b/tools/testing/selftests/powerpc/alignment/alignment_handler.c @@ -443,7 +443,6 @@ int test_alignment_handler_integer(void) LOAD_DFORM_TEST(ldu); LOAD_XFORM_TEST(ldx); LOAD_XFORM_TEST(ldux); - LOAD_DFORM_TEST(lmw); STORE_DFORM_TEST(stb); STORE_XFORM_TEST(stbx); STORE_DFORM_TEST(stbu); @@ -462,7 +461,11 @@ int test_alignment_handler_integer(void) STORE_XFORM_TEST(stdx); STORE_DFORM_TEST(stdu); STORE_XFORM_TEST(stdux); + +#ifdef __BIG_ENDIAN__ + LOAD_DFORM_TEST(lmw); STORE_DFORM_TEST(stmw); +#endif return rc; } -- GitLab From fd23d2dc180fccfad4b27a8e52ba1bc415d18509 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Tue, 19 Jan 2021 10:59:30 +0800 Subject: [PATCH 1086/2012] selftests: net: fib_tests: remove duplicate log test The previous test added an address with a specified metric and check if correspond route was created. I somehow added two logs for the same test. Remove the duplicated one. Reported-by: Antoine Tenart Fixes: 0d29169a708b ("selftests/net/fib_tests: update addr_metric_test for peer route testing") Signed-off-by: Hangbin Liu Reviewed-by: David Ahern Link: https://lore.kernel.org/r/20210119025930.2810532-1-liuhangbin@gmail.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/fib_tests.sh | 1 - 1 file changed, 1 deletion(-) diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh index 84205c3a55ebe..2b5707738609e 100755 --- a/tools/testing/selftests/net/fib_tests.sh +++ b/tools/testing/selftests/net/fib_tests.sh @@ -1055,7 +1055,6 @@ ipv6_addr_metric_test() check_route6 "2001:db8:104::1 dev dummy2 proto kernel metric 260" log_test $? 0 "Set metric with peer route on local side" - log_test $? 0 "User specified metric on local address" check_route6 "2001:db8:104::2 dev dummy2 proto kernel metric 260" log_test $? 0 "Set metric with peer route on peer side" -- GitLab From b3228c74e0d24976604e8550e9cccb83135f5302 Mon Sep 17 00:00:00 2001 From: Grygorii Strashko Date: Fri, 15 Jan 2021 21:28:48 +0200 Subject: [PATCH 1087/2012] dt-binding: ti: am65x-cpts: add assigned-clock and power-domains props The CPTS clock is usually a clk-mux which allows to select CPTS reference clock by using 'assigned-clock-parents', 'assigned-clocks' DT properties. Also depending on integration the power-domains has to be specified to enable CPTS IP. Hence add 'assigned-clock-parents', 'assigned-clocks' and 'power-domains' properties to the CPTS DT bindings to avoid dtbs_check warnings: cpts@310d0000: 'assigned-clock-parents', 'assigned-clocks' do not match any of the regexes: 'pinctrl-[0-9]+' cpts@310d0000: 'power-domains' does not match any of the regexes: 'pinctrl-[0-9]+' Signed-off-by: Grygorii Strashko Signed-off-by: Jakub Kicinski --- .../devicetree/bindings/net/ti,k3-am654-cpts.yaml | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/Documentation/devicetree/bindings/net/ti,k3-am654-cpts.yaml b/Documentation/devicetree/bindings/net/ti,k3-am654-cpts.yaml index 9b7117920d906..ce43a1c58a571 100644 --- a/Documentation/devicetree/bindings/net/ti,k3-am654-cpts.yaml +++ b/Documentation/devicetree/bindings/net/ti,k3-am654-cpts.yaml @@ -73,6 +73,13 @@ properties: items: - const: cpts + assigned-clock-parents: true + + assigned-clocks: true + + power-domains: + maxItems: 1 + ti,cpts-ext-ts-inputs: $ref: /schemas/types.yaml#/definitions/uint32 maximum: 8 -- GitLab From 19d9a846d9fcdfd30b1500339e09ec8dc898feea Mon Sep 17 00:00:00 2001 From: Grygorii Strashko Date: Fri, 15 Jan 2021 21:28:49 +0200 Subject: [PATCH 1088/2012] dt-binding: net: ti: k3-am654-cpsw-nuss: update bindings for am64x cpsw3g Update DT binding for recently introduced TI K3 AM642x SoC [1] which contains 3 port (2 external ports) CPSW3g module. The CPSW3g integrated in MAIN domain and can be configured in multi port or switch modes. The overall functionality and DT bindings are similar to other K3 CPSWxg versions, so DT binding changes are minimal: - reword description - add new compatible 'ti,am642-cpsw-nuss' - allow 2 external ports child nodes - add missed 'assigned-clock' props [1] https://www.ti.com/lit/pdf/spruim2 Signed-off-by: Grygorii Strashko Signed-off-by: Jakub Kicinski --- .../bindings/net/ti,k3-am654-cpsw-nuss.yaml | 50 +++++++++++-------- 1 file changed, 30 insertions(+), 20 deletions(-) diff --git a/Documentation/devicetree/bindings/net/ti,k3-am654-cpsw-nuss.yaml b/Documentation/devicetree/bindings/net/ti,k3-am654-cpsw-nuss.yaml index c47b58f3e3f6e..3fae9a5f0c6a4 100644 --- a/Documentation/devicetree/bindings/net/ti,k3-am654-cpsw-nuss.yaml +++ b/Documentation/devicetree/bindings/net/ti,k3-am654-cpsw-nuss.yaml @@ -4,7 +4,7 @@ $id: http://devicetree.org/schemas/net/ti,k3-am654-cpsw-nuss.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: The TI AM654x/J721E SoC Gigabit Ethernet MAC (Media Access Controller) Device Tree Bindings +title: The TI AM654x/J721E/AM642x SoC Gigabit Ethernet MAC (Media Access Controller) Device Tree Bindings maintainers: - Grygorii Strashko @@ -13,19 +13,16 @@ maintainers: description: The TI AM654x/J721E SoC Gigabit Ethernet MAC (CPSW2G NUSS) has two ports (one external) and provides Ethernet packet communication for the device. - CPSW2G NUSS features - the Reduced Gigabit Media Independent Interface (RGMII), - Reduced Media Independent Interface (RMII), the Management Data - Input/Output (MDIO) interface for physical layer device (PHY) management, - new version of Common Platform Time Sync (CPTS), updated Address Lookup - Engine (ALE). - One external Ethernet port (port 1) with selectable RGMII/RMII interfaces and - an internal Communications Port Programming Interface (CPPI5) (Host port 0). + The TI AM642x SoC Gigabit Ethernet MAC (CPSW3G NUSS) has three ports + (two external) and provides Ethernet packet communication and switching. + + The internal Communications Port Programming Interface (CPPI5) (Host port 0). Host Port 0 CPPI Packet Streaming Interface interface supports 8 TX channels - and one RX channels and operating by TI AM654x/J721E NAVSS Unified DMA - Peripheral Root Complex (UDMA-P) controller. - The CPSW2G NUSS is integrated into device MCU domain named MCU_CPSW0. + and one RX channels and operating by NAVSS Unified DMA Peripheral Root + Complex (UDMA-P) controller. - Additional features + CPSWxG features + updated Address Lookup Engine (ALE). priority level Quality Of Service (QOS) support (802.1p) Support for Audio/Video Bridging (P802.1Qav/D6.0) Support for IEEE 1588 Clock Synchronization (2008 Annex D, Annex E and Annex F) @@ -38,10 +35,18 @@ description: VLAN support, 802.1Q compliant, Auto add port VLAN for untagged frames on ingress, Auto VLAN removal on egress and auto pad to minimum frame size. RX/TX csum offload + Management Data Input/Output (MDIO) interface for PHYs management + RMII/RGMII Interfaces support + new version of Common Platform Time Sync (CPTS) + + The CPSWxG NUSS is integrated into + device MCU domain named MCU_CPSW0 on AM654x/J721E SoC. + device MAIN domain named CPSW0 on AM642x SoC. Specifications can be found at - http://www.ti.com/lit/ug/spruid7e/spruid7e.pdf - http://www.ti.com/lit/ug/spruil1a/spruil1a.pdf + https://www.ti.com/lit/pdf/spruid7 + https://www.ti.com/lit/zip/spruil1 + https://www.ti.com/lit/pdf/spruim2 properties: "#address-cells": true @@ -51,11 +56,12 @@ properties: oneOf: - const: ti,am654-cpsw-nuss - const: ti,j721e-cpsw-nuss + - const: ti,am642-cpsw-nuss reg: maxItems: 1 description: - The physical base address and size of full the CPSW2G NUSS IO range + The physical base address and size of full the CPSWxG NUSS IO range reg-names: items: @@ -66,12 +72,16 @@ properties: dma-coherent: true clocks: - description: CPSW2G NUSS functional clock + description: CPSWxG NUSS functional clock clock-names: items: - const: fck + assigned-clock-parents: true + + assigned-clocks: true + power-domains: maxItems: 1 @@ -99,16 +109,16 @@ properties: const: 0 patternProperties: - port@1: + port@[1-2]: type: object - description: CPSW2G NUSS external ports + description: CPSWxG NUSS external ports $ref: ethernet-controller.yaml# properties: reg: - items: - - const: 1 + minimum: 1 + maximum: 2 description: CPSW port number phys: -- GitLab From ed569ed9b30a748f4e5601041a999a537bb5e736 Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Fri, 15 Jan 2021 21:28:50 +0200 Subject: [PATCH 1089/2012] net: ethernet: ti: am65-cpsw-nuss: Use DMA device for DMA API For DMA API the DMA device should be used as cpsw does not accesses to descriptors or data buffers in any ways. The DMA does. Also, drop dma_coerce_mask_and_coherent() setting on CPSW device, as it should be done by DMA driver which does data movement. This is required for adding AM64x CPSW3g support where DMA coherency supported per DMA channel. Signed-off-by: Peter Ujfalusi Co-developed-by: Vignesh Raghavendra Signed-off-by: Vignesh Raghavendra Signed-off-by: Grygorii Strashko Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/ti/am65-cpsw-nuss.c | 79 ++++++++++++------------ drivers/net/ethernet/ti/am65-cpsw-nuss.h | 2 + 2 files changed, 40 insertions(+), 41 deletions(-) diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.c b/drivers/net/ethernet/ti/am65-cpsw-nuss.c index 766e8866bbefc..8bf48cf3be9b2 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c +++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c @@ -366,8 +366,9 @@ static int am65_cpsw_nuss_rx_push(struct am65_cpsw_common *common, } desc_dma = k3_cppi_desc_pool_virt2dma(rx_chn->desc_pool, desc_rx); - buf_dma = dma_map_single(dev, skb->data, pkt_len, DMA_FROM_DEVICE); - if (unlikely(dma_mapping_error(dev, buf_dma))) { + buf_dma = dma_map_single(rx_chn->dma_dev, skb->data, pkt_len, + DMA_FROM_DEVICE); + if (unlikely(dma_mapping_error(rx_chn->dma_dev, buf_dma))) { k3_cppi_desc_pool_free(rx_chn->desc_pool, desc_rx); dev_err(dev, "Failed to map rx skb buffer\n"); return -EINVAL; @@ -692,7 +693,7 @@ static void am65_cpsw_nuss_rx_cleanup(void *data, dma_addr_t desc_dma) skb = *swdata; cppi5_hdesc_get_obuf(desc_rx, &buf_dma, &buf_dma_len); - dma_unmap_single(rx_chn->dev, buf_dma, buf_dma_len, DMA_FROM_DEVICE); + dma_unmap_single(rx_chn->dma_dev, buf_dma, buf_dma_len, DMA_FROM_DEVICE); k3_cppi_desc_pool_free(rx_chn->desc_pool, desc_rx); dev_kfree_skb_any(skb); @@ -793,7 +794,7 @@ static int am65_cpsw_nuss_rx_packets(struct am65_cpsw_common *common, csum_info = psdata[2]; dev_dbg(dev, "%s rx csum_info:%#x\n", __func__, csum_info); - dma_unmap_single(dev, buf_dma, buf_dma_len, DMA_FROM_DEVICE); + dma_unmap_single(rx_chn->dma_dev, buf_dma, buf_dma_len, DMA_FROM_DEVICE); k3_cppi_desc_pool_free(rx_chn->desc_pool, desc_rx); @@ -864,7 +865,6 @@ static int am65_cpsw_nuss_rx_poll(struct napi_struct *napi_rx, int budget) } static void am65_cpsw_nuss_xmit_free(struct am65_cpsw_tx_chn *tx_chn, - struct device *dev, struct cppi5_host_desc_t *desc) { struct cppi5_host_desc_t *first_desc, *next_desc; @@ -876,8 +876,7 @@ static void am65_cpsw_nuss_xmit_free(struct am65_cpsw_tx_chn *tx_chn, cppi5_hdesc_get_obuf(first_desc, &buf_dma, &buf_dma_len); - dma_unmap_single(dev, buf_dma, buf_dma_len, - DMA_TO_DEVICE); + dma_unmap_single(tx_chn->dma_dev, buf_dma, buf_dma_len, DMA_TO_DEVICE); next_desc_dma = cppi5_hdesc_get_next_hbdesc(first_desc); while (next_desc_dma) { @@ -885,7 +884,7 @@ static void am65_cpsw_nuss_xmit_free(struct am65_cpsw_tx_chn *tx_chn, next_desc_dma); cppi5_hdesc_get_obuf(next_desc, &buf_dma, &buf_dma_len); - dma_unmap_page(dev, buf_dma, buf_dma_len, + dma_unmap_page(tx_chn->dma_dev, buf_dma, buf_dma_len, DMA_TO_DEVICE); next_desc_dma = cppi5_hdesc_get_next_hbdesc(next_desc); @@ -906,7 +905,7 @@ static void am65_cpsw_nuss_tx_cleanup(void *data, dma_addr_t desc_dma) desc_tx = k3_cppi_desc_pool_dma2virt(tx_chn->desc_pool, desc_dma); swdata = cppi5_hdesc_get_swdata(desc_tx); skb = *(swdata); - am65_cpsw_nuss_xmit_free(tx_chn, tx_chn->common->dev, desc_tx); + am65_cpsw_nuss_xmit_free(tx_chn, desc_tx); dev_kfree_skb_any(skb); } @@ -926,7 +925,7 @@ am65_cpsw_nuss_tx_compl_packet(struct am65_cpsw_tx_chn *tx_chn, desc_dma); swdata = cppi5_hdesc_get_swdata(desc_tx); skb = *(swdata); - am65_cpsw_nuss_xmit_free(tx_chn, tx_chn->common->dev, desc_tx); + am65_cpsw_nuss_xmit_free(tx_chn, desc_tx); ndev = skb->dev; @@ -1119,9 +1118,9 @@ static netdev_tx_t am65_cpsw_nuss_ndo_slave_xmit(struct sk_buff *skb, netif_txq = netdev_get_tx_queue(ndev, q_idx); /* Map the linear buffer */ - buf_dma = dma_map_single(dev, skb->data, pkt_len, + buf_dma = dma_map_single(tx_chn->dma_dev, skb->data, pkt_len, DMA_TO_DEVICE); - if (unlikely(dma_mapping_error(dev, buf_dma))) { + if (unlikely(dma_mapping_error(tx_chn->dma_dev, buf_dma))) { dev_err(dev, "Failed to map tx skb buffer\n"); ndev->stats.tx_errors++; goto err_free_skb; @@ -1130,7 +1129,8 @@ static netdev_tx_t am65_cpsw_nuss_ndo_slave_xmit(struct sk_buff *skb, first_desc = k3_cppi_desc_pool_alloc(tx_chn->desc_pool); if (!first_desc) { dev_dbg(dev, "Failed to allocate descriptor\n"); - dma_unmap_single(dev, buf_dma, pkt_len, DMA_TO_DEVICE); + dma_unmap_single(tx_chn->dma_dev, buf_dma, pkt_len, + DMA_TO_DEVICE); goto busy_stop_q; } @@ -1175,9 +1175,9 @@ static netdev_tx_t am65_cpsw_nuss_ndo_slave_xmit(struct sk_buff *skb, goto busy_free_descs; } - buf_dma = skb_frag_dma_map(dev, frag, 0, frag_size, + buf_dma = skb_frag_dma_map(tx_chn->dma_dev, frag, 0, frag_size, DMA_TO_DEVICE); - if (unlikely(dma_mapping_error(dev, buf_dma))) { + if (unlikely(dma_mapping_error(tx_chn->dma_dev, buf_dma))) { dev_err(dev, "Failed to map tx skb page\n"); k3_cppi_desc_pool_free(tx_chn->desc_pool, next_desc); ndev->stats.tx_errors++; @@ -1237,14 +1237,14 @@ done_tx: return NETDEV_TX_OK; err_free_descs: - am65_cpsw_nuss_xmit_free(tx_chn, dev, first_desc); + am65_cpsw_nuss_xmit_free(tx_chn, first_desc); err_free_skb: ndev->stats.tx_dropped++; dev_kfree_skb_any(skb); return NETDEV_TX_OK; busy_free_descs: - am65_cpsw_nuss_xmit_free(tx_chn, dev, first_desc); + am65_cpsw_nuss_xmit_free(tx_chn, first_desc); busy_stop_q: netif_tx_stop_queue(netif_txq); return NETDEV_TX_BUSY; @@ -1545,16 +1545,6 @@ static int am65_cpsw_nuss_init_tx_chns(struct am65_cpsw_common *common) tx_chn->common = common; tx_chn->id = i; tx_chn->descs_num = max_desc_num; - tx_chn->desc_pool = - k3_cppi_desc_pool_create_name(dev, - tx_chn->descs_num, - hdesc_size, - tx_chn->tx_chn_name); - if (IS_ERR(tx_chn->desc_pool)) { - ret = PTR_ERR(tx_chn->desc_pool); - dev_err(dev, "Failed to create poll %d\n", ret); - goto err; - } tx_chn->tx_chn = k3_udma_glue_request_tx_chn(dev, @@ -1565,6 +1555,17 @@ static int am65_cpsw_nuss_init_tx_chns(struct am65_cpsw_common *common) "Failed to request tx dma channel\n"); goto err; } + tx_chn->dma_dev = k3_udma_glue_tx_get_dma_device(tx_chn->tx_chn); + + tx_chn->desc_pool = k3_cppi_desc_pool_create_name(tx_chn->dma_dev, + tx_chn->descs_num, + hdesc_size, + tx_chn->tx_chn_name); + if (IS_ERR(tx_chn->desc_pool)) { + ret = PTR_ERR(tx_chn->desc_pool); + dev_err(dev, "Failed to create poll %d\n", ret); + goto err; + } tx_chn->irq = k3_udma_glue_tx_get_irq(tx_chn->tx_chn); if (tx_chn->irq <= 0) { @@ -1622,14 +1623,6 @@ static int am65_cpsw_nuss_init_rx_chns(struct am65_cpsw_common *common) /* init all flows */ rx_chn->dev = dev; rx_chn->descs_num = max_desc_num; - rx_chn->desc_pool = k3_cppi_desc_pool_create_name(dev, - rx_chn->descs_num, - hdesc_size, "rx"); - if (IS_ERR(rx_chn->desc_pool)) { - ret = PTR_ERR(rx_chn->desc_pool); - dev_err(dev, "Failed to create rx poll %d\n", ret); - goto err; - } rx_chn->rx_chn = k3_udma_glue_request_rx_chn(dev, "rx", &rx_cfg); if (IS_ERR(rx_chn->rx_chn)) { @@ -1637,6 +1630,16 @@ static int am65_cpsw_nuss_init_rx_chns(struct am65_cpsw_common *common) "Failed to request rx dma channel\n"); goto err; } + rx_chn->dma_dev = k3_udma_glue_rx_get_dma_device(rx_chn->rx_chn); + + rx_chn->desc_pool = k3_cppi_desc_pool_create_name(rx_chn->dma_dev, + rx_chn->descs_num, + hdesc_size, "rx"); + if (IS_ERR(rx_chn->desc_pool)) { + ret = PTR_ERR(rx_chn->desc_pool); + dev_err(dev, "Failed to create rx poll %d\n", ret); + goto err; + } common->rx_flow_id_base = k3_udma_glue_rx_get_flow_id_base(rx_chn->rx_chn); @@ -2164,12 +2167,6 @@ static int am65_cpsw_nuss_probe(struct platform_device *pdev) common->tx_ch_num = 1; common->pf_p0_rx_ptype_rrobin = false; - ret = dma_coerce_mask_and_coherent(dev, DMA_BIT_MASK(48)); - if (ret) { - dev_err(dev, "error setting dma mask: %d\n", ret); - return ret; - } - common->ports = devm_kcalloc(dev, common->port_num, sizeof(*common->ports), GFP_KERNEL); diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.h b/drivers/net/ethernet/ti/am65-cpsw-nuss.h index 02aed4c0ceba7..d7f8a0f76fdc3 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-nuss.h +++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.h @@ -56,6 +56,7 @@ struct am65_cpsw_host { }; struct am65_cpsw_tx_chn { + struct device *dma_dev; struct napi_struct napi_tx; struct am65_cpsw_common *common; struct k3_cppi_desc_pool *desc_pool; @@ -69,6 +70,7 @@ struct am65_cpsw_tx_chn { struct am65_cpsw_rx_chn { struct device *dev; + struct device *dma_dev; struct k3_cppi_desc_pool *desc_pool; struct k3_udma_glue_rx_channel *rx_chn; u32 descs_num; -- GitLab From 39fd0547ee66669479c05b86b65bbca9a16af5f0 Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Fri, 15 Jan 2021 21:28:51 +0200 Subject: [PATCH 1090/2012] net: ethernet: ti: am65-cpsw-nuss: Support for transparent ASEL handling Use the glue layer's functions to convert the dma_addr_t to and from CPPI5 address (with the ASEL bits), which should be used within the descriptors and data buffers. - Per channel coherency support The DMAs use the 'ASEL' bits to select data and configuration fetch path. The ASEL bits are placed at the unused parts of any address field used by the DMAs (pointers to descriptors, addresses in descriptors, ring base addresses). The ASEL is not part of the address (the DMAs can address 48bits). Individual channels can be configured to be coherent (via ACP port) or non coherent individually by configuring the ASEL to appropriate value. [1] https://lore.kernel.org/patchwork/cover/1350756/ Signed-off-by: Peter Ujfalusi Co-developed-by: Vignesh Raghavendra Signed-off-by: Vignesh Raghavendra Signed-off-by: Grygorii Strashko Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/ti/am65-cpsw-nuss.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.c b/drivers/net/ethernet/ti/am65-cpsw-nuss.c index 8bf48cf3be9b2..d060744dd0b26 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c +++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c @@ -376,6 +376,7 @@ static int am65_cpsw_nuss_rx_push(struct am65_cpsw_common *common, cppi5_hdesc_init(desc_rx, CPPI5_INFO0_HDESC_EPIB_PRESENT, AM65_CPSW_NAV_PS_DATA_SIZE); + k3_udma_glue_rx_dma_to_cppi5_addr(rx_chn->rx_chn, &buf_dma); cppi5_hdesc_attach_buf(desc_rx, buf_dma, skb_tailroom(skb), buf_dma, skb_tailroom(skb)); swdata = cppi5_hdesc_get_swdata(desc_rx); *((void **)swdata) = skb; @@ -692,6 +693,7 @@ static void am65_cpsw_nuss_rx_cleanup(void *data, dma_addr_t desc_dma) swdata = cppi5_hdesc_get_swdata(desc_rx); skb = *swdata; cppi5_hdesc_get_obuf(desc_rx, &buf_dma, &buf_dma_len); + k3_udma_glue_rx_cppi5_to_dma_addr(rx_chn->rx_chn, &buf_dma); dma_unmap_single(rx_chn->dma_dev, buf_dma, buf_dma_len, DMA_FROM_DEVICE); k3_cppi_desc_pool_free(rx_chn->desc_pool, desc_rx); @@ -780,6 +782,7 @@ static int am65_cpsw_nuss_rx_packets(struct am65_cpsw_common *common, swdata = cppi5_hdesc_get_swdata(desc_rx); skb = *swdata; cppi5_hdesc_get_obuf(desc_rx, &buf_dma, &buf_dma_len); + k3_udma_glue_rx_cppi5_to_dma_addr(rx_chn->rx_chn, &buf_dma); pkt_len = cppi5_hdesc_get_pktlen(desc_rx); cppi5_desc_get_tags_ids(&desc_rx->hdr, &port_id, NULL); dev_dbg(dev, "%s rx port_id:%d\n", __func__, port_id); @@ -875,19 +878,23 @@ static void am65_cpsw_nuss_xmit_free(struct am65_cpsw_tx_chn *tx_chn, next_desc = first_desc; cppi5_hdesc_get_obuf(first_desc, &buf_dma, &buf_dma_len); + k3_udma_glue_tx_cppi5_to_dma_addr(tx_chn->tx_chn, &buf_dma); dma_unmap_single(tx_chn->dma_dev, buf_dma, buf_dma_len, DMA_TO_DEVICE); next_desc_dma = cppi5_hdesc_get_next_hbdesc(first_desc); + k3_udma_glue_tx_cppi5_to_dma_addr(tx_chn->tx_chn, &next_desc_dma); while (next_desc_dma) { next_desc = k3_cppi_desc_pool_dma2virt(tx_chn->desc_pool, next_desc_dma); cppi5_hdesc_get_obuf(next_desc, &buf_dma, &buf_dma_len); + k3_udma_glue_tx_cppi5_to_dma_addr(tx_chn->tx_chn, &buf_dma); dma_unmap_page(tx_chn->dma_dev, buf_dma, buf_dma_len, DMA_TO_DEVICE); next_desc_dma = cppi5_hdesc_get_next_hbdesc(next_desc); + k3_udma_glue_tx_cppi5_to_dma_addr(tx_chn->tx_chn, &next_desc_dma); k3_cppi_desc_pool_free(tx_chn->desc_pool, next_desc); } @@ -1140,6 +1147,7 @@ static netdev_tx_t am65_cpsw_nuss_ndo_slave_xmit(struct sk_buff *skb, cppi5_hdesc_set_pkttype(first_desc, 0x7); cppi5_desc_set_tags_ids(&first_desc->hdr, 0, port->port_id); + k3_udma_glue_tx_dma_to_cppi5_addr(tx_chn->tx_chn, &buf_dma); cppi5_hdesc_attach_buf(first_desc, buf_dma, pkt_len, buf_dma, pkt_len); swdata = cppi5_hdesc_get_swdata(first_desc); *(swdata) = skb; @@ -1185,11 +1193,13 @@ static netdev_tx_t am65_cpsw_nuss_ndo_slave_xmit(struct sk_buff *skb, } cppi5_hdesc_reset_hbdesc(next_desc); + k3_udma_glue_tx_dma_to_cppi5_addr(tx_chn->tx_chn, &buf_dma); cppi5_hdesc_attach_buf(next_desc, buf_dma, frag_size, buf_dma, frag_size); desc_dma = k3_cppi_desc_pool_virt2dma(tx_chn->desc_pool, next_desc); + k3_udma_glue_tx_dma_to_cppi5_addr(tx_chn->tx_chn, &desc_dma); cppi5_hdesc_link_hbdesc(cur_desc, desc_dma); pkt_len += frag_size; -- GitLab From 1dd3841033b3ad5a507b714d6db0cd401b3ca996 Mon Sep 17 00:00:00 2001 From: Vignesh Raghavendra Date: Fri, 15 Jan 2021 21:28:52 +0200 Subject: [PATCH 1091/2012] net: ti: cpsw_ale: add driver data for AM64 CPSW3g The AM642x CPSW3g is similar to j721e-cpswxg except its ALE table size is 512 entries. Add entry for the same. Signed-off-by: Vignesh Raghavendra Signed-off-by: Grygorii Strashko Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/ti/cpsw_ale.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/net/ethernet/ti/cpsw_ale.c b/drivers/net/ethernet/ti/cpsw_ale.c index cdc308a2aa3ed..d828f856237ad 100644 --- a/drivers/net/ethernet/ti/cpsw_ale.c +++ b/drivers/net/ethernet/ti/cpsw_ale.c @@ -1256,6 +1256,13 @@ static const struct cpsw_ale_dev_id cpsw_ale_id_match[] = { .major_ver_mask = 0x7, .vlan_entry_tbl = vlan_entry_k3_cpswxg, }, + { + .dev_id = "am64-cpswxg", + .features = CPSW_ALE_F_STATUS_REG | CPSW_ALE_F_HW_AUTOAGING, + .major_ver_mask = 0x7, + .vlan_entry_tbl = vlan_entry_k3_cpswxg, + .tbl_entries = 512, + }, { }, }; -- GitLab From 4f7cce2724031a1c302cd6b2bed677c2acebba83 Mon Sep 17 00:00:00 2001 From: Vignesh Raghavendra Date: Fri, 15 Jan 2021 21:28:53 +0200 Subject: [PATCH 1092/2012] net: ethernet: ti: am65-cpsw: add support for am64x cpsw3g The TI AM64x SoCs Gigabit Ethernet Switch subsystem (CPSW3g NUSS) has three ports (2 ext. ports) and provides Ethernet packet communication for the device and can be configured in multi port mode or as an Ethernet switch. This patch adds support for the corresponding CPSW3g version. Signed-off-by: Vignesh Raghavendra Signed-off-by: Grygorii Strashko Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/ti/am65-cpsw-nuss.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.c b/drivers/net/ethernet/ti/am65-cpsw-nuss.c index d060744dd0b26..1850743c04dab 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c +++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c @@ -2115,9 +2115,16 @@ static const struct am65_cpsw_pdata j721e_pdata = { .fdqring_mode = K3_RINGACC_RING_MODE_MESSAGE, }; +static const struct am65_cpsw_pdata am64x_cpswxg_pdata = { + .quirks = 0, + .ale_dev_id = "am64-cpswxg", + .fdqring_mode = K3_RINGACC_RING_MODE_RING, +}; + static const struct of_device_id am65_cpsw_nuss_of_mtable[] = { { .compatible = "ti,am654-cpsw-nuss", .data = &am65x_sr1_0}, { .compatible = "ti,j721e-cpsw-nuss", .data = &j721e_pdata}, + { .compatible = "ti,am642-cpsw-nuss", .data = &am64x_cpswxg_pdata}, { /* sentinel */ }, }; MODULE_DEVICE_TABLE(of, am65_cpsw_nuss_of_mtable); -- GitLab From 0deee7aa23a5be51f3b3572572f55d9c92f4cf4a Mon Sep 17 00:00:00 2001 From: Jiapeng Zhong Date: Mon, 18 Jan 2021 16:31:02 +0800 Subject: [PATCH 1093/2012] taprio: boolean values to a bool variable Fix the following coccicheck warnings: ./net/sched/sch_taprio.c:393:3-16: WARNING: Assignment of 0/1 to bool variable. ./net/sched/sch_taprio.c:375:2-15: WARNING: Assignment of 0/1 to bool variable. ./net/sched/sch_taprio.c:244:4-19: WARNING: Assignment of 0/1 to bool variable. Reported-by: Abaci Robot Signed-off-by: Jiapeng Zhong Link: https://lore.kernel.org/r/1610958662-71166-1-git-send-email-abaci-bugfix@linux.alibaba.com Signed-off-by: Jakub Kicinski --- net/sched/sch_taprio.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c index 6f775275826a4..8287894541e3c 100644 --- a/net/sched/sch_taprio.c +++ b/net/sched/sch_taprio.c @@ -241,7 +241,7 @@ static struct sched_entry *find_entry_to_transmit(struct sk_buff *skb, /* Here, we are just trying to find out the * first available interval in the next cycle. */ - entry_available = 1; + entry_available = true; entry_found = entry; *interval_start = ktime_add_ns(curr_intv_start, cycle); *interval_end = ktime_add_ns(curr_intv_end, cycle); @@ -372,7 +372,7 @@ static long get_packet_txtime(struct sk_buff *skb, struct Qdisc *sch) packet_transmit_time = length_to_duration(q, len); do { - sched_changed = 0; + sched_changed = false; entry = find_entry_to_transmit(skb, sch, sched, admin, minimum_time, @@ -390,7 +390,7 @@ static long get_packet_txtime(struct sk_buff *skb, struct Qdisc *sch) if (admin && admin != sched && ktime_after(txtime, admin->base_time)) { sched = admin; - sched_changed = 1; + sched_changed = true; continue; } -- GitLab From c2e315b8c399cf364b740368561d9d8f3f354402 Mon Sep 17 00:00:00 2001 From: Menglong Dong Date: Mon, 18 Jan 2021 03:15:39 -0800 Subject: [PATCH 1094/2012] net: tun: fix misspellings using codespell tool Some typos are found out by codespell tool: $ codespell -w -i 3 ./drivers/net/tun.c aovid ==> avoid Fix typos found by codespell. Signed-off-by: Menglong Dong Link: https://lore.kernel.org/r/20210118111539.35886-1-dong.menglong@zte.com.cn Signed-off-by: Jakub Kicinski --- drivers/net/tun.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 702215596889b..62690baa19bc8 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -2735,7 +2735,7 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr) err = register_netdevice(tun->dev); if (err < 0) goto err_detach; - /* free_netdev() won't check refcnt, to aovid race + /* free_netdev() won't check refcnt, to avoid race * with dev_put() we need publish tun after registration. */ rcu_assign_pointer(tfile->tun, tun); -- GitLab From eaaf6112286567baa03244bdb1bf6479d5b4c359 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 18 Jan 2021 11:19:02 +0000 Subject: [PATCH 1095/2012] selftests: forwarding: Fix spelling mistake "succeded" -> "succeeded" There are two spelling mistakes in check_fail messages. Fix them. Signed-off-by: Colin Ian King Link: https://lore.kernel.org/r/20210118111902.71096-1-colin.king@canonical.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/forwarding/tc_chains.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/net/forwarding/tc_chains.sh b/tools/testing/selftests/net/forwarding/tc_chains.sh index 2934fb5ed2a2c..b95de0463ebdc 100755 --- a/tools/testing/selftests/net/forwarding/tc_chains.sh +++ b/tools/testing/selftests/net/forwarding/tc_chains.sh @@ -136,7 +136,7 @@ template_filter_fits() tc filter add dev $h2 ingress protocol ip pref 1 handle 1102 \ flower src_mac $h2mac action drop &> /dev/null - check_fail $? "Incorrectly succeded to insert filter which does not template" + check_fail $? "Incorrectly succeeded to insert filter which does not template" tc filter add dev $h2 ingress chain 1 protocol ip pref 1 handle 1101 \ flower src_mac $h2mac action drop @@ -144,7 +144,7 @@ template_filter_fits() tc filter add dev $h2 ingress chain 1 protocol ip pref 1 handle 1102 \ flower dst_mac $h2mac action drop &> /dev/null - check_fail $? "Incorrectly succeded to insert filter which does not template" + check_fail $? "Incorrectly succeeded to insert filter which does not template" tc filter del dev $h2 ingress chain 1 protocol ip pref 1 handle 1102 \ flower &> /dev/null -- GitLab From 1e30b8d755b81b0d1585cb22bc753e9f2124fe87 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 18 Jan 2021 16:08:57 +0100 Subject: [PATCH 1096/2012] net: smsc911x: Make Runtime PM handling more fine-grained Currently the smsc911x driver has mininal power management: during driver probe, the device is powered up, and during driver remove, it is powered down. Improve power management by making it more fine-grained: 1. Power the device down when driver probe is finished, 2. Power the device (down) when it is opened (closed), 3. Make sure the device is powered during PHY access. Signed-off-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/20210118150857.796943-1-geert+renesas@glider.be Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/smsc/smsc911x.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/smsc/smsc911x.c b/drivers/net/ethernet/smsc/smsc911x.c index 823d9a7184fe6..606c79de93a68 100644 --- a/drivers/net/ethernet/smsc/smsc911x.c +++ b/drivers/net/ethernet/smsc/smsc911x.c @@ -557,6 +557,7 @@ static int smsc911x_mii_read(struct mii_bus *bus, int phyaddr, int regidx) unsigned int addr; int i, reg; + pm_runtime_get_sync(bus->parent); spin_lock_irqsave(&pdata->mac_lock, flags); /* Confirm MII not busy */ @@ -582,6 +583,7 @@ static int smsc911x_mii_read(struct mii_bus *bus, int phyaddr, int regidx) out: spin_unlock_irqrestore(&pdata->mac_lock, flags); + pm_runtime_put(bus->parent); return reg; } @@ -594,6 +596,7 @@ static int smsc911x_mii_write(struct mii_bus *bus, int phyaddr, int regidx, unsigned int addr; int i, reg; + pm_runtime_get_sync(bus->parent); spin_lock_irqsave(&pdata->mac_lock, flags); /* Confirm MII not busy */ @@ -623,6 +626,7 @@ static int smsc911x_mii_write(struct mii_bus *bus, int phyaddr, int regidx, out: spin_unlock_irqrestore(&pdata->mac_lock, flags); + pm_runtime_put(bus->parent); return reg; } @@ -1589,6 +1593,8 @@ static int smsc911x_open(struct net_device *dev) int retval; int irq_flags; + pm_runtime_get_sync(dev->dev.parent); + /* find and start the given phy */ if (!dev->phydev) { retval = smsc911x_mii_probe(dev); @@ -1735,6 +1741,7 @@ mii_free_out: phy_disconnect(dev->phydev); dev->phydev = NULL; out: + pm_runtime_put(dev->dev.parent); return retval; } @@ -1766,6 +1773,7 @@ static int smsc911x_stop(struct net_device *dev) dev->phydev = NULL; } netif_carrier_off(dev); + pm_runtime_put(dev->dev.parent); SMSC_TRACE(pdata, ifdown, "Interface stopped"); return 0; @@ -2334,7 +2342,6 @@ static int smsc911x_drv_remove(struct platform_device *pdev) free_netdev(dev); - pm_runtime_put(&pdev->dev); pm_runtime_disable(&pdev->dev); return 0; @@ -2540,6 +2547,7 @@ static int smsc911x_drv_probe(struct platform_device *pdev) } spin_unlock_irq(&pdata->mac_lock); + pm_runtime_put(&pdev->dev); netdev_info(dev, "MAC Address: %pM\n", dev->dev_addr); -- GitLab From fc6f89dd8c55cc756ebeff3d496218a2fbad3ec6 Mon Sep 17 00:00:00 2001 From: Xu Wang Date: Tue, 19 Jan 2021 07:50:59 +0000 Subject: [PATCH 1097/2012] octeontx2-af: Remove unneeded semicolons fix semicolon.cocci warnings: ./drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c:272:2-3: Unneeded semicolon drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c:1788:3-4: Unneeded semicolon drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c:1809:3-4: Unneeded semicolon drivers/net/ethernet/marvell/octeontx2/af/rvu.c:1326:2-3: Unneeded semicolon Signed-off-by: Xu Wang Link: https://lore.kernel.org/r/20210119075059.17493-1-vulab@iscas.ac.cn Link: https://lore.kernel.org/r/20210119075507.17699-1-vulab@iscas.ac.cn Link: https://lore.kernel.org/r/20210119080037.17931-1-vulab@iscas.ac.cn Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/marvell/octeontx2/af/rvu.c | 2 +- drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c | 4 ++-- drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c index e8fd712860a16..0b6bf9f0c6f0f 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c @@ -1323,7 +1323,7 @@ static int rvu_get_attach_blkaddr(struct rvu *rvu, int blktype, break; default: return rvu_get_blkaddr(rvu, blktype, 0); - }; + } if (is_block_implemented(rvu->hw, blkaddr)) return blkaddr; diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c index 0c6882e84d425..f60499562d2e2 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c @@ -1786,7 +1786,7 @@ static void rvu_dbg_npc_mcam_show_action(struct seq_file *s, break; default: break; - }; + } } else { switch (rule->rx_action.op) { case NIX_RX_ACTIONOP_DROP: @@ -1807,7 +1807,7 @@ static void rvu_dbg_npc_mcam_show_action(struct seq_file *s, break; default: break; - }; + } } } diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c index 7572321cea79e..4ba9d54ce4e33 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c @@ -274,7 +274,7 @@ static void npc_scan_parse_result(struct npc_mcam *mcam, u8 bit_number, break; default: return; - }; + } npc_set_kw_masks(mcam, type, nr_bits, kwi, offset, intf); } -- GitLab From b160c28548bc0a87cbd16d5af6d3edcfd70b8c9a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 19 Jan 2021 08:49:00 -0800 Subject: [PATCH 1098/2012] tcp: do not mess with cloned skbs in tcp_add_backlog() Heiner Kallweit reported that some skbs were sent with the following invalid GSO properties : - gso_size > 0 - gso_type == 0 This was triggerring a WARN_ON_ONCE() in rtl8169_tso_csum_v2. Juerg Haefliger was able to reproduce a similar issue using a lan78xx NIC and a workload mixing TCP incoming traffic and forwarded packets. The problem is that tcp_add_backlog() is writing over gso_segs and gso_size even if the incoming packet will not be coalesced to the backlog tail packet. While skb_try_coalesce() would bail out if tail packet is cloned, this overwriting would lead to corruptions of other packets cooked by lan78xx, sharing a common super-packet. The strategy used by lan78xx is to use a big skb, and split it into all received packets using skb_clone() to avoid copies. The drawback of this strategy is that all the small skb share a common struct skb_shared_info. This patch rewrites TCP gso_size/gso_segs handling to only happen on the tail skb, since skb_try_coalesce() made sure it was not cloned. Fixes: 4f693b55c3d2 ("tcp: implement coalescing on backlog queue") Signed-off-by: Eric Dumazet Bisected-by: Juerg Haefliger Tested-by: Juerg Haefliger Reported-by: Heiner Kallweit Link: https://bugzilla.kernel.org/show_bug.cgi?id=209423 Link: https://lore.kernel.org/r/20210119164900.766957-1-eric.dumazet@gmail.com Signed-off-by: Jakub Kicinski --- net/ipv4/tcp_ipv4.c | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 58207c7769d05..4e82745d336fc 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1760,6 +1760,7 @@ int tcp_v4_early_demux(struct sk_buff *skb) bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb) { u32 limit = READ_ONCE(sk->sk_rcvbuf) + READ_ONCE(sk->sk_sndbuf); + u32 tail_gso_size, tail_gso_segs; struct skb_shared_info *shinfo; const struct tcphdr *th; struct tcphdr *thtail; @@ -1767,6 +1768,7 @@ bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb) unsigned int hdrlen; bool fragstolen; u32 gso_segs; + u32 gso_size; int delta; /* In case all data was pulled from skb frags (in __pskb_pull_tail()), @@ -1792,13 +1794,6 @@ bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb) */ th = (const struct tcphdr *)skb->data; hdrlen = th->doff * 4; - shinfo = skb_shinfo(skb); - - if (!shinfo->gso_size) - shinfo->gso_size = skb->len - hdrlen; - - if (!shinfo->gso_segs) - shinfo->gso_segs = 1; tail = sk->sk_backlog.tail; if (!tail) @@ -1821,6 +1816,15 @@ bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb) goto no_coalesce; __skb_pull(skb, hdrlen); + + shinfo = skb_shinfo(skb); + gso_size = shinfo->gso_size ?: skb->len; + gso_segs = shinfo->gso_segs ?: 1; + + shinfo = skb_shinfo(tail); + tail_gso_size = shinfo->gso_size ?: (tail->len - hdrlen); + tail_gso_segs = shinfo->gso_segs ?: 1; + if (skb_try_coalesce(tail, skb, &fragstolen, &delta)) { TCP_SKB_CB(tail)->end_seq = TCP_SKB_CB(skb)->end_seq; @@ -1847,11 +1851,8 @@ bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb) } /* Not as strict as GRO. We only need to carry mss max value */ - skb_shinfo(tail)->gso_size = max(shinfo->gso_size, - skb_shinfo(tail)->gso_size); - - gso_segs = skb_shinfo(tail)->gso_segs + shinfo->gso_segs; - skb_shinfo(tail)->gso_segs = min_t(u32, gso_segs, 0xFFFF); + shinfo->gso_size = max(gso_size, tail_gso_size); + shinfo->gso_segs = min_t(u32, gso_segs + tail_gso_segs, 0xFFFF); sk->sk_backlog.len += delta; __NET_INC_STATS(sock_net(sk), -- GitLab From 00b229f762b020eebf55a52b984aec76ae0ad966 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Tue, 19 Jan 2021 17:56:56 +0100 Subject: [PATCH 1099/2012] net: fix GSO for SG-enabled devices The commit dbd50f238dec ("net: move the hsize check to the else block in skb_segment") introduced a data corruption for devices supporting scatter-gather. The problem boils down to signed/unsigned comparison given unexpected results: if signed 'hsize' is negative, it will be considered greater than a positive 'len', which is unsigned. This commit addresses resorting to the old checks order, so that 'hsize' never has a negative value when compared with 'len'. v1 -> v2: - reorder hsize checks instead of explicit cast (Alex) Bisected-by: Matthieu Baerts Fixes: dbd50f238dec ("net: move the hsize check to the else block in skb_segment") Signed-off-by: Paolo Abeni Reviewed-by: Xin Long Link: https://lore.kernel.org/r/861947c2d2d087db82af93c21920ce8147d15490.1611074818.git.pabeni@redhat.com Signed-off-by: Jakub Kicinski --- net/core/skbuff.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index e835193cabcc3..cf2c4dcf42579 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -3938,10 +3938,10 @@ normal: skb_release_head_state(nskb); __skb_push(nskb, doffset); } else { + if (hsize < 0) + hsize = 0; if (hsize > len || !sg) hsize = len; - else if (hsize < 0) - hsize = 0; nskb = __alloc_skb(hsize + doffset + headroom, GFP_ATOMIC, skb_alloc_rx_flag(head_skb), -- GitLab From 7b8fc0103bb51d1d3e1fb5fd67958612e709f883 Mon Sep 17 00:00:00 2001 From: Jarod Wilson Date: Mon, 18 Jan 2021 20:09:27 -0500 Subject: [PATCH 1100/2012] bonding: add a vlan+srcmac tx hashing option This comes from an end-user request, where they're running multiple VMs on hosts with bonded interfaces connected to some interest switch topologies, where 802.3ad isn't an option. They're currently running a proprietary solution that effectively achieves load-balancing of VMs and bandwidth utilization improvements with a similar form of transmission algorithm. Basically, each VM has it's own vlan, so it always sends its traffic out the same interface, unless that interface fails. Traffic gets split between the interfaces, maintaining a consistent path, with failover still available if an interface goes down. Unlike bond_eth_hash(), this hash function is using the full source MAC address instead of just the last byte, as there are so few components to the hash, and in the no-vlan case, we would be returning just the last byte of the source MAC as the hash value. It's entirely possible to have two NICs in a bond with the same last byte of their MAC, but not the same MAC, so this adjustment should guarantee distinct hashes in all cases. This has been rudimetarily tested to provide similar results to the proprietary solution it is aiming to replace. A patch for iproute2 is also posted, to properly support the new mode there as well. Cc: Jay Vosburgh Cc: Veaceslav Falico Cc: Andy Gospodarek Cc: Thomas Davis Signed-off-by: Jarod Wilson Link: https://lore.kernel.org/r/20210119010927.1191922-1-jarod@redhat.com Signed-off-by: Jakub Kicinski --- Documentation/networking/bonding.rst | 13 +++++++++++ drivers/net/bonding/bond_main.c | 34 ++++++++++++++++++++++++++-- drivers/net/bonding/bond_options.c | 13 ++++++----- include/linux/netdevice.h | 1 + include/uapi/linux/if_bonding.h | 1 + 5 files changed, 54 insertions(+), 8 deletions(-) diff --git a/Documentation/networking/bonding.rst b/Documentation/networking/bonding.rst index adc314639085b..5f690f0ad0e4f 100644 --- a/Documentation/networking/bonding.rst +++ b/Documentation/networking/bonding.rst @@ -951,6 +951,19 @@ xmit_hash_policy packets will be distributed according to the encapsulated flows. + vlan+srcmac + + This policy uses a very rudimentary vlan ID and source mac + hash to load-balance traffic per-vlan, with failover + should one leg fail. The intended use case is for a bond + shared by multiple virtual machines, all configured to + use their own vlan, to give lacp-like functionality + without requiring lacp-capable switching hardware. + + The formula for the hash is simply + + hash = (vlan ID) XOR (source MAC vendor) XOR (source MAC dev) + The default value is layer2. This option was added in bonding version 2.6.3. In earlier versions of bonding, this parameter does not exist, and the layer2 policy is the only policy. The diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 539c6bc218df4..74cbbb22470b5 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -167,7 +167,7 @@ module_param(xmit_hash_policy, charp, 0); MODULE_PARM_DESC(xmit_hash_policy, "balance-alb, balance-tlb, balance-xor, 802.3ad hashing method; " "0 for layer 2 (default), 1 for layer 3+4, " "2 for layer 2+3, 3 for encap layer 2+3, " - "4 for encap layer 3+4"); + "4 for encap layer 3+4, 5 for vlan+srcmac"); module_param(arp_interval, int, 0); MODULE_PARM_DESC(arp_interval, "arp interval in milliseconds"); module_param_array(arp_ip_target, charp, NULL, 0); @@ -1457,6 +1457,8 @@ static enum netdev_lag_hash bond_lag_hash_type(struct bonding *bond, return NETDEV_LAG_HASH_E23; case BOND_XMIT_POLICY_ENCAP34: return NETDEV_LAG_HASH_E34; + case BOND_XMIT_POLICY_VLAN_SRCMAC: + return NETDEV_LAG_HASH_VLAN_SRCMAC; default: return NETDEV_LAG_HASH_UNKNOWN; } @@ -3519,6 +3521,27 @@ static bool bond_flow_ip(struct sk_buff *skb, struct flow_keys *fk, return true; } +static u32 bond_vlan_srcmac_hash(struct sk_buff *skb) +{ + struct ethhdr *mac_hdr = (struct ethhdr *)skb_mac_header(skb); + u32 srcmac_vendor = 0, srcmac_dev = 0; + u16 vlan; + int i; + + for (i = 0; i < 3; i++) + srcmac_vendor = (srcmac_vendor << 8) | mac_hdr->h_source[i]; + + for (i = 3; i < ETH_ALEN; i++) + srcmac_dev = (srcmac_dev << 8) | mac_hdr->h_source[i]; + + if (!skb_vlan_tag_present(skb)) + return srcmac_vendor ^ srcmac_dev; + + vlan = skb_vlan_tag_get(skb); + + return vlan ^ srcmac_vendor ^ srcmac_dev; +} + /* Extract the appropriate headers based on bond's xmit policy */ static bool bond_flow_dissect(struct bonding *bond, struct sk_buff *skb, struct flow_keys *fk) @@ -3526,10 +3549,14 @@ static bool bond_flow_dissect(struct bonding *bond, struct sk_buff *skb, bool l34 = bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER34; int noff, proto = -1; - if (bond->params.xmit_policy > BOND_XMIT_POLICY_LAYER23) { + switch (bond->params.xmit_policy) { + case BOND_XMIT_POLICY_ENCAP23: + case BOND_XMIT_POLICY_ENCAP34: memset(fk, 0, sizeof(*fk)); return __skb_flow_dissect(NULL, skb, &flow_keys_bonding, fk, NULL, 0, 0, 0, 0); + default: + break; } fk->ports.ports = 0; @@ -3591,6 +3618,9 @@ u32 bond_xmit_hash(struct bonding *bond, struct sk_buff *skb) skb->l4_hash) return skb->hash; + if (bond->params.xmit_policy == BOND_XMIT_POLICY_VLAN_SRCMAC) + return bond_vlan_srcmac_hash(skb); + if (bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER2 || !bond_flow_dissect(bond, skb, &flow)) return bond_eth_hash(skb); diff --git a/drivers/net/bonding/bond_options.c b/drivers/net/bonding/bond_options.c index 8fcbf7f9c7b2a..77d7c38bd4354 100644 --- a/drivers/net/bonding/bond_options.c +++ b/drivers/net/bonding/bond_options.c @@ -96,12 +96,13 @@ static const struct bond_opt_value bond_pps_tbl[] = { }; static const struct bond_opt_value bond_xmit_hashtype_tbl[] = { - { "layer2", BOND_XMIT_POLICY_LAYER2, BOND_VALFLAG_DEFAULT}, - { "layer3+4", BOND_XMIT_POLICY_LAYER34, 0}, - { "layer2+3", BOND_XMIT_POLICY_LAYER23, 0}, - { "encap2+3", BOND_XMIT_POLICY_ENCAP23, 0}, - { "encap3+4", BOND_XMIT_POLICY_ENCAP34, 0}, - { NULL, -1, 0}, + { "layer2", BOND_XMIT_POLICY_LAYER2, BOND_VALFLAG_DEFAULT}, + { "layer3+4", BOND_XMIT_POLICY_LAYER34, 0}, + { "layer2+3", BOND_XMIT_POLICY_LAYER23, 0}, + { "encap2+3", BOND_XMIT_POLICY_ENCAP23, 0}, + { "encap3+4", BOND_XMIT_POLICY_ENCAP34, 0}, + { "vlan+srcmac", BOND_XMIT_POLICY_VLAN_SRCMAC, 0}, + { NULL, -1, 0}, }; static const struct bond_opt_value bond_arp_validate_tbl[] = { diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 02dcef4d66e2e..ef517254367d9 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2617,6 +2617,7 @@ enum netdev_lag_hash { NETDEV_LAG_HASH_L23, NETDEV_LAG_HASH_E23, NETDEV_LAG_HASH_E34, + NETDEV_LAG_HASH_VLAN_SRCMAC, NETDEV_LAG_HASH_UNKNOWN, }; diff --git a/include/uapi/linux/if_bonding.h b/include/uapi/linux/if_bonding.h index 45f3750aa861b..e8eb4ad03cf18 100644 --- a/include/uapi/linux/if_bonding.h +++ b/include/uapi/linux/if_bonding.h @@ -94,6 +94,7 @@ #define BOND_XMIT_POLICY_LAYER23 2 /* layer 2+3 (IP ^ MAC) */ #define BOND_XMIT_POLICY_ENCAP23 3 /* encapsulated layer 2+3 */ #define BOND_XMIT_POLICY_ENCAP34 4 /* encapsulated layer 3+4 */ +#define BOND_XMIT_POLICY_VLAN_SRCMAC 5 /* vlan + source MAC */ /* 802.3ad port state definitions (43.4.2.2 in the 802.3ad standard) */ #define LACP_STATE_LACP_ACTIVITY 0x1 -- GitLab From 8e4052c32d6b4b39c1e13c652c7e33748d447409 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 19 Jan 2021 17:48:03 +0300 Subject: [PATCH 1101/2012] net: dsa: b53: fix an off by one in checking "vlan->vid" The > comparison should be >= to prevent accessing one element beyond the end of the dev->vlans[] array in the caller function, b53_vlan_add(). The "dev->vlans" array is allocated in the b53_switch_init() function and it has "dev->num_vlans" elements. Fixes: a2482d2ce349 ("net: dsa: b53: Plug in VLAN support") Signed-off-by: Dan Carpenter Acked-by: Florian Fainelli Link: https://lore.kernel.org/r/YAbxI97Dl/pmBy5V@mwanda Signed-off-by: Jakub Kicinski --- drivers/net/dsa/b53/b53_common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index 288b5a5c3e0db..95c7fa171e35a 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -1404,7 +1404,7 @@ int b53_vlan_prepare(struct dsa_switch *ds, int port, !(vlan->flags & BRIDGE_VLAN_INFO_UNTAGGED)) return -EINVAL; - if (vlan->vid_end > dev->num_vlans) + if (vlan->vid_end >= dev->num_vlans) return -ERANGE; b53_enable_vlan(dev, true, ds->vlan_filtering); -- GitLab From 9c30ae8398b0813e237bde387d67a7f74ab2db2d Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Tue, 19 Jan 2021 11:26:19 -0800 Subject: [PATCH 1102/2012] tcp: fix TCP socket rehash stats mis-accounting The previous commit 32efcc06d2a1 ("tcp: export count for rehash attempts") would mis-account rehashing SNMP and socket stats: a. During handshake of an active open, only counts the first SYN timeout b. After handshake of passive and active open, stop updating after (roughly) TCP_RETRIES1 recurring RTOs c. After the socket aborts, over count timeout_rehash by 1 This patch fixes this by checking the rehash result from sk_rethink_txhash. Fixes: 32efcc06d2a1 ("tcp: export count for rehash attempts") Signed-off-by: Yuchung Cheng Signed-off-by: Eric Dumazet Signed-off-by: Neal Cardwell Link: https://lore.kernel.org/r/20210119192619.1848270-1-ycheng@google.com Signed-off-by: Jakub Kicinski --- include/net/sock.h | 17 ++++++++++++----- net/ipv4/tcp_input.c | 5 ++--- net/ipv4/tcp_timer.c | 22 ++++++++-------------- 3 files changed, 22 insertions(+), 22 deletions(-) diff --git a/include/net/sock.h b/include/net/sock.h index bdc4323ce53c9..129d200bccb46 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1921,10 +1921,13 @@ static inline void sk_set_txhash(struct sock *sk) sk->sk_txhash = net_tx_rndhash(); } -static inline void sk_rethink_txhash(struct sock *sk) +static inline bool sk_rethink_txhash(struct sock *sk) { - if (sk->sk_txhash) + if (sk->sk_txhash) { sk_set_txhash(sk); + return true; + } + return false; } static inline struct dst_entry * @@ -1947,12 +1950,10 @@ sk_dst_get(struct sock *sk) return dst; } -static inline void dst_negative_advice(struct sock *sk) +static inline void __dst_negative_advice(struct sock *sk) { struct dst_entry *ndst, *dst = __sk_dst_get(sk); - sk_rethink_txhash(sk); - if (dst && dst->ops->negative_advice) { ndst = dst->ops->negative_advice(dst); @@ -1964,6 +1965,12 @@ static inline void dst_negative_advice(struct sock *sk) } } +static inline void dst_negative_advice(struct sock *sk) +{ + sk_rethink_txhash(sk); + __dst_negative_advice(sk); +} + static inline void __sk_dst_set(struct sock *sk, struct dst_entry *dst) { diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index bafcab75f4256..a7dfca0a38cd7 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -4397,10 +4397,9 @@ static void tcp_rcv_spurious_retrans(struct sock *sk, const struct sk_buff *skb) * The receiver remembers and reflects via DSACKs. Leverage the * DSACK state and change the txhash to re-route speculatively. */ - if (TCP_SKB_CB(skb)->seq == tcp_sk(sk)->duplicate_sack[0].start_seq) { - sk_rethink_txhash(sk); + if (TCP_SKB_CB(skb)->seq == tcp_sk(sk)->duplicate_sack[0].start_seq && + sk_rethink_txhash(sk)) NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPDUPLICATEDATAREHASH); - } } static void tcp_send_dupack(struct sock *sk, const struct sk_buff *skb) diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 454732ecc8f33..faa92948441ba 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -219,14 +219,8 @@ static int tcp_write_timeout(struct sock *sk) int retry_until; if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) { - if (icsk->icsk_retransmits) { - dst_negative_advice(sk); - } else { - sk_rethink_txhash(sk); - tp->timeout_rehash++; - __NET_INC_STATS(sock_net(sk), - LINUX_MIB_TCPTIMEOUTREHASH); - } + if (icsk->icsk_retransmits) + __dst_negative_advice(sk); retry_until = icsk->icsk_syn_retries ? : net->ipv4.sysctl_tcp_syn_retries; expired = icsk->icsk_retransmits >= retry_until; } else { @@ -234,12 +228,7 @@ static int tcp_write_timeout(struct sock *sk) /* Black hole detection */ tcp_mtu_probing(icsk, sk); - dst_negative_advice(sk); - } else { - sk_rethink_txhash(sk); - tp->timeout_rehash++; - __NET_INC_STATS(sock_net(sk), - LINUX_MIB_TCPTIMEOUTREHASH); + __dst_negative_advice(sk); } retry_until = net->ipv4.sysctl_tcp_retries2; @@ -270,6 +259,11 @@ static int tcp_write_timeout(struct sock *sk) return 1; } + if (sk_rethink_txhash(sk)) { + tp->timeout_rehash++; + __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPTIMEOUTREHASH); + } + return 0; } -- GitLab From 08685be7761d69914f08c3d6211c543a385a5b9c Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Mon, 11 Jan 2021 16:24:08 +1000 Subject: [PATCH 1103/2012] powerpc/64s: fix scv entry fallback flush vs interrupt The L1D flush fallback functions are not recoverable vs interrupts, yet the scv entry flush runs with MSR[EE]=1. This can result in a timer (soft-NMI) or MCE or SRESET interrupt hitting here and overwriting the EXRFI save area, which ends up corrupting userspace registers for scv return. Fix this by disabling RI and EE for the scv entry fallback flush. Fixes: f79643787e0a0 ("powerpc/64s: flush L1D on kernel entry") Cc: stable@vger.kernel.org # 5.9+ which also have flush L1D patch backport Reported-by: Tulio Magno Quites Machado Filho Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210111062408.287092-1-npiggin@gmail.com --- arch/powerpc/include/asm/exception-64s.h | 13 ++++++++++++ arch/powerpc/include/asm/feature-fixups.h | 10 ++++++++++ arch/powerpc/kernel/entry_64.S | 2 +- arch/powerpc/kernel/exceptions-64s.S | 19 ++++++++++++++++++ arch/powerpc/kernel/vmlinux.lds.S | 7 +++++++ arch/powerpc/lib/feature-fixups.c | 24 ++++++++++++++++++++--- 6 files changed, 71 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h index 1d32b174ab6ae..c1a8aac01cf91 100644 --- a/arch/powerpc/include/asm/exception-64s.h +++ b/arch/powerpc/include/asm/exception-64s.h @@ -63,6 +63,12 @@ nop; \ nop; +#define SCV_ENTRY_FLUSH_SLOT \ + SCV_ENTRY_FLUSH_FIXUP_SECTION; \ + nop; \ + nop; \ + nop; + /* * r10 must be free to use, r13 must be paca */ @@ -70,6 +76,13 @@ STF_ENTRY_BARRIER_SLOT; \ ENTRY_FLUSH_SLOT +/* + * r10, ctr must be free to use, r13 must be paca + */ +#define SCV_INTERRUPT_TO_KERNEL \ + STF_ENTRY_BARRIER_SLOT; \ + SCV_ENTRY_FLUSH_SLOT + /* * Macros for annotating the expected destination of (h)rfid * diff --git a/arch/powerpc/include/asm/feature-fixups.h b/arch/powerpc/include/asm/feature-fixups.h index f6d2acb574252..ac605fc369c42 100644 --- a/arch/powerpc/include/asm/feature-fixups.h +++ b/arch/powerpc/include/asm/feature-fixups.h @@ -240,6 +240,14 @@ label##3: \ FTR_ENTRY_OFFSET 957b-958b; \ .popsection; +#define SCV_ENTRY_FLUSH_FIXUP_SECTION \ +957: \ + .pushsection __scv_entry_flush_fixup,"a"; \ + .align 2; \ +958: \ + FTR_ENTRY_OFFSET 957b-958b; \ + .popsection; + #define RFI_FLUSH_FIXUP_SECTION \ 951: \ .pushsection __rfi_flush_fixup,"a"; \ @@ -273,10 +281,12 @@ label##3: \ extern long stf_barrier_fallback; extern long entry_flush_fallback; +extern long scv_entry_flush_fallback; extern long __start___stf_entry_barrier_fixup, __stop___stf_entry_barrier_fixup; extern long __start___stf_exit_barrier_fixup, __stop___stf_exit_barrier_fixup; extern long __start___uaccess_flush_fixup, __stop___uaccess_flush_fixup; extern long __start___entry_flush_fixup, __stop___entry_flush_fixup; +extern long __start___scv_entry_flush_fixup, __stop___scv_entry_flush_fixup; extern long __start___rfi_flush_fixup, __stop___rfi_flush_fixup; extern long __start___barrier_nospec_fixup, __stop___barrier_nospec_fixup; extern long __start__btb_flush_fixup, __stop__btb_flush_fixup; diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index aa1af139d9472..33ddfeef4fe9e 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -75,7 +75,7 @@ BEGIN_FTR_SECTION bne .Ltabort_syscall END_FTR_SECTION_IFSET(CPU_FTR_TM) #endif - INTERRUPT_TO_KERNEL + SCV_INTERRUPT_TO_KERNEL mr r10,r1 ld r1,PACAKSAVE(r13) std r10,0(r1) diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index e02ad6fefa46c..6e53f76387374 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -2993,6 +2993,25 @@ TRAMP_REAL_BEGIN(entry_flush_fallback) ld r11,PACA_EXRFI+EX_R11(r13) blr +/* + * The SCV entry flush happens with interrupts enabled, so it must disable + * to prevent EXRFI being clobbered by NMIs (e.g., soft_nmi_common). r10 + * (containing LR) does not need to be preserved here because scv entry + * puts 0 in the pt_regs, CTR can be clobbered for the same reason. + */ +TRAMP_REAL_BEGIN(scv_entry_flush_fallback) + li r10,0 + mtmsrd r10,1 + lbz r10,PACAIRQHAPPENED(r13) + ori r10,r10,PACA_IRQ_HARD_DIS + stb r10,PACAIRQHAPPENED(r13) + std r11,PACA_EXRFI+EX_R11(r13) + L1D_DISPLACEMENT_FLUSH + ld r11,PACA_EXRFI+EX_R11(r13) + li r10,MSR_RI + mtmsrd r10,1 + blr + TRAMP_REAL_BEGIN(rfi_flush_fallback) SET_SCRATCH0(r13); GET_PACA(r13); diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index 4ab426b8b0e02..72fa3c00229a5 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -145,6 +145,13 @@ SECTIONS __stop___entry_flush_fixup = .; } + . = ALIGN(8); + __scv_entry_flush_fixup : AT(ADDR(__scv_entry_flush_fixup) - LOAD_OFFSET) { + __start___scv_entry_flush_fixup = .; + *(__scv_entry_flush_fixup) + __stop___scv_entry_flush_fixup = .; + } + . = ALIGN(8); __stf_exit_barrier_fixup : AT(ADDR(__stf_exit_barrier_fixup) - LOAD_OFFSET) { __start___stf_exit_barrier_fixup = .; diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c index 47821055b94c9..1fd31b4b0e139 100644 --- a/arch/powerpc/lib/feature-fixups.c +++ b/arch/powerpc/lib/feature-fixups.c @@ -290,9 +290,6 @@ void do_entry_flush_fixups(enum l1d_flush_type types) long *start, *end; int i; - start = PTRRELOC(&__start___entry_flush_fixup); - end = PTRRELOC(&__stop___entry_flush_fixup); - instrs[0] = 0x60000000; /* nop */ instrs[1] = 0x60000000; /* nop */ instrs[2] = 0x60000000; /* nop */ @@ -312,6 +309,8 @@ void do_entry_flush_fixups(enum l1d_flush_type types) if (types & L1D_FLUSH_MTTRIG) instrs[i++] = 0x7c12dba6; /* mtspr TRIG2,r0 (SPR #882) */ + start = PTRRELOC(&__start___entry_flush_fixup); + end = PTRRELOC(&__stop___entry_flush_fixup); for (i = 0; start < end; start++, i++) { dest = (void *)start + *start; @@ -328,6 +327,25 @@ void do_entry_flush_fixups(enum l1d_flush_type types) patch_instruction((struct ppc_inst *)(dest + 2), ppc_inst(instrs[2])); } + start = PTRRELOC(&__start___scv_entry_flush_fixup); + end = PTRRELOC(&__stop___scv_entry_flush_fixup); + for (; start < end; start++, i++) { + dest = (void *)start + *start; + + pr_devel("patching dest %lx\n", (unsigned long)dest); + + patch_instruction((struct ppc_inst *)dest, ppc_inst(instrs[0])); + + if (types == L1D_FLUSH_FALLBACK) + patch_branch((struct ppc_inst *)(dest + 1), (unsigned long)&scv_entry_flush_fallback, + BRANCH_SET_LINK); + else + patch_instruction((struct ppc_inst *)(dest + 1), ppc_inst(instrs[1])); + + patch_instruction((struct ppc_inst *)(dest + 2), ppc_inst(instrs[2])); + } + + printk(KERN_DEBUG "entry-flush: patched %d locations (%s flush)\n", i, (types == L1D_FLUSH_NONE) ? "no" : (types == L1D_FLUSH_FALLBACK) ? "fallback displacement" : -- GitLab From 710eb8e32d04714452759f2b66884bfa7e97d495 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Thu, 7 Jan 2021 09:18:45 +0200 Subject: [PATCH 1104/2012] vdpa/mlx5: Fix memory key MTT population map_direct_mr() assumed that the number of scatter/gather entries returned by dma_map_sg_attrs() was equal to the number of segments in the sgl list. This led to wrong population of the mkey object. Fix this by properly referring to the returned value. The hardware expects each MTT entry to contain the DMA address of a contiguous block of memory of size (1 << mr->log_size) bytes. dma_map_sg_attrs() can coalesce several sg entries into a single scatter/gather entry of contiguous DMA range so we need to scan the list and refer to the size of each s/g entry. In addition, get rid of fill_sg() which effect is overwritten by populate_mtts(). Fixes: 94abbccdf291 ("vdpa/mlx5: Add shared memory registration code") Signed-off-by: Eli Cohen Link: https://lore.kernel.org/r/20210107071845.GA224876@mtl-vdi-166.wap.labs.mlnx Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang --- drivers/vdpa/mlx5/core/mlx5_vdpa.h | 1 + drivers/vdpa/mlx5/core/mr.c | 28 ++++++++++++---------------- 2 files changed, 13 insertions(+), 16 deletions(-) diff --git a/drivers/vdpa/mlx5/core/mlx5_vdpa.h b/drivers/vdpa/mlx5/core/mlx5_vdpa.h index 5c92a576edae8..08f742fd24099 100644 --- a/drivers/vdpa/mlx5/core/mlx5_vdpa.h +++ b/drivers/vdpa/mlx5/core/mlx5_vdpa.h @@ -15,6 +15,7 @@ struct mlx5_vdpa_direct_mr { struct sg_table sg_head; int log_size; int nsg; + int nent; struct list_head list; u64 offset; }; diff --git a/drivers/vdpa/mlx5/core/mr.c b/drivers/vdpa/mlx5/core/mr.c index 4b6195666c589..d300f799efcd1 100644 --- a/drivers/vdpa/mlx5/core/mr.c +++ b/drivers/vdpa/mlx5/core/mr.c @@ -25,17 +25,6 @@ static int get_octo_len(u64 len, int page_shift) return (npages + 1) / 2; } -static void fill_sg(struct mlx5_vdpa_direct_mr *mr, void *in) -{ - struct scatterlist *sg; - __be64 *pas; - int i; - - pas = MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt); - for_each_sg(mr->sg_head.sgl, sg, mr->nsg, i) - (*pas) = cpu_to_be64(sg_dma_address(sg)); -} - static void mlx5_set_access_mode(void *mkc, int mode) { MLX5_SET(mkc, mkc, access_mode_1_0, mode & 0x3); @@ -45,10 +34,18 @@ static void mlx5_set_access_mode(void *mkc, int mode) static void populate_mtts(struct mlx5_vdpa_direct_mr *mr, __be64 *mtt) { struct scatterlist *sg; + int nsg = mr->nsg; + u64 dma_addr; + u64 dma_len; + int j = 0; int i; - for_each_sg(mr->sg_head.sgl, sg, mr->nsg, i) - mtt[i] = cpu_to_be64(sg_dma_address(sg)); + for_each_sg(mr->sg_head.sgl, sg, mr->nent, i) { + for (dma_addr = sg_dma_address(sg), dma_len = sg_dma_len(sg); + nsg && dma_len; + nsg--, dma_addr += BIT(mr->log_size), dma_len -= BIT(mr->log_size)) + mtt[j++] = cpu_to_be64(dma_addr); + } } static int create_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct_mr *mr) @@ -64,7 +61,6 @@ static int create_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct return -ENOMEM; MLX5_SET(create_mkey_in, in, uid, mvdev->res.uid); - fill_sg(mr, in); mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); MLX5_SET(mkc, mkc, lw, !!(mr->perm & VHOST_MAP_WO)); MLX5_SET(mkc, mkc, lr, !!(mr->perm & VHOST_MAP_RO)); @@ -276,8 +272,8 @@ static int map_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct_mr done: mr->log_size = log_entity_size; mr->nsg = nsg; - err = dma_map_sg_attrs(dma, mr->sg_head.sgl, mr->nsg, DMA_BIDIRECTIONAL, 0); - if (!err) + mr->nent = dma_map_sg_attrs(dma, mr->sg_head.sgl, mr->nsg, DMA_BIDIRECTIONAL, 0); + if (!mr->nent) goto err_map; err = create_direct_mr(mvdev, mr); -- GitLab From a37eef63bc9e16e06361b539e528058146af80ab Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Tue, 19 Jan 2021 14:03:18 +0100 Subject: [PATCH 1105/2012] drm/syncobj: Fix use-after-free MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit While reviewing Christian's annotation patch I noticed that we have a user-after-free for the WAIT_FOR_SUBMIT case: We drop the syncobj reference before we've completed the waiting. Of course usually there's nothing bad happening here since userspace keeps the reference, but we can't rely on userspace to play nice here! Signed-off-by: Daniel Vetter Fixes: bc9c80fe01a2 ("drm/syncobj: use the timeline point in drm_syncobj_find_fence v4") Reviewed-by: Christian König Cc: Christian König Cc: Lionel Landwerlin Cc: Maarten Lankhorst Cc: Maxime Ripard Cc: Thomas Zimmermann Cc: David Airlie Cc: Daniel Vetter Cc: dri-devel@lists.freedesktop.org Cc: # v5.2+ Link: https://patchwork.freedesktop.org/patch/msgid/20210119130318.615145-1-daniel.vetter@ffwll.ch --- drivers/gpu/drm/drm_syncobj.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c index 6e74e6745ecae..3491460498491 100644 --- a/drivers/gpu/drm/drm_syncobj.c +++ b/drivers/gpu/drm/drm_syncobj.c @@ -388,19 +388,18 @@ int drm_syncobj_find_fence(struct drm_file *file_private, return -ENOENT; *fence = drm_syncobj_fence_get(syncobj); - drm_syncobj_put(syncobj); if (*fence) { ret = dma_fence_chain_find_seqno(fence, point); if (!ret) - return 0; + goto out; dma_fence_put(*fence); } else { ret = -EINVAL; } if (!(flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT)) - return ret; + goto out; memset(&wait, 0, sizeof(wait)); wait.task = current; @@ -432,6 +431,9 @@ int drm_syncobj_find_fence(struct drm_file *file_private, if (wait.node.next) drm_syncobj_remove_wait(syncobj, &wait); +out: + drm_syncobj_put(syncobj); + return ret; } EXPORT_SYMBOL(drm_syncobj_find_fence); -- GitLab From 03f16c5075b22c8902d2af739969e878b0879c94 Mon Sep 17 00:00:00 2001 From: Vincent Mailhol Date: Wed, 20 Jan 2021 20:41:35 +0900 Subject: [PATCH 1106/2012] can: dev: can_restart: fix use after free bug After calling netif_rx_ni(skb), dereferencing skb is unsafe. Especially, the can_frame cf which aliases skb memory is accessed after the netif_rx_ni() in: stats->rx_bytes += cf->len; Reordering the lines solves the issue. Fixes: 39549eef3587 ("can: CAN Network device driver and Netlink interface") Link: https://lore.kernel.org/r/20210120114137.200019-2-mailhol.vincent@wanadoo.fr Signed-off-by: Vincent Mailhol Signed-off-by: Marc Kleine-Budde --- drivers/net/can/dev.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/can/dev.c b/drivers/net/can/dev.c index 3486704c8a957..8b1ae023cb218 100644 --- a/drivers/net/can/dev.c +++ b/drivers/net/can/dev.c @@ -592,11 +592,11 @@ static void can_restart(struct net_device *dev) cf->can_id |= CAN_ERR_RESTARTED; - netif_rx_ni(skb); - stats->rx_packets++; stats->rx_bytes += cf->len; + netif_rx_ni(skb); + restart: netdev_dbg(dev, "restarted\n"); priv->can_stats.restarts++; -- GitLab From 75854cad5d80976f6ea0f0431f8cedd3bcc475cb Mon Sep 17 00:00:00 2001 From: Vincent Mailhol Date: Wed, 20 Jan 2021 20:41:36 +0900 Subject: [PATCH 1107/2012] can: vxcan: vxcan_xmit: fix use after free bug After calling netif_rx_ni(skb), dereferencing skb is unsafe. Especially, the canfd_frame cfd which aliases skb memory is accessed after the netif_rx_ni(). Fixes: a8f820a380a2 ("can: add Virtual CAN Tunnel driver (vxcan)") Link: https://lore.kernel.org/r/20210120114137.200019-3-mailhol.vincent@wanadoo.fr Signed-off-by: Vincent Mailhol Signed-off-by: Marc Kleine-Budde --- drivers/net/can/vxcan.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/can/vxcan.c b/drivers/net/can/vxcan.c index fa47bab510bb9..f9a524c5f6d62 100644 --- a/drivers/net/can/vxcan.c +++ b/drivers/net/can/vxcan.c @@ -39,6 +39,7 @@ static netdev_tx_t vxcan_xmit(struct sk_buff *skb, struct net_device *dev) struct net_device *peer; struct canfd_frame *cfd = (struct canfd_frame *)skb->data; struct net_device_stats *peerstats, *srcstats = &dev->stats; + u8 len; if (can_dropped_invalid_skb(dev, skb)) return NETDEV_TX_OK; @@ -61,12 +62,13 @@ static netdev_tx_t vxcan_xmit(struct sk_buff *skb, struct net_device *dev) skb->dev = peer; skb->ip_summed = CHECKSUM_UNNECESSARY; + len = cfd->len; if (netif_rx_ni(skb) == NET_RX_SUCCESS) { srcstats->tx_packets++; - srcstats->tx_bytes += cfd->len; + srcstats->tx_bytes += len; peerstats = &peer->stats; peerstats->rx_packets++; - peerstats->rx_bytes += cfd->len; + peerstats->rx_bytes += len; } out_unlock: -- GitLab From 50aca891d7a554db0901b245167cd653d73aaa71 Mon Sep 17 00:00:00 2001 From: Vincent Mailhol Date: Wed, 20 Jan 2021 20:41:37 +0900 Subject: [PATCH 1108/2012] can: peak_usb: fix use after free bugs After calling peak_usb_netif_rx_ni(skb), dereferencing skb is unsafe. Especially, the can_frame cf which aliases skb memory is accessed after the peak_usb_netif_rx_ni(). Reordering the lines solves the issue. Fixes: 0a25e1f4f185 ("can: peak_usb: add support for PEAK new CANFD USB adapters") Link: https://lore.kernel.org/r/20210120114137.200019-4-mailhol.vincent@wanadoo.fr Signed-off-by: Vincent Mailhol Signed-off-by: Marc Kleine-Budde --- drivers/net/can/usb/peak_usb/pcan_usb_fd.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/can/usb/peak_usb/pcan_usb_fd.c b/drivers/net/can/usb/peak_usb/pcan_usb_fd.c index 61631f4fd92a1..f347ecc79aef2 100644 --- a/drivers/net/can/usb/peak_usb/pcan_usb_fd.c +++ b/drivers/net/can/usb/peak_usb/pcan_usb_fd.c @@ -514,11 +514,11 @@ static int pcan_usb_fd_decode_canmsg(struct pcan_usb_fd_if *usb_if, else memcpy(cfd->data, rm->d, cfd->len); - peak_usb_netif_rx(skb, &usb_if->time_ref, le32_to_cpu(rm->ts_low)); - netdev->stats.rx_packets++; netdev->stats.rx_bytes += cfd->len; + peak_usb_netif_rx(skb, &usb_if->time_ref, le32_to_cpu(rm->ts_low)); + return 0; } @@ -580,11 +580,11 @@ static int pcan_usb_fd_decode_status(struct pcan_usb_fd_if *usb_if, if (!skb) return -ENOMEM; - peak_usb_netif_rx(skb, &usb_if->time_ref, le32_to_cpu(sm->ts_low)); - netdev->stats.rx_packets++; netdev->stats.rx_bytes += cf->len; + peak_usb_netif_rx(skb, &usb_if->time_ref, le32_to_cpu(sm->ts_low)); + return 0; } -- GitLab From dc5d17a3c39b06aef866afca19245a9cfb533a79 Mon Sep 17 00:00:00 2001 From: Xiao Ni Date: Thu, 10 Dec 2020 14:33:32 +0800 Subject: [PATCH 1109/2012] md: Set prev_flush_start and flush_bio in an atomic way One customer reports a crash problem which causes by flush request. It triggers a warning before crash. /* new request after previous flush is completed */ if (ktime_after(req_start, mddev->prev_flush_start)) { WARN_ON(mddev->flush_bio); mddev->flush_bio = bio; bio = NULL; } The WARN_ON is triggered. We use spin lock to protect prev_flush_start and flush_bio in md_flush_request. But there is no lock protection in md_submit_flush_data. It can set flush_bio to NULL first because of compiler reordering write instructions. For example, flush bio1 sets flush bio to NULL first in md_submit_flush_data. An interrupt or vmware causing an extended stall happen between updating flush_bio and prev_flush_start. Because flush_bio is NULL, flush bio2 can get the lock and submit to underlayer disks. Then flush bio1 updates prev_flush_start after the interrupt or extended stall. Then flush bio3 enters in md_flush_request. The start time req_start is behind prev_flush_start. The flush_bio is not NULL(flush bio2 hasn't finished). So it can trigger the WARN_ON now. Then it calls INIT_WORK again. INIT_WORK() will re-initialize the list pointers in the work_struct, which then can result in a corrupted work list and the work_struct queued a second time. With the work list corrupted, it can lead in invalid work items being used and cause a crash in process_one_work. We need to make sure only one flush bio can be handled at one same time. So add spin lock in md_submit_flush_data to protect prev_flush_start and flush_bio in an atomic way. Reviewed-by: David Jeffery Signed-off-by: Xiao Ni Signed-off-by: Song Liu --- drivers/md/md.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/md/md.c b/drivers/md/md.c index ca409428b4fcb..04384452a7abd 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -639,8 +639,10 @@ static void md_submit_flush_data(struct work_struct *ws) * could wait for this and below md_handle_request could wait for those * bios because of suspend check */ + spin_lock_irq(&mddev->lock); mddev->prev_flush_start = mddev->start_flush; mddev->flush_bio = NULL; + spin_unlock_irq(&mddev->lock); wake_up(&mddev->sb_wait); if (bio->bi_iter.bi_size == 0) { -- GitLab From bc895e8b2a64e502fbba72748d59618272052a8b Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 20 Jan 2021 00:24:24 +0100 Subject: [PATCH 1110/2012] bpf: Fix signed_{sub,add32}_overflows type handling Fix incorrect signed_{sub,add32}_overflows() input types (and a related buggy comment). It looks like this might have slipped in via copy/paste issue, also given prior to 3f50f132d840 ("bpf: Verifier, do explicit ALU32 bounds tracking") the signature of signed_sub_overflows() had s64 a and s64 b as its input args whereas now they are truncated to s32. Thus restore proper types. Also, the case of signed_add32_overflows() is not consistent to signed_sub32_overflows(). Both have s32 as inputs, therefore align the former. Fixes: 3f50f132d840 ("bpf: Verifier, do explicit ALU32 bounds tracking") Reported-by: De4dCr0w Signed-off-by: Daniel Borkmann Reviewed-by: John Fastabend Acked-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 36af69fac5910..e7368c5eacb7b 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -5313,7 +5313,7 @@ static bool signed_add_overflows(s64 a, s64 b) return res < a; } -static bool signed_add32_overflows(s64 a, s64 b) +static bool signed_add32_overflows(s32 a, s32 b) { /* Do the add in u32, where overflow is well-defined */ s32 res = (s32)((u32)a + (u32)b); @@ -5323,7 +5323,7 @@ static bool signed_add32_overflows(s64 a, s64 b) return res < a; } -static bool signed_sub_overflows(s32 a, s32 b) +static bool signed_sub_overflows(s64 a, s64 b) { /* Do the sub in u64, where overflow is well-defined */ s64 res = (s64)((u64)a - (u64)b); @@ -5335,7 +5335,7 @@ static bool signed_sub_overflows(s32 a, s32 b) static bool signed_sub32_overflows(s32 a, s32 b) { - /* Do the sub in u64, where overflow is well-defined */ + /* Do the sub in u32, where overflow is well-defined */ s32 res = (s32)((u32)a - (u32)b); if (b < 0) -- GitLab From 09a4f6f5d21cb1f2633f4e8b893336b60eee9a01 Mon Sep 17 00:00:00 2001 From: Srinivas Kandagatla Date: Tue, 19 Jan 2021 17:15:26 +0000 Subject: [PATCH 1111/2012] ASoC: dt-bindings: lpass: Fix and common up lpass dai ids Existing header file design of having separate SoC specific header files for the common lpass driver has mutiple issues. This design is prone to break as an when new SoC header is added as the common DAI ids of other SoCs will be overwritten by the new ones. One of them surfaced by recent patch that adds support to sc7180, this one totally broke LPASS drivers on other Qualcomm SoCs. Before this gets worst, fix this by having a common header qcom,lpass.h. This should fix the issue and any new DAI ids should be added to the common header. This will be more sustainable then the existing design! Fixes: 12fbfc4cabec6595 ("ASoC: Add sc7180-lpass binding header hdmi define") Reported-by: Jun Nie Reported-by: Stephan Gerhold Tested-by: Srinivasa Rao Signed-off-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20210119171527.32145-2-srinivas.kandagatla@linaro.org Signed-off-by: Mark Brown --- include/dt-bindings/sound/apq8016-lpass.h | 7 +++---- include/dt-bindings/sound/qcom,lpass.h | 15 +++++++++++++++ include/dt-bindings/sound/sc7180-lpass.h | 6 ++---- 3 files changed, 20 insertions(+), 8 deletions(-) create mode 100644 include/dt-bindings/sound/qcom,lpass.h diff --git a/include/dt-bindings/sound/apq8016-lpass.h b/include/dt-bindings/sound/apq8016-lpass.h index 3c3e16c0aadbf..dc605c4bc2249 100644 --- a/include/dt-bindings/sound/apq8016-lpass.h +++ b/include/dt-bindings/sound/apq8016-lpass.h @@ -2,9 +2,8 @@ #ifndef __DT_APQ8016_LPASS_H #define __DT_APQ8016_LPASS_H -#define MI2S_PRIMARY 0 -#define MI2S_SECONDARY 1 -#define MI2S_TERTIARY 2 -#define MI2S_QUATERNARY 3 +#include + +/* NOTE: Use qcom,lpass.h to define any AIF ID's for LPASS */ #endif /* __DT_APQ8016_LPASS_H */ diff --git a/include/dt-bindings/sound/qcom,lpass.h b/include/dt-bindings/sound/qcom,lpass.h new file mode 100644 index 0000000000000..7b0b80b38699e --- /dev/null +++ b/include/dt-bindings/sound/qcom,lpass.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __DT_QCOM_LPASS_H +#define __DT_QCOM_LPASS_H + +#define MI2S_PRIMARY 0 +#define MI2S_SECONDARY 1 +#define MI2S_TERTIARY 2 +#define MI2S_QUATERNARY 3 +#define MI2S_QUINARY 4 + +#define LPASS_DP_RX 5 + +#define LPASS_MCLK0 0 + +#endif /* __DT_QCOM_LPASS_H */ diff --git a/include/dt-bindings/sound/sc7180-lpass.h b/include/dt-bindings/sound/sc7180-lpass.h index 56ecaafd2dc68..5c1ee8b36b197 100644 --- a/include/dt-bindings/sound/sc7180-lpass.h +++ b/include/dt-bindings/sound/sc7180-lpass.h @@ -2,10 +2,8 @@ #ifndef __DT_SC7180_LPASS_H #define __DT_SC7180_LPASS_H -#define MI2S_PRIMARY 0 -#define MI2S_SECONDARY 1 -#define LPASS_DP_RX 2 +#include -#define LPASS_MCLK0 0 +/* NOTE: Use qcom,lpass.h to define any AIF ID's for LPASS */ #endif /* __DT_APQ8016_LPASS_H */ -- GitLab From cd3484f7f1386071b1af159023917ed12c182d39 Mon Sep 17 00:00:00 2001 From: Srinivas Kandagatla Date: Tue, 19 Jan 2021 17:15:27 +0000 Subject: [PATCH 1112/2012] ASoC: qcom: Fix broken support to MI2S TERTIARY and QUATERNARY lpass hdmi support patch totally removed support for MI2S TERTIARY and QUATERNARY. One of the major issue was spotted with the design of having separate SoC specific header files for the common lpass driver. This design is prone to break as an when new SoC header is added as the common DAI ids of other SoCs will be overwritten by the new ones. Having a common header qcom,lpass.h should fix the issue and any new DAI ids should be added to the common header. With this change lpass also needs a new of_xlate function to resolve dai name. Fixes: 7cb37b7bd0d3 ("ASoC: qcom: Add support for lpass hdmi driver") Reported-by: Jun Nie Reported-by: Stephan Gerhold Tested-by: Srinivasa Rao Signed-off-by: Srinivas Kandagatla Tested-by: Stephan Gerhold Link: https://lore.kernel.org/r/20210119171527.32145-3-srinivas.kandagatla@linaro.org Signed-off-by: Mark Brown --- sound/soc/qcom/lpass-cpu.c | 22 ++++++++++++++++++++++ sound/soc/qcom/lpass-platform.c | 12 ++++++++++++ sound/soc/qcom/lpass-sc7180.c | 9 +++------ sound/soc/qcom/lpass.h | 2 +- 4 files changed, 38 insertions(+), 7 deletions(-) diff --git a/sound/soc/qcom/lpass-cpu.c b/sound/soc/qcom/lpass-cpu.c index c5e99c2d89c7e..66b834312f330 100644 --- a/sound/soc/qcom/lpass-cpu.c +++ b/sound/soc/qcom/lpass-cpu.c @@ -344,8 +344,30 @@ int asoc_qcom_lpass_cpu_dai_probe(struct snd_soc_dai *dai) } EXPORT_SYMBOL_GPL(asoc_qcom_lpass_cpu_dai_probe); +static int asoc_qcom_of_xlate_dai_name(struct snd_soc_component *component, + struct of_phandle_args *args, + const char **dai_name) +{ + struct lpass_data *drvdata = snd_soc_component_get_drvdata(component); + struct lpass_variant *variant = drvdata->variant; + int id = args->args[0]; + int ret = -EINVAL; + int i; + + for (i = 0; i < variant->num_dai; i++) { + if (variant->dai_driver[i].id == id) { + *dai_name = variant->dai_driver[i].name; + ret = 0; + break; + } + } + + return ret; +} + static const struct snd_soc_component_driver lpass_cpu_comp_driver = { .name = "lpass-cpu", + .of_xlate_dai_name = asoc_qcom_of_xlate_dai_name, }; static bool lpass_cpu_regmap_writeable(struct device *dev, unsigned int reg) diff --git a/sound/soc/qcom/lpass-platform.c b/sound/soc/qcom/lpass-platform.c index d1c248590f3ab..0074b7f2dbc10 100644 --- a/sound/soc/qcom/lpass-platform.c +++ b/sound/soc/qcom/lpass-platform.c @@ -257,6 +257,9 @@ static int lpass_platform_pcmops_hw_params(struct snd_soc_component *component, break; case MI2S_PRIMARY: case MI2S_SECONDARY: + case MI2S_TERTIARY: + case MI2S_QUATERNARY: + case MI2S_QUINARY: ret = regmap_fields_write(dmactl->intf, id, LPAIF_DMACTL_AUDINTF(dma_port)); if (ret) { @@ -507,6 +510,9 @@ static int lpass_platform_pcmops_trigger(struct snd_soc_component *component, break; case MI2S_PRIMARY: case MI2S_SECONDARY: + case MI2S_TERTIARY: + case MI2S_QUATERNARY: + case MI2S_QUINARY: reg_irqclr = LPAIF_IRQCLEAR_REG(v, LPAIF_IRQ_PORT_HOST); val_irqclr = LPAIF_IRQ_ALL(ch); @@ -559,6 +565,9 @@ static int lpass_platform_pcmops_trigger(struct snd_soc_component *component, break; case MI2S_PRIMARY: case MI2S_SECONDARY: + case MI2S_TERTIARY: + case MI2S_QUATERNARY: + case MI2S_QUINARY: reg_irqen = LPAIF_IRQEN_REG(v, LPAIF_IRQ_PORT_HOST); val_mask = LPAIF_IRQ_ALL(ch); val_irqen = 0; @@ -655,6 +664,9 @@ static irqreturn_t lpass_dma_interrupt_handler( break; case MI2S_PRIMARY: case MI2S_SECONDARY: + case MI2S_TERTIARY: + case MI2S_QUATERNARY: + case MI2S_QUINARY: map = drvdata->lpaif_map; reg = LPAIF_IRQCLEAR_REG(v, LPAIF_IRQ_PORT_HOST); val = 0; diff --git a/sound/soc/qcom/lpass-sc7180.c b/sound/soc/qcom/lpass-sc7180.c index 85db650c2169d..8c168d3c589e9 100644 --- a/sound/soc/qcom/lpass-sc7180.c +++ b/sound/soc/qcom/lpass-sc7180.c @@ -20,7 +20,7 @@ #include "lpass.h" static struct snd_soc_dai_driver sc7180_lpass_cpu_dai_driver[] = { - [MI2S_PRIMARY] = { + { .id = MI2S_PRIMARY, .name = "Primary MI2S", .playback = { @@ -44,9 +44,7 @@ static struct snd_soc_dai_driver sc7180_lpass_cpu_dai_driver[] = { }, .probe = &asoc_qcom_lpass_cpu_dai_probe, .ops = &asoc_qcom_lpass_cpu_dai_ops, - }, - - [MI2S_SECONDARY] = { + }, { .id = MI2S_SECONDARY, .name = "Secondary MI2S", .playback = { @@ -60,8 +58,7 @@ static struct snd_soc_dai_driver sc7180_lpass_cpu_dai_driver[] = { }, .probe = &asoc_qcom_lpass_cpu_dai_probe, .ops = &asoc_qcom_lpass_cpu_dai_ops, - }, - [LPASS_DP_RX] = { + }, { .id = LPASS_DP_RX, .name = "Hdmi", .playback = { diff --git a/sound/soc/qcom/lpass.h b/sound/soc/qcom/lpass.h index 0195372905ed3..2d68af0da34d8 100644 --- a/sound/soc/qcom/lpass.h +++ b/sound/soc/qcom/lpass.h @@ -12,7 +12,7 @@ #include #include #include -#include +#include #include "lpass-hdmi.h" #define LPASS_AHBIX_CLOCK_FREQUENCY 131072000 -- GitLab From 40caffd66ca9ad1baa2d5541232675160bc6c772 Mon Sep 17 00:00:00 2001 From: Jaroslav Kysela Date: Wed, 20 Jan 2021 15:42:11 +0100 Subject: [PATCH 1113/2012] ASoC: AMD Renoir - refine DMI entries for some Lenovo products Apparently, the DMI board name LNVNB161216 is also used also for products with the digital microphones connected to the AMD's audio bridge. Refine the DMI table - use product name identifiers extracted from https://bugzilla.redhat.com/show_bug.cgi?id=1892115 . The report for Lenovo Yoga Slim 7 14ARE05 (82A2) is in buglink. BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=211299 Cc: Signed-off-by: Jaroslav Kysela Cc: Mark Brown Link: https://lore.kernel.org/r/20210120144211.817937-1-perex@perex.cz Signed-off-by: Mark Brown --- sound/soc/amd/renoir/rn-pci-acp3x.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/sound/soc/amd/renoir/rn-pci-acp3x.c b/sound/soc/amd/renoir/rn-pci-acp3x.c index deca8c7a0e878..050a61fe9693f 100644 --- a/sound/soc/amd/renoir/rn-pci-acp3x.c +++ b/sound/soc/amd/renoir/rn-pci-acp3x.c @@ -165,10 +165,24 @@ static int rn_acp_deinit(void __iomem *acp_base) static const struct dmi_system_id rn_acp_quirk_table[] = { { - /* Lenovo IdeaPad Flex 5 14ARE05, IdeaPad 5 15ARE05 */ + /* Lenovo IdeaPad S340-14API */ .matches = { DMI_EXACT_MATCH(DMI_BOARD_VENDOR, "LENOVO"), - DMI_EXACT_MATCH(DMI_BOARD_NAME, "LNVNB161216"), + DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "81NB"), + } + }, + { + /* Lenovo IdeaPad Flex 5 14ARE05 */ + .matches = { + DMI_EXACT_MATCH(DMI_BOARD_VENDOR, "LENOVO"), + DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "81X2"), + } + }, + { + /* Lenovo IdeaPad 5 15ARE05 */ + .matches = { + DMI_EXACT_MATCH(DMI_BOARD_VENDOR, "LENOVO"), + DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "81YQ"), } }, { -- GitLab From 1e066a23e76f90c9c39c189fe0dbf7c6e3dd5044 Mon Sep 17 00:00:00 2001 From: Srinivas Kandagatla Date: Tue, 19 Jan 2021 17:47:00 +0000 Subject: [PATCH 1114/2012] ASoC: qcom: lpass-ipq806x: fix bitwidth regmap field BIT_WIDTH field in I2S_CTL register is two bits wide, however recent regmap field conversion patch trimmed it down to one bit. Fix this by correcting the bit range! Fixes: b5022a36d28f ("ASoC: qcom: lpass: Use regmap_field for i2sctl and dmactl registers") Signed-off-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20210119174700.32639-1-srinivas.kandagatla@linaro.org Signed-off-by: Mark Brown --- sound/soc/qcom/lpass-ipq806x.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/qcom/lpass-ipq806x.c b/sound/soc/qcom/lpass-ipq806x.c index 92f98b4df47ff..ef8a7984f2323 100644 --- a/sound/soc/qcom/lpass-ipq806x.c +++ b/sound/soc/qcom/lpass-ipq806x.c @@ -131,7 +131,7 @@ static struct lpass_variant ipq806x_data = { .micmode = REG_FIELD_ID(0x0010, 4, 7, 5, 0x4), .micmono = REG_FIELD_ID(0x0010, 3, 3, 5, 0x4), .wssrc = REG_FIELD_ID(0x0010, 2, 2, 5, 0x4), - .bitwidth = REG_FIELD_ID(0x0010, 0, 0, 5, 0x4), + .bitwidth = REG_FIELD_ID(0x0010, 0, 1, 5, 0x4), .rdma_dyncclk = REG_FIELD_ID(0x6000, 12, 12, 4, 0x1000), .rdma_bursten = REG_FIELD_ID(0x6000, 11, 11, 4, 0x1000), -- GitLab From 543466ef3571069b8eb13a8ff7c7cfc8d8a75c43 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 20 Jan 2021 12:59:13 +0300 Subject: [PATCH 1115/2012] ASoC: topology: Fix memory corruption in soc_tplg_denum_create_values() The allocation uses sizeof(u32) when it should use sizeof(unsigned long) so it leads to memory corruption later in the function when the data is initialized. Fixes: 5aebe7c7f9c2 ("ASoC: topology: fix endianness issues") Signed-off-by: Dan Carpenter Link: https://lore.kernel.org/r/YAf+8QZoOv+ct526@mwanda Signed-off-by: Mark Brown --- sound/soc/soc-topology.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/soc-topology.c b/sound/soc/soc-topology.c index 950c45008e245..37a5d73e643b8 100644 --- a/sound/soc/soc-topology.c +++ b/sound/soc/soc-topology.c @@ -902,7 +902,7 @@ static int soc_tplg_denum_create_values(struct soc_tplg *tplg, struct soc_enum * return -EINVAL; se->dobj.control.dvalues = devm_kcalloc(tplg->dev, le32_to_cpu(ec->items), - sizeof(u32), + sizeof(*se->dobj.control.dvalues), GFP_KERNEL); if (!se->dobj.control.dvalues) return -ENOMEM; -- GitLab From 55a8b42e8645a6dab88674a30cb6ed328e660680 Mon Sep 17 00:00:00 2001 From: Pan Bian Date: Wed, 20 Jan 2021 00:26:35 -0800 Subject: [PATCH 1116/2012] spi: altera: Fix memory leak on error path Release master that have been previously allocated if the number of chipselect is invalid. Fixes: 8e04187c1bc7 ("spi: altera: add SPI core parameters support via platform data.") Signed-off-by: Pan Bian Reviewed-by: Tom Rix Link: https://lore.kernel.org/r/20210120082635.49304-1-bianpan2016@163.com Signed-off-by: Mark Brown --- drivers/spi/spi-altera.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/spi/spi-altera.c b/drivers/spi/spi-altera.c index cbc4c28c1541c..62ea0c9e321b4 100644 --- a/drivers/spi/spi-altera.c +++ b/drivers/spi/spi-altera.c @@ -254,7 +254,8 @@ static int altera_spi_probe(struct platform_device *pdev) dev_err(&pdev->dev, "Invalid number of chipselect: %hu\n", pdata->num_chipselect); - return -EINVAL; + err = -EINVAL; + goto exit; } master->num_chipselect = pdata->num_chipselect; -- GitLab From c89dffc70b340780e5b933832d8c3e045ef3791e Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 18 Jan 2021 14:59:20 +0900 Subject: [PATCH 1117/2012] tcp: Fix potential use-after-free due to double kfree() Receiving ACK with a valid SYN cookie, cookie_v4_check() allocates struct request_sock and then can allocate inet_rsk(req)->ireq_opt. After that, tcp_v4_syn_recv_sock() allocates struct sock and copies ireq_opt to inet_sk(sk)->inet_opt. Normally, tcp_v4_syn_recv_sock() inserts the full socket into ehash and sets NULL to ireq_opt. Otherwise, tcp_v4_syn_recv_sock() has to reset inet_opt by NULL and free the full socket. The commit 01770a1661657 ("tcp: fix race condition when creating child sockets from syncookies") added a new path, in which more than one cores create full sockets for the same SYN cookie. Currently, the core which loses the race frees the full socket without resetting inet_opt, resulting in that both sock_put() and reqsk_put() call kfree() for the same memory: sock_put sk_free __sk_free sk_destruct __sk_destruct sk->sk_destruct/inet_sock_destruct kfree(rcu_dereference_protected(inet->inet_opt, 1)); reqsk_put reqsk_free __reqsk_free req->rsk_ops->destructor/tcp_v4_reqsk_destructor kfree(rcu_dereference_protected(inet_rsk(req)->ireq_opt, 1)); Calling kmalloc() between the double kfree() can lead to use-after-free, so this patch fixes it by setting NULL to inet_opt before sock_put(). As a side note, this kind of issue does not happen for IPv6. This is because tcp_v6_syn_recv_sock() clones both ipv6_opt and pktopts which correspond to ireq_opt in IPv4. Fixes: 01770a166165 ("tcp: fix race condition when creating child sockets from syncookies") CC: Ricardo Dias Signed-off-by: Kuniyuki Iwashima Reviewed-by: Benjamin Herrenschmidt Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/20210118055920.82516-1-kuniyu@amazon.co.jp Signed-off-by: Jakub Kicinski --- net/ipv4/tcp_ipv4.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 4e82745d336fc..777306b5bc224 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1595,6 +1595,8 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb, tcp_move_syn(newtp, req); ireq->ireq_opt = NULL; } else { + newinet->inet_opt = NULL; + if (!req_unhash && found_dup_sk) { /* This code path should only be executed in the * syncookie case only @@ -1602,8 +1604,6 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb, bh_unlock_sock(newsk); sock_put(newsk); newsk = NULL; - } else { - newinet->inet_opt = NULL; } } return newsk; -- GitLab From 584b7cfcdc7d6d416a9d6fece9516764bd977d2e Mon Sep 17 00:00:00 2001 From: Alban Bedel Date: Tue, 19 Jan 2021 15:06:38 +0100 Subject: [PATCH 1118/2012] net: mscc: ocelot: Fix multicast to the CPU port Multicast entries in the MAC table use the high bits of the MAC address to encode the ports that should get the packets. But this port mask does not work for the CPU port, to receive these packets on the CPU port the MAC_CPU_COPY flag must be set. Because of this IPv6 was effectively not working because neighbor solicitations were never received. This was not apparent before commit 9403c158 (net: mscc: ocelot: support IPv4, IPv6 and plain Ethernet mdb entries) as the IPv6 entries were broken so all incoming IPv6 multicast was then treated as unknown and flooded on all ports. To fix this problem rework the ocelot_mact_learn() to set the MAC_CPU_COPY flag when a multicast entry that target the CPU port is added. For this we have to read back the ports endcoded in the pseudo MAC address by the caller. It is not a very nice design but that avoid changing the callers and should make backporting easier. Signed-off-by: Alban Bedel Fixes: 9403c158b872 ("net: mscc: ocelot: support IPv4, IPv6 and plain Ethernet mdb entries") Link: https://lore.kernel.org/r/20210119140638.203374-1-alban.bedel@aerq.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mscc/ocelot.c | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c index 0b9992bd66262..ff87a0bc089cf 100644 --- a/drivers/net/ethernet/mscc/ocelot.c +++ b/drivers/net/ethernet/mscc/ocelot.c @@ -60,14 +60,27 @@ int ocelot_mact_learn(struct ocelot *ocelot, int port, const unsigned char mac[ETH_ALEN], unsigned int vid, enum macaccess_entry_type type) { + u32 cmd = ANA_TABLES_MACACCESS_VALID | + ANA_TABLES_MACACCESS_DEST_IDX(port) | + ANA_TABLES_MACACCESS_ENTRYTYPE(type) | + ANA_TABLES_MACACCESS_MAC_TABLE_CMD(MACACCESS_CMD_LEARN); + unsigned int mc_ports; + + /* Set MAC_CPU_COPY if the CPU port is used by a multicast entry */ + if (type == ENTRYTYPE_MACv4) + mc_ports = (mac[1] << 8) | mac[2]; + else if (type == ENTRYTYPE_MACv6) + mc_ports = (mac[0] << 8) | mac[1]; + else + mc_ports = 0; + + if (mc_ports & BIT(ocelot->num_phys_ports)) + cmd |= ANA_TABLES_MACACCESS_MAC_CPU_COPY; + ocelot_mact_select(ocelot, mac, vid); /* Issue a write command */ - ocelot_write(ocelot, ANA_TABLES_MACACCESS_VALID | - ANA_TABLES_MACACCESS_DEST_IDX(port) | - ANA_TABLES_MACACCESS_ENTRYTYPE(type) | - ANA_TABLES_MACACCESS_MAC_TABLE_CMD(MACACCESS_CMD_LEARN), - ANA_TABLES_MACACCESS); + ocelot_write(ocelot, cmd, ANA_TABLES_MACACCESS); return ocelot_mact_wait_for_completion(ocelot); } -- GitLab From de658a195ee23ca6aaffe197d1d2ea040beea0a2 Mon Sep 17 00:00:00 2001 From: Grant Grundler Date: Tue, 19 Jan 2021 17:12:08 -0800 Subject: [PATCH 1119/2012] net: usb: cdc_ncm: don't spew notifications RTL8156 sends notifications about every 32ms. Only display/log notifications when something changes. This issue has been reported by others: https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1832472 https://lkml.org/lkml/2020/8/27/1083 ... [785962.779840] usb 1-1: new high-speed USB device number 5 using xhci_hcd [785962.929944] usb 1-1: New USB device found, idVendor=0bda, idProduct=8156, bcdDevice=30.00 [785962.929949] usb 1-1: New USB device strings: Mfr=1, Product=2, SerialNumber=6 [785962.929952] usb 1-1: Product: USB 10/100/1G/2.5G LAN [785962.929954] usb 1-1: Manufacturer: Realtek [785962.929956] usb 1-1: SerialNumber: 000000001 [785962.991755] usbcore: registered new interface driver cdc_ether [785963.017068] cdc_ncm 1-1:2.0: MAC-Address: 00:24:27:88:08:15 [785963.017072] cdc_ncm 1-1:2.0: setting rx_max = 16384 [785963.017169] cdc_ncm 1-1:2.0: setting tx_max = 16384 [785963.017682] cdc_ncm 1-1:2.0 usb0: register 'cdc_ncm' at usb-0000:00:14.0-1, CDC NCM, 00:24:27:88:08:15 [785963.019211] usbcore: registered new interface driver cdc_ncm [785963.023856] usbcore: registered new interface driver cdc_wdm [785963.025461] usbcore: registered new interface driver cdc_mbim [785963.038824] cdc_ncm 1-1:2.0 enx002427880815: renamed from usb0 [785963.089586] cdc_ncm 1-1:2.0 enx002427880815: network connection: disconnected [785963.121673] cdc_ncm 1-1:2.0 enx002427880815: network connection: disconnected [785963.153682] cdc_ncm 1-1:2.0 enx002427880815: network connection: disconnected ... This is about 2KB per second and will overwrite all contents of a 1MB dmesg buffer in under 10 minutes rendering them useless for debugging many kernel problems. This is also an extra 180 MB/day in /var/logs (or 1GB per week) rendering the majority of those logs useless too. When the link is up (expected state), spew amount is >2x higher: ... [786139.600992] cdc_ncm 2-1:2.0 enx002427880815: network connection: connected [786139.632997] cdc_ncm 2-1:2.0 enx002427880815: 2500 mbit/s downlink 2500 mbit/s uplink [786139.665097] cdc_ncm 2-1:2.0 enx002427880815: network connection: connected [786139.697100] cdc_ncm 2-1:2.0 enx002427880815: 2500 mbit/s downlink 2500 mbit/s uplink [786139.729094] cdc_ncm 2-1:2.0 enx002427880815: network connection: connected [786139.761108] cdc_ncm 2-1:2.0 enx002427880815: 2500 mbit/s downlink 2500 mbit/s uplink ... Chrome OS cannot support RTL8156 until this is fixed. Signed-off-by: Grant Grundler Reviewed-by: Hayes Wang Link: https://lore.kernel.org/r/20210120011208.3768105-1-grundler@chromium.org Signed-off-by: Jakub Kicinski --- drivers/net/usb/cdc_ncm.c | 12 +++++++++++- include/linux/usb/usbnet.h | 2 ++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c index 5a78848db93fd..291e76d32abe7 100644 --- a/drivers/net/usb/cdc_ncm.c +++ b/drivers/net/usb/cdc_ncm.c @@ -1827,6 +1827,15 @@ cdc_ncm_speed_change(struct usbnet *dev, uint32_t rx_speed = le32_to_cpu(data->DLBitRRate); uint32_t tx_speed = le32_to_cpu(data->ULBitRate); + /* if the speed hasn't changed, don't report it. + * RTL8156 shipped before 2021 sends notification about every 32ms. + */ + if (dev->rx_speed == rx_speed && dev->tx_speed == tx_speed) + return; + + dev->rx_speed = rx_speed; + dev->tx_speed = tx_speed; + /* * Currently the USB-NET API does not support reporting the actual * device speed. Do print it instead. @@ -1867,7 +1876,8 @@ static void cdc_ncm_status(struct usbnet *dev, struct urb *urb) * USB_CDC_NOTIFY_NETWORK_CONNECTION notification shall be * sent by device after USB_CDC_NOTIFY_SPEED_CHANGE. */ - usbnet_link_change(dev, !!event->wValue, 0); + if (netif_carrier_ok(dev->net) != !!event->wValue) + usbnet_link_change(dev, !!event->wValue, 0); break; case USB_CDC_NOTIFY_SPEED_CHANGE: diff --git a/include/linux/usb/usbnet.h b/include/linux/usb/usbnet.h index 88a7673894d5e..cfbfd6fe01dfa 100644 --- a/include/linux/usb/usbnet.h +++ b/include/linux/usb/usbnet.h @@ -81,6 +81,8 @@ struct usbnet { # define EVENT_LINK_CHANGE 11 # define EVENT_SET_RX_MODE 12 # define EVENT_NO_IP_ALIGN 13 + u32 rx_speed; /* in bps - NOT Mbps */ + u32 tx_speed; /* in bps - NOT Mbps */ }; static inline struct usb_driver *driver_of(struct usb_interface *intf) -- GitLab From 0c630a66bf10991b0ef13d27c93d7545e692ef5b Mon Sep 17 00:00:00 2001 From: Pan Bian Date: Tue, 19 Jan 2021 20:44:23 -0800 Subject: [PATCH 1120/2012] net: systemport: free dev before on error path On the error path, it should goto the error handling label to free allocated memory rather than directly return. Fixes: 31bc72d97656 ("net: systemport: fetch and use clock resources") Signed-off-by: Pan Bian Acked-by: Florian Fainelli Link: https://lore.kernel.org/r/20210120044423.1704-1-bianpan2016@163.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bcmsysport.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c index b1ae9eb8f2479..0404aafd5ce56 100644 --- a/drivers/net/ethernet/broadcom/bcmsysport.c +++ b/drivers/net/ethernet/broadcom/bcmsysport.c @@ -2503,8 +2503,10 @@ static int bcm_sysport_probe(struct platform_device *pdev) priv = netdev_priv(dev); priv->clk = devm_clk_get_optional(&pdev->dev, "sw_sysport"); - if (IS_ERR(priv->clk)) - return PTR_ERR(priv->clk); + if (IS_ERR(priv->clk)) { + ret = PTR_ERR(priv->clk); + goto err_free_netdev; + } /* Allocate number of TX rings */ priv->tx_rings = devm_kcalloc(&pdev->dev, txq, -- GitLab From f1b0a8ea9f12b8ade0dbe40dd57e4ffa9a30ed93 Mon Sep 17 00:00:00 2001 From: Martin Wilck Date: Wed, 20 Jan 2021 17:19:13 +0100 Subject: [PATCH 1121/2012] Revert "RDMA/rxe: Remove VLAN code leftovers from RXE" This reverts commit b2d2440430c0fdd5e0cad3efd6d1c9e3d3d02e5b. It's true that creating rxe on top of 802.1q interfaces doesn't work. Thus, commit fd49ddaf7e26 ("RDMA/rxe: prevent rxe creation on top of vlan interface") was absolutely correct. But b2d2440430c0 was incorrect assuming that with this change, RDMA and VLAN don't work togehter at all. It just has to be set up differently. Rather than creating rxe on top of the VLAN interface, rxe must be created on top of the physical interface. RDMA then works just fine through VLAN interfaces on top of that physical interface, via the "upper device" logic. This is hard to see in the rxe logic because it never talks about vlan, but instead rxe carefully selects upper vlan netdevices when working with packets which in turn imply certain vlan tagging. This is all done correctly and interacts with the gid table with VLAN support the same as real HW does. b2d2440430c0 broke this setup deliberately and should thus be reverted. Also, b2d2440430c0 removed rxe_dma_device(), so adapt the revert to discard that hunk. Fixes: b2d2440430c0 ("RDMA/rxe: Remove VLAN code leftovers from RXE") Link: https://lore.kernel.org/r/20210120161913.7347-1-mwilck@suse.com Signed-off-by: Martin Wilck Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_net.c | 6 ++++++ drivers/infiniband/sw/rxe/rxe_resp.c | 5 +++++ 2 files changed, 11 insertions(+) diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c index c4b06ced30a75..943914c2a50c7 100644 --- a/drivers/infiniband/sw/rxe/rxe_net.c +++ b/drivers/infiniband/sw/rxe/rxe_net.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -153,9 +154,14 @@ static int rxe_udp_encap_recv(struct sock *sk, struct sk_buff *skb) { struct udphdr *udph; struct net_device *ndev = skb->dev; + struct net_device *rdev = ndev; struct rxe_dev *rxe = rxe_get_dev_from_net(ndev); struct rxe_pkt_info *pkt = SKB_TO_PKT(skb); + if (!rxe && is_vlan_dev(rdev)) { + rdev = vlan_dev_real_dev(ndev); + rxe = rxe_get_dev_from_net(rdev); + } if (!rxe) goto drop; diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c index 5a098083a9d22..c7e3b6a4af38f 100644 --- a/drivers/infiniband/sw/rxe/rxe_resp.c +++ b/drivers/infiniband/sw/rxe/rxe_resp.c @@ -872,6 +872,11 @@ static enum resp_states do_complete(struct rxe_qp *qp, else wc->network_hdr_type = RDMA_NETWORK_IPV6; + if (is_vlan_dev(skb->dev)) { + wc->wc_flags |= IB_WC_WITH_VLAN; + wc->vlan_id = vlan_dev_vlan_id(skb->dev); + } + if (pkt->mask & RXE_IMMDT_MASK) { wc->wc_flags |= IB_WC_WITH_IMM; wc->ex.imm_data = immdt_imm(pkt); -- GitLab From 9275c206f88e5c49cb3e71932c81c8561083db9e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 20 Jan 2021 09:33:52 +0100 Subject: [PATCH 1122/2012] nvme-pci: refactor nvme_unmap_data Split out three helpers from nvme_unmap_data that will allow finer grained unwinding from nvme_map_data. Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: Marc Orr --- drivers/nvme/host/pci.c | 77 ++++++++++++++++++++++++++--------------- 1 file changed, 49 insertions(+), 28 deletions(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 25456d02eddb8..e29ece9e4d4b8 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -543,50 +543,71 @@ static inline bool nvme_pci_use_sgls(struct nvme_dev *dev, struct request *req) return true; } -static void nvme_unmap_data(struct nvme_dev *dev, struct request *req) +static void nvme_free_prps(struct nvme_dev *dev, struct request *req) { - struct nvme_iod *iod = blk_mq_rq_to_pdu(req); const int last_prp = NVME_CTRL_PAGE_SIZE / sizeof(__le64) - 1; - dma_addr_t dma_addr = iod->first_dma, next_dma_addr; + struct nvme_iod *iod = blk_mq_rq_to_pdu(req); + dma_addr_t dma_addr = iod->first_dma; int i; - if (iod->dma_len) { - dma_unmap_page(dev->dev, dma_addr, iod->dma_len, - rq_dma_dir(req)); - return; + for (i = 0; i < iod->npages; i++) { + __le64 *prp_list = nvme_pci_iod_list(req)[i]; + dma_addr_t next_dma_addr = le64_to_cpu(prp_list[last_prp]); + + dma_pool_free(dev->prp_page_pool, prp_list, dma_addr); + dma_addr = next_dma_addr; } - WARN_ON_ONCE(!iod->nents); +} - if (is_pci_p2pdma_page(sg_page(iod->sg))) - pci_p2pdma_unmap_sg(dev->dev, iod->sg, iod->nents, - rq_dma_dir(req)); - else - dma_unmap_sg(dev->dev, iod->sg, iod->nents, rq_dma_dir(req)); +static void nvme_free_sgls(struct nvme_dev *dev, struct request *req) +{ + const int last_sg = SGES_PER_PAGE - 1; + struct nvme_iod *iod = blk_mq_rq_to_pdu(req); + dma_addr_t dma_addr = iod->first_dma; + int i; + for (i = 0; i < iod->npages; i++) { + struct nvme_sgl_desc *sg_list = nvme_pci_iod_list(req)[i]; + dma_addr_t next_dma_addr = le64_to_cpu((sg_list[last_sg]).addr); - if (iod->npages == 0) - dma_pool_free(dev->prp_small_pool, nvme_pci_iod_list(req)[0], - dma_addr); + dma_pool_free(dev->prp_page_pool, sg_list, dma_addr); + dma_addr = next_dma_addr; + } - for (i = 0; i < iod->npages; i++) { - void *addr = nvme_pci_iod_list(req)[i]; +} - if (iod->use_sgl) { - struct nvme_sgl_desc *sg_list = addr; +static void nvme_unmap_sg(struct nvme_dev *dev, struct request *req) +{ + struct nvme_iod *iod = blk_mq_rq_to_pdu(req); - next_dma_addr = - le64_to_cpu((sg_list[SGES_PER_PAGE - 1]).addr); - } else { - __le64 *prp_list = addr; + if (is_pci_p2pdma_page(sg_page(iod->sg))) + pci_p2pdma_unmap_sg(dev->dev, iod->sg, iod->nents, + rq_dma_dir(req)); + else + dma_unmap_sg(dev->dev, iod->sg, iod->nents, rq_dma_dir(req)); +} - next_dma_addr = le64_to_cpu(prp_list[last_prp]); - } +static void nvme_unmap_data(struct nvme_dev *dev, struct request *req) +{ + struct nvme_iod *iod = blk_mq_rq_to_pdu(req); - dma_pool_free(dev->prp_page_pool, addr, dma_addr); - dma_addr = next_dma_addr; + if (iod->dma_len) { + dma_unmap_page(dev->dev, iod->first_dma, iod->dma_len, + rq_dma_dir(req)); + return; } + WARN_ON_ONCE(!iod->nents); + + nvme_unmap_sg(dev, req); + if (iod->npages == 0) + dma_pool_free(dev->prp_small_pool, nvme_pci_iod_list(req)[0], + iod->first_dma); + else if (iod->use_sgl) + nvme_free_sgls(dev, req); + else + nvme_free_prps(dev, req); mempool_free(iod->sg, dev->iod_mempool); } -- GitLab From fa0732168fa1369dd089e5b06d6158a68229f7b7 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 20 Jan 2021 09:35:01 +0100 Subject: [PATCH 1123/2012] nvme-pci: fix error unwind in nvme_map_data Properly unwind step by step using refactored helpers from nvme_unmap_data to avoid a potential double dma_unmap on a mapping failure. Fixes: 7fe07d14f71f ("nvme-pci: merge nvme_free_iod into nvme_unmap_data") Reported-by: Marc Orr Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: Marc Orr --- drivers/nvme/host/pci.c | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index e29ece9e4d4b8..856aa31931c14 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -683,7 +683,7 @@ static blk_status_t nvme_pci_setup_prps(struct nvme_dev *dev, __le64 *old_prp_list = prp_list; prp_list = dma_pool_alloc(pool, GFP_ATOMIC, &prp_dma); if (!prp_list) - return BLK_STS_RESOURCE; + goto free_prps; list[iod->npages++] = prp_list; prp_list[0] = old_prp_list[i - 1]; old_prp_list[i - 1] = cpu_to_le64(prp_dma); @@ -703,14 +703,14 @@ static blk_status_t nvme_pci_setup_prps(struct nvme_dev *dev, dma_addr = sg_dma_address(sg); dma_len = sg_dma_len(sg); } - done: cmnd->dptr.prp1 = cpu_to_le64(sg_dma_address(iod->sg)); cmnd->dptr.prp2 = cpu_to_le64(iod->first_dma); - return BLK_STS_OK; - - bad_sgl: +free_prps: + nvme_free_prps(dev, req); + return BLK_STS_RESOURCE; +bad_sgl: WARN(DO_ONCE(nvme_print_sgl, iod->sg, iod->nents), "Invalid SGL for payload:%d nents:%d\n", blk_rq_payload_bytes(req), iod->nents); @@ -782,7 +782,7 @@ static blk_status_t nvme_pci_setup_sgls(struct nvme_dev *dev, sg_list = dma_pool_alloc(pool, GFP_ATOMIC, &sgl_dma); if (!sg_list) - return BLK_STS_RESOURCE; + goto free_sgls; i = 0; nvme_pci_iod_list(req)[iod->npages++] = sg_list; @@ -795,6 +795,9 @@ static blk_status_t nvme_pci_setup_sgls(struct nvme_dev *dev, } while (--entries > 0); return BLK_STS_OK; +free_sgls: + nvme_free_sgls(dev, req); + return BLK_STS_RESOURCE; } static blk_status_t nvme_setup_prp_simple(struct nvme_dev *dev, @@ -863,7 +866,7 @@ static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req, sg_init_table(iod->sg, blk_rq_nr_phys_segments(req)); iod->nents = blk_rq_map_sg(req->q, req, iod->sg); if (!iod->nents) - goto out; + goto out_free_sg; if (is_pci_p2pdma_page(sg_page(iod->sg))) nr_mapped = pci_p2pdma_map_sg_attrs(dev->dev, iod->sg, @@ -872,16 +875,21 @@ static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req, nr_mapped = dma_map_sg_attrs(dev->dev, iod->sg, iod->nents, rq_dma_dir(req), DMA_ATTR_NO_WARN); if (!nr_mapped) - goto out; + goto out_free_sg; iod->use_sgl = nvme_pci_use_sgls(dev, req); if (iod->use_sgl) ret = nvme_pci_setup_sgls(dev, req, &cmnd->rw, nr_mapped); else ret = nvme_pci_setup_prps(dev, req, &cmnd->rw); -out: if (ret != BLK_STS_OK) - nvme_unmap_data(dev, req); + goto out_unmap_sg; + return BLK_STS_OK; + +out_unmap_sg: + nvme_unmap_sg(dev, req); +out_free_sg: + mempool_free(iod->sg, dev->iod_mempool); return ret; } -- GitLab From 78a18fec5258c8df9435399a1ea022d73d3eceb9 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Fri, 15 Jan 2021 22:57:52 +0100 Subject: [PATCH 1124/2012] ACPI: scan: Make acpi_bus_get_device() clear return pointer on error Set the acpi_device pointer which acpi_bus_get_device() returns-by- reference to NULL on errors. We've recently had 2 cases where callers of acpi_bus_get_device() did not properly error check the return value, so set the returned- by-reference acpi_device pointer to NULL, because at least some callers of acpi_bus_get_device() expect that to be done on errors. [ rjw: This issue was exposed by commit 71da201f38df ("ACPI: scan: Defer enumeration of devices with _DEP lists") which caused it to be much more likely to occur on some systems, but the real defect had been introduced by an earlier commit. ] Fixes: 40e7fcb19293 ("ACPI: Add _DEP support to fix battery issue on Asus T100TA") Fixes: bcfcd409d4db ("usb: split code locating ACPI companion into port and device") Reported-by: Pierre-Louis Bossart Tested-by: Pierre-Louis Bossart Diagnosed-by: Rafael J. Wysocki Signed-off-by: Hans de Goede Cc: All applicable [ rjw: Subject and changelog edits ] Signed-off-by: Rafael J. Wysocki --- drivers/acpi/scan.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index 58ff36340cd7c..1db063b02f63e 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -586,6 +586,8 @@ static int acpi_get_device_data(acpi_handle handle, struct acpi_device **device, if (!device) return -EINVAL; + *device = NULL; + status = acpi_get_data_full(handle, acpi_scan_drop_device, (void **)device, callback); if (ACPI_FAILURE(status) || !*device) { -- GitLab From fc4cb1e15f0c66f2e37314349dc4a82bd946fbb1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Amadeusz=20S=C5=82awi=C5=84ski?= Date: Wed, 20 Jan 2021 16:28:42 +0100 Subject: [PATCH 1125/2012] ASoC: topology: Properly unregister DAI on removal MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit DAIs need to be removed when topology unload function is called (usually done when component is being removed). We can't do this when device is being removed, as structures we operate on when removing DAI can already be freed. Fixes: 6ae4902f2f34 ("ASoC: soc-topology: use devm_snd_soc_register_dai()") Signed-off-by: Amadeusz Sławiński Tested-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/20210120152846.1703655-2-amadeuszx.slawinski@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/soc-topology.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/sound/soc/soc-topology.c b/sound/soc/soc-topology.c index 950c45008e245..36489c8396d21 100644 --- a/sound/soc/soc-topology.c +++ b/sound/soc/soc-topology.c @@ -447,7 +447,7 @@ static void remove_dai(struct snd_soc_component *comp, { struct snd_soc_dai_driver *dai_drv = container_of(dobj, struct snd_soc_dai_driver, dobj); - struct snd_soc_dai *dai; + struct snd_soc_dai *dai, *_dai; if (pass != SOC_TPLG_PASS_PCM_DAI) return; @@ -455,9 +455,9 @@ static void remove_dai(struct snd_soc_component *comp, if (dobj->ops && dobj->ops->dai_unload) dobj->ops->dai_unload(comp, dobj); - for_each_component_dais(comp, dai) + for_each_component_dais_safe(comp, dai, _dai) if (dai->driver == dai_drv) - dai->driver = NULL; + snd_soc_unregister_dai(dai); list_del(&dobj->list); } @@ -1742,7 +1742,7 @@ static int soc_tplg_dai_create(struct soc_tplg *tplg, list_add(&dai_drv->dobj.list, &tplg->comp->dobj_list); /* register the DAI to the component */ - dai = devm_snd_soc_register_dai(tplg->dev, tplg->comp, dai_drv, false); + dai = snd_soc_register_dai(tplg->comp, dai_drv, false); if (!dai) return -ENOMEM; @@ -1750,6 +1750,7 @@ static int soc_tplg_dai_create(struct soc_tplg *tplg, ret = snd_soc_dapm_new_dai_widgets(dapm, dai); if (ret != 0) { dev_err(dai->dev, "Failed to create DAI widgets %d\n", ret); + snd_soc_unregister_dai(dai); return ret; } -- GitLab From 5ac154443e686b06242aa49de30a12b74ea9ca98 Mon Sep 17 00:00:00 2001 From: Tzung-Bi Shih Date: Wed, 20 Jan 2021 17:22:36 +0800 Subject: [PATCH 1126/2012] ASoC: mediatek: mt8183-mt6358: ignore TDM DAI link by default hdmi-codec is an optional property. Ignore to bind TDM DAI link if the property isn't specified. Fixes: f2024dc55fcb ("ASoC: mediatek: mt8183: use hdmi-codec") Signed-off-by: Tzung-Bi Shih Link: https://lore.kernel.org/r/20210120092237.1553938-2-tzungbi@google.com Signed-off-by: Mark Brown --- sound/soc/mediatek/mt8183/mt8183-mt6358-ts3a227-max98357.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/sound/soc/mediatek/mt8183/mt8183-mt6358-ts3a227-max98357.c b/sound/soc/mediatek/mt8183/mt8183-mt6358-ts3a227-max98357.c index 8c8340854859a..1ce3eddbee13b 100644 --- a/sound/soc/mediatek/mt8183/mt8183-mt6358-ts3a227-max98357.c +++ b/sound/soc/mediatek/mt8183/mt8183-mt6358-ts3a227-max98357.c @@ -515,6 +515,7 @@ static struct snd_soc_dai_link mt8183_mt6358_ts3a227_dai_links[] = { .ignore_suspend = 1, .be_hw_params_fixup = mt8183_i2s_hw_params_fixup, .ops = &mt8183_mt6358_tdm_ops, + .ignore = 1, .init = mt8183_mt6358_ts3a227_max98357_hdmi_init, SND_SOC_DAILINK_REG(tdm), }, @@ -661,8 +662,10 @@ mt8183_mt6358_ts3a227_max98357_dev_probe(struct platform_device *pdev) SND_SOC_DAIFMT_CBM_CFM; } - if (hdmi_codec && strcmp(dai_link->name, "TDM") == 0) + if (hdmi_codec && strcmp(dai_link->name, "TDM") == 0) { dai_link->codecs->of_node = hdmi_codec; + dai_link->ignore = 0; + } if (!dai_link->platforms->name) dai_link->platforms->of_node = platform_node; -- GitLab From 4d36ed8eb0f749c9e781e0d3b041a7adeedcdaa9 Mon Sep 17 00:00:00 2001 From: Tzung-Bi Shih Date: Wed, 20 Jan 2021 17:22:37 +0800 Subject: [PATCH 1127/2012] ASoC: mediatek: mt8183-da7219: ignore TDM DAI link by default hdmi-codec is an optional property. Ignore to bind TDM DAI link if the property isn't specified. Fixes: 5bdbe9771177 ("ASoC: mediatek: mt8183-da7219: use hdmi-codec") Signed-off-by: Tzung-Bi Shih Link: https://lore.kernel.org/r/20210120092237.1553938-3-tzungbi@google.com Signed-off-by: Mark Brown --- sound/soc/mediatek/mt8183/mt8183-da7219-max98357.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/sound/soc/mediatek/mt8183/mt8183-da7219-max98357.c b/sound/soc/mediatek/mt8183/mt8183-da7219-max98357.c index 078e58f1ad0b2..cfbd0c65c7a38 100644 --- a/sound/soc/mediatek/mt8183/mt8183-da7219-max98357.c +++ b/sound/soc/mediatek/mt8183/mt8183-da7219-max98357.c @@ -532,6 +532,7 @@ static struct snd_soc_dai_link mt8183_da7219_dai_links[] = { .dpcm_playback = 1, .ignore_suspend = 1, .be_hw_params_fixup = mt8183_i2s_hw_params_fixup, + .ignore = 1, .init = mt8183_da7219_max98357_hdmi_init, SND_SOC_DAILINK_REG(tdm), }, @@ -754,8 +755,10 @@ static int mt8183_da7219_max98357_dev_probe(struct platform_device *pdev) } } - if (hdmi_codec && strcmp(dai_link->name, "TDM") == 0) + if (hdmi_codec && strcmp(dai_link->name, "TDM") == 0) { dai_link->codecs->of_node = hdmi_codec; + dai_link->ignore = 0; + } if (!dai_link->platforms->name) dai_link->platforms->of_node = platform_node; -- GitLab From db58465f1121086b524be80be39d1fedbe5387f3 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 20 Jan 2021 16:11:12 +0000 Subject: [PATCH 1128/2012] cachefiles: Drop superfluous readpages aops NULL check After the recent actions to convert readpages aops to readahead, the NULL checks of readpages aops in cachefiles_read_or_alloc_page() may hit falsely. More badly, it's an ASSERT() call, and this panics. Drop the superfluous NULL checks for fixing this regression. [DH: Note that cachefiles never actually used readpages, so this check was never actually necessary] BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=208883 BugLink: https://bugzilla.opensuse.org/show_bug.cgi?id=1175245 Fixes: 9ae326a69004 ("CacheFiles: A cache that backs onto a mounted filesystem") Signed-off-by: Takashi Iwai Signed-off-by: David Howells Acked-by: Matthew Wilcox (Oracle) Signed-off-by: Linus Torvalds --- fs/cachefiles/rdwr.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/fs/cachefiles/rdwr.c b/fs/cachefiles/rdwr.c index 8bda092e60c5a..e027c718ca01a 100644 --- a/fs/cachefiles/rdwr.c +++ b/fs/cachefiles/rdwr.c @@ -413,7 +413,6 @@ int cachefiles_read_or_alloc_page(struct fscache_retrieval *op, inode = d_backing_inode(object->backer); ASSERT(S_ISREG(inode->i_mode)); - ASSERT(inode->i_mapping->a_ops->readpages); /* calculate the shift required to use bmap */ shift = PAGE_SHIFT - inode->i_sb->s_blocksize_bits; @@ -713,7 +712,6 @@ int cachefiles_read_or_alloc_pages(struct fscache_retrieval *op, inode = d_backing_inode(object->backer); ASSERT(S_ISREG(inode->i_mode)); - ASSERT(inode->i_mapping->a_ops->readpages); /* calculate the shift required to use bmap */ shift = PAGE_SHIFT - inode->i_sb->s_blocksize_bits; -- GitLab From 7178a107f5ea7bdb1cc23073234f0ded0ef90ec7 Mon Sep 17 00:00:00 2001 From: Tianjia Zhang Date: Tue, 19 Jan 2021 00:13:19 +0000 Subject: [PATCH 1129/2012] X.509: Fix crash caused by NULL pointer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On the following call path, `sig->pkey_algo` is not assigned in asymmetric_key_verify_signature(), which causes runtime crash in public_key_verify_signature(). keyctl_pkey_verify asymmetric_key_verify_signature verify_signature public_key_verify_signature This patch simply check this situation and fixes the crash caused by NULL pointer. Fixes: 215525639631 ("X.509: support OSCCA SM2-with-SM3 certificate verification") Reported-by: Tobias Markus Signed-off-by: Tianjia Zhang Signed-off-by: David Howells Reviewed-and-tested-by: Toke Høiland-Jørgensen Tested-by: João Fonseca Acked-by: Jarkko Sakkinen Cc: stable@vger.kernel.org # v5.10+ Signed-off-by: Linus Torvalds --- crypto/asymmetric_keys/public_key.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/crypto/asymmetric_keys/public_key.c b/crypto/asymmetric_keys/public_key.c index 8892908ad58ce..788a4ba1e2e74 100644 --- a/crypto/asymmetric_keys/public_key.c +++ b/crypto/asymmetric_keys/public_key.c @@ -356,7 +356,8 @@ int public_key_verify_signature(const struct public_key *pkey, if (ret) goto error_free_key; - if (strcmp(sig->pkey_algo, "sm2") == 0 && sig->data_size) { + if (sig->pkey_algo && strcmp(sig->pkey_algo, "sm2") == 0 && + sig->data_size) { ret = cert_sig_digest_update(sig, tfm); if (ret) goto error_free_key; -- GitLab From 506c203cc3de6e26666b8476d287dee81595d6dc Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 20 Jan 2021 21:45:54 +0100 Subject: [PATCH 1130/2012] ALSA: usb-audio: Fix hw constraints dependencies Since the recent refactoring, it's been reported that some USB-audio devices (typically webcams) are no longer detected properly by PulseAudio. The debug session revealed that it's failing at probing by PA to try the sample rate 44.1kHz while the device has discrete sample rates other than 44.1kHz. But the puzzle was that arecord works as is, and some other devices with the discrete rates work, either. After all, this turned out to be the lack of the dependencies in a few hw constraint rules: snd_pcm_hw_rule_add() has the (variable) arguments specifying the dependent parameters, and some functions didn't set the target parameter itself as the dependencies. This resulted in an invalid parameter that could be generated only in a certain call pattern. This bug itself has been present in the code, but it didn't trigger errors just because the rules were casually avoiding such a corner case. After the recent refactoring and cleanup, however, the hw constraints work "as expected", and the problem surfaced now. For fixing the problem above, this patch adds the missing dependent parameters to each snd_pcm_hw_rule() call. Fixes: bc4e94aa8e72 ("ALSA: usb-audio: Handle discrete rates properly in hw constraints") BugLink: http://bugzilla.opensuse.org/show_bug.cgi?id=1181014 Link: https://lore.kernel.org/r/20210120204554.30177-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/usb/pcm.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sound/usb/pcm.c b/sound/usb/pcm.c index f71965bf815fd..078bb4c940334 100644 --- a/sound/usb/pcm.c +++ b/sound/usb/pcm.c @@ -981,6 +981,7 @@ static int setup_hw_info(struct snd_pcm_runtime *runtime, struct snd_usb_substre err = snd_pcm_hw_rule_add(runtime, 0, SNDRV_PCM_HW_PARAM_RATE, hw_rule_rate, subs, + SNDRV_PCM_HW_PARAM_RATE, SNDRV_PCM_HW_PARAM_FORMAT, SNDRV_PCM_HW_PARAM_CHANNELS, param_period_time_if_needed, @@ -990,6 +991,7 @@ static int setup_hw_info(struct snd_pcm_runtime *runtime, struct snd_usb_substre err = snd_pcm_hw_rule_add(runtime, 0, SNDRV_PCM_HW_PARAM_CHANNELS, hw_rule_channels, subs, + SNDRV_PCM_HW_PARAM_CHANNELS, SNDRV_PCM_HW_PARAM_FORMAT, SNDRV_PCM_HW_PARAM_RATE, param_period_time_if_needed, @@ -998,6 +1000,7 @@ static int setup_hw_info(struct snd_pcm_runtime *runtime, struct snd_usb_substre return err; err = snd_pcm_hw_rule_add(runtime, 0, SNDRV_PCM_HW_PARAM_FORMAT, hw_rule_format, subs, + SNDRV_PCM_HW_PARAM_FORMAT, SNDRV_PCM_HW_PARAM_RATE, SNDRV_PCM_HW_PARAM_CHANNELS, param_period_time_if_needed, -- GitLab From b135b3358d73aa2a8b2be35d08e422421d1c609e Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Tue, 19 Jan 2021 16:55:10 +0100 Subject: [PATCH 1131/2012] mtd: rawnand: omap: Use BCH private fields in the specific OOB layout The OMAP driver may leverage software BCH logic to locate errors while using its own hardware to detect the presence of errors. This is achieved with a "mixed" mode which initializes manually the software BCH internal logic while providing its own OOB layout. The issue here comes from the fact that the BCH driver has been updated to only use generic NAND objects, and no longer depend on raw NAND structures as it is usable from SPI-NAND as well. However, at the end of the BCH context initialization, the driver checks the validity of the OOB layout. At this stage, the raw NAND fields have not been populated yet while being used by the layout helpers, leading to an invalid layout. The chosen solution here is to include the BCH structure definition and to refer to the BCH fields directly (de-referenced as a const pointer here) to know as early as possible the number of steps and ECC bytes which have been chosen. Note: I don't know which commit exactly triggered the error, but the entire migration to a generic BCH driver got merged in one go, so this should not be a problem for stable backports. Reported-by: Adam Ford Fixes: 80fe603160a4 ("mtd: nand: ecc-bch: Stop using raw NAND structures") Signed-off-by: Miquel Raynal Tested-by: Adam Ford #logicpd-torpedo-37xx-devkit-28.dts Link: https://lore.kernel.org/linux-mtd/20210119155510.5655-1-miquel.raynal@bootlin.com --- drivers/mtd/nand/raw/omap2.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/drivers/mtd/nand/raw/omap2.c b/drivers/mtd/nand/raw/omap2.c index fbb9955f24671..2c3e65cb68f33 100644 --- a/drivers/mtd/nand/raw/omap2.c +++ b/drivers/mtd/nand/raw/omap2.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -1866,18 +1867,19 @@ static const struct mtd_ooblayout_ops omap_ooblayout_ops = { static int omap_sw_ooblayout_ecc(struct mtd_info *mtd, int section, struct mtd_oob_region *oobregion) { - struct nand_chip *chip = mtd_to_nand(mtd); + struct nand_device *nand = mtd_to_nanddev(mtd); + const struct nand_ecc_sw_bch_conf *engine_conf = nand->ecc.ctx.priv; int off = BADBLOCK_MARKER_LENGTH; - if (section >= chip->ecc.steps) + if (section >= engine_conf->nsteps) return -ERANGE; /* * When SW correction is employed, one OMAP specific marker byte is * reserved after each ECC step. */ - oobregion->offset = off + (section * (chip->ecc.bytes + 1)); - oobregion->length = chip->ecc.bytes; + oobregion->offset = off + (section * (engine_conf->code_size + 1)); + oobregion->length = engine_conf->code_size; return 0; } @@ -1885,7 +1887,8 @@ static int omap_sw_ooblayout_ecc(struct mtd_info *mtd, int section, static int omap_sw_ooblayout_free(struct mtd_info *mtd, int section, struct mtd_oob_region *oobregion) { - struct nand_chip *chip = mtd_to_nand(mtd); + struct nand_device *nand = mtd_to_nanddev(mtd); + const struct nand_ecc_sw_bch_conf *engine_conf = nand->ecc.ctx.priv; int off = BADBLOCK_MARKER_LENGTH; if (section) @@ -1895,7 +1898,7 @@ static int omap_sw_ooblayout_free(struct mtd_info *mtd, int section, * When SW correction is employed, one OMAP specific marker byte is * reserved after each ECC step. */ - off += ((chip->ecc.bytes + 1) * chip->ecc.steps); + off += ((engine_conf->code_size + 1) * engine_conf->nsteps); if (off >= mtd->oobsize) return -ERANGE; -- GitLab From 9bb48c82aced07698a2d08ee0f1475a6c4f6b266 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Tue, 19 Jan 2021 11:41:16 -0800 Subject: [PATCH 1132/2012] tty: implement write_iter This makes the tty layer use the .write_iter() function instead of the traditional .write() functionality. That allows writev(), but more importantly also makes it possible to enable .splice_write() for ttys, reinstating the "splice to tty" functionality that was lost in commit 36e2c7421f02 ("fs: don't allow splice read/write without explicit ops"). Fixes: 36e2c7421f02 ("fs: don't allow splice read/write without explicit ops") Reported-by: Oliver Giles Cc: Christoph Hellwig Cc: Greg Kroah-Hartman Cc: Al Viro Signed-off-by: Linus Torvalds --- drivers/tty/tty_io.c | 48 ++++++++++++++++++++++++-------------------- 1 file changed, 26 insertions(+), 22 deletions(-) diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c index 56ade99ef99f4..338bc4ef55496 100644 --- a/drivers/tty/tty_io.c +++ b/drivers/tty/tty_io.c @@ -143,9 +143,8 @@ LIST_HEAD(tty_drivers); /* linked list of tty drivers */ DEFINE_MUTEX(tty_mutex); static ssize_t tty_read(struct file *, char __user *, size_t, loff_t *); -static ssize_t tty_write(struct file *, const char __user *, size_t, loff_t *); -ssize_t redirected_tty_write(struct file *, const char __user *, - size_t, loff_t *); +static ssize_t tty_write(struct kiocb *, struct iov_iter *); +ssize_t redirected_tty_write(struct kiocb *, struct iov_iter *); static __poll_t tty_poll(struct file *, poll_table *); static int tty_open(struct inode *, struct file *); long tty_ioctl(struct file *file, unsigned int cmd, unsigned long arg); @@ -478,7 +477,8 @@ static void tty_show_fdinfo(struct seq_file *m, struct file *file) static const struct file_operations tty_fops = { .llseek = no_llseek, .read = tty_read, - .write = tty_write, + .write_iter = tty_write, + .splice_write = iter_file_splice_write, .poll = tty_poll, .unlocked_ioctl = tty_ioctl, .compat_ioctl = tty_compat_ioctl, @@ -491,7 +491,8 @@ static const struct file_operations tty_fops = { static const struct file_operations console_fops = { .llseek = no_llseek, .read = tty_read, - .write = redirected_tty_write, + .write_iter = redirected_tty_write, + .splice_write = iter_file_splice_write, .poll = tty_poll, .unlocked_ioctl = tty_ioctl, .compat_ioctl = tty_compat_ioctl, @@ -607,9 +608,9 @@ static void __tty_hangup(struct tty_struct *tty, int exit_session) /* This breaks for file handles being sent over AF_UNIX sockets ? */ list_for_each_entry(priv, &tty->tty_files, list) { filp = priv->file; - if (filp->f_op->write == redirected_tty_write) + if (filp->f_op->write_iter == redirected_tty_write) cons_filp = filp; - if (filp->f_op->write != tty_write) + if (filp->f_op->write_iter != tty_write) continue; closecount++; __tty_fasync(-1, filp, 0); /* can't block */ @@ -902,9 +903,9 @@ static inline ssize_t do_tty_write( ssize_t (*write)(struct tty_struct *, struct file *, const unsigned char *, size_t), struct tty_struct *tty, struct file *file, - const char __user *buf, - size_t count) + struct iov_iter *from) { + size_t count = iov_iter_count(from); ssize_t ret, written = 0; unsigned int chunk; @@ -956,14 +957,20 @@ static inline ssize_t do_tty_write( size_t size = count; if (size > chunk) size = chunk; + ret = -EFAULT; - if (copy_from_user(tty->write_buf, buf, size)) + if (copy_from_iter(tty->write_buf, size, from) != size) break; + ret = write(tty, file, tty->write_buf, size); if (ret <= 0) break; + + /* FIXME! Have Al check this! */ + if (ret != size) + iov_iter_revert(from, size-ret); + written += ret; - buf += ret; count -= ret; if (!count) break; @@ -1023,9 +1030,9 @@ void tty_write_message(struct tty_struct *tty, char *msg) * write method will not be invoked in parallel for each device. */ -static ssize_t tty_write(struct file *file, const char __user *buf, - size_t count, loff_t *ppos) +static ssize_t tty_write(struct kiocb *iocb, struct iov_iter *from) { + struct file *file = iocb->ki_filp; struct tty_struct *tty = file_tty(file); struct tty_ldisc *ld; ssize_t ret; @@ -1038,18 +1045,15 @@ static ssize_t tty_write(struct file *file, const char __user *buf, if (tty->ops->write_room == NULL) tty_err(tty, "missing write_room method\n"); ld = tty_ldisc_ref_wait(tty); - if (!ld) - return hung_up_tty_write(file, buf, count, ppos); - if (!ld->ops->write) + if (!ld || !ld->ops->write) ret = -EIO; else - ret = do_tty_write(ld->ops->write, tty, file, buf, count); + ret = do_tty_write(ld->ops->write, tty, file, from); tty_ldisc_deref(ld); return ret; } -ssize_t redirected_tty_write(struct file *file, const char __user *buf, - size_t count, loff_t *ppos) +ssize_t redirected_tty_write(struct kiocb *iocb, struct iov_iter *iter) { struct file *p = NULL; @@ -1060,11 +1064,11 @@ ssize_t redirected_tty_write(struct file *file, const char __user *buf, if (p) { ssize_t res; - res = vfs_write(p, buf, count, &p->f_pos); + res = vfs_iocb_iter_write(p, iocb, iter); fput(p); return res; } - return tty_write(file, buf, count, ppos); + return tty_write(iocb, iter); } /** @@ -2293,7 +2297,7 @@ static int tioccons(struct file *file) { if (!capable(CAP_SYS_ADMIN)) return -EPERM; - if (file->f_op->write == redirected_tty_write) { + if (file->f_op->write_iter == redirected_tty_write) { struct file *f; spin_lock(&redirect_lock); f = redirect; -- GitLab From 646188c9550f74454dfc172a347dad693e5bfc84 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 19 Jan 2021 17:53:35 +0300 Subject: [PATCH 1133/2012] net: dsa: Fix off by one in dsa_loop_port_vlan_add() The > comparison is intended to be >= to prevent reading beyond the end of the ps->vlans[] array. It doesn't affect run time though because the ps->vlans[] array has VLAN_N_VID (4096) elements and the vlan->vid cannot be > 4094 because it is checked earlier. Signed-off-by: Dan Carpenter Acked-by: Florian Fainelli Link: https://lore.kernel.org/r/YAbyb5kBJQlpYCs2@mwanda Signed-off-by: Jakub Kicinski --- drivers/net/dsa/dsa_loop.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/dsa/dsa_loop.c b/drivers/net/dsa/dsa_loop.c index 5f69216376fe2..8c283f59158b0 100644 --- a/drivers/net/dsa/dsa_loop.c +++ b/drivers/net/dsa/dsa_loop.c @@ -207,7 +207,7 @@ static int dsa_loop_port_vlan_add(struct dsa_switch *ds, int port, struct mii_bus *bus = ps->bus; struct dsa_loop_vlan *vl; - if (vlan->vid > ARRAY_SIZE(ps->vlans)) + if (vlan->vid >= ARRAY_SIZE(ps->vlans)) return -ERANGE; /* Just do a sleeping operation to make lockdep checks effective */ -- GitLab From 389102a3515b53a38858554a915006be7f0b6a06 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Valdis=20Kl=C4=93tnieks?= Date: Fri, 15 Jan 2021 02:07:03 -0500 Subject: [PATCH 1134/2012] scsi: target: iscsi: Fix typo in comment MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Correct the spelling of Nagle's name in a comment. Link: https://lore.kernel.org/r/2921.1610694423@turing-police Reviewed-by: Chaitanya Kulkarni Signed-off-by: Valdis Klētnieks Signed-off-by: Martin K. Petersen --- drivers/target/iscsi/iscsi_target_login.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/target/iscsi/iscsi_target_login.c b/drivers/target/iscsi/iscsi_target_login.c index 893d1b406c292..1a9c50401bdb5 100644 --- a/drivers/target/iscsi/iscsi_target_login.c +++ b/drivers/target/iscsi/iscsi_target_login.c @@ -896,7 +896,7 @@ int iscsit_setup_np( else len = sizeof(struct sockaddr_in); /* - * Set SO_REUSEADDR, and disable Nagel Algorithm with TCP_NODELAY. + * Set SO_REUSEADDR, and disable Nagle Algorithm with TCP_NODELAY. */ if (np->np_network_transport == ISCSI_TCP) tcp_sock_set_nodelay(sock->sk); -- GitLab From aa2c24e7f415e9c13635cee22ff4e15a80215551 Mon Sep 17 00:00:00 2001 From: Enzo Matsumiya Date: Mon, 18 Jan 2021 15:49:22 -0300 Subject: [PATCH 1135/2012] scsi: qla2xxx: Fix description for parameter ql2xenforce_iocb_limit Parameter ql2xenforce_iocb_limit is enabled by default. Link: https://lore.kernel.org/r/20210118184922.23793-1-ematsumiya@suse.de Fixes: 89c72f4245a8 ("scsi: qla2xxx: Add IOCB resource tracking") Reviewed-by: Daniel Wagner Reviewed-by: Himanshu Madhani Signed-off-by: Enzo Matsumiya Signed-off-by: Martin K. Petersen --- drivers/scsi/qla2xxx/qla_os.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index f80abe28f35a4..0e0fe5b094966 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -42,7 +42,7 @@ MODULE_PARM_DESC(ql2xfulldump_on_mpifail, int ql2xenforce_iocb_limit = 1; module_param(ql2xenforce_iocb_limit, int, S_IRUGO | S_IWUSR); MODULE_PARM_DESC(ql2xenforce_iocb_limit, - "Enforce IOCB throttling, to avoid FW congestion. (default: 0)"); + "Enforce IOCB throttling, to avoid FW congestion. (default: 1)"); /* * CT6 CTX allocation cache -- GitLab From c369d7fc8fddc5e5af4aea73dd403681a74c1a86 Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Wed, 20 Jan 2021 01:10:45 +0100 Subject: [PATCH 1136/2012] net: dsa: microchip: ksz8795: Fix KSZ8794 port map again The KSZ8795 switch has 4 external ports {0,1,2,3} and 1 CPU port {4}, so does the KSZ8765. The KSZ8794 seems to be repackaged KSZ8795 with different ID and port 3 not routed out, however the port 3 registers are present in the silicon, so the KSZ8794 switch has 3 external ports {0,1,2} and 1 CPU port {4}. Currently the driver always uses the last port as CPU port, on KSZ8795/KSZ8765 that is port 4 and that is OK, but on KSZ8794 that is port 3 and that is not OK, as it must also be port 4. This patch adjusts the driver such that it always registers a switch with 5 ports total (4 external ports, 1 CPU port), always sets the CPU port to switch port 4, and then configures the external port mask according to the switch model -- 3 ports for KSZ8794 and 4 for KSZ8795/KSZ8765. Fixes: 68a1b676db52 ("net: dsa: microchip: ksz8795: remove superfluous port_cnt assignment") Fixes: 4ce2a984abd8 ("net: dsa: microchip: ksz8795: use phy_port_cnt where possible") Fixes: 241ed719bc98 ("net: dsa: microchip: ksz8795: use port_cnt instead of TOTOAL_PORT_NUM") Signed-off-by: Marek Vasut Cc: Michael Grzeschik Reviewed-by: Andrew Lunn Reviewed-by: Florian Fainelli Link: https://lore.kernel.org/r/20210120001045.488506-1-marex@denx.de Signed-off-by: Jakub Kicinski --- drivers/net/dsa/microchip/ksz8795.c | 30 +++++++++++++++++--------- drivers/net/dsa/microchip/ksz_common.c | 2 +- 2 files changed, 21 insertions(+), 11 deletions(-) diff --git a/drivers/net/dsa/microchip/ksz8795.c b/drivers/net/dsa/microchip/ksz8795.c index c973db101b729..a4570ba29c831 100644 --- a/drivers/net/dsa/microchip/ksz8795.c +++ b/drivers/net/dsa/microchip/ksz8795.c @@ -1187,6 +1187,20 @@ static const struct ksz_chip_data ksz8795_switch_chips[] = { .port_cnt = 5, /* total cpu and user ports */ }, { + /* + * WARNING + * ======= + * KSZ8794 is similar to KSZ8795, except the port map + * contains a gap between external and CPU ports, the + * port map is NOT continuous. The per-port register + * map is shifted accordingly too, i.e. registers at + * offset 0x40 are NOT used on KSZ8794 and they ARE + * used on KSZ8795 for external port 3. + * external cpu + * KSZ8794 0,1,2 4 + * KSZ8795 0,1,2,3 4 + * KSZ8765 0,1,2,3 4 + */ .chip_id = 0x8794, .dev_name = "KSZ8794", .num_vlans = 4096, @@ -1220,9 +1234,13 @@ static int ksz8795_switch_init(struct ksz_device *dev) dev->num_vlans = chip->num_vlans; dev->num_alus = chip->num_alus; dev->num_statics = chip->num_statics; - dev->port_cnt = chip->port_cnt; + dev->port_cnt = fls(chip->cpu_ports); + dev->cpu_port = fls(chip->cpu_ports) - 1; + dev->phy_port_cnt = dev->port_cnt - 1; dev->cpu_ports = chip->cpu_ports; - + dev->host_mask = chip->cpu_ports; + dev->port_mask = (BIT(dev->phy_port_cnt) - 1) | + chip->cpu_ports; break; } } @@ -1231,17 +1249,9 @@ static int ksz8795_switch_init(struct ksz_device *dev) if (!dev->cpu_ports) return -ENODEV; - dev->port_mask = BIT(dev->port_cnt) - 1; - dev->port_mask |= dev->host_mask; - dev->reg_mib_cnt = KSZ8795_COUNTER_NUM; dev->mib_cnt = ARRAY_SIZE(mib_names); - dev->phy_port_cnt = dev->port_cnt - 1; - - dev->cpu_port = dev->port_cnt - 1; - dev->host_mask = BIT(dev->cpu_port); - dev->ports = devm_kzalloc(dev->dev, dev->port_cnt * sizeof(struct ksz_port), GFP_KERNEL); diff --git a/drivers/net/dsa/microchip/ksz_common.c b/drivers/net/dsa/microchip/ksz_common.c index cf743133b0b93..4899636644438 100644 --- a/drivers/net/dsa/microchip/ksz_common.c +++ b/drivers/net/dsa/microchip/ksz_common.c @@ -434,7 +434,7 @@ int ksz_switch_register(struct ksz_device *dev, if (of_property_read_u32(port, "reg", &port_num)) continue; - if (port_num >= dev->port_cnt) + if (!(dev->port_mask & BIT(port_num))) return -EINVAL; of_get_phy_mode(port, &dev->ports[port_num].interface); -- GitLab From 1c45ba93d34cd6af75228f34d0675200c81738b5 Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Wed, 20 Jan 2021 04:05:02 +0100 Subject: [PATCH 1137/2012] net: dsa: microchip: Adjust reset release timing to match reference reset circuit KSZ8794CNX datasheet section 8.0 RESET CIRCUIT describes recommended circuit for interfacing with CPU/FPGA reset consisting of 10k pullup resistor and 10uF capacitor to ground. This circuit takes ~100 ms to rise enough to release the reset. For maximum supply voltage VDDIO=3.3V VIH=2.0V R=10kR C=10uF that is VDDIO - VIH t = R * C * -ln( ------------- ) = 10000*0.00001*-(-0.93)=0.093 s VDDIO so we need ~95 ms for the reset to really de-assert, and then the original 100us for the switch itself to come out of reset. Simply msleep() for 100 ms which fits the constraint with a bit of extra space. Fixes: 5b797980908a ("net: dsa: microchip: Implement recommended reset timing") Reviewed-by: Florian Fainelli Signed-off-by: Marek Vasut Cc: Michael Grzeschik Reviewed-by: Paul Barker Reviewed-by: Andrew Lunn Link: https://lore.kernel.org/r/20210120030502.617185-1-marex@denx.de Signed-off-by: Jakub Kicinski --- drivers/net/dsa/microchip/ksz_common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/dsa/microchip/ksz_common.c b/drivers/net/dsa/microchip/ksz_common.c index 4899636644438..389abfd277702 100644 --- a/drivers/net/dsa/microchip/ksz_common.c +++ b/drivers/net/dsa/microchip/ksz_common.c @@ -400,7 +400,7 @@ int ksz_switch_register(struct ksz_device *dev, gpiod_set_value_cansleep(dev->reset_gpio, 1); usleep_range(10000, 12000); gpiod_set_value_cansleep(dev->reset_gpio, 0); - usleep_range(100, 1000); + msleep(100); } mutex_init(&dev->dev_mutex); -- GitLab From 60f5ad5e19c0996df7ca4ce7ef5fd4596cb13f01 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Wed, 20 Jan 2021 16:44:10 +0100 Subject: [PATCH 1138/2012] nexthop: Use a dedicated policy for nh_valid_get_del_req() This function uses the global nexthop policy only to then bounce all arguments except for NHA_ID. Instead, just create a new policy that only includes the one allowed attribute. Signed-off-by: Petr Machata Reviewed-by: David Ahern Signed-off-by: Jakub Kicinski --- net/ipv4/nexthop.c | 26 +++++++++----------------- 1 file changed, 9 insertions(+), 17 deletions(-) diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c index e53e43aef7854..391079ff1bb5e 100644 --- a/net/ipv4/nexthop.c +++ b/net/ipv4/nexthop.c @@ -36,6 +36,10 @@ static const struct nla_policy rtm_nh_policy[NHA_MAX + 1] = { [NHA_FDB] = { .type = NLA_FLAG }, }; +static const struct nla_policy rtm_nh_policy_get[] = { + [NHA_ID] = { .type = NLA_U32 }, +}; + static bool nexthop_notifiers_is_empty(struct net *net) { return !net->nexthop.notifier_chain.head; @@ -1842,28 +1846,16 @@ static int nh_valid_get_del_req(struct nlmsghdr *nlh, u32 *id, struct netlink_ext_ack *extack) { struct nhmsg *nhm = nlmsg_data(nlh); - struct nlattr *tb[NHA_MAX + 1]; - int err, i; + struct nlattr *tb[ARRAY_SIZE(rtm_nh_policy_get)]; + int err; - err = nlmsg_parse(nlh, sizeof(*nhm), tb, NHA_MAX, rtm_nh_policy, - extack); + err = nlmsg_parse(nlh, sizeof(*nhm), tb, + ARRAY_SIZE(rtm_nh_policy_get) - 1, + rtm_nh_policy_get, extack); if (err < 0) return err; err = -EINVAL; - for (i = 0; i < __NHA_MAX; ++i) { - if (!tb[i]) - continue; - - switch (i) { - case NHA_ID: - break; - default: - NL_SET_ERR_MSG_ATTR(extack, tb[i], - "Unexpected attribute in request"); - goto out; - } - } if (nhm->nh_protocol || nhm->resvd || nhm->nh_scope || nhm->nh_flags) { NL_SET_ERR_MSG(extack, "Invalid values in header"); goto out; -- GitLab From 44551bff290d11038816ae5da963d2de12e16c31 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Wed, 20 Jan 2021 16:44:11 +0100 Subject: [PATCH 1139/2012] nexthop: Use a dedicated policy for nh_valid_dump_req() This function uses the global nexthop policy, but only accepts four particular attributes. Create a new policy that only includes the four supported attributes, and use it. Convert the loop to a series of ifs. Signed-off-by: Petr Machata Reviewed-by: David Ahern Signed-off-by: Jakub Kicinski --- net/ipv4/nexthop.c | 60 +++++++++++++++++++++------------------------- 1 file changed, 27 insertions(+), 33 deletions(-) diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c index 391079ff1bb5e..bbea78ea4870a 100644 --- a/net/ipv4/nexthop.c +++ b/net/ipv4/nexthop.c @@ -40,6 +40,13 @@ static const struct nla_policy rtm_nh_policy_get[] = { [NHA_ID] = { .type = NLA_U32 }, }; +static const struct nla_policy rtm_nh_policy_dump[] = { + [NHA_OIF] = { .type = NLA_U32 }, + [NHA_GROUPS] = { .type = NLA_FLAG }, + [NHA_MASTER] = { .type = NLA_U32 }, + [NHA_FDB] = { .type = NLA_FLAG }, +}; + static bool nexthop_notifiers_is_empty(struct net *net) { return !net->nexthop.notifier_chain.head; @@ -1983,48 +1990,35 @@ static int nh_valid_dump_req(const struct nlmsghdr *nlh, int *dev_idx, bool *fdb_filter, struct netlink_callback *cb) { struct netlink_ext_ack *extack = cb->extack; - struct nlattr *tb[NHA_MAX + 1]; + struct nlattr *tb[ARRAY_SIZE(rtm_nh_policy_dump)]; struct nhmsg *nhm; - int err, i; + int err; u32 idx; - err = nlmsg_parse(nlh, sizeof(*nhm), tb, NHA_MAX, rtm_nh_policy, - NULL); + err = nlmsg_parse(nlh, sizeof(*nhm), tb, + ARRAY_SIZE(rtm_nh_policy_dump) - 1, + rtm_nh_policy_dump, NULL); if (err < 0) return err; - for (i = 0; i <= NHA_MAX; ++i) { - if (!tb[i]) - continue; - - switch (i) { - case NHA_OIF: - idx = nla_get_u32(tb[i]); - if (idx > INT_MAX) { - NL_SET_ERR_MSG(extack, "Invalid device index"); - return -EINVAL; - } - *dev_idx = idx; - break; - case NHA_MASTER: - idx = nla_get_u32(tb[i]); - if (idx > INT_MAX) { - NL_SET_ERR_MSG(extack, "Invalid master device index"); - return -EINVAL; - } - *master_idx = idx; - break; - case NHA_GROUPS: - *group_filter = true; - break; - case NHA_FDB: - *fdb_filter = true; - break; - default: - NL_SET_ERR_MSG(extack, "Unsupported attribute in dump request"); + if (tb[NHA_OIF]) { + idx = nla_get_u32(tb[NHA_OIF]); + if (idx > INT_MAX) { + NL_SET_ERR_MSG(extack, "Invalid device index"); + return -EINVAL; + } + *dev_idx = idx; + } + if (tb[NHA_MASTER]) { + idx = nla_get_u32(tb[NHA_MASTER]); + if (idx > INT_MAX) { + NL_SET_ERR_MSG(extack, "Invalid master device index"); return -EINVAL; } + *master_idx = idx; } + *group_filter = nla_get_flag(tb[NHA_GROUPS]); + *fdb_filter = nla_get_flag(tb[NHA_FDB]); nhm = nlmsg_data(nlh); if (nhm->nh_protocol || nhm->resvd || nhm->nh_scope || nhm->nh_flags) { -- GitLab From 643d0878e674434e427888339e6d57c1cc25ee66 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Wed, 20 Jan 2021 16:44:12 +0100 Subject: [PATCH 1140/2012] nexthop: Specialize rtm_nh_policy This policy is currently only used for creation of new next hops and new next hop groups. Rename it accordingly and remove the two attributes that are not valid in that context: NHA_GROUPS and NHA_MASTER. For consistency with other policies, do not mention policy array size in the declarator, and replace NHA_MAX for ARRAY_SIZE as appropriate. Note that with this commit, NHA_MAX and __NHA_MAX are not used anymore. Leave them in purely as a user API. Signed-off-by: Petr Machata Reviewed-by: David Ahern Signed-off-by: Jakub Kicinski --- net/ipv4/nexthop.c | 23 +++++++++-------------- 1 file changed, 9 insertions(+), 14 deletions(-) diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c index bbea78ea4870a..e6dfca4262424 100644 --- a/net/ipv4/nexthop.c +++ b/net/ipv4/nexthop.c @@ -22,7 +22,7 @@ static void remove_nexthop(struct net *net, struct nexthop *nh, #define NH_DEV_HASHBITS 8 #define NH_DEV_HASHSIZE (1U << NH_DEV_HASHBITS) -static const struct nla_policy rtm_nh_policy[NHA_MAX + 1] = { +static const struct nla_policy rtm_nh_policy_new[] = { [NHA_ID] = { .type = NLA_U32 }, [NHA_GROUP] = { .type = NLA_BINARY }, [NHA_GROUP_TYPE] = { .type = NLA_U16 }, @@ -31,8 +31,6 @@ static const struct nla_policy rtm_nh_policy[NHA_MAX + 1] = { [NHA_GATEWAY] = { .type = NLA_BINARY }, [NHA_ENCAP_TYPE] = { .type = NLA_U16 }, [NHA_ENCAP] = { .type = NLA_NESTED }, - [NHA_GROUPS] = { .type = NLA_FLAG }, - [NHA_MASTER] = { .type = NLA_U32 }, [NHA_FDB] = { .type = NLA_FLAG }, }; @@ -576,7 +574,8 @@ static int nh_check_attr_fdb_group(struct nexthop *nh, u8 *nh_family, return 0; } -static int nh_check_attr_group(struct net *net, struct nlattr *tb[], +static int nh_check_attr_group(struct net *net, + struct nlattr *tb[], size_t tb_size, struct netlink_ext_ack *extack) { unsigned int len = nla_len(tb[NHA_GROUP]); @@ -635,7 +634,7 @@ static int nh_check_attr_group(struct net *net, struct nlattr *tb[], return -EINVAL; } } - for (i = NHA_GROUP_TYPE + 1; i < __NHA_MAX; ++i) { + for (i = NHA_GROUP_TYPE + 1; i < tb_size; ++i) { if (!tb[i]) continue; if (i == NHA_FDB) @@ -1654,11 +1653,12 @@ static int rtm_to_nh_config(struct net *net, struct sk_buff *skb, struct netlink_ext_ack *extack) { struct nhmsg *nhm = nlmsg_data(nlh); - struct nlattr *tb[NHA_MAX + 1]; + struct nlattr *tb[ARRAY_SIZE(rtm_nh_policy_new)]; int err; - err = nlmsg_parse(nlh, sizeof(*nhm), tb, NHA_MAX, rtm_nh_policy, - extack); + err = nlmsg_parse(nlh, sizeof(*nhm), tb, + ARRAY_SIZE(rtm_nh_policy_new) - 1, + rtm_nh_policy_new, extack); if (err < 0) return err; @@ -1685,11 +1685,6 @@ static int rtm_to_nh_config(struct net *net, struct sk_buff *skb, goto out; } - if (tb[NHA_GROUPS] || tb[NHA_MASTER]) { - NL_SET_ERR_MSG(extack, "Invalid attributes in request"); - goto out; - } - memset(cfg, 0, sizeof(*cfg)); cfg->nlflags = nlh->nlmsg_flags; cfg->nlinfo.portid = NETLINK_CB(skb).portid; @@ -1731,7 +1726,7 @@ static int rtm_to_nh_config(struct net *net, struct sk_buff *skb, NL_SET_ERR_MSG(extack, "Invalid group type"); goto out; } - err = nh_check_attr_group(net, tb, extack); + err = nh_check_attr_group(net, tb, ARRAY_SIZE(tb), extack); /* no other attributes should be set */ goto out; -- GitLab From 2014beea7eb165c745706b13659a0f1d0a9a2a61 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 19 Jan 2021 12:25:18 -0800 Subject: [PATCH 1141/2012] net: move net_set_todo inside rollback_registered() Commit 93ee31f14f6f ("[NET]: Fix free_netdev on register_netdev failure.") moved net_set_todo() outside of rollback_registered() so that rollback_registered() can be used in the failure path of register_netdevice() but without risking a double free. Since commit cf124db566e6 ("net: Fix inconsistent teardown and release of private netdev state."), however, we have a better way of handling that condition, since destructors don't call free_netdev() directly. After the change in commit c269a24ce057 ("net: make free_netdev() more lenient with unregistering devices") we can now move net_set_todo() back. Reviewed-by: Edwin Peer Signed-off-by: Jakub Kicinski --- net/core/dev.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/net/core/dev.c b/net/core/dev.c index 07a0347c33fb0..9476edf702646 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -9546,8 +9546,10 @@ static void rollback_registered_many(struct list_head *head) synchronize_net(); - list_for_each_entry(dev, head, unreg_list) + list_for_each_entry(dev, head, unreg_list) { dev_put(dev); + net_set_todo(dev); + } } static void rollback_registered(struct net_device *dev) @@ -10109,7 +10111,6 @@ int register_netdevice(struct net_device *dev) /* Expect explicit free_netdev() on failure */ dev->needs_free_netdev = false; rollback_registered(dev); - net_set_todo(dev); goto out; } /* @@ -10732,8 +10733,6 @@ void unregister_netdevice_queue(struct net_device *dev, struct list_head *head) list_move_tail(&dev->unreg_list, head); } else { rollback_registered(dev); - /* Finish processing unregister after unlock */ - net_set_todo(dev); } } EXPORT_SYMBOL(unregister_netdevice_queue); @@ -10747,12 +10746,8 @@ EXPORT_SYMBOL(unregister_netdevice_queue); */ void unregister_netdevice_many(struct list_head *head) { - struct net_device *dev; - if (!list_empty(head)) { rollback_registered_many(head); - list_for_each_entry(dev, head, unreg_list) - net_set_todo(dev); list_del(head); } } -- GitLab From 037e56bd965e1bc72c2fa9684ac25b56839a338e Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 19 Jan 2021 12:25:19 -0800 Subject: [PATCH 1142/2012] net: inline rollback_registered() rollback_registered() is a local helper, it's common for driver code to call unregister_netdevice_queue(dev, NULL) when they want to unregister netdevices under rtnl_lock. Inline rollback_registered() and adjust the only remaining caller. Reviewed-by: Edwin Peer Signed-off-by: Jakub Kicinski --- net/core/dev.c | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/net/core/dev.c b/net/core/dev.c index 9476edf702646..9a147142c477a 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -9552,15 +9552,6 @@ static void rollback_registered_many(struct list_head *head) } } -static void rollback_registered(struct net_device *dev) -{ - LIST_HEAD(single); - - list_add(&dev->unreg_list, &single); - rollback_registered_many(&single); - list_del(&single); -} - static netdev_features_t netdev_sync_upper_features(struct net_device *lower, struct net_device *upper, netdev_features_t features) { @@ -10110,7 +10101,7 @@ int register_netdevice(struct net_device *dev) if (ret) { /* Expect explicit free_netdev() on failure */ dev->needs_free_netdev = false; - rollback_registered(dev); + unregister_netdevice_queue(dev, NULL); goto out; } /* @@ -10732,7 +10723,11 @@ void unregister_netdevice_queue(struct net_device *dev, struct list_head *head) if (head) { list_move_tail(&dev->unreg_list, head); } else { - rollback_registered(dev); + LIST_HEAD(single); + + list_add(&dev->unreg_list, &single); + rollback_registered_many(&single); + list_del(&single); } } EXPORT_SYMBOL(unregister_netdevice_queue); -- GitLab From bcfe2f1a3818d9dca945b6aca4ae741cb1f75329 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 19 Jan 2021 12:25:20 -0800 Subject: [PATCH 1143/2012] net: move rollback_registered_many() Move rollback_registered_many() and add a temporary forward declaration to make merging the code into unregister_netdevice_many() easier to review. No functional changes. Reviewed-by: Edwin Peer Signed-off-by: Jakub Kicinski --- net/core/dev.c | 188 +++++++++++++++++++++++++------------------------ 1 file changed, 95 insertions(+), 93 deletions(-) diff --git a/net/core/dev.c b/net/core/dev.c index 9a147142c477a..1fb99ae8cc22e 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -9459,99 +9459,6 @@ static void net_set_todo(struct net_device *dev) dev_net(dev)->dev_unreg_count++; } -static void rollback_registered_many(struct list_head *head) -{ - struct net_device *dev, *tmp; - LIST_HEAD(close_head); - - BUG_ON(dev_boot_phase); - ASSERT_RTNL(); - - list_for_each_entry_safe(dev, tmp, head, unreg_list) { - /* Some devices call without registering - * for initialization unwind. Remove those - * devices and proceed with the remaining. - */ - if (dev->reg_state == NETREG_UNINITIALIZED) { - pr_debug("unregister_netdevice: device %s/%p never was registered\n", - dev->name, dev); - - WARN_ON(1); - list_del(&dev->unreg_list); - continue; - } - dev->dismantle = true; - BUG_ON(dev->reg_state != NETREG_REGISTERED); - } - - /* If device is running, close it first. */ - list_for_each_entry(dev, head, unreg_list) - list_add_tail(&dev->close_list, &close_head); - dev_close_many(&close_head, true); - - list_for_each_entry(dev, head, unreg_list) { - /* And unlink it from device chain. */ - unlist_netdevice(dev); - - dev->reg_state = NETREG_UNREGISTERING; - } - flush_all_backlogs(); - - synchronize_net(); - - list_for_each_entry(dev, head, unreg_list) { - struct sk_buff *skb = NULL; - - /* Shutdown queueing discipline. */ - dev_shutdown(dev); - - dev_xdp_uninstall(dev); - - /* Notify protocols, that we are about to destroy - * this device. They should clean all the things. - */ - call_netdevice_notifiers(NETDEV_UNREGISTER, dev); - - if (!dev->rtnl_link_ops || - dev->rtnl_link_state == RTNL_LINK_INITIALIZED) - skb = rtmsg_ifinfo_build_skb(RTM_DELLINK, dev, ~0U, 0, - GFP_KERNEL, NULL, 0); - - /* - * Flush the unicast and multicast chains - */ - dev_uc_flush(dev); - dev_mc_flush(dev); - - netdev_name_node_alt_flush(dev); - netdev_name_node_free(dev->name_node); - - if (dev->netdev_ops->ndo_uninit) - dev->netdev_ops->ndo_uninit(dev); - - if (skb) - rtmsg_ifinfo_send(skb, dev, GFP_KERNEL); - - /* Notifier chain MUST detach us all upper devices. */ - WARN_ON(netdev_has_any_upper_dev(dev)); - WARN_ON(netdev_has_any_lower_dev(dev)); - - /* Remove entries from kobject tree */ - netdev_unregister_kobject(dev); -#ifdef CONFIG_XPS - /* Remove XPS queueing entries */ - netif_reset_xps_queues_gt(dev, 0); -#endif - } - - synchronize_net(); - - list_for_each_entry(dev, head, unreg_list) { - dev_put(dev); - net_set_todo(dev); - } -} - static netdev_features_t netdev_sync_upper_features(struct net_device *lower, struct net_device *upper, netdev_features_t features) { @@ -10703,6 +10610,8 @@ void synchronize_net(void) } EXPORT_SYMBOL(synchronize_net); +static void rollback_registered_many(struct list_head *head); + /** * unregister_netdevice_queue - remove device from the kernel * @dev: device @@ -10748,6 +10657,99 @@ void unregister_netdevice_many(struct list_head *head) } EXPORT_SYMBOL(unregister_netdevice_many); +static void rollback_registered_many(struct list_head *head) +{ + struct net_device *dev, *tmp; + LIST_HEAD(close_head); + + BUG_ON(dev_boot_phase); + ASSERT_RTNL(); + + list_for_each_entry_safe(dev, tmp, head, unreg_list) { + /* Some devices call without registering + * for initialization unwind. Remove those + * devices and proceed with the remaining. + */ + if (dev->reg_state == NETREG_UNINITIALIZED) { + pr_debug("unregister_netdevice: device %s/%p never was registered\n", + dev->name, dev); + + WARN_ON(1); + list_del(&dev->unreg_list); + continue; + } + dev->dismantle = true; + BUG_ON(dev->reg_state != NETREG_REGISTERED); + } + + /* If device is running, close it first. */ + list_for_each_entry(dev, head, unreg_list) + list_add_tail(&dev->close_list, &close_head); + dev_close_many(&close_head, true); + + list_for_each_entry(dev, head, unreg_list) { + /* And unlink it from device chain. */ + unlist_netdevice(dev); + + dev->reg_state = NETREG_UNREGISTERING; + } + flush_all_backlogs(); + + synchronize_net(); + + list_for_each_entry(dev, head, unreg_list) { + struct sk_buff *skb = NULL; + + /* Shutdown queueing discipline. */ + dev_shutdown(dev); + + dev_xdp_uninstall(dev); + + /* Notify protocols, that we are about to destroy + * this device. They should clean all the things. + */ + call_netdevice_notifiers(NETDEV_UNREGISTER, dev); + + if (!dev->rtnl_link_ops || + dev->rtnl_link_state == RTNL_LINK_INITIALIZED) + skb = rtmsg_ifinfo_build_skb(RTM_DELLINK, dev, ~0U, 0, + GFP_KERNEL, NULL, 0); + + /* + * Flush the unicast and multicast chains + */ + dev_uc_flush(dev); + dev_mc_flush(dev); + + netdev_name_node_alt_flush(dev); + netdev_name_node_free(dev->name_node); + + if (dev->netdev_ops->ndo_uninit) + dev->netdev_ops->ndo_uninit(dev); + + if (skb) + rtmsg_ifinfo_send(skb, dev, GFP_KERNEL); + + /* Notifier chain MUST detach us all upper devices. */ + WARN_ON(netdev_has_any_upper_dev(dev)); + WARN_ON(netdev_has_any_lower_dev(dev)); + + /* Remove entries from kobject tree */ + netdev_unregister_kobject(dev); +#ifdef CONFIG_XPS + /* Remove XPS queueing entries */ + netif_reset_xps_queues_gt(dev, 0); +#endif + } + + synchronize_net(); + + list_for_each_entry(dev, head, unreg_list) { + dev_put(dev); + net_set_todo(dev); + } +} + /** * unregister_netdev - remove device from the kernel * @dev: device -- GitLab From 0cbe1e57a7b93517100b0eb63d8e445cfbeb630c Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 19 Jan 2021 12:25:21 -0800 Subject: [PATCH 1144/2012] net: inline rollback_registered_many() Similar to the change for rollback_registered() - rollback_registered_many() was a part of unregister_netdevice_many() minus the net_set_todo(), which is no longer needed. Functionally this patch moves the list_empty() check back after: BUG_ON(dev_boot_phase); ASSERT_RTNL(); but I can't find any reason why that would be an issue. Reviewed-by: Edwin Peer Signed-off-by: Jakub Kicinski --- net/core/dev.c | 22 ++++++++-------------- 1 file changed, 8 insertions(+), 14 deletions(-) diff --git a/net/core/dev.c b/net/core/dev.c index 1fb99ae8cc22e..00f970ba0248a 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5709,7 +5709,7 @@ static void flush_all_backlogs(void) } /* we can have in flight packet[s] on the cpus we are not flushing, - * synchronize_net() in rollback_registered_many() will take care of + * synchronize_net() in unregister_netdevice_many() will take care of * them */ for_each_cpu(cpu, &flush_cpus) @@ -10610,8 +10610,6 @@ void synchronize_net(void) } EXPORT_SYMBOL(synchronize_net); -static void rollback_registered_many(struct list_head *head); - /** * unregister_netdevice_queue - remove device from the kernel * @dev: device @@ -10635,8 +10633,7 @@ void unregister_netdevice_queue(struct net_device *dev, struct list_head *head) LIST_HEAD(single); list_add(&dev->unreg_list, &single); - rollback_registered_many(&single); - list_del(&single); + unregister_netdevice_many(&single); } } EXPORT_SYMBOL(unregister_netdevice_queue); @@ -10649,15 +10646,6 @@ EXPORT_SYMBOL(unregister_netdevice_queue); * we force a list_del() to make sure stack wont be corrupted later. */ void unregister_netdevice_many(struct list_head *head) -{ - if (!list_empty(head)) { - rollback_registered_many(head); - list_del(head); - } -} -EXPORT_SYMBOL(unregister_netdevice_many); - -static void rollback_registered_many(struct list_head *head) { struct net_device *dev, *tmp; LIST_HEAD(close_head); @@ -10665,6 +10653,9 @@ static void rollback_registered_many(struct list_head *head) BUG_ON(dev_boot_phase); ASSERT_RTNL(); + if (list_empty(head)) + return; + list_for_each_entry_safe(dev, tmp, head, unreg_list) { /* Some devices call without registering * for initialization unwind. Remove those @@ -10748,7 +10739,10 @@ static void rollback_registered_many(struct list_head *head) dev_put(dev); net_set_todo(dev); } + + list_del(head); } +EXPORT_SYMBOL(unregister_netdevice_many); /** * unregister_netdev - remove device from the kernel -- GitLab From 7baf2429a1a965369b0ce44efb6315cdd515aa9c Mon Sep 17 00:00:00 2001 From: wenxu Date: Tue, 19 Jan 2021 16:31:50 +0800 Subject: [PATCH 1145/2012] net/sched: cls_flower add CT_FLAGS_INVALID flag support This patch add the TCA_FLOWER_KEY_CT_FLAGS_INVALID flag to match the ct_state with invalid for conntrack. Signed-off-by: wenxu Acked-by: Marcelo Ricardo Leitner Link: https://lore.kernel.org/r/1611045110-682-1-git-send-email-wenxu@ucloud.cn Signed-off-by: Jakub Kicinski --- include/linux/skbuff.h | 4 ++-- include/net/sch_generic.h | 1 + include/uapi/linux/pkt_cls.h | 1 + net/core/dev.c | 2 ++ net/core/flow_dissector.c | 13 +++++++++---- net/sched/act_ct.c | 1 + net/sched/cls_flower.c | 4 +++- 7 files changed, 19 insertions(+), 7 deletions(-) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 46f901adf1a80..186dad231e302 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1353,8 +1353,8 @@ void skb_flow_dissect_ct(const struct sk_buff *skb, struct flow_dissector *flow_dissector, void *target_container, - u16 *ctinfo_map, - size_t mapsize); + u16 *ctinfo_map, size_t mapsize, + bool post_ct); void skb_flow_dissect_tunnel_info(const struct sk_buff *skb, struct flow_dissector *flow_dissector, diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 639e465a108f4..e7bee99aebce4 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -388,6 +388,7 @@ struct qdisc_skb_cb { #define QDISC_CB_PRIV_LEN 20 unsigned char data[QDISC_CB_PRIV_LEN]; u16 mru; + bool post_ct; }; typedef void tcf_chain_head_change_t(struct tcf_proto *tp_head, void *priv); diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h index ee95f42fb0ecf..709668e264b06 100644 --- a/include/uapi/linux/pkt_cls.h +++ b/include/uapi/linux/pkt_cls.h @@ -591,6 +591,7 @@ enum { TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED = 1 << 1, /* Part of an existing connection. */ TCA_FLOWER_KEY_CT_FLAGS_RELATED = 1 << 2, /* Related to an established connection. */ TCA_FLOWER_KEY_CT_FLAGS_TRACKED = 1 << 3, /* Conntrack has occurred. */ + TCA_FLOWER_KEY_CT_FLAGS_INVALID = 1 << 4, /* Conntrack is invalid. */ }; enum { diff --git a/net/core/dev.c b/net/core/dev.c index 00f970ba0248a..d9ce02e959926 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3878,6 +3878,7 @@ sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev) /* qdisc_skb_cb(skb)->pkt_len was already set by the caller. */ qdisc_skb_cb(skb)->mru = 0; + qdisc_skb_cb(skb)->post_ct = false; mini_qdisc_bstats_cpu_update(miniq, skb); switch (tcf_classify(skb, miniq->filter_list, &cl_res, false)) { @@ -4960,6 +4961,7 @@ sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret, qdisc_skb_cb(skb)->pkt_len = skb->len; qdisc_skb_cb(skb)->mru = 0; + qdisc_skb_cb(skb)->post_ct = false; skb->tc_at_ingress = 1; mini_qdisc_bstats_cpu_update(miniq, skb); diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 2d70ded389aeb..c565c7a170910 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -237,9 +237,8 @@ skb_flow_dissect_set_enc_addr_type(enum flow_dissector_key_id type, void skb_flow_dissect_ct(const struct sk_buff *skb, struct flow_dissector *flow_dissector, - void *target_container, - u16 *ctinfo_map, - size_t mapsize) + void *target_container, u16 *ctinfo_map, + size_t mapsize, bool post_ct) { #if IS_ENABLED(CONFIG_NF_CONNTRACK) struct flow_dissector_key_ct *key; @@ -251,13 +250,19 @@ skb_flow_dissect_ct(const struct sk_buff *skb, return; ct = nf_ct_get(skb, &ctinfo); - if (!ct) + if (!ct && !post_ct) return; key = skb_flow_dissector_target(flow_dissector, FLOW_DISSECTOR_KEY_CT, target_container); + if (!ct) { + key->ct_state = TCA_FLOWER_KEY_CT_FLAGS_TRACKED | + TCA_FLOWER_KEY_CT_FLAGS_INVALID; + return; + } + if (ctinfo < mapsize) key->ct_state = ctinfo_map[ctinfo]; #if IS_ENABLED(CONFIG_NF_CONNTRACK_ZONES) diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c index 83a5c6722a069..b3442078aabcd 100644 --- a/net/sched/act_ct.c +++ b/net/sched/act_ct.c @@ -1030,6 +1030,7 @@ out_push: out: tcf_action_update_bstats(&c->common, skb); + qdisc_skb_cb(skb)->post_ct = true; if (defrag) qdisc_skb_cb(skb)->pkt_len = skb->len; return retval; diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index 84f932532db7d..4a9297a89c770 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -302,6 +302,7 @@ static int fl_classify(struct sk_buff *skb, const struct tcf_proto *tp, struct tcf_result *res) { struct cls_fl_head *head = rcu_dereference_bh(tp->root); + bool post_ct = qdisc_skb_cb(skb)->post_ct; struct fl_flow_key skb_key; struct fl_flow_mask *mask; struct cls_fl_filter *f; @@ -318,7 +319,8 @@ static int fl_classify(struct sk_buff *skb, const struct tcf_proto *tp, skb_flow_dissect_tunnel_info(skb, &mask->dissector, &skb_key); skb_flow_dissect_ct(skb, &mask->dissector, &skb_key, fl_ct_info_to_flower_map, - ARRAY_SIZE(fl_ct_info_to_flower_map)); + ARRAY_SIZE(fl_ct_info_to_flower_map), + post_ct); skb_flow_dissect_hash(skb, &mask->dissector, &skb_key); skb_flow_dissect(skb, &mask->dissector, &skb_key, 0); -- GitLab From 4eb5d4a5b4d64bb9495141b2f323caf7524ef8a6 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sat, 16 Jan 2021 13:59:17 +0800 Subject: [PATCH 1146/2012] udp: not remove the CRC flag from dev features when need_csum is false In __skb_udp_tunnel_segment(), when it's a SCTP over VxLAN/GENEVE packet and need_csum is false, which means the outer udp checksum doesn't need to be computed, csum_start and csum_offset could be used by the inner SCTP CRC CSUM for SCTP HW CRC offload. So this patch is to not remove the CRC flag from dev features when need_csum is false. Signed-off-by: Xin Long Link: https://lore.kernel.org/r/1e81b700642498546eaa3f298e023fd7ad394f85.1610776757.git.lucien.xin@gmail.com Signed-off-by: Jakub Kicinski --- net/ipv4/udp_offload.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index ff39e94781bfb..1168d186cc438 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -68,8 +68,8 @@ static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb, (NETIF_F_HW_CSUM | NETIF_F_IP_CSUM)))); features &= skb->dev->hw_enc_features; - /* CRC checksum can't be handled by HW when it's a UDP tunneling packet. */ - features &= ~NETIF_F_SCTP_CRC; + if (need_csum) + features &= ~NETIF_F_SCTP_CRC; /* The only checksum offload we care about from here on out is the * outer one so strip the existing checksum feature flags and -- GitLab From 1a2367665ac2a1a7ad2119e4175287b66c2f09be Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sat, 16 Jan 2021 12:44:11 +0800 Subject: [PATCH 1147/2012] ip_gre: remove CRC flag from dev features in gre_gso_segment This patch is to let it always do CRC checksum in sctp_gso_segment() by removing CRC flag from the dev features in gre_gso_segment() for SCTP over GRE, just as it does in Commit 527beb8ef9c0 ("udp: support sctp over udp in skb_udp_tunnel_segment") for SCTP over UDP. It could set csum/csum_start in GSO CB properly in sctp_gso_segment() after that commit, so it would do checksum with gso_make_checksum() in gre_gso_segment(), and Commit 622e32b7d4a6 ("net: gre: recompute gre csum for sctp over gre tunnels") can be reverted now. Note that when need_csum is false, we can still leave CRC checksum of SCTP to HW by not clearing this CRC flag if it's supported, as Jakub and Alex noticed. v1->v2: - improve the changelog. - fix "rev xmas tree" in varibles declaration. v2->v3: - remove CRC flag from dev features only when need_csum is true. Signed-off-by: Xin Long Link: https://lore.kernel.org/r/00439f24d5f69e2c6fa2beadc681d056c15c258f.1610772251.git.lucien.xin@gmail.com Signed-off-by: Jakub Kicinski --- net/ipv4/gre_offload.c | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c index e0a2465758872..10bc49bde9a11 100644 --- a/net/ipv4/gre_offload.c +++ b/net/ipv4/gre_offload.c @@ -15,10 +15,10 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb, netdev_features_t features) { int tnl_hlen = skb_inner_mac_header(skb) - skb_transport_header(skb); - bool need_csum, need_recompute_csum, gso_partial; struct sk_buff *segs = ERR_PTR(-EINVAL); u16 mac_offset = skb->mac_header; __be16 protocol = skb->protocol; + bool need_csum, gso_partial; u16 mac_len = skb->mac_len; int gre_offset, outer_hlen; @@ -41,10 +41,11 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb, skb->protocol = skb->inner_protocol; need_csum = !!(skb_shinfo(skb)->gso_type & SKB_GSO_GRE_CSUM); - need_recompute_csum = skb->csum_not_inet; skb->encap_hdr_csum = need_csum; features &= skb->dev->hw_enc_features; + if (need_csum) + features &= ~NETIF_F_SCTP_CRC; /* segment inner packet. */ segs = skb_mac_gso_segment(skb, features); @@ -99,15 +100,7 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb, } *(pcsum + 1) = 0; - if (need_recompute_csum && !skb_is_gso(skb)) { - __wsum csum; - - csum = skb_checksum(skb, gre_offset, - skb->len - gre_offset, 0); - *pcsum = csum_fold(csum); - } else { - *pcsum = gso_make_checksum(skb, 0); - } + *pcsum = gso_make_checksum(skb, 0); } while ((skb = skb->next)); out: return segs; -- GitLab From 9e8789c85deee047c5753e22f725d5fc10682468 Mon Sep 17 00:00:00 2001 From: Martin Blumenstingl Date: Tue, 19 Jan 2021 21:24:24 +0100 Subject: [PATCH 1148/2012] net: stmmac: dwmac-meson8b: fix the RX delay validation When has_prg_eth1_rgmii_rx_delay is true then we support RX delays between 0ps and 3000ps in 200ps steps. Swap the validation of the RX delay based on the has_prg_eth1_rgmii_rx_delay flag so the 200ps check is now applied correctly on G12A SoCs (instead of only allow 0ps or 2000ps on G12A, but 0..3000ps in 200ps steps on older SoCs which don't support that). Fixes: de94fc104d58ea ("net: stmmac: dwmac-meson8b: add support for the RGMII RX delay on G12A") Reported-by: Martijn van Deventer Signed-off-by: Martin Blumenstingl Link: https://lore.kernel.org/r/20210119202424.591349-1-martin.blumenstingl@googlemail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c index 55152d7ba99ad..848e5c37746bb 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c @@ -443,16 +443,16 @@ static int meson8b_dwmac_probe(struct platform_device *pdev) } if (dwmac->data->has_prg_eth1_rgmii_rx_delay) { - if (dwmac->rx_delay_ps != 0 && dwmac->rx_delay_ps != 2000) { + if (dwmac->rx_delay_ps > 3000 || dwmac->rx_delay_ps % 200) { dev_err(dwmac->dev, - "The only allowed RGMII RX delays values are: 0ps, 2000ps"); + "The RGMII RX delay range is 0..3000ps in 200ps steps"); ret = -EINVAL; goto err_remove_config_dt; } } else { - if (dwmac->rx_delay_ps > 3000 || dwmac->rx_delay_ps % 200) { + if (dwmac->rx_delay_ps != 0 && dwmac->rx_delay_ps != 2000) { dev_err(dwmac->dev, - "The RGMII RX delay range is 0..3000ps in 200ps steps"); + "The only allowed RGMII RX delays values are: 0ps, 2000ps"); ret = -EINVAL; goto err_remove_config_dt; } -- GitLab From 17cbe03872be8878e2f84047424350d036915df1 Mon Sep 17 00:00:00 2001 From: Levi Yun Date: Wed, 20 Jan 2021 21:28:18 +0900 Subject: [PATCH 1149/2012] mm/memblock: Fix typo in comment of memblock_phys_alloc_try_nid() memblock_phys_alloc_try_nid function's comments has typo NUMA as MUMA. Correct this typo. Signed-off-by: Levi Yun Reviewed-by: David Hildenbrand Signed-off-by: Mike Rapoport --- mm/memblock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/memblock.c b/mm/memblock.c index d24bcfa88d2f1..1eaaec1e7687c 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -1427,7 +1427,7 @@ phys_addr_t __init memblock_phys_alloc_range(phys_addr_t size, } /** - * memblock_phys_alloc_try_nid - allocate a memory block from specified MUMA node + * memblock_phys_alloc_try_nid - allocate a memory block from specified NUMA node * @size: size of memory block to be allocated in bytes * @align: alignment of the region and block's size * @nid: nid of the free area to find, %NUMA_NO_NODE for any node -- GitLab From e1663372d5ffaa3fc79b7932878c5c860f735412 Mon Sep 17 00:00:00 2001 From: Steven Price Date: Fri, 8 Jan 2021 16:12:54 +0000 Subject: [PATCH 1150/2012] KVM: arm64: Compute TPIDR_EL2 ignoring MTE tag KASAN in HW_TAGS mode will store MTE tags in the top byte of the pointer. When computing the offset for TPIDR_EL2 we don't want anything in the top byte, so remove the tag to ensure the computation is correct no matter what the tag. Fixes: 94ab5b61ee16 ("kasan, arm64: enable CONFIG_KASAN_HW_TAGS") Signed-off-by: Steven Price [maz: added comment] Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210108161254.53674-1-steven.price@arm.com --- arch/arm64/kvm/arm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 04c44853b103b..fe60d25c000e4 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -1396,8 +1396,9 @@ static void cpu_init_hyp_mode(void) * Calculate the raw per-cpu offset without a translation from the * kernel's mapping to the linear mapping, and store it in tpidr_el2 * so that we can use adr_l to access per-cpu variables in EL2. + * Also drop the KASAN tag which gets in the way... */ - params->tpidr_el2 = (unsigned long)this_cpu_ptr_nvhe_sym(__per_cpu_start) - + params->tpidr_el2 = (unsigned long)kasan_reset_tag(this_cpu_ptr_nvhe_sym(__per_cpu_start)) - (unsigned long)kvm_ksym_ref(CHOOSE_NVHE_SYM(__per_cpu_start)); params->mair_el2 = read_sysreg(mair_el1); -- GitLab From dcb3b06d9c34f33a249f65c08805461fb0c4325b Mon Sep 17 00:00:00 2001 From: Rouven Czerwinski Date: Tue, 5 Jan 2021 11:28:44 +0100 Subject: [PATCH 1151/2012] tee: optee: replace might_sleep with cond_resched might_sleep() is a debugging aid and triggers rescheduling only for certain kernel configurations. Replace with an explicit check and reschedule to work for all kernel configurations. Fixes the following trace: [ 572.945146] rcu: INFO: rcu_sched self-detected stall on CPU [ 572.949275] rcu: 0-....: (2099 ticks this GP) idle=572/1/0x40000002 softirq=7412/7412 fqs=974 [ 572.957964] (t=2100 jiffies g=10393 q=21) [ 572.962054] NMI backtrace for cpu 0 [ 572.965540] CPU: 0 PID: 165 Comm: xtest Not tainted 5.8.7 #1 [ 572.971188] Hardware name: STM32 (Device Tree Support) [ 572.976354] [] (unwind_backtrace) from [] (show_stack+0x10/0x14) [ 572.984080] [] (show_stack) from [] (dump_stack+0xc4/0xd8) [ 572.991300] [] (dump_stack) from [] (nmi_cpu_backtrace+0x90/0xc4) [ 572.999130] [] (nmi_cpu_backtrace) from [] (nmi_trigger_cpumask_backtrace+0xec/0x130) [ 573.008706] [] (nmi_trigger_cpumask_backtrace) from [] (rcu_dump_cpu_stacks+0xe8/0x110) [ 573.018453] [] (rcu_dump_cpu_stacks) from [] (rcu_sched_clock_irq+0x7fc/0xa88) [ 573.027416] [] (rcu_sched_clock_irq) from [] (update_process_times+0x30/0x8c) [ 573.036291] [] (update_process_times) from [] (tick_sched_timer+0x4c/0xa8) [ 573.044905] [] (tick_sched_timer) from [] (__hrtimer_run_queues+0x174/0x358) [ 573.053696] [] (__hrtimer_run_queues) from [] (hrtimer_interrupt+0x118/0x2bc) [ 573.062573] [] (hrtimer_interrupt) from [] (arch_timer_handler_virt+0x28/0x30) [ 573.071536] [] (arch_timer_handler_virt) from [] (handle_percpu_devid_irq+0x8c/0x240) [ 573.081109] [] (handle_percpu_devid_irq) from [] (generic_handle_irq+0x34/0x44) [ 573.090156] [] (generic_handle_irq) from [] (__handle_domain_irq+0x5c/0xb0) [ 573.098857] [] (__handle_domain_irq) from [] (gic_handle_irq+0x4c/0x90) [ 573.107209] [] (gic_handle_irq) from [] (__irq_svc+0x6c/0x90) [ 573.114682] Exception stack(0xd90dfcf8 to 0xd90dfd40) [ 573.119732] fce0: ffff0004 00000000 [ 573.127917] fd00: 00000000 00000000 00000000 00000000 00000000 00000000 d93493cc ffff0000 [ 573.136098] fd20: d2bc39c0 be926998 d90dfd58 d90dfd48 c09f3384 c01151f0 400d0013 ffffffff [ 573.144281] [] (__irq_svc) from [] (__arm_smccc_smc+0x10/0x20) [ 573.151854] [] (__arm_smccc_smc) from [] (optee_smccc_smc+0x3c/0x44) [ 573.159948] [] (optee_smccc_smc) from [] (optee_do_call_with_arg+0xb8/0x154) [ 573.168735] [] (optee_do_call_with_arg) from [] (optee_invoke_func+0x110/0x190) [ 573.177786] [] (optee_invoke_func) from [] (tee_ioctl+0x10b8/0x11c0) [ 573.185879] [] (tee_ioctl) from [] (ksys_ioctl+0xe0/0xa4c) [ 573.193101] [] (ksys_ioctl) from [] (ret_fast_syscall+0x0/0x54) [ 573.200750] Exception stack(0xd90dffa8 to 0xd90dfff0) [ 573.205803] ffa0: be926bf4 be926a78 00000003 8010a403 be926908 004e3cf8 [ 573.213987] ffc0: be926bf4 be926a78 00000000 00000036 be926908 be926918 be9269b0 bffdf0f8 [ 573.222162] ffe0: b6d76fb0 be9268fc b6d66621 b6c7e0d8 seen on STM32 DK2 with CONFIG_PREEMPT_NONE. Fixes: 9f02b8f61f29 ("tee: optee: add might_sleep for RPC requests") Signed-off-by: Rouven Czerwinski Tested-by: Sumit Garg [jw: added fixes tag + small adjustments in the code] Signed-off-by: Jens Wiklander --- drivers/tee/optee/call.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/tee/optee/call.c b/drivers/tee/optee/call.c index c981757ba0d40..780d7c4fd7565 100644 --- a/drivers/tee/optee/call.c +++ b/drivers/tee/optee/call.c @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -148,7 +149,8 @@ u32 optee_do_call_with_arg(struct tee_context *ctx, phys_addr_t parg) */ optee_cq_wait_for_completion(&optee->call_queue, &w); } else if (OPTEE_SMC_RETURN_IS_RPC(res.a0)) { - might_sleep(); + if (need_resched()) + cond_resched(); param.a0 = res.a0; param.a1 = res.a1; param.a2 = res.a2; -- GitLab From 9529aaa056edc76b3a41df616c71117ebe11e049 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 21 Jan 2021 10:56:36 +0000 Subject: [PATCH 1152/2012] KVM: arm64: Filter out v8.1+ events on v8.0 HW When running on v8.0 HW, make sure we don't try to advertise events in the 0x4000-0x403f range. Cc: stable@vger.kernel.org Fixes: 88865beca9062 ("KVM: arm64: Mask out filtered events in PCMEID{0,1}_EL1") Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210121105636.1478491-1-maz@kernel.org --- arch/arm64/kvm/pmu-emul.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c index 4ad66a532e38b..247422ac78a9e 100644 --- a/arch/arm64/kvm/pmu-emul.c +++ b/arch/arm64/kvm/pmu-emul.c @@ -788,7 +788,7 @@ u64 kvm_pmu_get_pmceid(struct kvm_vcpu *vcpu, bool pmceid1) { unsigned long *bmap = vcpu->kvm->arch.pmu_filter; u64 val, mask = 0; - int base, i; + int base, i, nr_events; if (!pmceid1) { val = read_sysreg(pmceid0_el0); @@ -801,13 +801,17 @@ u64 kvm_pmu_get_pmceid(struct kvm_vcpu *vcpu, bool pmceid1) if (!bmap) return val; + nr_events = kvm_pmu_event_mask(vcpu->kvm) + 1; + for (i = 0; i < 32; i += 8) { u64 byte; byte = bitmap_get_value8(bmap, base + i); mask |= byte << i; - byte = bitmap_get_value8(bmap, 0x4000 + base + i); - mask |= byte << (32 + i); + if (nr_events >= (0x4000 + base + 32)) { + byte = bitmap_get_value8(bmap, 0x4000 + base + i); + mask |= byte << (32 + i); + } } return val & mask; -- GitLab From e45122893a9870813f9bd7b4add4f613e6f29008 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Wed, 20 Jan 2021 21:09:48 -0800 Subject: [PATCH 1153/2012] x86/fpu: Add kernel_fpu_begin_mask() to selectively initialize state MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently, requesting kernel FPU access doesn't distinguish which parts of the extended ("FPU") state are needed. This is nice for simplicity, but there are a few cases in which it's suboptimal: - The vast majority of in-kernel FPU users want XMM/YMM/ZMM state but do not use legacy 387 state. These users want MXCSR initialized but don't care about the FPU control word. Skipping FNINIT would save time. (Empirically, FNINIT is several times slower than LDMXCSR.) - Code that wants MMX doesn't want or need MXCSR initialized. _mmx_memcpy(), for example, can run before CR4.OSFXSR gets set, and initializing MXCSR will fail because LDMXCSR generates an #UD when the aforementioned CR4 bit is not set. - Any future in-kernel users of XFD (eXtended Feature Disable)-capable dynamic states will need special handling. Add a more specific API that allows callers to specify exactly what they want. Signed-off-by: Andy Lutomirski Signed-off-by: Borislav Petkov Tested-by: Krzysztof Piotr Olędzki Link: https://lkml.kernel.org/r/aff1cac8b8fc7ee900cf73e8f2369966621b053f.1611205691.git.luto@kernel.org --- arch/x86/include/asm/fpu/api.h | 15 +++++++++++++-- arch/x86/kernel/fpu/core.c | 9 +++++---- 2 files changed, 18 insertions(+), 6 deletions(-) diff --git a/arch/x86/include/asm/fpu/api.h b/arch/x86/include/asm/fpu/api.h index a5aba4ab02248..67a4f1cb2aac5 100644 --- a/arch/x86/include/asm/fpu/api.h +++ b/arch/x86/include/asm/fpu/api.h @@ -16,14 +16,25 @@ * Use kernel_fpu_begin/end() if you intend to use FPU in kernel context. It * disables preemption so be careful if you intend to use it for long periods * of time. - * If you intend to use the FPU in softirq you need to check first with + * If you intend to use the FPU in irq/softirq you need to check first with * irq_fpu_usable() if it is possible. */ -extern void kernel_fpu_begin(void); + +/* Kernel FPU states to initialize in kernel_fpu_begin_mask() */ +#define KFPU_387 _BITUL(0) /* 387 state will be initialized */ +#define KFPU_MXCSR _BITUL(1) /* MXCSR will be initialized */ + +extern void kernel_fpu_begin_mask(unsigned int kfpu_mask); extern void kernel_fpu_end(void); extern bool irq_fpu_usable(void); extern void fpregs_mark_activate(void); +/* Code that is unaware of kernel_fpu_begin_mask() can use this */ +static inline void kernel_fpu_begin(void) +{ + kernel_fpu_begin_mask(KFPU_387 | KFPU_MXCSR); +} + /* * Use fpregs_lock() while editing CPU's FPU registers or fpu->state. * A context switch will (and softirq might) save CPU's FPU registers to diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index eb86a2b831b15..571220ac8beaa 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -121,7 +121,7 @@ int copy_fpregs_to_fpstate(struct fpu *fpu) } EXPORT_SYMBOL(copy_fpregs_to_fpstate); -void kernel_fpu_begin(void) +void kernel_fpu_begin_mask(unsigned int kfpu_mask) { preempt_disable(); @@ -141,13 +141,14 @@ void kernel_fpu_begin(void) } __cpu_invalidate_fpregs_state(); - if (boot_cpu_has(X86_FEATURE_XMM)) + /* Put sane initial values into the control registers. */ + if (likely(kfpu_mask & KFPU_MXCSR) && boot_cpu_has(X86_FEATURE_XMM)) ldmxcsr(MXCSR_DEFAULT); - if (boot_cpu_has(X86_FEATURE_FPU)) + if (unlikely(kfpu_mask & KFPU_387) && boot_cpu_has(X86_FEATURE_FPU)) asm volatile ("fninit"); } -EXPORT_SYMBOL_GPL(kernel_fpu_begin); +EXPORT_SYMBOL_GPL(kernel_fpu_begin_mask); void kernel_fpu_end(void) { -- GitLab From 07be2fed5ee7b3a01e0b21c15814b590af9c1527 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Fri, 18 Dec 2020 19:47:13 +0100 Subject: [PATCH 1154/2012] net/fq_impl: bulk-free packets from a flow on overmemory This is similar to what sch_fq_codel does. It also amortizes the worst case cost of a follow-up patch that changes the selection of the biggest flow for dropping packets Signed-off-by: Felix Fietkau Link: https://lore.kernel.org/r/20201218184718.93650-1-nbd@nbd.name Signed-off-by: Johannes Berg --- include/net/fq_impl.h | 55 ++++++++++++++++++++++++++++++++++--------- 1 file changed, 44 insertions(+), 11 deletions(-) diff --git a/include/net/fq_impl.h b/include/net/fq_impl.h index e73d74d2fabfb..06d2a79233c9e 100644 --- a/include/net/fq_impl.h +++ b/include/net/fq_impl.h @@ -11,17 +11,25 @@ /* functions that are embedded into includer */ + +static void +__fq_adjust_removal(struct fq *fq, struct fq_flow *flow, unsigned int packets, + unsigned int bytes, unsigned int truesize) +{ + struct fq_tin *tin = flow->tin; + + tin->backlog_bytes -= bytes; + tin->backlog_packets -= packets; + flow->backlog -= bytes; + fq->backlog -= packets; + fq->memory_usage -= truesize; +} + static void fq_adjust_removal(struct fq *fq, struct fq_flow *flow, struct sk_buff *skb) { - struct fq_tin *tin = flow->tin; - - tin->backlog_bytes -= skb->len; - tin->backlog_packets--; - flow->backlog -= skb->len; - fq->backlog--; - fq->memory_usage -= skb->truesize; + __fq_adjust_removal(fq, flow, 1, skb->len, skb->truesize); } static void fq_rejigger_backlog(struct fq *fq, struct fq_flow *flow) @@ -59,6 +67,34 @@ static struct sk_buff *fq_flow_dequeue(struct fq *fq, return skb; } +static int fq_flow_drop(struct fq *fq, struct fq_flow *flow, + fq_skb_free_t free_func) +{ + unsigned int packets = 0, bytes = 0, truesize = 0; + struct fq_tin *tin = flow->tin; + struct sk_buff *skb; + int pending; + + lockdep_assert_held(&fq->lock); + + pending = min_t(int, 32, skb_queue_len(&flow->queue) / 2); + do { + skb = __skb_dequeue(&flow->queue); + if (!skb) + break; + + packets++; + bytes += skb->len; + truesize += skb->truesize; + free_func(fq, tin, flow, skb); + } while (packets < pending); + + __fq_adjust_removal(fq, flow, packets, bytes, truesize); + fq_rejigger_backlog(fq, flow); + + return packets; +} + static struct sk_buff *fq_tin_dequeue(struct fq *fq, struct fq_tin *tin, fq_tin_dequeue_t dequeue_func) @@ -190,12 +226,9 @@ static void fq_tin_enqueue(struct fq *fq, if (!flow) return; - skb = fq_flow_dequeue(fq, flow); - if (!skb) + if (!fq_flow_drop(fq, flow, free_func)) return; - free_func(fq, flow->tin, flow, skb); - flow->tin->overlimit++; fq->overlimit++; if (oom) { -- GitLab From bf9009bf21b53501f2abb2f59f9314d85bde5fc9 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Fri, 18 Dec 2020 19:47:14 +0100 Subject: [PATCH 1155/2012] net/fq_impl: drop get_default_func, move default flow to fq_tin Simplifies the code and prepares for a rework of scanning for flows on overmemory drop. Signed-off-by: Felix Fietkau Link: https://lore.kernel.org/r/20201218184718.93650-2-nbd@nbd.name Signed-off-by: Johannes Berg --- include/net/fq.h | 1 + include/net/fq_impl.h | 11 +++++------ net/mac80211/ieee80211_i.h | 1 - net/mac80211/tx.c | 22 ++++------------------ 4 files changed, 10 insertions(+), 25 deletions(-) diff --git a/include/net/fq.h b/include/net/fq.h index e39f3f8d5f8a8..5df100b77099e 100644 --- a/include/net/fq.h +++ b/include/net/fq.h @@ -47,6 +47,7 @@ struct fq_flow { struct fq_tin { struct list_head new_flows; struct list_head old_flows; + struct fq_flow default_flow; u32 backlog_bytes; u32 backlog_packets; u32 overlimit; diff --git a/include/net/fq_impl.h b/include/net/fq_impl.h index 06d2a79233c9e..dd374c7f0fe52 100644 --- a/include/net/fq_impl.h +++ b/include/net/fq_impl.h @@ -151,8 +151,7 @@ static u32 fq_flow_idx(struct fq *fq, struct sk_buff *skb) static struct fq_flow *fq_flow_classify(struct fq *fq, struct fq_tin *tin, u32 idx, - struct sk_buff *skb, - fq_flow_get_default_t get_default_func) + struct sk_buff *skb) { struct fq_flow *flow; @@ -160,7 +159,7 @@ static struct fq_flow *fq_flow_classify(struct fq *fq, flow = &fq->flows[idx]; if (flow->tin && flow->tin != tin) { - flow = get_default_func(fq, tin, idx, skb); + flow = &tin->default_flow; tin->collisions++; fq->collisions++; } @@ -192,15 +191,14 @@ static void fq_recalc_backlog(struct fq *fq, static void fq_tin_enqueue(struct fq *fq, struct fq_tin *tin, u32 idx, struct sk_buff *skb, - fq_skb_free_t free_func, - fq_flow_get_default_t get_default_func) + fq_skb_free_t free_func) { struct fq_flow *flow; bool oom; lockdep_assert_held(&fq->lock); - flow = fq_flow_classify(fq, tin, idx, skb, get_default_func); + flow = fq_flow_classify(fq, tin, idx, skb); flow->tin = tin; flow->backlog += skb->len; @@ -331,6 +329,7 @@ static void fq_tin_init(struct fq_tin *tin) { INIT_LIST_HEAD(&tin->new_flows); INIT_LIST_HEAD(&tin->old_flows); + fq_flow_init(&tin->default_flow); } static int fq_init(struct fq *fq, int flows_cnt) diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 8bf9c0e974d62..c0f6168fdeedd 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -848,7 +848,6 @@ enum txq_info_flags { */ struct txq_info { struct fq_tin tin; - struct fq_flow def_flow; struct codel_vars def_cvars; struct codel_stats cstats; struct sk_buff_head frags; diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index ebb3228ce9718..935a9ce657490 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -1309,7 +1309,7 @@ static struct sk_buff *codel_dequeue_func(struct codel_vars *cvars, fq = &local->fq; if (cvars == &txqi->def_cvars) - flow = &txqi->def_flow; + flow = &txqi->tin.default_flow; else flow = &fq->flows[cvars - local->cvars]; @@ -1352,7 +1352,7 @@ static struct sk_buff *fq_tin_dequeue_func(struct fq *fq, cparams = &local->cparams; } - if (flow == &txqi->def_flow) + if (flow == &tin->default_flow) cvars = &txqi->def_cvars; else cvars = &local->cvars[flow - fq->flows]; @@ -1379,17 +1379,6 @@ static void fq_skb_free_func(struct fq *fq, ieee80211_free_txskb(&local->hw, skb); } -static struct fq_flow *fq_flow_get_default_func(struct fq *fq, - struct fq_tin *tin, - int idx, - struct sk_buff *skb) -{ - struct txq_info *txqi; - - txqi = container_of(tin, struct txq_info, tin); - return &txqi->def_flow; -} - static void ieee80211_txq_enqueue(struct ieee80211_local *local, struct txq_info *txqi, struct sk_buff *skb) @@ -1402,8 +1391,7 @@ static void ieee80211_txq_enqueue(struct ieee80211_local *local, spin_lock_bh(&fq->lock); fq_tin_enqueue(fq, tin, flow_idx, skb, - fq_skb_free_func, - fq_flow_get_default_func); + fq_skb_free_func); spin_unlock_bh(&fq->lock); } @@ -1446,7 +1434,6 @@ void ieee80211_txq_init(struct ieee80211_sub_if_data *sdata, struct txq_info *txqi, int tid) { fq_tin_init(&txqi->tin); - fq_flow_init(&txqi->def_flow); codel_vars_init(&txqi->def_cvars); codel_stats_init(&txqi->cstats); __skb_queue_head_init(&txqi->frags); @@ -3283,8 +3270,7 @@ static bool ieee80211_amsdu_aggregate(struct ieee80211_sub_if_data *sdata, */ tin = &txqi->tin; - flow = fq_flow_classify(fq, tin, flow_idx, skb, - fq_flow_get_default_func); + flow = fq_flow_classify(fq, tin, flow_idx, skb); head = skb_peek_tail(&flow->queue); if (!head || skb_is_gso(head)) goto out; -- GitLab From d7b649291782430904e17cde2ebfc90f76021ca5 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Fri, 18 Dec 2020 19:47:15 +0100 Subject: [PATCH 1156/2012] net/fq_impl: do not maintain a backlog-sorted list of flows A sorted flow list is only needed to drop packets in the biggest flow when hitting the overmemory condition. By scanning flows only when needed, we can avoid paying the cost of maintaining the list under normal conditions In order to avoid scanning lots of empty flows and touching too many cold cache lines, a bitmap of flows with backlog is maintained Signed-off-by: Felix Fietkau Link: https://lore.kernel.org/r/20201218184718.93650-3-nbd@nbd.name Signed-off-by: Johannes Berg --- include/net/fq.h | 10 ++-- include/net/fq_impl.h | 113 +++++++++++++++++++++++++----------------- net/mac80211/tx.c | 2 - 3 files changed, 71 insertions(+), 54 deletions(-) diff --git a/include/net/fq.h b/include/net/fq.h index 5df100b77099e..2eccbbd2b5595 100644 --- a/include/net/fq.h +++ b/include/net/fq.h @@ -19,8 +19,6 @@ struct fq_tin; * @flowchain: can be linked to fq_tin's new_flows or old_flows. Used for DRR++ * (deficit round robin) based round robin queuing similar to the one * found in net/sched/sch_fq_codel.c - * @backlogchain: can be linked to other fq_flow and fq. Used to keep track of - * fat flows and efficient head-dropping if packet limit is reached * @queue: sk_buff queue to hold packets * @backlog: number of bytes pending in the queue. The number of packets can be * found in @queue.qlen @@ -29,7 +27,6 @@ struct fq_tin; struct fq_flow { struct fq_tin *tin; struct list_head flowchain; - struct list_head backlogchain; struct sk_buff_head queue; u32 backlog; int deficit; @@ -47,6 +44,7 @@ struct fq_flow { struct fq_tin { struct list_head new_flows; struct list_head old_flows; + struct list_head tin_list; struct fq_flow default_flow; u32 backlog_bytes; u32 backlog_packets; @@ -60,14 +58,14 @@ struct fq_tin { /** * struct fq - main container for fair queuing purposes * - * @backlogs: linked to fq_flows. Used to maintain fat flows for efficient - * head-dropping when @backlog reaches @limit * @limit: max number of packets that can be queued across all flows * @backlog: number of packets queued across all flows */ struct fq { struct fq_flow *flows; - struct list_head backlogs; + unsigned long *flows_bitmap; + + struct list_head tin_backlog; spinlock_t lock; u32 flows_cnt; u32 limit; diff --git a/include/net/fq_impl.h b/include/net/fq_impl.h index dd374c7f0fe52..a5f67a2c0c730 100644 --- a/include/net/fq_impl.h +++ b/include/net/fq_impl.h @@ -17,12 +17,24 @@ __fq_adjust_removal(struct fq *fq, struct fq_flow *flow, unsigned int packets, unsigned int bytes, unsigned int truesize) { struct fq_tin *tin = flow->tin; + int idx; tin->backlog_bytes -= bytes; tin->backlog_packets -= packets; flow->backlog -= bytes; fq->backlog -= packets; fq->memory_usage -= truesize; + + if (flow->backlog) + return; + + if (flow == &tin->default_flow) { + list_del_init(&tin->tin_list); + return; + } + + idx = flow - fq->flows; + __clear_bit(idx, fq->flows_bitmap); } static void fq_adjust_removal(struct fq *fq, @@ -32,24 +44,6 @@ static void fq_adjust_removal(struct fq *fq, __fq_adjust_removal(fq, flow, 1, skb->len, skb->truesize); } -static void fq_rejigger_backlog(struct fq *fq, struct fq_flow *flow) -{ - struct fq_flow *i; - - if (flow->backlog == 0) { - list_del_init(&flow->backlogchain); - } else { - i = flow; - - list_for_each_entry_continue(i, &fq->backlogs, backlogchain) - if (i->backlog < flow->backlog) - break; - - list_move_tail(&flow->backlogchain, - &i->backlogchain); - } -} - static struct sk_buff *fq_flow_dequeue(struct fq *fq, struct fq_flow *flow) { @@ -62,7 +56,6 @@ static struct sk_buff *fq_flow_dequeue(struct fq *fq, return NULL; fq_adjust_removal(fq, flow, skb); - fq_rejigger_backlog(fq, flow); return skb; } @@ -90,7 +83,6 @@ static int fq_flow_drop(struct fq *fq, struct fq_flow *flow, } while (packets < pending); __fq_adjust_removal(fq, flow, packets, bytes, truesize); - fq_rejigger_backlog(fq, flow); return packets; } @@ -170,22 +162,36 @@ static struct fq_flow *fq_flow_classify(struct fq *fq, return flow; } -static void fq_recalc_backlog(struct fq *fq, - struct fq_tin *tin, - struct fq_flow *flow) +static struct fq_flow *fq_find_fattest_flow(struct fq *fq) { - struct fq_flow *i; + struct fq_tin *tin; + struct fq_flow *flow = NULL; + u32 len = 0; + int i; - if (list_empty(&flow->backlogchain)) - list_add_tail(&flow->backlogchain, &fq->backlogs); + for_each_set_bit(i, fq->flows_bitmap, fq->flows_cnt) { + struct fq_flow *cur = &fq->flows[i]; + unsigned int cur_len; - i = flow; - list_for_each_entry_continue_reverse(i, &fq->backlogs, - backlogchain) - if (i->backlog > flow->backlog) - break; + cur_len = cur->backlog; + if (cur_len <= len) + continue; + + flow = cur; + len = cur_len; + } + + list_for_each_entry(tin, &fq->tin_backlog, tin_list) { + unsigned int cur_len = tin->default_flow.backlog; - list_move(&flow->backlogchain, &i->backlogchain); + if (cur_len <= len) + continue; + + flow = &tin->default_flow; + len = cur_len; + } + + return flow; } static void fq_tin_enqueue(struct fq *fq, @@ -200,6 +206,13 @@ static void fq_tin_enqueue(struct fq *fq, flow = fq_flow_classify(fq, tin, idx, skb); + if (!flow->backlog) { + if (flow != &tin->default_flow) + __set_bit(idx, fq->flows_bitmap); + else if (list_empty(&tin->tin_list)) + list_add(&tin->tin_list, &fq->tin_backlog); + } + flow->tin = tin; flow->backlog += skb->len; tin->backlog_bytes += skb->len; @@ -207,8 +220,6 @@ static void fq_tin_enqueue(struct fq *fq, fq->memory_usage += skb->truesize; fq->backlog++; - fq_recalc_backlog(fq, tin, flow); - if (list_empty(&flow->flowchain)) { flow->deficit = fq->quantum; list_add_tail(&flow->flowchain, @@ -218,9 +229,7 @@ static void fq_tin_enqueue(struct fq *fq, __skb_queue_tail(&flow->queue, skb); oom = (fq->memory_usage > fq->memory_limit); while (fq->backlog > fq->limit || oom) { - flow = list_first_entry_or_null(&fq->backlogs, - struct fq_flow, - backlogchain); + flow = fq_find_fattest_flow(fq); if (!flow) return; @@ -255,8 +264,6 @@ static void fq_flow_filter(struct fq *fq, fq_adjust_removal(fq, flow, skb); free_func(fq, tin, flow, skb); } - - fq_rejigger_backlog(fq, flow); } static void fq_tin_filter(struct fq *fq, @@ -279,16 +286,18 @@ static void fq_flow_reset(struct fq *fq, struct fq_flow *flow, fq_skb_free_t free_func) { + struct fq_tin *tin = flow->tin; struct sk_buff *skb; while ((skb = fq_flow_dequeue(fq, flow))) - free_func(fq, flow->tin, flow, skb); + free_func(fq, tin, flow, skb); - if (!list_empty(&flow->flowchain)) + if (!list_empty(&flow->flowchain)) { list_del_init(&flow->flowchain); - - if (!list_empty(&flow->backlogchain)) - list_del_init(&flow->backlogchain); + if (list_empty(&tin->new_flows) && + list_empty(&tin->old_flows)) + list_del_init(&tin->tin_list); + } flow->tin = NULL; @@ -314,6 +323,7 @@ static void fq_tin_reset(struct fq *fq, fq_flow_reset(fq, flow, free_func); } + WARN_ON_ONCE(!list_empty(&tin->tin_list)); WARN_ON_ONCE(tin->backlog_bytes); WARN_ON_ONCE(tin->backlog_packets); } @@ -321,7 +331,6 @@ static void fq_tin_reset(struct fq *fq, static void fq_flow_init(struct fq_flow *flow) { INIT_LIST_HEAD(&flow->flowchain); - INIT_LIST_HEAD(&flow->backlogchain); __skb_queue_head_init(&flow->queue); } @@ -329,6 +338,7 @@ static void fq_tin_init(struct fq_tin *tin) { INIT_LIST_HEAD(&tin->new_flows); INIT_LIST_HEAD(&tin->old_flows); + INIT_LIST_HEAD(&tin->tin_list); fq_flow_init(&tin->default_flow); } @@ -337,8 +347,8 @@ static int fq_init(struct fq *fq, int flows_cnt) int i; memset(fq, 0, sizeof(fq[0])); - INIT_LIST_HEAD(&fq->backlogs); spin_lock_init(&fq->lock); + INIT_LIST_HEAD(&fq->tin_backlog); fq->flows_cnt = max_t(u32, flows_cnt, 1); fq->quantum = 300; fq->limit = 8192; @@ -348,6 +358,14 @@ static int fq_init(struct fq *fq, int flows_cnt) if (!fq->flows) return -ENOMEM; + fq->flows_bitmap = kcalloc(BITS_TO_LONGS(fq->flows_cnt), sizeof(long), + GFP_KERNEL); + if (!fq->flows_bitmap) { + kvfree(fq->flows); + fq->flows = NULL; + return -ENOMEM; + } + for (i = 0; i < fq->flows_cnt; i++) fq_flow_init(&fq->flows[i]); @@ -364,6 +382,9 @@ static void fq_reset(struct fq *fq, kvfree(fq->flows); fq->flows = NULL; + + kfree(fq->flows_bitmap); + fq->flows_bitmap = NULL; } #endif diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 935a9ce657490..d981647c28632 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -3337,8 +3337,6 @@ out_recalc: if (head->len != orig_len) { flow->backlog += head->len - orig_len; tin->backlog_bytes += head->len - orig_len; - - fq_recalc_backlog(fq, tin, flow); } out: spin_unlock_bh(&fq->lock); -- GitLab From 80a915ec4427f0083829f7e6518ee9f21521ee1e Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Fri, 18 Dec 2020 19:47:18 +0100 Subject: [PATCH 1157/2012] mac80211: add rx decapsulation offload support This allows drivers to pass 802.3 frames to mac80211, with some restrictions: - the skb must be passed with a valid sta - fast-rx needs to be active for the sta - monitor mode needs to be disabled mac80211 will tell the driver when it is safe to enable rx decap offload for a particular station. In order to implement support, a driver must: - call ieee80211_hw_set(hw, SUPPORTS_RX_DECAP_OFFLOAD) - implement ops->sta_set_decap_offload - mark 802.3 frames with RX_FLAG_8023 If it doesn't want to enable offload for some vif types, it can mask out IEEE80211_OFFLOAD_DECAP_ENABLED in vif->offload_flags from within the .add_interface or .update_vif_offload driver ops Signed-off-by: Felix Fietkau Link: https://lore.kernel.org/r/20201218184718.93650-6-nbd@nbd.name Signed-off-by: Johannes Berg --- include/net/mac80211.h | 16 +++ net/mac80211/debugfs.c | 1 + net/mac80211/debugfs_sta.c | 1 + net/mac80211/driver-ops.h | 16 +++ net/mac80211/iface.c | 17 ++- net/mac80211/rx.c | 243 ++++++++++++++++++++++++------------- net/mac80211/sta_info.h | 2 + net/mac80211/trace.h | 18 ++- 8 files changed, 225 insertions(+), 89 deletions(-) diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 2bdbf62f4ecd7..176fe0d9f67f8 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1296,6 +1296,8 @@ ieee80211_tx_info_clear_status(struct ieee80211_tx_info *info) * the "0-length PSDU" field included there. The value for it is * in &struct ieee80211_rx_status. Note that if this value isn't * known the frame shouldn't be reported. + * @RX_FLAG_8023: the frame has an 802.3 header (decap offload performed by + * hardware or driver) */ enum mac80211_rx_flags { RX_FLAG_MMIC_ERROR = BIT(0), @@ -1328,6 +1330,7 @@ enum mac80211_rx_flags { RX_FLAG_RADIOTAP_HE_MU = BIT(27), RX_FLAG_RADIOTAP_LSIG = BIT(28), RX_FLAG_NO_PSDU = BIT(29), + RX_FLAG_8023 = BIT(30), }; /** @@ -1649,11 +1652,15 @@ enum ieee80211_vif_flags { * The driver supports sending frames passed as 802.3 frames by mac80211. * It must also support sending 802.11 packets for the same interface. * @IEEE80211_OFFLOAD_ENCAP_4ADDR: support 4-address mode encapsulation offload + * @IEEE80211_OFFLOAD_DECAP_ENABLED: rx encapsulation offload is enabled + * The driver supports passing received 802.11 frames as 802.3 frames to + * mac80211. */ enum ieee80211_offload_flags { IEEE80211_OFFLOAD_ENCAP_ENABLED = BIT(0), IEEE80211_OFFLOAD_ENCAP_4ADDR = BIT(1), + IEEE80211_OFFLOAD_DECAP_ENABLED = BIT(2), }; /** @@ -2389,6 +2396,9 @@ struct ieee80211_txq { * @IEEE80211_HW_SUPPORTS_TX_ENCAP_OFFLOAD: Hardware supports tx encapsulation * offload * + * @IEEE80211_HW_SUPPORTS_RX_DECAP_OFFLOAD: Hardware supports rx decapsulation + * offload + * * @NUM_IEEE80211_HW_FLAGS: number of hardware flags, used for sizing arrays */ enum ieee80211_hw_flags { @@ -2442,6 +2452,7 @@ enum ieee80211_hw_flags { IEEE80211_HW_SUPPORTS_ONLY_HE_MULTI_BSSID, IEEE80211_HW_AMPDU_KEYBORDER_SUPPORT, IEEE80211_HW_SUPPORTS_TX_ENCAP_OFFLOAD, + IEEE80211_HW_SUPPORTS_RX_DECAP_OFFLOAD, /* keep last, obviously */ NUM_IEEE80211_HW_FLAGS @@ -3881,6 +3892,8 @@ enum ieee80211_reconfig_type { * @sta_set_4addr: Called to notify the driver when a station starts/stops using * 4-address mode * @set_sar_specs: Update the SAR (TX power) settings. + * @sta_set_decap_offload: Called to notify the driver when a station is allowed + * to use rx decapsulation offload */ struct ieee80211_ops { void (*tx)(struct ieee80211_hw *hw, @@ -4198,6 +4211,9 @@ struct ieee80211_ops { struct ieee80211_sta *sta, bool enabled); int (*set_sar_specs)(struct ieee80211_hw *hw, const struct cfg80211_sar_specs *sar); + void (*sta_set_decap_offload)(struct ieee80211_hw *hw, + struct ieee80211_vif *vif, + struct ieee80211_sta *sta, bool enabled); }; /** diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c index 9e723d9434219..08a6f6644dc4e 100644 --- a/net/mac80211/debugfs.c +++ b/net/mac80211/debugfs.c @@ -405,6 +405,7 @@ static const char *hw_flag_names[] = { FLAG(SUPPORTS_ONLY_HE_MULTI_BSSID), FLAG(AMPDU_KEYBORDER_SUPPORT), FLAG(SUPPORTS_TX_ENCAP_OFFLOAD), + FLAG(SUPPORTS_RX_DECAP_OFFLOAD), #undef FLAG }; diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c index eb4bb79d936ad..5a27c61a7b388 100644 --- a/net/mac80211/debugfs_sta.c +++ b/net/mac80211/debugfs_sta.c @@ -79,6 +79,7 @@ static const char * const sta_flag_names[] = { FLAG(MPSP_RECIPIENT), FLAG(PS_DELIVER), FLAG(USES_ENCRYPTION), + FLAG(DECAP_OFFLOAD), #undef FLAG }; diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index bcdfd19a596be..604ca59937f0f 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -1413,4 +1413,20 @@ static inline void drv_sta_set_4addr(struct ieee80211_local *local, trace_drv_return_void(local); } +static inline void drv_sta_set_decap_offload(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + struct ieee80211_sta *sta, + bool enabled) +{ + sdata = get_bss_sdata(sdata); + if (!check_sdata_in_driver(sdata)) + return; + + trace_drv_sta_set_decap_offload(local, sdata, sta, enabled); + if (local->ops->sta_set_decap_offload) + local->ops->sta_set_decap_offload(&local->hw, &sdata->vif, sta, + enabled); + trace_drv_return_void(local); +} + #endif /* __MAC80211_DRIVER_OPS */ diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 3b9ec4ef81c3b..1b44690823aa5 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -765,7 +765,7 @@ static const struct net_device_ops ieee80211_dataif_8023_ops = { .ndo_get_stats64 = ieee80211_get_stats64, }; -static bool ieee80211_iftype_supports_encap_offload(enum nl80211_iftype iftype) +static bool ieee80211_iftype_supports_hdr_offload(enum nl80211_iftype iftype) { switch (iftype) { /* P2P GO and client are mapped to AP/STATION types */ @@ -785,7 +785,7 @@ static bool ieee80211_set_sdata_offload_flags(struct ieee80211_sub_if_data *sdat flags = sdata->vif.offload_flags; if (ieee80211_hw_check(&local->hw, SUPPORTS_TX_ENCAP_OFFLOAD) && - ieee80211_iftype_supports_encap_offload(sdata->vif.type)) { + ieee80211_iftype_supports_hdr_offload(sdata->vif.type)) { flags |= IEEE80211_OFFLOAD_ENCAP_ENABLED; if (!ieee80211_hw_check(&local->hw, SUPPORTS_TX_FRAG) && @@ -798,10 +798,21 @@ static bool ieee80211_set_sdata_offload_flags(struct ieee80211_sub_if_data *sdat flags &= ~IEEE80211_OFFLOAD_ENCAP_ENABLED; } + if (ieee80211_hw_check(&local->hw, SUPPORTS_RX_DECAP_OFFLOAD) && + ieee80211_iftype_supports_hdr_offload(sdata->vif.type)) { + flags |= IEEE80211_OFFLOAD_DECAP_ENABLED; + + if (local->monitors) + flags &= ~IEEE80211_OFFLOAD_DECAP_ENABLED; + } else { + flags &= ~IEEE80211_OFFLOAD_DECAP_ENABLED; + } + if (sdata->vif.offload_flags == flags) return false; sdata->vif.offload_flags = flags; + ieee80211_check_fast_rx_iface(sdata); return true; } @@ -819,7 +830,7 @@ static void ieee80211_set_vif_encap_ops(struct ieee80211_sub_if_data *sdata) } if (!ieee80211_hw_check(&local->hw, SUPPORTS_TX_ENCAP_OFFLOAD) || - !ieee80211_iftype_supports_encap_offload(bss->vif.type)) + !ieee80211_iftype_supports_hdr_offload(bss->vif.type)) return; enabled = bss->vif.offload_flags & IEEE80211_OFFLOAD_ENCAP_ENABLED; diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 972895e9f22dc..c1343c028b767 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -4095,7 +4095,9 @@ void ieee80211_check_fast_rx(struct sta_info *sta) .vif_type = sdata->vif.type, .control_port_protocol = sdata->control_port_protocol, }, *old, *new = NULL; + bool set_offload = false; bool assign = false; + bool offload; /* use sparse to check that we don't return without updating */ __acquire(check_fast_rx); @@ -4208,6 +4210,17 @@ void ieee80211_check_fast_rx(struct sta_info *sta) if (assign) new = kmemdup(&fastrx, sizeof(fastrx), GFP_KERNEL); + offload = assign && + (sdata->vif.offload_flags & IEEE80211_OFFLOAD_DECAP_ENABLED); + + if (offload) + set_offload = !test_and_set_sta_flag(sta, WLAN_STA_DECAP_OFFLOAD); + else + set_offload = test_and_clear_sta_flag(sta, WLAN_STA_DECAP_OFFLOAD); + + if (set_offload) + drv_sta_set_decap_offload(local, sdata, &sta->sta, assign); + spin_lock_bh(&sta->lock); old = rcu_dereference_protected(sta->fast_rx, true); rcu_assign_pointer(sta->fast_rx, new); @@ -4254,6 +4267,104 @@ void ieee80211_check_fast_rx_iface(struct ieee80211_sub_if_data *sdata) mutex_unlock(&local->sta_mtx); } +static void ieee80211_rx_8023(struct ieee80211_rx_data *rx, + struct ieee80211_fast_rx *fast_rx, + int orig_len) +{ + struct ieee80211_sta_rx_stats *stats; + struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(rx->skb); + struct sta_info *sta = rx->sta; + struct sk_buff *skb = rx->skb; + void *sa = skb->data + ETH_ALEN; + void *da = skb->data; + + stats = &sta->rx_stats; + if (fast_rx->uses_rss) + stats = this_cpu_ptr(sta->pcpu_rx_stats); + + /* statistics part of ieee80211_rx_h_sta_process() */ + if (!(status->flag & RX_FLAG_NO_SIGNAL_VAL)) { + stats->last_signal = status->signal; + if (!fast_rx->uses_rss) + ewma_signal_add(&sta->rx_stats_avg.signal, + -status->signal); + } + + if (status->chains) { + int i; + + stats->chains = status->chains; + for (i = 0; i < ARRAY_SIZE(status->chain_signal); i++) { + int signal = status->chain_signal[i]; + + if (!(status->chains & BIT(i))) + continue; + + stats->chain_signal_last[i] = signal; + if (!fast_rx->uses_rss) + ewma_signal_add(&sta->rx_stats_avg.chain_signal[i], + -signal); + } + } + /* end of statistics */ + + stats->last_rx = jiffies; + stats->last_rate = sta_stats_encode_rate(status); + + stats->fragments++; + stats->packets++; + + skb->dev = fast_rx->dev; + + dev_sw_netstats_rx_add(fast_rx->dev, skb->len); + + /* The seqno index has the same property as needed + * for the rx_msdu field, i.e. it is IEEE80211_NUM_TIDS + * for non-QoS-data frames. Here we know it's a data + * frame, so count MSDUs. + */ + u64_stats_update_begin(&stats->syncp); + stats->msdu[rx->seqno_idx]++; + stats->bytes += orig_len; + u64_stats_update_end(&stats->syncp); + + if (fast_rx->internal_forward) { + struct sk_buff *xmit_skb = NULL; + if (is_multicast_ether_addr(da)) { + xmit_skb = skb_copy(skb, GFP_ATOMIC); + } else if (!ether_addr_equal(da, sa) && + sta_info_get(rx->sdata, da)) { + xmit_skb = skb; + skb = NULL; + } + + if (xmit_skb) { + /* + * Send to wireless media and increase priority by 256 + * to keep the received priority instead of + * reclassifying the frame (see cfg80211_classify8021d). + */ + xmit_skb->priority += 256; + xmit_skb->protocol = htons(ETH_P_802_3); + skb_reset_network_header(xmit_skb); + skb_reset_mac_header(xmit_skb); + dev_queue_xmit(xmit_skb); + } + + if (!skb) + return; + } + + /* deliver to local stack */ + skb->protocol = eth_type_trans(skb, fast_rx->dev); + memset(skb->cb, 0, sizeof(skb->cb)); + if (rx->list) + list_add_tail(&skb->list, rx->list); + else + netif_receive_skb(skb); + +} + static bool ieee80211_invoke_fast_rx(struct ieee80211_rx_data *rx, struct ieee80211_fast_rx *fast_rx) { @@ -4274,9 +4385,6 @@ static bool ieee80211_invoke_fast_rx(struct ieee80211_rx_data *rx, } addrs __aligned(2); struct ieee80211_sta_rx_stats *stats = &sta->rx_stats; - if (fast_rx->uses_rss) - stats = this_cpu_ptr(sta->pcpu_rx_stats); - /* for parallel-rx, we need to have DUP_VALIDATED, otherwise we write * to a common data structure; drivers can implement that per queue * but we don't have that information in mac80211 @@ -4350,32 +4458,6 @@ static bool ieee80211_invoke_fast_rx(struct ieee80211_rx_data *rx, pskb_trim(skb, skb->len - fast_rx->icv_len)) goto drop; - /* statistics part of ieee80211_rx_h_sta_process() */ - if (!(status->flag & RX_FLAG_NO_SIGNAL_VAL)) { - stats->last_signal = status->signal; - if (!fast_rx->uses_rss) - ewma_signal_add(&sta->rx_stats_avg.signal, - -status->signal); - } - - if (status->chains) { - int i; - - stats->chains = status->chains; - for (i = 0; i < ARRAY_SIZE(status->chain_signal); i++) { - int signal = status->chain_signal[i]; - - if (!(status->chains & BIT(i))) - continue; - - stats->chain_signal_last[i] = signal; - if (!fast_rx->uses_rss) - ewma_signal_add(&sta->rx_stats_avg.chain_signal[i], - -signal); - } - } - /* end of statistics */ - if (rx->key && !ieee80211_has_protected(hdr->frame_control)) goto drop; @@ -4387,12 +4469,6 @@ static bool ieee80211_invoke_fast_rx(struct ieee80211_rx_data *rx, return true; } - stats->last_rx = jiffies; - stats->last_rate = sta_stats_encode_rate(status); - - stats->fragments++; - stats->packets++; - /* do the header conversion - first grab the addresses */ ether_addr_copy(addrs.da, skb->data + fast_rx->da_offs); ether_addr_copy(addrs.sa, skb->data + fast_rx->sa_offs); @@ -4401,58 +4477,14 @@ static bool ieee80211_invoke_fast_rx(struct ieee80211_rx_data *rx, /* push the addresses in front */ memcpy(skb_push(skb, sizeof(addrs)), &addrs, sizeof(addrs)); - skb->dev = fast_rx->dev; - - dev_sw_netstats_rx_add(fast_rx->dev, skb->len); - - /* The seqno index has the same property as needed - * for the rx_msdu field, i.e. it is IEEE80211_NUM_TIDS - * for non-QoS-data frames. Here we know it's a data - * frame, so count MSDUs. - */ - u64_stats_update_begin(&stats->syncp); - stats->msdu[rx->seqno_idx]++; - stats->bytes += orig_len; - u64_stats_update_end(&stats->syncp); - - if (fast_rx->internal_forward) { - struct sk_buff *xmit_skb = NULL; - if (is_multicast_ether_addr(addrs.da)) { - xmit_skb = skb_copy(skb, GFP_ATOMIC); - } else if (!ether_addr_equal(addrs.da, addrs.sa) && - sta_info_get(rx->sdata, addrs.da)) { - xmit_skb = skb; - skb = NULL; - } - - if (xmit_skb) { - /* - * Send to wireless media and increase priority by 256 - * to keep the received priority instead of - * reclassifying the frame (see cfg80211_classify8021d). - */ - xmit_skb->priority += 256; - xmit_skb->protocol = htons(ETH_P_802_3); - skb_reset_network_header(xmit_skb); - skb_reset_mac_header(xmit_skb); - dev_queue_xmit(xmit_skb); - } - - if (!skb) - return true; - } - - /* deliver to local stack */ - skb->protocol = eth_type_trans(skb, fast_rx->dev); - memset(skb->cb, 0, sizeof(skb->cb)); - if (rx->list) - list_add_tail(&skb->list, rx->list); - else - netif_receive_skb(skb); + ieee80211_rx_8023(rx, fast_rx, orig_len); return true; drop: dev_kfree_skb(skb); + if (fast_rx->uses_rss) + stats = this_cpu_ptr(sta->pcpu_rx_stats); + stats->dropped++; return true; } @@ -4506,6 +4538,43 @@ static bool ieee80211_prepare_and_rx_handle(struct ieee80211_rx_data *rx, return true; } +static void __ieee80211_rx_handle_8023(struct ieee80211_hw *hw, + struct ieee80211_sta *pubsta, + struct sk_buff *skb, + struct list_head *list) +{ + struct ieee80211_local *local = hw_to_local(hw); + struct ieee80211_fast_rx *fast_rx; + struct ieee80211_rx_data rx; + + memset(&rx, 0, sizeof(rx)); + rx.skb = skb; + rx.local = local; + rx.list = list; + + I802_DEBUG_INC(local->dot11ReceivedFragmentCount); + + /* drop frame if too short for header */ + if (skb->len < sizeof(struct ethhdr)) + goto drop; + + if (!pubsta) + goto drop; + + rx.sta = container_of(pubsta, struct sta_info, sta); + rx.sdata = rx.sta->sdata; + + fast_rx = rcu_dereference(rx.sta->fast_rx); + if (!fast_rx) + goto drop; + + ieee80211_rx_8023(&rx, fast_rx, skb->len); + return; + +drop: + dev_kfree_skb(skb); +} + /* * This is the actual Rx frames handler. as it belongs to Rx path it must * be called with rcu_read_lock protection. @@ -4737,13 +4806,17 @@ void ieee80211_rx_list(struct ieee80211_hw *hw, struct ieee80211_sta *pubsta, * if it was previously present. * Also, frames with less than 16 bytes are dropped. */ - skb = ieee80211_rx_monitor(local, skb, rate); + if (!(status->flag & RX_FLAG_8023)) + skb = ieee80211_rx_monitor(local, skb, rate); if (skb) { ieee80211_tpt_led_trig_rx(local, ((struct ieee80211_hdr *)skb->data)->frame_control, skb->len); - __ieee80211_rx_handle_packet(hw, pubsta, skb, list); + if (status->flag & RX_FLAG_8023) + __ieee80211_rx_handle_8023(hw, pubsta, skb, list); + else + __ieee80211_rx_handle_packet(hw, pubsta, skb, list); } kcov_remote_stop(); diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index 7afd07636b81d..78b9d0c7cc583 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -71,6 +71,7 @@ * until pending frames are delivered * @WLAN_STA_USES_ENCRYPTION: This station was configured for encryption, * so drop all packets without a key later. + * @WLAN_STA_DECAP_OFFLOAD: This station uses rx decap offload * * @NUM_WLAN_STA_FLAGS: number of defined flags */ @@ -102,6 +103,7 @@ enum ieee80211_sta_info_flags { WLAN_STA_MPSP_RECIPIENT, WLAN_STA_PS_DELIVER, WLAN_STA_USES_ENCRYPTION, + WLAN_STA_DECAP_OFFLOAD, NUM_WLAN_STA_FLAGS, }; diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h index 601322e169579..8fcc390564029 100644 --- a/net/mac80211/trace.h +++ b/net/mac80211/trace.h @@ -2761,7 +2761,7 @@ DEFINE_EVENT(local_sdata_addr_evt, drv_update_vif_offload, TP_ARGS(local, sdata) ); -TRACE_EVENT(drv_sta_set_4addr, +DECLARE_EVENT_CLASS(sta_flag_evt, TP_PROTO(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, struct ieee80211_sta *sta, bool enabled), @@ -2788,6 +2788,22 @@ TRACE_EVENT(drv_sta_set_4addr, ) ); +DEFINE_EVENT(sta_flag_evt, drv_sta_set_4addr, + TP_PROTO(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + struct ieee80211_sta *sta, bool enabled), + + TP_ARGS(local, sdata, sta, enabled) +); + +DEFINE_EVENT(sta_flag_evt, drv_sta_set_decap_offload, + TP_PROTO(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + struct ieee80211_sta *sta, bool enabled), + + TP_ARGS(local, sdata, sta, enabled) +); + #endif /* !__MAC80211_DRIVER_TRACE || TRACE_HEADER_MULTI_READ */ #undef TRACE_INCLUDE_PATH -- GitLab From 7dfe20ee92f681ab1342015254ddb77a18f40cdb Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Fri, 15 Jan 2021 12:33:29 -0800 Subject: [PATCH 1158/2012] ASoC: qcom: Fix number of HDMI RDMA channels on sc7180 Suspending/resuming with an HDMI dongle attached leads to crashes from an audio regmap. Unable to handle kernel paging request at virtual address ffffffc018068000 Mem abort info: ESR = 0x96000047 EC = 0x25: DABT (current EL), IL = 32 bits SET = 0, FnV = 0 EA = 0, S1PTW = 0 Data abort info: ISV = 0, ISS = 0x00000047 CM = 0, WnR = 1 swapper pgtable: 4k pages, 39-bit VAs, pgdp=0000000081b12000 [ffffffc018068000] pgd=0000000275d14003, pud=0000000275d14003, pmd=000000026365d003, pte=0000000000000000 Internal error: Oops: 96000047 [#1] PREEMPT SMP Call trace: regmap_mmio_write32le+0x2c/0x40 regmap_mmio_write+0x48/0x6c _regmap_bus_reg_write+0x34/0x44 _regmap_write+0x100/0x150 regcache_default_sync+0xc0/0x138 regcache_sync+0x188/0x26c lpass_platform_pcmops_resume+0x48/0x54 [snd_soc_lpass_platform] snd_soc_component_resume+0x28/0x40 soc_resume_deferred+0x6c/0x178 process_one_work+0x208/0x3c8 worker_thread+0x23c/0x3e8 kthread+0x144/0x178 ret_from_fork+0x10/0x18 Code: d503201f d50332bf f94002a8 8b344108 (b9000113) I can reliably reproduce this problem by running 'tail' on the registers file in debugfs for the hdmi regmap. # tail /sys/kernel/debug/regmap/62d87000.lpass-lpass_hdmi/registers [ 84.658733] Unable to handle kernel paging request at virtual address ffffffd0128e800c This crash happens because we're trying to read registers from the regmap beyond the length of the mapping created by ioremap(). The number of hdmi_rdma_channels determines the size of the regmap via this code in sound/soc/qcom/lpass-cpu.c: lpass_hdmi_regmap_config.max_register = LPAIF_HDMI_RDMAPER_REG(variant, variant->hdmi_rdma_channels); According to debugfs the size of the regmap is 0x68010 but according to the DTS file posted in [1] the size is only 0x68000 (see the first reg property of the lpass_cpu node). Let's change the number of channels to be 3 instead of 4 so the math works out to have a max register of 0x67010, nicely fitting inside of the region size of 0x68000. Note: I tried to bump up the size of the register region to the next page to include the 0x68010 register but then the tail command caused SErrors with an async abort, implying that the register region doesn't exist or it isn't clocked because the bus is telling us that the register read failed. I reduce the number of channels and played audio through the HDMI channel and it kept working so I think this is correct. Fixes: 2ad63dc8df6b ("ASoC: qcom: sc7180: Add support for audio over DP") Link: https://lore.kernel.org/r/1601448168-18396-2-git-send-email-srivasam@codeaurora.org [1] Cc: V Sujith Kumar Reddy Cc: Srinivasa Rao Cc: Srinivas Kandagatla Cc: Cheng-Yi Chiang Signed-off-by: Stephen Boyd Link: https://lore.kernel.org/r/20210115203329.846824-1-swboyd@chromium.org Signed-off-by: Mark Brown --- sound/soc/qcom/lpass-sc7180.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/qcom/lpass-sc7180.c b/sound/soc/qcom/lpass-sc7180.c index 8c168d3c589e9..735c9dac28f26 100644 --- a/sound/soc/qcom/lpass-sc7180.c +++ b/sound/soc/qcom/lpass-sc7180.c @@ -171,7 +171,7 @@ static struct lpass_variant sc7180_data = { .rdma_channels = 5, .hdmi_rdma_reg_base = 0x64000, .hdmi_rdma_reg_stride = 0x1000, - .hdmi_rdma_channels = 4, + .hdmi_rdma_channels = 3, .dmactl_audif_start = 1, .wrdma_reg_base = 0x18000, .wrdma_reg_stride = 0x1000, -- GitLab From 51dfb6ca3728bd0a0a3c23776a12d2a15a1d2457 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Wed, 20 Jan 2021 23:58:44 +0300 Subject: [PATCH 1159/2012] regulator: consumer: Add missing stubs to regulator/consumer.h Add missing stubs to regulator/consumer.h in order to fix COMPILE_TEST of the kernel. In particular this should fix compile-testing of OPP core because of a missing stub for regulator_sync_voltage(). Reported-by: kernel test robot Signed-off-by: Dmitry Osipenko Link: https://lore.kernel.org/r/20210120205844.12658-1-digetx@gmail.com Signed-off-by: Mark Brown --- include/linux/regulator/consumer.h | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/include/linux/regulator/consumer.h b/include/linux/regulator/consumer.h index 2024944fd2f78..20e84a84fb779 100644 --- a/include/linux/regulator/consumer.h +++ b/include/linux/regulator/consumer.h @@ -331,6 +331,12 @@ regulator_get_exclusive(struct device *dev, const char *id) return ERR_PTR(-ENODEV); } +static inline struct regulator *__must_check +devm_regulator_get_exclusive(struct device *dev, const char *id) +{ + return ERR_PTR(-ENODEV); +} + static inline struct regulator *__must_check regulator_get_optional(struct device *dev, const char *id) { @@ -486,6 +492,11 @@ static inline int regulator_get_voltage(struct regulator *regulator) return -EINVAL; } +static inline int regulator_sync_voltage(struct regulator *regulator) +{ + return -EINVAL; +} + static inline int regulator_is_supported_voltage(struct regulator *regulator, int min_uV, int max_uV) { @@ -578,6 +589,25 @@ static inline int devm_regulator_unregister_notifier(struct regulator *regulator return 0; } +static inline int regulator_suspend_enable(struct regulator_dev *rdev, + suspend_state_t state) +{ + return -EINVAL; +} + +static inline int regulator_suspend_disable(struct regulator_dev *rdev, + suspend_state_t state) +{ + return -EINVAL; +} + +static inline int regulator_set_suspend_voltage(struct regulator *regulator, + int min_uV, int max_uV, + suspend_state_t state) +{ + return -EINVAL; +} + static inline void *regulator_get_drvdata(struct regulator *regulator) { return NULL; -- GitLab From f1864e193dc04c3326522b4c0aa79b1d3653bbf0 Mon Sep 17 00:00:00 2001 From: Philipp Borgers Date: Sat, 19 Dec 2020 18:07:10 +0100 Subject: [PATCH 1160/2012] mac80211: add LDPC encoding to ieee80211_parse_tx_radiotap This patch adds support for LDPC encoding to the radiotap tx parse function. Piror to this change adding the LDPC flag to the radiotap header did not encode frames with LDPC. Signed-off-by: Philipp Borgers Link: https://lore.kernel.org/r/20201219170710.11706-1-borgers@mi.fu-berlin.de Signed-off-by: Johannes Berg --- net/mac80211/tx.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index d981647c28632..45536185d8d7f 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -2120,6 +2120,10 @@ bool ieee80211_parse_tx_radiotap(struct sk_buff *skb, if (mcs_known & IEEE80211_RADIOTAP_MCS_HAVE_BW && mcs_bw == IEEE80211_RADIOTAP_MCS_BW_40) rate_flags |= IEEE80211_TX_RC_40_MHZ_WIDTH; + + if (mcs_known & IEEE80211_RADIOTAP_MCS_HAVE_FEC && + mcs_flags & IEEE80211_RADIOTAP_MCS_FEC_LDPC) + info->flags |= IEEE80211_TX_CTL_LDPC; break; case IEEE80211_RADIOTAP_VHT: -- GitLab From dcf3c8fb32ddbfa3b8227db38aa6746405bd4527 Mon Sep 17 00:00:00 2001 From: Shay Bar Date: Tue, 22 Dec 2020 08:47:14 +0200 Subject: [PATCH 1161/2012] mac80211: 160MHz with extended NSS BW in CSA Upon receiving CSA with 160MHz extended NSS BW from associated AP, STA should set the HT operation_mode based on new_center_freq_seg1 because it is later used as ccfs2 in ieee80211_chandef_vht_oper(). Signed-off-by: Aviad Brikman Signed-off-by: Shay Bar Link: https://lore.kernel.org/r/20201222064714.24888-1-shay.bar@celeno.com Signed-off-by: Johannes Berg --- net/mac80211/spectmgmt.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/net/mac80211/spectmgmt.c b/net/mac80211/spectmgmt.c index ae1cb2c687224..76747bfdaddd0 100644 --- a/net/mac80211/spectmgmt.c +++ b/net/mac80211/spectmgmt.c @@ -133,16 +133,20 @@ int ieee80211_parse_ch_switch_ie(struct ieee80211_sub_if_data *sdata, } if (wide_bw_chansw_ie) { + u8 new_seg1 = wide_bw_chansw_ie->new_center_freq_seg1; struct ieee80211_vht_operation vht_oper = { .chan_width = wide_bw_chansw_ie->new_channel_width, .center_freq_seg0_idx = wide_bw_chansw_ie->new_center_freq_seg0, - .center_freq_seg1_idx = - wide_bw_chansw_ie->new_center_freq_seg1, + .center_freq_seg1_idx = new_seg1, /* .basic_mcs_set doesn't matter */ }; - struct ieee80211_ht_operation ht_oper = {}; + struct ieee80211_ht_operation ht_oper = { + .operation_mode = + cpu_to_le16(new_seg1 << + IEEE80211_HT_OP_MODE_CCFS2_SHIFT), + }; /* default, for the case of IEEE80211_VHT_CHANWIDTH_USE_HT, * to the previously parsed chandef -- GitLab From 67de8dca50c027ca0fa3b62a488ee5035036a0da Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Wed, 20 Jan 2021 21:09:49 -0800 Subject: [PATCH 1162/2012] x86/mmx: Use KFPU_387 for MMX string operations MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The default kernel_fpu_begin() doesn't work on systems that support XMM but haven't yet enabled CR4.OSFXSR. This causes crashes when _mmx_memcpy() is called too early because LDMXCSR generates #UD when the aforementioned bit is clear. Fix it by using kernel_fpu_begin_mask(KFPU_387) explicitly. Fixes: 7ad816762f9b ("x86/fpu: Reset MXCSR to default in kernel_fpu_begin()") Reported-by: Krzysztof Mazur Signed-off-by: Andy Lutomirski Signed-off-by: Borislav Petkov Tested-by: Krzysztof Piotr Olędzki Tested-by: Krzysztof Mazur Cc: Link: https://lkml.kernel.org/r/e7bf21855fe99e5f3baa27446e32623358f69e8d.1611205691.git.luto@kernel.org --- arch/x86/lib/mmx_32.c | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/arch/x86/lib/mmx_32.c b/arch/x86/lib/mmx_32.c index 4321fa02e18df..419365c48b2ad 100644 --- a/arch/x86/lib/mmx_32.c +++ b/arch/x86/lib/mmx_32.c @@ -26,6 +26,16 @@ #include #include +/* + * Use KFPU_387. MMX instructions are not affected by MXCSR, + * but both AMD and Intel documentation states that even integer MMX + * operations will result in #MF if an exception is pending in FCW. + * + * EMMS is not needed afterwards because, after calling kernel_fpu_end(), + * any subsequent user of the 387 stack will reinitialize it using + * KFPU_387. + */ + void *_mmx_memcpy(void *to, const void *from, size_t len) { void *p; @@ -37,7 +47,7 @@ void *_mmx_memcpy(void *to, const void *from, size_t len) p = to; i = len >> 6; /* len/64 */ - kernel_fpu_begin(); + kernel_fpu_begin_mask(KFPU_387); __asm__ __volatile__ ( "1: prefetch (%0)\n" /* This set is 28 bytes */ @@ -127,7 +137,7 @@ static void fast_clear_page(void *page) { int i; - kernel_fpu_begin(); + kernel_fpu_begin_mask(KFPU_387); __asm__ __volatile__ ( " pxor %%mm0, %%mm0\n" : : @@ -160,7 +170,7 @@ static void fast_copy_page(void *to, void *from) { int i; - kernel_fpu_begin(); + kernel_fpu_begin_mask(KFPU_387); /* * maybe the prefetch stuff can go before the expensive fnsave... @@ -247,7 +257,7 @@ static void fast_clear_page(void *page) { int i; - kernel_fpu_begin(); + kernel_fpu_begin_mask(KFPU_387); __asm__ __volatile__ ( " pxor %%mm0, %%mm0\n" : : @@ -282,7 +292,7 @@ static void fast_copy_page(void *to, void *from) { int i; - kernel_fpu_begin(); + kernel_fpu_begin_mask(KFPU_387); __asm__ __volatile__ ( "1: prefetch (%0)\n" -- GitLab From 28881922abd786a1e62a4ca77394a84373dd5279 Mon Sep 17 00:00:00 2001 From: Ramon Fontes Date: Sun, 27 Dec 2020 00:11:55 -0300 Subject: [PATCH 1163/2012] mac80211_hwsim: add 6GHz channels Advertise 6GHz channels to mac80211. Signed-off-by: Ramon Fontes Link: https://lore.kernel.org/r/20201227031155.81161-1-ramonreisfontes@gmail.com [reword commit message] Signed-off-by: Johannes Berg --- drivers/net/wireless/mac80211_hwsim.c | 74 ++++++++++++++++++++++++++- 1 file changed, 73 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index 3b3fc7c9c91dc..fa7d4c20dc13a 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -311,6 +311,12 @@ static struct net_device *hwsim_mon; /* global monitor netdev */ .hw_value = (_freq), \ } +#define CHAN6G(_freq) { \ + .band = NL80211_BAND_6GHZ, \ + .center_freq = (_freq), \ + .hw_value = (_freq), \ +} + static const struct ieee80211_channel hwsim_channels_2ghz[] = { CHAN2G(2412), /* Channel 1 */ CHAN2G(2417), /* Channel 2 */ @@ -377,6 +383,68 @@ static const struct ieee80211_channel hwsim_channels_5ghz[] = { CHAN5G(5925), /* Channel 185 */ }; +static const struct ieee80211_channel hwsim_channels_6ghz[] = { + CHAN6G(5955), /* Channel 1 */ + CHAN6G(5975), /* Channel 5 */ + CHAN6G(5995), /* Channel 9 */ + CHAN6G(6015), /* Channel 13 */ + CHAN6G(6035), /* Channel 17 */ + CHAN6G(6055), /* Channel 21 */ + CHAN6G(6075), /* Channel 25 */ + CHAN6G(6095), /* Channel 29 */ + CHAN6G(6115), /* Channel 33 */ + CHAN6G(6135), /* Channel 37 */ + CHAN6G(6155), /* Channel 41 */ + CHAN6G(6175), /* Channel 45 */ + CHAN6G(6195), /* Channel 49 */ + CHAN6G(6215), /* Channel 53 */ + CHAN6G(6235), /* Channel 57 */ + CHAN6G(6255), /* Channel 61 */ + CHAN6G(6275), /* Channel 65 */ + CHAN6G(6295), /* Channel 69 */ + CHAN6G(6315), /* Channel 73 */ + CHAN6G(6335), /* Channel 77 */ + CHAN6G(6355), /* Channel 81 */ + CHAN6G(6375), /* Channel 85 */ + CHAN6G(6395), /* Channel 89 */ + CHAN6G(6415), /* Channel 93 */ + CHAN6G(6435), /* Channel 97 */ + CHAN6G(6455), /* Channel 181 */ + CHAN6G(6475), /* Channel 105 */ + CHAN6G(6495), /* Channel 109 */ + CHAN6G(6515), /* Channel 113 */ + CHAN6G(6535), /* Channel 117 */ + CHAN6G(6555), /* Channel 121 */ + CHAN6G(6575), /* Channel 125 */ + CHAN6G(6595), /* Channel 129 */ + CHAN6G(6615), /* Channel 133 */ + CHAN6G(6635), /* Channel 137 */ + CHAN6G(6655), /* Channel 141 */ + CHAN6G(6675), /* Channel 145 */ + CHAN6G(6695), /* Channel 149 */ + CHAN6G(6715), /* Channel 153 */ + CHAN6G(6735), /* Channel 157 */ + CHAN6G(6755), /* Channel 161 */ + CHAN6G(6775), /* Channel 165 */ + CHAN6G(6795), /* Channel 169 */ + CHAN6G(6815), /* Channel 173 */ + CHAN6G(6835), /* Channel 177 */ + CHAN6G(6855), /* Channel 181 */ + CHAN6G(6875), /* Channel 185 */ + CHAN6G(6895), /* Channel 189 */ + CHAN6G(6915), /* Channel 193 */ + CHAN6G(6935), /* Channel 197 */ + CHAN6G(6955), /* Channel 201 */ + CHAN6G(6975), /* Channel 205 */ + CHAN6G(6995), /* Channel 209 */ + CHAN6G(7015), /* Channel 213 */ + CHAN6G(7035), /* Channel 217 */ + CHAN6G(7055), /* Channel 221 */ + CHAN6G(7075), /* Channel 225 */ + CHAN6G(7095), /* Channel 229 */ + CHAN6G(7115), /* Channel 233 */ +}; + #define NUM_S1G_CHANS_US 51 static struct ieee80211_channel hwsim_channels_s1g[NUM_S1G_CHANS_US]; @@ -548,6 +616,7 @@ struct mac80211_hwsim_data { struct ieee80211_supported_band bands[NUM_NL80211_BANDS]; struct ieee80211_channel channels_2ghz[ARRAY_SIZE(hwsim_channels_2ghz)]; struct ieee80211_channel channels_5ghz[ARRAY_SIZE(hwsim_channels_5ghz)]; + struct ieee80211_channel channels_6ghz[ARRAY_SIZE(hwsim_channels_6ghz)]; struct ieee80211_channel channels_s1g[ARRAY_SIZE(hwsim_channels_s1g)]; struct ieee80211_rate rates[ARRAY_SIZE(hwsim_rates)]; struct ieee80211_iface_combination if_combination; @@ -578,7 +647,8 @@ struct mac80211_hwsim_data { struct ieee80211_channel *channel; unsigned long next_start, start, end; } survey_data[ARRAY_SIZE(hwsim_channels_2ghz) + - ARRAY_SIZE(hwsim_channels_5ghz)]; + ARRAY_SIZE(hwsim_channels_5ghz) + + ARRAY_SIZE(hwsim_channels_6ghz)]; struct ieee80211_channel *channel; u64 beacon_int /* beacon interval in us */; @@ -3149,6 +3219,8 @@ static int mac80211_hwsim_new_radio(struct genl_info *info, sizeof(hwsim_channels_2ghz)); memcpy(data->channels_5ghz, hwsim_channels_5ghz, sizeof(hwsim_channels_5ghz)); + memcpy(data->channels_6ghz, hwsim_channels_6ghz, + sizeof(hwsim_channels_6ghz)); memcpy(data->channels_s1g, hwsim_channels_s1g, sizeof(hwsim_channels_s1g)); memcpy(data->rates, hwsim_rates, sizeof(hwsim_rates)); -- GitLab From 2d5e09d05827f9aace60b9711d9680e5da51ca5d Mon Sep 17 00:00:00 2001 From: Wen Gong Date: Tue, 5 Jan 2021 11:08:39 +0800 Subject: [PATCH 1164/2012] mac80211: remove NSS number of 160MHz if not support 160MHz for HE When it does not support 160MHz in HE phy capabilities information, it should not treat the NSS number of 160MHz as a valid number, otherwise the final NSS will be set to 0. Signed-off-by: Wen Gong Link: https://lore.kernel.org/r/1609816120-9411-2-git-send-email-wgong@codeaurora.org Signed-off-by: Johannes Berg --- net/mac80211/vht.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/net/mac80211/vht.c b/net/mac80211/vht.c index c3ca973737742..e856f90921375 100644 --- a/net/mac80211/vht.c +++ b/net/mac80211/vht.c @@ -484,6 +484,7 @@ enum ieee80211_sta_rx_bandwidth ieee80211_sta_cur_vht_bw(struct sta_info *sta) void ieee80211_sta_set_rx_nss(struct sta_info *sta) { u8 ht_rx_nss = 0, vht_rx_nss = 0, he_rx_nss = 0, rx_nss; + bool support_160; /* if we received a notification already don't overwrite it */ if (sta->sta.rx_nss) @@ -514,7 +515,13 @@ void ieee80211_sta_set_rx_nss(struct sta_info *sta) } } - he_rx_nss = min(rx_mcs_80, rx_mcs_160); + support_160 = he_cap->he_cap_elem.phy_cap_info[0] & + IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_160MHZ_IN_5G; + + if (support_160) + he_rx_nss = min(rx_mcs_80, rx_mcs_160); + else + he_rx_nss = rx_mcs_80; } if (sta->sta.ht_cap.ht_supported) { -- GitLab From 97784481757fba7570121a70dd37ca74a29f50a8 Mon Sep 17 00:00:00 2001 From: Pan Bian Date: Wed, 20 Jan 2021 23:22:02 -0800 Subject: [PATCH 1165/2012] lightnvm: fix memory leak when submit fails The allocated page is not released if error occurs in nvm_submit_io_sync_raw(). __free_page() is moved ealier to avoid possible memory leak issue. Fixes: aff3fb18f957 ("lightnvm: move bad block and chunk state logic to core") Signed-off-by: Pan Bian Signed-off-by: Jens Axboe --- drivers/lightnvm/core.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/lightnvm/core.c b/drivers/lightnvm/core.c index c1bcac71008c6..28ddcaa5358b1 100644 --- a/drivers/lightnvm/core.c +++ b/drivers/lightnvm/core.c @@ -844,11 +844,10 @@ static int nvm_bb_chunk_sense(struct nvm_dev *dev, struct ppa_addr ppa) rqd.ppa_addr = generic_to_dev_addr(dev, ppa); ret = nvm_submit_io_sync_raw(dev, &rqd); + __free_page(page); if (ret) return ret; - __free_page(page); - return rqd.error; } -- GitLab From 139bc8a6146d92822c866cf2fd410159c56b3648 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 21 Jan 2021 12:08:15 +0000 Subject: [PATCH 1166/2012] KVM: Forbid the use of tagged userspace addresses for memslots The use of a tagged address could be pretty confusing for the whole memslot infrastructure as well as the MMU notifiers. Forbid it altogether, as it never quite worked the first place. Cc: stable@vger.kernel.org Reported-by: Rick Edgecombe Reviewed-by: Catalin Marinas Signed-off-by: Marc Zyngier --- Documentation/virt/kvm/api.rst | 3 +++ virt/kvm/kvm_main.c | 1 + 2 files changed, 4 insertions(+) diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index 4e5316ed10e90..c347b7083abf4 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -1269,6 +1269,9 @@ field userspace_addr, which must point at user addressable memory for the entire memory slot size. Any object may back this memory, including anonymous memory, ordinary files, and hugetlbfs. +On architectures that support a form of address tagging, userspace_addr must +be an untagged address. + It is recommended that the lower 21 bits of guest_phys_addr and userspace_addr be identical. This allows large pages in the guest to be backed by large pages in the host. diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 2541a17ff1c45..a9abaf5f8e53c 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1290,6 +1290,7 @@ int __kvm_set_memory_region(struct kvm *kvm, return -EINVAL; /* We can read the guest memory with __xxx_user() later on. */ if ((mem->userspace_addr & (PAGE_SIZE - 1)) || + (mem->userspace_addr != untagged_addr(mem->userspace_addr)) || !access_ok((void __user *)(unsigned long)mem->userspace_addr, mem->memory_size)) return -EINVAL; -- GitLab From 348fe1ca5ccdca0f8c285e2ab99004fdcd531430 Mon Sep 17 00:00:00 2001 From: Sung Lee Date: Tue, 5 Jan 2021 14:32:29 -0500 Subject: [PATCH 1167/2012] drm/amd/display: DCN2X Find Secondary Pipe properly in MPO + ODM Case [WHY] Previously as MPO + ODM Combine was not supported, finding secondary pipes for each case was mutually exclusive. Now that both are supported at the same time, both cases should be taken into account when finding a secondary pipe. [HOW] If a secondary pipe cannot be found based on previous bottom pipe, search for a second pipe using next_odm_pipe instead. Tested-by: Daniel Wheeler Signed-off-by: Sung Lee Reviewed-by: Dmytro Laktyushkin Acked-by: Anson Jacob Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org # 5.10.x --- drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c index e04ecf0fc0dbc..5ed18cac57e8d 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c @@ -2517,8 +2517,7 @@ struct pipe_ctx *dcn20_find_secondary_pipe(struct dc *dc, * if this primary pipe has a bottom pipe in prev. state * and if the bottom pipe is still available (which it should be), * pick that pipe as secondary - * Same logic applies for ODM pipes. Since mpo is not allowed with odm - * check in else case. + * Same logic applies for ODM pipes */ if (dc->current_state->res_ctx.pipe_ctx[primary_pipe->pipe_idx].bottom_pipe) { preferred_pipe_idx = dc->current_state->res_ctx.pipe_ctx[primary_pipe->pipe_idx].bottom_pipe->pipe_idx; @@ -2526,7 +2525,9 @@ struct pipe_ctx *dcn20_find_secondary_pipe(struct dc *dc, secondary_pipe = &res_ctx->pipe_ctx[preferred_pipe_idx]; secondary_pipe->pipe_idx = preferred_pipe_idx; } - } else if (dc->current_state->res_ctx.pipe_ctx[primary_pipe->pipe_idx].next_odm_pipe) { + } + if (secondary_pipe == NULL && + dc->current_state->res_ctx.pipe_ctx[primary_pipe->pipe_idx].next_odm_pipe) { preferred_pipe_idx = dc->current_state->res_ctx.pipe_ctx[primary_pipe->pipe_idx].next_odm_pipe->pipe_idx; if (res_ctx->pipe_ctx[preferred_pipe_idx].stream == NULL) { secondary_pipe = &res_ctx->pipe_ctx[preferred_pipe_idx]; -- GitLab From acc214bfafbafcd29d5d25d1ede5f11c14ffc147 Mon Sep 17 00:00:00 2001 From: Huang Rui Date: Tue, 19 Jan 2021 13:35:21 +0800 Subject: [PATCH 1168/2012] drm/amdgpu: remove gpu info firmware of green sardine The ip discovery is supported on green sardine, it doesn't need gpu info firmware anymore. Signed-off-by: Huang Rui Reviewed-by: Prike Liang Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org # 5.10.x --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 087afab67e221..cab1ebaf6d629 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -81,7 +81,6 @@ MODULE_FIRMWARE("amdgpu/navi10_gpu_info.bin"); MODULE_FIRMWARE("amdgpu/navi14_gpu_info.bin"); MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin"); MODULE_FIRMWARE("amdgpu/vangogh_gpu_info.bin"); -MODULE_FIRMWARE("amdgpu/green_sardine_gpu_info.bin"); #define AMDGPU_RESUME_MS 2000 -- GitLab From bdfc6fd6c8df1a9d481c4417df571e94a33168bf Mon Sep 17 00:00:00 2001 From: Jake Wang Date: Fri, 8 Jan 2021 12:27:51 -0500 Subject: [PATCH 1169/2012] drm/amd/display: Update dram_clock_change_latency for DCN2.1 [WHY] dram clock change latencies get updated using ddr4 latency table, but that update does not happen before validation. This value should not be the default and should be number received from df for better mode support. This may cause a PState hang on high refresh panels with short vblanks such as on 1080p 360hz or 300hz panels. [HOW] Update latency from 23.84 to 11.72. Tested-by: Daniel Wheeler Signed-off-by: Jake Wang Reviewed-by: Sung Lee Acked-by: Anson Jacob Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c index 1c88d2edd381c..b000b43a820d4 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c @@ -296,7 +296,7 @@ struct _vcs_dpi_soc_bounding_box_st dcn2_1_soc = { .num_banks = 8, .num_chans = 4, .vmm_page_size_bytes = 4096, - .dram_clock_change_latency_us = 23.84, + .dram_clock_change_latency_us = 11.72, .return_bus_width_bytes = 64, .dispclk_dppclk_vco_speed_mhz = 3600, .xfc_bus_transport_time_us = 4, -- GitLab From 8bc3d461d0a95bbcc2a0a908bbadc87e198a86a8 Mon Sep 17 00:00:00 2001 From: Aric Cyr Date: Thu, 29 Oct 2020 17:45:19 -0400 Subject: [PATCH 1170/2012] drm/amd/display: Allow PSTATE chnage when no displays are enabled [Why] When no displays are currently enabled, display driver should not disallow PSTATE switching. [How] Allow PSTATE switching if either the active configuration supports it, or there are no active displays. Tested-by: Daniel Wheeler Signed-off-by: Aric Cyr Reviewed-by: Jun Lei Acked-by: Anson Jacob Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr.c index 5b466f440d671..ab98c259ef695 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr.c @@ -251,6 +251,7 @@ static void dcn3_update_clocks(struct clk_mgr *clk_mgr_base, struct dmcu *dmcu = clk_mgr_base->ctx->dc->res_pool->dmcu; bool force_reset = false; bool update_uclk = false; + bool p_state_change_support; if (dc->work_arounds.skip_clock_update || !clk_mgr->smu_present) return; @@ -291,8 +292,9 @@ static void dcn3_update_clocks(struct clk_mgr *clk_mgr_base, clk_mgr_base->clks.socclk_khz = new_clocks->socclk_khz; clk_mgr_base->clks.prev_p_state_change_support = clk_mgr_base->clks.p_state_change_support; - if (should_update_pstate_support(safe_to_lower, new_clocks->p_state_change_support, clk_mgr_base->clks.p_state_change_support)) { - clk_mgr_base->clks.p_state_change_support = new_clocks->p_state_change_support; + p_state_change_support = new_clocks->p_state_change_support || (display_count == 0); + if (should_update_pstate_support(safe_to_lower, p_state_change_support, clk_mgr_base->clks.p_state_change_support)) { + clk_mgr_base->clks.p_state_change_support = p_state_change_support; /* to disable P-State switching, set UCLK min = max */ if (!clk_mgr_base->clks.p_state_change_support) -- GitLab From 4716a7c50c5c66d6ddc42401e1e0ba13b492e105 Mon Sep 17 00:00:00 2001 From: Bing Guo Date: Mon, 4 Jan 2021 14:09:41 -0500 Subject: [PATCH 1171/2012] drm/amd/display: Change function decide_dp_link_settings to avoid infinite looping Why: Function decide_dp_link_settings() loops infinitely when required bandwidth can't be supported. How: Check the required bandwidth against verified_link_cap before trying to find a link setting for it. Tested-by: Daniel Wheeler Signed-off-by: Bing Guo Reviewed-by: Jun Lei Acked-by: Anson Jacob Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c index 1bd1a09352906..28b4b084199c2 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c @@ -2399,6 +2399,9 @@ static bool decide_dp_link_settings(struct dc_link *link, struct dc_link_setting initial_link_setting; uint32_t link_bw; + if (req_bw > dc_link_bandwidth_kbps(link, &link->verified_link_cap)) + return false; + /* search for the minimum link setting that: * 1. is supported according to the link training result * 2. could support the b/w requested by the timing -- GitLab From c74f865f14318217350aa33363577cb95b06eb82 Mon Sep 17 00:00:00 2001 From: Nicholas Kazlauskas Date: Sun, 13 Dec 2020 10:59:01 -0500 Subject: [PATCH 1172/2012] drm/amd/display: Use hardware sequencer functions for PG control [Why & How] These can differ per ASIC or not be present. Don't call the dcn20 ones directly but rather the ones defined by the ASIC init table. Tested-by: Daniel Wheeler Signed-off-by: Nicholas Kazlauskas Reviewed-by: Eric Yang Acked-by: Anson Jacob Signed-off-by: Alex Deucher --- .../amd/display/dc/dcn10/dcn10_hw_sequencer.c | 18 ++++++++++++++---- .../gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c | 9 +++++++-- 2 files changed, 21 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c index cfc130e2d6fd0..017b67b830e66 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c @@ -647,8 +647,13 @@ static void power_on_plane( if (REG(DC_IP_REQUEST_CNTL)) { REG_SET(DC_IP_REQUEST_CNTL, 0, IP_REQUEST_EN, 1); - hws->funcs.dpp_pg_control(hws, plane_id, true); - hws->funcs.hubp_pg_control(hws, plane_id, true); + + if (hws->funcs.dpp_pg_control) + hws->funcs.dpp_pg_control(hws, plane_id, true); + + if (hws->funcs.hubp_pg_control) + hws->funcs.hubp_pg_control(hws, plane_id, true); + REG_SET(DC_IP_REQUEST_CNTL, 0, IP_REQUEST_EN, 0); DC_LOG_DEBUG( @@ -1082,8 +1087,13 @@ void dcn10_plane_atomic_power_down(struct dc *dc, if (REG(DC_IP_REQUEST_CNTL)) { REG_SET(DC_IP_REQUEST_CNTL, 0, IP_REQUEST_EN, 1); - hws->funcs.dpp_pg_control(hws, dpp->inst, false); - hws->funcs.hubp_pg_control(hws, hubp->inst, false); + + if (hws->funcs.dpp_pg_control) + hws->funcs.dpp_pg_control(hws, dpp->inst, false); + + if (hws->funcs.hubp_pg_control) + hws->funcs.hubp_pg_control(hws, hubp->inst, false); + dpp->funcs->dpp_reset(dpp); REG_SET(DC_IP_REQUEST_CNTL, 0, IP_REQUEST_EN, 0); diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c index cb822df21b7c5..480d928cb1ca6 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c @@ -1062,8 +1062,13 @@ static void dcn20_power_on_plane( if (REG(DC_IP_REQUEST_CNTL)) { REG_SET(DC_IP_REQUEST_CNTL, 0, IP_REQUEST_EN, 1); - dcn20_dpp_pg_control(hws, pipe_ctx->plane_res.dpp->inst, true); - dcn20_hubp_pg_control(hws, pipe_ctx->plane_res.hubp->inst, true); + + if (hws->funcs.dpp_pg_control) + hws->funcs.dpp_pg_control(hws, pipe_ctx->plane_res.dpp->inst, true); + + if (hws->funcs.hubp_pg_control) + hws->funcs.hubp_pg_control(hws, pipe_ctx->plane_res.hubp->inst, true); + REG_SET(DC_IP_REQUEST_CNTL, 0, IP_REQUEST_EN, 0); DC_LOG_DEBUG( -- GitLab From 4b08d8c78360241d270396a9de6eb774e88acd00 Mon Sep 17 00:00:00 2001 From: Vladimir Stempen Date: Mon, 4 Jan 2021 12:05:26 -0500 Subject: [PATCH 1173/2012] drm/amd/display: Fixed corruptions on HPDRX link loss restore [why] Heavy corruption or blank screen reported on wake, with 6k display connected and FEC enabled [how] When Disable/Enable stream for display pipes on HPDRX, DC should take into account ODM split pipes. Tested-by: Daniel Wheeler Signed-off-by: Vladimir Stempen Reviewed-by: Aric Cyr Acked-by: Anson Jacob Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c index 28b4b084199c2..f95bade596242 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c @@ -3048,14 +3048,14 @@ bool dc_link_handle_hpd_rx_irq(struct dc_link *link, union hpd_irq_data *out_hpd for (i = 0; i < MAX_PIPES; i++) { pipe_ctx = &link->dc->current_state->res_ctx.pipe_ctx[i]; if (pipe_ctx && pipe_ctx->stream && !pipe_ctx->stream->dpms_off && - pipe_ctx->stream->link == link) + pipe_ctx->stream->link == link && !pipe_ctx->prev_odm_pipe) core_link_disable_stream(pipe_ctx); } for (i = 0; i < MAX_PIPES; i++) { pipe_ctx = &link->dc->current_state->res_ctx.pipe_ctx[i]; if (pipe_ctx && pipe_ctx->stream && !pipe_ctx->stream->dpms_off && - pipe_ctx->stream->link == link) + pipe_ctx->stream->link == link && !pipe_ctx->prev_odm_pipe) core_link_enable_stream(link->dc->current_state, pipe_ctx); } -- GitLab From 51e87da7d4014f49769dcf60b8626a81492df2c4 Mon Sep 17 00:00:00 2001 From: Prike Liang Date: Thu, 17 Dec 2020 13:55:46 +0800 Subject: [PATCH 1174/2012] drm/amdgpu/pm: no need GPU status set since mmnbif_gpu_BIF_DOORBELL_FENCE_CNTL added in FSDL In the renoir there is no need GpuChangeState message set to exit gfxoff in the s0i3 resume since mmnbif_gpu_BIF_DOORBELL_FENCE_CNTL has been added in the s0i3 FSDL. Signed-off-by: Prike Liang Reviewed-by: Huang Rui Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c index f743685a20e8a..9a96970380160 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c @@ -1121,7 +1121,7 @@ static ssize_t renoir_get_gpu_metrics(struct smu_context *smu, static int renoir_gfx_state_change_set(struct smu_context *smu, uint32_t state) { - return smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_GpuChangeState, state, NULL); + return 0; } static const struct pptable_funcs renoir_ppt_funcs = { -- GitLab From 8f0d60fe8bf24fd79892a1a22f16c0629d5af6d3 Mon Sep 17 00:00:00 2001 From: Jinzhou Su Date: Mon, 18 Jan 2021 19:14:27 +0800 Subject: [PATCH 1175/2012] drm/amdgpu: modify GCR_GENERAL_CNTL for Vangogh GCR_GENERAL_CNTL is defined differently in gc_10_1_0_offset.h and gc_10_3_0_offset.h. Update GCR_GENERAL_CNTL for Vangogh. Signed-off-by: Jinzhou Su Reviewed-by: Huang Rui Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 619d34c041ee0..346963e3cf731 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -119,6 +119,8 @@ #define mmVGT_ESGS_RING_SIZE_Vangogh_BASE_IDX 1 #define mmSPI_CONFIG_CNTL_Vangogh 0x2440 #define mmSPI_CONFIG_CNTL_Vangogh_BASE_IDX 1 +#define mmGCR_GENERAL_CNTL_Vangogh 0x1580 +#define mmGCR_GENERAL_CNTL_Vangogh_BASE_IDX 0 #define mmCP_HYP_PFP_UCODE_ADDR 0x5814 #define mmCP_HYP_PFP_UCODE_ADDR_BASE_IDX 1 @@ -3244,7 +3246,7 @@ static const struct soc15_reg_golden golden_settings_gc_10_3_vangogh[] = SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG4, 0xffffffff, 0x00800000), SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_EXCEPTION_CONTROL, 0x7fff0f1f, 0x00b80000), SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0x0c1807ff, 0x00000142), - SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCR_GENERAL_CNTL, 0x1ff1ffff, 0x00000500), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCR_GENERAL_CNTL_Vangogh, 0x1ff1ffff, 0x00000500), SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL1_PIPE_STEER, 0x000000ff, 0x000000e4), SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2_PIPE_STEER_0, 0x77777777, 0x32103210), SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2_PIPE_STEER_1, 0x77777777, 0x32103210), -- GitLab From 39263a2f886817a376fc27ba9af14c5053f0934b Mon Sep 17 00:00:00 2001 From: Aaron Liu Date: Mon, 18 Jan 2021 17:05:00 +0800 Subject: [PATCH 1176/2012] drm/amdgpu: update mmhub mgcg&ls for mmhub_v2_3 Starting from vangogh, the ATCL2 and DAGB0 registers relative to mgcg/ls has changed. For MGCG: Replace mmMM_ATC_L2_MISC_CG with mmMM_ATC_L2_CGTT_CLK_CTRL. For MGLS: Replace mmMM_ATC_L2_MISC_CG with mmMM_ATC_L2_CGTT_CLK_CTRL. Add DAGB0_(WR/RD)_CGTT_CLK_CTRL registers. Signed-off-by: Aaron Liu Acked-by: Huang Rui Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c | 84 ++++++++++++++++++------- 1 file changed, 61 insertions(+), 23 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c index 07104a1de3082..1961745e89c73 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c @@ -491,12 +491,11 @@ mmhub_v2_3_update_medium_grain_clock_gating(struct amdgpu_device *adev, { uint32_t def, data, def1, data1; - def = data = RREG32_SOC15(MMHUB, 0, mmMM_ATC_L2_MISC_CG); + def = data = RREG32_SOC15(MMHUB, 0, mmMM_ATC_L2_CGTT_CLK_CTRL); def1 = data1 = RREG32_SOC15(MMHUB, 0, mmDAGB0_CNTL_MISC2); if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_MGCG)) { - data |= MM_ATC_L2_MISC_CG__ENABLE_MASK; - + data &= ~MM_ATC_L2_CGTT_CLK_CTRL__SOFT_OVERRIDE_MASK; data1 &= ~(DAGB0_CNTL_MISC2__DISABLE_WRREQ_CG_MASK | DAGB0_CNTL_MISC2__DISABLE_WRRET_CG_MASK | DAGB0_CNTL_MISC2__DISABLE_RDREQ_CG_MASK | @@ -505,8 +504,7 @@ mmhub_v2_3_update_medium_grain_clock_gating(struct amdgpu_device *adev, DAGB0_CNTL_MISC2__DISABLE_TLBRD_CG_MASK); } else { - data &= ~MM_ATC_L2_MISC_CG__ENABLE_MASK; - + data |= MM_ATC_L2_CGTT_CLK_CTRL__SOFT_OVERRIDE_MASK; data1 |= (DAGB0_CNTL_MISC2__DISABLE_WRREQ_CG_MASK | DAGB0_CNTL_MISC2__DISABLE_WRRET_CG_MASK | DAGB0_CNTL_MISC2__DISABLE_RDREQ_CG_MASK | @@ -516,7 +514,7 @@ mmhub_v2_3_update_medium_grain_clock_gating(struct amdgpu_device *adev, } if (def != data) - WREG32_SOC15(MMHUB, 0, mmMM_ATC_L2_MISC_CG, data); + WREG32_SOC15(MMHUB, 0, mmMM_ATC_L2_CGTT_CLK_CTRL, data); if (def1 != data1) WREG32_SOC15(MMHUB, 0, mmDAGB0_CNTL_MISC2, data1); } @@ -525,17 +523,44 @@ static void mmhub_v2_3_update_medium_grain_light_sleep(struct amdgpu_device *adev, bool enable) { - uint32_t def, data; - - def = data = RREG32_SOC15(MMHUB, 0, mmMM_ATC_L2_MISC_CG); - - if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_LS)) - data |= MM_ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK; - else - data &= ~MM_ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK; + uint32_t def, data, def1, data1, def2, data2; + + def = data = RREG32_SOC15(MMHUB, 0, mmMM_ATC_L2_CGTT_CLK_CTRL); + def1 = data1 = RREG32_SOC15(MMHUB, 0, mmDAGB0_WR_CGTT_CLK_CTRL); + def2 = data2 = RREG32_SOC15(MMHUB, 0, mmDAGB0_RD_CGTT_CLK_CTRL); + + if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_LS)) { + data &= ~MM_ATC_L2_CGTT_CLK_CTRL__MGLS_OVERRIDE_MASK; + data1 &= !(DAGB0_WR_CGTT_CLK_CTRL__LS_OVERRIDE_MASK | + DAGB0_WR_CGTT_CLK_CTRL__LS_OVERRIDE_WRITE_MASK | + DAGB0_WR_CGTT_CLK_CTRL__LS_OVERRIDE_READ_MASK | + DAGB0_WR_CGTT_CLK_CTRL__LS_OVERRIDE_RETURN_MASK | + DAGB0_WR_CGTT_CLK_CTRL__LS_OVERRIDE_REGISTER_MASK); + data2 &= !(DAGB0_RD_CGTT_CLK_CTRL__LS_OVERRIDE_MASK | + DAGB0_RD_CGTT_CLK_CTRL__LS_OVERRIDE_WRITE_MASK | + DAGB0_RD_CGTT_CLK_CTRL__LS_OVERRIDE_READ_MASK | + DAGB0_RD_CGTT_CLK_CTRL__LS_OVERRIDE_RETURN_MASK | + DAGB0_RD_CGTT_CLK_CTRL__LS_OVERRIDE_REGISTER_MASK); + } else { + data |= MM_ATC_L2_CGTT_CLK_CTRL__MGLS_OVERRIDE_MASK; + data1 |= (DAGB0_WR_CGTT_CLK_CTRL__LS_OVERRIDE_MASK | + DAGB0_WR_CGTT_CLK_CTRL__LS_OVERRIDE_WRITE_MASK | + DAGB0_WR_CGTT_CLK_CTRL__LS_OVERRIDE_READ_MASK | + DAGB0_WR_CGTT_CLK_CTRL__LS_OVERRIDE_RETURN_MASK | + DAGB0_WR_CGTT_CLK_CTRL__LS_OVERRIDE_REGISTER_MASK); + data2 |= (DAGB0_RD_CGTT_CLK_CTRL__LS_OVERRIDE_MASK | + DAGB0_RD_CGTT_CLK_CTRL__LS_OVERRIDE_WRITE_MASK | + DAGB0_RD_CGTT_CLK_CTRL__LS_OVERRIDE_READ_MASK | + DAGB0_RD_CGTT_CLK_CTRL__LS_OVERRIDE_RETURN_MASK | + DAGB0_RD_CGTT_CLK_CTRL__LS_OVERRIDE_REGISTER_MASK); + } if (def != data) - WREG32_SOC15(MMHUB, 0, mmMM_ATC_L2_MISC_CG, data); + WREG32_SOC15(MMHUB, 0, mmMM_ATC_L2_CGTT_CLK_CTRL, data); + if (def1 != data1) + WREG32_SOC15(MMHUB, 0, mmDAGB0_WR_CGTT_CLK_CTRL, data1); + if (def2 != data2) + WREG32_SOC15(MMHUB, 0, mmDAGB0_RD_CGTT_CLK_CTRL, data2); } static int mmhub_v2_3_set_clockgating(struct amdgpu_device *adev, @@ -554,26 +579,39 @@ static int mmhub_v2_3_set_clockgating(struct amdgpu_device *adev, static void mmhub_v2_3_get_clockgating(struct amdgpu_device *adev, u32 *flags) { - int data, data1; + int data, data1, data2, data3; if (amdgpu_sriov_vf(adev)) *flags = 0; - data = RREG32_SOC15(MMHUB, 0, mmMM_ATC_L2_MISC_CG); - data1 = RREG32_SOC15(MMHUB, 0, mmDAGB0_CNTL_MISC2); + data = RREG32_SOC15(MMHUB, 0, mmDAGB0_CNTL_MISC2); + data1 = RREG32_SOC15(MMHUB, 0, mmMM_ATC_L2_CGTT_CLK_CTRL); + data2 = RREG32_SOC15(MMHUB, 0, mmDAGB0_WR_CGTT_CLK_CTRL); + data3 = RREG32_SOC15(MMHUB, 0, mmDAGB0_RD_CGTT_CLK_CTRL); /* AMD_CG_SUPPORT_MC_MGCG */ - if ((data & MM_ATC_L2_MISC_CG__ENABLE_MASK) && - !(data1 & (DAGB0_CNTL_MISC2__DISABLE_WRREQ_CG_MASK | + if (!(data & (DAGB0_CNTL_MISC2__DISABLE_WRREQ_CG_MASK | DAGB0_CNTL_MISC2__DISABLE_WRRET_CG_MASK | DAGB0_CNTL_MISC2__DISABLE_RDREQ_CG_MASK | DAGB0_CNTL_MISC2__DISABLE_RDRET_CG_MASK | DAGB0_CNTL_MISC2__DISABLE_TLBWR_CG_MASK | - DAGB0_CNTL_MISC2__DISABLE_TLBRD_CG_MASK))) - *flags |= AMD_CG_SUPPORT_MC_MGCG; + DAGB0_CNTL_MISC2__DISABLE_TLBRD_CG_MASK)) + && !(data1 & MM_ATC_L2_CGTT_CLK_CTRL__SOFT_OVERRIDE_MASK)) { + *flags |= AMD_CG_SUPPORT_MC_MGCG; + } /* AMD_CG_SUPPORT_MC_LS */ - if (data & MM_ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK) + if (!(data1 & MM_ATC_L2_CGTT_CLK_CTRL__MGLS_OVERRIDE_MASK) + && !(data2 & (DAGB0_WR_CGTT_CLK_CTRL__LS_OVERRIDE_MASK | + DAGB0_WR_CGTT_CLK_CTRL__LS_OVERRIDE_WRITE_MASK | + DAGB0_WR_CGTT_CLK_CTRL__LS_OVERRIDE_READ_MASK | + DAGB0_WR_CGTT_CLK_CTRL__LS_OVERRIDE_RETURN_MASK | + DAGB0_WR_CGTT_CLK_CTRL__LS_OVERRIDE_REGISTER_MASK)) + && !(data3 & (DAGB0_RD_CGTT_CLK_CTRL__LS_OVERRIDE_MASK | + DAGB0_RD_CGTT_CLK_CTRL__LS_OVERRIDE_WRITE_MASK | + DAGB0_RD_CGTT_CLK_CTRL__LS_OVERRIDE_READ_MASK | + DAGB0_RD_CGTT_CLK_CTRL__LS_OVERRIDE_RETURN_MASK | + DAGB0_RD_CGTT_CLK_CTRL__LS_OVERRIDE_REGISTER_MASK))) *flags |= AMD_CG_SUPPORT_MC_LS; } -- GitLab From 9d5ae6f3c50a6f718b6d4be3c7b0828966e01b05 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Wed, 20 Jan 2021 14:49:07 +0100 Subject: [PATCH 1177/2012] libceph: fix "Boolean result is used in bitwise operation" warning This line dates back to 2013, but cppcheck complained because commit 2f713615ddd9 ("libceph: move msgr1 protocol implementation to its own file") moved it. Add parenthesis to silence the warning. Reported-by: kernel test robot Signed-off-by: Ilya Dryomov --- net/ceph/messenger_v1.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ceph/messenger_v1.c b/net/ceph/messenger_v1.c index 04f653b3c8973..2cb5ffdf071af 100644 --- a/net/ceph/messenger_v1.c +++ b/net/ceph/messenger_v1.c @@ -1100,7 +1100,7 @@ static int read_partial_message(struct ceph_connection *con) if (ret < 0) return ret; - BUG_ON(!con->in_msg ^ skip); + BUG_ON((!con->in_msg) ^ skip); if (skip) { /* skip this message */ dout("alloc_msg said skip message\n"); -- GitLab From 4eaad21a6ac9865df7f31983232ed5928450458d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 20 Jan 2021 21:46:29 +0100 Subject: [PATCH 1178/2012] kernfs: implement ->read_iter Switch kernfs to implement the read_iter method instead of plain old read to prepare to supporting splice and sendfile again. Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20210120204631.274206-2-hch@lst.de Signed-off-by: Greg Kroah-Hartman --- fs/kernfs/file.c | 35 ++++++++++++----------------------- 1 file changed, 12 insertions(+), 23 deletions(-) diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c index f277d023ebcd1..8276e4c8722d8 100644 --- a/fs/kernfs/file.c +++ b/fs/kernfs/file.c @@ -14,6 +14,7 @@ #include #include #include +#include #include "kernfs-internal.h" @@ -180,11 +181,10 @@ static const struct seq_operations kernfs_seq_ops = { * it difficult to use seq_file. Implement simplistic custom buffering for * bin files. */ -static ssize_t kernfs_file_direct_read(struct kernfs_open_file *of, - char __user *user_buf, size_t count, - loff_t *ppos) +static ssize_t kernfs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter) { - ssize_t len = min_t(size_t, count, PAGE_SIZE); + struct kernfs_open_file *of = kernfs_of(iocb->ki_filp); + ssize_t len = min_t(size_t, iov_iter_count(iter), PAGE_SIZE); const struct kernfs_ops *ops; char *buf; @@ -210,7 +210,7 @@ static ssize_t kernfs_file_direct_read(struct kernfs_open_file *of, of->event = atomic_read(&of->kn->attr.open->event); ops = kernfs_ops(of->kn); if (ops->read) - len = ops->read(of, buf, len, *ppos); + len = ops->read(of, buf, len, iocb->ki_pos); else len = -EINVAL; @@ -220,12 +220,12 @@ static ssize_t kernfs_file_direct_read(struct kernfs_open_file *of, if (len < 0) goto out_free; - if (copy_to_user(user_buf, buf, len)) { + if (copy_to_iter(buf, len, iter) != len) { len = -EFAULT; goto out_free; } - *ppos += len; + iocb->ki_pos += len; out_free: if (buf == of->prealloc_buf) @@ -235,22 +235,11 @@ static ssize_t kernfs_file_direct_read(struct kernfs_open_file *of, return len; } -/** - * kernfs_fop_read - kernfs vfs read callback - * @file: file pointer - * @user_buf: data to write - * @count: number of bytes - * @ppos: starting offset - */ -static ssize_t kernfs_fop_read(struct file *file, char __user *user_buf, - size_t count, loff_t *ppos) +static ssize_t kernfs_fop_read_iter(struct kiocb *iocb, struct iov_iter *iter) { - struct kernfs_open_file *of = kernfs_of(file); - - if (of->kn->flags & KERNFS_HAS_SEQ_SHOW) - return seq_read(file, user_buf, count, ppos); - else - return kernfs_file_direct_read(of, user_buf, count, ppos); + if (kernfs_of(iocb->ki_filp)->kn->flags & KERNFS_HAS_SEQ_SHOW) + return seq_read_iter(iocb, iter); + return kernfs_file_read_iter(iocb, iter); } /** @@ -960,7 +949,7 @@ void kernfs_notify(struct kernfs_node *kn) EXPORT_SYMBOL_GPL(kernfs_notify); const struct file_operations kernfs_file_fops = { - .read = kernfs_fop_read, + .read_iter = kernfs_fop_read_iter, .write = kernfs_fop_write, .llseek = generic_file_llseek, .mmap = kernfs_fop_mmap, -- GitLab From cc099e0b399889c6485c88368b19824b087c9f8c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 20 Jan 2021 21:46:30 +0100 Subject: [PATCH 1179/2012] kernfs: implement ->write_iter Switch kernfs to implement the write_iter method instead of plain old write to prepare to supporting splice and sendfile again. Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20210120204631.274206-3-hch@lst.de Signed-off-by: Greg Kroah-Hartman --- fs/kernfs/file.c | 28 ++++++++++------------------ 1 file changed, 10 insertions(+), 18 deletions(-) diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c index 8276e4c8722d8..b1a5cccf189ec 100644 --- a/fs/kernfs/file.c +++ b/fs/kernfs/file.c @@ -242,13 +242,7 @@ static ssize_t kernfs_fop_read_iter(struct kiocb *iocb, struct iov_iter *iter) return kernfs_file_read_iter(iocb, iter); } -/** - * kernfs_fop_write - kernfs vfs write callback - * @file: file pointer - * @user_buf: data to write - * @count: number of bytes - * @ppos: starting offset - * +/* * Copy data in from userland and pass it to the matching kernfs write * operation. * @@ -258,20 +252,18 @@ static ssize_t kernfs_fop_read_iter(struct kiocb *iocb, struct iov_iter *iter) * modify only the the value you're changing, then write entire buffer * back. */ -static ssize_t kernfs_fop_write(struct file *file, const char __user *user_buf, - size_t count, loff_t *ppos) +static ssize_t kernfs_fop_write_iter(struct kiocb *iocb, struct iov_iter *iter) { - struct kernfs_open_file *of = kernfs_of(file); + struct kernfs_open_file *of = kernfs_of(iocb->ki_filp); + ssize_t len = iov_iter_count(iter); const struct kernfs_ops *ops; - ssize_t len; char *buf; if (of->atomic_write_len) { - len = count; if (len > of->atomic_write_len) return -E2BIG; } else { - len = min_t(size_t, count, PAGE_SIZE); + len = min_t(size_t, len, PAGE_SIZE); } buf = of->prealloc_buf; @@ -282,7 +274,7 @@ static ssize_t kernfs_fop_write(struct file *file, const char __user *user_buf, if (!buf) return -ENOMEM; - if (copy_from_user(buf, user_buf, len)) { + if (copy_from_iter(buf, len, iter) != len) { len = -EFAULT; goto out_free; } @@ -301,7 +293,7 @@ static ssize_t kernfs_fop_write(struct file *file, const char __user *user_buf, ops = kernfs_ops(of->kn); if (ops->write) - len = ops->write(of, buf, len, *ppos); + len = ops->write(of, buf, len, iocb->ki_pos); else len = -EINVAL; @@ -309,7 +301,7 @@ static ssize_t kernfs_fop_write(struct file *file, const char __user *user_buf, mutex_unlock(&of->mutex); if (len > 0) - *ppos += len; + iocb->ki_pos += len; out_free: if (buf == of->prealloc_buf) @@ -662,7 +654,7 @@ static int kernfs_fop_open(struct inode *inode, struct file *file) /* * Write path needs to atomic_write_len outside active reference. - * Cache it in open_file. See kernfs_fop_write() for details. + * Cache it in open_file. See kernfs_fop_write_iter() for details. */ of->atomic_write_len = ops->atomic_write_len; @@ -950,7 +942,7 @@ EXPORT_SYMBOL_GPL(kernfs_notify); const struct file_operations kernfs_file_fops = { .read_iter = kernfs_fop_read_iter, - .write = kernfs_fop_write, + .write_iter = kernfs_fop_write_iter, .llseek = generic_file_llseek, .mmap = kernfs_fop_mmap, .open = kernfs_fop_open, -- GitLab From f2d6c2708bd84ca953fa6b6ca5717e79eb0140c7 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 20 Jan 2021 21:46:31 +0100 Subject: [PATCH 1180/2012] kernfs: wire up ->splice_read and ->splice_write Wire up the splice_read and splice_write methods to the default helpers using ->read_iter and ->write_iter now that those are implemented for kernfs. This restores support to use splice and sendfile on kernfs files. Fixes: 36e2c7421f02 ("fs: don't allow splice read/write without explicit ops") Reported-by: Siddharth Gupta Tested-by: Siddharth Gupta Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20210120204631.274206-4-hch@lst.de Signed-off-by: Greg Kroah-Hartman --- fs/kernfs/file.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c index b1a5cccf189ec..c757193121475 100644 --- a/fs/kernfs/file.c +++ b/fs/kernfs/file.c @@ -949,6 +949,8 @@ const struct file_operations kernfs_file_fops = { .release = kernfs_fop_release, .poll = kernfs_fop_poll, .fsync = noop_fsync, + .splice_read = generic_file_splice_read, + .splice_write = iter_file_splice_write, }; /** -- GitLab From 3d1cf435e201d1fd63e4346b141881aed086effd Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 15 Jan 2021 19:30:51 +0100 Subject: [PATCH 1181/2012] driver core: Extend device_is_dependent() If the device passed as the target (second argument) to device_is_dependent() is not completely registered (that is, it has been initialized, but not added yet), but the parent pointer of it is set, it may be missing from the list of the parent's children and device_for_each_child() called by device_is_dependent() cannot be relied on to catch that dependency. For this reason, modify device_is_dependent() to check the ancestors of the target device by following its parent pointer in addition to the device_for_each_child() walk. Fixes: 9ed9895370ae ("driver core: Functional dependencies tracking support") Reported-by: Stephan Gerhold Tested-by: Stephan Gerhold Reviewed-by: Saravana Kannan Signed-off-by: Rafael J. Wysocki Link: https://lore.kernel.org/r/17705994.d592GUb2YH@kreacher Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/base/core.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/drivers/base/core.c b/drivers/base/core.c index 25e08e5f40bd9..3819fd012e271 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -208,6 +208,16 @@ int device_links_read_lock_held(void) #endif #endif /* !CONFIG_SRCU */ +static bool device_is_ancestor(struct device *dev, struct device *target) +{ + while (target->parent) { + target = target->parent; + if (dev == target) + return true; + } + return false; +} + /** * device_is_dependent - Check if one device depends on another one * @dev: Device to check dependencies for. @@ -221,7 +231,12 @@ int device_is_dependent(struct device *dev, void *target) struct device_link *link; int ret; - if (dev == target) + /* + * The "ancestors" check is needed to catch the case when the target + * device has not been completely initialized yet and it is still + * missing from the list of children of its parent device. + */ + if (dev == target || device_is_ancestor(dev, target)) return 1; ret = device_for_each_child(dev, target, device_is_dependent); -- GitLab From 927633a6d20af319d986f3e42c3ef9f6d7835008 Mon Sep 17 00:00:00 2001 From: Wang Hui Date: Fri, 15 Jan 2021 22:59:16 +0300 Subject: [PATCH 1182/2012] stm class: Fix module init return on allocation failure In stm_heartbeat_init(): return value gets reset after the first iteration by stm_source_register_device(), so allocation failures after that will, after a clean up, return success. Fix that. Fixes: 119291853038 ("stm class: Add heartbeat stm source device") Reported-by: Hulk Robot Signed-off-by: Wang Hui Signed-off-by: Alexander Shishkin Link: https://lore.kernel.org/r/20210115195917.3184-2-alexander.shishkin@linux.intel.com Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/hwtracing/stm/heartbeat.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/hwtracing/stm/heartbeat.c b/drivers/hwtracing/stm/heartbeat.c index 3e7df1c0477f7..81d7b21d31ec2 100644 --- a/drivers/hwtracing/stm/heartbeat.c +++ b/drivers/hwtracing/stm/heartbeat.c @@ -64,7 +64,7 @@ static void stm_heartbeat_unlink(struct stm_source_data *data) static int stm_heartbeat_init(void) { - int i, ret = -ENOMEM; + int i, ret; if (nr_devs < 0 || nr_devs > STM_HEARTBEAT_MAX) return -EINVAL; @@ -72,8 +72,10 @@ static int stm_heartbeat_init(void) for (i = 0; i < nr_devs; i++) { stm_heartbeat[i].data.name = kasprintf(GFP_KERNEL, "heartbeat.%d", i); - if (!stm_heartbeat[i].data.name) + if (!stm_heartbeat[i].data.name) { + ret = -ENOMEM; goto fail_unregister; + } stm_heartbeat[i].data.nr_chans = 1; stm_heartbeat[i].data.link = stm_heartbeat_link; -- GitLab From cb5c681ab9037e25fcca20689c82cf034566d610 Mon Sep 17 00:00:00 2001 From: Alexander Shishkin Date: Fri, 15 Jan 2021 22:59:17 +0300 Subject: [PATCH 1183/2012] intel_th: pci: Add Alder Lake-P support This adds support for the Trace Hub in Alder Lake-P. Signed-off-by: Alexander Shishkin Link: https://lore.kernel.org/r/20210115195917.3184-3-alexander.shishkin@linux.intel.com Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/hwtracing/intel_th/pci.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/hwtracing/intel_th/pci.c b/drivers/hwtracing/intel_th/pci.c index 52acd77438ede..251e75c9ba9d0 100644 --- a/drivers/hwtracing/intel_th/pci.c +++ b/drivers/hwtracing/intel_th/pci.c @@ -268,6 +268,11 @@ static const struct pci_device_id intel_th_pci_id_table[] = { PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7aa6), .driver_data = (kernel_ulong_t)&intel_th_2x, }, + { + /* Alder Lake-P */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x51a6), + .driver_data = (kernel_ulong_t)&intel_th_2x, + }, { /* Alder Lake CPU */ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x466f), -- GitLab From c1c3ba1f78354a20222d291ed6fedd17b7a74fd7 Mon Sep 17 00:00:00 2001 From: Ricardo Ribalda Date: Thu, 21 Jan 2021 18:16:43 +0100 Subject: [PATCH 1184/2012] ASoC: Intel: Skylake: skl-topology: Fix OOPs ib skl_tplg_complete If dobj->control is not initialized we end up in an OOPs during skl_tplg_complete: [ 26.553358] BUG: kernel NULL pointer dereference, address: 0000000000000078 [ 26.561151] #PF: supervisor read access in kernel mode [ 26.566897] #PF: error_code(0x0000) - not-present page [ 26.572642] PGD 0 P4D 0 [ 26.575479] Oops: 0000 [#1] PREEMPT SMP PTI [ 26.580158] CPU: 2 PID: 2082 Comm: udevd Tainted: G C 5.4.81 #4 [ 26.588232] Hardware name: HP Soraka/Soraka, BIOS Google_Soraka.10431.106.0 12/03/2019 [ 26.597082] RIP: 0010:skl_tplg_complete+0x70/0x144 [snd_soc_skl] Fixes: 2d744ecf2b98 ("ASoC: Intel: Skylake: Automatic DMIC format configuration according to information from NHL") Signed-off-by: Ricardo Ribalda Reviewed-by: Cezary Rojewski Tested-by: Lukasz Majczak Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20210121171644.131059-1-ribalda@chromium.org Signed-off-by: Mark Brown --- sound/soc/intel/skylake/skl-topology.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/sound/soc/intel/skylake/skl-topology.c b/sound/soc/intel/skylake/skl-topology.c index ae466cd592922..1ef30ca45410e 100644 --- a/sound/soc/intel/skylake/skl-topology.c +++ b/sound/soc/intel/skylake/skl-topology.c @@ -3619,15 +3619,16 @@ static void skl_tplg_complete(struct snd_soc_component *component) list_for_each_entry(dobj, &component->dobj_list, list) { struct snd_kcontrol *kcontrol = dobj->control.kcontrol; - struct soc_enum *se = - (struct soc_enum *)kcontrol->private_value; - char **texts = dobj->control.dtexts; + struct soc_enum *se; + char **texts; char chan_text[4]; - if (dobj->type != SND_SOC_DOBJ_ENUM || - dobj->control.kcontrol->put != - skl_tplg_multi_config_set_dmic) + if (dobj->type != SND_SOC_DOBJ_ENUM || !kcontrol || + kcontrol->put != skl_tplg_multi_config_set_dmic) continue; + + se = (struct soc_enum *)kcontrol->private_value; + texts = dobj->control.dtexts; sprintf(chan_text, "c%d", mach->mach_params.dmic_num); for (i = 0; i < se->items; i++) { -- GitLab From 1d8fe0648e118fd495a2cb393a34eb8d428e7808 Mon Sep 17 00:00:00 2001 From: Ricardo Ribalda Date: Thu, 21 Jan 2021 18:16:44 +0100 Subject: [PATCH 1185/2012] ASoC: Intel: Skylake: Zero snd_ctl_elem_value Clear struct snd_ctl_elem_value before calling ->put() to avoid any data leak. Signed-off-by: Ricardo Ribalda Reviewed-by: Cezary Rojewski Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20210121171644.131059-2-ribalda@chromium.org Signed-off-by: Mark Brown --- sound/soc/intel/skylake/skl-topology.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/intel/skylake/skl-topology.c b/sound/soc/intel/skylake/skl-topology.c index 1ef30ca45410e..b824086203b9e 100644 --- a/sound/soc/intel/skylake/skl-topology.c +++ b/sound/soc/intel/skylake/skl-topology.c @@ -3632,7 +3632,7 @@ static void skl_tplg_complete(struct snd_soc_component *component) sprintf(chan_text, "c%d", mach->mach_params.dmic_num); for (i = 0; i < se->items; i++) { - struct snd_ctl_elem_value val; + struct snd_ctl_elem_value val = {}; if (strstr(texts[i], chan_text)) { val.value.enumerated.item[0] = i; -- GitLab From 9354f1b421f76f8368be13954f87d07bcbd6fffe Mon Sep 17 00:00:00 2001 From: Ofir Bitton Date: Sun, 17 Jan 2021 09:39:37 +0200 Subject: [PATCH 1186/2012] habanalabs: zero pci counters packet before submit to FW Driver does not zero some pci counters packets before sending to FW. This causes an out of sync PI/CI between driver and FW. Signed-off-by: Ofir Bitton Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/firmware_if.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/misc/habanalabs/common/firmware_if.c b/drivers/misc/habanalabs/common/firmware_if.c index 20f77f58edef5..c9a12980218ac 100644 --- a/drivers/misc/habanalabs/common/firmware_if.c +++ b/drivers/misc/habanalabs/common/firmware_if.c @@ -402,6 +402,10 @@ int hl_fw_cpucp_pci_counters_get(struct hl_device *hdev, } counters->rx_throughput = result; + memset(&pkt, 0, sizeof(pkt)); + pkt.ctl = cpu_to_le32(CPUCP_PACKET_PCIE_THROUGHPUT_GET << + CPUCP_PKT_CTL_OPCODE_SHIFT); + /* Fetch PCI tx counter */ pkt.index = cpu_to_le32(cpucp_pcie_throughput_tx); rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), @@ -414,6 +418,7 @@ int hl_fw_cpucp_pci_counters_get(struct hl_device *hdev, counters->tx_throughput = result; /* Fetch PCI replay counter */ + memset(&pkt, 0, sizeof(pkt)); pkt.ctl = cpu_to_le32(CPUCP_PACKET_PCIE_REPLAY_CNT_GET << CPUCP_PKT_CTL_OPCODE_SHIFT); -- GitLab From f8abaf379bfe19600f96ae79a6759eb37039ae05 Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Mon, 18 Jan 2021 13:19:51 +0200 Subject: [PATCH 1187/2012] habanalabs: fix backward compatibility of idle check Need to take the lower 32 bits of the driver's 64-bit idle mask and put it in the legacy 32-bit variable that the userspace reads to know the idle mask. Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/habanalabs_ioctl.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/misc/habanalabs/common/habanalabs_ioctl.c b/drivers/misc/habanalabs/common/habanalabs_ioctl.c index 12efbd9d2e3a0..d25892d61ec9d 100644 --- a/drivers/misc/habanalabs/common/habanalabs_ioctl.c +++ b/drivers/misc/habanalabs/common/habanalabs_ioctl.c @@ -133,6 +133,8 @@ static int hw_idle(struct hl_device *hdev, struct hl_info_args *args) hw_idle.is_idle = hdev->asic_funcs->is_device_idle(hdev, &hw_idle.busy_engines_mask_ext, NULL); + hw_idle.busy_engines_mask = + lower_32_bits(hw_idle.busy_engines_mask_ext); return copy_to_user(out, &hw_idle, min((size_t) max_size, sizeof(hw_idle))) ? -EFAULT : 0; -- GitLab From 2dc4a6d79168e7e426e8ddf8e7219c9ffd13b2b1 Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Mon, 18 Jan 2021 21:39:46 +0200 Subject: [PATCH 1188/2012] habanalabs: disable FW events on device removal When device is removed, we need to make sure the F/W won't send us any more events because during the remove process we disable the interrupts. Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/device.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c index 1ea57d86caa37..69d04eca767f5 100644 --- a/drivers/misc/habanalabs/common/device.c +++ b/drivers/misc/habanalabs/common/device.c @@ -1487,6 +1487,15 @@ void hl_device_fini(struct hl_device *hdev) } } + /* Disable PCI access from device F/W so it won't send us additional + * interrupts. We disable MSI/MSI-X at the halt_engines function and we + * can't have the F/W sending us interrupts after that. We need to + * disable the access here because if the device is marked disable, the + * message won't be send. Also, in case of heartbeat, the device CPU is + * marked as disable so this message won't be sent + */ + hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS); + /* Mark device as disabled */ hdev->disabled = true; -- GitLab From e020ff611ba9be54e959e6b548038f8a020da1c9 Mon Sep 17 00:00:00 2001 From: Saravana Kannan Date: Sun, 10 Jan 2021 09:54:07 -0800 Subject: [PATCH 1189/2012] driver core: Fix device link device name collision The device link device's name was of the form: -- This can cause name collision as reported here [1] as device names are not globally unique. Since device names have to be unique within the bus/class, add the bus/class name as a prefix to the device names used to construct the device link device name. So the devuce link device's name will be of the form: :--: [1] - https://lore.kernel.org/lkml/20201229033440.32142-1-michael@walle.cc/ Fixes: 287905e68dd2 ("driver core: Expose device link details in sysfs") Cc: stable@vger.kernel.org Reported-by: Michael Walle Tested-by: Michael Walle Signed-off-by: Saravana Kannan Link: https://lore.kernel.org/r/20210110175408.1465657-1-saravanak@google.com Signed-off-by: Greg Kroah-Hartman --- Documentation/ABI/testing/sysfs-class-devlink | 4 +-- .../ABI/testing/sysfs-devices-consumer | 5 ++-- .../ABI/testing/sysfs-devices-supplier | 5 ++-- drivers/base/core.c | 27 ++++++++++--------- include/linux/device.h | 12 +++++++++ 5 files changed, 35 insertions(+), 18 deletions(-) diff --git a/Documentation/ABI/testing/sysfs-class-devlink b/Documentation/ABI/testing/sysfs-class-devlink index b662f747c83eb..8a21ce515f61f 100644 --- a/Documentation/ABI/testing/sysfs-class-devlink +++ b/Documentation/ABI/testing/sysfs-class-devlink @@ -5,8 +5,8 @@ Description: Provide a place in sysfs for the device link objects in the kernel at any given time. The name of a device link directory, denoted as ... above, is of the form -- - where is the supplier device name and is - the consumer device name. + where is the supplier bus:device name and + is the consumer bus:device name. What: /sys/class/devlink/.../auto_remove_on Date: May 2020 diff --git a/Documentation/ABI/testing/sysfs-devices-consumer b/Documentation/ABI/testing/sysfs-devices-consumer index 1f06d74d1c3cc..0809fda092e66 100644 --- a/Documentation/ABI/testing/sysfs-devices-consumer +++ b/Documentation/ABI/testing/sysfs-devices-consumer @@ -4,5 +4,6 @@ Contact: Saravana Kannan Description: The /sys/devices/.../consumer: are symlinks to device links where this device is the supplier. denotes the - name of the consumer in that device link. There can be zero or - more of these symlinks for a given device. + name of the consumer in that device link and is of the form + bus:device name. There can be zero or more of these symlinks + for a given device. diff --git a/Documentation/ABI/testing/sysfs-devices-supplier b/Documentation/ABI/testing/sysfs-devices-supplier index a919e0db5e902..207f5972e98d8 100644 --- a/Documentation/ABI/testing/sysfs-devices-supplier +++ b/Documentation/ABI/testing/sysfs-devices-supplier @@ -4,5 +4,6 @@ Contact: Saravana Kannan Description: The /sys/devices/.../supplier: are symlinks to device links where this device is the consumer. denotes the - name of the supplier in that device link. There can be zero or - more of these symlinks for a given device. + name of the supplier in that device link and is of the form + bus:device name. There can be zero or more of these symlinks + for a given device. diff --git a/drivers/base/core.c b/drivers/base/core.c index 3819fd012e271..78f6db169d476 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -471,7 +471,9 @@ static int devlink_add_symlinks(struct device *dev, struct device *con = link->consumer; char *buf; - len = max(strlen(dev_name(sup)), strlen(dev_name(con))); + len = max(strlen(dev_bus_name(sup)) + strlen(dev_name(sup)), + strlen(dev_bus_name(con)) + strlen(dev_name(con))); + len += strlen(":"); len += strlen("supplier:") + 1; buf = kzalloc(len, GFP_KERNEL); if (!buf) @@ -485,12 +487,12 @@ static int devlink_add_symlinks(struct device *dev, if (ret) goto err_con; - snprintf(buf, len, "consumer:%s", dev_name(con)); + snprintf(buf, len, "consumer:%s:%s", dev_bus_name(con), dev_name(con)); ret = sysfs_create_link(&sup->kobj, &link->link_dev.kobj, buf); if (ret) goto err_con_dev; - snprintf(buf, len, "supplier:%s", dev_name(sup)); + snprintf(buf, len, "supplier:%s:%s", dev_bus_name(sup), dev_name(sup)); ret = sysfs_create_link(&con->kobj, &link->link_dev.kobj, buf); if (ret) goto err_sup_dev; @@ -498,7 +500,7 @@ static int devlink_add_symlinks(struct device *dev, goto out; err_sup_dev: - snprintf(buf, len, "consumer:%s", dev_name(con)); + snprintf(buf, len, "consumer:%s:%s", dev_bus_name(con), dev_name(con)); sysfs_remove_link(&sup->kobj, buf); err_con_dev: sysfs_remove_link(&link->link_dev.kobj, "consumer"); @@ -521,7 +523,9 @@ static void devlink_remove_symlinks(struct device *dev, sysfs_remove_link(&link->link_dev.kobj, "consumer"); sysfs_remove_link(&link->link_dev.kobj, "supplier"); - len = max(strlen(dev_name(sup)), strlen(dev_name(con))); + len = max(strlen(dev_bus_name(sup)) + strlen(dev_name(sup)), + strlen(dev_bus_name(con)) + strlen(dev_name(con))); + len += strlen(":"); len += strlen("supplier:") + 1; buf = kzalloc(len, GFP_KERNEL); if (!buf) { @@ -529,9 +533,9 @@ static void devlink_remove_symlinks(struct device *dev, return; } - snprintf(buf, len, "supplier:%s", dev_name(sup)); + snprintf(buf, len, "supplier:%s:%s", dev_bus_name(sup), dev_name(sup)); sysfs_remove_link(&con->kobj, buf); - snprintf(buf, len, "consumer:%s", dev_name(con)); + snprintf(buf, len, "consumer:%s:%s", dev_bus_name(con), dev_name(con)); sysfs_remove_link(&sup->kobj, buf); kfree(buf); } @@ -752,8 +756,9 @@ struct device_link *device_link_add(struct device *consumer, link->link_dev.class = &devlink_class; device_set_pm_not_required(&link->link_dev); - dev_set_name(&link->link_dev, "%s--%s", - dev_name(supplier), dev_name(consumer)); + dev_set_name(&link->link_dev, "%s:%s--%s:%s", + dev_bus_name(supplier), dev_name(supplier), + dev_bus_name(consumer), dev_name(consumer)); if (device_register(&link->link_dev)) { put_device(consumer); put_device(supplier); @@ -1823,9 +1828,7 @@ const char *dev_driver_string(const struct device *dev) * never change once they are set, so they don't need special care. */ drv = READ_ONCE(dev->driver); - return drv ? drv->name : - (dev->bus ? dev->bus->name : - (dev->class ? dev->class->name : "")); + return drv ? drv->name : dev_bus_name(dev); } EXPORT_SYMBOL(dev_driver_string); diff --git a/include/linux/device.h b/include/linux/device.h index 89bb8b84173e4..1779f90eeb4cb 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -609,6 +609,18 @@ static inline const char *dev_name(const struct device *dev) return kobject_name(&dev->kobj); } +/** + * dev_bus_name - Return a device's bus/class name, if at all possible + * @dev: struct device to get the bus/class name of + * + * Will return the name of the bus/class the device is attached to. If it is + * not attached to a bus/class, an empty string will be returned. + */ +static inline const char *dev_bus_name(const struct device *dev) +{ + return dev->bus ? dev->bus->name : (dev->class ? dev->class->name : ""); +} + __printf(2, 3) int dev_set_name(struct device *dev, const char *name, ...); #ifdef CONFIG_NUMA -- GitLab From 2d06dfecb132a1cc2e374a44eae83b5c4356b8b4 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Wed, 20 Jan 2021 06:02:31 -0500 Subject: [PATCH 1190/2012] dm integrity: fix a crash if "recalculate" used without "internal_hash" Recalculate can only be specified with internal_hash. Cc: stable@vger.kernel.org # v4.19+ Signed-off-by: Mikulas Patocka Signed-off-by: Mike Snitzer --- drivers/md/dm-integrity.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index 81df019ab284a..cce203adcf777 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -4235,6 +4235,12 @@ try_smaller_buffer: r = -ENOMEM; goto bad; } + } else { + if (ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING)) { + ti->error = "Recalculate can only be specified with internal_hash"; + r = -EINVAL; + goto bad; + } } ic->bufio = dm_bufio_client_create(ic->meta_dev ? ic->meta_dev->bdev : ic->dev->bdev, -- GitLab From 5ed66306eab6953197c88e082d9ecc0b35e21538 Mon Sep 17 00:00:00 2001 From: George Cherian Date: Tue, 19 Jan 2021 15:31:19 +0530 Subject: [PATCH 1191/2012] octeontx2-af: Add devlink health reporters for NIX Add health reporters for RVU NIX block. NIX Health reporters handle following HW event groups - GENERAL events - ERROR events - RAS events - RVU event Output: # devlink health pci/0002:01:00.0: reporter hw_npa_intr state healthy error 0 recover 0 grace_period 0 auto_recover true auto_dump true reporter hw_npa_gen state healthy error 0 recover 0 grace_period 0 auto_recover true auto_dump true reporter hw_npa_err state healthy error 0 recover 0 grace_period 0 auto_recover true auto_dump true reporter hw_npa_ras state healthy error 0 recover 0 grace_period 0 auto_recover true auto_dump true reporter hw_nix_intr state healthy error 0 recover 0 grace_period 0 auto_recover true auto_dump true reporter hw_nix_gen state healthy error 0 recover 0 grace_period 0 auto_recover true auto_dump true reporter hw_nix_err state healthy error 0 recover 0 grace_period 0 auto_recover true auto_dump true reporter hw_nix_ras state healthy error 0 recover 0 grace_period 0 auto_recover true auto_dump true # devlink health dump show pci/0002:01:00.0 reporter hw_nix_intr NIX_AF_RVU: NIX RVU Interrupt Reg : 1 Unmap Slot Error # devlink health dump show pci/0002:01:00.0 reporter hw_nix_gen NIX_AF_GENERAL: NIX General Interrupt Reg : 1 Rx multicast pkt drop Each reporter dump shows the Register value and the description of the cause. Signed-off-by: Sunil Kovvuri Goutham Signed-off-by: Jerin Jacob Signed-off-by: George Cherian Signed-off-by: Jakub Kicinski --- .../marvell/octeontx2/af/rvu_devlink.c | 652 +++++++++++++++++- .../marvell/octeontx2/af/rvu_devlink.h | 27 + .../marvell/octeontx2/af/rvu_struct.h | 10 + 3 files changed, 688 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c index bc0e4113370e3..10a98bcb7c54e 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c @@ -52,6 +52,650 @@ static bool rvu_common_request_irq(struct rvu *rvu, int offset, return rvu->irq_allocated[offset]; } +static void rvu_nix_intr_work(struct work_struct *work) +{ + struct rvu_nix_health_reporters *rvu_nix_health_reporter; + + rvu_nix_health_reporter = container_of(work, struct rvu_nix_health_reporters, intr_work); + devlink_health_report(rvu_nix_health_reporter->rvu_hw_nix_intr_reporter, + "NIX_AF_RVU Error", + rvu_nix_health_reporter->nix_event_ctx); +} + +static irqreturn_t rvu_nix_af_rvu_intr_handler(int irq, void *rvu_irq) +{ + struct rvu_nix_event_ctx *nix_event_context; + struct rvu_devlink *rvu_dl = rvu_irq; + struct rvu *rvu; + int blkaddr; + u64 intr; + + rvu = rvu_dl->rvu; + blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NIX, 0); + if (blkaddr < 0) + return IRQ_NONE; + + nix_event_context = rvu_dl->rvu_nix_health_reporter->nix_event_ctx; + intr = rvu_read64(rvu, blkaddr, NIX_AF_RVU_INT); + nix_event_context->nix_af_rvu_int = intr; + + /* Clear interrupts */ + rvu_write64(rvu, blkaddr, NIX_AF_RVU_INT, intr); + rvu_write64(rvu, blkaddr, NIX_AF_RVU_INT_ENA_W1C, ~0ULL); + queue_work(rvu_dl->devlink_wq, &rvu_dl->rvu_nix_health_reporter->intr_work); + + return IRQ_HANDLED; +} + +static void rvu_nix_gen_work(struct work_struct *work) +{ + struct rvu_nix_health_reporters *rvu_nix_health_reporter; + + rvu_nix_health_reporter = container_of(work, struct rvu_nix_health_reporters, gen_work); + devlink_health_report(rvu_nix_health_reporter->rvu_hw_nix_gen_reporter, + "NIX_AF_GEN Error", + rvu_nix_health_reporter->nix_event_ctx); +} + +static irqreturn_t rvu_nix_af_rvu_gen_handler(int irq, void *rvu_irq) +{ + struct rvu_nix_event_ctx *nix_event_context; + struct rvu_devlink *rvu_dl = rvu_irq; + struct rvu *rvu; + int blkaddr; + u64 intr; + + rvu = rvu_dl->rvu; + blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NIX, 0); + if (blkaddr < 0) + return IRQ_NONE; + + nix_event_context = rvu_dl->rvu_nix_health_reporter->nix_event_ctx; + intr = rvu_read64(rvu, blkaddr, NIX_AF_GEN_INT); + nix_event_context->nix_af_rvu_gen = intr; + + /* Clear interrupts */ + rvu_write64(rvu, blkaddr, NIX_AF_GEN_INT, intr); + rvu_write64(rvu, blkaddr, NIX_AF_GEN_INT_ENA_W1C, ~0ULL); + queue_work(rvu_dl->devlink_wq, &rvu_dl->rvu_nix_health_reporter->gen_work); + + return IRQ_HANDLED; +} + +static void rvu_nix_err_work(struct work_struct *work) +{ + struct rvu_nix_health_reporters *rvu_nix_health_reporter; + + rvu_nix_health_reporter = container_of(work, struct rvu_nix_health_reporters, err_work); + devlink_health_report(rvu_nix_health_reporter->rvu_hw_nix_err_reporter, + "NIX_AF_ERR Error", + rvu_nix_health_reporter->nix_event_ctx); +} + +static irqreturn_t rvu_nix_af_rvu_err_handler(int irq, void *rvu_irq) +{ + struct rvu_nix_event_ctx *nix_event_context; + struct rvu_devlink *rvu_dl = rvu_irq; + struct rvu *rvu; + int blkaddr; + u64 intr; + + rvu = rvu_dl->rvu; + blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NIX, 0); + if (blkaddr < 0) + return IRQ_NONE; + + nix_event_context = rvu_dl->rvu_nix_health_reporter->nix_event_ctx; + intr = rvu_read64(rvu, blkaddr, NIX_AF_ERR_INT); + nix_event_context->nix_af_rvu_err = intr; + + /* Clear interrupts */ + rvu_write64(rvu, blkaddr, NIX_AF_ERR_INT, intr); + rvu_write64(rvu, blkaddr, NIX_AF_ERR_INT_ENA_W1C, ~0ULL); + queue_work(rvu_dl->devlink_wq, &rvu_dl->rvu_nix_health_reporter->err_work); + + return IRQ_HANDLED; +} + +static void rvu_nix_ras_work(struct work_struct *work) +{ + struct rvu_nix_health_reporters *rvu_nix_health_reporter; + + rvu_nix_health_reporter = container_of(work, struct rvu_nix_health_reporters, ras_work); + devlink_health_report(rvu_nix_health_reporter->rvu_hw_nix_ras_reporter, + "NIX_AF_RAS Error", + rvu_nix_health_reporter->nix_event_ctx); +} + +static irqreturn_t rvu_nix_af_rvu_ras_handler(int irq, void *rvu_irq) +{ + struct rvu_nix_event_ctx *nix_event_context; + struct rvu_devlink *rvu_dl = rvu_irq; + struct rvu *rvu; + int blkaddr; + u64 intr; + + rvu = rvu_dl->rvu; + blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NIX, 0); + if (blkaddr < 0) + return IRQ_NONE; + + nix_event_context = rvu_dl->rvu_nix_health_reporter->nix_event_ctx; + intr = rvu_read64(rvu, blkaddr, NIX_AF_ERR_INT); + nix_event_context->nix_af_rvu_ras = intr; + + /* Clear interrupts */ + rvu_write64(rvu, blkaddr, NIX_AF_RAS, intr); + rvu_write64(rvu, blkaddr, NIX_AF_RAS_ENA_W1C, ~0ULL); + queue_work(rvu_dl->devlink_wq, &rvu_dl->rvu_nix_health_reporter->ras_work); + + return IRQ_HANDLED; +} + +static void rvu_nix_unregister_interrupts(struct rvu *rvu) +{ + struct rvu_devlink *rvu_dl = rvu->rvu_dl; + int offs, i, blkaddr; + + blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NIX, 0); + if (blkaddr < 0) + return; + + offs = rvu_read64(rvu, blkaddr, NIX_PRIV_AF_INT_CFG) & 0x3ff; + if (!offs) + return; + + rvu_write64(rvu, blkaddr, NIX_AF_RVU_INT_ENA_W1C, ~0ULL); + rvu_write64(rvu, blkaddr, NIX_AF_GEN_INT_ENA_W1C, ~0ULL); + rvu_write64(rvu, blkaddr, NIX_AF_ERR_INT_ENA_W1C, ~0ULL); + rvu_write64(rvu, blkaddr, NIX_AF_RAS_ENA_W1C, ~0ULL); + + if (rvu->irq_allocated[offs + NIX_AF_INT_VEC_RVU]) { + free_irq(pci_irq_vector(rvu->pdev, offs + NIX_AF_INT_VEC_RVU), + rvu_dl); + rvu->irq_allocated[offs + NIX_AF_INT_VEC_RVU] = false; + } + + for (i = NIX_AF_INT_VEC_AF_ERR; i < NIX_AF_INT_VEC_CNT; i++) + if (rvu->irq_allocated[offs + i]) { + free_irq(pci_irq_vector(rvu->pdev, offs + i), rvu_dl); + rvu->irq_allocated[offs + i] = false; + } +} + +static int rvu_nix_register_interrupts(struct rvu *rvu) +{ + int blkaddr, base; + bool rc; + + blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NIX, 0); + if (blkaddr < 0) + return blkaddr; + + /* Get NIX AF MSIX vectors offset. */ + base = rvu_read64(rvu, blkaddr, NIX_PRIV_AF_INT_CFG) & 0x3ff; + if (!base) { + dev_warn(rvu->dev, + "Failed to get NIX%d NIX_AF_INT vector offsets\n", + blkaddr - BLKADDR_NIX0); + return 0; + } + /* Register and enable NIX_AF_RVU_INT interrupt */ + rc = rvu_common_request_irq(rvu, base + NIX_AF_INT_VEC_RVU, + "NIX_AF_RVU_INT", + rvu_nix_af_rvu_intr_handler); + if (!rc) + goto err; + rvu_write64(rvu, blkaddr, NIX_AF_RVU_INT_ENA_W1S, ~0ULL); + + /* Register and enable NIX_AF_GEN_INT interrupt */ + rc = rvu_common_request_irq(rvu, base + NIX_AF_INT_VEC_GEN, + "NIX_AF_GEN_INT", + rvu_nix_af_rvu_gen_handler); + if (!rc) + goto err; + rvu_write64(rvu, blkaddr, NIX_AF_GEN_INT_ENA_W1S, ~0ULL); + + /* Register and enable NIX_AF_ERR_INT interrupt */ + rc = rvu_common_request_irq(rvu, base + NIX_AF_INT_VEC_AF_ERR, + "NIX_AF_ERR_INT", + rvu_nix_af_rvu_err_handler); + if (!rc) + goto err; + rvu_write64(rvu, blkaddr, NIX_AF_ERR_INT_ENA_W1S, ~0ULL); + + /* Register and enable NIX_AF_RAS interrupt */ + rc = rvu_common_request_irq(rvu, base + NIX_AF_INT_VEC_POISON, + "NIX_AF_RAS", + rvu_nix_af_rvu_ras_handler); + if (!rc) + goto err; + rvu_write64(rvu, blkaddr, NIX_AF_RAS_ENA_W1S, ~0ULL); + + return 0; +err: + rvu_nix_unregister_interrupts(rvu); + return rc; +} + +static int rvu_nix_report_show(struct devlink_fmsg *fmsg, void *ctx, + enum nix_af_rvu_health health_reporter) +{ + struct rvu_nix_event_ctx *nix_event_context; + u64 intr_val; + int err; + + nix_event_context = ctx; + switch (health_reporter) { + case NIX_AF_RVU_INTR: + intr_val = nix_event_context->nix_af_rvu_int; + err = rvu_report_pair_start(fmsg, "NIX_AF_RVU"); + if (err) + return err; + err = devlink_fmsg_u64_pair_put(fmsg, "\tNIX RVU Interrupt Reg ", + nix_event_context->nix_af_rvu_int); + if (err) + return err; + if (intr_val & BIT_ULL(0)) { + err = devlink_fmsg_string_put(fmsg, "\n\tUnmap Slot Error"); + if (err) + return err; + } + err = rvu_report_pair_end(fmsg); + if (err) + return err; + break; + case NIX_AF_RVU_GEN: + intr_val = nix_event_context->nix_af_rvu_gen; + err = rvu_report_pair_start(fmsg, "NIX_AF_GENERAL"); + if (err) + return err; + err = devlink_fmsg_u64_pair_put(fmsg, "\tNIX General Interrupt Reg ", + nix_event_context->nix_af_rvu_gen); + if (err) + return err; + if (intr_val & BIT_ULL(0)) { + err = devlink_fmsg_string_put(fmsg, "\n\tRx multicast pkt drop"); + if (err) + return err; + } + if (intr_val & BIT_ULL(1)) { + err = devlink_fmsg_string_put(fmsg, "\n\tRx mirror pkt drop"); + if (err) + return err; + } + if (intr_val & BIT_ULL(4)) { + err = devlink_fmsg_string_put(fmsg, "\n\tSMQ flush done"); + if (err) + return err; + } + err = rvu_report_pair_end(fmsg); + if (err) + return err; + break; + case NIX_AF_RVU_ERR: + intr_val = nix_event_context->nix_af_rvu_err; + err = rvu_report_pair_start(fmsg, "NIX_AF_ERR"); + if (err) + return err; + err = devlink_fmsg_u64_pair_put(fmsg, "\tNIX Error Interrupt Reg ", + nix_event_context->nix_af_rvu_err); + if (err) + return err; + if (intr_val & BIT_ULL(14)) { + err = devlink_fmsg_string_put(fmsg, "\n\tFault on NIX_AQ_INST_S read"); + if (err) + return err; + } + if (intr_val & BIT_ULL(13)) { + err = devlink_fmsg_string_put(fmsg, "\n\tFault on NIX_AQ_RES_S write"); + if (err) + return err; + } + if (intr_val & BIT_ULL(12)) { + err = devlink_fmsg_string_put(fmsg, "\n\tAQ Doorbell Error"); + if (err) + return err; + } + if (intr_val & BIT_ULL(6)) { + err = devlink_fmsg_string_put(fmsg, "\n\tRx on unmapped PF_FUNC"); + if (err) + return err; + } + if (intr_val & BIT_ULL(5)) { + err = devlink_fmsg_string_put(fmsg, "\n\tRx multicast replication error"); + if (err) + return err; + } + if (intr_val & BIT_ULL(4)) { + err = devlink_fmsg_string_put(fmsg, "\n\tFault on NIX_RX_MCE_S read"); + if (err) + return err; + } + if (intr_val & BIT_ULL(3)) { + err = devlink_fmsg_string_put(fmsg, "\n\tFault on multicast WQE read"); + if (err) + return err; + } + if (intr_val & BIT_ULL(2)) { + err = devlink_fmsg_string_put(fmsg, "\n\tFault on mirror WQE read"); + if (err) + return err; + } + if (intr_val & BIT_ULL(1)) { + err = devlink_fmsg_string_put(fmsg, "\n\tFault on mirror pkt write"); + if (err) + return err; + } + if (intr_val & BIT_ULL(0)) { + err = devlink_fmsg_string_put(fmsg, "\n\tFault on multicast pkt write"); + if (err) + return err; + } + err = rvu_report_pair_end(fmsg); + if (err) + return err; + break; + case NIX_AF_RVU_RAS: + intr_val = nix_event_context->nix_af_rvu_err; + err = rvu_report_pair_start(fmsg, "NIX_AF_RAS"); + if (err) + return err; + err = devlink_fmsg_u64_pair_put(fmsg, "\tNIX RAS Interrupt Reg ", + nix_event_context->nix_af_rvu_err); + if (err) + return err; + err = devlink_fmsg_string_put(fmsg, "\n\tPoison Data on:"); + if (err) + return err; + if (intr_val & BIT_ULL(34)) { + err = devlink_fmsg_string_put(fmsg, "\n\tNIX_AQ_INST_S"); + if (err) + return err; + } + if (intr_val & BIT_ULL(33)) { + err = devlink_fmsg_string_put(fmsg, "\n\tNIX_AQ_RES_S"); + if (err) + return err; + } + if (intr_val & BIT_ULL(32)) { + err = devlink_fmsg_string_put(fmsg, "\n\tHW ctx"); + if (err) + return err; + } + if (intr_val & BIT_ULL(4)) { + err = devlink_fmsg_string_put(fmsg, "\n\tPacket from mirror buffer"); + if (err) + return err; + } + if (intr_val & BIT_ULL(3)) { + err = devlink_fmsg_string_put(fmsg, "\n\tPacket from multicast buffer"); + + if (err) + return err; + } + if (intr_val & BIT_ULL(2)) { + err = devlink_fmsg_string_put(fmsg, "\n\tWQE read from mirror buffer"); + if (err) + return err; + } + if (intr_val & BIT_ULL(1)) { + err = devlink_fmsg_string_put(fmsg, "\n\tWQE read from multicast buffer"); + if (err) + return err; + } + if (intr_val & BIT_ULL(0)) { + err = devlink_fmsg_string_put(fmsg, "\n\tNIX_RX_MCE_S read"); + if (err) + return err; + } + err = rvu_report_pair_end(fmsg); + if (err) + return err; + break; + default: + return -EINVAL; + } + + return 0; +} + +static int rvu_hw_nix_intr_dump(struct devlink_health_reporter *reporter, + struct devlink_fmsg *fmsg, void *ctx, + struct netlink_ext_ack *netlink_extack) +{ + struct rvu *rvu = devlink_health_reporter_priv(reporter); + struct rvu_devlink *rvu_dl = rvu->rvu_dl; + struct rvu_nix_event_ctx *nix_ctx; + + nix_ctx = rvu_dl->rvu_nix_health_reporter->nix_event_ctx; + + return ctx ? rvu_nix_report_show(fmsg, ctx, NIX_AF_RVU_INTR) : + rvu_nix_report_show(fmsg, nix_ctx, NIX_AF_RVU_INTR); +} + +static int rvu_hw_nix_intr_recover(struct devlink_health_reporter *reporter, + void *ctx, struct netlink_ext_ack *netlink_extack) +{ + struct rvu *rvu = devlink_health_reporter_priv(reporter); + struct rvu_nix_event_ctx *nix_event_ctx = ctx; + int blkaddr; + + blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NIX, 0); + if (blkaddr < 0) + return blkaddr; + + if (nix_event_ctx->nix_af_rvu_int) + rvu_write64(rvu, blkaddr, NIX_AF_RVU_INT_ENA_W1S, ~0ULL); + + return 0; +} + +static int rvu_hw_nix_gen_dump(struct devlink_health_reporter *reporter, + struct devlink_fmsg *fmsg, void *ctx, + struct netlink_ext_ack *netlink_extack) +{ + struct rvu *rvu = devlink_health_reporter_priv(reporter); + struct rvu_devlink *rvu_dl = rvu->rvu_dl; + struct rvu_nix_event_ctx *nix_ctx; + + nix_ctx = rvu_dl->rvu_nix_health_reporter->nix_event_ctx; + + return ctx ? rvu_nix_report_show(fmsg, ctx, NIX_AF_RVU_GEN) : + rvu_nix_report_show(fmsg, nix_ctx, NIX_AF_RVU_GEN); +} + +static int rvu_hw_nix_gen_recover(struct devlink_health_reporter *reporter, + void *ctx, struct netlink_ext_ack *netlink_extack) +{ + struct rvu *rvu = devlink_health_reporter_priv(reporter); + struct rvu_nix_event_ctx *nix_event_ctx = ctx; + int blkaddr; + + blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NIX, 0); + if (blkaddr < 0) + return blkaddr; + + if (nix_event_ctx->nix_af_rvu_gen) + rvu_write64(rvu, blkaddr, NIX_AF_GEN_INT_ENA_W1S, ~0ULL); + + return 0; +} + +static int rvu_hw_nix_err_dump(struct devlink_health_reporter *reporter, + struct devlink_fmsg *fmsg, void *ctx, + struct netlink_ext_ack *netlink_extack) +{ + struct rvu *rvu = devlink_health_reporter_priv(reporter); + struct rvu_devlink *rvu_dl = rvu->rvu_dl; + struct rvu_nix_event_ctx *nix_ctx; + + nix_ctx = rvu_dl->rvu_nix_health_reporter->nix_event_ctx; + + return ctx ? rvu_nix_report_show(fmsg, ctx, NIX_AF_RVU_ERR) : + rvu_nix_report_show(fmsg, nix_ctx, NIX_AF_RVU_ERR); +} + +static int rvu_hw_nix_err_recover(struct devlink_health_reporter *reporter, + void *ctx, struct netlink_ext_ack *netlink_extack) +{ + struct rvu *rvu = devlink_health_reporter_priv(reporter); + struct rvu_nix_event_ctx *nix_event_ctx = ctx; + int blkaddr; + + blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NIX, 0); + if (blkaddr < 0) + return blkaddr; + + if (nix_event_ctx->nix_af_rvu_err) + rvu_write64(rvu, blkaddr, NIX_AF_ERR_INT_ENA_W1S, ~0ULL); + + return 0; +} + +static int rvu_hw_nix_ras_dump(struct devlink_health_reporter *reporter, + struct devlink_fmsg *fmsg, void *ctx, + struct netlink_ext_ack *netlink_extack) +{ + struct rvu *rvu = devlink_health_reporter_priv(reporter); + struct rvu_devlink *rvu_dl = rvu->rvu_dl; + struct rvu_nix_event_ctx *nix_ctx; + + nix_ctx = rvu_dl->rvu_nix_health_reporter->nix_event_ctx; + + return ctx ? rvu_nix_report_show(fmsg, ctx, NIX_AF_RVU_RAS) : + rvu_nix_report_show(fmsg, nix_ctx, NIX_AF_RVU_RAS); +} + +static int rvu_hw_nix_ras_recover(struct devlink_health_reporter *reporter, + void *ctx, struct netlink_ext_ack *netlink_extack) +{ + struct rvu *rvu = devlink_health_reporter_priv(reporter); + struct rvu_nix_event_ctx *nix_event_ctx = ctx; + int blkaddr; + + blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NIX, 0); + if (blkaddr < 0) + return blkaddr; + + if (nix_event_ctx->nix_af_rvu_int) + rvu_write64(rvu, blkaddr, NIX_AF_RAS_ENA_W1S, ~0ULL); + + return 0; +} + +RVU_REPORTERS(hw_nix_intr); +RVU_REPORTERS(hw_nix_gen); +RVU_REPORTERS(hw_nix_err); +RVU_REPORTERS(hw_nix_ras); + +static void rvu_nix_health_reporters_destroy(struct rvu_devlink *rvu_dl); + +static int rvu_nix_register_reporters(struct rvu_devlink *rvu_dl) +{ + struct rvu_nix_health_reporters *rvu_reporters; + struct rvu_nix_event_ctx *nix_event_context; + struct rvu *rvu = rvu_dl->rvu; + + rvu_reporters = kzalloc(sizeof(*rvu_reporters), GFP_KERNEL); + if (!rvu_reporters) + return -ENOMEM; + + rvu_dl->rvu_nix_health_reporter = rvu_reporters; + nix_event_context = kzalloc(sizeof(*nix_event_context), GFP_KERNEL); + if (!nix_event_context) + return -ENOMEM; + + rvu_reporters->nix_event_ctx = nix_event_context; + rvu_reporters->rvu_hw_nix_intr_reporter = + devlink_health_reporter_create(rvu_dl->dl, &rvu_hw_nix_intr_reporter_ops, 0, rvu); + if (IS_ERR(rvu_reporters->rvu_hw_nix_intr_reporter)) { + dev_warn(rvu->dev, "Failed to create hw_nix_intr reporter, err=%ld\n", + PTR_ERR(rvu_reporters->rvu_hw_nix_intr_reporter)); + return PTR_ERR(rvu_reporters->rvu_hw_nix_intr_reporter); + } + + rvu_reporters->rvu_hw_nix_gen_reporter = + devlink_health_reporter_create(rvu_dl->dl, &rvu_hw_nix_gen_reporter_ops, 0, rvu); + if (IS_ERR(rvu_reporters->rvu_hw_nix_gen_reporter)) { + dev_warn(rvu->dev, "Failed to create hw_nix_gen reporter, err=%ld\n", + PTR_ERR(rvu_reporters->rvu_hw_nix_gen_reporter)); + return PTR_ERR(rvu_reporters->rvu_hw_nix_gen_reporter); + } + + rvu_reporters->rvu_hw_nix_err_reporter = + devlink_health_reporter_create(rvu_dl->dl, &rvu_hw_nix_err_reporter_ops, 0, rvu); + if (IS_ERR(rvu_reporters->rvu_hw_nix_err_reporter)) { + dev_warn(rvu->dev, "Failed to create hw_nix_err reporter, err=%ld\n", + PTR_ERR(rvu_reporters->rvu_hw_nix_err_reporter)); + return PTR_ERR(rvu_reporters->rvu_hw_nix_err_reporter); + } + + rvu_reporters->rvu_hw_nix_ras_reporter = + devlink_health_reporter_create(rvu_dl->dl, &rvu_hw_nix_ras_reporter_ops, 0, rvu); + if (IS_ERR(rvu_reporters->rvu_hw_nix_ras_reporter)) { + dev_warn(rvu->dev, "Failed to create hw_nix_ras reporter, err=%ld\n", + PTR_ERR(rvu_reporters->rvu_hw_nix_ras_reporter)); + return PTR_ERR(rvu_reporters->rvu_hw_nix_ras_reporter); + } + + rvu_dl->devlink_wq = create_workqueue("rvu_devlink_wq"); + if (!rvu_dl->devlink_wq) + goto err; + + INIT_WORK(&rvu_reporters->intr_work, rvu_nix_intr_work); + INIT_WORK(&rvu_reporters->gen_work, rvu_nix_gen_work); + INIT_WORK(&rvu_reporters->err_work, rvu_nix_err_work); + INIT_WORK(&rvu_reporters->ras_work, rvu_nix_ras_work); + + return 0; +err: + rvu_nix_health_reporters_destroy(rvu_dl); + return -ENOMEM; +} + +static int rvu_nix_health_reporters_create(struct rvu_devlink *rvu_dl) +{ + struct rvu *rvu = rvu_dl->rvu; + int err; + + err = rvu_nix_register_reporters(rvu_dl); + if (err) { + dev_warn(rvu->dev, "Failed to create nix reporter, err =%d\n", + err); + return err; + } + rvu_nix_register_interrupts(rvu); + + return 0; +} + +static void rvu_nix_health_reporters_destroy(struct rvu_devlink *rvu_dl) +{ + struct rvu_nix_health_reporters *nix_reporters; + struct rvu *rvu = rvu_dl->rvu; + + nix_reporters = rvu_dl->rvu_nix_health_reporter; + + if (!nix_reporters->rvu_hw_nix_ras_reporter) + return; + if (!IS_ERR_OR_NULL(nix_reporters->rvu_hw_nix_intr_reporter)) + devlink_health_reporter_destroy(nix_reporters->rvu_hw_nix_intr_reporter); + + if (!IS_ERR_OR_NULL(nix_reporters->rvu_hw_nix_gen_reporter)) + devlink_health_reporter_destroy(nix_reporters->rvu_hw_nix_gen_reporter); + + if (!IS_ERR_OR_NULL(nix_reporters->rvu_hw_nix_err_reporter)) + devlink_health_reporter_destroy(nix_reporters->rvu_hw_nix_err_reporter); + + if (!IS_ERR_OR_NULL(nix_reporters->rvu_hw_nix_ras_reporter)) + devlink_health_reporter_destroy(nix_reporters->rvu_hw_nix_ras_reporter); + + rvu_nix_unregister_interrupts(rvu); + kfree(rvu_dl->rvu_nix_health_reporter->nix_event_ctx); + kfree(rvu_dl->rvu_nix_health_reporter); +} + static void rvu_npa_intr_work(struct work_struct *work) { struct rvu_npa_health_reporters *rvu_npa_health_reporter; @@ -698,9 +1342,14 @@ static void rvu_npa_health_reporters_destroy(struct rvu_devlink *rvu_dl) static int rvu_health_reporters_create(struct rvu *rvu) { struct rvu_devlink *rvu_dl; + int err; rvu_dl = rvu->rvu_dl; - return rvu_npa_health_reporters_create(rvu_dl); + err = rvu_npa_health_reporters_create(rvu_dl); + if (err) + return err; + + return rvu_nix_health_reporters_create(rvu_dl); } static void rvu_health_reporters_destroy(struct rvu *rvu) @@ -712,6 +1361,7 @@ static void rvu_health_reporters_destroy(struct rvu *rvu) rvu_dl = rvu->rvu_dl; rvu_npa_health_reporters_destroy(rvu_dl); + rvu_nix_health_reporters_destroy(rvu_dl); } static int rvu_devlink_info_get(struct devlink *devlink, struct devlink_info_req *req, diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.h index d7578fa92ac13..471e57dedb20a 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.h @@ -41,11 +41,38 @@ struct rvu_npa_health_reporters { struct work_struct ras_work; }; +enum nix_af_rvu_health { + NIX_AF_RVU_INTR, + NIX_AF_RVU_GEN, + NIX_AF_RVU_ERR, + NIX_AF_RVU_RAS, +}; + +struct rvu_nix_event_ctx { + u64 nix_af_rvu_int; + u64 nix_af_rvu_gen; + u64 nix_af_rvu_err; + u64 nix_af_rvu_ras; +}; + +struct rvu_nix_health_reporters { + struct rvu_nix_event_ctx *nix_event_ctx; + struct devlink_health_reporter *rvu_hw_nix_intr_reporter; + struct work_struct intr_work; + struct devlink_health_reporter *rvu_hw_nix_gen_reporter; + struct work_struct gen_work; + struct devlink_health_reporter *rvu_hw_nix_err_reporter; + struct work_struct err_work; + struct devlink_health_reporter *rvu_hw_nix_ras_reporter; + struct work_struct ras_work; +}; + struct rvu_devlink { struct devlink *dl; struct rvu *rvu; struct workqueue_struct *devlink_wq; struct rvu_npa_health_reporters *rvu_npa_health_reporter; + struct rvu_nix_health_reporters *rvu_nix_health_reporter; }; /* Devlink APIs */ diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_struct.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu_struct.h index e2153d47c3739..5e15f4fc11e3d 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_struct.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_struct.h @@ -74,6 +74,16 @@ enum npa_af_int_vec_e { NPA_AF_INT_VEC_CNT = 0x5, }; +/* NIX Admin function Interrupt Vector Enumeration */ +enum nix_af_int_vec_e { + NIX_AF_INT_VEC_RVU = 0x0, + NIX_AF_INT_VEC_GEN = 0x1, + NIX_AF_INT_VEC_AQ_DONE = 0x2, + NIX_AF_INT_VEC_AF_ERR = 0x3, + NIX_AF_INT_VEC_POISON = 0x4, + NIX_AF_INT_VEC_CNT = 0x5, +}; + /** * RVU PF Interrupt Vector Enumeration */ -- GitLab From d41b3365bda7845b28c5a06eef19be0b353cf128 Mon Sep 17 00:00:00 2001 From: George Cherian Date: Tue, 19 Jan 2021 15:31:20 +0530 Subject: [PATCH 1192/2012] docs: octeontx2: Add Documentation for NIX health reporters Add devlink health reporter documentation for NIX block. Signed-off-by: George Cherian Signed-off-by: Jakub Kicinski --- .../ethernet/marvell/octeontx2.rst | 70 +++++++++++++++++++ 1 file changed, 70 insertions(+) diff --git a/Documentation/networking/device_drivers/ethernet/marvell/octeontx2.rst b/Documentation/networking/device_drivers/ethernet/marvell/octeontx2.rst index 61e850460e18f..dd5cd69467bea 100644 --- a/Documentation/networking/device_drivers/ethernet/marvell/octeontx2.rst +++ b/Documentation/networking/device_drivers/ethernet/marvell/octeontx2.rst @@ -217,3 +217,73 @@ For example:: NPA_AF_ERR: NPA Error Interrupt Reg : 4096 AQ Doorbell Error + + +NIX Reporters +------------- +The NIX reporters are responsible for reporting and recovering the following group of errors: + +1. GENERAL events + + - Receive mirror/multicast packet drop due to insufficient buffer. + - SMQ Flush operation. + +2. ERROR events + + - Memory Fault due to WQE read/write from multicast/mirror buffer. + - Receive multicast/mirror replication list error. + - Receive packet on an unmapped PF. + - Fault due to NIX_AQ_INST_S read or NIX_AQ_RES_S write. + - AQ Doorbell Error. + +3. RAS events + + - RAS Error Reporting for NIX Receive Multicast/Mirror Entry Structure. + - RAS Error Reporting for WQE/Packet Data read from Multicast/Mirror Buffer.. + - RAS Error Reporting for NIX_AQ_INST_S/NIX_AQ_RES_S. + +4. RVU events + + - Error due to unmapped slot. + +Sample Output:: + + ~# ./devlink health + pci/0002:01:00.0: + reporter hw_npa_intr + state healthy error 0 recover 0 grace_period 0 auto_recover true auto_dump true + reporter hw_npa_gen + state healthy error 0 recover 0 grace_period 0 auto_recover true auto_dump true + reporter hw_npa_err + state healthy error 0 recover 0 grace_period 0 auto_recover true auto_dump true + reporter hw_npa_ras + state healthy error 0 recover 0 grace_period 0 auto_recover true auto_dump true + reporter hw_nix_intr + state healthy error 1121 recover 1121 last_dump_date 2021-01-19 last_dump_time 05:42:26 grace_period 0 auto_recover true auto_dump true + reporter hw_nix_gen + state healthy error 949 recover 949 last_dump_date 2021-01-19 last_dump_time 05:42:43 grace_period 0 auto_recover true auto_dump true + reporter hw_nix_err + state healthy error 1147 recover 1147 last_dump_date 2021-01-19 last_dump_time 05:42:59 grace_period 0 auto_recover true auto_dump true + reporter hw_nix_ras + state healthy error 409 recover 409 last_dump_date 2021-01-19 last_dump_time 05:43:16 grace_period 0 auto_recover true auto_dump true + +Each reporter dumps the + + - Error Type + - Error Register value + - Reason in words + +For example:: + + ~# devlink health dump show pci/0002:01:00.0 reporter hw_nix_intr + NIX_AF_RVU: + NIX RVU Interrupt Reg : 1 + Unmap Slot Error + ~# devlink health dump show pci/0002:01:00.0 reporter hw_nix_gen + NIX_AF_GENERAL: + NIX General Interrupt Reg : 1 + Rx multicast pkt drop + ~# devlink health dump show pci/0002:01:00.0 reporter hw_nix_err + NIX_AF_ERR: + NIX Error Interrupt Reg : 64 + Rx on unmapped PF_FUNC -- GitLab From 5c02406428d5219c367c5f53457698c58bc5f917 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Wed, 20 Jan 2021 13:59:11 -0500 Subject: [PATCH 1193/2012] dm integrity: conditionally disable "recalculate" feature Otherwise a malicious user could (ab)use the "recalculate" feature that makes dm-integrity calculate the checksums in the background while the device is already usable. When the system restarts before all checksums have been calculated, the calculation continues where it was interrupted even if the recalculate feature is not requested the next time the dm device is set up. Disable recalculating if we use internal_hash or journal_hash with a key (e.g. HMAC) and we don't have the "legacy_recalculate" flag. This may break activation of a volume, created by an older kernel, that is not yet fully recalculated -- if this happens, the user should add the "legacy_recalculate" flag to constructor parameters. Cc: stable@vger.kernel.org Signed-off-by: Mikulas Patocka Reported-by: Daniel Glockner Signed-off-by: Mike Snitzer --- .../device-mapper/dm-integrity.rst | 12 ++++++--- drivers/md/dm-integrity.c | 26 +++++++++++++++++-- 2 files changed, 33 insertions(+), 5 deletions(-) diff --git a/Documentation/admin-guide/device-mapper/dm-integrity.rst b/Documentation/admin-guide/device-mapper/dm-integrity.rst index 4e6f504474ac2..2cc5488acbd97 100644 --- a/Documentation/admin-guide/device-mapper/dm-integrity.rst +++ b/Documentation/admin-guide/device-mapper/dm-integrity.rst @@ -177,14 +177,20 @@ bitmap_flush_interval:number The bitmap flush interval in milliseconds. The metadata buffers are synchronized when this interval expires. +allow_discards + Allow block discard requests (a.k.a. TRIM) for the integrity device. + Discards are only allowed to devices using internal hash. + fix_padding Use a smaller padding of the tag area that is more space-efficient. If this option is not present, large padding is used - that is for compatibility with older kernels. -allow_discards - Allow block discard requests (a.k.a. TRIM) for the integrity device. - Discards are only allowed to devices using internal hash. +legacy_recalculate + Allow recalculating of volumes with HMAC keys. This is disabled by + default for security reasons - an attacker could modify the volume, + set recalc_sector to zero, and the kernel would not detect the + modification. The journal mode (D/J), buffer_sectors, journal_watermark, commit_time and allow_discards can be changed when reloading the target (load an inactive diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index cce203adcf777..b64fede032dc5 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -257,8 +257,9 @@ struct dm_integrity_c { bool journal_uptodate; bool just_formatted; bool recalculate_flag; - bool fix_padding; bool discard; + bool fix_padding; + bool legacy_recalculate; struct alg_spec internal_hash_alg; struct alg_spec journal_crypt_alg; @@ -386,6 +387,14 @@ static int dm_integrity_failed(struct dm_integrity_c *ic) return READ_ONCE(ic->failed); } +static bool dm_integrity_disable_recalculate(struct dm_integrity_c *ic) +{ + if ((ic->internal_hash_alg.key || ic->journal_mac_alg.key) && + !ic->legacy_recalculate) + return true; + return false; +} + static commit_id_t dm_integrity_commit_id(struct dm_integrity_c *ic, unsigned i, unsigned j, unsigned char seq) { @@ -3140,6 +3149,7 @@ static void dm_integrity_status(struct dm_target *ti, status_type_t type, arg_count += !!ic->journal_crypt_alg.alg_string; arg_count += !!ic->journal_mac_alg.alg_string; arg_count += (ic->sb->flags & cpu_to_le32(SB_FLAG_FIXED_PADDING)) != 0; + arg_count += ic->legacy_recalculate; DMEMIT("%s %llu %u %c %u", ic->dev->name, ic->start, ic->tag_size, ic->mode, arg_count); if (ic->meta_dev) @@ -3163,6 +3173,8 @@ static void dm_integrity_status(struct dm_target *ti, status_type_t type, } if ((ic->sb->flags & cpu_to_le32(SB_FLAG_FIXED_PADDING)) != 0) DMEMIT(" fix_padding"); + if (ic->legacy_recalculate) + DMEMIT(" legacy_recalculate"); #define EMIT_ALG(a, n) \ do { \ @@ -3792,7 +3804,7 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv) unsigned extra_args; struct dm_arg_set as; static const struct dm_arg _args[] = { - {0, 15, "Invalid number of feature args"}, + {0, 16, "Invalid number of feature args"}, }; unsigned journal_sectors, interleave_sectors, buffer_sectors, journal_watermark, sync_msec; bool should_write_sb; @@ -3940,6 +3952,8 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv) ic->discard = true; } else if (!strcmp(opt_string, "fix_padding")) { ic->fix_padding = true; + } else if (!strcmp(opt_string, "legacy_recalculate")) { + ic->legacy_recalculate = true; } else { r = -EINVAL; ti->error = "Invalid argument"; @@ -4243,6 +4257,14 @@ try_smaller_buffer: } } + if (ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING) && + le64_to_cpu(ic->sb->recalc_sector) < ic->provided_data_sectors && + dm_integrity_disable_recalculate(ic)) { + ti->error = "Recalculating with HMAC is disabled for security reasons - if you really need it, use the argument \"legacy_recalculate\""; + r = -EOPNOTSUPP; + goto bad; + } + ic->bufio = dm_bufio_client_create(ic->meta_dev ? ic->meta_dev->bdev : ic->dev->bdev, 1U << (SECTOR_SHIFT + ic->log2_buffer_sectors), 1, 0, NULL, NULL); if (IS_ERR(ic->bufio)) { -- GitLab From 004b8ae9e2de55ca7857ba8471209dd3179e088c Mon Sep 17 00:00:00 2001 From: Ignat Korchagin Date: Tue, 19 Jan 2021 20:40:15 +0000 Subject: [PATCH 1194/2012] dm crypt: fix copy and paste bug in crypt_alloc_req_aead In commit d68b29584c25 ("dm crypt: use GFP_ATOMIC when allocating crypto requests from softirq") code was incorrectly copy and pasted from crypt_alloc_req_skcipher()'s crypto request allocation code to crypt_alloc_req_aead(). It is OK from runtime perspective as both simple encryption request pointer and AEAD request pointer are part of a union, but may confuse code reviewers. Fixes: d68b29584c25 ("dm crypt: use GFP_ATOMIC when allocating crypto requests from softirq") Cc: stable@vger.kernel.org # v5.9+ Reported-by: Pavel Machek Signed-off-by: Ignat Korchagin Signed-off-by: Mike Snitzer --- drivers/md/dm-crypt.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 8c874710f0bcf..5a55617a08e68 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -1481,9 +1481,9 @@ static int crypt_alloc_req_skcipher(struct crypt_config *cc, static int crypt_alloc_req_aead(struct crypt_config *cc, struct convert_context *ctx) { - if (!ctx->r.req) { - ctx->r.req = mempool_alloc(&cc->req_pool, in_interrupt() ? GFP_ATOMIC : GFP_NOIO); - if (!ctx->r.req) + if (!ctx->r.req_aead) { + ctx->r.req_aead = mempool_alloc(&cc->req_pool, in_interrupt() ? GFP_ATOMIC : GFP_NOIO); + if (!ctx->r.req_aead) return -ENOMEM; } -- GitLab From 809b1e4945774c9ec5619a8f4e2189b7b3833c0c Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Thu, 21 Jan 2021 18:50:56 +0100 Subject: [PATCH 1195/2012] dm: avoid filesystem lookup in dm_get_dev_t() This reverts commit 644bda6f3460 ("dm table: fall back to getting device using name_to_dev_t()") dm_get_dev_t() is just used to convert an arbitrary 'path' string into a dev_t. It doesn't presume that the device is present; that check will be done later, as the only caller is dm_get_device(), which does a dm_get_table_device() later on, which will properly open the device. So if the path string already _is_ in major:minor representation we can convert it directly, avoiding a recursion into the filesystem to lookup the block device. This avoids a hang in multipath_message() when the filesystem is inaccessible. Fixes: 644bda6f3460 ("dm table: fall back to getting device using name_to_dev_t()") Cc: stable@vger.kernel.org Signed-off-by: Hannes Reinecke Signed-off-by: Martin Wilck Reviewed-by: Christoph Hellwig Signed-off-by: Mike Snitzer --- drivers/md/dm-table.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 188f41287f180..4acf2342f7adf 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -363,14 +363,23 @@ int dm_get_device(struct dm_target *ti, const char *path, fmode_t mode, { int r; dev_t dev; + unsigned int major, minor; + char dummy; struct dm_dev_internal *dd; struct dm_table *t = ti->table; BUG_ON(!t); - dev = dm_get_dev_t(path); - if (!dev) - return -ENODEV; + if (sscanf(path, "%u:%u%c", &major, &minor, &dummy) == 2) { + /* Extract the major/minor numbers */ + dev = MKDEV(major, minor); + if (MAJOR(dev) != major || MINOR(dev) != minor) + return -EOVERFLOW; + } else { + dev = dm_get_dev_t(path); + if (!dev) + return -ENODEV; + } dd = find_device(&t->devices, dev); if (!dd) { -- GitLab From 0a950ce029c855060ceaea5a5eac511497c5619e Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Tue, 19 Jan 2021 16:07:46 +0100 Subject: [PATCH 1196/2012] ethernet: ucc_geth: remove unused read of temoder field In theory, such a read-after-write might be required by the hardware, but nothing in the data sheet suggests that to be the case. The name test also suggests that it's some debug leftover. Signed-off-by: Rasmus Villemoes Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/freescale/ucc_geth.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/net/ethernet/freescale/ucc_geth.c b/drivers/net/ethernet/freescale/ucc_geth.c index 6d853f018d531..d4b775870f4e9 100644 --- a/drivers/net/ethernet/freescale/ucc_geth.c +++ b/drivers/net/ethernet/freescale/ucc_geth.c @@ -2359,7 +2359,6 @@ static int ucc_geth_startup(struct ucc_geth_private *ugeth) u32 init_enet_pram_offset, cecr_subblock, command; u32 ifstat, i, j, size, l2qt, l3qt; u16 temoder = UCC_GETH_TEMODER_INIT; - u16 test; u8 function_code = 0; u8 __iomem *endOfRing; u8 numThreadsRxNumerical, numThreadsTxNumerical; @@ -2667,8 +2666,6 @@ static int ucc_geth_startup(struct ucc_geth_private *ugeth) temoder |= ((ug_info->numQueuesTx - 1) << TEMODER_NUM_OF_QUEUES_SHIFT); out_be16(&ugeth->p_tx_glbl_pram->temoder, temoder); - test = in_be16(&ugeth->p_tx_glbl_pram->temoder); - /* Function code register value to be used later */ function_code = UCC_BMR_BO_BE | UCC_BMR_GBL; /* Required for QE */ -- GitLab From e8e507a8ac90d48053dfdea9d4855495b0204956 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Tue, 19 Jan 2021 16:07:47 +0100 Subject: [PATCH 1197/2012] soc: fsl: qe: make cpm_muram_offset take a const void* argument Allow passing const-qualified pointers without requiring a cast in the caller. Signed-off-by: Rasmus Villemoes Acked-by: Li Yang Signed-off-by: Jakub Kicinski --- drivers/soc/fsl/qe/qe_common.c | 2 +- include/soc/fsl/qe/qe.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/soc/fsl/qe/qe_common.c b/drivers/soc/fsl/qe/qe_common.c index 497a7e0fd0272..b81cd1b2e4a05 100644 --- a/drivers/soc/fsl/qe/qe_common.c +++ b/drivers/soc/fsl/qe/qe_common.c @@ -223,7 +223,7 @@ void __iomem *cpm_muram_addr(unsigned long offset) } EXPORT_SYMBOL(cpm_muram_addr); -unsigned long cpm_muram_offset(void __iomem *addr) +unsigned long cpm_muram_offset(const void __iomem *addr) { return addr - (void __iomem *)muram_vbase; } diff --git a/include/soc/fsl/qe/qe.h b/include/soc/fsl/qe/qe.h index 3feddfec9f87d..8ee3747433c0c 100644 --- a/include/soc/fsl/qe/qe.h +++ b/include/soc/fsl/qe/qe.h @@ -102,7 +102,7 @@ s32 cpm_muram_alloc(unsigned long size, unsigned long align); void cpm_muram_free(s32 offset); s32 cpm_muram_alloc_fixed(unsigned long offset, unsigned long size); void __iomem *cpm_muram_addr(unsigned long offset); -unsigned long cpm_muram_offset(void __iomem *addr); +unsigned long cpm_muram_offset(const void __iomem *addr); dma_addr_t cpm_muram_dma(void __iomem *addr); #else static inline s32 cpm_muram_alloc(unsigned long size, @@ -126,7 +126,7 @@ static inline void __iomem *cpm_muram_addr(unsigned long offset) return NULL; } -static inline unsigned long cpm_muram_offset(void __iomem *addr) +static inline unsigned long cpm_muram_offset(const void __iomem *addr) { return -ENOSYS; } -- GitLab From 155ea0dc8dcb6066aaf4af5addd005b8968ce820 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Tue, 19 Jan 2021 16:07:48 +0100 Subject: [PATCH 1198/2012] soc: fsl: qe: store muram_vbase as a void pointer instead of u8 The two functions cpm_muram_offset() and cpm_muram_dma() both need a cast currently, one casts muram_vbase to do the pointer arithmetic on void pointers, the other casts the passed-in address u8*. It's simpler and more consistent to just always use void* and drop all the casting. Signed-off-by: Rasmus Villemoes Acked-by: Li Yang Signed-off-by: Jakub Kicinski --- drivers/soc/fsl/qe/qe_common.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/soc/fsl/qe/qe_common.c b/drivers/soc/fsl/qe/qe_common.c index b81cd1b2e4a05..236d962d45f10 100644 --- a/drivers/soc/fsl/qe/qe_common.c +++ b/drivers/soc/fsl/qe/qe_common.c @@ -27,7 +27,7 @@ static struct gen_pool *muram_pool; static spinlock_t cpm_muram_lock; -static u8 __iomem *muram_vbase; +static void __iomem *muram_vbase; static phys_addr_t muram_pbase; struct muram_block { @@ -225,7 +225,7 @@ EXPORT_SYMBOL(cpm_muram_addr); unsigned long cpm_muram_offset(const void __iomem *addr) { - return addr - (void __iomem *)muram_vbase; + return addr - muram_vbase; } EXPORT_SYMBOL(cpm_muram_offset); @@ -235,6 +235,6 @@ EXPORT_SYMBOL(cpm_muram_offset); */ dma_addr_t cpm_muram_dma(void __iomem *addr) { - return muram_pbase + ((u8 __iomem *)addr - muram_vbase); + return muram_pbase + (addr - muram_vbase); } EXPORT_SYMBOL(cpm_muram_dma); -- GitLab From 186b8daffb4ec2dabb8a3d93b329b16152a5a100 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Tue, 19 Jan 2021 16:07:49 +0100 Subject: [PATCH 1199/2012] soc: fsl: qe: add cpm_muram_free_addr() helper Add a helper that takes a virtual address rather than the muram offset. This will be used in a couple of places to avoid having to store both the offset and the virtual address, as well as removing NULL checks from the callers. Signed-off-by: Rasmus Villemoes Acked-by: Li Yang Signed-off-by: Jakub Kicinski --- drivers/soc/fsl/qe/qe_common.c | 12 ++++++++++++ include/soc/fsl/qe/qe.h | 5 +++++ 2 files changed, 17 insertions(+) diff --git a/drivers/soc/fsl/qe/qe_common.c b/drivers/soc/fsl/qe/qe_common.c index 236d962d45f10..654e9246ce6b9 100644 --- a/drivers/soc/fsl/qe/qe_common.c +++ b/drivers/soc/fsl/qe/qe_common.c @@ -238,3 +238,15 @@ dma_addr_t cpm_muram_dma(void __iomem *addr) return muram_pbase + (addr - muram_vbase); } EXPORT_SYMBOL(cpm_muram_dma); + +/* + * As cpm_muram_free, but takes the virtual address rather than the + * muram offset. + */ +void cpm_muram_free_addr(const void __iomem *addr) +{ + if (!addr) + return; + cpm_muram_free(cpm_muram_offset(addr)); +} +EXPORT_SYMBOL(cpm_muram_free_addr); diff --git a/include/soc/fsl/qe/qe.h b/include/soc/fsl/qe/qe.h index 8ee3747433c0c..66f1afc393d17 100644 --- a/include/soc/fsl/qe/qe.h +++ b/include/soc/fsl/qe/qe.h @@ -104,6 +104,7 @@ s32 cpm_muram_alloc_fixed(unsigned long offset, unsigned long size); void __iomem *cpm_muram_addr(unsigned long offset); unsigned long cpm_muram_offset(const void __iomem *addr); dma_addr_t cpm_muram_dma(void __iomem *addr); +void cpm_muram_free_addr(const void __iomem *addr); #else static inline s32 cpm_muram_alloc(unsigned long size, unsigned long align) @@ -135,6 +136,9 @@ static inline dma_addr_t cpm_muram_dma(void __iomem *addr) { return 0; } +static inline void cpm_muram_free_addr(const void __iomem *addr) +{ +} #endif /* defined(CONFIG_CPM) || defined(CONFIG_QUICC_ENGINE) */ /* QE PIO */ @@ -239,6 +243,7 @@ static inline int qe_alive_during_sleep(void) #define qe_muram_addr cpm_muram_addr #define qe_muram_offset cpm_muram_offset #define qe_muram_dma cpm_muram_dma +#define qe_muram_free_addr cpm_muram_free_addr #ifdef CONFIG_PPC32 #define qe_iowrite8(val, addr) out_8(addr, val) -- GitLab From 03588e92c07fc57c048309004788f3fe3a7da926 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Tue, 19 Jan 2021 16:07:50 +0100 Subject: [PATCH 1200/2012] ethernet: ucc_geth: use qe_muram_free_addr() This removes the explicit NULL checks, and allows us to stop storing at least some of the _offset values separately. Signed-off-by: Rasmus Villemoes Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/freescale/ucc_geth.c | 77 ++++++++++------------- 1 file changed, 33 insertions(+), 44 deletions(-) diff --git a/drivers/net/ethernet/freescale/ucc_geth.c b/drivers/net/ethernet/freescale/ucc_geth.c index d4b775870f4e9..14c58667992e8 100644 --- a/drivers/net/ethernet/freescale/ucc_geth.c +++ b/drivers/net/ethernet/freescale/ucc_geth.c @@ -1921,50 +1921,39 @@ static void ucc_geth_memclean(struct ucc_geth_private *ugeth) ugeth->uccf = NULL; } - if (ugeth->p_thread_data_tx) { - qe_muram_free(ugeth->thread_dat_tx_offset); - ugeth->p_thread_data_tx = NULL; - } - if (ugeth->p_thread_data_rx) { - qe_muram_free(ugeth->thread_dat_rx_offset); - ugeth->p_thread_data_rx = NULL; - } - if (ugeth->p_exf_glbl_param) { - qe_muram_free(ugeth->exf_glbl_param_offset); - ugeth->p_exf_glbl_param = NULL; - } - if (ugeth->p_rx_glbl_pram) { - qe_muram_free(ugeth->rx_glbl_pram_offset); - ugeth->p_rx_glbl_pram = NULL; - } - if (ugeth->p_tx_glbl_pram) { - qe_muram_free(ugeth->tx_glbl_pram_offset); - ugeth->p_tx_glbl_pram = NULL; - } - if (ugeth->p_send_q_mem_reg) { - qe_muram_free(ugeth->send_q_mem_reg_offset); - ugeth->p_send_q_mem_reg = NULL; - } - if (ugeth->p_scheduler) { - qe_muram_free(ugeth->scheduler_offset); - ugeth->p_scheduler = NULL; - } - if (ugeth->p_tx_fw_statistics_pram) { - qe_muram_free(ugeth->tx_fw_statistics_pram_offset); - ugeth->p_tx_fw_statistics_pram = NULL; - } - if (ugeth->p_rx_fw_statistics_pram) { - qe_muram_free(ugeth->rx_fw_statistics_pram_offset); - ugeth->p_rx_fw_statistics_pram = NULL; - } - if (ugeth->p_rx_irq_coalescing_tbl) { - qe_muram_free(ugeth->rx_irq_coalescing_tbl_offset); - ugeth->p_rx_irq_coalescing_tbl = NULL; - } - if (ugeth->p_rx_bd_qs_tbl) { - qe_muram_free(ugeth->rx_bd_qs_tbl_offset); - ugeth->p_rx_bd_qs_tbl = NULL; - } + qe_muram_free_addr(ugeth->p_thread_data_tx); + ugeth->p_thread_data_tx = NULL; + + qe_muram_free_addr(ugeth->p_thread_data_rx); + ugeth->p_thread_data_rx = NULL; + + qe_muram_free_addr(ugeth->p_exf_glbl_param); + ugeth->p_exf_glbl_param = NULL; + + qe_muram_free_addr(ugeth->p_rx_glbl_pram); + ugeth->p_rx_glbl_pram = NULL; + + qe_muram_free_addr(ugeth->p_tx_glbl_pram); + ugeth->p_tx_glbl_pram = NULL; + + qe_muram_free_addr(ugeth->p_send_q_mem_reg); + ugeth->p_send_q_mem_reg = NULL; + + qe_muram_free_addr(ugeth->p_scheduler); + ugeth->p_scheduler = NULL; + + qe_muram_free_addr(ugeth->p_tx_fw_statistics_pram); + ugeth->p_tx_fw_statistics_pram = NULL; + + qe_muram_free_addr(ugeth->p_rx_fw_statistics_pram); + ugeth->p_rx_fw_statistics_pram = NULL; + + qe_muram_free_addr(ugeth->p_rx_irq_coalescing_tbl); + ugeth->p_rx_irq_coalescing_tbl = NULL; + + qe_muram_free_addr(ugeth->p_rx_bd_qs_tbl); + ugeth->p_rx_bd_qs_tbl = NULL; + if (ugeth->p_init_enet_param_shadow) { return_init_enet_entries(ugeth, &(ugeth->p_init_enet_param_shadow-> -- GitLab From 0a71c415297fd7d408834a4585ae7c757bebbe4f Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Tue, 19 Jan 2021 16:07:51 +0100 Subject: [PATCH 1201/2012] ethernet: ucc_geth: remove unnecessary memset_io() calls These buffers have all just been handed out from qe_muram_alloc(), aka cpm_muram_alloc(), and the helper cpm_muram_alloc_common() already does memset_io(cpm_muram_addr(start), 0, size); Signed-off-by: Rasmus Villemoes Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/freescale/ucc_geth.c | 19 ------------------- 1 file changed, 19 deletions(-) diff --git a/drivers/net/ethernet/freescale/ucc_geth.c b/drivers/net/ethernet/freescale/ucc_geth.c index 14c58667992e8..be997b5595770 100644 --- a/drivers/net/ethernet/freescale/ucc_geth.c +++ b/drivers/net/ethernet/freescale/ucc_geth.c @@ -2506,9 +2506,6 @@ static int ucc_geth_startup(struct ucc_geth_private *ugeth) ugeth->p_tx_glbl_pram = (struct ucc_geth_tx_global_pram __iomem *) qe_muram_addr(ugeth-> tx_glbl_pram_offset); - /* Zero out p_tx_glbl_pram */ - memset_io((void __iomem *)ugeth->p_tx_glbl_pram, 0, sizeof(struct ucc_geth_tx_global_pram)); - /* Fill global PRAM */ /* TQPTR */ @@ -2596,8 +2593,6 @@ static int ucc_geth_startup(struct ucc_geth_private *ugeth) scheduler_offset); out_be32(&ugeth->p_tx_glbl_pram->schedulerbasepointer, ugeth->scheduler_offset); - /* Zero out p_scheduler */ - memset_io((void __iomem *)ugeth->p_scheduler, 0, sizeof(struct ucc_geth_scheduler)); /* Set values in scheduler */ out_be32(&ugeth->p_scheduler->mblinterval, @@ -2640,9 +2635,6 @@ static int ucc_geth_startup(struct ucc_geth_private *ugeth) ugeth->p_tx_fw_statistics_pram = (struct ucc_geth_tx_firmware_statistics_pram __iomem *) qe_muram_addr(ugeth->tx_fw_statistics_pram_offset); - /* Zero out p_tx_fw_statistics_pram */ - memset_io((void __iomem *)ugeth->p_tx_fw_statistics_pram, - 0, sizeof(struct ucc_geth_tx_firmware_statistics_pram)); } /* temoder */ @@ -2675,9 +2667,6 @@ static int ucc_geth_startup(struct ucc_geth_private *ugeth) ugeth->p_rx_glbl_pram = (struct ucc_geth_rx_global_pram __iomem *) qe_muram_addr(ugeth-> rx_glbl_pram_offset); - /* Zero out p_rx_glbl_pram */ - memset_io((void __iomem *)ugeth->p_rx_glbl_pram, 0, sizeof(struct ucc_geth_rx_global_pram)); - /* Fill global PRAM */ /* RQPTR */ @@ -2715,9 +2704,6 @@ static int ucc_geth_startup(struct ucc_geth_private *ugeth) ugeth->p_rx_fw_statistics_pram = (struct ucc_geth_rx_firmware_statistics_pram __iomem *) qe_muram_addr(ugeth->rx_fw_statistics_pram_offset); - /* Zero out p_rx_fw_statistics_pram */ - memset_io((void __iomem *)ugeth->p_rx_fw_statistics_pram, 0, - sizeof(struct ucc_geth_rx_firmware_statistics_pram)); } /* intCoalescingPtr */ @@ -2803,11 +2789,6 @@ static int ucc_geth_startup(struct ucc_geth_private *ugeth) (struct ucc_geth_rx_bd_queues_entry __iomem *) qe_muram_addr(ugeth-> rx_bd_qs_tbl_offset); out_be32(&ugeth->p_rx_glbl_pram->rbdqptr, ugeth->rx_bd_qs_tbl_offset); - /* Zero out p_rx_bd_qs_tbl */ - memset_io((void __iomem *)ugeth->p_rx_bd_qs_tbl, - 0, - ug_info->numQueuesRx * (sizeof(struct ucc_geth_rx_bd_queues_entry) + - sizeof(struct ucc_geth_rx_prefetched_bds))); /* Setup the table */ /* Assume BD rings are already established */ -- GitLab From 830c8ddc66df5074075933a172b6698acbadbc7a Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Tue, 19 Jan 2021 16:07:52 +0100 Subject: [PATCH 1202/2012] ethernet: ucc_geth: replace kmalloc+memset by kzalloc Signed-off-by: Rasmus Villemoes Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/freescale/ucc_geth.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/net/ethernet/freescale/ucc_geth.c b/drivers/net/ethernet/freescale/ucc_geth.c index be997b5595770..74ee2ed2fbbb6 100644 --- a/drivers/net/ethernet/freescale/ucc_geth.c +++ b/drivers/net/ethernet/freescale/ucc_geth.c @@ -2904,14 +2904,11 @@ static int ucc_geth_startup(struct ucc_geth_private *ugeth) * allocated resources can be released when the channel is freed. */ if (!(ugeth->p_init_enet_param_shadow = - kmalloc(sizeof(struct ucc_geth_init_pram), GFP_KERNEL))) { + kzalloc(sizeof(struct ucc_geth_init_pram), GFP_KERNEL))) { if (netif_msg_ifup(ugeth)) pr_err("Can not allocate memory for p_UccInitEnetParamShadows\n"); return -ENOMEM; } - /* Zero out *p_init_enet_param_shadow */ - memset((char *)ugeth->p_init_enet_param_shadow, - 0, sizeof(struct ucc_geth_init_pram)); /* Fill shadow InitEnet command parameter structure */ -- GitLab From 7d9fe90036f75a766dce76df997d4067e22b93c6 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Tue, 19 Jan 2021 16:07:53 +0100 Subject: [PATCH 1203/2012] ethernet: ucc_geth: remove {rx,tx}_glbl_pram_offset from struct ucc_geth_private These fields are only used within ucc_geth_startup(), so they might as well be local variables in that function rather than being stashed in struct ucc_geth_private. Aside from making that struct a tiny bit smaller, it also shortens some lines (getting rid of pointless casts while here), and fixes the problems with using IS_ERR_VALUE() on a u32 as explained in commit 800cd6fb76f0 ("soc: fsl: qe: change return type of cpm_muram_alloc() to s32"). Signed-off-by: Rasmus Villemoes Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/freescale/ucc_geth.c | 21 +++++++++------------ drivers/net/ethernet/freescale/ucc_geth.h | 2 -- 2 files changed, 9 insertions(+), 14 deletions(-) diff --git a/drivers/net/ethernet/freescale/ucc_geth.c b/drivers/net/ethernet/freescale/ucc_geth.c index 74ee2ed2fbbb6..75466489bf9a7 100644 --- a/drivers/net/ethernet/freescale/ucc_geth.c +++ b/drivers/net/ethernet/freescale/ucc_geth.c @@ -2351,6 +2351,7 @@ static int ucc_geth_startup(struct ucc_geth_private *ugeth) u8 function_code = 0; u8 __iomem *endOfRing; u8 numThreadsRxNumerical, numThreadsTxNumerical; + s32 rx_glbl_pram_offset, tx_glbl_pram_offset; ugeth_vdbg("%s: IN", __func__); uccf = ugeth->uccf; @@ -2495,17 +2496,15 @@ static int ucc_geth_startup(struct ucc_geth_private *ugeth) */ /* Tx global PRAM */ /* Allocate global tx parameter RAM page */ - ugeth->tx_glbl_pram_offset = + tx_glbl_pram_offset = qe_muram_alloc(sizeof(struct ucc_geth_tx_global_pram), UCC_GETH_TX_GLOBAL_PRAM_ALIGNMENT); - if (IS_ERR_VALUE(ugeth->tx_glbl_pram_offset)) { + if (tx_glbl_pram_offset < 0) { if (netif_msg_ifup(ugeth)) pr_err("Can not allocate DPRAM memory for p_tx_glbl_pram\n"); return -ENOMEM; } - ugeth->p_tx_glbl_pram = - (struct ucc_geth_tx_global_pram __iomem *) qe_muram_addr(ugeth-> - tx_glbl_pram_offset); + ugeth->p_tx_glbl_pram = qe_muram_addr(tx_glbl_pram_offset); /* Fill global PRAM */ /* TQPTR */ @@ -2656,17 +2655,15 @@ static int ucc_geth_startup(struct ucc_geth_private *ugeth) /* Rx global PRAM */ /* Allocate global rx parameter RAM page */ - ugeth->rx_glbl_pram_offset = + rx_glbl_pram_offset = qe_muram_alloc(sizeof(struct ucc_geth_rx_global_pram), UCC_GETH_RX_GLOBAL_PRAM_ALIGNMENT); - if (IS_ERR_VALUE(ugeth->rx_glbl_pram_offset)) { + if (rx_glbl_pram_offset < 0) { if (netif_msg_ifup(ugeth)) pr_err("Can not allocate DPRAM memory for p_rx_glbl_pram\n"); return -ENOMEM; } - ugeth->p_rx_glbl_pram = - (struct ucc_geth_rx_global_pram __iomem *) qe_muram_addr(ugeth-> - rx_glbl_pram_offset); + ugeth->p_rx_glbl_pram = qe_muram_addr(rx_glbl_pram_offset); /* Fill global PRAM */ /* RQPTR */ @@ -2928,7 +2925,7 @@ static int ucc_geth_startup(struct ucc_geth_private *ugeth) ((u32) ug_info->numThreadsTx) << ENET_INIT_PARAM_TGF_SHIFT; ugeth->p_init_enet_param_shadow->rgftgfrxglobal |= - ugeth->rx_glbl_pram_offset | ug_info->riscRx; + rx_glbl_pram_offset | ug_info->riscRx; if ((ug_info->largestexternallookupkeysize != QE_FLTR_LARGEST_EXTERNAL_TABLE_LOOKUP_KEY_SIZE_NONE) && (ug_info->largestexternallookupkeysize != @@ -2966,7 +2963,7 @@ static int ucc_geth_startup(struct ucc_geth_private *ugeth) } ugeth->p_init_enet_param_shadow->txglobal = - ugeth->tx_glbl_pram_offset | ug_info->riscTx; + tx_glbl_pram_offset | ug_info->riscTx; if ((ret_val = fill_init_enet_entries(ugeth, &(ugeth->p_init_enet_param_shadow-> diff --git a/drivers/net/ethernet/freescale/ucc_geth.h b/drivers/net/ethernet/freescale/ucc_geth.h index 11d4bf5dc21f7..6b86217038a3b 100644 --- a/drivers/net/ethernet/freescale/ucc_geth.h +++ b/drivers/net/ethernet/freescale/ucc_geth.h @@ -1165,9 +1165,7 @@ struct ucc_geth_private { struct ucc_geth_exf_global_pram __iomem *p_exf_glbl_param; u32 exf_glbl_param_offset; struct ucc_geth_rx_global_pram __iomem *p_rx_glbl_pram; - u32 rx_glbl_pram_offset; struct ucc_geth_tx_global_pram __iomem *p_tx_glbl_pram; - u32 tx_glbl_pram_offset; struct ucc_geth_send_queue_mem_region __iomem *p_send_q_mem_reg; u32 send_q_mem_reg_offset; struct ucc_geth_thread_data_tx __iomem *p_thread_data_tx; -- GitLab From 632e3f2d9922c04fc179660693417b6d4a9007f1 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Tue, 19 Jan 2021 16:07:54 +0100 Subject: [PATCH 1204/2012] ethernet: ucc_geth: factor out parsing of {rx,tx}-clock{,-name} properties Reduce the code duplication a bit by moving the parsing of rx-clock-name and the fallback handling to a helper function. Signed-off-by: Rasmus Villemoes Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/freescale/ucc_geth.c | 80 ++++++++++------------- 1 file changed, 36 insertions(+), 44 deletions(-) diff --git a/drivers/net/ethernet/freescale/ucc_geth.c b/drivers/net/ethernet/freescale/ucc_geth.c index 75466489bf9a7..75d1fb0496986 100644 --- a/drivers/net/ethernet/freescale/ucc_geth.c +++ b/drivers/net/ethernet/freescale/ucc_geth.c @@ -3646,6 +3646,36 @@ static const struct net_device_ops ucc_geth_netdev_ops = { #endif }; +static int ucc_geth_parse_clock(struct device_node *np, const char *which, + enum qe_clock *out) +{ + const char *sprop; + char buf[24]; + + snprintf(buf, sizeof(buf), "%s-clock-name", which); + sprop = of_get_property(np, buf, NULL); + if (sprop) { + *out = qe_clock_source(sprop); + } else { + u32 val; + + snprintf(buf, sizeof(buf), "%s-clock", which); + if (of_property_read_u32(np, buf, &val)) { + /* If both *-clock-name and *-clock are missing, + * we want to tell people to use *-clock-name. + */ + pr_err("missing %s-clock-name property\n", buf); + return -EINVAL; + } + *out = val; + } + if (*out < QE_CLK_NONE || *out > QE_CLK24) { + pr_err("invalid %s property\n", buf); + return -EINVAL; + } + return 0; +} + static int ucc_geth_probe(struct platform_device* ofdev) { struct device *device = &ofdev->dev; @@ -3656,7 +3686,6 @@ static int ucc_geth_probe(struct platform_device* ofdev) struct resource res; int err, ucc_num, max_speed = 0; const unsigned int *prop; - const char *sprop; const void *mac_addr; phy_interface_t phy_interface; static const int enet_to_speed[] = { @@ -3695,49 +3724,12 @@ static int ucc_geth_probe(struct platform_device* ofdev) ug_info->uf_info.ucc_num = ucc_num; - sprop = of_get_property(np, "rx-clock-name", NULL); - if (sprop) { - ug_info->uf_info.rx_clock = qe_clock_source(sprop); - if ((ug_info->uf_info.rx_clock < QE_CLK_NONE) || - (ug_info->uf_info.rx_clock > QE_CLK24)) { - pr_err("invalid rx-clock-name property\n"); - return -EINVAL; - } - } else { - prop = of_get_property(np, "rx-clock", NULL); - if (!prop) { - /* If both rx-clock-name and rx-clock are missing, - we want to tell people to use rx-clock-name. */ - pr_err("missing rx-clock-name property\n"); - return -EINVAL; - } - if ((*prop < QE_CLK_NONE) || (*prop > QE_CLK24)) { - pr_err("invalid rx-clock property\n"); - return -EINVAL; - } - ug_info->uf_info.rx_clock = *prop; - } - - sprop = of_get_property(np, "tx-clock-name", NULL); - if (sprop) { - ug_info->uf_info.tx_clock = qe_clock_source(sprop); - if ((ug_info->uf_info.tx_clock < QE_CLK_NONE) || - (ug_info->uf_info.tx_clock > QE_CLK24)) { - pr_err("invalid tx-clock-name property\n"); - return -EINVAL; - } - } else { - prop = of_get_property(np, "tx-clock", NULL); - if (!prop) { - pr_err("missing tx-clock-name property\n"); - return -EINVAL; - } - if ((*prop < QE_CLK_NONE) || (*prop > QE_CLK24)) { - pr_err("invalid tx-clock property\n"); - return -EINVAL; - } - ug_info->uf_info.tx_clock = *prop; - } + err = ucc_geth_parse_clock(np, "rx", &ug_info->uf_info.rx_clock); + if (err) + return err; + err = ucc_geth_parse_clock(np, "tx", &ug_info->uf_info.tx_clock); + if (err) + return err; err = of_address_to_resource(np, 0, &res); if (err) -- GitLab From b0292e086beeb741601bd984e10f2e2a585c20ae Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Tue, 19 Jan 2021 16:07:55 +0100 Subject: [PATCH 1205/2012] ethernet: ucc_geth: constify ugeth_primary_info Signed-off-by: Rasmus Villemoes Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/freescale/ucc_geth.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/freescale/ucc_geth.c b/drivers/net/ethernet/freescale/ucc_geth.c index 75d1fb0496986..65ef7ae38912a 100644 --- a/drivers/net/ethernet/freescale/ucc_geth.c +++ b/drivers/net/ethernet/freescale/ucc_geth.c @@ -70,7 +70,7 @@ static struct { module_param_named(debug, debug.msg_enable, int, 0); MODULE_PARM_DESC(debug, "Debug verbosity level (0=none, ..., 0xffff=all)"); -static struct ucc_geth_info ugeth_primary_info = { +static const struct ucc_geth_info ugeth_primary_info = { .uf_info = { .bd_mem_part = MEM_PART_SYSTEM, .rtsm = UCC_FAST_SEND_IDLES_BETWEEN_FRAMES, -- GitLab From baff4311c40ddae7bfc144dc628d4b11c19f0366 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Tue, 19 Jan 2021 16:07:56 +0100 Subject: [PATCH 1206/2012] ethernet: ucc_geth: don't statically allocate eight ucc_geth_info struct ucc_geth_info is somewhat large, and on systems with only one or two UCC instances, that just wastes a few KB of memory. So allocate and populate a chunk of memory at probe time instead of initializing them all during driver init. Note that the existing "ug_info == NULL" check was dead code, as the address of some static array element can obviously never be NULL. Signed-off-by: Rasmus Villemoes Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/freescale/ucc_geth.c | 32 +++++++++-------------- 1 file changed, 12 insertions(+), 20 deletions(-) diff --git a/drivers/net/ethernet/freescale/ucc_geth.c b/drivers/net/ethernet/freescale/ucc_geth.c index 65ef7ae38912a..67b93d60243e8 100644 --- a/drivers/net/ethernet/freescale/ucc_geth.c +++ b/drivers/net/ethernet/freescale/ucc_geth.c @@ -157,8 +157,6 @@ static const struct ucc_geth_info ugeth_primary_info = { .riscRx = QE_RISC_ALLOCATION_RISC1_AND_RISC2, }; -static struct ucc_geth_info ugeth_info[8]; - #ifdef DEBUG static void mem_disp(u8 *addr, int size) { @@ -3715,25 +3713,23 @@ static int ucc_geth_probe(struct platform_device* ofdev) if ((ucc_num < 0) || (ucc_num > 7)) return -ENODEV; - ug_info = &ugeth_info[ucc_num]; - if (ug_info == NULL) { - if (netif_msg_probe(&debug)) - pr_err("[%d] Missing additional data!\n", ucc_num); - return -ENODEV; - } + ug_info = kmalloc(sizeof(*ug_info), GFP_KERNEL); + if (ug_info == NULL) + return -ENOMEM; + memcpy(ug_info, &ugeth_primary_info, sizeof(*ug_info)); ug_info->uf_info.ucc_num = ucc_num; err = ucc_geth_parse_clock(np, "rx", &ug_info->uf_info.rx_clock); if (err) - return err; + goto err_free_info; err = ucc_geth_parse_clock(np, "tx", &ug_info->uf_info.tx_clock); if (err) - return err; + goto err_free_info; err = of_address_to_resource(np, 0, &res); if (err) - return -EINVAL; + goto err_free_info; ug_info->uf_info.regs = res.start; ug_info->uf_info.irq = irq_of_parse_and_map(np, 0); @@ -3746,7 +3742,7 @@ static int ucc_geth_probe(struct platform_device* ofdev) */ err = of_phy_register_fixed_link(np); if (err) - return err; + goto err_free_info; ug_info->phy_node = of_node_get(np); } @@ -3877,6 +3873,8 @@ err_deregister_fixed_link: of_phy_deregister_fixed_link(np); of_node_put(ug_info->tbi_node); of_node_put(ug_info->phy_node); +err_free_info: + kfree(ug_info); return err; } @@ -3893,6 +3891,7 @@ static int ucc_geth_remove(struct platform_device* ofdev) of_phy_deregister_fixed_link(np); of_node_put(ugeth->ug_info->tbi_node); of_node_put(ugeth->ug_info->phy_node); + kfree(ugeth->ug_info); free_netdev(dev); return 0; @@ -3921,17 +3920,10 @@ static struct platform_driver ucc_geth_driver = { static int __init ucc_geth_init(void) { - int i, ret; - if (netif_msg_drv(&debug)) pr_info(DRV_DESC "\n"); - for (i = 0; i < 8; i++) - memcpy(&(ugeth_info[i]), &ugeth_primary_info, - sizeof(ugeth_primary_info)); - - ret = platform_driver_register(&ucc_geth_driver); - return ret; + return platform_driver_register(&ucc_geth_driver); } static void __exit ucc_geth_exit(void) -- GitLab From b29fafd3570b21c484e08e949bc868bfc12f0df1 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Tue, 19 Jan 2021 16:07:57 +0100 Subject: [PATCH 1207/2012] ethernet: ucc_geth: use UCC_GETH_{RX,TX}_BD_RING_ALIGNMENT macros directly These macros both have the value 32, there's no point first initializing align to a lower value. If anything, one could throw in a BUILD_BUG_ON(UCC_GETH_TX_BD_RING_ALIGNMENT < 4), but it's not worth it - lots of code depends on named constants having sensible values. Signed-off-by: Rasmus Villemoes Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/freescale/ucc_geth.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/freescale/ucc_geth.c b/drivers/net/ethernet/freescale/ucc_geth.c index 67b93d60243e8..2369a5ede680b 100644 --- a/drivers/net/ethernet/freescale/ucc_geth.c +++ b/drivers/net/ethernet/freescale/ucc_geth.c @@ -2196,9 +2196,8 @@ static int ucc_geth_alloc_tx(struct ucc_geth_private *ugeth) UCC_GETH_TX_BD_RING_SIZE_MEMORY_ALIGNMENT) length += UCC_GETH_TX_BD_RING_SIZE_MEMORY_ALIGNMENT; if (uf_info->bd_mem_part == MEM_PART_SYSTEM) { - u32 align = 4; - if (UCC_GETH_TX_BD_RING_ALIGNMENT > 4) - align = UCC_GETH_TX_BD_RING_ALIGNMENT; + u32 align = UCC_GETH_TX_BD_RING_ALIGNMENT; + ugeth->tx_bd_ring_offset[j] = (u32) kmalloc((u32) (length + align), GFP_KERNEL); @@ -2274,9 +2273,8 @@ static int ucc_geth_alloc_rx(struct ucc_geth_private *ugeth) for (j = 0; j < ug_info->numQueuesRx; j++) { length = ug_info->bdRingLenRx[j] * sizeof(struct qe_bd); if (uf_info->bd_mem_part == MEM_PART_SYSTEM) { - u32 align = 4; - if (UCC_GETH_RX_BD_RING_ALIGNMENT > 4) - align = UCC_GETH_RX_BD_RING_ALIGNMENT; + u32 align = UCC_GETH_RX_BD_RING_ALIGNMENT; + ugeth->rx_bd_ring_offset[j] = (u32) kmalloc((u32) (length + align), GFP_KERNEL); if (ugeth->rx_bd_ring_offset[j] != 0) -- GitLab From 64a99fe596f9cb2af2c23c64352817ff8cf662bb Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Tue, 19 Jan 2021 16:07:58 +0100 Subject: [PATCH 1208/2012] ethernet: ucc_geth: remove bd_mem_part and all associated code The bd_mem_part member of ucc_geth_info always has the value MEM_PART_SYSTEM, and AFAICT, there has never been any code setting it to any other value. Moreover, muram is a somewhat precious resource, so there's no point using that when normal memory serves just as well. Apart from removing a lot of dead code, this is also motivated by wanting to clean up the "store result from kmalloc() in a u32" mess. Signed-off-by: Rasmus Villemoes Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/freescale/ucc_geth.c | 108 ++++++---------------- include/soc/fsl/qe/qe.h | 6 -- include/soc/fsl/qe/ucc_fast.h | 1 - 3 files changed, 29 insertions(+), 86 deletions(-) diff --git a/drivers/net/ethernet/freescale/ucc_geth.c b/drivers/net/ethernet/freescale/ucc_geth.c index 2369a5ede680b..1e9d2f3f47a33 100644 --- a/drivers/net/ethernet/freescale/ucc_geth.c +++ b/drivers/net/ethernet/freescale/ucc_geth.c @@ -72,7 +72,6 @@ MODULE_PARM_DESC(debug, "Debug verbosity level (0=none, ..., 0xffff=all)"); static const struct ucc_geth_info ugeth_primary_info = { .uf_info = { - .bd_mem_part = MEM_PART_SYSTEM, .rtsm = UCC_FAST_SEND_IDLES_BETWEEN_FRAMES, .max_rx_buf_length = 1536, /* adjusted at startup if max-speed 1000 */ @@ -1854,12 +1853,7 @@ static void ucc_geth_free_rx(struct ucc_geth_private *ugeth) kfree(ugeth->rx_skbuff[i]); - if (ugeth->ug_info->uf_info.bd_mem_part == - MEM_PART_SYSTEM) - kfree((void *)ugeth->rx_bd_ring_offset[i]); - else if (ugeth->ug_info->uf_info.bd_mem_part == - MEM_PART_MURAM) - qe_muram_free(ugeth->rx_bd_ring_offset[i]); + kfree((void *)ugeth->rx_bd_ring_offset[i]); ugeth->p_rx_bd_ring[i] = NULL; } } @@ -1897,12 +1891,7 @@ static void ucc_geth_free_tx(struct ucc_geth_private *ugeth) kfree(ugeth->tx_skbuff[i]); if (ugeth->p_tx_bd_ring[i]) { - if (ugeth->ug_info->uf_info.bd_mem_part == - MEM_PART_SYSTEM) - kfree((void *)ugeth->tx_bd_ring_offset[i]); - else if (ugeth->ug_info->uf_info.bd_mem_part == - MEM_PART_MURAM) - qe_muram_free(ugeth->tx_bd_ring_offset[i]); + kfree((void *)ugeth->tx_bd_ring_offset[i]); ugeth->p_tx_bd_ring[i] = NULL; } } @@ -2060,13 +2049,6 @@ static int ucc_struct_init(struct ucc_geth_private *ugeth) ug_info = ugeth->ug_info; uf_info = &ug_info->uf_info; - if (!((uf_info->bd_mem_part == MEM_PART_SYSTEM) || - (uf_info->bd_mem_part == MEM_PART_MURAM))) { - if (netif_msg_probe(ugeth)) - pr_err("Bad memory partition value\n"); - return -EINVAL; - } - /* Rx BD lengths */ for (i = 0; i < ug_info->numQueuesRx; i++) { if ((ug_info->bdRingLenRx[i] < UCC_GETH_RX_BD_RING_SIZE_MIN) || @@ -2186,6 +2168,8 @@ static int ucc_geth_alloc_tx(struct ucc_geth_private *ugeth) /* Allocate Tx bds */ for (j = 0; j < ug_info->numQueuesTx; j++) { + u32 align = UCC_GETH_TX_BD_RING_ALIGNMENT; + /* Allocate in multiple of UCC_GETH_TX_BD_RING_SIZE_MEMORY_ALIGNMENT, according to spec */ @@ -2195,25 +2179,15 @@ static int ucc_geth_alloc_tx(struct ucc_geth_private *ugeth) if ((ug_info->bdRingLenTx[j] * sizeof(struct qe_bd)) % UCC_GETH_TX_BD_RING_SIZE_MEMORY_ALIGNMENT) length += UCC_GETH_TX_BD_RING_SIZE_MEMORY_ALIGNMENT; - if (uf_info->bd_mem_part == MEM_PART_SYSTEM) { - u32 align = UCC_GETH_TX_BD_RING_ALIGNMENT; - - ugeth->tx_bd_ring_offset[j] = - (u32) kmalloc((u32) (length + align), GFP_KERNEL); - - if (ugeth->tx_bd_ring_offset[j] != 0) - ugeth->p_tx_bd_ring[j] = - (u8 __iomem *)((ugeth->tx_bd_ring_offset[j] + - align) & ~(align - 1)); - } else if (uf_info->bd_mem_part == MEM_PART_MURAM) { - ugeth->tx_bd_ring_offset[j] = - qe_muram_alloc(length, - UCC_GETH_TX_BD_RING_ALIGNMENT); - if (!IS_ERR_VALUE(ugeth->tx_bd_ring_offset[j])) - ugeth->p_tx_bd_ring[j] = - (u8 __iomem *) qe_muram_addr(ugeth-> - tx_bd_ring_offset[j]); - } + + ugeth->tx_bd_ring_offset[j] = + (u32) kmalloc((u32) (length + align), GFP_KERNEL); + + if (ugeth->tx_bd_ring_offset[j] != 0) + ugeth->p_tx_bd_ring[j] = + (u8 __iomem *)((ugeth->tx_bd_ring_offset[j] + + align) & ~(align - 1)); + if (!ugeth->p_tx_bd_ring[j]) { if (netif_msg_ifup(ugeth)) pr_err("Can not allocate memory for Tx bd rings\n"); @@ -2271,25 +2245,16 @@ static int ucc_geth_alloc_rx(struct ucc_geth_private *ugeth) /* Allocate Rx bds */ for (j = 0; j < ug_info->numQueuesRx; j++) { + u32 align = UCC_GETH_RX_BD_RING_ALIGNMENT; + length = ug_info->bdRingLenRx[j] * sizeof(struct qe_bd); - if (uf_info->bd_mem_part == MEM_PART_SYSTEM) { - u32 align = UCC_GETH_RX_BD_RING_ALIGNMENT; - - ugeth->rx_bd_ring_offset[j] = - (u32) kmalloc((u32) (length + align), GFP_KERNEL); - if (ugeth->rx_bd_ring_offset[j] != 0) - ugeth->p_rx_bd_ring[j] = - (u8 __iomem *)((ugeth->rx_bd_ring_offset[j] + - align) & ~(align - 1)); - } else if (uf_info->bd_mem_part == MEM_PART_MURAM) { - ugeth->rx_bd_ring_offset[j] = - qe_muram_alloc(length, - UCC_GETH_RX_BD_RING_ALIGNMENT); - if (!IS_ERR_VALUE(ugeth->rx_bd_ring_offset[j])) - ugeth->p_rx_bd_ring[j] = - (u8 __iomem *) qe_muram_addr(ugeth-> - rx_bd_ring_offset[j]); - } + ugeth->rx_bd_ring_offset[j] = + (u32) kmalloc((u32) (length + align), GFP_KERNEL); + if (ugeth->rx_bd_ring_offset[j] != 0) + ugeth->p_rx_bd_ring[j] = + (u8 __iomem *)((ugeth->rx_bd_ring_offset[j] + + align) & ~(align - 1)); + if (!ugeth->p_rx_bd_ring[j]) { if (netif_msg_ifup(ugeth)) pr_err("Can not allocate memory for Rx bd rings\n"); @@ -2554,20 +2519,11 @@ static int ucc_geth_startup(struct ucc_geth_private *ugeth) endOfRing = ugeth->p_tx_bd_ring[i] + (ug_info->bdRingLenTx[i] - 1) * sizeof(struct qe_bd); - if (ugeth->ug_info->uf_info.bd_mem_part == MEM_PART_SYSTEM) { - out_be32(&ugeth->p_send_q_mem_reg->sqqd[i].bd_ring_base, - (u32) virt_to_phys(ugeth->p_tx_bd_ring[i])); - out_be32(&ugeth->p_send_q_mem_reg->sqqd[i]. - last_bd_completed_address, - (u32) virt_to_phys(endOfRing)); - } else if (ugeth->ug_info->uf_info.bd_mem_part == - MEM_PART_MURAM) { - out_be32(&ugeth->p_send_q_mem_reg->sqqd[i].bd_ring_base, - (u32)qe_muram_dma(ugeth->p_tx_bd_ring[i])); - out_be32(&ugeth->p_send_q_mem_reg->sqqd[i]. - last_bd_completed_address, - (u32)qe_muram_dma(endOfRing)); - } + out_be32(&ugeth->p_send_q_mem_reg->sqqd[i].bd_ring_base, + (u32) virt_to_phys(ugeth->p_tx_bd_ring[i])); + out_be32(&ugeth->p_send_q_mem_reg->sqqd[i]. + last_bd_completed_address, + (u32) virt_to_phys(endOfRing)); } /* schedulerbasepointer */ @@ -2786,14 +2742,8 @@ static int ucc_geth_startup(struct ucc_geth_private *ugeth) /* Setup the table */ /* Assume BD rings are already established */ for (i = 0; i < ug_info->numQueuesRx; i++) { - if (ugeth->ug_info->uf_info.bd_mem_part == MEM_PART_SYSTEM) { - out_be32(&ugeth->p_rx_bd_qs_tbl[i].externalbdbaseptr, - (u32) virt_to_phys(ugeth->p_rx_bd_ring[i])); - } else if (ugeth->ug_info->uf_info.bd_mem_part == - MEM_PART_MURAM) { - out_be32(&ugeth->p_rx_bd_qs_tbl[i].externalbdbaseptr, - (u32)qe_muram_dma(ugeth->p_rx_bd_ring[i])); - } + out_be32(&ugeth->p_rx_bd_qs_tbl[i].externalbdbaseptr, + (u32) virt_to_phys(ugeth->p_rx_bd_ring[i])); /* rest of fields handled by QE */ } diff --git a/include/soc/fsl/qe/qe.h b/include/soc/fsl/qe/qe.h index 66f1afc393d17..4925a1b59dc9c 100644 --- a/include/soc/fsl/qe/qe.h +++ b/include/soc/fsl/qe/qe.h @@ -27,12 +27,6 @@ #define QE_NUM_OF_BRGS 16 #define QE_NUM_OF_PORTS 1024 -/* Memory partitions -*/ -#define MEM_PART_SYSTEM 0 -#define MEM_PART_SECONDARY 1 -#define MEM_PART_MURAM 2 - /* Clocks and BRGs */ enum qe_clock { QE_CLK_NONE = 0, diff --git a/include/soc/fsl/qe/ucc_fast.h b/include/soc/fsl/qe/ucc_fast.h index dc4e79468094d..9696a5b9b5d1b 100644 --- a/include/soc/fsl/qe/ucc_fast.h +++ b/include/soc/fsl/qe/ucc_fast.h @@ -146,7 +146,6 @@ struct ucc_fast_info { resource_size_t regs; int irq; u32 uccm_mask; - int bd_mem_part; int brkpt_support; int grant_support; int tsa; -- GitLab From 33deb13c87e561c3b566c60aede124a5e23981c1 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Tue, 19 Jan 2021 16:07:59 +0100 Subject: [PATCH 1209/2012] ethernet: ucc_geth: replace kmalloc_array()+for loop by kcalloc() Signed-off-by: Rasmus Villemoes Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/freescale/ucc_geth.c | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/freescale/ucc_geth.c b/drivers/net/ethernet/freescale/ucc_geth.c index 1e9d2f3f47a33..621a9e3e4b65a 100644 --- a/drivers/net/ethernet/freescale/ucc_geth.c +++ b/drivers/net/ethernet/freescale/ucc_geth.c @@ -2203,8 +2203,8 @@ static int ucc_geth_alloc_tx(struct ucc_geth_private *ugeth) for (j = 0; j < ug_info->numQueuesTx; j++) { /* Setup the skbuff rings */ ugeth->tx_skbuff[j] = - kmalloc_array(ugeth->ug_info->bdRingLenTx[j], - sizeof(struct sk_buff *), GFP_KERNEL); + kcalloc(ugeth->ug_info->bdRingLenTx[j], + sizeof(struct sk_buff *), GFP_KERNEL); if (ugeth->tx_skbuff[j] == NULL) { if (netif_msg_ifup(ugeth)) @@ -2212,9 +2212,6 @@ static int ucc_geth_alloc_tx(struct ucc_geth_private *ugeth) return -ENOMEM; } - for (i = 0; i < ugeth->ug_info->bdRingLenTx[j]; i++) - ugeth->tx_skbuff[j][i] = NULL; - ugeth->skb_curtx[j] = ugeth->skb_dirtytx[j] = 0; bd = ugeth->confBd[j] = ugeth->txBd[j] = ugeth->p_tx_bd_ring[j]; for (i = 0; i < ug_info->bdRingLenTx[j]; i++) { @@ -2266,8 +2263,8 @@ static int ucc_geth_alloc_rx(struct ucc_geth_private *ugeth) for (j = 0; j < ug_info->numQueuesRx; j++) { /* Setup the skbuff rings */ ugeth->rx_skbuff[j] = - kmalloc_array(ugeth->ug_info->bdRingLenRx[j], - sizeof(struct sk_buff *), GFP_KERNEL); + kcalloc(ugeth->ug_info->bdRingLenRx[j], + sizeof(struct sk_buff *), GFP_KERNEL); if (ugeth->rx_skbuff[j] == NULL) { if (netif_msg_ifup(ugeth)) @@ -2275,9 +2272,6 @@ static int ucc_geth_alloc_rx(struct ucc_geth_private *ugeth) return -ENOMEM; } - for (i = 0; i < ugeth->ug_info->bdRingLenRx[j]; i++) - ugeth->rx_skbuff[j][i] = NULL; - ugeth->skb_currx[j] = 0; bd = ugeth->rxBd[j] = ugeth->p_rx_bd_ring[j]; for (i = 0; i < ug_info->bdRingLenRx[j]; i++) { -- GitLab From 634b5bd7318725202ffad6cbf034b006970b1112 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Tue, 19 Jan 2021 16:08:00 +0100 Subject: [PATCH 1210/2012] ethernet: ucc_geth: add helper to replace repeated switch statements The translation from the ucc_geth_num_of_threads enum value to the actual count can be written somewhat more compactly with a small lookup table, allowing us to replace the four switch statements. Signed-off-by: Rasmus Villemoes Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/freescale/ucc_geth.c | 100 +++++----------------- 1 file changed, 22 insertions(+), 78 deletions(-) diff --git a/drivers/net/ethernet/freescale/ucc_geth.c b/drivers/net/ethernet/freescale/ucc_geth.c index 621a9e3e4b65a..960b19fc4fb87 100644 --- a/drivers/net/ethernet/freescale/ucc_geth.c +++ b/drivers/net/ethernet/freescale/ucc_geth.c @@ -70,6 +70,20 @@ static struct { module_param_named(debug, debug.msg_enable, int, 0); MODULE_PARM_DESC(debug, "Debug verbosity level (0=none, ..., 0xffff=all)"); +static int ucc_geth_thread_count(enum ucc_geth_num_of_threads idx) +{ + static const u8 count[] = { + [UCC_GETH_NUM_OF_THREADS_1] = 1, + [UCC_GETH_NUM_OF_THREADS_2] = 2, + [UCC_GETH_NUM_OF_THREADS_4] = 4, + [UCC_GETH_NUM_OF_THREADS_6] = 6, + [UCC_GETH_NUM_OF_THREADS_8] = 8, + }; + if (idx >= ARRAY_SIZE(count)) + return 0; + return count[idx]; +} + static const struct ucc_geth_info ugeth_primary_info = { .uf_info = { .rtsm = UCC_FAST_SEND_IDLES_BETWEEN_FRAMES, @@ -668,32 +682,12 @@ static void dump_regs(struct ucc_geth_private *ugeth) in_be32(&ugeth->ug_regs->scam)); if (ugeth->p_thread_data_tx) { - int numThreadsTxNumerical; - switch (ugeth->ug_info->numThreadsTx) { - case UCC_GETH_NUM_OF_THREADS_1: - numThreadsTxNumerical = 1; - break; - case UCC_GETH_NUM_OF_THREADS_2: - numThreadsTxNumerical = 2; - break; - case UCC_GETH_NUM_OF_THREADS_4: - numThreadsTxNumerical = 4; - break; - case UCC_GETH_NUM_OF_THREADS_6: - numThreadsTxNumerical = 6; - break; - case UCC_GETH_NUM_OF_THREADS_8: - numThreadsTxNumerical = 8; - break; - default: - numThreadsTxNumerical = 0; - break; - } + int count = ucc_geth_thread_count(ugeth->ug_info->numThreadsTx); pr_info("Thread data TXs:\n"); pr_info("Base address: 0x%08x\n", (u32)ugeth->p_thread_data_tx); - for (i = 0; i < numThreadsTxNumerical; i++) { + for (i = 0; i < count; i++) { pr_info("Thread data TX[%d]:\n", i); pr_info("Base address: 0x%08x\n", (u32)&ugeth->p_thread_data_tx[i]); @@ -702,32 +696,12 @@ static void dump_regs(struct ucc_geth_private *ugeth) } } if (ugeth->p_thread_data_rx) { - int numThreadsRxNumerical; - switch (ugeth->ug_info->numThreadsRx) { - case UCC_GETH_NUM_OF_THREADS_1: - numThreadsRxNumerical = 1; - break; - case UCC_GETH_NUM_OF_THREADS_2: - numThreadsRxNumerical = 2; - break; - case UCC_GETH_NUM_OF_THREADS_4: - numThreadsRxNumerical = 4; - break; - case UCC_GETH_NUM_OF_THREADS_6: - numThreadsRxNumerical = 6; - break; - case UCC_GETH_NUM_OF_THREADS_8: - numThreadsRxNumerical = 8; - break; - default: - numThreadsRxNumerical = 0; - break; - } + int count = ucc_geth_thread_count(ugeth->ug_info->numThreadsRx); pr_info("Thread data RX:\n"); pr_info("Base address: 0x%08x\n", (u32)ugeth->p_thread_data_rx); - for (i = 0; i < numThreadsRxNumerical; i++) { + for (i = 0; i < count; i++) { pr_info("Thread data RX[%d]:\n", i); pr_info("Base address: 0x%08x\n", (u32)&ugeth->p_thread_data_rx[i]); @@ -2315,45 +2289,15 @@ static int ucc_geth_startup(struct ucc_geth_private *ugeth) uf_regs = uccf->uf_regs; ug_regs = ugeth->ug_regs; - switch (ug_info->numThreadsRx) { - case UCC_GETH_NUM_OF_THREADS_1: - numThreadsRxNumerical = 1; - break; - case UCC_GETH_NUM_OF_THREADS_2: - numThreadsRxNumerical = 2; - break; - case UCC_GETH_NUM_OF_THREADS_4: - numThreadsRxNumerical = 4; - break; - case UCC_GETH_NUM_OF_THREADS_6: - numThreadsRxNumerical = 6; - break; - case UCC_GETH_NUM_OF_THREADS_8: - numThreadsRxNumerical = 8; - break; - default: + numThreadsRxNumerical = ucc_geth_thread_count(ug_info->numThreadsRx); + if (!numThreadsRxNumerical) { if (netif_msg_ifup(ugeth)) pr_err("Bad number of Rx threads value\n"); return -EINVAL; } - switch (ug_info->numThreadsTx) { - case UCC_GETH_NUM_OF_THREADS_1: - numThreadsTxNumerical = 1; - break; - case UCC_GETH_NUM_OF_THREADS_2: - numThreadsTxNumerical = 2; - break; - case UCC_GETH_NUM_OF_THREADS_4: - numThreadsTxNumerical = 4; - break; - case UCC_GETH_NUM_OF_THREADS_6: - numThreadsTxNumerical = 6; - break; - case UCC_GETH_NUM_OF_THREADS_8: - numThreadsTxNumerical = 8; - break; - default: + numThreadsTxNumerical = ucc_geth_thread_count(ug_info->numThreadsTx); + if (!numThreadsTxNumerical) { if (netif_msg_ifup(ugeth)) pr_err("Bad number of Tx threads value\n"); return -EINVAL; -- GitLab From 53f49d86ea21084e7a1789a5256c170f27e02714 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Tue, 19 Jan 2021 16:08:01 +0100 Subject: [PATCH 1211/2012] ethernet: ucc_geth: inform the compiler that numQueues is always 1 The numQueuesTx and numQueuesRx members of struct ucc_geth_info are never set to anything but 1, and never have been. It's unclear how well the code supporting multiple queues would work. Until somebody wants to play with enabling that, help the compiler eliminate a lot of dead code and loops that are not really loops by creating static inline helpers. If and when the numQueuesTx/numQueuesRx fields are re-introduced, it suffices to update those helper to return the appropriate field. This cuts the .text segment of ucc_geth.o by 8%. Signed-off-by: Rasmus Villemoes Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/freescale/ucc_geth.c | 76 +++++++++++++---------- drivers/net/ethernet/freescale/ucc_geth.h | 2 - 2 files changed, 42 insertions(+), 36 deletions(-) diff --git a/drivers/net/ethernet/freescale/ucc_geth.c b/drivers/net/ethernet/freescale/ucc_geth.c index 960b19fc4fb87..9be1d4455a6bf 100644 --- a/drivers/net/ethernet/freescale/ucc_geth.c +++ b/drivers/net/ethernet/freescale/ucc_geth.c @@ -84,6 +84,16 @@ static int ucc_geth_thread_count(enum ucc_geth_num_of_threads idx) return count[idx]; } +static inline int ucc_geth_tx_queues(const struct ucc_geth_info *info) +{ + return 1; +} + +static inline int ucc_geth_rx_queues(const struct ucc_geth_info *info) +{ + return 1; +} + static const struct ucc_geth_info ugeth_primary_info = { .uf_info = { .rtsm = UCC_FAST_SEND_IDLES_BETWEEN_FRAMES, @@ -103,8 +113,6 @@ static const struct ucc_geth_info ugeth_primary_info = { .tcrc = UCC_FAST_16_BIT_CRC, .synl = UCC_FAST_SYNC_LEN_NOT_USED, }, - .numQueuesTx = 1, - .numQueuesRx = 1, .extendedFilteringChainPointer = ((uint32_t) NULL), .typeorlen = 3072 /*1536 */ , .nonBackToBackIfgPart1 = 0x40, @@ -569,7 +577,7 @@ static void dump_bds(struct ucc_geth_private *ugeth) int i; int length; - for (i = 0; i < ugeth->ug_info->numQueuesTx; i++) { + for (i = 0; i < ucc_geth_tx_queues(ugeth->ug_info); i++) { if (ugeth->p_tx_bd_ring[i]) { length = (ugeth->ug_info->bdRingLenTx[i] * @@ -578,7 +586,7 @@ static void dump_bds(struct ucc_geth_private *ugeth) mem_disp(ugeth->p_tx_bd_ring[i], length); } } - for (i = 0; i < ugeth->ug_info->numQueuesRx; i++) { + for (i = 0; i < ucc_geth_rx_queues(ugeth->ug_info); i++) { if (ugeth->p_rx_bd_ring[i]) { length = (ugeth->ug_info->bdRingLenRx[i] * @@ -876,7 +884,7 @@ static void dump_regs(struct ucc_geth_private *ugeth) if (ugeth->p_send_q_mem_reg) { pr_info("Send Q memory registers:\n"); pr_info("Base address: 0x%08x\n", (u32)ugeth->p_send_q_mem_reg); - for (i = 0; i < ugeth->ug_info->numQueuesTx; i++) { + for (i = 0; i < ucc_geth_tx_queues(ugeth->ug_info); i++) { pr_info("SQQD[%d]:\n", i); pr_info("Base address: 0x%08x\n", (u32)&ugeth->p_send_q_mem_reg->sqqd[i]); @@ -908,7 +916,7 @@ static void dump_regs(struct ucc_geth_private *ugeth) pr_info("RX IRQ coalescing tables:\n"); pr_info("Base address: 0x%08x\n", (u32)ugeth->p_rx_irq_coalescing_tbl); - for (i = 0; i < ugeth->ug_info->numQueuesRx; i++) { + for (i = 0; i < ucc_geth_rx_queues(ugeth->ug_info); i++) { pr_info("RX IRQ coalescing table entry[%d]:\n", i); pr_info("Base address: 0x%08x\n", (u32)&ugeth->p_rx_irq_coalescing_tbl-> @@ -930,7 +938,7 @@ static void dump_regs(struct ucc_geth_private *ugeth) if (ugeth->p_rx_bd_qs_tbl) { pr_info("RX BD QS tables:\n"); pr_info("Base address: 0x%08x\n", (u32)ugeth->p_rx_bd_qs_tbl); - for (i = 0; i < ugeth->ug_info->numQueuesRx; i++) { + for (i = 0; i < ucc_geth_rx_queues(ugeth->ug_info); i++) { pr_info("RX BD QS table[%d]:\n", i); pr_info("Base address: 0x%08x\n", (u32)&ugeth->p_rx_bd_qs_tbl[i]); @@ -1806,7 +1814,7 @@ static void ucc_geth_free_rx(struct ucc_geth_private *ugeth) ug_info = ugeth->ug_info; uf_info = &ug_info->uf_info; - for (i = 0; i < ugeth->ug_info->numQueuesRx; i++) { + for (i = 0; i < ucc_geth_rx_queues(ugeth->ug_info); i++) { if (ugeth->p_rx_bd_ring[i]) { /* Return existing data buffers in ring */ bd = ugeth->p_rx_bd_ring[i]; @@ -1846,7 +1854,7 @@ static void ucc_geth_free_tx(struct ucc_geth_private *ugeth) ug_info = ugeth->ug_info; uf_info = &ug_info->uf_info; - for (i = 0; i < ugeth->ug_info->numQueuesTx; i++) { + for (i = 0; i < ucc_geth_tx_queues(ugeth->ug_info); i++) { bd = ugeth->p_tx_bd_ring[i]; if (!bd) continue; @@ -2024,7 +2032,7 @@ static int ucc_struct_init(struct ucc_geth_private *ugeth) uf_info = &ug_info->uf_info; /* Rx BD lengths */ - for (i = 0; i < ug_info->numQueuesRx; i++) { + for (i = 0; i < ucc_geth_rx_queues(ug_info); i++) { if ((ug_info->bdRingLenRx[i] < UCC_GETH_RX_BD_RING_SIZE_MIN) || (ug_info->bdRingLenRx[i] % UCC_GETH_RX_BD_RING_SIZE_ALIGNMENT)) { @@ -2035,7 +2043,7 @@ static int ucc_struct_init(struct ucc_geth_private *ugeth) } /* Tx BD lengths */ - for (i = 0; i < ug_info->numQueuesTx; i++) { + for (i = 0; i < ucc_geth_tx_queues(ug_info); i++) { if (ug_info->bdRingLenTx[i] < UCC_GETH_TX_BD_RING_SIZE_MIN) { if (netif_msg_probe(ugeth)) pr_err("Tx BD ring length must be no smaller than 2\n"); @@ -2052,14 +2060,14 @@ static int ucc_struct_init(struct ucc_geth_private *ugeth) } /* num Tx queues */ - if (ug_info->numQueuesTx > NUM_TX_QUEUES) { + if (ucc_geth_tx_queues(ug_info) > NUM_TX_QUEUES) { if (netif_msg_probe(ugeth)) pr_err("number of tx queues too large\n"); return -EINVAL; } /* num Rx queues */ - if (ug_info->numQueuesRx > NUM_RX_QUEUES) { + if (ucc_geth_rx_queues(ug_info) > NUM_RX_QUEUES) { if (netif_msg_probe(ugeth)) pr_err("number of rx queues too large\n"); return -EINVAL; @@ -2067,7 +2075,7 @@ static int ucc_struct_init(struct ucc_geth_private *ugeth) /* l2qt */ for (i = 0; i < UCC_GETH_VLAN_PRIORITY_MAX; i++) { - if (ug_info->l2qt[i] >= ug_info->numQueuesRx) { + if (ug_info->l2qt[i] >= ucc_geth_rx_queues(ug_info)) { if (netif_msg_probe(ugeth)) pr_err("VLAN priority table entry must not be larger than number of Rx queues\n"); return -EINVAL; @@ -2076,7 +2084,7 @@ static int ucc_struct_init(struct ucc_geth_private *ugeth) /* l3qt */ for (i = 0; i < UCC_GETH_IP_PRIORITY_MAX; i++) { - if (ug_info->l3qt[i] >= ug_info->numQueuesRx) { + if (ug_info->l3qt[i] >= ucc_geth_rx_queues(ug_info)) { if (netif_msg_probe(ugeth)) pr_err("IP priority table entry must not be larger than number of Rx queues\n"); return -EINVAL; @@ -2099,10 +2107,10 @@ static int ucc_struct_init(struct ucc_geth_private *ugeth) /* Generate uccm_mask for receive */ uf_info->uccm_mask = ug_info->eventRegMask & UCCE_OTHER;/* Errors */ - for (i = 0; i < ug_info->numQueuesRx; i++) + for (i = 0; i < ucc_geth_rx_queues(ug_info); i++) uf_info->uccm_mask |= (UCC_GETH_UCCE_RXF0 << i); - for (i = 0; i < ug_info->numQueuesTx; i++) + for (i = 0; i < ucc_geth_tx_queues(ug_info); i++) uf_info->uccm_mask |= (UCC_GETH_UCCE_TXB0 << i); /* Initialize the general fast UCC block. */ if (ucc_fast_init(uf_info, &ugeth->uccf)) { @@ -2141,7 +2149,7 @@ static int ucc_geth_alloc_tx(struct ucc_geth_private *ugeth) uf_info = &ug_info->uf_info; /* Allocate Tx bds */ - for (j = 0; j < ug_info->numQueuesTx; j++) { + for (j = 0; j < ucc_geth_tx_queues(ug_info); j++) { u32 align = UCC_GETH_TX_BD_RING_ALIGNMENT; /* Allocate in multiple of @@ -2174,7 +2182,7 @@ static int ucc_geth_alloc_tx(struct ucc_geth_private *ugeth) } /* Init Tx bds */ - for (j = 0; j < ug_info->numQueuesTx; j++) { + for (j = 0; j < ucc_geth_tx_queues(ug_info); j++) { /* Setup the skbuff rings */ ugeth->tx_skbuff[j] = kcalloc(ugeth->ug_info->bdRingLenTx[j], @@ -2215,7 +2223,7 @@ static int ucc_geth_alloc_rx(struct ucc_geth_private *ugeth) uf_info = &ug_info->uf_info; /* Allocate Rx bds */ - for (j = 0; j < ug_info->numQueuesRx; j++) { + for (j = 0; j < ucc_geth_rx_queues(ug_info); j++) { u32 align = UCC_GETH_RX_BD_RING_ALIGNMENT; length = ug_info->bdRingLenRx[j] * sizeof(struct qe_bd); @@ -2234,7 +2242,7 @@ static int ucc_geth_alloc_rx(struct ucc_geth_private *ugeth) } /* Init Rx bds */ - for (j = 0; j < ug_info->numQueuesRx; j++) { + for (j = 0; j < ucc_geth_rx_queues(ug_info); j++) { /* Setup the skbuff rings */ ugeth->rx_skbuff[j] = kcalloc(ugeth->ug_info->bdRingLenRx[j], @@ -2437,7 +2445,7 @@ static int ucc_geth_startup(struct ucc_geth_private *ugeth) /* SQPTR */ /* Size varies with number of Tx queues */ ugeth->send_q_mem_reg_offset = - qe_muram_alloc(ug_info->numQueuesTx * + qe_muram_alloc(ucc_geth_tx_queues(ug_info) * sizeof(struct ucc_geth_send_queue_qd), UCC_GETH_SEND_QUEUE_QUEUE_DESCRIPTOR_ALIGNMENT); if (IS_ERR_VALUE(ugeth->send_q_mem_reg_offset)) { @@ -2453,7 +2461,7 @@ static int ucc_geth_startup(struct ucc_geth_private *ugeth) /* Setup the table */ /* Assume BD rings are already established */ - for (i = 0; i < ug_info->numQueuesTx; i++) { + for (i = 0; i < ucc_geth_tx_queues(ug_info); i++) { endOfRing = ugeth->p_tx_bd_ring[i] + (ug_info->bdRingLenTx[i] - 1) * sizeof(struct qe_bd); @@ -2466,7 +2474,7 @@ static int ucc_geth_startup(struct ucc_geth_private *ugeth) /* schedulerbasepointer */ - if (ug_info->numQueuesTx > 1) { + if (ucc_geth_tx_queues(ug_info) > 1) { /* scheduler exists only if more than 1 tx queue */ ugeth->scheduler_offset = qe_muram_alloc(sizeof(struct ucc_geth_scheduler), @@ -2529,11 +2537,11 @@ static int ucc_geth_startup(struct ucc_geth_private *ugeth) /* temoder */ /* Already has speed set */ - if (ug_info->numQueuesTx > 1) + if (ucc_geth_tx_queues(ug_info) > 1) temoder |= TEMODER_SCHEDULER_ENABLE; if (ug_info->ipCheckSumGenerate) temoder |= TEMODER_IP_CHECKSUM_GENERATE; - temoder |= ((ug_info->numQueuesTx - 1) << TEMODER_NUM_OF_QUEUES_SHIFT); + temoder |= ((ucc_geth_tx_queues(ug_info) - 1) << TEMODER_NUM_OF_QUEUES_SHIFT); out_be16(&ugeth->p_tx_glbl_pram->temoder, temoder); /* Function code register value to be used later */ @@ -2597,7 +2605,7 @@ static int ucc_geth_startup(struct ucc_geth_private *ugeth) /* Size varies with number of Rx queues */ ugeth->rx_irq_coalescing_tbl_offset = - qe_muram_alloc(ug_info->numQueuesRx * + qe_muram_alloc(ucc_geth_rx_queues(ug_info) * sizeof(struct ucc_geth_rx_interrupt_coalescing_entry) + 4, UCC_GETH_RX_INTERRUPT_COALESCING_ALIGNMENT); if (IS_ERR_VALUE(ugeth->rx_irq_coalescing_tbl_offset)) { @@ -2613,7 +2621,7 @@ static int ucc_geth_startup(struct ucc_geth_private *ugeth) ugeth->rx_irq_coalescing_tbl_offset); /* Fill interrupt coalescing table */ - for (i = 0; i < ug_info->numQueuesRx; i++) { + for (i = 0; i < ucc_geth_rx_queues(ug_info); i++) { out_be32(&ugeth->p_rx_irq_coalescing_tbl->coalescingentry[i]. interruptcoalescingmaxvalue, ug_info->interruptcoalescingmaxvalue[i]); @@ -2662,7 +2670,7 @@ static int ucc_geth_startup(struct ucc_geth_private *ugeth) /* RBDQPTR */ /* Size varies with number of Rx queues */ ugeth->rx_bd_qs_tbl_offset = - qe_muram_alloc(ug_info->numQueuesRx * + qe_muram_alloc(ucc_geth_rx_queues(ug_info) * (sizeof(struct ucc_geth_rx_bd_queues_entry) + sizeof(struct ucc_geth_rx_prefetched_bds)), UCC_GETH_RX_BD_QUEUES_ALIGNMENT); @@ -2679,7 +2687,7 @@ static int ucc_geth_startup(struct ucc_geth_private *ugeth) /* Setup the table */ /* Assume BD rings are already established */ - for (i = 0; i < ug_info->numQueuesRx; i++) { + for (i = 0; i < ucc_geth_rx_queues(ug_info); i++) { out_be32(&ugeth->p_rx_bd_qs_tbl[i].externalbdbaseptr, (u32) virt_to_phys(ugeth->p_rx_bd_ring[i])); /* rest of fields handled by QE */ @@ -2702,7 +2710,7 @@ static int ucc_geth_startup(struct ucc_geth_private *ugeth) ug_info-> vlanOperationNonTagged << REMODER_VLAN_OPERATION_NON_TAGGED_SHIFT; remoder |= ug_info->rxQoSMode << REMODER_RX_QOS_MODE_SHIFT; - remoder |= ((ug_info->numQueuesRx - 1) << REMODER_NUM_OF_QUEUES_SHIFT); + remoder |= ((ucc_geth_rx_queues(ug_info) - 1) << REMODER_NUM_OF_QUEUES_SHIFT); if (ug_info->ipCheckSumCheck) remoder |= REMODER_IP_CHECKSUM_CHECK; if (ug_info->ipAddressAlignment) @@ -2861,7 +2869,7 @@ static int ucc_geth_startup(struct ucc_geth_private *ugeth) } /* Load Rx bds with buffers */ - for (i = 0; i < ug_info->numQueuesRx; i++) { + for (i = 0; i < ucc_geth_rx_queues(ug_info); i++) { if ((ret_val = rx_bd_buffer_set(ugeth, (u8) i)) != 0) { if (netif_msg_ifup(ugeth)) pr_err("Can not fill Rx bds with buffers\n"); @@ -3132,12 +3140,12 @@ static int ucc_geth_poll(struct napi_struct *napi, int budget) /* Tx event processing */ spin_lock(&ugeth->lock); - for (i = 0; i < ug_info->numQueuesTx; i++) + for (i = 0; i < ucc_geth_tx_queues(ug_info); i++) ucc_geth_tx(ugeth->ndev, i); spin_unlock(&ugeth->lock); howmany = 0; - for (i = 0; i < ug_info->numQueuesRx; i++) + for (i = 0; i < ucc_geth_rx_queues(ug_info); i++) howmany += ucc_geth_rx(ugeth, i, budget - howmany); if (howmany < budget) { diff --git a/drivers/net/ethernet/freescale/ucc_geth.h b/drivers/net/ethernet/freescale/ucc_geth.h index 6b86217038a3b..73c1da4641879 100644 --- a/drivers/net/ethernet/freescale/ucc_geth.h +++ b/drivers/net/ethernet/freescale/ucc_geth.h @@ -1076,8 +1076,6 @@ struct ucc_geth_tad_params { /* GETH protocol initialization structure */ struct ucc_geth_info { struct ucc_fast_info uf_info; - u8 numQueuesTx; - u8 numQueuesRx; int ipCheckSumCheck; int ipCheckSumGenerate; int rxExtendedFiltering; -- GitLab From 9b0dfef4755301d9f7fcef63e2f64d23649bebb4 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Tue, 19 Jan 2021 16:08:02 +0100 Subject: [PATCH 1212/2012] ethernet: ucc_geth: simplify rx/tx allocations Since kmalloc() is nowadays [1] guaranteed to return naturally aligned (i.e., aligned to the size itself) memory for power-of-2 sizes, we don't need to over-allocate the align amount, compute an aligned address within the allocation, and (for later freeing) also storing the original pointer [2]. Instead, just round up the length we want to allocate to the alignment requirements, then round that up to the next power of 2. In theory, this could allocate up to about twice as much memory as we needed. In practice, (a) kmalloc() would in most cases anyway return a power-of-2-sized allocation and (b) with the default values of the bdRingLen[RT]x fields, the length is already itself a power of 2 greater than the alignment. So we actually end up saving memory compared to the current situtation (e.g. for tx, we currently allocate 128+32 bytes, which kmalloc() likely rounds up to 192 or 256; with this patch, we just allocate 128 bytes.) Also struct ucc_geth_private becomes a little smaller. [1] 59bb47985c1d ("mm, sl[aou]b: guarantee natural alignment for kmalloc(power-of-two)") [2] That storing was anyway done in a u32, which works on 32 bit machines, but is not very elegant and certainly makes a reader of the code pause for a while. Signed-off-by: Rasmus Villemoes Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/freescale/ucc_geth.c | 50 ++++++++--------------- drivers/net/ethernet/freescale/ucc_geth.h | 2 - 2 files changed, 17 insertions(+), 35 deletions(-) diff --git a/drivers/net/ethernet/freescale/ucc_geth.c b/drivers/net/ethernet/freescale/ucc_geth.c index 9be1d4455a6bf..ef4e2febeb5bd 100644 --- a/drivers/net/ethernet/freescale/ucc_geth.c +++ b/drivers/net/ethernet/freescale/ucc_geth.c @@ -1835,7 +1835,7 @@ static void ucc_geth_free_rx(struct ucc_geth_private *ugeth) kfree(ugeth->rx_skbuff[i]); - kfree((void *)ugeth->rx_bd_ring_offset[i]); + kfree(ugeth->p_rx_bd_ring[i]); ugeth->p_rx_bd_ring[i] = NULL; } } @@ -1872,10 +1872,8 @@ static void ucc_geth_free_tx(struct ucc_geth_private *ugeth) kfree(ugeth->tx_skbuff[i]); - if (ugeth->p_tx_bd_ring[i]) { - kfree((void *)ugeth->tx_bd_ring_offset[i]); - ugeth->p_tx_bd_ring[i] = NULL; - } + kfree(ugeth->p_tx_bd_ring[i]); + ugeth->p_tx_bd_ring[i] = NULL; } } @@ -2150,25 +2148,15 @@ static int ucc_geth_alloc_tx(struct ucc_geth_private *ugeth) /* Allocate Tx bds */ for (j = 0; j < ucc_geth_tx_queues(ug_info); j++) { - u32 align = UCC_GETH_TX_BD_RING_ALIGNMENT; - - /* Allocate in multiple of - UCC_GETH_TX_BD_RING_SIZE_MEMORY_ALIGNMENT, - according to spec */ - length = ((ug_info->bdRingLenTx[j] * sizeof(struct qe_bd)) - / UCC_GETH_TX_BD_RING_SIZE_MEMORY_ALIGNMENT) - * UCC_GETH_TX_BD_RING_SIZE_MEMORY_ALIGNMENT; - if ((ug_info->bdRingLenTx[j] * sizeof(struct qe_bd)) % - UCC_GETH_TX_BD_RING_SIZE_MEMORY_ALIGNMENT) - length += UCC_GETH_TX_BD_RING_SIZE_MEMORY_ALIGNMENT; - - ugeth->tx_bd_ring_offset[j] = - (u32) kmalloc((u32) (length + align), GFP_KERNEL); - - if (ugeth->tx_bd_ring_offset[j] != 0) - ugeth->p_tx_bd_ring[j] = - (u8 __iomem *)((ugeth->tx_bd_ring_offset[j] + - align) & ~(align - 1)); + u32 align = max(UCC_GETH_TX_BD_RING_ALIGNMENT, + UCC_GETH_TX_BD_RING_SIZE_MEMORY_ALIGNMENT); + u32 alloc; + + length = ug_info->bdRingLenTx[j] * sizeof(struct qe_bd); + alloc = round_up(length, align); + alloc = roundup_pow_of_two(alloc); + + ugeth->p_tx_bd_ring[j] = kmalloc(alloc, GFP_KERNEL); if (!ugeth->p_tx_bd_ring[j]) { if (netif_msg_ifup(ugeth)) @@ -2176,9 +2164,7 @@ static int ucc_geth_alloc_tx(struct ucc_geth_private *ugeth) return -ENOMEM; } /* Zero unused end of bd ring, according to spec */ - memset_io((void __iomem *)(ugeth->p_tx_bd_ring[j] + - ug_info->bdRingLenTx[j] * sizeof(struct qe_bd)), 0, - length - ug_info->bdRingLenTx[j] * sizeof(struct qe_bd)); + memset(ugeth->p_tx_bd_ring[j] + length, 0, alloc - length); } /* Init Tx bds */ @@ -2225,15 +2211,13 @@ static int ucc_geth_alloc_rx(struct ucc_geth_private *ugeth) /* Allocate Rx bds */ for (j = 0; j < ucc_geth_rx_queues(ug_info); j++) { u32 align = UCC_GETH_RX_BD_RING_ALIGNMENT; + u32 alloc; length = ug_info->bdRingLenRx[j] * sizeof(struct qe_bd); - ugeth->rx_bd_ring_offset[j] = - (u32) kmalloc((u32) (length + align), GFP_KERNEL); - if (ugeth->rx_bd_ring_offset[j] != 0) - ugeth->p_rx_bd_ring[j] = - (u8 __iomem *)((ugeth->rx_bd_ring_offset[j] + - align) & ~(align - 1)); + alloc = round_up(length, align); + alloc = roundup_pow_of_two(alloc); + ugeth->p_rx_bd_ring[j] = kmalloc(alloc, GFP_KERNEL); if (!ugeth->p_rx_bd_ring[j]) { if (netif_msg_ifup(ugeth)) pr_err("Can not allocate memory for Rx bd rings\n"); diff --git a/drivers/net/ethernet/freescale/ucc_geth.h b/drivers/net/ethernet/freescale/ucc_geth.h index 73c1da4641879..4294ed096ebbc 100644 --- a/drivers/net/ethernet/freescale/ucc_geth.h +++ b/drivers/net/ethernet/freescale/ucc_geth.h @@ -1181,9 +1181,7 @@ struct ucc_geth_private { struct ucc_geth_rx_bd_queues_entry __iomem *p_rx_bd_qs_tbl; u32 rx_bd_qs_tbl_offset; u8 __iomem *p_tx_bd_ring[NUM_TX_QUEUES]; - u32 tx_bd_ring_offset[NUM_TX_QUEUES]; u8 __iomem *p_rx_bd_ring[NUM_RX_QUEUES]; - u32 rx_bd_ring_offset[NUM_RX_QUEUES]; u8 __iomem *confBd[NUM_TX_QUEUES]; u8 __iomem *txBd[NUM_TX_QUEUES]; u8 __iomem *rxBd[NUM_RX_QUEUES]; -- GitLab From fc705fecf3a0c9128933cc6db59159c050aaca33 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Thu, 21 Jan 2021 14:54:46 +0200 Subject: [PATCH 1213/2012] perf evlist: Fix id index for heterogeneous systems perf_evlist__set_sid_idx() updates perf_sample_id with the evlist map index, CPU number and TID. It is passed indexes to the evsel's cpu and thread maps, but references the evlist's maps instead. That results in using incorrect CPU numbers on heterogeneous systems. Fix it by using evsel maps. The id index (PERF_RECORD_ID_INDEX) is used by AUX area tracing when in sampling mode. Having an incorrect CPU number causes the trace data to be attributed to the wrong CPU, and can result in decoder errors because the trace data is then associated with the wrong process. Committer notes: Keep the class prefix convention in the function name, switching from perf_evlist__set_sid_idx() to perf_evsel__set_sid_idx(). Fixes: 3c659eedada2fbf9 ("perf tools: Add id index") Signed-off-by: Adrian Hunter Cc: Jin Yao Cc: Jiri Olsa Link: http://lore.kernel.org/lkml/20210121125446.11287-1-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/perf/evlist.c | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) diff --git a/tools/lib/perf/evlist.c b/tools/lib/perf/evlist.c index cfcdbd7be066e..17465d454a0e3 100644 --- a/tools/lib/perf/evlist.c +++ b/tools/lib/perf/evlist.c @@ -367,21 +367,13 @@ static struct perf_mmap* perf_evlist__alloc_mmap(struct perf_evlist *evlist, boo return map; } -static void perf_evlist__set_sid_idx(struct perf_evlist *evlist, - struct perf_evsel *evsel, int idx, int cpu, - int thread) +static void perf_evsel__set_sid_idx(struct perf_evsel *evsel, int idx, int cpu, int thread) { struct perf_sample_id *sid = SID(evsel, cpu, thread); sid->idx = idx; - if (evlist->cpus && cpu >= 0) - sid->cpu = evlist->cpus->map[cpu]; - else - sid->cpu = -1; - if (!evsel->system_wide && evlist->threads && thread >= 0) - sid->tid = perf_thread_map__pid(evlist->threads, thread); - else - sid->tid = -1; + sid->cpu = perf_cpu_map__cpu(evsel->cpus, cpu); + sid->tid = perf_thread_map__pid(evsel->threads, thread); } static struct perf_mmap* @@ -500,8 +492,7 @@ mmap_per_evsel(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops, if (perf_evlist__id_add_fd(evlist, evsel, cpu, thread, fd) < 0) return -1; - perf_evlist__set_sid_idx(evlist, evsel, idx, cpu, - thread); + perf_evsel__set_sid_idx(evsel, idx, cpu, thread); } } -- GitLab From 9c880c24cb0db49d6e62e6d882df1470b0be8038 Mon Sep 17 00:00:00 2001 From: John Garry Date: Thu, 21 Jan 2021 00:18:38 +0800 Subject: [PATCH 1214/2012] perf metricgroup: Fix for metrics containing duration_time Metrics containing duration_time cause a segfault: $ perf stat -v -M L1D_Cache_Fill_BW sleep 1 Using CPUID GenuineIntel-6-3D-4 metric expr 64 * l1d.replacement / 1000000000 / duration_time for L1D_Cache_Fill_BW found event duration_time found event l1d.replacement adding {l1d.replacement}:W,duration_time l1d.replacement -> cpu/umask=0x1,(null)=0x1e8483,event=0x51/ Segmentation fault $ In commit c2337d67199a1ea1 ("perf metricgroup: Fix metrics using aliases covering multiple PMUs"), the logic in find_evsel_group() when iter'ing events was changed to not only select events in same group, but also for aliased PMUs. Checking whether events were for aliased PMUs was done by comparing the event PMU name. This was not safe for duration_time event, which has no associated PMU (and no PMU name), so fix by checking if the event PMU name is set also. Committer testing: Reproduced the bug, then, on a: $ grep -m1 ^'model name' /proc/cpuinfo model name : Intel(R) Core(TM) i7-8650U CPU @ 1.90GHz $ We now get: $ perf stat -M L1D_Cache_Fill_BW sleep 1 Performance counter stats for 'sleep 1': 4,141 l1d.replacement:u 1,001,285,107 ns duration_time:u 1.001285107 seconds time elapsed 0.000000000 seconds user 0.001119000 seconds sys $ Detais from -v: Using CPUID GenuineIntel-6-8E-A metric expr 64 * l1d.replacement / 1000000000 / duration_time for L1D_Cache_Fill_BW found event duration_time found event l1d.replacement adding {l1d.replacement}:W,duration_time l1d.replacement -> cpu/(null)=0x1e8483,umask=0x1,event=0x51/ Control descriptor is not initialized Warning: kernel.perf_event_paranoid=2, trying to fall back to excluding kernel and hypervisor samples Warning: kernel.perf_event_paranoid=2, trying to fall back to excluding kernel and hypervisor samples l1d.replacement:u: 4592 612201 612201 duration_time:u: 1001478621 1001478621 1001478621 Fixes: c2337d67199a1ea1 ("perf metricgroup: Fix metrics using aliases covering multiple PMUs") Reported-by: Joakim Zhang Signed-off-by: John Garry Tested-by: Arnaldo Carvalho de Melo Tested-by: Jiri Olsa Acked-by: Ian Rogers Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Ingo Molnar Cc: Kajol Jain Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: linuxarm@openeuler.org Link: https://lore.kernel.org/r/1611159518-226883-1-git-send-email-john.garry@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/metricgroup.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c index ee94d3e8dd654..8cb164ad06c1a 100644 --- a/tools/perf/util/metricgroup.c +++ b/tools/perf/util/metricgroup.c @@ -162,6 +162,14 @@ static bool contains_event(struct evsel **metric_events, int num_events, return false; } +static bool evsel_same_pmu(struct evsel *ev1, struct evsel *ev2) +{ + if (!ev1->pmu_name || !ev2->pmu_name) + return false; + + return !strcmp(ev1->pmu_name, ev2->pmu_name); +} + /** * Find a group of events in perf_evlist that correspond to those from a parsed * metric expression. Note, as find_evsel_group is called in the same order as @@ -280,8 +288,7 @@ static struct evsel *find_evsel_group(struct evlist *perf_evlist, */ if (!has_constraint && ev->leader != metric_events[i]->leader && - !strcmp(ev->leader->pmu_name, - metric_events[i]->leader->pmu_name)) + evsel_same_pmu(ev->leader, metric_events[i]->leader)) break; if (!strcmp(metric_events[i]->name, ev->name)) { set_bit(ev->idx, evlist_used); -- GitLab From 3d6e79ee9e8f8c6604312382c2be1d1bd1cffc9e Mon Sep 17 00:00:00 2001 From: John Garry Date: Tue, 19 Jan 2021 18:04:15 +0800 Subject: [PATCH 1215/2012] perf metricgroup: Fix system PMU metrics Joakim reports that getting "perf stat" for multiple system PMU metrics segfaults: $ perf stat -a -I 1000 -M imx8mm_ddr_write.all,imx8mm_ddr_write.all Segmentation fault $ While the same works without issue for a single metric. The logic in metricgroup__add_metric_sys_event_iter() is broken, in that add_metric() @m argument should be NULL for each new metric. Fix by not passing a holder for that, and rather make local in metricgroup__add_metric_sys_event_iter(). Fixes: be335ec28efa ("perf metricgroup: Support adding metrics for system PMUs") Reported-by: Joakim Zhang Signed-off-by: John Garry Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Ian Rogers Cc: Ingo Molnar Cc: Kajol Jain Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: linuxarm@openeuler.org Link: https://lore.kernel.org/r/1611050655-44020-1-git-send-email-john.garry@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/metricgroup.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c index 8cb164ad06c1a..e6d3452031e52 100644 --- a/tools/perf/util/metricgroup.c +++ b/tools/perf/util/metricgroup.c @@ -773,7 +773,6 @@ int __weak arch_get_runtimeparam(struct pmu_event *pe __maybe_unused) struct metricgroup_add_iter_data { struct list_head *metric_list; const char *metric; - struct metric **m; struct expr_ids *ids; int *ret; bool *has_match; @@ -1065,12 +1064,13 @@ static int metricgroup__add_metric_sys_event_iter(struct pmu_event *pe, void *data) { struct metricgroup_add_iter_data *d = data; + struct metric *m = NULL; int ret; if (!match_pe_metric(pe, d->metric)) return 0; - ret = add_metric(d->metric_list, pe, d->metric_no_group, d->m, NULL, d->ids); + ret = add_metric(d->metric_list, pe, d->metric_no_group, &m, NULL, d->ids); if (ret) return ret; @@ -1121,7 +1121,6 @@ static int metricgroup__add_metric(const char *metric, bool metric_no_group, .metric_list = &list, .metric = metric, .metric_no_group = metric_no_group, - .m = &m, .ids = &ids, .has_match = &has_match, .ret = &ret, -- GitLab From 8adc0a06d68a2e433b960377e515e7a6b19b429f Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Wed, 9 Dec 2020 08:58:28 +0800 Subject: [PATCH 1216/2012] perf script: Fix overrun issue for dynamically-allocated PMU type number When unpacking the event which is from dynamic PMU, the array output[OUTPUT_TYPE_MAX] may be overrun. For example, type number of SKL uncore_imc is 10, but OUTPUT_TYPE_MAX is 7 now (OUTPUT_TYPE_MAX = PERF_TYPE_MAX + 1). /* In builtin-script.c */ process_event() { unsigned int type = output_type(attr->type); if (output[type].fields == 0) return; } output[10] is overrun. Create a type OUTPUT_TYPE_OTHER for dynamic PMU events, then output_type(attr->type) will return OUTPUT_TYPE_OTHER here. Note that if PERF_TYPE_MAX ever changed, then there would be a conflict between old perf.data files that had a dynamicaliy allocated PMU number that would then be the same as a fixed PERF_TYPE. Example: # perf record --switch-events -C 0 -e "{cpu-clock,uncore_imc/data_reads/,uncore_imc/data_writes/}:SD" -a -- sleep 1 # perf script Before: swapper 0 [000] 1479253.987551: 277766 cpu-clock: ffffffff9d4ddb6f cpuidle_enter_state+0xdf ([kernel.kallsyms]) swapper 0 [000] 1479253.987797: 246709 cpu-clock: ffffffff9d4ddb6f cpuidle_enter_state+0xdf ([kernel.kallsyms]) swapper 0 [000] 1479253.988127: 329883 cpu-clock: ffffffff9d4ddb6f cpuidle_enter_state+0xdf ([kernel.kallsyms]) swapper 0 [000] 1479253.988273: 146393 cpu-clock: ffffffff9d4ddb6f cpuidle_enter_state+0xdf ([kernel.kallsyms]) swapper 0 [000] 1479253.988523: 249977 cpu-clock: ffffffff9d4ddb6f cpuidle_enter_state+0xdf ([kernel.kallsyms]) swapper 0 [000] 1479253.988877: 354090 cpu-clock: ffffffff9d4ddb6f cpuidle_enter_state+0xdf ([kernel.kallsyms]) swapper 0 [000] 1479253.989023: 145940 cpu-clock: ffffffff9d4ddb6f cpuidle_enter_state+0xdf ([kernel.kallsyms]) swapper 0 [000] 1479253.989383: 359856 cpu-clock: ffffffff9d4ddb6f cpuidle_enter_state+0xdf ([kernel.kallsyms]) swapper 0 [000] 1479253.989523: 140082 cpu-clock: ffffffff9d4ddb6f cpuidle_enter_state+0xdf ([kernel.kallsyms]) After: swapper 0 [000] 1397040.402011: 272384 cpu-clock: ffffffff9d4ddb6f cpuidle_enter_state+0xdf ([kernel.kallsyms]) swapper 0 [000] 1397040.402011: 5396 uncore_imc/data_reads/: swapper 0 [000] 1397040.402011: 967 uncore_imc/data_writes/: swapper 0 [000] 1397040.402259: 249153 cpu-clock: ffffffff9d4ddb6f cpuidle_enter_state+0xdf ([kernel.kallsyms]) swapper 0 [000] 1397040.402259: 7231 uncore_imc/data_reads/: swapper 0 [000] 1397040.402259: 1297 uncore_imc/data_writes/: swapper 0 [000] 1397040.402508: 249108 cpu-clock: ffffffff9d4ddb6f cpuidle_enter_state+0xdf ([kernel.kallsyms]) swapper 0 [000] 1397040.402508: 5333 uncore_imc/data_reads/: swapper 0 [000] 1397040.402508: 1008 uncore_imc/data_writes/: Signed-off-by: Jin Yao Acked-by: Adrian Hunter Acked-by: Jiri Olsa Cc: Peter Zijlstra Cc: Kan Liang Cc: Andi Kleen Cc: Alexander Shishkin Cc: Ingo Molnar Link: https://lore.kernel.org/r/20201209005828.21302-1-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-script.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index edacfa98d073b..42dad4a0f8cf4 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -186,6 +186,7 @@ struct output_option { enum { OUTPUT_TYPE_SYNTH = PERF_TYPE_MAX, + OUTPUT_TYPE_OTHER, OUTPUT_TYPE_MAX }; @@ -283,6 +284,18 @@ static struct { .invalid_fields = PERF_OUTPUT_TRACE | PERF_OUTPUT_BPF_OUTPUT, }, + + [OUTPUT_TYPE_OTHER] = { + .user_set = false, + + .fields = PERF_OUTPUT_COMM | PERF_OUTPUT_TID | + PERF_OUTPUT_CPU | PERF_OUTPUT_TIME | + PERF_OUTPUT_EVNAME | PERF_OUTPUT_IP | + PERF_OUTPUT_SYM | PERF_OUTPUT_SYMOFFSET | + PERF_OUTPUT_DSO | PERF_OUTPUT_PERIOD, + + .invalid_fields = PERF_OUTPUT_TRACE | PERF_OUTPUT_BPF_OUTPUT, + }, }; struct evsel_script { @@ -343,8 +356,11 @@ static inline int output_type(unsigned int type) case PERF_TYPE_SYNTH: return OUTPUT_TYPE_SYNTH; default: - return type; + if (type < PERF_TYPE_MAX) + return type; } + + return OUTPUT_TYPE_OTHER; } static inline unsigned int attr_type(unsigned int type) -- GitLab From 4026d80142b644c107586f279fa319ff5c6e0d18 Mon Sep 17 00:00:00 2001 From: George McCollister Date: Wed, 20 Jan 2021 07:53:23 -0600 Subject: [PATCH 1217/2012] MAINTAINERS: add entry for Arrow SpeedChips XRS7000 driver Add myself as maintainer of the Arrow SpeedChips XRS7000 series Ethernet switch driver. Suggested-by: Jakub Kicinski Signed-off-by: George McCollister Link: https://lore.kernel.org/r/20210120135323.73856-1-george.mccollister@gmail.com Signed-off-by: Jakub Kicinski --- MAINTAINERS | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 1df56a32d2df9..650deb973913a 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2787,6 +2787,14 @@ F: arch/arm64/ F: tools/testing/selftests/arm64/ X: arch/arm64/boot/dts/ +ARROW SPEEDCHIPS XRS7000 SERIES ETHERNET SWITCH DRIVER +M: George McCollister +L: netdev@vger.kernel.org +S: Maintained +F: Documentation/devicetree/bindings/net/dsa/arrow,xrs700x.yaml +F: drivers/net/dsa/xrs700x/* +F: net/dsa/tag_xrs700x.c + AS3645A LED FLASH CONTROLLER DRIVER M: Sakari Ailus L: linux-leds@vger.kernel.org -- GitLab From 655cf86548a3938538642a6df27dd359e13c86bd Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Thu, 14 Jan 2021 16:32:42 -0600 Subject: [PATCH 1218/2012] objtool: Don't fail the kernel build on fatal errors This is basically a revert of commit 644592d32837 ("objtool: Fail the kernel build on fatal errors"). That change turned out to be more trouble than it's worth. Failing the build is an extreme measure which sometimes gets too much attention and blocks CI build testing. These fatal-type warnings aren't yet as rare as we'd hope, due to the ever-increasing matrix of supported toolchains/plugins and their fast-changing nature as of late. Also, there are more people (and bots) looking for objtool warnings than ever before, so even non-fatal warnings aren't likely to be ignored for long. Suggested-by: Nick Desaulniers Reviewed-by: Miroslav Benes Reviewed-by: Nick Desaulniers Reviewed-by: Kamalesh Babulal Signed-off-by: Josh Poimboeuf --- tools/objtool/check.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 5f8d3eed78a18..4bd30315eb62b 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -2928,14 +2928,10 @@ int check(struct objtool_file *file) warnings += ret; out: - if (ret < 0) { - /* - * Fatal error. The binary is corrupt or otherwise broken in - * some way, or objtool itself is broken. Fail the kernel - * build. - */ - return ret; - } - + /* + * For now, don't fail the kernel build on fatal warnings. These + * errors are still fairly common due to the growing matrix of + * supported toolchains and their recent pace of change. + */ return 0; } -- GitLab From 1d489151e9f9d1647110277ff77282fe4d96d09b Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Thu, 14 Jan 2021 16:14:01 -0600 Subject: [PATCH 1219/2012] objtool: Don't fail on missing symbol table Thanks to a recent binutils change which doesn't generate unused symbols, it's now possible for thunk_64.o be completely empty without CONFIG_PREEMPTION: no text, no data, no symbols. We could edit the Makefile to only build that file when CONFIG_PREEMPTION is enabled, but that will likely create confusion if/when the thunks end up getting used by some other code again. Just ignore it and move on. Reported-by: Nathan Chancellor Reviewed-by: Nathan Chancellor Reviewed-by: Miroslav Benes Tested-by: Nathan Chancellor Link: https://github.com/ClangBuiltLinux/linux/issues/1254 Signed-off-by: Josh Poimboeuf --- tools/objtool/elf.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c index f9682db33ccab..d8421e1d06bed 100644 --- a/tools/objtool/elf.c +++ b/tools/objtool/elf.c @@ -380,8 +380,11 @@ static int read_symbols(struct elf *elf) symtab = find_section_by_name(elf, ".symtab"); if (!symtab) { - WARN("missing symbol table"); - return -1; + /* + * A missing symbol table is actually possible if it's an empty + * .o file. This can happen for thunk_64.o. + */ + return 0; } symtab_shndx = find_section_by_name(elf, ".symtab_shndx"); -- GitLab From 6e1239c13953f3c2a76e70031f74ddca9ae57cd3 Mon Sep 17 00:00:00 2001 From: Gayatri Kammela Date: Thu, 21 Jan 2021 13:50:04 -0800 Subject: [PATCH 1220/2012] x86/cpu: Add another Alder Lake CPU to the Intel family Add Alder Lake mobile CPU model number to Intel family. Signed-off-by: Gayatri Kammela Signed-off-by: Tony Luck Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20210121215004.11618-1-tony.luck@intel.com --- arch/x86/include/asm/intel-family.h | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h index 5e658ba2654a7..9abe842dbd843 100644 --- a/arch/x86/include/asm/intel-family.h +++ b/arch/x86/include/asm/intel-family.h @@ -97,6 +97,7 @@ #define INTEL_FAM6_LAKEFIELD 0x8A #define INTEL_FAM6_ALDERLAKE 0x97 +#define INTEL_FAM6_ALDERLAKE_L 0x9A /* "Small Core" Processors (Atom) */ -- GitLab From fdb6b338d2e5f12dda2d80880ae4800547388bb4 Mon Sep 17 00:00:00 2001 From: Jiapeng Zhong Date: Wed, 20 Jan 2021 15:01:51 +0800 Subject: [PATCH 1221/2012] cxgb4: Assign boolean values to a bool variable Fix the following coccicheck warnings: ./drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c:5142:2-33: WARNING: Assignment of 0/1 to bool variable. Reported-by: Abaci Robot Signed-off-by: Jiapeng Zhong Link: https://lore.kernel.org/r/1611126111-22079-1-git-send-email-abaci-bugfix@linux.alibaba.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index 15542661e3d2b..9f1965c80fb1b 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -5137,7 +5137,7 @@ static int adap_init0(struct adapter *adap, int vpd_skip) /* See if FW supports FW_FILTER2 work request */ if (is_t4(adap->params.chip)) { - adap->params.filter2_wr_support = 0; + adap->params.filter2_wr_support = false; } else { params[0] = FW_PARAM_DEV(FILTER2_WR); ret = t4_query_params(adap, adap->mbox, adap->pf, 0, -- GitLab From 05fcc25662a3bbfc5daa9247132b2d8535053883 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Wed, 20 Jan 2021 08:27:14 +0100 Subject: [PATCH 1222/2012] cxgb4: remove bogus CHELSIO_VPD_UNIQUE_ID constant The comment is quite weird, there is no such thing as a vendor-specific VPD id. 0x82 is the value of PCI_VPD_LRDT_ID_STRING. So what we are doing here is simply checking whether the byte at VPD address VPD_BASE is a valid string LRDT, same as what is done a few lines later in the code. LRDT = Large Resource Data Tag, see PCI 2.2 spec, VPD chapter v2: - don't set VPD_BASE / VPD_BASE_OLD separately Signed-off-by: Heiner Kallweit Link: https://lore.kernel.org/r/644ef22f-e86a-5cc1-0f27-f873ab165696@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/chelsio/cxgb4/t4_hw.c | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c index 98d01a7497ecd..98829e482bfa9 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c @@ -2689,7 +2689,6 @@ void t4_get_regs(struct adapter *adap, void *buf, size_t buf_size) #define VPD_BASE 0x400 #define VPD_BASE_OLD 0 #define VPD_LEN 1024 -#define CHELSIO_VPD_UNIQUE_ID 0x82 /** * t4_eeprom_ptov - translate a physical EEPROM address to virtual @@ -2745,7 +2744,7 @@ int t4_get_raw_vpd_params(struct adapter *adapter, struct vpd_params *p) { int i, ret = 0, addr; int ec, sn, pn, na; - u8 *vpd, csum; + u8 *vpd, csum, base_val = 0; unsigned int vpdr_len, kw_offset, id_len; vpd = vmalloc(VPD_LEN); @@ -2755,17 +2754,11 @@ int t4_get_raw_vpd_params(struct adapter *adapter, struct vpd_params *p) /* Card information normally starts at VPD_BASE but early cards had * it at 0. */ - ret = pci_read_vpd(adapter->pdev, VPD_BASE, sizeof(u32), vpd); + ret = pci_read_vpd(adapter->pdev, VPD_BASE, 1, &base_val); if (ret < 0) goto out; - /* The VPD shall have a unique identifier specified by the PCI SIG. - * For chelsio adapters, the identifier is 0x82. The first byte of a VPD - * shall be CHELSIO_VPD_UNIQUE_ID (0x82). The VPD programming software - * is expected to automatically put this entry at the - * beginning of the VPD. - */ - addr = *vpd == CHELSIO_VPD_UNIQUE_ID ? VPD_BASE : VPD_BASE_OLD; + addr = base_val == PCI_VPD_LRDT_ID_STRING ? VPD_BASE : VPD_BASE_OLD; ret = pci_read_vpd(adapter->pdev, addr, VPD_LEN, vpd); if (ret < 0) -- GitLab From 19038523a7353e7413c5428f20376fa3ccd2c8e9 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 20 Jan 2021 16:06:31 +0100 Subject: [PATCH 1223/2012] net: remove aurora nb8800 driver The tango4 platform is getting removed, and this driver has no other known users, so it can be removed. Cc: Marc Gonzalez Signed-off-by: Arnd Bergmann Acked-by: Mans Rullgard Link: https://lore.kernel.org/r/20210120150703.1629527-1-arnd@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/Kconfig | 1 - drivers/net/ethernet/Makefile | 1 - drivers/net/ethernet/aurora/Kconfig | 23 - drivers/net/ethernet/aurora/Makefile | 2 - drivers/net/ethernet/aurora/nb8800.c | 1520 -------------------------- drivers/net/ethernet/aurora/nb8800.h | 316 ------ 6 files changed, 1863 deletions(-) delete mode 100644 drivers/net/ethernet/aurora/Kconfig delete mode 100644 drivers/net/ethernet/aurora/Makefile delete mode 100644 drivers/net/ethernet/aurora/nb8800.c delete mode 100644 drivers/net/ethernet/aurora/nb8800.h diff --git a/drivers/net/ethernet/Kconfig b/drivers/net/ethernet/Kconfig index de50e8b9e6562..ad04660b97b80 100644 --- a/drivers/net/ethernet/Kconfig +++ b/drivers/net/ethernet/Kconfig @@ -33,7 +33,6 @@ source "drivers/net/ethernet/apple/Kconfig" source "drivers/net/ethernet/aquantia/Kconfig" source "drivers/net/ethernet/arc/Kconfig" source "drivers/net/ethernet/atheros/Kconfig" -source "drivers/net/ethernet/aurora/Kconfig" source "drivers/net/ethernet/broadcom/Kconfig" source "drivers/net/ethernet/brocade/Kconfig" source "drivers/net/ethernet/cadence/Kconfig" diff --git a/drivers/net/ethernet/Makefile b/drivers/net/ethernet/Makefile index f8f38dcb5f8a0..1e7dc8a7762dc 100644 --- a/drivers/net/ethernet/Makefile +++ b/drivers/net/ethernet/Makefile @@ -19,7 +19,6 @@ obj-$(CONFIG_NET_VENDOR_APPLE) += apple/ obj-$(CONFIG_NET_VENDOR_AQUANTIA) += aquantia/ obj-$(CONFIG_NET_VENDOR_ARC) += arc/ obj-$(CONFIG_NET_VENDOR_ATHEROS) += atheros/ -obj-$(CONFIG_NET_VENDOR_AURORA) += aurora/ obj-$(CONFIG_NET_VENDOR_CADENCE) += cadence/ obj-$(CONFIG_NET_VENDOR_BROADCOM) += broadcom/ obj-$(CONFIG_NET_VENDOR_BROCADE) += brocade/ diff --git a/drivers/net/ethernet/aurora/Kconfig b/drivers/net/ethernet/aurora/Kconfig deleted file mode 100644 index 9ee30ea90bfae..0000000000000 --- a/drivers/net/ethernet/aurora/Kconfig +++ /dev/null @@ -1,23 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only -config NET_VENDOR_AURORA - bool "Aurora VLSI devices" - default y - help - If you have a network (Ethernet) device belonging to this class, - say Y. - - Note that the answer to this question doesn't directly affect the - kernel: saying N will just cause the configurator to skip all - questions about Aurora devices. If you say Y, you will be asked - for your specific device in the following questions. - -if NET_VENDOR_AURORA - -config AURORA_NB8800 - tristate "Aurora AU-NB8800 support" - depends on HAS_DMA - select PHYLIB - help - Support for the AU-NB8800 gigabit Ethernet controller. - -endif diff --git a/drivers/net/ethernet/aurora/Makefile b/drivers/net/ethernet/aurora/Makefile deleted file mode 100644 index f3d599867619d..0000000000000 --- a/drivers/net/ethernet/aurora/Makefile +++ /dev/null @@ -1,2 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only -obj-$(CONFIG_AURORA_NB8800) += nb8800.o diff --git a/drivers/net/ethernet/aurora/nb8800.c b/drivers/net/ethernet/aurora/nb8800.c deleted file mode 100644 index 5b20185cbd627..0000000000000 --- a/drivers/net/ethernet/aurora/nb8800.c +++ /dev/null @@ -1,1520 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Copyright (C) 2015 Mans Rullgard - * - * Mostly rewritten, based on driver from Sigma Designs. Original - * copyright notice below. - * - * Driver for tangox SMP864x/SMP865x/SMP867x/SMP868x builtin Ethernet Mac. - * - * Copyright (C) 2005 Maxime Bizon - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "nb8800.h" - -static void nb8800_tx_done(struct net_device *dev); -static int nb8800_dma_stop(struct net_device *dev); - -static inline u8 nb8800_readb(struct nb8800_priv *priv, int reg) -{ - return readb_relaxed(priv->base + reg); -} - -static inline u32 nb8800_readl(struct nb8800_priv *priv, int reg) -{ - return readl_relaxed(priv->base + reg); -} - -static inline void nb8800_writeb(struct nb8800_priv *priv, int reg, u8 val) -{ - writeb_relaxed(val, priv->base + reg); -} - -static inline void nb8800_writew(struct nb8800_priv *priv, int reg, u16 val) -{ - writew_relaxed(val, priv->base + reg); -} - -static inline void nb8800_writel(struct nb8800_priv *priv, int reg, u32 val) -{ - writel_relaxed(val, priv->base + reg); -} - -static inline void nb8800_maskb(struct nb8800_priv *priv, int reg, - u32 mask, u32 val) -{ - u32 old = nb8800_readb(priv, reg); - u32 new = (old & ~mask) | (val & mask); - - if (new != old) - nb8800_writeb(priv, reg, new); -} - -static inline void nb8800_maskl(struct nb8800_priv *priv, int reg, - u32 mask, u32 val) -{ - u32 old = nb8800_readl(priv, reg); - u32 new = (old & ~mask) | (val & mask); - - if (new != old) - nb8800_writel(priv, reg, new); -} - -static inline void nb8800_modb(struct nb8800_priv *priv, int reg, u8 bits, - bool set) -{ - nb8800_maskb(priv, reg, bits, set ? bits : 0); -} - -static inline void nb8800_setb(struct nb8800_priv *priv, int reg, u8 bits) -{ - nb8800_maskb(priv, reg, bits, bits); -} - -static inline void nb8800_clearb(struct nb8800_priv *priv, int reg, u8 bits) -{ - nb8800_maskb(priv, reg, bits, 0); -} - -static inline void nb8800_modl(struct nb8800_priv *priv, int reg, u32 bits, - bool set) -{ - nb8800_maskl(priv, reg, bits, set ? bits : 0); -} - -static inline void nb8800_setl(struct nb8800_priv *priv, int reg, u32 bits) -{ - nb8800_maskl(priv, reg, bits, bits); -} - -static inline void nb8800_clearl(struct nb8800_priv *priv, int reg, u32 bits) -{ - nb8800_maskl(priv, reg, bits, 0); -} - -static int nb8800_mdio_wait(struct mii_bus *bus) -{ - struct nb8800_priv *priv = bus->priv; - u32 val; - - return readl_poll_timeout_atomic(priv->base + NB8800_MDIO_CMD, - val, !(val & MDIO_CMD_GO), 1, 1000); -} - -static int nb8800_mdio_cmd(struct mii_bus *bus, u32 cmd) -{ - struct nb8800_priv *priv = bus->priv; - int err; - - err = nb8800_mdio_wait(bus); - if (err) - return err; - - nb8800_writel(priv, NB8800_MDIO_CMD, cmd); - udelay(10); - nb8800_writel(priv, NB8800_MDIO_CMD, cmd | MDIO_CMD_GO); - - return nb8800_mdio_wait(bus); -} - -static int nb8800_mdio_read(struct mii_bus *bus, int phy_id, int reg) -{ - struct nb8800_priv *priv = bus->priv; - u32 val; - int err; - - err = nb8800_mdio_cmd(bus, MDIO_CMD_ADDR(phy_id) | MDIO_CMD_REG(reg)); - if (err) - return err; - - val = nb8800_readl(priv, NB8800_MDIO_STS); - if (val & MDIO_STS_ERR) - return 0xffff; - - return val & 0xffff; -} - -static int nb8800_mdio_write(struct mii_bus *bus, int phy_id, int reg, u16 val) -{ - u32 cmd = MDIO_CMD_ADDR(phy_id) | MDIO_CMD_REG(reg) | - MDIO_CMD_DATA(val) | MDIO_CMD_WR; - - return nb8800_mdio_cmd(bus, cmd); -} - -static void nb8800_mac_tx(struct net_device *dev, bool enable) -{ - struct nb8800_priv *priv = netdev_priv(dev); - - while (nb8800_readl(priv, NB8800_TXC_CR) & TCR_EN) - cpu_relax(); - - nb8800_modb(priv, NB8800_TX_CTL1, TX_EN, enable); -} - -static void nb8800_mac_rx(struct net_device *dev, bool enable) -{ - nb8800_modb(netdev_priv(dev), NB8800_RX_CTL, RX_EN, enable); -} - -static void nb8800_mac_af(struct net_device *dev, bool enable) -{ - nb8800_modb(netdev_priv(dev), NB8800_RX_CTL, RX_AF_EN, enable); -} - -static void nb8800_start_rx(struct net_device *dev) -{ - nb8800_setl(netdev_priv(dev), NB8800_RXC_CR, RCR_EN); -} - -static int nb8800_alloc_rx(struct net_device *dev, unsigned int i, bool napi) -{ - struct nb8800_priv *priv = netdev_priv(dev); - struct nb8800_rx_desc *rxd = &priv->rx_descs[i]; - struct nb8800_rx_buf *rxb = &priv->rx_bufs[i]; - int size = L1_CACHE_ALIGN(RX_BUF_SIZE); - dma_addr_t dma_addr; - struct page *page; - unsigned long offset; - void *data; - - data = napi ? napi_alloc_frag(size) : netdev_alloc_frag(size); - if (!data) - return -ENOMEM; - - page = virt_to_head_page(data); - offset = data - page_address(page); - - dma_addr = dma_map_page(&dev->dev, page, offset, RX_BUF_SIZE, - DMA_FROM_DEVICE); - - if (dma_mapping_error(&dev->dev, dma_addr)) { - skb_free_frag(data); - return -ENOMEM; - } - - rxb->page = page; - rxb->offset = offset; - rxd->desc.s_addr = dma_addr; - - return 0; -} - -static void nb8800_receive(struct net_device *dev, unsigned int i, - unsigned int len) -{ - struct nb8800_priv *priv = netdev_priv(dev); - struct nb8800_rx_desc *rxd = &priv->rx_descs[i]; - struct page *page = priv->rx_bufs[i].page; - int offset = priv->rx_bufs[i].offset; - void *data = page_address(page) + offset; - dma_addr_t dma = rxd->desc.s_addr; - struct sk_buff *skb; - unsigned int size; - int err; - - size = len <= RX_COPYBREAK ? len : RX_COPYHDR; - - skb = napi_alloc_skb(&priv->napi, size); - if (!skb) { - netdev_err(dev, "rx skb allocation failed\n"); - dev->stats.rx_dropped++; - return; - } - - if (len <= RX_COPYBREAK) { - dma_sync_single_for_cpu(&dev->dev, dma, len, DMA_FROM_DEVICE); - skb_put_data(skb, data, len); - dma_sync_single_for_device(&dev->dev, dma, len, - DMA_FROM_DEVICE); - } else { - err = nb8800_alloc_rx(dev, i, true); - if (err) { - netdev_err(dev, "rx buffer allocation failed\n"); - dev->stats.rx_dropped++; - dev_kfree_skb(skb); - return; - } - - dma_unmap_page(&dev->dev, dma, RX_BUF_SIZE, DMA_FROM_DEVICE); - skb_put_data(skb, data, RX_COPYHDR); - skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page, - offset + RX_COPYHDR, len - RX_COPYHDR, - RX_BUF_SIZE); - } - - skb->protocol = eth_type_trans(skb, dev); - napi_gro_receive(&priv->napi, skb); -} - -static void nb8800_rx_error(struct net_device *dev, u32 report) -{ - if (report & RX_LENGTH_ERR) - dev->stats.rx_length_errors++; - - if (report & RX_FCS_ERR) - dev->stats.rx_crc_errors++; - - if (report & RX_FIFO_OVERRUN) - dev->stats.rx_fifo_errors++; - - if (report & RX_ALIGNMENT_ERROR) - dev->stats.rx_frame_errors++; - - dev->stats.rx_errors++; -} - -static int nb8800_poll(struct napi_struct *napi, int budget) -{ - struct net_device *dev = napi->dev; - struct nb8800_priv *priv = netdev_priv(dev); - struct nb8800_rx_desc *rxd; - unsigned int last = priv->rx_eoc; - unsigned int next; - int work = 0; - - nb8800_tx_done(dev); - -again: - do { - unsigned int len; - - next = (last + 1) % RX_DESC_COUNT; - - rxd = &priv->rx_descs[next]; - - if (!rxd->report) - break; - - len = RX_BYTES_TRANSFERRED(rxd->report); - - if (IS_RX_ERROR(rxd->report)) - nb8800_rx_error(dev, rxd->report); - else - nb8800_receive(dev, next, len); - - dev->stats.rx_packets++; - dev->stats.rx_bytes += len; - - if (rxd->report & RX_MULTICAST_PKT) - dev->stats.multicast++; - - rxd->report = 0; - last = next; - work++; - } while (work < budget); - - if (work) { - priv->rx_descs[last].desc.config |= DESC_EOC; - wmb(); /* ensure new EOC is written before clearing old */ - priv->rx_descs[priv->rx_eoc].desc.config &= ~DESC_EOC; - priv->rx_eoc = last; - nb8800_start_rx(dev); - } - - if (work < budget) { - nb8800_writel(priv, NB8800_RX_ITR, priv->rx_itr_irq); - - /* If a packet arrived after we last checked but - * before writing RX_ITR, the interrupt will be - * delayed, so we retrieve it now. - */ - if (priv->rx_descs[next].report) - goto again; - - napi_complete_done(napi, work); - } - - return work; -} - -static void __nb8800_tx_dma_start(struct net_device *dev) -{ - struct nb8800_priv *priv = netdev_priv(dev); - struct nb8800_tx_buf *txb; - u32 txc_cr; - - txb = &priv->tx_bufs[priv->tx_queue]; - if (!txb->ready) - return; - - txc_cr = nb8800_readl(priv, NB8800_TXC_CR); - if (txc_cr & TCR_EN) - return; - - nb8800_writel(priv, NB8800_TX_DESC_ADDR, txb->dma_desc); - wmb(); /* ensure desc addr is written before starting DMA */ - nb8800_writel(priv, NB8800_TXC_CR, txc_cr | TCR_EN); - - priv->tx_queue = (priv->tx_queue + txb->chain_len) % TX_DESC_COUNT; -} - -static void nb8800_tx_dma_start(struct net_device *dev) -{ - struct nb8800_priv *priv = netdev_priv(dev); - - spin_lock_irq(&priv->tx_lock); - __nb8800_tx_dma_start(dev); - spin_unlock_irq(&priv->tx_lock); -} - -static void nb8800_tx_dma_start_irq(struct net_device *dev) -{ - struct nb8800_priv *priv = netdev_priv(dev); - - spin_lock(&priv->tx_lock); - __nb8800_tx_dma_start(dev); - spin_unlock(&priv->tx_lock); -} - -static netdev_tx_t nb8800_xmit(struct sk_buff *skb, struct net_device *dev) -{ - struct nb8800_priv *priv = netdev_priv(dev); - struct nb8800_tx_desc *txd; - struct nb8800_tx_buf *txb; - struct nb8800_dma_desc *desc; - dma_addr_t dma_addr; - unsigned int dma_len; - unsigned int align; - unsigned int next; - bool xmit_more; - - if (atomic_read(&priv->tx_free) <= NB8800_DESC_LOW) { - netif_stop_queue(dev); - return NETDEV_TX_BUSY; - } - - align = (8 - (uintptr_t)skb->data) & 7; - - dma_len = skb->len - align; - dma_addr = dma_map_single(&dev->dev, skb->data + align, - dma_len, DMA_TO_DEVICE); - - if (dma_mapping_error(&dev->dev, dma_addr)) { - netdev_err(dev, "tx dma mapping error\n"); - kfree_skb(skb); - dev->stats.tx_dropped++; - return NETDEV_TX_OK; - } - - xmit_more = netdev_xmit_more(); - if (atomic_dec_return(&priv->tx_free) <= NB8800_DESC_LOW) { - netif_stop_queue(dev); - xmit_more = false; - } - - next = priv->tx_next; - txb = &priv->tx_bufs[next]; - txd = &priv->tx_descs[next]; - desc = &txd->desc[0]; - - next = (next + 1) % TX_DESC_COUNT; - - if (align) { - memcpy(txd->buf, skb->data, align); - - desc->s_addr = - txb->dma_desc + offsetof(struct nb8800_tx_desc, buf); - desc->n_addr = txb->dma_desc + sizeof(txd->desc[0]); - desc->config = DESC_BTS(2) | DESC_DS | align; - - desc++; - } - - desc->s_addr = dma_addr; - desc->n_addr = priv->tx_bufs[next].dma_desc; - desc->config = DESC_BTS(2) | DESC_DS | DESC_EOF | dma_len; - - if (!xmit_more) - desc->config |= DESC_EOC; - - txb->skb = skb; - txb->dma_addr = dma_addr; - txb->dma_len = dma_len; - - if (!priv->tx_chain) { - txb->chain_len = 1; - priv->tx_chain = txb; - } else { - priv->tx_chain->chain_len++; - } - - netdev_sent_queue(dev, skb->len); - - priv->tx_next = next; - - if (!xmit_more) { - smp_wmb(); - priv->tx_chain->ready = true; - priv->tx_chain = NULL; - nb8800_tx_dma_start(dev); - } - - return NETDEV_TX_OK; -} - -static void nb8800_tx_error(struct net_device *dev, u32 report) -{ - if (report & TX_LATE_COLLISION) - dev->stats.collisions++; - - if (report & TX_PACKET_DROPPED) - dev->stats.tx_dropped++; - - if (report & TX_FIFO_UNDERRUN) - dev->stats.tx_fifo_errors++; - - dev->stats.tx_errors++; -} - -static void nb8800_tx_done(struct net_device *dev) -{ - struct nb8800_priv *priv = netdev_priv(dev); - unsigned int limit = priv->tx_next; - unsigned int done = priv->tx_done; - unsigned int packets = 0; - unsigned int len = 0; - - while (done != limit) { - struct nb8800_tx_desc *txd = &priv->tx_descs[done]; - struct nb8800_tx_buf *txb = &priv->tx_bufs[done]; - struct sk_buff *skb; - - if (!txd->report) - break; - - skb = txb->skb; - len += skb->len; - - dma_unmap_single(&dev->dev, txb->dma_addr, txb->dma_len, - DMA_TO_DEVICE); - - if (IS_TX_ERROR(txd->report)) { - nb8800_tx_error(dev, txd->report); - kfree_skb(skb); - } else { - consume_skb(skb); - } - - dev->stats.tx_packets++; - dev->stats.tx_bytes += TX_BYTES_TRANSFERRED(txd->report); - dev->stats.collisions += TX_EARLY_COLLISIONS(txd->report); - - txb->skb = NULL; - txb->ready = false; - txd->report = 0; - - done = (done + 1) % TX_DESC_COUNT; - packets++; - } - - if (packets) { - smp_mb__before_atomic(); - atomic_add(packets, &priv->tx_free); - netdev_completed_queue(dev, packets, len); - netif_wake_queue(dev); - priv->tx_done = done; - } -} - -static irqreturn_t nb8800_irq(int irq, void *dev_id) -{ - struct net_device *dev = dev_id; - struct nb8800_priv *priv = netdev_priv(dev); - irqreturn_t ret = IRQ_NONE; - u32 val; - - /* tx interrupt */ - val = nb8800_readl(priv, NB8800_TXC_SR); - if (val) { - nb8800_writel(priv, NB8800_TXC_SR, val); - - if (val & TSR_DI) - nb8800_tx_dma_start_irq(dev); - - if (val & TSR_TI) - napi_schedule_irqoff(&priv->napi); - - if (unlikely(val & TSR_DE)) - netdev_err(dev, "TX DMA error\n"); - - /* should never happen with automatic status retrieval */ - if (unlikely(val & TSR_TO)) - netdev_err(dev, "TX Status FIFO overflow\n"); - - ret = IRQ_HANDLED; - } - - /* rx interrupt */ - val = nb8800_readl(priv, NB8800_RXC_SR); - if (val) { - nb8800_writel(priv, NB8800_RXC_SR, val); - - if (likely(val & (RSR_RI | RSR_DI))) { - nb8800_writel(priv, NB8800_RX_ITR, priv->rx_itr_poll); - napi_schedule_irqoff(&priv->napi); - } - - if (unlikely(val & RSR_DE)) - netdev_err(dev, "RX DMA error\n"); - - /* should never happen with automatic status retrieval */ - if (unlikely(val & RSR_RO)) - netdev_err(dev, "RX Status FIFO overflow\n"); - - ret = IRQ_HANDLED; - } - - return ret; -} - -static void nb8800_mac_config(struct net_device *dev) -{ - struct nb8800_priv *priv = netdev_priv(dev); - bool gigabit = priv->speed == SPEED_1000; - u32 mac_mode_mask = RGMII_MODE | HALF_DUPLEX | GMAC_MODE; - u32 mac_mode = 0; - u32 slot_time; - u32 phy_clk; - u32 ict; - - if (!priv->duplex) - mac_mode |= HALF_DUPLEX; - - if (gigabit) { - if (phy_interface_is_rgmii(dev->phydev)) - mac_mode |= RGMII_MODE; - - mac_mode |= GMAC_MODE; - phy_clk = 125000000; - - /* Should be 512 but register is only 8 bits */ - slot_time = 255; - } else { - phy_clk = 25000000; - slot_time = 128; - } - - ict = DIV_ROUND_UP(phy_clk, clk_get_rate(priv->clk)); - - nb8800_writeb(priv, NB8800_IC_THRESHOLD, ict); - nb8800_writeb(priv, NB8800_SLOT_TIME, slot_time); - nb8800_maskb(priv, NB8800_MAC_MODE, mac_mode_mask, mac_mode); -} - -static void nb8800_pause_config(struct net_device *dev) -{ - struct nb8800_priv *priv = netdev_priv(dev); - struct phy_device *phydev = dev->phydev; - u32 rxcr; - - if (priv->pause_aneg) { - if (!phydev || !phydev->link) - return; - - priv->pause_rx = phydev->pause; - priv->pause_tx = phydev->pause ^ phydev->asym_pause; - } - - nb8800_modb(priv, NB8800_RX_CTL, RX_PAUSE_EN, priv->pause_rx); - - rxcr = nb8800_readl(priv, NB8800_RXC_CR); - if (!!(rxcr & RCR_FL) == priv->pause_tx) - return; - - if (netif_running(dev)) { - napi_disable(&priv->napi); - netif_tx_lock_bh(dev); - nb8800_dma_stop(dev); - nb8800_modl(priv, NB8800_RXC_CR, RCR_FL, priv->pause_tx); - nb8800_start_rx(dev); - netif_tx_unlock_bh(dev); - napi_enable(&priv->napi); - } else { - nb8800_modl(priv, NB8800_RXC_CR, RCR_FL, priv->pause_tx); - } -} - -static void nb8800_link_reconfigure(struct net_device *dev) -{ - struct nb8800_priv *priv = netdev_priv(dev); - struct phy_device *phydev = dev->phydev; - int change = 0; - - if (phydev->link) { - if (phydev->speed != priv->speed) { - priv->speed = phydev->speed; - change = 1; - } - - if (phydev->duplex != priv->duplex) { - priv->duplex = phydev->duplex; - change = 1; - } - - if (change) - nb8800_mac_config(dev); - - nb8800_pause_config(dev); - } - - if (phydev->link != priv->link) { - priv->link = phydev->link; - change = 1; - } - - if (change) - phy_print_status(phydev); -} - -static void nb8800_update_mac_addr(struct net_device *dev) -{ - struct nb8800_priv *priv = netdev_priv(dev); - int i; - - for (i = 0; i < ETH_ALEN; i++) - nb8800_writeb(priv, NB8800_SRC_ADDR(i), dev->dev_addr[i]); - - for (i = 0; i < ETH_ALEN; i++) - nb8800_writeb(priv, NB8800_UC_ADDR(i), dev->dev_addr[i]); -} - -static int nb8800_set_mac_address(struct net_device *dev, void *addr) -{ - struct sockaddr *sock = addr; - - if (netif_running(dev)) - return -EBUSY; - - ether_addr_copy(dev->dev_addr, sock->sa_data); - nb8800_update_mac_addr(dev); - - return 0; -} - -static void nb8800_mc_init(struct net_device *dev, int val) -{ - struct nb8800_priv *priv = netdev_priv(dev); - - nb8800_writeb(priv, NB8800_MC_INIT, val); - readb_poll_timeout_atomic(priv->base + NB8800_MC_INIT, val, !val, - 1, 1000); -} - -static void nb8800_set_rx_mode(struct net_device *dev) -{ - struct nb8800_priv *priv = netdev_priv(dev); - struct netdev_hw_addr *ha; - int i; - - if (dev->flags & (IFF_PROMISC | IFF_ALLMULTI)) { - nb8800_mac_af(dev, false); - return; - } - - nb8800_mac_af(dev, true); - nb8800_mc_init(dev, 0); - - netdev_for_each_mc_addr(ha, dev) { - for (i = 0; i < ETH_ALEN; i++) - nb8800_writeb(priv, NB8800_MC_ADDR(i), ha->addr[i]); - - nb8800_mc_init(dev, 0xff); - } -} - -#define RX_DESC_SIZE (RX_DESC_COUNT * sizeof(struct nb8800_rx_desc)) -#define TX_DESC_SIZE (TX_DESC_COUNT * sizeof(struct nb8800_tx_desc)) - -static void nb8800_dma_free(struct net_device *dev) -{ - struct nb8800_priv *priv = netdev_priv(dev); - unsigned int i; - - if (priv->rx_bufs) { - for (i = 0; i < RX_DESC_COUNT; i++) - if (priv->rx_bufs[i].page) - put_page(priv->rx_bufs[i].page); - - kfree(priv->rx_bufs); - priv->rx_bufs = NULL; - } - - if (priv->tx_bufs) { - for (i = 0; i < TX_DESC_COUNT; i++) - kfree_skb(priv->tx_bufs[i].skb); - - kfree(priv->tx_bufs); - priv->tx_bufs = NULL; - } - - if (priv->rx_descs) { - dma_free_coherent(dev->dev.parent, RX_DESC_SIZE, priv->rx_descs, - priv->rx_desc_dma); - priv->rx_descs = NULL; - } - - if (priv->tx_descs) { - dma_free_coherent(dev->dev.parent, TX_DESC_SIZE, priv->tx_descs, - priv->tx_desc_dma); - priv->tx_descs = NULL; - } -} - -static void nb8800_dma_reset(struct net_device *dev) -{ - struct nb8800_priv *priv = netdev_priv(dev); - struct nb8800_rx_desc *rxd; - struct nb8800_tx_desc *txd; - unsigned int i; - - for (i = 0; i < RX_DESC_COUNT; i++) { - dma_addr_t rx_dma = priv->rx_desc_dma + i * sizeof(*rxd); - - rxd = &priv->rx_descs[i]; - rxd->desc.n_addr = rx_dma + sizeof(*rxd); - rxd->desc.r_addr = - rx_dma + offsetof(struct nb8800_rx_desc, report); - rxd->desc.config = priv->rx_dma_config; - rxd->report = 0; - } - - rxd->desc.n_addr = priv->rx_desc_dma; - rxd->desc.config |= DESC_EOC; - - priv->rx_eoc = RX_DESC_COUNT - 1; - - for (i = 0; i < TX_DESC_COUNT; i++) { - struct nb8800_tx_buf *txb = &priv->tx_bufs[i]; - dma_addr_t r_dma = txb->dma_desc + - offsetof(struct nb8800_tx_desc, report); - - txd = &priv->tx_descs[i]; - txd->desc[0].r_addr = r_dma; - txd->desc[1].r_addr = r_dma; - txd->report = 0; - } - - priv->tx_next = 0; - priv->tx_queue = 0; - priv->tx_done = 0; - atomic_set(&priv->tx_free, TX_DESC_COUNT); - - nb8800_writel(priv, NB8800_RX_DESC_ADDR, priv->rx_desc_dma); - - wmb(); /* ensure all setup is written before starting */ -} - -static int nb8800_dma_init(struct net_device *dev) -{ - struct nb8800_priv *priv = netdev_priv(dev); - unsigned int n_rx = RX_DESC_COUNT; - unsigned int n_tx = TX_DESC_COUNT; - unsigned int i; - int err; - - priv->rx_descs = dma_alloc_coherent(dev->dev.parent, RX_DESC_SIZE, - &priv->rx_desc_dma, GFP_KERNEL); - if (!priv->rx_descs) - goto err_out; - - priv->rx_bufs = kcalloc(n_rx, sizeof(*priv->rx_bufs), GFP_KERNEL); - if (!priv->rx_bufs) - goto err_out; - - for (i = 0; i < n_rx; i++) { - err = nb8800_alloc_rx(dev, i, false); - if (err) - goto err_out; - } - - priv->tx_descs = dma_alloc_coherent(dev->dev.parent, TX_DESC_SIZE, - &priv->tx_desc_dma, GFP_KERNEL); - if (!priv->tx_descs) - goto err_out; - - priv->tx_bufs = kcalloc(n_tx, sizeof(*priv->tx_bufs), GFP_KERNEL); - if (!priv->tx_bufs) - goto err_out; - - for (i = 0; i < n_tx; i++) - priv->tx_bufs[i].dma_desc = - priv->tx_desc_dma + i * sizeof(struct nb8800_tx_desc); - - nb8800_dma_reset(dev); - - return 0; - -err_out: - nb8800_dma_free(dev); - - return -ENOMEM; -} - -static int nb8800_dma_stop(struct net_device *dev) -{ - struct nb8800_priv *priv = netdev_priv(dev); - struct nb8800_tx_buf *txb = &priv->tx_bufs[0]; - struct nb8800_tx_desc *txd = &priv->tx_descs[0]; - int retry = 5; - u32 txcr; - u32 rxcr; - int err; - unsigned int i; - - /* wait for tx to finish */ - err = readl_poll_timeout_atomic(priv->base + NB8800_TXC_CR, txcr, - !(txcr & TCR_EN) && - priv->tx_done == priv->tx_next, - 1000, 1000000); - if (err) - return err; - - /* The rx DMA only stops if it reaches the end of chain. - * To make this happen, we set the EOC flag on all rx - * descriptors, put the device in loopback mode, and send - * a few dummy frames. The interrupt handler will ignore - * these since NAPI is disabled and no real frames are in - * the tx queue. - */ - - for (i = 0; i < RX_DESC_COUNT; i++) - priv->rx_descs[i].desc.config |= DESC_EOC; - - txd->desc[0].s_addr = - txb->dma_desc + offsetof(struct nb8800_tx_desc, buf); - txd->desc[0].config = DESC_BTS(2) | DESC_DS | DESC_EOF | DESC_EOC | 8; - memset(txd->buf, 0, sizeof(txd->buf)); - - nb8800_mac_af(dev, false); - nb8800_setb(priv, NB8800_MAC_MODE, LOOPBACK_EN); - - do { - nb8800_writel(priv, NB8800_TX_DESC_ADDR, txb->dma_desc); - wmb(); - nb8800_writel(priv, NB8800_TXC_CR, txcr | TCR_EN); - - err = readl_poll_timeout_atomic(priv->base + NB8800_RXC_CR, - rxcr, !(rxcr & RCR_EN), - 1000, 100000); - } while (err && --retry); - - nb8800_mac_af(dev, true); - nb8800_clearb(priv, NB8800_MAC_MODE, LOOPBACK_EN); - nb8800_dma_reset(dev); - - return retry ? 0 : -ETIMEDOUT; -} - -static void nb8800_pause_adv(struct net_device *dev) -{ - struct nb8800_priv *priv = netdev_priv(dev); - struct phy_device *phydev = dev->phydev; - - if (!phydev) - return; - - phy_set_asym_pause(phydev, priv->pause_rx, priv->pause_tx); -} - -static int nb8800_open(struct net_device *dev) -{ - struct nb8800_priv *priv = netdev_priv(dev); - struct phy_device *phydev; - int err; - - /* clear any pending interrupts */ - nb8800_writel(priv, NB8800_RXC_SR, 0xf); - nb8800_writel(priv, NB8800_TXC_SR, 0xf); - - err = nb8800_dma_init(dev); - if (err) - return err; - - err = request_irq(dev->irq, nb8800_irq, 0, dev_name(&dev->dev), dev); - if (err) - goto err_free_dma; - - nb8800_mac_rx(dev, true); - nb8800_mac_tx(dev, true); - - phydev = of_phy_connect(dev, priv->phy_node, - nb8800_link_reconfigure, 0, - priv->phy_mode); - if (!phydev) { - err = -ENODEV; - goto err_free_irq; - } - - nb8800_pause_adv(dev); - - netdev_reset_queue(dev); - napi_enable(&priv->napi); - netif_start_queue(dev); - - nb8800_start_rx(dev); - phy_start(phydev); - - return 0; - -err_free_irq: - free_irq(dev->irq, dev); -err_free_dma: - nb8800_dma_free(dev); - - return err; -} - -static int nb8800_stop(struct net_device *dev) -{ - struct nb8800_priv *priv = netdev_priv(dev); - struct phy_device *phydev = dev->phydev; - - phy_stop(phydev); - - netif_stop_queue(dev); - napi_disable(&priv->napi); - - nb8800_dma_stop(dev); - nb8800_mac_rx(dev, false); - nb8800_mac_tx(dev, false); - - phy_disconnect(phydev); - - free_irq(dev->irq, dev); - - nb8800_dma_free(dev); - - return 0; -} - -static const struct net_device_ops nb8800_netdev_ops = { - .ndo_open = nb8800_open, - .ndo_stop = nb8800_stop, - .ndo_start_xmit = nb8800_xmit, - .ndo_set_mac_address = nb8800_set_mac_address, - .ndo_set_rx_mode = nb8800_set_rx_mode, - .ndo_do_ioctl = phy_do_ioctl, - .ndo_validate_addr = eth_validate_addr, -}; - -static void nb8800_get_pauseparam(struct net_device *dev, - struct ethtool_pauseparam *pp) -{ - struct nb8800_priv *priv = netdev_priv(dev); - - pp->autoneg = priv->pause_aneg; - pp->rx_pause = priv->pause_rx; - pp->tx_pause = priv->pause_tx; -} - -static int nb8800_set_pauseparam(struct net_device *dev, - struct ethtool_pauseparam *pp) -{ - struct nb8800_priv *priv = netdev_priv(dev); - struct phy_device *phydev = dev->phydev; - - priv->pause_aneg = pp->autoneg; - priv->pause_rx = pp->rx_pause; - priv->pause_tx = pp->tx_pause; - - nb8800_pause_adv(dev); - - if (!priv->pause_aneg) - nb8800_pause_config(dev); - else if (phydev) - phy_start_aneg(phydev); - - return 0; -} - -static const char nb8800_stats_names[][ETH_GSTRING_LEN] = { - "rx_bytes_ok", - "rx_frames_ok", - "rx_undersize_frames", - "rx_fragment_frames", - "rx_64_byte_frames", - "rx_127_byte_frames", - "rx_255_byte_frames", - "rx_511_byte_frames", - "rx_1023_byte_frames", - "rx_max_size_frames", - "rx_oversize_frames", - "rx_bad_fcs_frames", - "rx_broadcast_frames", - "rx_multicast_frames", - "rx_control_frames", - "rx_pause_frames", - "rx_unsup_control_frames", - "rx_align_error_frames", - "rx_overrun_frames", - "rx_jabber_frames", - "rx_bytes", - "rx_frames", - - "tx_bytes_ok", - "tx_frames_ok", - "tx_64_byte_frames", - "tx_127_byte_frames", - "tx_255_byte_frames", - "tx_511_byte_frames", - "tx_1023_byte_frames", - "tx_max_size_frames", - "tx_oversize_frames", - "tx_broadcast_frames", - "tx_multicast_frames", - "tx_control_frames", - "tx_pause_frames", - "tx_underrun_frames", - "tx_single_collision_frames", - "tx_multi_collision_frames", - "tx_deferred_collision_frames", - "tx_late_collision_frames", - "tx_excessive_collision_frames", - "tx_bytes", - "tx_frames", - "tx_collisions", -}; - -#define NB8800_NUM_STATS ARRAY_SIZE(nb8800_stats_names) - -static int nb8800_get_sset_count(struct net_device *dev, int sset) -{ - if (sset == ETH_SS_STATS) - return NB8800_NUM_STATS; - - return -EOPNOTSUPP; -} - -static void nb8800_get_strings(struct net_device *dev, u32 sset, u8 *buf) -{ - if (sset == ETH_SS_STATS) - memcpy(buf, &nb8800_stats_names, sizeof(nb8800_stats_names)); -} - -static u32 nb8800_read_stat(struct net_device *dev, int index) -{ - struct nb8800_priv *priv = netdev_priv(dev); - - nb8800_writeb(priv, NB8800_STAT_INDEX, index); - - return nb8800_readl(priv, NB8800_STAT_DATA); -} - -static void nb8800_get_ethtool_stats(struct net_device *dev, - struct ethtool_stats *estats, u64 *st) -{ - unsigned int i; - u32 rx, tx; - - for (i = 0; i < NB8800_NUM_STATS / 2; i++) { - rx = nb8800_read_stat(dev, i); - tx = nb8800_read_stat(dev, i | 0x80); - st[i] = rx; - st[i + NB8800_NUM_STATS / 2] = tx; - } -} - -static const struct ethtool_ops nb8800_ethtool_ops = { - .nway_reset = phy_ethtool_nway_reset, - .get_link = ethtool_op_get_link, - .get_pauseparam = nb8800_get_pauseparam, - .set_pauseparam = nb8800_set_pauseparam, - .get_sset_count = nb8800_get_sset_count, - .get_strings = nb8800_get_strings, - .get_ethtool_stats = nb8800_get_ethtool_stats, - .get_link_ksettings = phy_ethtool_get_link_ksettings, - .set_link_ksettings = phy_ethtool_set_link_ksettings, -}; - -static int nb8800_hw_init(struct net_device *dev) -{ - struct nb8800_priv *priv = netdev_priv(dev); - u32 val; - - val = TX_RETRY_EN | TX_PAD_EN | TX_APPEND_FCS; - nb8800_writeb(priv, NB8800_TX_CTL1, val); - - /* Collision retry count */ - nb8800_writeb(priv, NB8800_TX_CTL2, 5); - - val = RX_PAD_STRIP | RX_AF_EN; - nb8800_writeb(priv, NB8800_RX_CTL, val); - - /* Chosen by fair dice roll */ - nb8800_writeb(priv, NB8800_RANDOM_SEED, 4); - - /* TX cycles per deferral period */ - nb8800_writeb(priv, NB8800_TX_SDP, 12); - - /* The following three threshold values have been - * experimentally determined for good results. - */ - - /* RX/TX FIFO threshold for partial empty (64-bit entries) */ - nb8800_writeb(priv, NB8800_PE_THRESHOLD, 0); - - /* RX/TX FIFO threshold for partial full (64-bit entries) */ - nb8800_writeb(priv, NB8800_PF_THRESHOLD, 255); - - /* Buffer size for transmit (64-bit entries) */ - nb8800_writeb(priv, NB8800_TX_BUFSIZE, 64); - - /* Configure tx DMA */ - - val = nb8800_readl(priv, NB8800_TXC_CR); - val &= TCR_LE; /* keep endian setting */ - val |= TCR_DM; /* DMA descriptor mode */ - val |= TCR_RS; /* automatically store tx status */ - val |= TCR_DIE; /* interrupt on DMA chain completion */ - val |= TCR_TFI(7); /* interrupt after 7 frames transmitted */ - val |= TCR_BTS(2); /* 32-byte bus transaction size */ - nb8800_writel(priv, NB8800_TXC_CR, val); - - /* TX complete interrupt after 10 ms or 7 frames (see above) */ - val = clk_get_rate(priv->clk) / 100; - nb8800_writel(priv, NB8800_TX_ITR, val); - - /* Configure rx DMA */ - - val = nb8800_readl(priv, NB8800_RXC_CR); - val &= RCR_LE; /* keep endian setting */ - val |= RCR_DM; /* DMA descriptor mode */ - val |= RCR_RS; /* automatically store rx status */ - val |= RCR_DIE; /* interrupt at end of DMA chain */ - val |= RCR_RFI(7); /* interrupt after 7 frames received */ - val |= RCR_BTS(2); /* 32-byte bus transaction size */ - nb8800_writel(priv, NB8800_RXC_CR, val); - - /* The rx interrupt can fire before the DMA has completed - * unless a small delay is added. 50 us is hopefully enough. - */ - priv->rx_itr_irq = clk_get_rate(priv->clk) / 20000; - - /* In NAPI poll mode we want to disable interrupts, but the - * hardware does not permit this. Delay 10 ms instead. - */ - priv->rx_itr_poll = clk_get_rate(priv->clk) / 100; - - nb8800_writel(priv, NB8800_RX_ITR, priv->rx_itr_irq); - - priv->rx_dma_config = RX_BUF_SIZE | DESC_BTS(2) | DESC_DS | DESC_EOF; - - /* Flow control settings */ - - /* Pause time of 0.1 ms */ - val = 100000 / 512; - nb8800_writeb(priv, NB8800_PQ1, val >> 8); - nb8800_writeb(priv, NB8800_PQ2, val & 0xff); - - /* Auto-negotiate by default */ - priv->pause_aneg = true; - priv->pause_rx = true; - priv->pause_tx = true; - - nb8800_mc_init(dev, 0); - - return 0; -} - -static int nb8800_tangox_init(struct net_device *dev) -{ - struct nb8800_priv *priv = netdev_priv(dev); - u32 pad_mode = PAD_MODE_MII; - - switch (priv->phy_mode) { - case PHY_INTERFACE_MODE_MII: - case PHY_INTERFACE_MODE_GMII: - pad_mode = PAD_MODE_MII; - break; - - case PHY_INTERFACE_MODE_RGMII: - case PHY_INTERFACE_MODE_RGMII_ID: - case PHY_INTERFACE_MODE_RGMII_RXID: - case PHY_INTERFACE_MODE_RGMII_TXID: - pad_mode = PAD_MODE_RGMII; - break; - - default: - dev_err(dev->dev.parent, "unsupported phy mode %s\n", - phy_modes(priv->phy_mode)); - return -EINVAL; - } - - nb8800_writeb(priv, NB8800_TANGOX_PAD_MODE, pad_mode); - - return 0; -} - -static int nb8800_tangox_reset(struct net_device *dev) -{ - struct nb8800_priv *priv = netdev_priv(dev); - int clk_div; - - nb8800_writeb(priv, NB8800_TANGOX_RESET, 0); - usleep_range(1000, 10000); - nb8800_writeb(priv, NB8800_TANGOX_RESET, 1); - - wmb(); /* ensure reset is cleared before proceeding */ - - clk_div = DIV_ROUND_UP(clk_get_rate(priv->clk), 2 * MAX_MDC_CLOCK); - nb8800_writew(priv, NB8800_TANGOX_MDIO_CLKDIV, clk_div); - - return 0; -} - -static const struct nb8800_ops nb8800_tangox_ops = { - .init = nb8800_tangox_init, - .reset = nb8800_tangox_reset, -}; - -static int nb8800_tango4_init(struct net_device *dev) -{ - struct nb8800_priv *priv = netdev_priv(dev); - int err; - - err = nb8800_tangox_init(dev); - if (err) - return err; - - /* On tango4 interrupt on DMA completion per frame works and gives - * better performance despite generating more rx interrupts. - */ - - /* Disable unnecessary interrupt on rx completion */ - nb8800_clearl(priv, NB8800_RXC_CR, RCR_RFI(7)); - - /* Request interrupt on descriptor DMA completion */ - priv->rx_dma_config |= DESC_ID; - - return 0; -} - -static const struct nb8800_ops nb8800_tango4_ops = { - .init = nb8800_tango4_init, - .reset = nb8800_tangox_reset, -}; - -static const struct of_device_id nb8800_dt_ids[] = { - { - .compatible = "aurora,nb8800", - }, - { - .compatible = "sigma,smp8642-ethernet", - .data = &nb8800_tangox_ops, - }, - { - .compatible = "sigma,smp8734-ethernet", - .data = &nb8800_tango4_ops, - }, - { } -}; -MODULE_DEVICE_TABLE(of, nb8800_dt_ids); - -static int nb8800_probe(struct platform_device *pdev) -{ - const struct of_device_id *match; - const struct nb8800_ops *ops = NULL; - struct nb8800_priv *priv; - struct resource *res; - struct net_device *dev; - struct mii_bus *bus; - const unsigned char *mac; - void __iomem *base; - int irq; - int ret; - - match = of_match_device(nb8800_dt_ids, &pdev->dev); - if (match) - ops = match->data; - - irq = platform_get_irq(pdev, 0); - if (irq <= 0) - return -EINVAL; - - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - base = devm_ioremap_resource(&pdev->dev, res); - if (IS_ERR(base)) - return PTR_ERR(base); - - dev_dbg(&pdev->dev, "AU-NB8800 Ethernet at %pa\n", &res->start); - - dev = alloc_etherdev(sizeof(*priv)); - if (!dev) - return -ENOMEM; - - platform_set_drvdata(pdev, dev); - SET_NETDEV_DEV(dev, &pdev->dev); - - priv = netdev_priv(dev); - priv->base = base; - - ret = of_get_phy_mode(pdev->dev.of_node, &priv->phy_mode); - if (ret) - priv->phy_mode = PHY_INTERFACE_MODE_RGMII; - - priv->clk = devm_clk_get(&pdev->dev, NULL); - if (IS_ERR(priv->clk)) { - dev_err(&pdev->dev, "failed to get clock\n"); - ret = PTR_ERR(priv->clk); - goto err_free_dev; - } - - ret = clk_prepare_enable(priv->clk); - if (ret) - goto err_free_dev; - - spin_lock_init(&priv->tx_lock); - - if (ops && ops->reset) { - ret = ops->reset(dev); - if (ret) - goto err_disable_clk; - } - - bus = devm_mdiobus_alloc(&pdev->dev); - if (!bus) { - ret = -ENOMEM; - goto err_disable_clk; - } - - bus->name = "nb8800-mii"; - bus->read = nb8800_mdio_read; - bus->write = nb8800_mdio_write; - bus->parent = &pdev->dev; - snprintf(bus->id, MII_BUS_ID_SIZE, "%lx.nb8800-mii", - (unsigned long)res->start); - bus->priv = priv; - - ret = of_mdiobus_register(bus, pdev->dev.of_node); - if (ret) { - dev_err(&pdev->dev, "failed to register MII bus\n"); - goto err_disable_clk; - } - - if (of_phy_is_fixed_link(pdev->dev.of_node)) { - ret = of_phy_register_fixed_link(pdev->dev.of_node); - if (ret < 0) { - dev_err(&pdev->dev, "bad fixed-link spec\n"); - goto err_free_bus; - } - priv->phy_node = of_node_get(pdev->dev.of_node); - } - - if (!priv->phy_node) - priv->phy_node = of_parse_phandle(pdev->dev.of_node, - "phy-handle", 0); - - if (!priv->phy_node) { - dev_err(&pdev->dev, "no PHY specified\n"); - ret = -ENODEV; - goto err_free_bus; - } - - priv->mii_bus = bus; - - ret = nb8800_hw_init(dev); - if (ret) - goto err_deregister_fixed_link; - - if (ops && ops->init) { - ret = ops->init(dev); - if (ret) - goto err_deregister_fixed_link; - } - - dev->netdev_ops = &nb8800_netdev_ops; - dev->ethtool_ops = &nb8800_ethtool_ops; - dev->flags |= IFF_MULTICAST; - dev->irq = irq; - - mac = of_get_mac_address(pdev->dev.of_node); - if (!IS_ERR(mac)) - ether_addr_copy(dev->dev_addr, mac); - - if (!is_valid_ether_addr(dev->dev_addr)) - eth_hw_addr_random(dev); - - nb8800_update_mac_addr(dev); - - netif_carrier_off(dev); - - ret = register_netdev(dev); - if (ret) { - netdev_err(dev, "failed to register netdev\n"); - goto err_free_dma; - } - - netif_napi_add(dev, &priv->napi, nb8800_poll, NAPI_POLL_WEIGHT); - - netdev_info(dev, "MAC address %pM\n", dev->dev_addr); - - return 0; - -err_free_dma: - nb8800_dma_free(dev); -err_deregister_fixed_link: - if (of_phy_is_fixed_link(pdev->dev.of_node)) - of_phy_deregister_fixed_link(pdev->dev.of_node); -err_free_bus: - of_node_put(priv->phy_node); - mdiobus_unregister(bus); -err_disable_clk: - clk_disable_unprepare(priv->clk); -err_free_dev: - free_netdev(dev); - - return ret; -} - -static int nb8800_remove(struct platform_device *pdev) -{ - struct net_device *ndev = platform_get_drvdata(pdev); - struct nb8800_priv *priv = netdev_priv(ndev); - - unregister_netdev(ndev); - if (of_phy_is_fixed_link(pdev->dev.of_node)) - of_phy_deregister_fixed_link(pdev->dev.of_node); - of_node_put(priv->phy_node); - - mdiobus_unregister(priv->mii_bus); - - clk_disable_unprepare(priv->clk); - - nb8800_dma_free(ndev); - free_netdev(ndev); - - return 0; -} - -static struct platform_driver nb8800_driver = { - .driver = { - .name = "nb8800", - .of_match_table = nb8800_dt_ids, - }, - .probe = nb8800_probe, - .remove = nb8800_remove, -}; - -module_platform_driver(nb8800_driver); - -MODULE_DESCRIPTION("Aurora AU-NB8800 Ethernet driver"); -MODULE_AUTHOR("Mans Rullgard "); -MODULE_LICENSE("GPL"); diff --git a/drivers/net/ethernet/aurora/nb8800.h b/drivers/net/ethernet/aurora/nb8800.h deleted file mode 100644 index 40941fb6065bc..0000000000000 --- a/drivers/net/ethernet/aurora/nb8800.h +++ /dev/null @@ -1,316 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _NB8800_H_ -#define _NB8800_H_ - -#include -#include -#include -#include -#include - -#define RX_DESC_COUNT 256 -#define TX_DESC_COUNT 256 - -#define NB8800_DESC_LOW 4 - -#define RX_BUF_SIZE 1552 - -#define RX_COPYBREAK 256 -#define RX_COPYHDR 128 - -#define MAX_MDC_CLOCK 2500000 - -/* Stargate Solutions SSN8800 core registers */ -#define NB8800_TX_CTL1 0x000 -#define TX_TPD BIT(5) -#define TX_APPEND_FCS BIT(4) -#define TX_PAD_EN BIT(3) -#define TX_RETRY_EN BIT(2) -#define TX_EN BIT(0) - -#define NB8800_TX_CTL2 0x001 - -#define NB8800_RX_CTL 0x004 -#define RX_BC_DISABLE BIT(7) -#define RX_RUNT BIT(6) -#define RX_AF_EN BIT(5) -#define RX_PAUSE_EN BIT(3) -#define RX_SEND_CRC BIT(2) -#define RX_PAD_STRIP BIT(1) -#define RX_EN BIT(0) - -#define NB8800_RANDOM_SEED 0x008 -#define NB8800_TX_SDP 0x14 -#define NB8800_TX_TPDP1 0x18 -#define NB8800_TX_TPDP2 0x19 -#define NB8800_SLOT_TIME 0x1c - -#define NB8800_MDIO_CMD 0x020 -#define MDIO_CMD_GO BIT(31) -#define MDIO_CMD_WR BIT(26) -#define MDIO_CMD_ADDR(x) ((x) << 21) -#define MDIO_CMD_REG(x) ((x) << 16) -#define MDIO_CMD_DATA(x) ((x) << 0) - -#define NB8800_MDIO_STS 0x024 -#define MDIO_STS_ERR BIT(31) - -#define NB8800_MC_ADDR(i) (0x028 + (i)) -#define NB8800_MC_INIT 0x02e -#define NB8800_UC_ADDR(i) (0x03c + (i)) - -#define NB8800_MAC_MODE 0x044 -#define RGMII_MODE BIT(7) -#define HALF_DUPLEX BIT(4) -#define BURST_EN BIT(3) -#define LOOPBACK_EN BIT(2) -#define GMAC_MODE BIT(0) - -#define NB8800_IC_THRESHOLD 0x050 -#define NB8800_PE_THRESHOLD 0x051 -#define NB8800_PF_THRESHOLD 0x052 -#define NB8800_TX_BUFSIZE 0x054 -#define NB8800_FIFO_CTL 0x056 -#define NB8800_PQ1 0x060 -#define NB8800_PQ2 0x061 -#define NB8800_SRC_ADDR(i) (0x06a + (i)) -#define NB8800_STAT_DATA 0x078 -#define NB8800_STAT_INDEX 0x07c -#define NB8800_STAT_CLEAR 0x07d - -#define NB8800_SLEEP_MODE 0x07e -#define SLEEP_MODE BIT(0) - -#define NB8800_WAKEUP 0x07f -#define WAKEUP BIT(0) - -/* Aurora NB8800 host interface registers */ -#define NB8800_TXC_CR 0x100 -#define TCR_LK BIT(12) -#define TCR_DS BIT(11) -#define TCR_BTS(x) (((x) & 0x7) << 8) -#define TCR_DIE BIT(7) -#define TCR_TFI(x) (((x) & 0x7) << 4) -#define TCR_LE BIT(3) -#define TCR_RS BIT(2) -#define TCR_DM BIT(1) -#define TCR_EN BIT(0) - -#define NB8800_TXC_SR 0x104 -#define TSR_DE BIT(3) -#define TSR_DI BIT(2) -#define TSR_TO BIT(1) -#define TSR_TI BIT(0) - -#define NB8800_TX_SAR 0x108 -#define NB8800_TX_DESC_ADDR 0x10c - -#define NB8800_TX_REPORT_ADDR 0x110 -#define TX_BYTES_TRANSFERRED(x) (((x) >> 16) & 0xffff) -#define TX_FIRST_DEFERRAL BIT(7) -#define TX_EARLY_COLLISIONS(x) (((x) >> 3) & 0xf) -#define TX_LATE_COLLISION BIT(2) -#define TX_PACKET_DROPPED BIT(1) -#define TX_FIFO_UNDERRUN BIT(0) -#define IS_TX_ERROR(r) ((r) & 0x07) - -#define NB8800_TX_FIFO_SR 0x114 -#define NB8800_TX_ITR 0x118 - -#define NB8800_RXC_CR 0x200 -#define RCR_FL BIT(13) -#define RCR_LK BIT(12) -#define RCR_DS BIT(11) -#define RCR_BTS(x) (((x) & 7) << 8) -#define RCR_DIE BIT(7) -#define RCR_RFI(x) (((x) & 7) << 4) -#define RCR_LE BIT(3) -#define RCR_RS BIT(2) -#define RCR_DM BIT(1) -#define RCR_EN BIT(0) - -#define NB8800_RXC_SR 0x204 -#define RSR_DE BIT(3) -#define RSR_DI BIT(2) -#define RSR_RO BIT(1) -#define RSR_RI BIT(0) - -#define NB8800_RX_SAR 0x208 -#define NB8800_RX_DESC_ADDR 0x20c - -#define NB8800_RX_REPORT_ADDR 0x210 -#define RX_BYTES_TRANSFERRED(x) (((x) >> 16) & 0xFFFF) -#define RX_MULTICAST_PKT BIT(9) -#define RX_BROADCAST_PKT BIT(8) -#define RX_LENGTH_ERR BIT(7) -#define RX_FCS_ERR BIT(6) -#define RX_RUNT_PKT BIT(5) -#define RX_FIFO_OVERRUN BIT(4) -#define RX_LATE_COLLISION BIT(3) -#define RX_ALIGNMENT_ERROR BIT(2) -#define RX_ERROR_MASK 0xfc -#define IS_RX_ERROR(r) ((r) & RX_ERROR_MASK) - -#define NB8800_RX_FIFO_SR 0x214 -#define NB8800_RX_ITR 0x218 - -/* Sigma Designs SMP86xx additional registers */ -#define NB8800_TANGOX_PAD_MODE 0x400 -#define PAD_MODE_MASK 0x7 -#define PAD_MODE_MII 0x0 -#define PAD_MODE_RGMII 0x1 -#define PAD_MODE_GTX_CLK_INV BIT(3) -#define PAD_MODE_GTX_CLK_DELAY BIT(4) - -#define NB8800_TANGOX_MDIO_CLKDIV 0x420 -#define NB8800_TANGOX_RESET 0x424 - -/* Hardware DMA descriptor */ -struct nb8800_dma_desc { - u32 s_addr; /* start address */ - u32 n_addr; /* next descriptor address */ - u32 r_addr; /* report address */ - u32 config; -} __aligned(8); - -#define DESC_ID BIT(23) -#define DESC_EOC BIT(22) -#define DESC_EOF BIT(21) -#define DESC_LK BIT(20) -#define DESC_DS BIT(19) -#define DESC_BTS(x) (((x) & 0x7) << 16) - -/* DMA descriptor and associated data for rx. - * Allocated from coherent memory. - */ -struct nb8800_rx_desc { - /* DMA descriptor */ - struct nb8800_dma_desc desc; - - /* Status report filled in by hardware */ - u32 report; -}; - -/* Address of buffer on rx ring */ -struct nb8800_rx_buf { - struct page *page; - unsigned long offset; -}; - -/* DMA descriptors and associated data for tx. - * Allocated from coherent memory. - */ -struct nb8800_tx_desc { - /* DMA descriptor. The second descriptor is used if packet - * data is unaligned. - */ - struct nb8800_dma_desc desc[2]; - - /* Status report filled in by hardware */ - u32 report; - - /* Bounce buffer for initial unaligned part of packet */ - u8 buf[8] __aligned(8); -}; - -/* Packet in tx queue */ -struct nb8800_tx_buf { - /* Currently queued skb */ - struct sk_buff *skb; - - /* DMA address of the first descriptor */ - dma_addr_t dma_desc; - - /* DMA address of packet data */ - dma_addr_t dma_addr; - - /* Length of DMA mapping, less than skb->len if alignment - * buffer is used. - */ - unsigned int dma_len; - - /* Number of packets in chain starting here */ - unsigned int chain_len; - - /* Packet chain ready to be submitted to hardware */ - bool ready; -}; - -struct nb8800_priv { - struct napi_struct napi; - - void __iomem *base; - - /* RX DMA descriptors */ - struct nb8800_rx_desc *rx_descs; - - /* RX buffers referenced by DMA descriptors */ - struct nb8800_rx_buf *rx_bufs; - - /* Current end of chain */ - u32 rx_eoc; - - /* Value for rx interrupt time register in NAPI interrupt mode */ - u32 rx_itr_irq; - - /* Value for rx interrupt time register in NAPI poll mode */ - u32 rx_itr_poll; - - /* Value for config field of rx DMA descriptors */ - u32 rx_dma_config; - - /* TX DMA descriptors */ - struct nb8800_tx_desc *tx_descs; - - /* TX packet queue */ - struct nb8800_tx_buf *tx_bufs; - - /* Number of free tx queue entries */ - atomic_t tx_free; - - /* First free tx queue entry */ - u32 tx_next; - - /* Next buffer to transmit */ - u32 tx_queue; - - /* Start of current packet chain */ - struct nb8800_tx_buf *tx_chain; - - /* Next buffer to reclaim */ - u32 tx_done; - - /* Lock for DMA activation */ - spinlock_t tx_lock; - - struct mii_bus *mii_bus; - struct device_node *phy_node; - - /* PHY connection type from DT */ - phy_interface_t phy_mode; - - /* Current link status */ - int speed; - int duplex; - int link; - - /* Pause settings */ - bool pause_aneg; - bool pause_rx; - bool pause_tx; - - /* DMA base address of rx descriptors, see rx_descs above */ - dma_addr_t rx_desc_dma; - - /* DMA base address of tx descriptors, see tx_descs above */ - dma_addr_t tx_desc_dma; - - struct clk *clk; -}; - -struct nb8800_ops { - int (*init)(struct net_device *dev); - int (*reset)(struct net_device *dev); -}; - -#endif /* _NB8800_H_ */ -- GitLab From 43e5763152e2d4679954da0d35029637f017b0b3 Mon Sep 17 00:00:00 2001 From: Michael Walle Date: Wed, 20 Jan 2021 20:43:03 +0100 Subject: [PATCH 1224/2012] net: macb: ignore tx_clk if MII is used If the MII interface is used, the PHY is the clock master, thus don't set the clock rate. On Zynq-7000, this will prevent the following warning: macb e000b000.ethernet eth0: unable to generate target frequency: 25000000 Hz Signed-off-by: Michael Walle Reviewed-by: Andrew Lunn Link: https://lore.kernel.org/r/20210120194303.28268-1-michael@walle.cc Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/cadence/macb_main.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index 814a5b10141d1..472bf8f220bc6 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -470,6 +470,10 @@ static void macb_set_tx_clk(struct macb *bp, int speed) if (!bp->tx_clk || (bp->caps & MACB_CAPS_CLK_HW_CHG)) return; + /* In case of MII the PHY is the clock master */ + if (bp->phy_interface == PHY_INTERFACE_MODE_MII) + return; + switch (speed) { case SPEED_10: rate = 2500000; -- GitLab From 4f6543f28bb05433d87b6de6c21e9c14c35ecf33 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 20 Jan 2021 16:40:45 +1100 Subject: [PATCH 1225/2012] crypto: marvel/cesa - Fix tdma descriptor on 64-bit The patch that added src_dma/dst_dma to struct mv_cesa_tdma_desc is broken on 64-bit systems as the size of the descriptor has been changed. This patch fixes it by using u32 instead of dma_addr_t. Fixes: e62291c1d9f4 ("crypto: marvell/cesa - Fix sparse warnings") Cc: Reported-by: Sven Auhagen Signed-off-by: Herbert Xu --- drivers/crypto/marvell/cesa/cesa.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/crypto/marvell/cesa/cesa.h b/drivers/crypto/marvell/cesa/cesa.h index fabfaaccca872..fa56b45620c79 100644 --- a/drivers/crypto/marvell/cesa/cesa.h +++ b/drivers/crypto/marvell/cesa/cesa.h @@ -300,11 +300,11 @@ struct mv_cesa_tdma_desc { __le32 byte_cnt; union { __le32 src; - dma_addr_t src_dma; + u32 src_dma; }; union { __le32 dst; - dma_addr_t dst_dma; + u32 dst_dma; }; __le32 next_dma; -- GitLab From 86fdf1fc60e95ef4721de1b5f802f6a44db848c1 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Wed, 20 Jan 2021 15:26:03 -0600 Subject: [PATCH 1226/2012] net: ipa: remove a remoteproc dependency The IPA driver currently requires a DT property to be defined whose value is the phandle for the modem subsystem. This was needed to look up a remoteproc structure pointer used when registering for notifications in the original IPA notification mechanism. Remoteproc provides a more generic SSR notifier system, and the IPA driver switched over to it last summer, but this remoteproc phandle dependency was not removed at that time. Get rid of the IPA remoteproc pointer and stop requiring the phandle be specified. This avoids a link error (rproc_put() not defined) for certain configurations. Reported-by: Randy Dunlap Signed-off-by: Alex Elder Signed-off-by: Jakub Kicinski --- drivers/net/ipa/ipa.h | 2 -- drivers/net/ipa/ipa_main.c | 38 ++------------------------------------ 2 files changed, 2 insertions(+), 38 deletions(-) diff --git a/drivers/net/ipa/ipa.h b/drivers/net/ipa/ipa.h index 6c2371084c55a..c6c6a7f6909c1 100644 --- a/drivers/net/ipa/ipa.h +++ b/drivers/net/ipa/ipa.h @@ -43,7 +43,6 @@ enum ipa_flag { * @flags: Boolean state flags * @version: IPA hardware version * @pdev: Platform device - * @modem_rproc: Remoteproc handle for modem subsystem * @smp2p: SMP2P information * @clock: IPA clocking information * @table_addr: DMA address of filter/route table content @@ -83,7 +82,6 @@ struct ipa { DECLARE_BITMAP(flags, IPA_FLAG_COUNT); enum ipa_version version; struct platform_device *pdev; - struct rproc *modem_rproc; struct notifier_block nb; void *notifier; struct ipa_smp2p *smp2p; diff --git a/drivers/net/ipa/ipa_main.c b/drivers/net/ipa/ipa_main.c index 84bb8ae927252..ab0fd5cb49277 100644 --- a/drivers/net/ipa/ipa_main.c +++ b/drivers/net/ipa/ipa_main.c @@ -15,7 +15,6 @@ #include #include #include -#include #include #include @@ -729,19 +728,6 @@ static const struct of_device_id ipa_match[] = { }; MODULE_DEVICE_TABLE(of, ipa_match); -static phandle of_property_read_phandle(const struct device_node *np, - const char *name) -{ - struct property *prop; - int len = 0; - - prop = of_find_property(np, name, &len); - if (!prop || len != sizeof(__be32)) - return 0; - - return be32_to_cpup(prop->value); -} - /* Check things that can be validated at build time. This just * groups these things BUILD_BUG_ON() calls don't clutter the rest * of the code. @@ -807,10 +793,8 @@ static int ipa_probe(struct platform_device *pdev) struct device *dev = &pdev->dev; const struct ipa_data *data; struct ipa_clock *clock; - struct rproc *rproc; bool modem_init; struct ipa *ipa; - phandle ph; int ret; ipa_validate_build(); @@ -829,25 +813,12 @@ static int ipa_probe(struct platform_device *pdev) if (!qcom_scm_is_available()) return -EPROBE_DEFER; - /* We rely on remoteproc to tell us about modem state changes */ - ph = of_property_read_phandle(dev->of_node, "modem-remoteproc"); - if (!ph) { - dev_err(dev, "DT missing \"modem-remoteproc\" property\n"); - return -EINVAL; - } - - rproc = rproc_get_by_phandle(ph); - if (!rproc) - return -EPROBE_DEFER; - /* The clock and interconnects might not be ready when we're * probed, so might return -EPROBE_DEFER. */ clock = ipa_clock_init(dev, data->clock_data); - if (IS_ERR(clock)) { - ret = PTR_ERR(clock); - goto err_rproc_put; - } + if (IS_ERR(clock)) + return PTR_ERR(clock); /* No more EPROBE_DEFER. Allocate and initialize the IPA structure */ ipa = kzalloc(sizeof(*ipa), GFP_KERNEL); @@ -858,7 +829,6 @@ static int ipa_probe(struct platform_device *pdev) ipa->pdev = pdev; dev_set_drvdata(dev, ipa); - ipa->modem_rproc = rproc; ipa->clock = clock; ipa->version = data->version; @@ -935,8 +905,6 @@ err_kfree_ipa: kfree(ipa); err_clock_exit: ipa_clock_exit(clock); -err_rproc_put: - rproc_put(rproc); return ret; } @@ -944,7 +912,6 @@ err_rproc_put: static int ipa_remove(struct platform_device *pdev) { struct ipa *ipa = dev_get_drvdata(&pdev->dev); - struct rproc *rproc = ipa->modem_rproc; struct ipa_clock *clock = ipa->clock; int ret; @@ -970,7 +937,6 @@ static int ipa_remove(struct platform_device *pdev) ipa_reg_exit(ipa); kfree(ipa); ipa_clock_exit(clock); - rproc_put(rproc); return 0; } -- GitLab From 27bb36ed7775683a957f57c1c368d319c85f2e4f Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Wed, 20 Jan 2021 15:26:04 -0600 Subject: [PATCH 1227/2012] dt-bindings: net: remove modem-remoteproc property The IPA driver uses the remoteproc SSR notifier now, rather than the temporary IPA notification system used initially. As a result it no longer needs a property identifying the modem subsystem DT node. Use GIC_SPI rather than 0 in the example interrupt definition. Signed-off-by: Alex Elder Signed-off-by: Jakub Kicinski --- .../devicetree/bindings/net/qcom,ipa.yaml | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) diff --git a/Documentation/devicetree/bindings/net/qcom,ipa.yaml b/Documentation/devicetree/bindings/net/qcom,ipa.yaml index 8a2d12644675b..8f86084bf12e9 100644 --- a/Documentation/devicetree/bindings/net/qcom,ipa.yaml +++ b/Documentation/devicetree/bindings/net/qcom,ipa.yaml @@ -113,13 +113,6 @@ properties: performing early IPA initialization, including loading and validating firwmare used by the GSI. - modem-remoteproc: - $ref: /schemas/types.yaml#/definitions/phandle - description: - This defines the phandle to the remoteproc node representing - the modem subsystem. This is requied so the IPA driver can - receive and act on notifications of modem up/down events. - memory-region: maxItems: 1 description: @@ -135,7 +128,6 @@ required: - interrupts - interconnects - qcom,smem-states - - modem-remoteproc oneOf: - required: @@ -147,7 +139,7 @@ additionalProperties: false examples: - | - #include + #include #include #include @@ -168,7 +160,6 @@ examples: compatible = "qcom,sdm845-ipa"; modem-init; - modem-remoteproc = <&mss_pil>; iommus = <&apps_smmu 0x720 0x3>; reg = <0x1e40000 0x7000>, @@ -178,8 +169,8 @@ examples: "ipa-shared", "gsi"; - interrupts-extended = <&intc 0 311 IRQ_TYPE_EDGE_RISING>, - <&intc 0 432 IRQ_TYPE_LEVEL_HIGH>, + interrupts-extended = <&intc GIC_SPI 311 IRQ_TYPE_EDGE_RISING>, + <&intc GIC_SPI 432 IRQ_TYPE_LEVEL_HIGH>, <&ipa_smp2p_in 0 IRQ_TYPE_EDGE_RISING>, <&ipa_smp2p_in 1 IRQ_TYPE_EDGE_RISING>; interrupt-names = "ipa", -- GitLab From 8535c8e300104069873ab00d8f2fc43bf010e653 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Wed, 20 Jan 2021 15:26:05 -0600 Subject: [PATCH 1228/2012] arm64: dts: qcom: sc7180: kill IPA modem-remoteproc property The "modem-remoteproc" property is no longer required for the IPA driver, so get rid of it. Signed-off-by: Alex Elder Signed-off-by: Jakub Kicinski --- arch/arm64/boot/dts/qcom/sc7180.dtsi | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/arm64/boot/dts/qcom/sc7180.dtsi b/arch/arm64/boot/dts/qcom/sc7180.dtsi index 22b832fc62e3d..003309f0d3e18 100644 --- a/arch/arm64/boot/dts/qcom/sc7180.dtsi +++ b/arch/arm64/boot/dts/qcom/sc7180.dtsi @@ -1434,8 +1434,6 @@ qcom,smem-state-names = "ipa-clock-enabled-valid", "ipa-clock-enabled"; - modem-remoteproc = <&remoteproc_mpss>; - status = "disabled"; }; -- GitLab From 5da1fca9eb73deee5fb738d768dc984280514252 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Wed, 20 Jan 2021 15:26:06 -0600 Subject: [PATCH 1229/2012] arm64: dts: qcom: sdm845: kill IPA modem-remoteproc property The "modem-remoteproc" property is no longer required for the IPA driver, so get rid of it. Signed-off-by: Alex Elder Signed-off-by: Jakub Kicinski --- arch/arm64/boot/dts/qcom/sdm845.dtsi | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/arm64/boot/dts/qcom/sdm845.dtsi b/arch/arm64/boot/dts/qcom/sdm845.dtsi index bcf888381f144..04b2490eec9f4 100644 --- a/arch/arm64/boot/dts/qcom/sdm845.dtsi +++ b/arch/arm64/boot/dts/qcom/sdm845.dtsi @@ -2366,8 +2366,6 @@ qcom,smem-state-names = "ipa-clock-enabled-valid", "ipa-clock-enabled"; - modem-remoteproc = <&mss_pil>; - status = "disabled"; }; -- GitLab From 7f7aa94bcaf03d0f18a6853d8f7dad6a4d25bbd6 Mon Sep 17 00:00:00 2001 From: Wen Gong Date: Fri, 25 Dec 2020 11:39:14 +0800 Subject: [PATCH 1230/2012] mac80211: reduce peer HE MCS/NSS to own capabilities For VHT capbility, we do intersection of MCS and NSS for peers in mac80211, to simplify drivers. Add this for HE as well. Signed-off-by: Wen Gong Link: https://lore.kernel.org/r/1609816120-9411-3-git-send-email-wgong@codeaurora.org [reword commit message, style cleanups, fix endian annotations] Signed-off-by: Johannes Berg --- net/mac80211/he.c | 92 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 92 insertions(+) diff --git a/net/mac80211/he.c b/net/mac80211/he.c index cc26f239838ba..0c0b970835ceb 100644 --- a/net/mac80211/he.c +++ b/net/mac80211/he.c @@ -52,6 +52,57 @@ ieee80211_update_from_he_6ghz_capa(const struct ieee80211_he_6ghz_capa *he_6ghz_ sta->sta.he_6ghz_capa = *he_6ghz_capa; } +static void ieee80211_he_mcs_disable(__le16 *he_mcs) +{ + u32 i; + + for (i = 0; i < 8; i++) + *he_mcs |= cpu_to_le16(IEEE80211_HE_MCS_NOT_SUPPORTED << i * 2); +} + +static void ieee80211_he_mcs_intersection(__le16 *he_own_rx, __le16 *he_peer_rx, + __le16 *he_own_tx, __le16 *he_peer_tx) +{ + u32 i; + u16 own_rx, own_tx, peer_rx, peer_tx; + + for (i = 0; i < 8; i++) { + own_rx = le16_to_cpu(*he_own_rx); + own_rx = (own_rx >> i * 2) & IEEE80211_HE_MCS_NOT_SUPPORTED; + + own_tx = le16_to_cpu(*he_own_tx); + own_tx = (own_tx >> i * 2) & IEEE80211_HE_MCS_NOT_SUPPORTED; + + peer_rx = le16_to_cpu(*he_peer_rx); + peer_rx = (peer_rx >> i * 2) & IEEE80211_HE_MCS_NOT_SUPPORTED; + + peer_tx = le16_to_cpu(*he_peer_tx); + peer_tx = (peer_tx >> i * 2) & IEEE80211_HE_MCS_NOT_SUPPORTED; + + if (peer_tx != IEEE80211_HE_MCS_NOT_SUPPORTED) { + if (own_rx == IEEE80211_HE_MCS_NOT_SUPPORTED) + peer_tx = IEEE80211_HE_MCS_NOT_SUPPORTED; + else if (own_rx < peer_tx) + peer_tx = own_rx; + } + + if (peer_rx != IEEE80211_HE_MCS_NOT_SUPPORTED) { + if (own_tx == IEEE80211_HE_MCS_NOT_SUPPORTED) + peer_rx = IEEE80211_HE_MCS_NOT_SUPPORTED; + else if (own_tx < peer_rx) + peer_rx = own_tx; + } + + *he_peer_rx &= + ~cpu_to_le16(IEEE80211_HE_MCS_NOT_SUPPORTED << i * 2); + *he_peer_rx |= cpu_to_le16(peer_rx << i * 2); + + *he_peer_tx &= + ~cpu_to_le16(IEEE80211_HE_MCS_NOT_SUPPORTED << i * 2); + *he_peer_tx |= cpu_to_le16(peer_tx << i * 2); + } +} + void ieee80211_he_cap_ie_to_sta_he_cap(struct ieee80211_sub_if_data *sdata, struct ieee80211_supported_band *sband, @@ -60,10 +111,12 @@ ieee80211_he_cap_ie_to_sta_he_cap(struct ieee80211_sub_if_data *sdata, struct sta_info *sta) { struct ieee80211_sta_he_cap *he_cap = &sta->sta.he_cap; + struct ieee80211_sta_he_cap own_he_cap = sband->iftype_data->he_cap; struct ieee80211_he_cap_elem *he_cap_ie_elem = (void *)he_cap_ie; u8 he_ppe_size; u8 mcs_nss_size; u8 he_total_size; + bool own_160, peer_160, own_80p80, peer_80p80; memset(he_cap, 0, sizeof(*he_cap)); @@ -101,6 +154,45 @@ ieee80211_he_cap_ie_to_sta_he_cap(struct ieee80211_sub_if_data *sdata, if (sband->band == NL80211_BAND_6GHZ && he_6ghz_capa) ieee80211_update_from_he_6ghz_capa(he_6ghz_capa, sta); + + ieee80211_he_mcs_intersection(&own_he_cap.he_mcs_nss_supp.rx_mcs_80, + &he_cap->he_mcs_nss_supp.rx_mcs_80, + &own_he_cap.he_mcs_nss_supp.tx_mcs_80, + &he_cap->he_mcs_nss_supp.tx_mcs_80); + + own_160 = own_he_cap.he_cap_elem.phy_cap_info[0] & + IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_160MHZ_IN_5G; + peer_160 = he_cap->he_cap_elem.phy_cap_info[0] & + IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_160MHZ_IN_5G; + + if (peer_160 && own_160) { + ieee80211_he_mcs_intersection(&own_he_cap.he_mcs_nss_supp.rx_mcs_160, + &he_cap->he_mcs_nss_supp.rx_mcs_160, + &own_he_cap.he_mcs_nss_supp.tx_mcs_160, + &he_cap->he_mcs_nss_supp.tx_mcs_160); + } else if (peer_160 && !own_160) { + ieee80211_he_mcs_disable(&he_cap->he_mcs_nss_supp.rx_mcs_160); + ieee80211_he_mcs_disable(&he_cap->he_mcs_nss_supp.tx_mcs_160); + he_cap->he_cap_elem.phy_cap_info[0] &= + ~IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_160MHZ_IN_5G; + } + + own_80p80 = own_he_cap.he_cap_elem.phy_cap_info[0] & + IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_80PLUS80_MHZ_IN_5G; + peer_80p80 = he_cap->he_cap_elem.phy_cap_info[0] & + IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_80PLUS80_MHZ_IN_5G; + + if (peer_80p80 && own_80p80) { + ieee80211_he_mcs_intersection(&own_he_cap.he_mcs_nss_supp.rx_mcs_80p80, + &he_cap->he_mcs_nss_supp.rx_mcs_80p80, + &own_he_cap.he_mcs_nss_supp.tx_mcs_80p80, + &he_cap->he_mcs_nss_supp.tx_mcs_80p80); + } else if (peer_80p80 && !own_80p80) { + ieee80211_he_mcs_disable(&he_cap->he_mcs_nss_supp.rx_mcs_80p80); + ieee80211_he_mcs_disable(&he_cap->he_mcs_nss_supp.tx_mcs_80p80); + he_cap->he_cap_elem.phy_cap_info[0] &= + ~IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_80PLUS80_MHZ_IN_5G; + } } void -- GitLab From c27aa56a72b8ea6d3bef6fcb1be1a85cf78b0673 Mon Sep 17 00:00:00 2001 From: Arend van Spriel Date: Tue, 5 Jan 2021 11:58:39 +0100 Subject: [PATCH 1231/2012] cfg80211: add VHT rate entries for MCS-10 and MCS-11 Observed the warning in cfg80211_calculate_bitrate_vht() using an 11ac chip reporting MCS-11. Since devices reporting non-standard MCS-9 is already supported add similar entries for MCS-10 and MCS-11. Actually, the value of MCS-9@20MHz is slightly off so corrected that. Signed-off-by: Arend van Spriel Link: https://lore.kernel.org/r/20210105105839.3795-1-arend.vanspriel@broadcom.com [fix array size] Signed-off-by: Johannes Berg --- net/wireless/util.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/net/wireless/util.c b/net/wireless/util.c index b4acc805114b6..c22ada0a36fa5 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -1224,7 +1224,7 @@ static u32 cfg80211_calculate_bitrate_edmg(struct rate_info *rate) static u32 cfg80211_calculate_bitrate_vht(struct rate_info *rate) { - static const u32 base[4][10] = { + static const u32 base[4][12] = { { 6500000, 13000000, 19500000, @@ -1235,7 +1235,9 @@ static u32 cfg80211_calculate_bitrate_vht(struct rate_info *rate) 65000000, 78000000, /* not in the spec, but some devices use this: */ - 86500000, + 86700000, + 97500000, + 108300000, }, { 13500000, 27000000, @@ -1247,6 +1249,8 @@ static u32 cfg80211_calculate_bitrate_vht(struct rate_info *rate) 135000000, 162000000, 180000000, + 202500000, + 225000000, }, { 29300000, 58500000, @@ -1258,6 +1262,8 @@ static u32 cfg80211_calculate_bitrate_vht(struct rate_info *rate) 292500000, 351000000, 390000000, + 438800000, + 487500000, }, { 58500000, 117000000, @@ -1269,12 +1275,14 @@ static u32 cfg80211_calculate_bitrate_vht(struct rate_info *rate) 585000000, 702000000, 780000000, + 877500000, + 975000000, }, }; u32 bitrate; int idx; - if (rate->mcs > 9) + if (rate->mcs > 11) goto warn; switch (rate->bw) { -- GitLab From d9c85e24726587277ce0dcf33b5695acfcc72234 Mon Sep 17 00:00:00 2001 From: Max Chen Date: Wed, 6 Jan 2021 15:50:49 -0800 Subject: [PATCH 1232/2012] cfg80211: Add phyrate conversion support for extended MCS in 60GHz band The current phyrate conversion does not include extended MCS and provides incorrect rates. Add a flag for extended MCS in DMG and add corresponding phyrate table for the correct conversions using base MCS in DMG specs. Signed-off-by: Max Chen Link: https://lore.kernel.org/r/1609977050-7089-2-git-send-email-mxchen@codeaurora.org [reduce data size, make a single WARN] Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 2 ++ net/wireless/util.c | 21 +++++++++++++++++++++ 2 files changed, 23 insertions(+) diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 0d6f7ec860615..798f8eb15e00c 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1460,6 +1460,7 @@ int cfg80211_check_station_change(struct wiphy *wiphy, * @RATE_INFO_FLAGS_DMG: 60GHz MCS * @RATE_INFO_FLAGS_HE_MCS: HE MCS information * @RATE_INFO_FLAGS_EDMG: 60GHz MCS in EDMG mode + * @RATE_INFO_FLAGS_EXTENDED_SC_DMG: 60GHz extended SC MCS */ enum rate_info_flags { RATE_INFO_FLAGS_MCS = BIT(0), @@ -1468,6 +1469,7 @@ enum rate_info_flags { RATE_INFO_FLAGS_DMG = BIT(3), RATE_INFO_FLAGS_HE_MCS = BIT(4), RATE_INFO_FLAGS_EDMG = BIT(5), + RATE_INFO_FLAGS_EXTENDED_SC_DMG = BIT(6), }; /** diff --git a/net/wireless/util.c b/net/wireless/util.c index c22ada0a36fa5..eab928002cd80 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -1188,6 +1188,25 @@ static u32 cfg80211_calculate_bitrate_dmg(struct rate_info *rate) return __mcs2bitrate[rate->mcs]; } +static u32 cfg80211_calculate_bitrate_extended_sc_dmg(struct rate_info *rate) +{ + static const u32 __mcs2bitrate[] = { + [6 - 6] = 26950, /* MCS 9.1 : 2695.0 mbps */ + [7 - 6] = 50050, /* MCS 12.1 */ + [8 - 6] = 53900, + [9 - 6] = 57750, + [10 - 6] = 63900, + [11 - 6] = 75075, + [12 - 6] = 80850, + }; + + /* Extended SC MCS not defined for base MCS below 6 or above 12 */ + if (WARN_ON_ONCE(rate->mcs < 6 || rate->mcs > 12)) + return 0; + + return __mcs2bitrate[rate->mcs - 6]; +} + static u32 cfg80211_calculate_bitrate_edmg(struct rate_info *rate) { static const u32 __mcs2bitrate[] = { @@ -1406,6 +1425,8 @@ u32 cfg80211_calculate_bitrate(struct rate_info *rate) return cfg80211_calculate_bitrate_ht(rate); if (rate->flags & RATE_INFO_FLAGS_DMG) return cfg80211_calculate_bitrate_dmg(rate); + if (rate->flags & RATE_INFO_FLAGS_EXTENDED_SC_DMG) + return cfg80211_calculate_bitrate_extended_sc_dmg(rate); if (rate->flags & RATE_INFO_FLAGS_EDMG) return cfg80211_calculate_bitrate_edmg(rate); if (rate->flags & RATE_INFO_FLAGS_VHT_MCS) -- GitLab From e908435e402aff23c9b0b3c59c7cd12b08b681b0 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Sat, 9 Jan 2021 18:57:51 +0100 Subject: [PATCH 1233/2012] mac80211: introduce aql_enable node in debugfs Introduce aql_enable node in debugfs in order to enable/disable aql. This is useful for debugging purpose. Signed-off-by: Lorenzo Bianconi Link: https://lore.kernel.org/r/e7a934d5d84e4796c4f97ea5de4e66c824296b07.1610214851.git.lorenzo@kernel.org Signed-off-by: Johannes Berg --- net/mac80211/debugfs.c | 51 ++++++++++++++++++++++++++++++++++++++ net/mac80211/ieee80211_i.h | 2 ++ net/mac80211/tx.c | 5 ++++ 3 files changed, 58 insertions(+) diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c index 08a6f6644dc4e..5296898875ffb 100644 --- a/net/mac80211/debugfs.c +++ b/net/mac80211/debugfs.c @@ -281,6 +281,56 @@ static const struct file_operations aql_txq_limit_ops = { .llseek = default_llseek, }; +static ssize_t aql_enable_read(struct file *file, char __user *user_buf, + size_t count, loff_t *ppos) +{ + char buf[3]; + int len; + + len = scnprintf(buf, sizeof(buf), "%d\n", + !static_key_false(&aql_disable.key)); + + return simple_read_from_buffer(user_buf, count, ppos, buf, len); +} + +static ssize_t aql_enable_write(struct file *file, const char __user *user_buf, + size_t count, loff_t *ppos) +{ + bool aql_disabled = static_key_false(&aql_disable.key); + char buf[3]; + size_t len; + + if (count > sizeof(buf)) + return -EINVAL; + + if (copy_from_user(buf, user_buf, count)) + return -EFAULT; + + buf[sizeof(buf) - 1] = '\0'; + len = strlen(buf); + if (len > 0 && buf[len - 1] == '\n') + buf[len - 1] = 0; + + if (buf[0] == '0' && buf[1] == '\0') { + if (!aql_disabled) + static_branch_inc(&aql_disable); + } else if (buf[0] == '1' && buf[1] == '\0') { + if (aql_disabled) + static_branch_dec(&aql_disable); + } else { + return -EINVAL; + } + + return count; +} + +static const struct file_operations aql_enable_ops = { + .write = aql_enable_write, + .read = aql_enable_read, + .open = simple_open, + .llseek = default_llseek, +}; + static ssize_t force_tx_status_read(struct file *file, char __user *user_buf, size_t count, @@ -569,6 +619,7 @@ void debugfs_hw_add(struct ieee80211_local *local) DEBUGFS_ADD(power); DEBUGFS_ADD(hw_conf); DEBUGFS_ADD_MODE(force_tx_status, 0600); + DEBUGFS_ADD_MODE(aql_enable, 0600); if (local->ops->wake_tx_queue) DEBUGFS_ADD_MODE(aqm, 0600); diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index c0f6168fdeedd..982fdc12abd99 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1142,6 +1142,8 @@ enum mac80211_scan_state { SCAN_ABORT, }; +DECLARE_STATIC_KEY_FALSE(aql_disable); + struct ieee80211_local { /* embed the driver visible part. * don't cast (use the static inlines below), but we keep diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 45536185d8d7f..d626e6808bef1 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -3811,6 +3811,8 @@ void __ieee80211_schedule_txq(struct ieee80211_hw *hw, } EXPORT_SYMBOL(__ieee80211_schedule_txq); +DEFINE_STATIC_KEY_FALSE(aql_disable); + bool ieee80211_txq_airtime_check(struct ieee80211_hw *hw, struct ieee80211_txq *txq) { @@ -3820,6 +3822,9 @@ bool ieee80211_txq_airtime_check(struct ieee80211_hw *hw, if (!wiphy_ext_feature_isset(local->hw.wiphy, NL80211_EXT_FEATURE_AQL)) return true; + if (static_branch_unlikely(&aql_disable)) + return true; + if (!txq->sta) return true; -- GitLab From f84de063985a6f8e3adb0c0b409ca51452b4def0 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Fri, 15 Jan 2021 13:02:34 +0100 Subject: [PATCH 1234/2012] mac80211: minstrel_ht: clean up CCK code - move ack overhead out of rate duration table - remove cck_supported, cck_supported_short Preparation for adding OFDM legacy rates support Signed-off-by: Felix Fietkau Link: https://lore.kernel.org/r/20210115120242.89616-2-nbd@nbd.name Signed-off-by: Johannes Berg --- net/mac80211/rc80211_minstrel_ht.c | 54 +++++++++++++++++------------- net/mac80211/rc80211_minstrel_ht.h | 5 ++- 2 files changed, 32 insertions(+), 27 deletions(-) diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c index b11a2af55b06a..167d4fa5adf16 100644 --- a/net/mac80211/rc80211_minstrel_ht.c +++ b/net/mac80211/rc80211_minstrel_ht.c @@ -136,20 +136,16 @@ __VHT_GROUP(_streams, _sgi, _bw, \ VHT_GROUP_SHIFT(_streams, _sgi, _bw)) -#define CCK_DURATION(_bitrate, _short, _len) \ +#define CCK_DURATION(_bitrate, _short) \ (1000 * (10 /* SIFS */ + \ (_short ? 72 + 24 : 144 + 48) + \ - (8 * (_len + 4) * 10) / (_bitrate))) - -#define CCK_ACK_DURATION(_bitrate, _short) \ - (CCK_DURATION((_bitrate > 10 ? 20 : 10), false, 60) + \ - CCK_DURATION(_bitrate, _short, AVG_PKT_SIZE)) + (8 * (AVG_PKT_SIZE + 4) * 10) / (_bitrate))) #define CCK_DURATION_LIST(_short, _s) \ - CCK_ACK_DURATION(10, _short) >> _s, \ - CCK_ACK_DURATION(20, _short) >> _s, \ - CCK_ACK_DURATION(55, _short) >> _s, \ - CCK_ACK_DURATION(110, _short) >> _s + CCK_DURATION(10, _short) >> _s, \ + CCK_DURATION(20, _short) >> _s, \ + CCK_DURATION(55, _short) >> _s, \ + CCK_DURATION(110, _short) >> _s #define __CCK_GROUP(_s) \ [MINSTREL_CCK_GROUP] = { \ @@ -163,7 +159,7 @@ } #define CCK_GROUP_SHIFT \ - GROUP_SHIFT(CCK_ACK_DURATION(10, false)) + GROUP_SHIFT(CCK_DURATION(10, false)) #define CCK_GROUP __CCK_GROUP(CCK_GROUP_SHIFT) @@ -349,15 +345,19 @@ int minstrel_ht_get_tp_avg(struct minstrel_ht_sta *mi, int group, int rate, int prob_avg) { - unsigned int nsecs = 0; + unsigned int nsecs = 0, overhead = mi->overhead; + unsigned int ampdu_len = 1; /* do not account throughput if sucess prob is below 10% */ if (prob_avg < MINSTREL_FRAC(10, 100)) return 0; - if (group != MINSTREL_CCK_GROUP) - nsecs = 1000 * mi->overhead / minstrel_ht_avg_ampdu_len(mi); + if (group == MINSTREL_CCK_GROUP) + overhead = mi->overhead_legacy; + else + ampdu_len = minstrel_ht_avg_ampdu_len(mi); + nsecs = 1000 * overhead / ampdu_len; nsecs += minstrel_mcs_groups[group].duration[rate] << minstrel_mcs_groups[group].shift; @@ -1031,7 +1031,10 @@ minstrel_calc_retransmit(struct minstrel_priv *mp, struct minstrel_ht_sta *mi, ctime += (t_slot * cw) >> 1; cw = min((cw << 1) | 1, mp->cw_max); - if (index / MCS_GROUP_RATES != MINSTREL_CCK_GROUP) { + if (index / MCS_GROUP_RATES == MINSTREL_CCK_GROUP) { + overhead = mi->overhead_legacy; + overhead_rtscts = mi->overhead_legacy_rtscts; + } else { overhead = mi->overhead; overhead_rtscts = mi->overhead_rtscts; } @@ -1369,18 +1372,14 @@ minstrel_ht_update_cck(struct minstrel_priv *mp, struct minstrel_ht_sta *mi, if (!ieee80211_hw_check(mp->hw, SUPPORTS_HT_CCK_RATES)) return; - mi->cck_supported = 0; - mi->cck_supported_short = 0; for (i = 0; i < 4; i++) { if (!rate_supported(sta, sband->band, mp->cck_rates[i])) continue; - mi->cck_supported |= BIT(i); + mi->supported[MINSTREL_CCK_GROUP] |= BIT(i); if (sband->bitrates[i].flags & IEEE80211_RATE_SHORT_PREAMBLE) - mi->cck_supported_short |= BIT(i); + mi->supported[MINSTREL_CCK_GROUP] |= BIT(i + 4); } - - mi->supported[MINSTREL_CCK_GROUP] = mi->cck_supported; } static void @@ -1394,12 +1393,13 @@ minstrel_ht_update_caps(void *priv, struct ieee80211_supported_band *sband, struct ieee80211_mcs_info *mcs = &sta->ht_cap.mcs; u16 ht_cap = sta->ht_cap.cap; struct ieee80211_sta_vht_cap *vht_cap = &sta->vht_cap; + const struct ieee80211_rate *ctl_rate; + bool ldpc, erp; int use_vht; int n_supported = 0; int ack_dur; int stbc; int i; - bool ldpc; /* fall back to the old minstrel for legacy stations */ if (!sta->ht_cap.ht_supported) @@ -1423,6 +1423,14 @@ minstrel_ht_update_caps(void *priv, struct ieee80211_supported_band *sband, mi->overhead += ack_dur; mi->overhead_rtscts = mi->overhead + 2 * ack_dur; + ctl_rate = &sband->bitrates[rate_lowest_index(sband, sta)]; + erp = ctl_rate->flags & IEEE80211_RATE_ERP_G; + ack_dur = ieee80211_frame_duration(sband->band, 10, + ctl_rate->bitrate, erp, 1, + ieee80211_chandef_get_shift(chandef)); + mi->overhead_legacy = ack_dur; + mi->overhead_legacy_rtscts = mi->overhead_legacy + 2 * ack_dur; + mi->avg_ampdu_len = MINSTREL_FRAC(1, 1); /* When using MRR, sample more on the first attempt, without delay */ @@ -1523,8 +1531,6 @@ minstrel_ht_update_caps(void *priv, struct ieee80211_supported_band *sband, if (!n_supported) goto use_legacy; - mi->supported[MINSTREL_CCK_GROUP] |= mi->cck_supported_short << 4; - /* create an initial rate table with the lowest supported rates */ minstrel_ht_update_stats(mp, mi, true); minstrel_ht_update_rates(mp, mi); diff --git a/net/mac80211/rc80211_minstrel_ht.h b/net/mac80211/rc80211_minstrel_ht.h index 53ea3c29debfd..11300fa48a2fe 100644 --- a/net/mac80211/rc80211_minstrel_ht.h +++ b/net/mac80211/rc80211_minstrel_ht.h @@ -77,6 +77,8 @@ struct minstrel_ht_sta { /* overhead time in usec for each frame */ unsigned int overhead; unsigned int overhead_rtscts; + unsigned int overhead_legacy; + unsigned int overhead_legacy_rtscts; unsigned int total_packets_last; unsigned int total_packets_cur; @@ -97,9 +99,6 @@ struct minstrel_ht_sta { /* current MCS group to be sampled */ u8 sample_group; - u8 cck_supported; - u8 cck_supported_short; - /* Bitfield of supported MCS rates of all groups */ u16 supported[MINSTREL_GROUPS_NB]; -- GitLab From a7844a53846017c34804b0a22bbda855cb08dd7c Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Fri, 15 Jan 2021 13:02:35 +0100 Subject: [PATCH 1235/2012] mac80211: minstrel_ht: add support for OFDM rates on non-HT clients The legacy minstrel code is essentially unmaintained and receives only very little testing. In order to bring the significant algorithm improvements from minstrel_ht to legacy clients, this patch adds support for OFDM rates to minstrel_ht and removes the fallback to the legacy codepath. This also makes it work much better on hardware with rate selection constraints, e.g. mt76. Signed-off-by: Felix Fietkau Link: https://lore.kernel.org/r/20210115120242.89616-3-nbd@nbd.name Signed-off-by: Johannes Berg --- net/mac80211/rc80211_minstrel.h | 1 + net/mac80211/rc80211_minstrel_ht.c | 283 ++++++++++++++------- net/mac80211/rc80211_minstrel_ht.h | 17 +- net/mac80211/rc80211_minstrel_ht_debugfs.c | 39 ++- 4 files changed, 224 insertions(+), 116 deletions(-) diff --git a/net/mac80211/rc80211_minstrel.h b/net/mac80211/rc80211_minstrel.h index 86cd80b3ffdef..1b6d859a81e2f 100644 --- a/net/mac80211/rc80211_minstrel.h +++ b/net/mac80211/rc80211_minstrel.h @@ -152,6 +152,7 @@ struct minstrel_priv { unsigned int lookaround_rate_mrr; u8 cck_rates[4]; + u8 ofdm_rates[NUM_NL80211_BANDS][8]; #ifdef CONFIG_MAC80211_DEBUGFS /* diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c index 167d4fa5adf16..247557bff26c1 100644 --- a/net/mac80211/rc80211_minstrel_ht.c +++ b/net/mac80211/rc80211_minstrel_ht.c @@ -163,6 +163,38 @@ #define CCK_GROUP __CCK_GROUP(CCK_GROUP_SHIFT) +#define OFDM_DURATION(_bitrate) \ + (1000 * (16 /* SIFS + signal ext */ + \ + 16 /* T_PREAMBLE */ + \ + 4 /* T_SIGNAL */ + \ + 4 * (((16 + 80 * (AVG_PKT_SIZE + 4) + 6) / \ + ((_bitrate) * 4))))) + +#define OFDM_DURATION_LIST(_s) \ + OFDM_DURATION(60) >> _s, \ + OFDM_DURATION(90) >> _s, \ + OFDM_DURATION(120) >> _s, \ + OFDM_DURATION(180) >> _s, \ + OFDM_DURATION(240) >> _s, \ + OFDM_DURATION(360) >> _s, \ + OFDM_DURATION(480) >> _s, \ + OFDM_DURATION(540) >> _s + +#define __OFDM_GROUP(_s) \ + [MINSTREL_OFDM_GROUP] = { \ + .streams = 1, \ + .flags = 0, \ + .shift = _s, \ + .duration = { \ + OFDM_DURATION_LIST(_s), \ + } \ + } + +#define OFDM_GROUP_SHIFT \ + GROUP_SHIFT(OFDM_DURATION(60)) + +#define OFDM_GROUP __OFDM_GROUP(OFDM_GROUP_SHIFT) + static bool minstrel_vht_only = true; module_param(minstrel_vht_only, bool, 0644); @@ -199,6 +231,7 @@ const struct mcs_group minstrel_mcs_groups[] = { MCS_GROUP(4, 1, BW_40), CCK_GROUP, + OFDM_GROUP, VHT_GROUP(1, 0, BW_20), VHT_GROUP(2, 0, BW_20), @@ -231,6 +264,8 @@ const struct mcs_group minstrel_mcs_groups[] = { VHT_GROUP(4, 1, BW_80), }; +const s16 minstrel_cck_bitrates[4] = { 10, 20, 55, 110 }; +const s16 minstrel_ofdm_bitrates[8] = { 60, 90, 120, 180, 240, 360, 480, 540 }; static u8 sample_table[SAMPLE_COLUMNS][MCS_GROUP_RATES] __read_mostly; static void @@ -275,6 +310,13 @@ minstrel_get_valid_vht_rates(int bw, int nss, __le16 mcs_map) return 0x3ff & ~mask; } +static bool +minstrel_ht_is_legacy_group(int group) +{ + return group == MINSTREL_CCK_GROUP || + group == MINSTREL_OFDM_GROUP; +} + /* * Look up an MCS group index based on mac80211 rate information */ @@ -304,21 +346,34 @@ minstrel_ht_get_stats(struct minstrel_priv *mp, struct minstrel_ht_sta *mi, if (rate->flags & IEEE80211_TX_RC_MCS) { group = minstrel_ht_get_group_idx(rate); idx = rate->idx % 8; - } else if (rate->flags & IEEE80211_TX_RC_VHT_MCS) { + goto out; + } + + if (rate->flags & IEEE80211_TX_RC_VHT_MCS) { group = minstrel_vht_get_group_idx(rate); idx = ieee80211_rate_get_vht_mcs(rate); - } else { - group = MINSTREL_CCK_GROUP; + goto out; + } - for (idx = 0; idx < ARRAY_SIZE(mp->cck_rates); idx++) - if (rate->idx == mp->cck_rates[idx]) - break; + group = MINSTREL_CCK_GROUP; + for (idx = 0; idx < ARRAY_SIZE(mp->cck_rates); idx++) { + if (rate->idx != mp->cck_rates[idx]) + continue; /* short preamble */ if ((mi->supported[group] & BIT(idx + 4)) && (rate->flags & IEEE80211_TX_RC_USE_SHORT_PREAMBLE)) - idx += 4; + idx += 4; + goto out; } + + group = MINSTREL_OFDM_GROUP; + for (idx = 0; idx < ARRAY_SIZE(mp->ofdm_rates[0]); idx++) + if (rate->idx == mp->ofdm_rates[mi->band][idx]) + goto out; + + idx = 0; +out: return &mi->groups[group].rates[idx]; } @@ -352,7 +407,7 @@ minstrel_ht_get_tp_avg(struct minstrel_ht_sta *mi, int group, int rate, if (prob_avg < MINSTREL_FRAC(10, 100)) return 0; - if (group == MINSTREL_CCK_GROUP) + if (minstrel_ht_is_legacy_group(group)) overhead = mi->overhead_legacy; else ampdu_len = minstrel_ht_avg_ampdu_len(mi); @@ -439,8 +494,8 @@ minstrel_ht_set_best_prob_rate(struct minstrel_ht_sta *mi, u16 index) /* if max_tp_rate[0] is from MCS_GROUP max_prob_rate get selected from * MCS_GROUP as well as CCK_GROUP rates do not allow aggregation */ max_tp_group = mi->max_tp_rate[0] / MCS_GROUP_RATES; - if((index / MCS_GROUP_RATES == MINSTREL_CCK_GROUP) && - (max_tp_group != MINSTREL_CCK_GROUP)) + if (minstrel_ht_is_legacy_group(index / MCS_GROUP_RATES) && + !minstrel_ht_is_legacy_group(max_tp_group)) return; max_gpr_group = mg->max_group_prob_rate / MCS_GROUP_RATES; @@ -476,13 +531,13 @@ minstrel_ht_set_best_prob_rate(struct minstrel_ht_sta *mi, u16 index) static void minstrel_ht_assign_best_tp_rates(struct minstrel_ht_sta *mi, u16 tmp_mcs_tp_rate[MAX_THR_RATES], - u16 tmp_cck_tp_rate[MAX_THR_RATES]) + u16 tmp_legacy_tp_rate[MAX_THR_RATES]) { unsigned int tmp_group, tmp_idx, tmp_cck_tp, tmp_mcs_tp, tmp_prob; int i; - tmp_group = tmp_cck_tp_rate[0] / MCS_GROUP_RATES; - tmp_idx = tmp_cck_tp_rate[0] % MCS_GROUP_RATES; + tmp_group = tmp_legacy_tp_rate[0] / MCS_GROUP_RATES; + tmp_idx = tmp_legacy_tp_rate[0] % MCS_GROUP_RATES; tmp_prob = mi->groups[tmp_group].rates[tmp_idx].prob_avg; tmp_cck_tp = minstrel_ht_get_tp_avg(mi, tmp_group, tmp_idx, tmp_prob); @@ -493,7 +548,7 @@ minstrel_ht_assign_best_tp_rates(struct minstrel_ht_sta *mi, if (tmp_cck_tp > tmp_mcs_tp) { for(i = 0; i < MAX_THR_RATES; i++) { - minstrel_ht_sort_best_tp_rates(mi, tmp_cck_tp_rate[i], + minstrel_ht_sort_best_tp_rates(mi, tmp_legacy_tp_rate[i], tmp_mcs_tp_rate); } } @@ -511,6 +566,9 @@ minstrel_ht_prob_rate_reduce_streams(struct minstrel_ht_sta *mi) int tmp_max_streams, group, tmp_idx, tmp_prob; int tmp_tp = 0; + if (!mi->sta->ht_cap.ht_supported) + return; + tmp_max_streams = minstrel_mcs_groups[mi->max_tp_rate[0] / MCS_GROUP_RATES].streams; for (group = 0; group < ARRAY_SIZE(minstrel_mcs_groups); group++) { @@ -675,7 +733,8 @@ minstrel_ht_update_stats(struct minstrel_priv *mp, struct minstrel_ht_sta *mi, struct minstrel_rate_stats *mrs; int group, i, j, cur_prob; u16 tmp_mcs_tp_rate[MAX_THR_RATES], tmp_group_tp_rate[MAX_THR_RATES]; - u16 tmp_cck_tp_rate[MAX_THR_RATES], index; + u16 tmp_legacy_tp_rate[MAX_THR_RATES], index; + bool ht_supported = mi->sta->ht_cap.ht_supported; mi->sample_mode = MINSTREL_SAMPLE_IDLE; @@ -704,21 +763,29 @@ minstrel_ht_update_stats(struct minstrel_priv *mp, struct minstrel_ht_sta *mi, mi->sample_count = 0; memset(tmp_mcs_tp_rate, 0, sizeof(tmp_mcs_tp_rate)); - memset(tmp_cck_tp_rate, 0, sizeof(tmp_cck_tp_rate)); + memset(tmp_legacy_tp_rate, 0, sizeof(tmp_legacy_tp_rate)); if (mi->supported[MINSTREL_CCK_GROUP]) - for (j = 0; j < ARRAY_SIZE(tmp_cck_tp_rate); j++) - tmp_cck_tp_rate[j] = MINSTREL_CCK_GROUP * MCS_GROUP_RATES; + for (j = 0; j < ARRAY_SIZE(tmp_legacy_tp_rate); j++) + tmp_legacy_tp_rate[j] = MINSTREL_CCK_GROUP * MCS_GROUP_RATES; + else if (mi->supported[MINSTREL_OFDM_GROUP]) + for (j = 0; j < ARRAY_SIZE(tmp_legacy_tp_rate); j++) + tmp_legacy_tp_rate[j] = MINSTREL_OFDM_GROUP * MCS_GROUP_RATES; if (mi->supported[MINSTREL_VHT_GROUP_0]) index = MINSTREL_VHT_GROUP_0 * MCS_GROUP_RATES; - else + else if (ht_supported) index = MINSTREL_HT_GROUP_0 * MCS_GROUP_RATES; + else if (mi->supported[MINSTREL_CCK_GROUP]) + index = MINSTREL_CCK_GROUP * MCS_GROUP_RATES; + else + index = MINSTREL_OFDM_GROUP * MCS_GROUP_RATES; for (j = 0; j < ARRAY_SIZE(tmp_mcs_tp_rate); j++) tmp_mcs_tp_rate[j] = index; /* Find best rate sets within all MCS groups*/ for (group = 0; group < ARRAY_SIZE(minstrel_mcs_groups); group++) { + u16 *tp_rate = tmp_mcs_tp_rate; mg = &mi->groups[group]; if (!mi->supported[group]) @@ -730,6 +797,9 @@ minstrel_ht_update_stats(struct minstrel_priv *mp, struct minstrel_ht_sta *mi, for(j = 0; j < MAX_THR_RATES; j++) tmp_group_tp_rate[j] = MCS_GROUP_RATES * group; + if (group == MINSTREL_CCK_GROUP && ht_supported) + tp_rate = tmp_legacy_tp_rate; + for (i = 0; i < MCS_GROUP_RATES; i++) { if (!(mi->supported[group] & BIT(i))) continue; @@ -745,13 +815,7 @@ minstrel_ht_update_stats(struct minstrel_priv *mp, struct minstrel_ht_sta *mi, continue; /* Find max throughput rate set */ - if (group != MINSTREL_CCK_GROUP) { - minstrel_ht_sort_best_tp_rates(mi, index, - tmp_mcs_tp_rate); - } else if (group == MINSTREL_CCK_GROUP) { - minstrel_ht_sort_best_tp_rates(mi, index, - tmp_cck_tp_rate); - } + minstrel_ht_sort_best_tp_rates(mi, index, tp_rate); /* Find max throughput rate set within a group */ minstrel_ht_sort_best_tp_rates(mi, index, @@ -766,7 +830,8 @@ minstrel_ht_update_stats(struct minstrel_priv *mp, struct minstrel_ht_sta *mi, } /* Assign new rate set per sta */ - minstrel_ht_assign_best_tp_rates(mi, tmp_mcs_tp_rate, tmp_cck_tp_rate); + minstrel_ht_assign_best_tp_rates(mi, tmp_mcs_tp_rate, + tmp_legacy_tp_rate); memcpy(mi->max_tp_rate, tmp_mcs_tp_rate, sizeof(mi->max_tp_rate)); /* Try to increase robustness of max_prob_rate*/ @@ -795,8 +860,11 @@ minstrel_ht_update_stats(struct minstrel_priv *mp, struct minstrel_ht_sta *mi, } static bool -minstrel_ht_txstat_valid(struct minstrel_priv *mp, struct ieee80211_tx_rate *rate) +minstrel_ht_txstat_valid(struct minstrel_priv *mp, struct minstrel_ht_sta *mi, + struct ieee80211_tx_rate *rate) { + int i; + if (rate->idx < 0) return false; @@ -807,10 +875,15 @@ minstrel_ht_txstat_valid(struct minstrel_priv *mp, struct ieee80211_tx_rate *rat rate->flags & IEEE80211_TX_RC_VHT_MCS) return true; - return rate->idx == mp->cck_rates[0] || - rate->idx == mp->cck_rates[1] || - rate->idx == mp->cck_rates[2] || - rate->idx == mp->cck_rates[3]; + for (i = 0; i < ARRAY_SIZE(mp->cck_rates); i++) + if (rate->idx == mp->cck_rates[i]) + return true; + + for (i = 0; i < ARRAY_SIZE(mp->ofdm_rates[0]); i++) + if (rate->idx == mp->ofdm_rates[mi->band][i]) + return true; + + return false; } static void @@ -897,11 +970,6 @@ minstrel_ht_tx_status(void *priv, struct ieee80211_supported_band *sband, bool sample_status = false; int i; - if (!msp->is_ht) - return mac80211_minstrel.tx_status_ext(priv, sband, - &msp->legacy, st); - - /* This packet was aggregated but doesn't carry status info */ if ((info->flags & IEEE80211_TX_CTL_AMPDU) && !(info->flags & IEEE80211_TX_STAT_AMPDU)) @@ -930,10 +998,10 @@ minstrel_ht_tx_status(void *priv, struct ieee80211_supported_band *sband, if (mi->sample_mode != MINSTREL_SAMPLE_IDLE) rate_sample = minstrel_get_ratestats(mi, mi->sample_rate); - last = !minstrel_ht_txstat_valid(mp, &ar[0]); + last = !minstrel_ht_txstat_valid(mp, mi, &ar[0]); for (i = 0; !last; i++) { last = (i == IEEE80211_TX_MAX_RATES - 1) || - !minstrel_ht_txstat_valid(mp, &ar[i + 1]); + !minstrel_ht_txstat_valid(mp, mi, &ar[i + 1]); rate = minstrel_ht_get_stats(mp, mi, &ar[i]); if (rate == rate_sample) @@ -1031,7 +1099,7 @@ minstrel_calc_retransmit(struct minstrel_priv *mp, struct minstrel_ht_sta *mi, ctime += (t_slot * cw) >> 1; cw = min((cw << 1) | 1, mp->cw_max); - if (index / MCS_GROUP_RATES == MINSTREL_CCK_GROUP) { + if (minstrel_ht_is_legacy_group(index / MCS_GROUP_RATES)) { overhead = mi->overhead_legacy; overhead_rtscts = mi->overhead_legacy_rtscts; } else { @@ -1064,7 +1132,8 @@ static void minstrel_ht_set_rate(struct minstrel_priv *mp, struct minstrel_ht_sta *mi, struct ieee80211_sta_rates *ratetbl, int offset, int index) { - const struct mcs_group *group = &minstrel_mcs_groups[index / MCS_GROUP_RATES]; + int group_idx = index / MCS_GROUP_RATES; + const struct mcs_group *group = &minstrel_mcs_groups[group_idx]; struct minstrel_rate_stats *mrs; u8 idx; u16 flags = group->flags; @@ -1083,13 +1152,17 @@ minstrel_ht_set_rate(struct minstrel_priv *mp, struct minstrel_ht_sta *mi, ratetbl->rate[offset].count_rts = mrs->retry_count_rtscts; } - if (index / MCS_GROUP_RATES == MINSTREL_CCK_GROUP) + index %= MCS_GROUP_RATES; + if (group_idx == MINSTREL_CCK_GROUP) idx = mp->cck_rates[index % ARRAY_SIZE(mp->cck_rates)]; + else if (group_idx == MINSTREL_OFDM_GROUP) + idx = mp->ofdm_rates[mi->band][index % + ARRAY_SIZE(mp->ofdm_rates[0])]; else if (flags & IEEE80211_TX_RC_VHT_MCS) idx = ((group->streams - 1) << 4) | - ((index % MCS_GROUP_RATES) & 0xF); + (index & 0xF); else - idx = index % MCS_GROUP_RATES + (group->streams - 1) * 8; + idx = index + (group->streams - 1) * 8; /* enable RTS/CTS if needed: * - if station is in dynamic SMPS (and streams > 1) @@ -1304,11 +1377,8 @@ minstrel_ht_get_rate(void *priv, struct ieee80211_sta *sta, void *priv_sta, struct minstrel_priv *mp = priv; int sample_idx; - if (!msp->is_ht) - return mac80211_minstrel.get_rate(priv, sta, &msp->legacy, txrc); - if (!(info->flags & IEEE80211_TX_CTL_AMPDU) && - mi->max_prob_rate / MCS_GROUP_RATES != MINSTREL_CCK_GROUP) + !minstrel_ht_is_legacy_group(mi->max_prob_rate / MCS_GROUP_RATES)) minstrel_aggr_check(sta, txrc->skb); info->flags |= mi->tx_flags; @@ -1349,6 +1419,9 @@ minstrel_ht_get_rate(void *priv, struct ieee80211_sta *sta, void *priv_sta, if (sample_group == &minstrel_mcs_groups[MINSTREL_CCK_GROUP]) { int idx = sample_idx % ARRAY_SIZE(mp->cck_rates); rate->idx = mp->cck_rates[idx]; + } else if (sample_group == &minstrel_mcs_groups[MINSTREL_OFDM_GROUP]) { + int idx = sample_idx % ARRAY_SIZE(mp->ofdm_rates[0]); + rate->idx = mp->ofdm_rates[mi->band][idx]; } else if (sample_group->flags & IEEE80211_TX_RC_VHT_MCS) { ieee80211_rate_set_vht(rate, sample_idx % MCS_GROUP_RATES, sample_group->streams); @@ -1369,11 +1442,13 @@ minstrel_ht_update_cck(struct minstrel_priv *mp, struct minstrel_ht_sta *mi, if (sband->band != NL80211_BAND_2GHZ) return; - if (!ieee80211_hw_check(mp->hw, SUPPORTS_HT_CCK_RATES)) + if (sta->ht_cap.ht_supported && + !ieee80211_hw_check(mp->hw, SUPPORTS_HT_CCK_RATES)) return; for (i = 0; i < 4; i++) { - if (!rate_supported(sta, sband->band, mp->cck_rates[i])) + if (mp->cck_rates[i] == 0xff || + !rate_supported(sta, sband->band, mp->cck_rates[i])) continue; mi->supported[MINSTREL_CCK_GROUP] |= BIT(i); @@ -1382,10 +1457,31 @@ minstrel_ht_update_cck(struct minstrel_priv *mp, struct minstrel_ht_sta *mi, } } +static void +minstrel_ht_update_ofdm(struct minstrel_priv *mp, struct minstrel_ht_sta *mi, + struct ieee80211_supported_band *sband, + struct ieee80211_sta *sta) +{ + const u8 *rates; + int i; + + if (sta->ht_cap.ht_supported) + return; + + rates = mp->ofdm_rates[sband->band]; + for (i = 0; i < ARRAY_SIZE(mp->ofdm_rates[0]); i++) { + if (rates[i] == 0xff || + !rate_supported(sta, sband->band, rates[i])) + continue; + + mi->supported[MINSTREL_OFDM_GROUP] |= BIT(i); + } +} + static void minstrel_ht_update_caps(void *priv, struct ieee80211_supported_band *sband, struct cfg80211_chan_def *chandef, - struct ieee80211_sta *sta, void *priv_sta) + struct ieee80211_sta *sta, void *priv_sta) { struct minstrel_priv *mp = priv; struct minstrel_ht_sta_priv *msp = priv_sta; @@ -1401,10 +1497,6 @@ minstrel_ht_update_caps(void *priv, struct ieee80211_supported_band *sband, int stbc; int i; - /* fall back to the old minstrel for legacy stations */ - if (!sta->ht_cap.ht_supported) - goto use_legacy; - BUILD_BUG_ON(ARRAY_SIZE(minstrel_mcs_groups) != MINSTREL_GROUPS_NB); if (vht_cap->vht_supported) @@ -1412,10 +1504,10 @@ minstrel_ht_update_caps(void *priv, struct ieee80211_supported_band *sband, else use_vht = 0; - msp->is_ht = true; memset(mi, 0, sizeof(*mi)); mi->sta = sta; + mi->band = sband->band; mi->last_stats_update = jiffies; ack_dur = ieee80211_frame_duration(sband->band, 10, 60, 1, 1, 0); @@ -1464,10 +1556,8 @@ minstrel_ht_update_caps(void *priv, struct ieee80211_supported_band *sband, int bw, nss; mi->supported[i] = 0; - if (i == MINSTREL_CCK_GROUP) { - minstrel_ht_update_cck(mp, mi, sband, sta); + if (minstrel_ht_is_legacy_group(i)) continue; - } if (gflags & IEEE80211_TX_RC_SHORT_GI) { if (gflags & IEEE80211_TX_RC_40_MHZ_WIDTH) { @@ -1528,22 +1618,12 @@ minstrel_ht_update_caps(void *priv, struct ieee80211_supported_band *sband, n_supported++; } - if (!n_supported) - goto use_legacy; + minstrel_ht_update_cck(mp, mi, sband, sta); + minstrel_ht_update_ofdm(mp, mi, sband, sta); /* create an initial rate table with the lowest supported rates */ minstrel_ht_update_stats(mp, mi, true); minstrel_ht_update_rates(mp, mi); - - return; - -use_legacy: - msp->is_ht = false; - memset(&msp->legacy, 0, sizeof(msp->legacy)); - msp->legacy.r = msp->ratelist; - msp->legacy.sample_table = msp->sample_table; - return mac80211_minstrel.rate_init(priv, sband, chandef, sta, - &msp->legacy); } static void @@ -1611,40 +1691,70 @@ minstrel_ht_free_sta(void *priv, struct ieee80211_sta *sta, void *priv_sta) } static void -minstrel_ht_init_cck_rates(struct minstrel_priv *mp) +minstrel_ht_fill_rate_array(u8 *dest, struct ieee80211_supported_band *sband, + const s16 *bitrates, int n_rates, u32 rate_flags) { - static const int bitrates[4] = { 10, 20, 55, 110 }; - struct ieee80211_supported_band *sband; - u32 rate_flags = ieee80211_chandef_rate_flags(&mp->hw->conf.chandef); int i, j; - sband = mp->hw->wiphy->bands[NL80211_BAND_2GHZ]; - if (!sband) - return; - for (i = 0; i < sband->n_bitrates; i++) { struct ieee80211_rate *rate = &sband->bitrates[i]; - if (rate->flags & IEEE80211_RATE_ERP_G) - continue; - if ((rate_flags & sband->bitrates[i].flags) != rate_flags) continue; - for (j = 0; j < ARRAY_SIZE(bitrates); j++) { + for (j = 0; j < n_rates; j++) { if (rate->bitrate != bitrates[j]) continue; - mp->cck_rates[j] = i; + dest[j] = i; break; } } } +static void +minstrel_ht_init_cck_rates(struct minstrel_priv *mp) +{ + static const s16 bitrates[4] = { 10, 20, 55, 110 }; + struct ieee80211_supported_band *sband; + u32 rate_flags = ieee80211_chandef_rate_flags(&mp->hw->conf.chandef); + + memset(mp->cck_rates, 0xff, sizeof(mp->cck_rates)); + sband = mp->hw->wiphy->bands[NL80211_BAND_2GHZ]; + if (!sband) + return; + + BUILD_BUG_ON(ARRAY_SIZE(mp->cck_rates) != ARRAY_SIZE(bitrates)); + minstrel_ht_fill_rate_array(mp->cck_rates, sband, + minstrel_cck_bitrates, + ARRAY_SIZE(minstrel_cck_bitrates), + rate_flags); +} + +static void +minstrel_ht_init_ofdm_rates(struct minstrel_priv *mp, enum nl80211_band band) +{ + static const s16 bitrates[8] = { 60, 90, 120, 180, 240, 360, 480, 540 }; + struct ieee80211_supported_band *sband; + u32 rate_flags = ieee80211_chandef_rate_flags(&mp->hw->conf.chandef); + + memset(mp->ofdm_rates[band], 0xff, sizeof(mp->ofdm_rates[band])); + sband = mp->hw->wiphy->bands[band]; + if (!sband) + return; + + BUILD_BUG_ON(ARRAY_SIZE(mp->ofdm_rates[band]) != ARRAY_SIZE(bitrates)); + minstrel_ht_fill_rate_array(mp->ofdm_rates[band], sband, + minstrel_ofdm_bitrates, + ARRAY_SIZE(minstrel_ofdm_bitrates), + rate_flags); +} + static void * minstrel_ht_alloc(struct ieee80211_hw *hw) { struct minstrel_priv *mp; + int i; mp = kzalloc(sizeof(struct minstrel_priv), GFP_ATOMIC); if (!mp) @@ -1681,6 +1791,8 @@ minstrel_ht_alloc(struct ieee80211_hw *hw) mp->new_avg = true; minstrel_ht_init_cck_rates(mp); + for (i = 0; i < ARRAY_SIZE(mp->hw->wiphy->bands); i++) + minstrel_ht_init_ofdm_rates(mp, i); return mp; } @@ -1713,9 +1825,6 @@ static u32 minstrel_ht_get_expected_throughput(void *priv_sta) struct minstrel_ht_sta *mi = &msp->ht; int i, j, prob, tp_avg; - if (!msp->is_ht) - return mac80211_minstrel.get_expected_throughput(priv_sta); - i = mi->max_tp_rate[0] / MCS_GROUP_RATES; j = mi->max_tp_rate[0] % MCS_GROUP_RATES; prob = mi->groups[i].rates[j].prob_avg; diff --git a/net/mac80211/rc80211_minstrel_ht.h b/net/mac80211/rc80211_minstrel_ht.h index 11300fa48a2fe..3321630ebeea1 100644 --- a/net/mac80211/rc80211_minstrel_ht.h +++ b/net/mac80211/rc80211_minstrel_ht.h @@ -18,14 +18,15 @@ MINSTREL_HT_STREAM_GROUPS) #define MINSTREL_VHT_GROUPS_NB (MINSTREL_MAX_STREAMS * \ MINSTREL_VHT_STREAM_GROUPS) -#define MINSTREL_CCK_GROUPS_NB 1 +#define MINSTREL_LEGACY_GROUPS_NB 2 #define MINSTREL_GROUPS_NB (MINSTREL_HT_GROUPS_NB + \ MINSTREL_VHT_GROUPS_NB + \ - MINSTREL_CCK_GROUPS_NB) + MINSTREL_LEGACY_GROUPS_NB) #define MINSTREL_HT_GROUP_0 0 #define MINSTREL_CCK_GROUP (MINSTREL_HT_GROUP_0 + MINSTREL_HT_GROUPS_NB) -#define MINSTREL_VHT_GROUP_0 (MINSTREL_CCK_GROUP + 1) +#define MINSTREL_OFDM_GROUP (MINSTREL_CCK_GROUP + 1) +#define MINSTREL_VHT_GROUP_0 (MINSTREL_OFDM_GROUP + 1) #define MCS_GROUP_RATES 10 @@ -37,6 +38,8 @@ struct mcs_group { u16 duration[MCS_GROUP_RATES]; }; +extern const s16 minstrel_cck_bitrates[4]; +extern const s16 minstrel_ofdm_bitrates[8]; extern const struct mcs_group minstrel_mcs_groups[]; struct minstrel_mcs_group_data { @@ -99,6 +102,8 @@ struct minstrel_ht_sta { /* current MCS group to be sampled */ u8 sample_group; + u8 band; + /* Bitfield of supported MCS rates of all groups */ u16 supported[MINSTREL_GROUPS_NB]; @@ -107,13 +112,9 @@ struct minstrel_ht_sta { }; struct minstrel_ht_sta_priv { - union { - struct minstrel_ht_sta ht; - struct minstrel_sta_info legacy; - }; + struct minstrel_ht_sta ht; void *ratelist; void *sample_table; - bool is_ht; }; void minstrel_ht_add_sta_debugfs(void *priv, void *priv_sta, struct dentry *dir); diff --git a/net/mac80211/rc80211_minstrel_ht_debugfs.c b/net/mac80211/rc80211_minstrel_ht_debugfs.c index bebb71917742a..75ecc3318b500 100644 --- a/net/mac80211/rc80211_minstrel_ht_debugfs.c +++ b/net/mac80211/rc80211_minstrel_ht_debugfs.c @@ -52,7 +52,6 @@ minstrel_ht_stats_dump(struct minstrel_ht_sta *mi, int i, char *p) for (j = 0; j < MCS_GROUP_RATES; j++) { struct minstrel_rate_stats *mrs = &mi->groups[i].rates[j]; - static const int bitrates[4] = { 10, 20, 55, 110 }; int idx = i * MCS_GROUP_RATES + j; unsigned int duration; @@ -67,6 +66,9 @@ minstrel_ht_stats_dump(struct minstrel_ht_sta *mi, int i, char *p) p += sprintf(p, "VHT%c0 ", htmode); p += sprintf(p, "%cGI ", gimode); p += sprintf(p, "%d ", mg->streams); + } else if (i == MINSTREL_OFDM_GROUP) { + p += sprintf(p, "OFDM "); + p += sprintf(p, "1 "); } else { p += sprintf(p, "CCK "); p += sprintf(p, "%cP ", j < 4 ? 'L' : 'S'); @@ -84,7 +86,12 @@ minstrel_ht_stats_dump(struct minstrel_ht_sta *mi, int i, char *p) } else if (gflags & IEEE80211_TX_RC_VHT_MCS) { p += sprintf(p, " MCS%-1u/%1u", j, mg->streams); } else { - int r = bitrates[j % 4]; + int r; + + if (i == MINSTREL_OFDM_GROUP) + r = minstrel_ofdm_bitrates[j % 8]; + else + r = minstrel_cck_bitrates[j % 4]; p += sprintf(p, " %2u.%1uM", r / 10, r % 10); } @@ -124,16 +131,8 @@ minstrel_ht_stats_open(struct inode *inode, struct file *file) struct minstrel_ht_sta *mi = &msp->ht; struct minstrel_debugfs_info *ms; unsigned int i; - int ret; char *p; - if (!msp->is_ht) { - inode->i_private = &msp->legacy; - ret = minstrel_stats_open(inode, file); - inode->i_private = msp; - return ret; - } - ms = kmalloc(32768, GFP_KERNEL); if (!ms) return -ENOMEM; @@ -199,7 +198,6 @@ minstrel_ht_stats_csv_dump(struct minstrel_ht_sta *mi, int i, char *p) for (j = 0; j < MCS_GROUP_RATES; j++) { struct minstrel_rate_stats *mrs = &mi->groups[i].rates[j]; - static const int bitrates[4] = { 10, 20, 55, 110 }; int idx = i * MCS_GROUP_RATES + j; unsigned int duration; @@ -214,6 +212,8 @@ minstrel_ht_stats_csv_dump(struct minstrel_ht_sta *mi, int i, char *p) p += sprintf(p, "VHT%c0,", htmode); p += sprintf(p, "%cGI,", gimode); p += sprintf(p, "%d,", mg->streams); + } else if (i == MINSTREL_OFDM_GROUP) { + p += sprintf(p, "OFDM,,1,"); } else { p += sprintf(p, "CCK,"); p += sprintf(p, "%cP,", j < 4 ? 'L' : 'S'); @@ -231,7 +231,13 @@ minstrel_ht_stats_csv_dump(struct minstrel_ht_sta *mi, int i, char *p) } else if (gflags & IEEE80211_TX_RC_VHT_MCS) { p += sprintf(p, ",MCS%-1u/%1u,", j, mg->streams); } else { - int r = bitrates[j % 4]; + int r; + + if (i == MINSTREL_OFDM_GROUP) + r = minstrel_ofdm_bitrates[j % 8]; + else + r = minstrel_cck_bitrates[j % 4]; + p += sprintf(p, ",%2u.%1uM,", r / 10, r % 10); } @@ -274,18 +280,9 @@ minstrel_ht_stats_csv_open(struct inode *inode, struct file *file) struct minstrel_ht_sta *mi = &msp->ht; struct minstrel_debugfs_info *ms; unsigned int i; - int ret; char *p; - if (!msp->is_ht) { - inode->i_private = &msp->legacy; - ret = minstrel_stats_csv_open(inode, file); - inode->i_private = msp; - return ret; - } - ms = kmalloc(32768, GFP_KERNEL); - if (!ms) return -ENOMEM; -- GitLab From cbda98c710d273b2725e2b551d929879bff93c1d Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Fri, 15 Jan 2021 13:02:36 +0100 Subject: [PATCH 1236/2012] mac80211: remove legacy minstrel rate control Now that minstrel_ht supports legacy rates, it is no longer needed Signed-off-by: Felix Fietkau Link: https://lore.kernel.org/r/20210115120242.89616-4-nbd@nbd.name Signed-off-by: Johannes Berg --- net/mac80211/Makefile | 2 - net/mac80211/rc80211_minstrel.c | 574 --------------------- net/mac80211/rc80211_minstrel.h | 185 ------- net/mac80211/rc80211_minstrel_debugfs.c | 172 ------ net/mac80211/rc80211_minstrel_ht.c | 126 +++-- net/mac80211/rc80211_minstrel_ht.h | 79 ++- net/mac80211/rc80211_minstrel_ht_debugfs.c | 18 +- 7 files changed, 167 insertions(+), 989 deletions(-) delete mode 100644 net/mac80211/rc80211_minstrel.c delete mode 100644 net/mac80211/rc80211_minstrel.h delete mode 100644 net/mac80211/rc80211_minstrel_debugfs.c diff --git a/net/mac80211/Makefile b/net/mac80211/Makefile index ad04c361cba53..23d25e8b23584 100644 --- a/net/mac80211/Makefile +++ b/net/mac80211/Makefile @@ -56,11 +56,9 @@ mac80211-$(CONFIG_PM) += pm.o CFLAGS_trace.o := -I$(src) rc80211_minstrel-y := \ - rc80211_minstrel.o \ rc80211_minstrel_ht.o rc80211_minstrel-$(CONFIG_MAC80211_DEBUGFS) += \ - rc80211_minstrel_debugfs.o \ rc80211_minstrel_ht_debugfs.o mac80211-$(CONFIG_MAC80211_RC_MINSTREL) += $(rc80211_minstrel-y) diff --git a/net/mac80211/rc80211_minstrel.c b/net/mac80211/rc80211_minstrel.c deleted file mode 100644 index b13b1da193867..0000000000000 --- a/net/mac80211/rc80211_minstrel.c +++ /dev/null @@ -1,574 +0,0 @@ -/* - * Copyright (C) 2008 Felix Fietkau - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * Based on minstrel.c: - * Copyright (C) 2005-2007 Derek Smithies - * Sponsored by Indranet Technologies Ltd - * - * Based on sample.c: - * Copyright (c) 2005 John Bicket - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer, - * without modification. - * 2. Redistributions in binary form must reproduce at minimum a disclaimer - * similar to the "NO WARRANTY" disclaimer below ("Disclaimer") and any - * redistribution must be conditioned upon including a substantially - * similar Disclaimer requirement for further binary redistribution. - * 3. Neither the names of the above-listed copyright holders nor the names - * of any contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * Alternatively, this software may be distributed under the terms of the - * GNU General Public License ("GPL") version 2 as published by the Free - * Software Foundation. - * - * NO WARRANTY - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF NONINFRINGEMENT, MERCHANTIBILITY - * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL - * THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, - * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER - * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF - * THE POSSIBILITY OF SUCH DAMAGES. - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include "rate.h" -#include "rc80211_minstrel.h" - -#define SAMPLE_TBL(_mi, _idx, _col) \ - _mi->sample_table[(_idx * SAMPLE_COLUMNS) + _col] - -/* convert mac80211 rate index to local array index */ -static inline int -rix_to_ndx(struct minstrel_sta_info *mi, int rix) -{ - int i = rix; - for (i = rix; i >= 0; i--) - if (mi->r[i].rix == rix) - break; - return i; -} - -/* return current EMWA throughput */ -int minstrel_get_tp_avg(struct minstrel_rate *mr, int prob_avg) -{ - int usecs; - - usecs = mr->perfect_tx_time; - if (!usecs) - usecs = 1000000; - - /* reset thr. below 10% success */ - if (mr->stats.prob_avg < MINSTREL_FRAC(10, 100)) - return 0; - - if (prob_avg > MINSTREL_FRAC(90, 100)) - return MINSTREL_TRUNC(100000 * (MINSTREL_FRAC(90, 100) / usecs)); - else - return MINSTREL_TRUNC(100000 * (prob_avg / usecs)); -} - -/* find & sort topmost throughput rates */ -static inline void -minstrel_sort_best_tp_rates(struct minstrel_sta_info *mi, int i, u8 *tp_list) -{ - int j; - struct minstrel_rate_stats *tmp_mrs; - struct minstrel_rate_stats *cur_mrs = &mi->r[i].stats; - - for (j = MAX_THR_RATES; j > 0; --j) { - tmp_mrs = &mi->r[tp_list[j - 1]].stats; - if (minstrel_get_tp_avg(&mi->r[i], cur_mrs->prob_avg) <= - minstrel_get_tp_avg(&mi->r[tp_list[j - 1]], tmp_mrs->prob_avg)) - break; - } - - if (j < MAX_THR_RATES - 1) - memmove(&tp_list[j + 1], &tp_list[j], MAX_THR_RATES - (j + 1)); - if (j < MAX_THR_RATES) - tp_list[j] = i; -} - -static void -minstrel_set_rate(struct minstrel_sta_info *mi, struct ieee80211_sta_rates *ratetbl, - int offset, int idx) -{ - struct minstrel_rate *r = &mi->r[idx]; - - ratetbl->rate[offset].idx = r->rix; - ratetbl->rate[offset].count = r->adjusted_retry_count; - ratetbl->rate[offset].count_cts = r->retry_count_cts; - ratetbl->rate[offset].count_rts = r->stats.retry_count_rtscts; -} - -static void -minstrel_update_rates(struct minstrel_priv *mp, struct minstrel_sta_info *mi) -{ - struct ieee80211_sta_rates *ratetbl; - int i = 0; - - ratetbl = kzalloc(sizeof(*ratetbl), GFP_ATOMIC); - if (!ratetbl) - return; - - /* Start with max_tp_rate */ - minstrel_set_rate(mi, ratetbl, i++, mi->max_tp_rate[0]); - - if (mp->hw->max_rates >= 3) { - /* At least 3 tx rates supported, use max_tp_rate2 next */ - minstrel_set_rate(mi, ratetbl, i++, mi->max_tp_rate[1]); - } - - if (mp->hw->max_rates >= 2) { - /* At least 2 tx rates supported, use max_prob_rate next */ - minstrel_set_rate(mi, ratetbl, i++, mi->max_prob_rate); - } - - /* Use lowest rate last */ - ratetbl->rate[i].idx = mi->lowest_rix; - ratetbl->rate[i].count = mp->max_retry; - ratetbl->rate[i].count_cts = mp->max_retry; - ratetbl->rate[i].count_rts = mp->max_retry; - - rate_control_set_rates(mp->hw, mi->sta, ratetbl); -} - -/* -* Recalculate statistics and counters of a given rate -*/ -void -minstrel_calc_rate_stats(struct minstrel_priv *mp, - struct minstrel_rate_stats *mrs) -{ - unsigned int cur_prob; - - if (unlikely(mrs->attempts > 0)) { - mrs->sample_skipped = 0; - cur_prob = MINSTREL_FRAC(mrs->success, mrs->attempts); - if (mp->new_avg) { - minstrel_filter_avg_add(&mrs->prob_avg, - &mrs->prob_avg_1, cur_prob); - } else if (unlikely(!mrs->att_hist)) { - mrs->prob_avg = cur_prob; - } else { - /*update exponential weighted moving avarage */ - mrs->prob_avg = minstrel_ewma(mrs->prob_avg, - cur_prob, - EWMA_LEVEL); - } - mrs->att_hist += mrs->attempts; - mrs->succ_hist += mrs->success; - } else { - mrs->sample_skipped++; - } - - mrs->last_success = mrs->success; - mrs->last_attempts = mrs->attempts; - mrs->success = 0; - mrs->attempts = 0; -} - -static void -minstrel_update_stats(struct minstrel_priv *mp, struct minstrel_sta_info *mi) -{ - u8 tmp_tp_rate[MAX_THR_RATES]; - u8 tmp_prob_rate = 0; - int i, tmp_cur_tp, tmp_prob_tp; - - for (i = 0; i < MAX_THR_RATES; i++) - tmp_tp_rate[i] = 0; - - for (i = 0; i < mi->n_rates; i++) { - struct minstrel_rate *mr = &mi->r[i]; - struct minstrel_rate_stats *mrs = &mi->r[i].stats; - struct minstrel_rate_stats *tmp_mrs = &mi->r[tmp_prob_rate].stats; - - /* Update statistics of success probability per rate */ - minstrel_calc_rate_stats(mp, mrs); - - /* Sample less often below the 10% chance of success. - * Sample less often above the 95% chance of success. */ - if (mrs->prob_avg > MINSTREL_FRAC(95, 100) || - mrs->prob_avg < MINSTREL_FRAC(10, 100)) { - mr->adjusted_retry_count = mrs->retry_count >> 1; - if (mr->adjusted_retry_count > 2) - mr->adjusted_retry_count = 2; - mr->sample_limit = 4; - } else { - mr->sample_limit = -1; - mr->adjusted_retry_count = mrs->retry_count; - } - if (!mr->adjusted_retry_count) - mr->adjusted_retry_count = 2; - - minstrel_sort_best_tp_rates(mi, i, tmp_tp_rate); - - /* To determine the most robust rate (max_prob_rate) used at - * 3rd mmr stage we distinct between two cases: - * (1) if any success probabilitiy >= 95%, out of those rates - * choose the maximum throughput rate as max_prob_rate - * (2) if all success probabilities < 95%, the rate with - * highest success probability is chosen as max_prob_rate */ - if (mrs->prob_avg >= MINSTREL_FRAC(95, 100)) { - tmp_cur_tp = minstrel_get_tp_avg(mr, mrs->prob_avg); - tmp_prob_tp = minstrel_get_tp_avg(&mi->r[tmp_prob_rate], - tmp_mrs->prob_avg); - if (tmp_cur_tp >= tmp_prob_tp) - tmp_prob_rate = i; - } else { - if (mrs->prob_avg >= tmp_mrs->prob_avg) - tmp_prob_rate = i; - } - } - - /* Assign the new rate set */ - memcpy(mi->max_tp_rate, tmp_tp_rate, sizeof(mi->max_tp_rate)); - mi->max_prob_rate = tmp_prob_rate; - -#ifdef CONFIG_MAC80211_DEBUGFS - /* use fixed index if set */ - if (mp->fixed_rate_idx != -1) { - mi->max_tp_rate[0] = mp->fixed_rate_idx; - mi->max_tp_rate[1] = mp->fixed_rate_idx; - mi->max_prob_rate = mp->fixed_rate_idx; - } -#endif - - /* Reset update timer */ - mi->last_stats_update = jiffies; - - minstrel_update_rates(mp, mi); -} - -static void -minstrel_tx_status(void *priv, struct ieee80211_supported_band *sband, - void *priv_sta, struct ieee80211_tx_status *st) -{ - struct ieee80211_tx_info *info = st->info; - struct minstrel_priv *mp = priv; - struct minstrel_sta_info *mi = priv_sta; - struct ieee80211_tx_rate *ar = info->status.rates; - int i, ndx; - int success; - - success = !!(info->flags & IEEE80211_TX_STAT_ACK); - - for (i = 0; i < IEEE80211_TX_MAX_RATES; i++) { - if (ar[i].idx < 0 || !ar[i].count) - break; - - ndx = rix_to_ndx(mi, ar[i].idx); - if (ndx < 0) - continue; - - mi->r[ndx].stats.attempts += ar[i].count; - - if ((i != IEEE80211_TX_MAX_RATES - 1) && (ar[i + 1].idx < 0)) - mi->r[ndx].stats.success += success; - } - - if (time_after(jiffies, mi->last_stats_update + - mp->update_interval / (mp->new_avg ? 2 : 1))) - minstrel_update_stats(mp, mi); -} - - -static inline unsigned int -minstrel_get_retry_count(struct minstrel_rate *mr, - struct ieee80211_tx_info *info) -{ - u8 retry = mr->adjusted_retry_count; - - if (info->control.use_rts) - retry = max_t(u8, 2, min(mr->stats.retry_count_rtscts, retry)); - else if (info->control.use_cts_prot) - retry = max_t(u8, 2, min(mr->retry_count_cts, retry)); - return retry; -} - - -static int -minstrel_get_next_sample(struct minstrel_sta_info *mi) -{ - unsigned int sample_ndx; - sample_ndx = SAMPLE_TBL(mi, mi->sample_row, mi->sample_column); - mi->sample_row++; - if ((int) mi->sample_row >= mi->n_rates) { - mi->sample_row = 0; - mi->sample_column++; - if (mi->sample_column >= SAMPLE_COLUMNS) - mi->sample_column = 0; - } - return sample_ndx; -} - -static void -minstrel_get_rate(void *priv, struct ieee80211_sta *sta, - void *priv_sta, struct ieee80211_tx_rate_control *txrc) -{ - struct sk_buff *skb = txrc->skb; - struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); - struct minstrel_sta_info *mi = priv_sta; - struct minstrel_priv *mp = priv; - struct ieee80211_tx_rate *rate = &info->control.rates[0]; - struct minstrel_rate *msr, *mr; - unsigned int ndx; - bool mrr_capable; - bool prev_sample; - int delta; - int sampling_ratio; - - /* check multi-rate-retry capabilities & adjust lookaround_rate */ - mrr_capable = mp->has_mrr && - !txrc->rts && - !txrc->bss_conf->use_cts_prot; - if (mrr_capable) - sampling_ratio = mp->lookaround_rate_mrr; - else - sampling_ratio = mp->lookaround_rate; - - /* increase sum packet counter */ - mi->total_packets++; - -#ifdef CONFIG_MAC80211_DEBUGFS - if (mp->fixed_rate_idx != -1) - return; -#endif - - /* Don't use EAPOL frames for sampling on non-mrr hw */ - if (mp->hw->max_rates == 1 && - (info->control.flags & IEEE80211_TX_CTRL_PORT_CTRL_PROTO)) - return; - - delta = (mi->total_packets * sampling_ratio / 100) - - mi->sample_packets; - - /* delta < 0: no sampling required */ - prev_sample = mi->prev_sample; - mi->prev_sample = false; - if (delta < 0 || (!mrr_capable && prev_sample)) - return; - - if (mi->total_packets >= 10000) { - mi->sample_packets = 0; - mi->total_packets = 0; - } else if (delta > mi->n_rates * 2) { - /* With multi-rate retry, not every planned sample - * attempt actually gets used, due to the way the retry - * chain is set up - [max_tp,sample,prob,lowest] for - * sample_rate < max_tp. - * - * If there's too much sampling backlog and the link - * starts getting worse, minstrel would start bursting - * out lots of sampling frames, which would result - * in a large throughput loss. */ - mi->sample_packets += (delta - mi->n_rates * 2); - } - - /* get next random rate sample */ - ndx = minstrel_get_next_sample(mi); - msr = &mi->r[ndx]; - mr = &mi->r[mi->max_tp_rate[0]]; - - /* Decide if direct ( 1st mrr stage) or indirect (2nd mrr stage) - * rate sampling method should be used. - * Respect such rates that are not sampled for 20 interations. - */ - if (msr->perfect_tx_time < mr->perfect_tx_time || - msr->stats.sample_skipped >= 20) { - if (!msr->sample_limit) - return; - - mi->sample_packets++; - if (msr->sample_limit > 0) - msr->sample_limit--; - } - - /* If we're not using MRR and the sampling rate already - * has a probability of >95%, we shouldn't be attempting - * to use it, as this only wastes precious airtime */ - if (!mrr_capable && - (mi->r[ndx].stats.prob_avg > MINSTREL_FRAC(95, 100))) - return; - - mi->prev_sample = true; - - rate->idx = mi->r[ndx].rix; - rate->count = minstrel_get_retry_count(&mi->r[ndx], info); - info->flags |= IEEE80211_TX_CTL_RATE_CTRL_PROBE; -} - - -static void -calc_rate_durations(enum nl80211_band band, - struct minstrel_rate *d, - struct ieee80211_rate *rate, - struct cfg80211_chan_def *chandef) -{ - int erp = !!(rate->flags & IEEE80211_RATE_ERP_G); - int shift = ieee80211_chandef_get_shift(chandef); - - d->perfect_tx_time = ieee80211_frame_duration(band, 1200, - DIV_ROUND_UP(rate->bitrate, 1 << shift), erp, 1, - shift); - d->ack_time = ieee80211_frame_duration(band, 10, - DIV_ROUND_UP(rate->bitrate, 1 << shift), erp, 1, - shift); -} - -static void -init_sample_table(struct minstrel_sta_info *mi) -{ - unsigned int i, col, new_idx; - u8 rnd[8]; - - mi->sample_column = 0; - mi->sample_row = 0; - memset(mi->sample_table, 0xff, SAMPLE_COLUMNS * mi->n_rates); - - for (col = 0; col < SAMPLE_COLUMNS; col++) { - prandom_bytes(rnd, sizeof(rnd)); - for (i = 0; i < mi->n_rates; i++) { - new_idx = (i + rnd[i & 7]) % mi->n_rates; - while (SAMPLE_TBL(mi, new_idx, col) != 0xff) - new_idx = (new_idx + 1) % mi->n_rates; - - SAMPLE_TBL(mi, new_idx, col) = i; - } - } -} - -static void -minstrel_rate_init(void *priv, struct ieee80211_supported_band *sband, - struct cfg80211_chan_def *chandef, - struct ieee80211_sta *sta, void *priv_sta) -{ - struct minstrel_sta_info *mi = priv_sta; - struct minstrel_priv *mp = priv; - struct ieee80211_rate *ctl_rate; - unsigned int i, n = 0; - unsigned int t_slot = 9; /* FIXME: get real slot time */ - u32 rate_flags; - - mi->sta = sta; - mi->lowest_rix = rate_lowest_index(sband, sta); - ctl_rate = &sband->bitrates[mi->lowest_rix]; - mi->sp_ack_dur = ieee80211_frame_duration(sband->band, 10, - ctl_rate->bitrate, - !!(ctl_rate->flags & IEEE80211_RATE_ERP_G), 1, - ieee80211_chandef_get_shift(chandef)); - - rate_flags = ieee80211_chandef_rate_flags(&mp->hw->conf.chandef); - memset(mi->max_tp_rate, 0, sizeof(mi->max_tp_rate)); - mi->max_prob_rate = 0; - - for (i = 0; i < sband->n_bitrates; i++) { - struct minstrel_rate *mr = &mi->r[n]; - struct minstrel_rate_stats *mrs = &mi->r[n].stats; - unsigned int tx_time = 0, tx_time_cts = 0, tx_time_rtscts = 0; - unsigned int tx_time_single; - unsigned int cw = mp->cw_min; - int shift; - - if (!rate_supported(sta, sband->band, i)) - continue; - if ((rate_flags & sband->bitrates[i].flags) != rate_flags) - continue; - - n++; - memset(mr, 0, sizeof(*mr)); - memset(mrs, 0, sizeof(*mrs)); - - mr->rix = i; - shift = ieee80211_chandef_get_shift(chandef); - mr->bitrate = DIV_ROUND_UP(sband->bitrates[i].bitrate, - (1 << shift) * 5); - calc_rate_durations(sband->band, mr, &sband->bitrates[i], - chandef); - - /* calculate maximum number of retransmissions before - * fallback (based on maximum segment size) */ - mr->sample_limit = -1; - mrs->retry_count = 1; - mr->retry_count_cts = 1; - mrs->retry_count_rtscts = 1; - tx_time = mr->perfect_tx_time + mi->sp_ack_dur; - do { - /* add one retransmission */ - tx_time_single = mr->ack_time + mr->perfect_tx_time; - - /* contention window */ - tx_time_single += (t_slot * cw) >> 1; - cw = min((cw << 1) | 1, mp->cw_max); - - tx_time += tx_time_single; - tx_time_cts += tx_time_single + mi->sp_ack_dur; - tx_time_rtscts += tx_time_single + 2 * mi->sp_ack_dur; - if ((tx_time_cts < mp->segment_size) && - (mr->retry_count_cts < mp->max_retry)) - mr->retry_count_cts++; - if ((tx_time_rtscts < mp->segment_size) && - (mrs->retry_count_rtscts < mp->max_retry)) - mrs->retry_count_rtscts++; - } while ((tx_time < mp->segment_size) && - (++mr->stats.retry_count < mp->max_retry)); - mr->adjusted_retry_count = mrs->retry_count; - if (!(sband->bitrates[i].flags & IEEE80211_RATE_ERP_G)) - mr->retry_count_cts = mrs->retry_count; - } - - for (i = n; i < sband->n_bitrates; i++) { - struct minstrel_rate *mr = &mi->r[i]; - mr->rix = -1; - } - - mi->n_rates = n; - mi->last_stats_update = jiffies; - - init_sample_table(mi); - minstrel_update_rates(mp, mi); -} - -static u32 minstrel_get_expected_throughput(void *priv_sta) -{ - struct minstrel_sta_info *mi = priv_sta; - struct minstrel_rate_stats *tmp_mrs; - int idx = mi->max_tp_rate[0]; - int tmp_cur_tp; - - /* convert pkt per sec in kbps (1200 is the average pkt size used for - * computing cur_tp - */ - tmp_mrs = &mi->r[idx].stats; - tmp_cur_tp = minstrel_get_tp_avg(&mi->r[idx], tmp_mrs->prob_avg) * 10; - tmp_cur_tp = tmp_cur_tp * 1200 * 8 / 1024; - - return tmp_cur_tp; -} - -const struct rate_control_ops mac80211_minstrel = { - .tx_status_ext = minstrel_tx_status, - .get_rate = minstrel_get_rate, - .rate_init = minstrel_rate_init, - .get_expected_throughput = minstrel_get_expected_throughput, -}; diff --git a/net/mac80211/rc80211_minstrel.h b/net/mac80211/rc80211_minstrel.h deleted file mode 100644 index 1b6d859a81e2f..0000000000000 --- a/net/mac80211/rc80211_minstrel.h +++ /dev/null @@ -1,185 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2008 Felix Fietkau - */ - -#ifndef __RC_MINSTREL_H -#define __RC_MINSTREL_H - -#define EWMA_LEVEL 96 /* ewma weighting factor [/EWMA_DIV] */ -#define EWMA_DIV 128 -#define SAMPLE_COLUMNS 10 /* number of columns in sample table */ - -/* scaled fraction values */ -#define MINSTREL_SCALE 12 -#define MINSTREL_FRAC(val, div) (((val) << MINSTREL_SCALE) / div) -#define MINSTREL_TRUNC(val) ((val) >> MINSTREL_SCALE) - -/* number of highest throughput rates to consider*/ -#define MAX_THR_RATES 4 - -/* - * Coefficients for moving average with noise filter (period=16), - * scaled by 10 bits - * - * a1 = exp(-pi * sqrt(2) / period) - * coeff2 = 2 * a1 * cos(sqrt(2) * 2 * pi / period) - * coeff3 = -sqr(a1) - * coeff1 = 1 - coeff2 - coeff3 - */ -#define MINSTREL_AVG_COEFF1 (MINSTREL_FRAC(1, 1) - \ - MINSTREL_AVG_COEFF2 - \ - MINSTREL_AVG_COEFF3) -#define MINSTREL_AVG_COEFF2 0x00001499 -#define MINSTREL_AVG_COEFF3 -0x0000092e - -/* - * Perform EWMA (Exponentially Weighted Moving Average) calculation - */ -static inline int -minstrel_ewma(int old, int new, int weight) -{ - int diff, incr; - - diff = new - old; - incr = (EWMA_DIV - weight) * diff / EWMA_DIV; - - return old + incr; -} - -static inline int minstrel_filter_avg_add(u16 *prev_1, u16 *prev_2, s32 in) -{ - s32 out_1 = *prev_1; - s32 out_2 = *prev_2; - s32 val; - - if (!in) - in += 1; - - if (!out_1) { - val = out_1 = in; - goto out; - } - - val = MINSTREL_AVG_COEFF1 * in; - val += MINSTREL_AVG_COEFF2 * out_1; - val += MINSTREL_AVG_COEFF3 * out_2; - val >>= MINSTREL_SCALE; - - if (val > 1 << MINSTREL_SCALE) - val = 1 << MINSTREL_SCALE; - if (val < 0) - val = 1; - -out: - *prev_2 = out_1; - *prev_1 = val; - - return val; -} - -struct minstrel_rate_stats { - /* current / last sampling period attempts/success counters */ - u16 attempts, last_attempts; - u16 success, last_success; - - /* total attempts/success counters */ - u32 att_hist, succ_hist; - - /* prob_avg - moving average of prob */ - u16 prob_avg; - u16 prob_avg_1; - - /* maximum retry counts */ - u8 retry_count; - u8 retry_count_rtscts; - - u8 sample_skipped; - bool retry_updated; -}; - -struct minstrel_rate { - int bitrate; - - s8 rix; - u8 retry_count_cts; - u8 adjusted_retry_count; - - unsigned int perfect_tx_time; - unsigned int ack_time; - - int sample_limit; - - struct minstrel_rate_stats stats; -}; - -struct minstrel_sta_info { - struct ieee80211_sta *sta; - - unsigned long last_stats_update; - unsigned int sp_ack_dur; - unsigned int rate_avg; - - unsigned int lowest_rix; - - u8 max_tp_rate[MAX_THR_RATES]; - u8 max_prob_rate; - unsigned int total_packets; - unsigned int sample_packets; - - unsigned int sample_row; - unsigned int sample_column; - - int n_rates; - struct minstrel_rate *r; - bool prev_sample; - - /* sampling table */ - u8 *sample_table; -}; - -struct minstrel_priv { - struct ieee80211_hw *hw; - bool has_mrr; - bool new_avg; - u32 sample_switch; - unsigned int cw_min; - unsigned int cw_max; - unsigned int max_retry; - unsigned int segment_size; - unsigned int update_interval; - unsigned int lookaround_rate; - unsigned int lookaround_rate_mrr; - - u8 cck_rates[4]; - u8 ofdm_rates[NUM_NL80211_BANDS][8]; - -#ifdef CONFIG_MAC80211_DEBUGFS - /* - * enable fixed rate processing per RC - * - write static index to debugfs:ieee80211/phyX/rc/fixed_rate_idx - * - write -1 to enable RC processing again - * - setting will be applied on next update - */ - u32 fixed_rate_idx; -#endif -}; - -struct minstrel_debugfs_info { - size_t len; - char buf[]; -}; - -extern const struct rate_control_ops mac80211_minstrel; -void minstrel_add_sta_debugfs(void *priv, void *priv_sta, struct dentry *dir); - -/* Recalculate success probabilities and counters for a given rate using EWMA */ -void minstrel_calc_rate_stats(struct minstrel_priv *mp, - struct minstrel_rate_stats *mrs); -int minstrel_get_tp_avg(struct minstrel_rate *mr, int prob_avg); - -/* debugfs */ -int minstrel_stats_open(struct inode *inode, struct file *file); -int minstrel_stats_csv_open(struct inode *inode, struct file *file); - -#endif diff --git a/net/mac80211/rc80211_minstrel_debugfs.c b/net/mac80211/rc80211_minstrel_debugfs.c deleted file mode 100644 index 9b8e0daeb7bb5..0000000000000 --- a/net/mac80211/rc80211_minstrel_debugfs.c +++ /dev/null @@ -1,172 +0,0 @@ -/* - * Copyright (C) 2008 Felix Fietkau - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * Based on minstrel.c: - * Copyright (C) 2005-2007 Derek Smithies - * Sponsored by Indranet Technologies Ltd - * - * Based on sample.c: - * Copyright (c) 2005 John Bicket - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer, - * without modification. - * 2. Redistributions in binary form must reproduce at minimum a disclaimer - * similar to the "NO WARRANTY" disclaimer below ("Disclaimer") and any - * redistribution must be conditioned upon including a substantially - * similar Disclaimer requirement for further binary redistribution. - * 3. Neither the names of the above-listed copyright holders nor the names - * of any contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * Alternatively, this software may be distributed under the terms of the - * GNU General Public License ("GPL") version 2 as published by the Free - * Software Foundation. - * - * NO WARRANTY - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF NONINFRINGEMENT, MERCHANTIBILITY - * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL - * THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, - * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER - * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF - * THE POSSIBILITY OF SUCH DAMAGES. - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include "rc80211_minstrel.h" - -int -minstrel_stats_open(struct inode *inode, struct file *file) -{ - struct minstrel_sta_info *mi = inode->i_private; - struct minstrel_debugfs_info *ms; - unsigned int i, tp_max, tp_avg, eprob; - char *p; - - ms = kmalloc(2048, GFP_KERNEL); - if (!ms) - return -ENOMEM; - - file->private_data = ms; - p = ms->buf; - p += sprintf(p, "\n"); - p += sprintf(p, - "best __________rate_________ ____statistics___ ____last_____ ______sum-of________\n"); - p += sprintf(p, - "rate [name idx airtime max_tp] [avg(tp) avg(prob)] [retry|suc|att] [#success | #attempts]\n"); - - for (i = 0; i < mi->n_rates; i++) { - struct minstrel_rate *mr = &mi->r[i]; - struct minstrel_rate_stats *mrs = &mi->r[i].stats; - - *(p++) = (i == mi->max_tp_rate[0]) ? 'A' : ' '; - *(p++) = (i == mi->max_tp_rate[1]) ? 'B' : ' '; - *(p++) = (i == mi->max_tp_rate[2]) ? 'C' : ' '; - *(p++) = (i == mi->max_tp_rate[3]) ? 'D' : ' '; - *(p++) = (i == mi->max_prob_rate) ? 'P' : ' '; - - p += sprintf(p, " %3u%s ", mr->bitrate / 2, - (mr->bitrate & 1 ? ".5" : " ")); - p += sprintf(p, "%3u ", i); - p += sprintf(p, "%6u ", mr->perfect_tx_time); - - tp_max = minstrel_get_tp_avg(mr, MINSTREL_FRAC(100,100)); - tp_avg = minstrel_get_tp_avg(mr, mrs->prob_avg); - eprob = MINSTREL_TRUNC(mrs->prob_avg * 1000); - - p += sprintf(p, "%4u.%1u %4u.%1u %3u.%1u" - " %3u %3u %-3u " - "%9llu %-9llu\n", - tp_max / 10, tp_max % 10, - tp_avg / 10, tp_avg % 10, - eprob / 10, eprob % 10, - mrs->retry_count, - mrs->last_success, - mrs->last_attempts, - (unsigned long long)mrs->succ_hist, - (unsigned long long)mrs->att_hist); - } - p += sprintf(p, "\nTotal packet count:: ideal %d " - "lookaround %d\n\n", - mi->total_packets - mi->sample_packets, - mi->sample_packets); - ms->len = p - ms->buf; - - WARN_ON(ms->len + sizeof(*ms) > 2048); - - return 0; -} - -int -minstrel_stats_csv_open(struct inode *inode, struct file *file) -{ - struct minstrel_sta_info *mi = inode->i_private; - struct minstrel_debugfs_info *ms; - unsigned int i, tp_max, tp_avg, eprob; - char *p; - - ms = kmalloc(2048, GFP_KERNEL); - if (!ms) - return -ENOMEM; - - file->private_data = ms; - p = ms->buf; - - for (i = 0; i < mi->n_rates; i++) { - struct minstrel_rate *mr = &mi->r[i]; - struct minstrel_rate_stats *mrs = &mi->r[i].stats; - - p += sprintf(p, "%s" ,((i == mi->max_tp_rate[0]) ? "A" : "")); - p += sprintf(p, "%s" ,((i == mi->max_tp_rate[1]) ? "B" : "")); - p += sprintf(p, "%s" ,((i == mi->max_tp_rate[2]) ? "C" : "")); - p += sprintf(p, "%s" ,((i == mi->max_tp_rate[3]) ? "D" : "")); - p += sprintf(p, "%s" ,((i == mi->max_prob_rate) ? "P" : "")); - - p += sprintf(p, ",%u%s", mr->bitrate / 2, - (mr->bitrate & 1 ? ".5," : ",")); - p += sprintf(p, "%u,", i); - p += sprintf(p, "%u,",mr->perfect_tx_time); - - tp_max = minstrel_get_tp_avg(mr, MINSTREL_FRAC(100,100)); - tp_avg = minstrel_get_tp_avg(mr, mrs->prob_avg); - eprob = MINSTREL_TRUNC(mrs->prob_avg * 1000); - - p += sprintf(p, "%u.%u,%u.%u,%u.%u,%u,%u,%u," - "%llu,%llu,%d,%d\n", - tp_max / 10, tp_max % 10, - tp_avg / 10, tp_avg % 10, - eprob / 10, eprob % 10, - mrs->retry_count, - mrs->last_success, - mrs->last_attempts, - (unsigned long long)mrs->succ_hist, - (unsigned long long)mrs->att_hist, - mi->total_packets - mi->sample_packets, - mi->sample_packets); - - } - ms->len = p - ms->buf; - - WARN_ON(ms->len + sizeof(*ms) > 2048); - - return 0; -} diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c index 247557bff26c1..1fc3e836fe95d 100644 --- a/net/mac80211/rc80211_minstrel_ht.c +++ b/net/mac80211/rc80211_minstrel_ht.c @@ -13,7 +13,6 @@ #include #include "rate.h" #include "sta_info.h" -#include "rc80211_minstrel.h" #include "rc80211_minstrel_ht.h" #define AVG_AMPDU_SIZE 16 @@ -716,6 +715,83 @@ out: mi->sample_mode = MINSTREL_SAMPLE_ACTIVE; } +static inline int +minstrel_ewma(int old, int new, int weight) +{ + int diff, incr; + + diff = new - old; + incr = (EWMA_DIV - weight) * diff / EWMA_DIV; + + return old + incr; +} + +static inline int minstrel_filter_avg_add(u16 *prev_1, u16 *prev_2, s32 in) +{ + s32 out_1 = *prev_1; + s32 out_2 = *prev_2; + s32 val; + + if (!in) + in += 1; + + if (!out_1) { + val = out_1 = in; + goto out; + } + + val = MINSTREL_AVG_COEFF1 * in; + val += MINSTREL_AVG_COEFF2 * out_1; + val += MINSTREL_AVG_COEFF3 * out_2; + val >>= MINSTREL_SCALE; + + if (val > 1 << MINSTREL_SCALE) + val = 1 << MINSTREL_SCALE; + if (val < 0) + val = 1; + +out: + *prev_2 = out_1; + *prev_1 = val; + + return val; +} + +/* +* Recalculate statistics and counters of a given rate +*/ +static void +minstrel_ht_calc_rate_stats(struct minstrel_priv *mp, + struct minstrel_rate_stats *mrs) +{ + unsigned int cur_prob; + + if (unlikely(mrs->attempts > 0)) { + mrs->sample_skipped = 0; + cur_prob = MINSTREL_FRAC(mrs->success, mrs->attempts); + if (mp->new_avg) { + minstrel_filter_avg_add(&mrs->prob_avg, + &mrs->prob_avg_1, cur_prob); + } else if (unlikely(!mrs->att_hist)) { + mrs->prob_avg = cur_prob; + } else { + /*update exponential weighted moving avarage */ + mrs->prob_avg = minstrel_ewma(mrs->prob_avg, + cur_prob, + EWMA_LEVEL); + } + mrs->att_hist += mrs->attempts; + mrs->succ_hist += mrs->success; + } else { + mrs->sample_skipped++; + } + + mrs->last_success = mrs->success; + mrs->last_attempts = mrs->attempts; + mrs->success = 0; + mrs->attempts = 0; +} + /* * Update rate statistics and select new primary rates * @@ -808,7 +884,7 @@ minstrel_ht_update_stats(struct minstrel_priv *mp, struct minstrel_ht_sta *mi, mrs = &mg->rates[i]; mrs->retry_updated = false; - minstrel_calc_rate_stats(mp, mrs); + minstrel_ht_calc_rate_stats(mp, mrs); cur_prob = mrs->prob_avg; if (minstrel_ht_get_tp_avg(mi, group, i, cur_prob) == 0) @@ -960,8 +1036,7 @@ minstrel_ht_tx_status(void *priv, struct ieee80211_supported_band *sband, void *priv_sta, struct ieee80211_tx_status *st) { struct ieee80211_tx_info *info = st->info; - struct minstrel_ht_sta_priv *msp = priv_sta; - struct minstrel_ht_sta *mi = &msp->ht; + struct minstrel_ht_sta *mi = priv_sta; struct ieee80211_tx_rate *ar = info->status.rates; struct minstrel_rate_stats *rate, *rate2, *rate_sample = NULL; struct minstrel_priv *mp = priv; @@ -1372,8 +1447,7 @@ minstrel_ht_get_rate(void *priv, struct ieee80211_sta *sta, void *priv_sta, const struct mcs_group *sample_group; struct ieee80211_tx_info *info = IEEE80211_SKB_CB(txrc->skb); struct ieee80211_tx_rate *rate = &info->status.rates[0]; - struct minstrel_ht_sta_priv *msp = priv_sta; - struct minstrel_ht_sta *mi = &msp->ht; + struct minstrel_ht_sta *mi = priv_sta; struct minstrel_priv *mp = priv; int sample_idx; @@ -1484,8 +1558,7 @@ minstrel_ht_update_caps(void *priv, struct ieee80211_supported_band *sband, struct ieee80211_sta *sta, void *priv_sta) { struct minstrel_priv *mp = priv; - struct minstrel_ht_sta_priv *msp = priv_sta; - struct minstrel_ht_sta *mi = &msp->ht; + struct minstrel_ht_sta *mi = priv_sta; struct ieee80211_mcs_info *mcs = &sta->ht_cap.mcs; u16 ht_cap = sta->ht_cap.cap; struct ieee80211_sta_vht_cap *vht_cap = &sta->vht_cap; @@ -1647,7 +1720,7 @@ static void * minstrel_ht_alloc_sta(void *priv, struct ieee80211_sta *sta, gfp_t gfp) { struct ieee80211_supported_band *sband; - struct minstrel_ht_sta_priv *msp; + struct minstrel_ht_sta *mi; struct minstrel_priv *mp = priv; struct ieee80211_hw *hw = mp->hw; int max_rates = 0; @@ -1659,35 +1732,13 @@ minstrel_ht_alloc_sta(void *priv, struct ieee80211_sta *sta, gfp_t gfp) max_rates = sband->n_bitrates; } - msp = kzalloc(sizeof(*msp), gfp); - if (!msp) - return NULL; - - msp->ratelist = kcalloc(max_rates, sizeof(struct minstrel_rate), gfp); - if (!msp->ratelist) - goto error; - - msp->sample_table = kmalloc_array(max_rates, SAMPLE_COLUMNS, gfp); - if (!msp->sample_table) - goto error1; - - return msp; - -error1: - kfree(msp->ratelist); -error: - kfree(msp); - return NULL; + return kzalloc(sizeof(*mi), gfp); } static void minstrel_ht_free_sta(void *priv, struct ieee80211_sta *sta, void *priv_sta) { - struct minstrel_ht_sta_priv *msp = priv_sta; - - kfree(msp->sample_table); - kfree(msp->ratelist); - kfree(msp); + kfree(priv_sta); } static void @@ -1768,12 +1819,6 @@ minstrel_ht_alloc(struct ieee80211_hw *hw) mp->cw_min = 15; mp->cw_max = 1023; - /* number of packets (in %) to use for sampling other rates - * sample less often for non-mrr packets, because the overhead - * is much higher than with mrr */ - mp->lookaround_rate = 5; - mp->lookaround_rate_mrr = 10; - /* maximum time that the hw is allowed to stay in one MRR segment */ mp->segment_size = 6000; @@ -1821,8 +1866,7 @@ minstrel_ht_free(void *priv) static u32 minstrel_ht_get_expected_throughput(void *priv_sta) { - struct minstrel_ht_sta_priv *msp = priv_sta; - struct minstrel_ht_sta *mi = &msp->ht; + struct minstrel_ht_sta *mi = priv_sta; int i, j, prob, tp_avg; i = mi->max_tp_rate[0] / MCS_GROUP_RATES; diff --git a/net/mac80211/rc80211_minstrel_ht.h b/net/mac80211/rc80211_minstrel_ht.h index 3321630ebeea1..25c3664bddde6 100644 --- a/net/mac80211/rc80211_minstrel_ht.h +++ b/net/mac80211/rc80211_minstrel_ht.h @@ -6,6 +6,33 @@ #ifndef __RC_MINSTREL_HT_H #define __RC_MINSTREL_HT_H +/* number of highest throughput rates to consider*/ +#define MAX_THR_RATES 4 +#define SAMPLE_COLUMNS 10 /* number of columns in sample table */ + +/* scaled fraction values */ +#define MINSTREL_SCALE 12 +#define MINSTREL_FRAC(val, div) (((val) << MINSTREL_SCALE) / div) +#define MINSTREL_TRUNC(val) ((val) >> MINSTREL_SCALE) + +#define EWMA_LEVEL 96 /* ewma weighting factor [/EWMA_DIV] */ +#define EWMA_DIV 128 + +/* + * Coefficients for moving average with noise filter (period=16), + * scaled by 10 bits + * + * a1 = exp(-pi * sqrt(2) / period) + * coeff2 = 2 * a1 * cos(sqrt(2) * 2 * pi / period) + * coeff3 = -sqr(a1) + * coeff1 = 1 - coeff2 - coeff3 + */ +#define MINSTREL_AVG_COEFF1 (MINSTREL_FRAC(1, 1) - \ + MINSTREL_AVG_COEFF2 - \ + MINSTREL_AVG_COEFF3) +#define MINSTREL_AVG_COEFF2 0x00001499 +#define MINSTREL_AVG_COEFF3 -0x0000092e + /* * The number of streams can be changed to 2 to reduce code * size and memory footprint. @@ -30,6 +57,32 @@ #define MCS_GROUP_RATES 10 +struct minstrel_priv { + struct ieee80211_hw *hw; + bool has_mrr; + bool new_avg; + u32 sample_switch; + unsigned int cw_min; + unsigned int cw_max; + unsigned int max_retry; + unsigned int segment_size; + unsigned int update_interval; + + u8 cck_rates[4]; + u8 ofdm_rates[NUM_NL80211_BANDS][8]; + +#ifdef CONFIG_MAC80211_DEBUGFS + /* + * enable fixed rate processing per RC + * - write static index to debugfs:ieee80211/phyX/rc/fixed_rate_idx + * - write -1 to enable RC processing again + * - setting will be applied on next update + */ + u32 fixed_rate_idx; +#endif +}; + + struct mcs_group { u16 flags; u8 streams; @@ -42,6 +95,26 @@ extern const s16 minstrel_cck_bitrates[4]; extern const s16 minstrel_ofdm_bitrates[8]; extern const struct mcs_group minstrel_mcs_groups[]; +struct minstrel_rate_stats { + /* current / last sampling period attempts/success counters */ + u16 attempts, last_attempts; + u16 success, last_success; + + /* total attempts/success counters */ + u32 att_hist, succ_hist; + + /* prob_avg - moving average of prob */ + u16 prob_avg; + u16 prob_avg_1; + + /* maximum retry counts */ + u8 retry_count; + u8 retry_count_rtscts; + + u8 sample_skipped; + bool retry_updated; +}; + struct minstrel_mcs_group_data { u8 index; u8 column; @@ -111,12 +184,6 @@ struct minstrel_ht_sta { struct minstrel_mcs_group_data groups[MINSTREL_GROUPS_NB]; }; -struct minstrel_ht_sta_priv { - struct minstrel_ht_sta ht; - void *ratelist; - void *sample_table; -}; - void minstrel_ht_add_sta_debugfs(void *priv, void *priv_sta, struct dentry *dir); int minstrel_ht_get_tp_avg(struct minstrel_ht_sta *mi, int group, int rate, int prob_avg); diff --git a/net/mac80211/rc80211_minstrel_ht_debugfs.c b/net/mac80211/rc80211_minstrel_ht_debugfs.c index 75ecc3318b500..3b7af242cde67 100644 --- a/net/mac80211/rc80211_minstrel_ht_debugfs.c +++ b/net/mac80211/rc80211_minstrel_ht_debugfs.c @@ -9,9 +9,13 @@ #include #include #include -#include "rc80211_minstrel.h" #include "rc80211_minstrel_ht.h" +struct minstrel_debugfs_info { + size_t len; + char buf[]; +}; + static ssize_t minstrel_stats_read(struct file *file, char __user *buf, size_t len, loff_t *ppos) { @@ -127,8 +131,7 @@ minstrel_ht_stats_dump(struct minstrel_ht_sta *mi, int i, char *p) static int minstrel_ht_stats_open(struct inode *inode, struct file *file) { - struct minstrel_ht_sta_priv *msp = inode->i_private; - struct minstrel_ht_sta *mi = &msp->ht; + struct minstrel_ht_sta *mi = inode->i_private; struct minstrel_debugfs_info *ms; unsigned int i; char *p; @@ -276,8 +279,7 @@ minstrel_ht_stats_csv_dump(struct minstrel_ht_sta *mi, int i, char *p) static int minstrel_ht_stats_csv_open(struct inode *inode, struct file *file) { - struct minstrel_ht_sta_priv *msp = inode->i_private; - struct minstrel_ht_sta *mi = &msp->ht; + struct minstrel_ht_sta *mi = inode->i_private; struct minstrel_debugfs_info *ms; unsigned int i; char *p; @@ -313,10 +315,8 @@ static const struct file_operations minstrel_ht_stat_csv_fops = { void minstrel_ht_add_sta_debugfs(void *priv, void *priv_sta, struct dentry *dir) { - struct minstrel_ht_sta_priv *msp = priv_sta; - - debugfs_create_file("rc_stats", 0444, dir, msp, + debugfs_create_file("rc_stats", 0444, dir, priv_sta, &minstrel_ht_stat_fops); - debugfs_create_file("rc_stats_csv", 0444, dir, msp, + debugfs_create_file("rc_stats_csv", 0444, dir, priv_sta, &minstrel_ht_stat_csv_fops); } -- GitLab From eeafcb0c80c81d6f569fc72630f573ea56112f2b Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Fri, 15 Jan 2021 13:02:37 +0100 Subject: [PATCH 1237/2012] mac80211: minstrel_ht: remove old ewma based rate average code The new noise filter has been the default for a while now with no reported downside and significant improvement compared to the old code. Signed-off-by: Felix Fietkau Link: https://lore.kernel.org/r/20210115120242.89616-5-nbd@nbd.name Signed-off-by: Johannes Berg --- net/mac80211/rc80211_minstrel_ht.c | 31 ++++++++---------------------- net/mac80211/rc80211_minstrel_ht.h | 1 - 2 files changed, 8 insertions(+), 24 deletions(-) diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c index 1fc3e836fe95d..9280461ed3cc0 100644 --- a/net/mac80211/rc80211_minstrel_ht.c +++ b/net/mac80211/rc80211_minstrel_ht.c @@ -769,17 +769,8 @@ minstrel_ht_calc_rate_stats(struct minstrel_priv *mp, if (unlikely(mrs->attempts > 0)) { mrs->sample_skipped = 0; cur_prob = MINSTREL_FRAC(mrs->success, mrs->attempts); - if (mp->new_avg) { - minstrel_filter_avg_add(&mrs->prob_avg, - &mrs->prob_avg_1, cur_prob); - } else if (unlikely(!mrs->att_hist)) { - mrs->prob_avg = cur_prob; - } else { - /*update exponential weighted moving avarage */ - mrs->prob_avg = minstrel_ewma(mrs->prob_avg, - cur_prob, - EWMA_LEVEL); - } + minstrel_filter_avg_add(&mrs->prob_avg, + &mrs->prob_avg_1, cur_prob); mrs->att_hist += mrs->attempts; mrs->succ_hist += mrs->success; } else { @@ -913,10 +904,8 @@ minstrel_ht_update_stats(struct minstrel_priv *mp, struct minstrel_ht_sta *mi, /* Try to increase robustness of max_prob_rate*/ minstrel_ht_prob_rate_reduce_streams(mi); - /* try to sample all available rates during each interval */ - mi->sample_count *= 8; - if (mp->new_avg) - mi->sample_count /= 2; + /* try to sample half of all available rates during each interval */ + mi->sample_count *= 4; if (sample) minstrel_ht_rate_sample_switch(mp, mi); @@ -1040,7 +1029,7 @@ minstrel_ht_tx_status(void *priv, struct ieee80211_supported_band *sband, struct ieee80211_tx_rate *ar = info->status.rates; struct minstrel_rate_stats *rate, *rate2, *rate_sample = NULL; struct minstrel_priv *mp = priv; - u32 update_interval = mp->update_interval / 2; + u32 update_interval = mp->update_interval; bool last, update = false; bool sample_status = false; int i; @@ -1090,9 +1079,8 @@ minstrel_ht_tx_status(void *priv, struct ieee80211_supported_band *sband, switch (mi->sample_mode) { case MINSTREL_SAMPLE_IDLE: - if (mp->new_avg && - (mp->hw->max_rates > 1 || - mi->total_packets_cur < SAMPLE_SWITCH_THR)) + if (mp->hw->max_rates > 1 || + mi->total_packets_cur < SAMPLE_SWITCH_THR) update_interval /= 2; break; @@ -1832,8 +1820,7 @@ minstrel_ht_alloc(struct ieee80211_hw *hw) mp->has_mrr = true; mp->hw = hw; - mp->update_interval = HZ / 10; - mp->new_avg = true; + mp->update_interval = HZ / 20; minstrel_ht_init_cck_rates(mp); for (i = 0; i < ARRAY_SIZE(mp->hw->wiphy->bands); i++) @@ -1853,8 +1840,6 @@ static void minstrel_ht_add_debugfs(struct ieee80211_hw *hw, void *priv, &mp->fixed_rate_idx); debugfs_create_u32("sample_switch", S_IRUGO | S_IWUSR, debugfsdir, &mp->sample_switch); - debugfs_create_bool("new_avg", S_IRUGO | S_IWUSR, debugfsdir, - &mp->new_avg); } #endif diff --git a/net/mac80211/rc80211_minstrel_ht.h b/net/mac80211/rc80211_minstrel_ht.h index 25c3664bddde6..7d6d0b720f6d0 100644 --- a/net/mac80211/rc80211_minstrel_ht.h +++ b/net/mac80211/rc80211_minstrel_ht.h @@ -60,7 +60,6 @@ struct minstrel_priv { struct ieee80211_hw *hw; bool has_mrr; - bool new_avg; u32 sample_switch; unsigned int cw_min; unsigned int cw_max; -- GitLab From 1ae8bba9a23b6cc9efbd9b4ca485ed057d5639a6 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Fri, 15 Jan 2021 13:02:38 +0100 Subject: [PATCH 1238/2012] mac80211: minstrel_ht: improve ampdu length estimation If the driver does not report A-MPDU length, estimate it based on the rate. Signed-off-by: Felix Fietkau Link: https://lore.kernel.org/r/20210115120242.89616-6-nbd@nbd.name Signed-off-by: Johannes Berg --- net/mac80211/rc80211_minstrel_ht.c | 38 +++++++++++++++++++++--------- 1 file changed, 27 insertions(+), 11 deletions(-) diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c index 9280461ed3cc0..8b2376898ec8d 100644 --- a/net/mac80211/rc80211_minstrel_ht.c +++ b/net/mac80211/rc80211_minstrel_ht.c @@ -382,13 +382,37 @@ minstrel_get_ratestats(struct minstrel_ht_sta *mi, int index) return &mi->groups[index / MCS_GROUP_RATES].rates[index % MCS_GROUP_RATES]; } +static inline int minstrel_get_duration(int index) +{ + const struct mcs_group *group = &minstrel_mcs_groups[index / MCS_GROUP_RATES]; + unsigned int duration = group->duration[index % MCS_GROUP_RATES]; + + return duration << group->shift; +} + static unsigned int minstrel_ht_avg_ampdu_len(struct minstrel_ht_sta *mi) { - if (!mi->avg_ampdu_len) - return AVG_AMPDU_SIZE; + int duration; + + if (mi->avg_ampdu_len) + return MINSTREL_TRUNC(mi->avg_ampdu_len); + + if (minstrel_ht_is_legacy_group(mi->max_tp_rate[0] / MCS_GROUP_RATES)) + return 1; + + duration = minstrel_get_duration(mi->max_tp_rate[0]); - return MINSTREL_TRUNC(mi->avg_ampdu_len); + if (duration > 400 * 1000) + return 2; + + if (duration > 250 * 1000) + return 4; + + if (duration > 150 * 1000) + return 8; + + return 16; } /* @@ -588,14 +612,6 @@ minstrel_ht_prob_rate_reduce_streams(struct minstrel_ht_sta *mi) } } -static inline int -minstrel_get_duration(int index) -{ - const struct mcs_group *group = &minstrel_mcs_groups[index / MCS_GROUP_RATES]; - unsigned int duration = group->duration[index % MCS_GROUP_RATES]; - return duration << group->shift; -} - static bool minstrel_ht_probe_group(struct minstrel_ht_sta *mi, const struct mcs_group *tp_group, int tp_idx, const struct mcs_group *group) -- GitLab From 019c6fc2782998fbaa0bdca578e26f7823a11cf2 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Fri, 15 Jan 2021 13:02:39 +0100 Subject: [PATCH 1239/2012] mac80211: minstrel_ht: improve sample rate selection Always allow sampling of rates faster than the primary max throughput rate. When the second max_tp_rate is higher than the first one, sample attempts were previously skipped, potentially causing rate control to get stuck at a slightly lower rate Signed-off-by: Felix Fietkau Link: https://lore.kernel.org/r/20210115120242.89616-7-nbd@nbd.name Signed-off-by: Johannes Berg --- net/mac80211/rc80211_minstrel_ht.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c index 8b2376898ec8d..aa71a58d4431f 100644 --- a/net/mac80211/rc80211_minstrel_ht.c +++ b/net/mac80211/rc80211_minstrel_ht.c @@ -1379,13 +1379,13 @@ minstrel_get_sample_rate(struct minstrel_priv *mp, struct minstrel_ht_sta *mi) mrs = &mg->rates[sample_idx]; sample_idx += sample_group * MCS_GROUP_RATES; - /* Set tp_rate1, tp_rate2 to the highest / second highest max_tp_rate */ + tp_rate1 = mi->max_tp_rate[0]; + + /* Set tp_rate2 to the second highest max_tp_rate */ if (minstrel_get_duration(mi->max_tp_rate[0]) > minstrel_get_duration(mi->max_tp_rate[1])) { - tp_rate1 = mi->max_tp_rate[1]; tp_rate2 = mi->max_tp_rate[0]; } else { - tp_rate1 = mi->max_tp_rate[0]; tp_rate2 = mi->max_tp_rate[1]; } -- GitLab From a7fca4e4037f7e3fa84d4532ea0fd8b00c39c7a2 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Fri, 15 Jan 2021 13:02:40 +0100 Subject: [PATCH 1240/2012] mac80211: minstrel_ht: fix max probability rate selection - do not select rates faster than the max throughput rate if probability is lower - reset previous rate before sorting again This ensures that the max prob rate gets set to a more reliable rate Signed-off-by: Felix Fietkau Link: https://lore.kernel.org/r/20210115120242.89616-8-nbd@nbd.name Signed-off-by: Johannes Berg --- net/mac80211/rc80211_minstrel_ht.c | 47 ++++++++++++++++++++++++------ 1 file changed, 38 insertions(+), 9 deletions(-) diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c index aa71a58d4431f..b1a06ecca34d3 100644 --- a/net/mac80211/rc80211_minstrel_ht.c +++ b/net/mac80211/rc80211_minstrel_ht.c @@ -495,12 +495,13 @@ minstrel_ht_sort_best_tp_rates(struct minstrel_ht_sta *mi, u16 index, * Find and set the topmost probability rate per sta and per group */ static void -minstrel_ht_set_best_prob_rate(struct minstrel_ht_sta *mi, u16 index) +minstrel_ht_set_best_prob_rate(struct minstrel_ht_sta *mi, u16 *dest, u16 index) { struct minstrel_mcs_group_data *mg; struct minstrel_rate_stats *mrs; int tmp_group, tmp_idx, tmp_tp_avg, tmp_prob; - int max_tp_group, cur_tp_avg, cur_group, cur_idx; + int max_tp_group, max_tp_idx, max_tp_prob; + int cur_tp_avg, cur_group, cur_idx; int max_gpr_group, max_gpr_idx; int max_gpr_tp_avg, max_gpr_prob; @@ -509,18 +510,26 @@ minstrel_ht_set_best_prob_rate(struct minstrel_ht_sta *mi, u16 index) mg = &mi->groups[index / MCS_GROUP_RATES]; mrs = &mg->rates[index % MCS_GROUP_RATES]; - tmp_group = mi->max_prob_rate / MCS_GROUP_RATES; - tmp_idx = mi->max_prob_rate % MCS_GROUP_RATES; + tmp_group = *dest / MCS_GROUP_RATES; + tmp_idx = *dest % MCS_GROUP_RATES; tmp_prob = mi->groups[tmp_group].rates[tmp_idx].prob_avg; tmp_tp_avg = minstrel_ht_get_tp_avg(mi, tmp_group, tmp_idx, tmp_prob); /* if max_tp_rate[0] is from MCS_GROUP max_prob_rate get selected from * MCS_GROUP as well as CCK_GROUP rates do not allow aggregation */ max_tp_group = mi->max_tp_rate[0] / MCS_GROUP_RATES; + max_tp_idx = mi->max_tp_rate[0] % MCS_GROUP_RATES; + max_tp_prob = mi->groups[max_tp_group].rates[max_tp_idx].prob_avg; + if (minstrel_ht_is_legacy_group(index / MCS_GROUP_RATES) && !minstrel_ht_is_legacy_group(max_tp_group)) return; + /* skip rates faster than max tp rate with lower prob */ + if (minstrel_get_duration(mi->max_tp_rate[0]) > minstrel_get_duration(index) && + mrs->prob_avg < max_tp_prob) + return; + max_gpr_group = mg->max_group_prob_rate / MCS_GROUP_RATES; max_gpr_idx = mg->max_group_prob_rate % MCS_GROUP_RATES; max_gpr_prob = mi->groups[max_gpr_group].rates[max_gpr_idx].prob_avg; @@ -538,7 +547,7 @@ minstrel_ht_set_best_prob_rate(struct minstrel_ht_sta *mi, u16 index) mg->max_group_prob_rate = index; } else { if (mrs->prob_avg > tmp_prob) - mi->max_prob_rate = index; + *dest = index; if (mrs->prob_avg > max_gpr_prob) mg->max_group_prob_rate = index; } @@ -816,7 +825,8 @@ minstrel_ht_update_stats(struct minstrel_priv *mp, struct minstrel_ht_sta *mi, struct minstrel_rate_stats *mrs; int group, i, j, cur_prob; u16 tmp_mcs_tp_rate[MAX_THR_RATES], tmp_group_tp_rate[MAX_THR_RATES]; - u16 tmp_legacy_tp_rate[MAX_THR_RATES], index; + u16 tmp_legacy_tp_rate[MAX_THR_RATES], tmp_max_prob_rate; + u16 index; bool ht_supported = mi->sta->ht_cap.ht_supported; mi->sample_mode = MINSTREL_SAMPLE_IDLE; @@ -863,6 +873,7 @@ minstrel_ht_update_stats(struct minstrel_priv *mp, struct minstrel_ht_sta *mi, else index = MINSTREL_OFDM_GROUP * MCS_GROUP_RATES; + tmp_max_prob_rate = index; for (j = 0; j < ARRAY_SIZE(tmp_mcs_tp_rate); j++) tmp_mcs_tp_rate[j] = index; @@ -903,9 +914,6 @@ minstrel_ht_update_stats(struct minstrel_priv *mp, struct minstrel_ht_sta *mi, /* Find max throughput rate set within a group */ minstrel_ht_sort_best_tp_rates(mi, index, tmp_group_tp_rate); - - /* Find max probability rate per group and global */ - minstrel_ht_set_best_prob_rate(mi, index); } memcpy(mg->max_group_tp_rate, tmp_group_tp_rate, @@ -917,6 +925,27 @@ minstrel_ht_update_stats(struct minstrel_priv *mp, struct minstrel_ht_sta *mi, tmp_legacy_tp_rate); memcpy(mi->max_tp_rate, tmp_mcs_tp_rate, sizeof(mi->max_tp_rate)); + for (group = 0; group < ARRAY_SIZE(minstrel_mcs_groups); group++) { + if (!mi->supported[group]) + continue; + + mg = &mi->groups[group]; + mg->max_group_prob_rate = MCS_GROUP_RATES * group; + + for (i = 0; i < MCS_GROUP_RATES; i++) { + if (!(mi->supported[group] & BIT(i))) + continue; + + index = MCS_GROUP_RATES * group + i; + + /* Find max probability rate per group and global */ + minstrel_ht_set_best_prob_rate(mi, &tmp_max_prob_rate, + index); + } + } + + mi->max_prob_rate = tmp_max_prob_rate; + /* Try to increase robustness of max_prob_rate*/ minstrel_ht_prob_rate_reduce_streams(mi); -- GitLab From 7e2123abc51648c508c8e6a10e44ab6d2db6f0ec Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Fri, 15 Jan 2021 13:02:41 +0100 Subject: [PATCH 1241/2012] mac80211: minstrel_ht: increase stats update interval The shorter interval was leading to too many frames being used for probing Signed-off-by: Felix Fietkau Link: https://lore.kernel.org/r/20210115120242.89616-9-nbd@nbd.name Signed-off-by: Johannes Berg --- net/mac80211/rc80211_minstrel_ht.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c index b1a06ecca34d3..782b4668a3da0 100644 --- a/net/mac80211/rc80211_minstrel_ht.c +++ b/net/mac80211/rc80211_minstrel_ht.c @@ -1865,7 +1865,7 @@ minstrel_ht_alloc(struct ieee80211_hw *hw) mp->has_mrr = true; mp->hw = hw; - mp->update_interval = HZ / 20; + mp->update_interval = HZ / 10; minstrel_ht_init_cck_rates(mp); for (i = 0; i < ARRAY_SIZE(mp->hw->wiphy->bands); i++) -- GitLab From 347c2989a8ba8a231f2ffc0635f2f36fedd30bde Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Fri, 15 Jan 2021 13:02:42 +0100 Subject: [PATCH 1242/2012] mac80211: minstrel_ht: fix rounding error in throughput calculation On lower data rates, the throughput calculation has a significant rounding error, causing rates like 48M and 54M OFDM to share the same throughput value with >= 90% success probablity. This is because the result of the division (prob_avg * 1000) / nsecs is really small (8 in this example). Improve accuracy by moving over some zeroes, making better use of the full range of u32 before the division. Signed-off-by: Felix Fietkau Link: https://lore.kernel.org/r/20210115120242.89616-10-nbd@nbd.name Signed-off-by: Johannes Berg --- net/mac80211/rc80211_minstrel_ht.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c index 782b4668a3da0..e35948f4e1bf9 100644 --- a/net/mac80211/rc80211_minstrel_ht.c +++ b/net/mac80211/rc80211_minstrel_ht.c @@ -445,10 +445,9 @@ minstrel_ht_get_tp_avg(struct minstrel_ht_sta *mi, int group, int rate, * (prob is scaled - see MINSTREL_FRAC above) */ if (prob_avg > MINSTREL_FRAC(90, 100)) - return MINSTREL_TRUNC(100000 * ((MINSTREL_FRAC(90, 100) * 1000) - / nsecs)); - else - return MINSTREL_TRUNC(100000 * ((prob_avg * 1000) / nsecs)); + prob_avg = MINSTREL_FRAC(90, 100); + + return MINSTREL_TRUNC(100 * ((prob_avg * 1000000) / nsecs)); } /* -- GitLab From a4166340a6e4d501c9e3aee81c20a269726ecde0 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Tue, 19 Jan 2021 23:41:23 -0300 Subject: [PATCH 1243/2012] Revert "i2c: imx: Remove unused .id_table support" Coldfire platforms are non-DT users of this driver, so keep the .id_table support. This reverts commit c610199cd392e6e2d41811ef83d85355c1b862b3. Fixes: c610199cd392 (i2c: imx: Remove unused .id_table support") Reported-by: Sascha Hauer Signed-off-by: Fabio Estevam Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-imx.c | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-imx.c b/drivers/i2c/busses/i2c-imx.c index b444fbf1a2625..a8e8af57e33f4 100644 --- a/drivers/i2c/busses/i2c-imx.c +++ b/drivers/i2c/busses/i2c-imx.c @@ -241,6 +241,19 @@ static struct imx_i2c_hwdata vf610_i2c_hwdata = { }; +static const struct platform_device_id imx_i2c_devtype[] = { + { + .name = "imx1-i2c", + .driver_data = (kernel_ulong_t)&imx1_i2c_hwdata, + }, { + .name = "imx21-i2c", + .driver_data = (kernel_ulong_t)&imx21_i2c_hwdata, + }, { + /* sentinel */ + } +}; +MODULE_DEVICE_TABLE(platform, imx_i2c_devtype); + static const struct of_device_id i2c_imx_dt_ids[] = { { .compatible = "fsl,imx1-i2c", .data = &imx1_i2c_hwdata, }, { .compatible = "fsl,imx21-i2c", .data = &imx21_i2c_hwdata, }, @@ -1330,7 +1343,11 @@ static int i2c_imx_probe(struct platform_device *pdev) return -ENOMEM; match = device_get_match_data(&pdev->dev); - i2c_imx->hwdata = match; + if (match) + i2c_imx->hwdata = match; + else + i2c_imx->hwdata = (struct imx_i2c_hwdata *) + platform_get_device_id(pdev)->driver_data; /* Setup i2c_imx driver structure */ strlcpy(i2c_imx->adapter.name, pdev->name, sizeof(i2c_imx->adapter.name)); @@ -1498,6 +1515,7 @@ static struct platform_driver i2c_imx_driver = { .of_match_table = i2c_imx_dt_ids, .acpi_match_table = i2c_imx_acpi_ids, }, + .id_table = imx_i2c_devtype, }; static int __init i2c_adap_imx_init(void) -- GitLab From 5de3b9430221b11a5e1fc2f5687af80777c8392a Mon Sep 17 00:00:00 2001 From: Jian-Hong Pan Date: Fri, 22 Jan 2021 13:47:06 +0800 Subject: [PATCH 1244/2012] ALSA: hda/realtek: Enable headset of ASUS B1400CEPE with ALC256 ASUS B1400CEPE laptop's headset audio is not enabled until ALC256_FIXUP_ASUS_HPE quirk is applied. Here is the original pin node values: 0x12 0x40000000 0x13 0x411111f0 0x14 0x90170110 0x18 0x411111f0 0x19 0x411111f0 0x1a 0x411111f0 0x1b 0x411111f0 0x1d 0x40461b45 0x1e 0x411111f0 0x21 0x04211020 Signed-off-by: Jian-Hong Pan Cc: Link: https://lore.kernel.org/r/20210122054705.48804-1-jhp@endlessos.org Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index ed5b6b894dc19..290645516313c 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -8006,6 +8006,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1043, 0x18b1, "Asus MJ401TA", ALC256_FIXUP_ASUS_HEADSET_MIC), SND_PCI_QUIRK(0x1043, 0x18f1, "Asus FX505DT", ALC256_FIXUP_ASUS_HEADSET_MIC), SND_PCI_QUIRK(0x1043, 0x194e, "ASUS UX563FD", ALC294_FIXUP_ASUS_HPE), + SND_PCI_QUIRK(0x1043, 0x1982, "ASUS B1400CEPE", ALC256_FIXUP_ASUS_HPE), SND_PCI_QUIRK(0x1043, 0x19ce, "ASUS B9450FA", ALC294_FIXUP_ASUS_HPE), SND_PCI_QUIRK(0x1043, 0x19e1, "ASUS UX581LV", ALC295_FIXUP_ASUS_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1043, 0x1a13, "Asus G73Jw", ALC269_FIXUP_ASUS_G73JW), -- GitLab From 9ecd1d2b302b600351fac50779f43fcb680c1a16 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sun, 17 Jan 2021 12:43:13 +0100 Subject: [PATCH 1245/2012] i2c: sprd: depend on COMMON_CLK to fix compile tests The I2C_SPRD uses Common Clock Framework thus it cannot be built on platforms without it (e.g. compile test on MIPS with LANTIQ): /usr/bin/mips-linux-gnu-ld: drivers/i2c/busses/i2c-sprd.o: in function `sprd_i2c_probe': i2c-sprd.c:(.text.sprd_i2c_probe+0x254): undefined reference to `clk_set_parent' Fixes: 4a2d5f663dab ("i2c: Enable compile testing for more drivers") Reported-by: kernel test robot Signed-off-by: Krzysztof Kozlowski Reviewed-by: Baolin Wang Signed-off-by: Wolfram Sang --- drivers/i2c/busses/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/i2c/busses/Kconfig b/drivers/i2c/busses/Kconfig index d4d60ad0eda0b..ab1f39ac39f4f 100644 --- a/drivers/i2c/busses/Kconfig +++ b/drivers/i2c/busses/Kconfig @@ -1013,6 +1013,7 @@ config I2C_SIRF config I2C_SPRD tristate "Spreadtrum I2C interface" depends on I2C=y && (ARCH_SPRD || COMPILE_TEST) + depends on COMMON_CLK help If you say yes to this option, support will be included for the Spreadtrum I2C interface. -- GitLab From 17749851eb9ca2298e7c3b81aae4228961b36f28 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 21 Jan 2021 10:04:27 -0800 Subject: [PATCH 1246/2012] tty: fix up hung_up_tty_write() conversion In commit "tty: implement write_iter", I left the write_iter conversion of the hung up tty case alone, because I incorrectly thought it didn't matter. Jiri showed me the errors of my ways, and pointed out the problems with that incomplete conversion. Fix it all up. Reported-by: Jiri Slaby Signed-off-by: Linus Torvalds Reviewed-by: Jiri Slaby Link: https://lore.kernel.org/r/CAHk-=wh+-rGsa=xruEWdg_fJViFG8rN9bpLrfLz=_yBYh2tBhA@mail.gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/tty_io.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c index 502862626b2bc..4a208a95e9219 100644 --- a/drivers/tty/tty_io.c +++ b/drivers/tty/tty_io.c @@ -437,8 +437,7 @@ static ssize_t hung_up_tty_read(struct file *file, char __user *buf, return 0; } -static ssize_t hung_up_tty_write(struct file *file, const char __user *buf, - size_t count, loff_t *ppos) +static ssize_t hung_up_tty_write(struct kiocb *iocb, struct iov_iter *from) { return -EIO; } @@ -504,7 +503,7 @@ static const struct file_operations console_fops = { static const struct file_operations hung_up_tty_fops = { .llseek = no_llseek, .read = hung_up_tty_read, - .write = hung_up_tty_write, + .write_iter = hung_up_tty_write, .poll = hung_up_tty_poll, .unlocked_ioctl = hung_up_tty_ioctl, .compat_ioctl = hung_up_tty_compat_ioctl, @@ -1044,7 +1043,9 @@ static ssize_t tty_write(struct kiocb *iocb, struct iov_iter *from) if (tty->ops->write_room == NULL) tty_err(tty, "missing write_room method\n"); ld = tty_ldisc_ref_wait(tty); - if (!ld || !ld->ops->write) + if (!ld) + return hung_up_tty_write(iocb, from); + if (!ld->ops->write) ret = -EIO; else ret = do_tty_write(ld->ops->write, tty, file, from); -- GitLab From 31b081066e9c8f4a931a3d20dc0c6ca63c595c44 Mon Sep 17 00:00:00 2001 From: Ricky Wu Date: Fri, 22 Jan 2021 16:19:06 +0800 Subject: [PATCH 1247/2012] misc: rtsx: init value of aspm_enabled make sure ASPM state sync with pcr->aspm_enabled init value pcr->aspm_enabled Cc: stable@vger.kernel.org Signed-off-by: Ricky Wu Link: https://lore.kernel.org/r/20210122081906.19100-1-ricky_wu@realtek.com Fixes: d928061c3143 ("misc: rtsx: modify en/disable aspm function") Signed-off-by: Greg Kroah-Hartman --- drivers/misc/cardreader/rtsx_pcr.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/misc/cardreader/rtsx_pcr.c b/drivers/misc/cardreader/rtsx_pcr.c index 2aa6648fa41f9..5a491d2cd1ae6 100644 --- a/drivers/misc/cardreader/rtsx_pcr.c +++ b/drivers/misc/cardreader/rtsx_pcr.c @@ -1512,6 +1512,7 @@ static int rtsx_pci_probe(struct pci_dev *pcidev, struct pcr_handle *handle; u32 base, len; int ret, i, bar = 0; + u8 val; dev_dbg(&(pcidev->dev), ": Realtek PCI-E Card Reader found at %s [%04x:%04x] (rev %x)\n", @@ -1577,7 +1578,11 @@ static int rtsx_pci_probe(struct pci_dev *pcidev, pcr->host_cmds_addr = pcr->rtsx_resv_buf_addr; pcr->host_sg_tbl_ptr = pcr->rtsx_resv_buf + HOST_CMDS_BUF_LEN; pcr->host_sg_tbl_addr = pcr->rtsx_resv_buf_addr + HOST_CMDS_BUF_LEN; - + rtsx_pci_read_register(pcr, ASPM_FORCE_CTL, &val); + if (val & FORCE_ASPM_CTL0 && val & FORCE_ASPM_CTL1) + pcr->aspm_enabled = false; + else + pcr->aspm_enabled = true; pcr->card_inserted = 0; pcr->card_removed = 0; INIT_DELAYED_WORK(&pcr->carddet_work, rtsx_pci_card_detect); -- GitLab From 14a71d509ac809dcf56d7e3ca376b15d17bd0ddd Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Fri, 22 Jan 2021 13:20:42 +0000 Subject: [PATCH 1248/2012] regulator: Fix lockdep warning resolving supplies With commit eaa7995c529b54 (regulator: core: avoid regulator_resolve_supply() race condition) we started holding the rdev lock while resolving supplies, an operation that requires holding the regulator_list_mutex. This results in lockdep warnings since in other places we take the list mutex then the mutex on an individual rdev. Since the goal is to make sure that we don't call set_supply() twice rather than a concern about the cost of resolution pull the rdev lock and check for duplicate resolution down to immediately before we do the set_supply() and drop it again once the allocation is done. Fixes: eaa7995c529b54 (regulator: core: avoid regulator_resolve_supply() race condition) Reported-by: Marek Szyprowski Tested-by: Marek Szyprowski Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20210122132042.10306-1-broonie@kernel.org Signed-off-by: Mark Brown --- drivers/regulator/core.c | 29 +++++++++++++++++------------ 1 file changed, 17 insertions(+), 12 deletions(-) diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index cfc3bc9df93a0..67a768fe5b2a3 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -1823,17 +1823,6 @@ static int regulator_resolve_supply(struct regulator_dev *rdev) if (rdev->supply) return 0; - /* - * Recheck rdev->supply with rdev->mutex lock held to avoid a race - * between rdev->supply null check and setting rdev->supply in - * set_supply() from concurrent tasks. - */ - regulator_lock(rdev); - - /* Supply just resolved by a concurrent task? */ - if (rdev->supply) - goto out; - r = regulator_dev_lookup(dev, rdev->supply_name); if (IS_ERR(r)) { ret = PTR_ERR(r); @@ -1885,12 +1874,29 @@ static int regulator_resolve_supply(struct regulator_dev *rdev) goto out; } + /* + * Recheck rdev->supply with rdev->mutex lock held to avoid a race + * between rdev->supply null check and setting rdev->supply in + * set_supply() from concurrent tasks. + */ + regulator_lock(rdev); + + /* Supply just resolved by a concurrent task? */ + if (rdev->supply) { + regulator_unlock(rdev); + put_device(&r->dev); + goto out; + } + ret = set_supply(rdev, r); if (ret < 0) { + regulator_unlock(rdev); put_device(&r->dev); goto out; } + regulator_unlock(rdev); + /* * In set_machine_constraints() we may have turned this regulator on * but we couldn't propagate to the supply if it hadn't been resolved @@ -1906,7 +1912,6 @@ static int regulator_resolve_supply(struct regulator_dev *rdev) } out: - regulator_unlock(rdev); return ret; } -- GitLab From 36c6e17bf16922935a5a0dd073d5b032d34aa73d Mon Sep 17 00:00:00 2001 From: Valentin Schneider Date: Wed, 13 Jan 2021 18:31:41 +0000 Subject: [PATCH 1249/2012] sched/core: Print out straggler tasks in sched_cpu_dying() Since commit 1cf12e08bc4d ("sched/hotplug: Consolidate task migration on CPU unplug") tasks are expected to move themselves out of a out-going CPU. For most tasks this will be done automagically via BALANCE_PUSH, but percpu kthreads will have to cooperate and move themselves away one way or another. Currently, some percpu kthreads (workqueues being a notable exemple) do not cooperate nicely and can end up on an out-going CPU at the time sched_cpu_dying() is invoked. Print the dying rq's tasks to shed some light on the stragglers. Signed-off-by: Valentin Schneider Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Valentin Schneider Tested-by: Valentin Schneider Link: https://lkml.kernel.org/r/20210113183141.11974-1-valentin.schneider@arm.com --- kernel/sched/core.c | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 15d2562118d17..627534fa91961 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -7574,6 +7574,25 @@ static void calc_load_migrate(struct rq *rq) atomic_long_add(delta, &calc_load_tasks); } +static void dump_rq_tasks(struct rq *rq, const char *loglvl) +{ + struct task_struct *g, *p; + int cpu = cpu_of(rq); + + lockdep_assert_held(&rq->lock); + + printk("%sCPU%d enqueued tasks (%u total):\n", loglvl, cpu, rq->nr_running); + for_each_process_thread(g, p) { + if (task_cpu(p) != cpu) + continue; + + if (!task_on_rq_queued(p)) + continue; + + printk("%s\tpid: %d, name: %s\n", loglvl, p->pid, p->comm); + } +} + int sched_cpu_dying(unsigned int cpu) { struct rq *rq = cpu_rq(cpu); @@ -7583,7 +7602,10 @@ int sched_cpu_dying(unsigned int cpu) sched_tick_stop(cpu); rq_lock_irqsave(rq, &rf); - BUG_ON(rq->nr_running != 1 || rq_has_pinned_tasks(rq)); + if (rq->nr_running != 1 || rq_has_pinned_tasks(rq)) { + WARN(true, "Dying CPU not properly vacated!"); + dump_rq_tasks(rq, KERN_WARNING); + } rq_unlock_irqrestore(rq, &rf); calc_load_migrate(rq); -- GitLab From 547a77d02f8cfb345631ce23b5b548d27afa0fc4 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Mon, 11 Jan 2021 23:26:33 +0800 Subject: [PATCH 1250/2012] workqueue: Use cpu_possible_mask instead of cpu_active_mask to break affinity The scheduler won't break affinity for us any more, and we should "emulate" the same behavior when the scheduler breaks affinity for us. The behavior is "changing the cpumask to cpu_possible_mask". And there might be some other CPUs online later while the worker is still running with the pending work items. The worker should be allowed to use the later online CPUs as before and process the work items ASAP. If we use cpu_active_mask here, we can't achieve this goal but using cpu_possible_mask can. Fixes: 06249738a41a ("workqueue: Manually break affinity on hotplug") Signed-off-by: Lai Jiangshan Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Valentin Schneider Acked-by: Tejun Heo Tested-by: Paul E. McKenney Tested-by: Valentin Schneider Link: https://lkml.kernel.org/r/20210111152638.2417-4-jiangshanlai@gmail.com --- kernel/workqueue.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 9880b6c0e2721..1646331546eb6 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -4920,7 +4920,7 @@ static void unbind_workers(int cpu) raw_spin_unlock_irq(&pool->lock); for_each_pool_worker(worker, pool) - WARN_ON_ONCE(set_cpus_allowed_ptr(worker->task, cpu_active_mask) < 0); + WARN_ON_ONCE(set_cpus_allowed_ptr(worker->task, cpu_possible_mask) < 0); mutex_unlock(&wq_pool_attach_mutex); -- GitLab From 22f667c97aadbf481e2cae2d6feabdf431e27b31 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 15 Jan 2021 18:17:45 +0100 Subject: [PATCH 1251/2012] sched: Don't run cpu-online with balance_push() enabled We don't need to push away tasks when we come online, mark the push complete right before the CPU dies. XXX hotplug state machine has trouble with rollback here. Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Valentin Schneider Tested-by: Valentin Schneider Link: https://lkml.kernel.org/r/20210121103506.415606087@infradead.org --- kernel/sched/core.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 627534fa91961..8da0fd7f3cca1 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -7320,10 +7320,12 @@ static void balance_push_set(int cpu, bool on) struct rq_flags rf; rq_lock_irqsave(rq, &rf); - if (on) + if (on) { + WARN_ON_ONCE(rq->balance_callback); rq->balance_callback = &balance_push_callback; - else + } else if (rq->balance_callback == &balance_push_callback) { rq->balance_callback = NULL; + } rq_unlock_irqrestore(rq, &rf); } @@ -7441,6 +7443,10 @@ int sched_cpu_activate(unsigned int cpu) struct rq *rq = cpu_rq(cpu); struct rq_flags rf; + /* + * Make sure that when the hotplug state machine does a roll-back + * we clear balance_push. Ideally that would happen earlier... + */ balance_push_set(cpu, false); #ifdef CONFIG_SCHED_SMT @@ -7608,6 +7614,12 @@ int sched_cpu_dying(unsigned int cpu) } rq_unlock_irqrestore(rq, &rf); + /* + * Now that the CPU is offline, make sure we're welcome + * to new tasks once we come back up. + */ + balance_push_set(cpu, false); + calc_load_migrate(rq); update_max_interval(); nohz_balance_exit_idle(rq); -- GitLab From ac687e6e8c26181a33270efd1a2e2241377924b0 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 12 Jan 2021 11:24:04 +0100 Subject: [PATCH 1252/2012] kthread: Extract KTHREAD_IS_PER_CPU There is a need to distinguish geniune per-cpu kthreads from kthreads that happen to have a single CPU affinity. Geniune per-cpu kthreads are kthreads that are CPU affine for correctness, these will obviously have PF_KTHREAD set, but must also have PF_NO_SETAFFINITY set, lest userspace modify their affinity and ruins things. However, these two things are not sufficient, PF_NO_SETAFFINITY is also set on other tasks that have their affinities controlled through other means, like for instance workqueues. Therefore another bit is needed; it turns out kthread_create_per_cpu() already has such a bit: KTHREAD_IS_PER_CPU, which is used to make kthread_park()/kthread_unpark() work correctly. Expose this flag and remove the implicit setting of it from kthread_create_on_cpu(); the io_uring usage of it seems dubious at best. Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Valentin Schneider Tested-by: Valentin Schneider Link: https://lkml.kernel.org/r/20210121103506.557620262@infradead.org --- include/linux/kthread.h | 3 +++ kernel/kthread.c | 27 ++++++++++++++++++++++++++- kernel/smpboot.c | 1 + 3 files changed, 30 insertions(+), 1 deletion(-) diff --git a/include/linux/kthread.h b/include/linux/kthread.h index 65b81e0c494d2..2484ed97e72f5 100644 --- a/include/linux/kthread.h +++ b/include/linux/kthread.h @@ -33,6 +33,9 @@ struct task_struct *kthread_create_on_cpu(int (*threadfn)(void *data), unsigned int cpu, const char *namefmt); +void kthread_set_per_cpu(struct task_struct *k, int cpu); +bool kthread_is_per_cpu(struct task_struct *k); + /** * kthread_run - create and wake a thread. * @threadfn: the function to run until signal_pending(current). diff --git a/kernel/kthread.c b/kernel/kthread.c index a5eceecd4513c..e0e4a423f184c 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -493,11 +493,36 @@ struct task_struct *kthread_create_on_cpu(int (*threadfn)(void *data), return p; kthread_bind(p, cpu); /* CPU hotplug need to bind once again when unparking the thread. */ - set_bit(KTHREAD_IS_PER_CPU, &to_kthread(p)->flags); to_kthread(p)->cpu = cpu; return p; } +void kthread_set_per_cpu(struct task_struct *k, int cpu) +{ + struct kthread *kthread = to_kthread(k); + if (!kthread) + return; + + WARN_ON_ONCE(!(k->flags & PF_NO_SETAFFINITY)); + + if (cpu < 0) { + clear_bit(KTHREAD_IS_PER_CPU, &kthread->flags); + return; + } + + kthread->cpu = cpu; + set_bit(KTHREAD_IS_PER_CPU, &kthread->flags); +} + +bool kthread_is_per_cpu(struct task_struct *k) +{ + struct kthread *kthread = to_kthread(k); + if (!kthread) + return false; + + return test_bit(KTHREAD_IS_PER_CPU, &kthread->flags); +} + /** * kthread_unpark - unpark a thread created by kthread_create(). * @k: thread created by kthread_create(). diff --git a/kernel/smpboot.c b/kernel/smpboot.c index 2efe1e206167c..f25208e8df836 100644 --- a/kernel/smpboot.c +++ b/kernel/smpboot.c @@ -188,6 +188,7 @@ __smpboot_create_thread(struct smp_hotplug_thread *ht, unsigned int cpu) kfree(td); return PTR_ERR(tsk); } + kthread_set_per_cpu(tsk, cpu); /* * Park the thread so that it could start right on the CPU * when it is available. -- GitLab From 5c25b5ff89f004c30b04759dc34ace8585a4085f Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 12 Jan 2021 11:26:49 +0100 Subject: [PATCH 1253/2012] workqueue: Tag bound workers with KTHREAD_IS_PER_CPU Mark the per-cpu workqueue workers as KTHREAD_IS_PER_CPU. Workqueues have unfortunate semantics in that per-cpu workers are not default flushed and parked during hotplug, however a subset does manual flush on hotplug and hard relies on them for correctness. Therefore play silly games.. Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Valentin Schneider Tested-by: Valentin Schneider Link: https://lkml.kernel.org/r/20210121103506.693465814@infradead.org --- kernel/workqueue.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 1646331546eb6..cce34336fbd7a 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -1861,6 +1861,8 @@ static void worker_attach_to_pool(struct worker *worker, */ if (pool->flags & POOL_DISASSOCIATED) worker->flags |= WORKER_UNBOUND; + else + kthread_set_per_cpu(worker->task, pool->cpu); list_add_tail(&worker->node, &pool->workers); worker->pool = pool; @@ -1883,6 +1885,7 @@ static void worker_detach_from_pool(struct worker *worker) mutex_lock(&wq_pool_attach_mutex); + kthread_set_per_cpu(worker->task, -1); list_del(&worker->node); worker->pool = NULL; @@ -4919,8 +4922,10 @@ static void unbind_workers(int cpu) raw_spin_unlock_irq(&pool->lock); - for_each_pool_worker(worker, pool) + for_each_pool_worker(worker, pool) { + kthread_set_per_cpu(worker->task, -1); WARN_ON_ONCE(set_cpus_allowed_ptr(worker->task, cpu_possible_mask) < 0); + } mutex_unlock(&wq_pool_attach_mutex); @@ -4972,9 +4977,11 @@ static void rebind_workers(struct worker_pool *pool) * of all workers first and then clear UNBOUND. As we're called * from CPU_ONLINE, the following shouldn't fail. */ - for_each_pool_worker(worker, pool) + for_each_pool_worker(worker, pool) { + kthread_set_per_cpu(worker->task, pool->cpu); WARN_ON_ONCE(set_cpus_allowed_ptr(worker->task, pool->attrs->cpumask) < 0); + } raw_spin_lock_irq(&pool->lock); -- GitLab From 640f17c82460e9724fd256f0a1f5d99e7ff0bda4 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 15 Jan 2021 19:08:36 +0100 Subject: [PATCH 1254/2012] workqueue: Restrict affinity change to rescuer create_worker() will already set the right affinity using kthread_bind_mask(), this means only the rescuer will need to change it's affinity. Howveer, while in cpu-hot-unplug a regular task is not allowed to run on online&&!active as it would be pushed away quite agressively. We need KTHREAD_IS_PER_CPU to survive in that environment. Therefore set the affinity after getting that magic flag. Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Valentin Schneider Tested-by: Valentin Schneider Link: https://lkml.kernel.org/r/20210121103506.826629830@infradead.org --- kernel/workqueue.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index cce34336fbd7a..894bb885b40b1 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -1848,12 +1848,6 @@ static void worker_attach_to_pool(struct worker *worker, { mutex_lock(&wq_pool_attach_mutex); - /* - * set_cpus_allowed_ptr() will fail if the cpumask doesn't have any - * online CPUs. It'll be re-applied when any of the CPUs come up. - */ - set_cpus_allowed_ptr(worker->task, pool->attrs->cpumask); - /* * The wq_pool_attach_mutex ensures %POOL_DISASSOCIATED remains * stable across this function. See the comments above the flag @@ -1864,6 +1858,9 @@ static void worker_attach_to_pool(struct worker *worker, else kthread_set_per_cpu(worker->task, pool->cpu); + if (worker->rescue_wq) + set_cpus_allowed_ptr(worker->task, pool->attrs->cpumask); + list_add_tail(&worker->node, &pool->workers); worker->pool = pool; -- GitLab From 975707f227b07a8212060f94447171d15d7a681b Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 20 Jan 2021 15:05:41 +0100 Subject: [PATCH 1255/2012] sched: Prepare to use balance_push in ttwu() In preparation of using the balance_push state in ttwu() we need it to provide a reliable and consistent state. The immediate problem is that rq->balance_callback gets cleared every schedule() and then re-set in the balance_push_callback() itself. This is not a reliable signal, so add a variable that stays set during the entire time. Also move setting it before the synchronize_rcu() in sched_cpu_deactivate(), such that we get guaranteed visibility to ttwu(), which is a preempt-disable region. Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Valentin Schneider Tested-by: Valentin Schneider Link: https://lkml.kernel.org/r/20210121103506.966069627@infradead.org --- kernel/sched/core.c | 11 ++++++----- kernel/sched/sched.h | 1 + 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 8da0fd7f3cca1..16946b55e8e78 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -7320,6 +7320,7 @@ static void balance_push_set(int cpu, bool on) struct rq_flags rf; rq_lock_irqsave(rq, &rf); + rq->balance_push = on; if (on) { WARN_ON_ONCE(rq->balance_callback); rq->balance_callback = &balance_push_callback; @@ -7489,17 +7490,17 @@ int sched_cpu_deactivate(unsigned int cpu) int ret; set_cpu_active(cpu, false); + balance_push_set(cpu, true); + /* - * We've cleared cpu_active_mask, wait for all preempt-disabled and RCU - * users of this state to go away such that all new such users will - * observe it. + * We've cleared cpu_active_mask / set balance_push, wait for all + * preempt-disabled and RCU users of this state to go away such that + * all new such users will observe it. * * Do sync before park smpboot threads to take care the rcu boost case. */ synchronize_rcu(); - balance_push_set(cpu, true); - rq_lock_irqsave(rq, &rf); if (rq->rd) { update_rq_clock(rq); diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 12ada79d40f33..bb09988451a04 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -975,6 +975,7 @@ struct rq { unsigned long cpu_capacity_orig; struct callback_head *balance_callback; + unsigned char balance_push; unsigned char nohz_idle_balance; unsigned char idle_balance; -- GitLab From 5ba2ffba13a1e24e7b153683e97300f9cc6f605a Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 12 Jan 2021 11:28:16 +0100 Subject: [PATCH 1256/2012] sched: Fix CPU hotplug / tighten is_per_cpu_kthread() Prior to commit 1cf12e08bc4d ("sched/hotplug: Consolidate task migration on CPU unplug") we'd leave any task on the dying CPU and break affinity and force them off at the very end. This scheme had to change in order to enable migrate_disable(). One cannot wait for migrate_disable() to complete while stuck in stop_machine(). Furthermore, since we need at the very least: idle, hotplug and stop threads at any point before stop_machine, we can't break affinity and/or push those away. Under the assumption that all per-cpu kthreads are sanely handled by CPU hotplug, the new code no long breaks affinity or migrates any of them (which then includes the critical ones above). However, there's an important difference between per-cpu kthreads and kthreads that happen to have a single CPU affinity which is lost. The latter class very much relies on the forced affinity breaking and migration semantics previously provided. Use the new kthread_is_per_cpu() infrastructure to tighten is_per_cpu_kthread() and fix the hot-unplug problems stemming from the change. Fixes: 1cf12e08bc4d ("sched/hotplug: Consolidate task migration on CPU unplug") Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Valentin Schneider Tested-by: Valentin Schneider Link: https://lkml.kernel.org/r/20210121103507.102416009@infradead.org --- kernel/sched/core.c | 39 +++++++++++++++++++++++++++++++++++---- 1 file changed, 35 insertions(+), 4 deletions(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 16946b55e8e78..56b09628692aa 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -1796,13 +1796,28 @@ static inline bool rq_has_pinned_tasks(struct rq *rq) */ static inline bool is_cpu_allowed(struct task_struct *p, int cpu) { + /* When not in the task's cpumask, no point in looking further. */ if (!cpumask_test_cpu(cpu, p->cpus_ptr)) return false; - if (is_per_cpu_kthread(p) || is_migration_disabled(p)) + /* migrate_disabled() must be allowed to finish. */ + if (is_migration_disabled(p)) return cpu_online(cpu); - return cpu_active(cpu); + /* Non kernel threads are not allowed during either online or offline. */ + if (!(p->flags & PF_KTHREAD)) + return cpu_active(cpu); + + /* KTHREAD_IS_PER_CPU is always allowed. */ + if (kthread_is_per_cpu(p)) + return cpu_online(cpu); + + /* Regular kernel threads don't get to stay during offline. */ + if (cpu_rq(cpu)->balance_push) + return false; + + /* But are allowed during online. */ + return cpu_online(cpu); } /* @@ -3121,6 +3136,13 @@ bool cpus_share_cache(int this_cpu, int that_cpu) static inline bool ttwu_queue_cond(int cpu, int wake_flags) { + /* + * Do not complicate things with the async wake_list while the CPU is + * in hotplug state. + */ + if (!cpu_active(cpu)) + return false; + /* * If the CPU does not share cache, then queue the task on the * remote rqs wakelist to avoid accessing remote data. @@ -7276,8 +7298,14 @@ static void balance_push(struct rq *rq) /* * Both the cpu-hotplug and stop task are in this case and are * required to complete the hotplug process. + * + * XXX: the idle task does not match kthread_is_per_cpu() due to + * histerical raisins. */ - if (is_per_cpu_kthread(push_task) || is_migration_disabled(push_task)) { + if (rq->idle == push_task || + ((push_task->flags & PF_KTHREAD) && kthread_is_per_cpu(push_task)) || + is_migration_disabled(push_task)) { + /* * If this is the idle task on the outgoing CPU try to wake * up the hotplug control thread which might wait for the @@ -7309,7 +7337,7 @@ static void balance_push(struct rq *rq) /* * At this point need_resched() is true and we'll take the loop in * schedule(). The next pick is obviously going to be the stop task - * which is_per_cpu_kthread() and will push this task away. + * which kthread_is_per_cpu() and will push this task away. */ raw_spin_lock(&rq->lock); } @@ -7497,6 +7525,9 @@ int sched_cpu_deactivate(unsigned int cpu) * preempt-disabled and RCU users of this state to go away such that * all new such users will observe it. * + * Specifically, we rely on ttwu to no longer target this CPU, see + * ttwu_queue_cond() and is_cpu_allowed(). + * * Do sync before park smpboot threads to take care the rcu boost case. */ synchronize_rcu(); -- GitLab From 741ba80f6f9a4702089c122129f22df9774b3e64 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Sat, 16 Jan 2021 11:56:37 +0100 Subject: [PATCH 1257/2012] sched: Relax the set_cpus_allowed_ptr() semantics Now that we have KTHREAD_IS_PER_CPU to denote the critical per-cpu tasks to retain during CPU offline, we can relax the warning in set_cpus_allowed_ptr(). Any spurious kthread that wants to get on at the last minute will get pushed off before it can run. While during CPU online there is no harm, and actual benefit, to allowing kthreads back on early, it simplifies hotplug code and fixes a number of outstanding races. Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Lai jiangshan Reviewed-by: Valentin Schneider Tested-by: Valentin Schneider Link: https://lkml.kernel.org/r/20210121103507.240724591@infradead.org --- kernel/sched/core.c | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 56b09628692aa..ff74fca39ed21 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -2342,7 +2342,9 @@ static int __set_cpus_allowed_ptr(struct task_struct *p, if (p->flags & PF_KTHREAD || is_migration_disabled(p)) { /* - * Kernel threads are allowed on online && !active CPUs. + * Kernel threads are allowed on online && !active CPUs, + * however, during cpu-hot-unplug, even these might get pushed + * away if not KTHREAD_IS_PER_CPU. * * Specifically, migration_disabled() tasks must not fail the * cpumask_any_and_distribute() pick below, esp. so on @@ -2386,16 +2388,6 @@ static int __set_cpus_allowed_ptr(struct task_struct *p, __do_set_cpus_allowed(p, new_mask, flags); - if (p->flags & PF_KTHREAD) { - /* - * For kernel threads that do indeed end up on online && - * !active we want to ensure they are strict per-CPU threads. - */ - WARN_ON(cpumask_intersects(new_mask, cpu_online_mask) && - !cpumask_intersects(new_mask, cpu_active_mask) && - p->nr_cpus_allowed != 1); - } - return affine_move_task(rq, p, &rf, dest_cpu, flags); out: @@ -7518,6 +7510,13 @@ int sched_cpu_deactivate(unsigned int cpu) int ret; set_cpu_active(cpu, false); + + /* + * From this point forward, this CPU will refuse to run any task that + * is not: migrate_disable() or KTHREAD_IS_PER_CPU, and will actively + * push those tasks away until this gets cleared, see + * sched_cpu_dying(). + */ balance_push_set(cpu, true); /* -- GitLab From 2fe8ef106238b274c505c480ecf00d8765abf0d8 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 22 Jan 2021 16:19:42 +0100 Subject: [PATCH 1258/2012] cfg80211: change netdev registration/unregistration semantics We used to not require anything in terms of registering netdevs with cfg80211, using a netdev notifier instead. However, in the next patch reducing RTNL locking, this causes big problems, and the simplest way is to just require drivers to do things better. Change the registration/unregistration semantics to require the drivers to call cfg80211_(un)register_netdevice() when this is happening due to a cfg80211 request, i.e. add_virtual_intf() or del_virtual_intf() (or if it somehow has to happen in any other cfg80211 callback). Otherwise, in other contexts, drivers may continue to use the normal netdev (un)registration functions as usual. Internally, we still use the netdev notifier and track (by the new wdev->registered bool) if the wdev had already been added to cfg80211 or not. Link: https://lore.kernel.org/r/20210122161942.cf2f4b65e4e9.Ida8234e50da13eb675b557bac52a713ad4eddf71@changeid Signed-off-by: Johannes Berg --- drivers/net/wireless/ath/ath6kl/cfg80211.c | 4 +- drivers/net/wireless/ath/wil6210/netdev.c | 4 +- .../broadcom/brcm80211/brcmfmac/core.c | 6 +- .../net/wireless/marvell/mwifiex/cfg80211.c | 4 +- .../wireless/microchip/wilc1000/cfg80211.c | 2 +- drivers/net/wireless/microchip/wilc1000/mon.c | 4 +- .../net/wireless/microchip/wilc1000/netdev.c | 2 +- .../net/wireless/quantenna/qtnfmac/cfg80211.c | 4 +- drivers/net/wireless/quantenna/qtnfmac/core.c | 2 +- include/net/cfg80211.h | 40 ++++-- net/mac80211/iface.c | 9 +- net/wireless/core.c | 119 +++++++++++------- 12 files changed, 123 insertions(+), 77 deletions(-) diff --git a/drivers/net/wireless/ath/ath6kl/cfg80211.c b/drivers/net/wireless/ath/ath6kl/cfg80211.c index 9c83e9a4299b0..29527e8dcced7 100644 --- a/drivers/net/wireless/ath/ath6kl/cfg80211.c +++ b/drivers/net/wireless/ath/ath6kl/cfg80211.c @@ -3648,7 +3648,7 @@ void ath6kl_cfg80211_vif_cleanup(struct ath6kl_vif *vif) kfree(mc_filter); } - unregister_netdevice(vif->ndev); + cfg80211_unregister_netdevice(vif->ndev); ar->num_vif--; } @@ -3821,7 +3821,7 @@ struct wireless_dev *ath6kl_interface_add(struct ath6kl *ar, const char *name, netdev_set_default_ethtool_ops(ndev, &ath6kl_ethtool_ops); - if (register_netdevice(ndev)) + if (cfg80211_register_netdevice(ndev)) goto err; ar->avail_idx_map &= ~BIT(fw_vif_idx); diff --git a/drivers/net/wireless/ath/wil6210/netdev.c b/drivers/net/wireless/ath/wil6210/netdev.c index 07b4a252a23c9..472fe804203d2 100644 --- a/drivers/net/wireless/ath/wil6210/netdev.c +++ b/drivers/net/wireless/ath/wil6210/netdev.c @@ -424,7 +424,7 @@ int wil_vif_add(struct wil6210_priv *wil, struct wil6210_vif *vif) if (rc) return rc; } - rc = register_netdevice(ndev); + rc = cfg80211_register_netdevice(ndev); if (rc < 0) { dev_err(&ndev->dev, "Failed to register netdev: %d\n", rc); if (any_active && vif->mid != 0) @@ -511,7 +511,7 @@ void wil_vif_remove(struct wil6210_priv *wil, u8 mid) /* during unregister_netdevice cfg80211_leave may perform operations * such as stop AP, disconnect, so we only clear the VIF afterwards */ - unregister_netdevice(ndev); + cfg80211_unregister_netdevice(ndev); if (any_active && vif->mid != 0) wmi_port_delete(wil, vif->mid); diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c index 3dd28f5fef19e..6cf308d5934c9 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c @@ -657,7 +657,7 @@ int brcmf_net_attach(struct brcmf_if *ifp, bool rtnl_locked) INIT_WORK(&ifp->ndoffload_work, _brcmf_update_ndtable); if (rtnl_locked) - err = register_netdevice(ndev); + err = cfg80211_register_netdevice(ndev); else err = register_netdev(ndev); if (err != 0) { @@ -681,7 +681,7 @@ void brcmf_net_detach(struct net_device *ndev, bool rtnl_locked) { if (ndev->reg_state == NETREG_REGISTERED) { if (rtnl_locked) - unregister_netdevice(ndev); + cfg80211_unregister_netdevice(ndev); else unregister_netdev(ndev); } else { @@ -758,7 +758,7 @@ int brcmf_net_mon_attach(struct brcmf_if *ifp) ndev = ifp->ndev; ndev->netdev_ops = &brcmf_netdev_ops_mon; - err = register_netdevice(ndev); + err = cfg80211_register_netdevice(ndev); if (err) bphy_err(drvr, "Failed to register %s device\n", ndev->name); diff --git a/drivers/net/wireless/marvell/mwifiex/cfg80211.c b/drivers/net/wireless/marvell/mwifiex/cfg80211.c index a6b9dc6700b14..15e1cee7f465d 100644 --- a/drivers/net/wireless/marvell/mwifiex/cfg80211.c +++ b/drivers/net/wireless/marvell/mwifiex/cfg80211.c @@ -3081,7 +3081,7 @@ struct wireless_dev *mwifiex_add_virtual_intf(struct wiphy *wiphy, mutex_init(&priv->async_mutex); /* Register network device */ - if (register_netdevice(dev)) { + if (cfg80211_register_netdevice(dev)) { mwifiex_dbg(adapter, ERROR, "cannot register network device\n"); ret = -EFAULT; goto err_reg_netdev; @@ -3160,7 +3160,7 @@ int mwifiex_del_virtual_intf(struct wiphy *wiphy, struct wireless_dev *wdev) netif_carrier_off(priv->netdev); if (wdev->netdev->reg_state == NETREG_REGISTERED) - unregister_netdevice(wdev->netdev); + cfg80211_unregister_netdevice(wdev->netdev); if (priv->dfs_cac_workqueue) { flush_workqueue(priv->dfs_cac_workqueue); diff --git a/drivers/net/wireless/microchip/wilc1000/cfg80211.c b/drivers/net/wireless/microchip/wilc1000/cfg80211.c index e3dd205cbbe57..96973ec7bd9ac 100644 --- a/drivers/net/wireless/microchip/wilc1000/cfg80211.c +++ b/drivers/net/wireless/microchip/wilc1000/cfg80211.c @@ -1538,7 +1538,7 @@ static int del_virtual_intf(struct wiphy *wiphy, struct wireless_dev *wdev) wilc_wfi_deinit_mon_interface(wl, true); vif = netdev_priv(wdev->netdev); cfg80211_stop_iface(wiphy, wdev, GFP_KERNEL); - unregister_netdevice(vif->ndev); + cfg80211_unregister_netdevice(vif->ndev); vif->monitor_flag = 0; wilc_set_operation_mode(vif, 0, 0, 0); diff --git a/drivers/net/wireless/microchip/wilc1000/mon.c b/drivers/net/wireless/microchip/wilc1000/mon.c index b5a1b65c087ca..6bd63934c2d84 100644 --- a/drivers/net/wireless/microchip/wilc1000/mon.c +++ b/drivers/net/wireless/microchip/wilc1000/mon.c @@ -233,7 +233,7 @@ struct net_device *wilc_wfi_init_mon_interface(struct wilc *wl, wl->monitor_dev->netdev_ops = &wilc_wfi_netdev_ops; wl->monitor_dev->needs_free_netdev = true; - if (register_netdevice(wl->monitor_dev)) { + if (cfg80211_register_netdevice(wl->monitor_dev)) { netdev_err(real_dev, "register_netdevice failed\n"); free_netdev(wl->monitor_dev); return NULL; @@ -251,7 +251,7 @@ void wilc_wfi_deinit_mon_interface(struct wilc *wl, bool rtnl_locked) return; if (rtnl_locked) - unregister_netdevice(wl->monitor_dev); + cfg80211_unregister_netdevice(wl->monitor_dev); else unregister_netdev(wl->monitor_dev); wl->monitor_dev = NULL; diff --git a/drivers/net/wireless/microchip/wilc1000/netdev.c b/drivers/net/wireless/microchip/wilc1000/netdev.c index 2a1fbbdd6a4bd..643cbb155439d 100644 --- a/drivers/net/wireless/microchip/wilc1000/netdev.c +++ b/drivers/net/wireless/microchip/wilc1000/netdev.c @@ -950,7 +950,7 @@ struct wilc_vif *wilc_netdev_ifc_init(struct wilc *wl, const char *name, vif->priv.dev = ndev; if (rtnl_locked) - ret = register_netdevice(ndev); + ret = cfg80211_register_netdevice(ndev); else ret = register_netdev(ndev); diff --git a/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c b/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c index 54cdf3ad09d75..504b4d0b98c4e 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c +++ b/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c @@ -180,7 +180,7 @@ int qtnf_del_virtual_intf(struct wiphy *wiphy, struct wireless_dev *wdev) cancel_work_sync(&vif->high_pri_tx_work); if (netdev->reg_state == NETREG_REGISTERED) - unregister_netdevice(netdev); + cfg80211_unregister_netdevice(netdev); if (qtnf_cmd_send_del_intf(vif)) pr_err("VIF%u.%u: failed to delete VIF\n", vif->mac->macid, @@ -267,7 +267,7 @@ static struct wireless_dev *qtnf_add_virtual_intf(struct wiphy *wiphy, if (qtnf_hwcap_is_set(&mac->bus->hw_info, QLINK_HW_CAPAB_HW_BRIDGE)) { ret = qtnf_cmd_netdev_changeupper(vif, vif->netdev->ifindex); if (ret) { - unregister_netdevice(vif->netdev); + cfg80211_unregister_netdevice(vif->netdev); vif->netdev = NULL; goto error_del_vif; } diff --git a/drivers/net/wireless/quantenna/qtnfmac/core.c b/drivers/net/wireless/quantenna/qtnfmac/core.c index ad726bd100ec7..18964e2a9f281 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/core.c +++ b/drivers/net/wireless/quantenna/qtnfmac/core.c @@ -492,7 +492,7 @@ int qtnf_core_net_attach(struct qtnf_wmac *mac, struct qtnf_vif *vif, SET_NETDEV_DEV(dev, wiphy_dev(wiphy)); - ret = register_netdevice(dev); + ret = cfg80211_register_netdevice(dev); if (ret) { free_netdev(dev); vif->netdev = NULL; diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 798f8eb15e00c..e7703fdbac8db 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -5228,6 +5228,7 @@ struct cfg80211_cqm_config; * * @wiphy: pointer to hardware description * @iftype: interface type + * @registered: is this wdev already registered with cfg80211 * @list: (private) Used to collect the interfaces * @netdev: (private) Used to reference back to the netdev, may be %NULL * @identifier: (private) Identifier used in nl80211 to identify this @@ -5311,7 +5312,7 @@ struct wireless_dev { struct mutex mtx; - bool use_4addr, is_running; + bool use_4addr, is_running, registered; u8 address[ETH_ALEN] __aligned(sizeof(u16)); @@ -7654,18 +7655,41 @@ u32 cfg80211_calculate_bitrate(struct rate_info *rate); * cfg80211_unregister_wdev - remove the given wdev * @wdev: struct wireless_dev to remove * - * Call this function only for wdevs that have no netdev assigned, - * e.g. P2P Devices. It removes the device from the list so that - * it can no longer be used. It is necessary to call this function - * even when cfg80211 requests the removal of the interface by - * calling the del_virtual_intf() callback. The function must also - * be called when the driver wishes to unregister the wdev, e.g. - * when the device is unbound from the driver. + * This function removes the device so it can no longer be used. It is necessary + * to call this function even when cfg80211 requests the removal of the device + * by calling the del_virtual_intf() callback. The function must also be called + * when the driver wishes to unregister the wdev, e.g. when the hardware device + * is unbound from the driver. * * Requires the RTNL to be held. */ void cfg80211_unregister_wdev(struct wireless_dev *wdev); +/** + * cfg80211_register_netdevice - register the given netdev + * @dev: the netdev to register + * + * Note: In contexts coming from cfg80211 callbacks, you must call this rather + * than register_netdevice(), unregister_netdev() is impossible as the RTNL is + * held. Otherwise, both register_netdevice() and register_netdev() are usable + * instead as well. + */ +int cfg80211_register_netdevice(struct net_device *dev); + +/** + * cfg80211_unregister_netdevice - unregister the given netdev + * @dev: the netdev to register + * + * Note: In contexts coming from cfg80211 callbacks, you must call this rather + * than unregister_netdevice(), unregister_netdev() is impossible as the RTNL + * is held. Otherwise, both unregister_netdevice() and unregister_netdev() are + * usable instead as well. + */ +static inline void cfg80211_unregister_netdevice(struct net_device *dev) +{ + cfg80211_unregister_wdev(dev->ieee80211_ptr); +} + /** * struct cfg80211_ft_event_params - FT Information Elements * @ies: FT IEs diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 1b44690823aa5..fcaf4d20cabf5 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -1976,7 +1976,7 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name, ndev->min_mtu = 256; ndev->max_mtu = local->hw.max_mtu; - ret = register_netdevice(ndev); + ret = cfg80211_register_netdevice(ndev); if (ret) { free_netdev(ndev); return ret; @@ -2006,10 +2006,9 @@ void ieee80211_if_remove(struct ieee80211_sub_if_data *sdata) synchronize_rcu(); - if (sdata->dev) { - unregister_netdevice(sdata->dev); - } else { - cfg80211_unregister_wdev(&sdata->wdev); + cfg80211_unregister_wdev(&sdata->wdev); + + if (!sdata->dev) { ieee80211_teardown_sdata(sdata); kfree(sdata); } diff --git a/net/wireless/core.c b/net/wireless/core.c index 4b1f35e976e70..9e7d1f9620bd3 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -1094,7 +1094,8 @@ void cfg80211_cqm_config_free(struct wireless_dev *wdev) wdev->cqm_config = NULL; } -static void __cfg80211_unregister_wdev(struct wireless_dev *wdev, bool sync) +static void _cfg80211_unregister_wdev(struct wireless_dev *wdev, + bool unregister_netdev) { struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); @@ -1104,9 +1105,16 @@ static void __cfg80211_unregister_wdev(struct wireless_dev *wdev, bool sync) nl80211_notify_iface(rdev, wdev, NL80211_CMD_DEL_INTERFACE); + wdev->registered = false; + + if (wdev->netdev) { + sysfs_remove_link(&wdev->netdev->dev.kobj, "phy80211"); + if (unregister_netdev) + unregister_netdevice(wdev->netdev); + } + list_del_rcu(&wdev->list); - if (sync) - synchronize_rcu(); + synchronize_net(); rdev->devlist_generation++; cfg80211_mlme_purge_registrations(wdev); @@ -1131,14 +1139,23 @@ static void __cfg80211_unregister_wdev(struct wireless_dev *wdev, bool sync) flush_work(&wdev->disconnect_wk); cfg80211_cqm_config_free(wdev); + + /* + * Ensure that all events have been processed and + * freed. + */ + cfg80211_process_wdev_events(wdev); + + if (WARN_ON(wdev->current_bss)) { + cfg80211_unhold_bss(wdev->current_bss); + cfg80211_put_bss(wdev->wiphy, &wdev->current_bss->pub); + wdev->current_bss = NULL; + } } void cfg80211_unregister_wdev(struct wireless_dev *wdev) { - if (WARN_ON(wdev->netdev)) - return; - - __cfg80211_unregister_wdev(wdev, true); + _cfg80211_unregister_wdev(wdev, true); } EXPORT_SYMBOL(cfg80211_unregister_wdev); @@ -1290,10 +1307,49 @@ void cfg80211_register_wdev(struct cfg80211_registered_device *rdev, wdev->identifier = ++rdev->wdev_id; list_add_rcu(&wdev->list, &rdev->wiphy.wdev_list); rdev->devlist_generation++; + wdev->registered = true; nl80211_notify_iface(rdev, wdev, NL80211_CMD_NEW_INTERFACE); } +int cfg80211_register_netdevice(struct net_device *dev) +{ + struct wireless_dev *wdev = dev->ieee80211_ptr; + struct cfg80211_registered_device *rdev; + int ret; + + ASSERT_RTNL(); + + if (WARN_ON(!wdev)) + return -EINVAL; + + rdev = wiphy_to_rdev(wdev->wiphy); + + lockdep_assert_held(&rdev->wiphy.mtx); + + /* we'll take care of this */ + wdev->registered = true; + ret = register_netdevice(dev); + if (ret) + goto out; + + if (sysfs_create_link(&dev->dev.kobj, &rdev->wiphy.dev.kobj, + "phy80211")) { + pr_err("failed to add phy80211 symlink to netdev!\n"); + unregister_netdevice(dev); + ret = -EINVAL; + goto out; + } + + cfg80211_register_wdev(rdev, wdev); + ret = 0; +out: + if (ret) + wdev->registered = false; + return ret; +} +EXPORT_SYMBOL(cfg80211_register_netdevice); + static int cfg80211_netdev_notifier_call(struct notifier_block *nb, unsigned long state, void *ptr) { @@ -1319,17 +1375,16 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb, cfg80211_init_wdev(wdev); break; case NETDEV_REGISTER: + if (!wdev->registered) + cfg80211_register_wdev(rdev, wdev); + break; + case NETDEV_UNREGISTER: /* - * NB: cannot take rdev->mtx here because this may be - * called within code protected by it when interfaces - * are added with nl80211. + * It is possible to get NETDEV_UNREGISTER multiple times, + * so check wdev->registered. */ - if (sysfs_create_link(&dev->dev.kobj, &rdev->wiphy.dev.kobj, - "phy80211")) { - pr_err("failed to add phy80211 symlink to netdev!\n"); - } - - cfg80211_register_wdev(rdev, wdev); + if (wdev->registered) + _cfg80211_unregister_wdev(wdev, false); break; case NETDEV_GOING_DOWN: cfg80211_leave(rdev, wdev); @@ -1401,38 +1456,6 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb, wdev->ps = false; } break; - case NETDEV_UNREGISTER: - /* - * It is possible to get NETDEV_UNREGISTER - * multiple times. To detect that, check - * that the interface is still on the list - * of registered interfaces, and only then - * remove and clean it up. - */ - if (!list_empty(&wdev->list)) { - __cfg80211_unregister_wdev(wdev, false); - sysfs_remove_link(&dev->dev.kobj, "phy80211"); - } - /* - * synchronise (so that we won't find this netdev - * from other code any more) and then clear the list - * head so that the above code can safely check for - * !list_empty() to avoid double-cleanup. - */ - synchronize_rcu(); - INIT_LIST_HEAD(&wdev->list); - /* - * Ensure that all events have been processed and - * freed. - */ - cfg80211_process_wdev_events(wdev); - - if (WARN_ON(wdev->current_bss)) { - cfg80211_unhold_bss(wdev->current_bss); - cfg80211_put_bss(wdev->wiphy, &wdev->current_bss->pub); - wdev->current_bss = NULL; - } - break; case NETDEV_PRE_UP: if (!cfg80211_iftype_allowed(wdev->wiphy, wdev->iftype, wdev->use_4addr, 0)) -- GitLab From 75bd4bff300b3c5252d4a0e7a959569c62d1dbae Mon Sep 17 00:00:00 2001 From: Qais Yousef Date: Fri, 22 Jan 2021 11:09:09 +0000 Subject: [PATCH 1259/2012] arm64: kprobes: Fix Uexpected kernel BRK exception at EL1 I was hitting the below panic continuously when attaching kprobes to scheduler functions [ 159.045212] Unexpected kernel BRK exception at EL1 [ 159.053753] Internal error: BRK handler: f2000006 [#1] PREEMPT SMP [ 159.059954] Modules linked in: [ 159.063025] CPU: 2 PID: 0 Comm: swapper/2 Not tainted 5.11.0-rc4-00008-g1e2a199f6ccd #56 [rt-app] [1] Exiting.[ 159.071166] Hardware name: ARM Juno development board (r2) (DT) [ 159.079689] pstate: 600003c5 (nZCv DAIF -PAN -UAO -TCO BTYPE=--) [ 159.085723] pc : 0xffff80001624501c [ 159.089377] lr : attach_entity_load_avg+0x2ac/0x350 [ 159.094271] sp : ffff80001622b640 [rt-app] [0] Exiting.[ 159.097591] x29: ffff80001622b640 x28: 0000000000000001 [ 159.105515] x27: 0000000000000049 x26: ffff000800b79980 [ 159.110847] x25: ffff00097ef37840 x24: 0000000000000000 [ 159.116331] x23: 00000024eacec1ec x22: ffff00097ef12b90 [ 159.121663] x21: ffff00097ef37700 x20: ffff800010119170 [rt-app] [11] Exiting.[ 159.126995] x19: ffff00097ef37840 x18: 000000000000000e [ 159.135003] x17: 0000000000000001 x16: 0000000000000019 [ 159.140335] x15: 0000000000000000 x14: 0000000000000000 [ 159.145666] x13: 0000000000000002 x12: 0000000000000002 [ 159.150996] x11: ffff80001592f9f0 x10: 0000000000000060 [ 159.156327] x9 : ffff8000100f6f9c x8 : be618290de0999a1 [ 159.161659] x7 : ffff80096a4b1000 x6 : 0000000000000000 [ 159.166990] x5 : ffff00097ef37840 x4 : 0000000000000000 [ 159.172321] x3 : ffff000800328948 x2 : 0000000000000000 [ 159.177652] x1 : 0000002507d52fec x0 : ffff00097ef12b90 [ 159.182983] Call trace: [ 159.185433] 0xffff80001624501c [ 159.188581] update_load_avg+0x2d0/0x778 [ 159.192516] enqueue_task_fair+0x134/0xe20 [ 159.196625] enqueue_task+0x4c/0x2c8 [ 159.200211] ttwu_do_activate+0x70/0x138 [ 159.204147] sched_ttwu_pending+0xbc/0x160 [ 159.208253] flush_smp_call_function_queue+0x16c/0x320 [ 159.213408] generic_smp_call_function_single_interrupt+0x1c/0x28 [ 159.219521] ipi_handler+0x1e8/0x3c8 [ 159.223106] handle_percpu_devid_irq+0xd8/0x460 [ 159.227650] generic_handle_irq+0x38/0x50 [ 159.231672] __handle_domain_irq+0x6c/0xc8 [ 159.235781] gic_handle_irq+0xcc/0xf0 [ 159.239452] el1_irq+0xb4/0x180 [ 159.242600] rcu_is_watching+0x28/0x70 [ 159.246359] rcu_read_lock_held_common+0x44/0x88 [ 159.250991] rcu_read_lock_any_held+0x30/0xc0 [ 159.255360] kretprobe_dispatcher+0xc4/0xf0 [ 159.259555] __kretprobe_trampoline_handler+0xc0/0x150 [ 159.264710] trampoline_probe_handler+0x38/0x58 [ 159.269255] kretprobe_trampoline+0x70/0xc4 [ 159.273450] run_rebalance_domains+0x54/0x80 [ 159.277734] __do_softirq+0x164/0x684 [ 159.281406] irq_exit+0x198/0x1b8 [ 159.284731] __handle_domain_irq+0x70/0xc8 [ 159.288840] gic_handle_irq+0xb0/0xf0 [ 159.292510] el1_irq+0xb4/0x180 [ 159.295658] arch_cpu_idle+0x18/0x28 [ 159.299245] default_idle_call+0x9c/0x3e8 [ 159.303265] do_idle+0x25c/0x2a8 [ 159.306502] cpu_startup_entry+0x2c/0x78 [ 159.310436] secondary_start_kernel+0x160/0x198 [ 159.314984] Code: d42000c0 aa1e03e9 d42000c0 aa1e03e9 (d42000c0) After a bit of head scratching and debugging it turned out that it is due to kprobe handler being interrupted by a tick that causes us to go into (I think another) kprobe handler. The culprit was kprobe_breakpoint_ss_handler() returning DBG_HOOK_ERROR which leads to the Unexpected kernel BRK exception. Reverting commit ba090f9cafd5 ("arm64: kprobes: Remove redundant kprobe_step_ctx") seemed to fix the problem for me. Further analysis showed that kcb->kprobe_status is set to KPROBE_REENTER when the error occurs. By teaching kprobe_breakpoint_ss_handler() to handle this status I can no longer reproduce the problem. Fixes: ba090f9cafd5 ("arm64: kprobes: Remove redundant kprobe_step_ctx") Signed-off-by: Qais Yousef Acked-by: Will Deacon Acked-by: Masami Hiramatsu Link: https://lore.kernel.org/r/20210122110909.3324607-1-qais.yousef@arm.com Signed-off-by: Catalin Marinas --- arch/arm64/kernel/probes/kprobes.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c index 89c64ada87324..66aac2881ba84 100644 --- a/arch/arm64/kernel/probes/kprobes.c +++ b/arch/arm64/kernel/probes/kprobes.c @@ -352,8 +352,8 @@ kprobe_breakpoint_ss_handler(struct pt_regs *regs, unsigned int esr) unsigned long addr = instruction_pointer(regs); struct kprobe *cur = kprobe_running(); - if (cur && (kcb->kprobe_status == KPROBE_HIT_SS) - && ((unsigned long)&cur->ainsn.api.insn[1] == addr)) { + if (cur && (kcb->kprobe_status & (KPROBE_HIT_SS | KPROBE_REENTER)) && + ((unsigned long)&cur->ainsn.api.insn[1] == addr)) { kprobes_restore_local_irqflag(kcb, regs); post_kprobe_handler(cur, kcb, regs); -- GitLab From 1230d94820c9cf74776eaac4ee45081e33ea1a30 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Fri, 11 Dec 2020 22:12:12 -0800 Subject: [PATCH 1260/2012] devlink: Prepare code to fill multiple port function attributes Prepare code to fill zero or more port function optional attributes. Subsequent patch makes use of this to fill more port function attributes. Signed-off-by: Parav Pandit Reviewed-by: Jiri Pirko Reviewed-by: Vu Pham Signed-off-by: Saeed Mahameed --- net/core/devlink.c | 64 ++++++++++++++++++++++++---------------------- 1 file changed, 33 insertions(+), 31 deletions(-) diff --git a/net/core/devlink.c b/net/core/devlink.c index ee828e4b1007e..c39496311b71f 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -712,6 +712,31 @@ static int devlink_nl_port_attrs_put(struct sk_buff *msg, return 0; } +static int +devlink_port_fn_hw_addr_fill(struct devlink *devlink, const struct devlink_ops *ops, + struct devlink_port *port, struct sk_buff *msg, + struct netlink_ext_ack *extack, bool *msg_updated) +{ + u8 hw_addr[MAX_ADDR_LEN]; + int hw_addr_len; + int err; + + if (!ops->port_function_hw_addr_get) + return 0; + + err = ops->port_function_hw_addr_get(devlink, port, hw_addr, &hw_addr_len, extack); + if (err) { + if (err == -EOPNOTSUPP) + return 0; + return err; + } + err = nla_put(msg, DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR, hw_addr_len, hw_addr); + if (err) + return err; + *msg_updated = true; + return 0; +} + static int devlink_nl_port_function_attrs_put(struct sk_buff *msg, struct devlink_port *port, struct netlink_ext_ack *extack) @@ -719,36 +744,17 @@ devlink_nl_port_function_attrs_put(struct sk_buff *msg, struct devlink_port *por struct devlink *devlink = port->devlink; const struct devlink_ops *ops; struct nlattr *function_attr; - bool empty_nest = true; - int err = 0; + bool msg_updated = false; + int err; function_attr = nla_nest_start_noflag(msg, DEVLINK_ATTR_PORT_FUNCTION); if (!function_attr) return -EMSGSIZE; ops = devlink->ops; - if (ops->port_function_hw_addr_get) { - int hw_addr_len; - u8 hw_addr[MAX_ADDR_LEN]; - - err = ops->port_function_hw_addr_get(devlink, port, hw_addr, &hw_addr_len, extack); - if (err == -EOPNOTSUPP) { - /* Port function attributes are optional for a port. If port doesn't - * support function attribute, returning -EOPNOTSUPP is not an error. - */ - err = 0; - goto out; - } else if (err) { - goto out; - } - err = nla_put(msg, DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR, hw_addr_len, hw_addr); - if (err) - goto out; - empty_nest = false; - } - -out: - if (err || empty_nest) + err = devlink_port_fn_hw_addr_fill(devlink, ops, port, msg, + extack, &msg_updated); + if (err || !msg_updated) nla_nest_cancel(msg, function_attr); else nla_nest_end(msg, function_attr); @@ -986,7 +992,6 @@ devlink_port_function_hw_addr_set(struct devlink *devlink, struct devlink_port * const struct devlink_ops *ops; const u8 *hw_addr; int hw_addr_len; - int err; hw_addr = nla_data(attr); hw_addr_len = nla_len(attr); @@ -1011,12 +1016,7 @@ devlink_port_function_hw_addr_set(struct devlink *devlink, struct devlink_port * return -EOPNOTSUPP; } - err = ops->port_function_hw_addr_set(devlink, port, hw_addr, hw_addr_len, extack); - if (err) - return err; - - devlink_port_notify(port, DEVLINK_CMD_PORT_NEW); - return 0; + return ops->port_function_hw_addr_set(devlink, port, hw_addr, hw_addr_len, extack); } static int @@ -1037,6 +1037,8 @@ devlink_port_function_set(struct devlink *devlink, struct devlink_port *port, if (attr) err = devlink_port_function_hw_addr_set(devlink, port, attr, extack); + if (!err) + devlink_port_notify(port, DEVLINK_CMD_PORT_NEW); return err; } -- GitLab From b8288837ef6bdaac331752b401f5ca3b59b37430 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Fri, 11 Dec 2020 22:12:13 -0800 Subject: [PATCH 1261/2012] devlink: Introduce PCI SF port flavour and port attribute A PCI sub-function (SF) represents a portion of the device similar to PCI VF. In an eswitch, PCI SF may have port which is normally represented using a representor netdevice. To have better visibility of eswitch port, its association with SF, and its representor netdevice, introduce a PCI SF port flavour. When devlink port flavour is PCI SF, fill up PCI SF attributes of the port. Extend port name creation using PCI PF and SF number scheme on best effort basis, so that vendor drivers can skip defining their own scheme. This is done as cApfNSfM, where A, N and M are controller, PCI PF and PCI SF number respectively. This is similar to existing naming for PCI PF and PCI VF ports. An example view of a PCI SF port: $ devlink port show pci/0000:06:00.0/32768 pci/0000:06:00.0/32768: type eth netdev ens2f0npf0sf88 flavour pcisf controller 0 pfnum 0 sfnum 88 external false splittable false function: hw_addr 00:00:00:00:88:88 state active opstate attached $ devlink port show pci/0000:06:00.0/32768 -jp { "port": { "pci/0000:06:00.0/32768": { "type": "eth", "netdev": "ens2f0npf0sf88", "flavour": "pcisf", "controller": 0, "pfnum": 0, "sfnum": 88, "splittable": false, "function": { "hw_addr": "00:00:00:00:88:88", "state": "active", "opstate": "attached" } } } } Signed-off-by: Parav Pandit Reviewed-by: Jiri Pirko Reviewed-by: Vu Pham Signed-off-by: Saeed Mahameed --- include/net/devlink.h | 16 +++++++++++++++ include/uapi/linux/devlink.h | 5 +++++ net/core/devlink.c | 39 ++++++++++++++++++++++++++++++++++++ 3 files changed, 60 insertions(+) diff --git a/include/net/devlink.h b/include/net/devlink.h index f466819cc4771..dc3bf8000082c 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -93,6 +93,18 @@ struct devlink_port_pci_vf_attrs { u8 external:1; }; +/** + * struct devlink_port_pci_sf_attrs - devlink port's PCI SF attributes + * @controller: Associated controller number + * @sf: Associated PCI SF for of the PCI PF for this port. + * @pf: Associated PCI PF number for this port. + */ +struct devlink_port_pci_sf_attrs { + u32 controller; + u32 sf; + u16 pf; +}; + /** * struct devlink_port_attrs - devlink port object * @flavour: flavour of the port @@ -103,6 +115,7 @@ struct devlink_port_pci_vf_attrs { * @phys: physical port attributes * @pci_pf: PCI PF port attributes * @pci_vf: PCI VF port attributes + * @pci_sf: PCI SF port attributes */ struct devlink_port_attrs { u8 split:1, @@ -114,6 +127,7 @@ struct devlink_port_attrs { struct devlink_port_phys_attrs phys; struct devlink_port_pci_pf_attrs pci_pf; struct devlink_port_pci_vf_attrs pci_vf; + struct devlink_port_pci_sf_attrs pci_sf; }; }; @@ -1404,6 +1418,8 @@ void devlink_port_attrs_pci_pf_set(struct devlink_port *devlink_port, u32 contro u16 pf, bool external); void devlink_port_attrs_pci_vf_set(struct devlink_port *devlink_port, u32 controller, u16 pf, u16 vf, bool external); +void devlink_port_attrs_pci_sf_set(struct devlink_port *devlink_port, + u32 controller, u16 pf, u32 sf); int devlink_sb_register(struct devlink *devlink, unsigned int sb_index, u32 size, u16 ingress_pools_count, u16 egress_pools_count, u16 ingress_tc_count, diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index cf89c318f2ac9..1a241b09a7f85 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -200,6 +200,10 @@ enum devlink_port_flavour { DEVLINK_PORT_FLAVOUR_UNUSED, /* Port which exists in the switch, but * is not used in any way. */ + DEVLINK_PORT_FLAVOUR_PCI_SF, /* Represents eswitch port + * for the PCI SF. It is an internal + * port that faces the PCI SF. + */ }; enum devlink_param_cmode { @@ -529,6 +533,7 @@ enum devlink_attr { DEVLINK_ATTR_RELOAD_ACTION_INFO, /* nested */ DEVLINK_ATTR_RELOAD_ACTION_STATS, /* nested */ + DEVLINK_ATTR_PORT_PCI_SF_NUMBER, /* u32 */ /* add new attributes above here, update the policy in devlink.c */ __DEVLINK_ATTR_MAX, diff --git a/net/core/devlink.c b/net/core/devlink.c index c39496311b71f..4cbc02fb602df 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -690,6 +690,15 @@ static int devlink_nl_port_attrs_put(struct sk_buff *msg, if (nla_put_u8(msg, DEVLINK_ATTR_PORT_EXTERNAL, attrs->pci_vf.external)) return -EMSGSIZE; break; + case DEVLINK_PORT_FLAVOUR_PCI_SF: + if (nla_put_u32(msg, DEVLINK_ATTR_PORT_CONTROLLER_NUMBER, + attrs->pci_sf.controller) || + nla_put_u16(msg, DEVLINK_ATTR_PORT_PCI_PF_NUMBER, + attrs->pci_sf.pf) || + nla_put_u32(msg, DEVLINK_ATTR_PORT_PCI_SF_NUMBER, + attrs->pci_sf.sf)) + return -EMSGSIZE; + break; case DEVLINK_PORT_FLAVOUR_PHYSICAL: case DEVLINK_PORT_FLAVOUR_CPU: case DEVLINK_PORT_FLAVOUR_DSA: @@ -8374,6 +8383,32 @@ void devlink_port_attrs_pci_vf_set(struct devlink_port *devlink_port, u32 contro } EXPORT_SYMBOL_GPL(devlink_port_attrs_pci_vf_set); +/** + * devlink_port_attrs_pci_sf_set - Set PCI SF port attributes + * + * @devlink_port: devlink port + * @controller: associated controller number for the devlink port instance + * @pf: associated PF for the devlink port instance + * @sf: associated SF of a PF for the devlink port instance + */ +void devlink_port_attrs_pci_sf_set(struct devlink_port *devlink_port, u32 controller, + u16 pf, u32 sf) +{ + struct devlink_port_attrs *attrs = &devlink_port->attrs; + int ret; + + if (WARN_ON(devlink_port->registered)) + return; + ret = __devlink_port_attrs_set(devlink_port, + DEVLINK_PORT_FLAVOUR_PCI_SF); + if (ret) + return; + attrs->pci_sf.controller = controller; + attrs->pci_sf.pf = pf; + attrs->pci_sf.sf = sf; +} +EXPORT_SYMBOL_GPL(devlink_port_attrs_pci_sf_set); + static int __devlink_port_phys_port_name_get(struct devlink_port *devlink_port, char *name, size_t len) { @@ -8422,6 +8457,10 @@ static int __devlink_port_phys_port_name_get(struct devlink_port *devlink_port, n = snprintf(name, len, "pf%uvf%u", attrs->pci_vf.pf, attrs->pci_vf.vf); break; + case DEVLINK_PORT_FLAVOUR_PCI_SF: + n = snprintf(name, len, "pf%usf%u", attrs->pci_sf.pf, + attrs->pci_sf.sf); + break; } if (n >= len) -- GitLab From cd76dcd68d96aa5bbc63b7ef25a87a1dbea3d73c Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Fri, 11 Dec 2020 22:12:14 -0800 Subject: [PATCH 1262/2012] devlink: Support add and delete devlink port Extended devlink interface for the user to add and delete a port. Extend devlink to connect user requests to driver to add/delete a port in the device. Driver routines are invoked without holding devlink instance lock. This enables driver to perform several devlink objects registration, unregistration such as (port, health reporter, resource etc) by using existing devlink APIs. This also helps to uniformly use the code for port unregistration during driver unload and during port deletion initiated by user. Examples of add, show and delete commands: $ devlink dev eswitch set pci/0000:06:00.0 mode switchdev $ devlink port show pci/0000:06:00.0/65535: type eth netdev ens2f0np0 flavour physical port 0 splittable false $ devlink port add pci/0000:06:00.0 flavour pcisf pfnum 0 sfnum 88 pci/0000:06:00.0/32768: type eth netdev eth6 flavour pcisf controller 0 pfnum 0 sfnum 88 external false splittable false function: hw_addr 00:00:00:00:00:00 state inactive opstate detached $ devlink port show pci/0000:06:00.0/32768 pci/0000:06:00.0/32768: type eth netdev eth6 flavour pcisf controller 0 pfnum 0 sfnum 88 external false splittable false function: hw_addr 00:00:00:00:00:00 state inactive opstate detached $ udevadm test-builtin net_id /sys/class/net/eth6 Load module index Parsed configuration file /usr/lib/systemd/network/99-default.link Created link configuration context. Using default interface naming scheme 'v245'. ID_NET_NAMING_SCHEME=v245 ID_NET_NAME_PATH=enp6s0f0npf0sf88 ID_NET_NAME_SLOT=ens2f0npf0sf88 Unload module index Unloaded link configuration context. Signed-off-by: Parav Pandit Reviewed-by: Vu Pham Signed-off-by: Saeed Mahameed --- include/net/devlink.h | 52 ++++++++++++++++++ net/core/devlink.c | 121 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 173 insertions(+) diff --git a/include/net/devlink.h b/include/net/devlink.h index dc3bf8000082c..d8edd9a109078 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -152,6 +152,17 @@ struct devlink_port { struct mutex reporters_lock; /* Protects reporter_list */ }; +struct devlink_port_new_attrs { + enum devlink_port_flavour flavour; + unsigned int port_index; + u32 controller; + u32 sfnum; + u16 pfnum; + u8 port_index_valid:1, + controller_valid:1, + sfnum_valid:1; +}; + struct devlink_sb_pool_info { enum devlink_sb_pool_type pool_type; u32 size; @@ -1362,6 +1373,47 @@ struct devlink_ops { int (*port_function_hw_addr_set)(struct devlink *devlink, struct devlink_port *port, const u8 *hw_addr, int hw_addr_len, struct netlink_ext_ack *extack); + /** + * port_new() - Add a new port function of a specified flavor + * @devlink: Devlink instance + * @attrs: attributes of the new port + * @extack: extack for reporting error messages + * @new_port_index: index of the new port + * + * Devlink core will call this device driver function upon user request + * to create a new port function of a specified flavor and optional + * attributes + * + * Notes: + * - Called without devlink instance lock being held. Drivers must + * implement own means of synchronization + * - On success, drivers must register a port with devlink core + * + * Return: 0 on success, negative value otherwise. + */ + int (*port_new)(struct devlink *devlink, + const struct devlink_port_new_attrs *attrs, + struct netlink_ext_ack *extack, + unsigned int *new_port_index); + /** + * port_del() - Delete a port function + * @devlink: Devlink instance + * @port_index: port function index to delete + * @extack: extack for reporting error messages + * + * Devlink core will call this device driver function upon user request + * to delete a previously created port function + * + * Notes: + * - Called without devlink instance lock being held. Drivers must + * implement own means of synchronization + * - On success, drivers must unregister the corresponding devlink + * port + * + * Return: 0 on success, negative value otherwise. + */ + int (*port_del)(struct devlink *devlink, unsigned int port_index, + struct netlink_ext_ack *extack); }; static inline void *devlink_priv(struct devlink *devlink) diff --git a/net/core/devlink.c b/net/core/devlink.c index 4cbc02fb602df..541b5f5492741 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -1147,6 +1147,111 @@ static int devlink_nl_cmd_port_unsplit_doit(struct sk_buff *skb, return devlink_port_unsplit(devlink, port_index, info->extack); } +static int devlink_port_new_notifiy(struct devlink *devlink, + unsigned int port_index, + struct genl_info *info) +{ + struct devlink_port *devlink_port; + struct sk_buff *msg; + int err; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return -ENOMEM; + + mutex_lock(&devlink->lock); + devlink_port = devlink_port_get_by_index(devlink, port_index); + if (!devlink_port) { + err = -ENODEV; + goto out; + } + + err = devlink_nl_port_fill(msg, devlink, devlink_port, + DEVLINK_CMD_NEW, info->snd_portid, + info->snd_seq, 0, NULL); + if (err) + goto out; + + err = genlmsg_reply(msg, info); + mutex_unlock(&devlink->lock); + return err; + +out: + mutex_unlock(&devlink->lock); + nlmsg_free(msg); + return err; +} + +static int devlink_nl_cmd_port_new_doit(struct sk_buff *skb, + struct genl_info *info) +{ + struct netlink_ext_ack *extack = info->extack; + struct devlink_port_new_attrs new_attrs = {}; + struct devlink *devlink = info->user_ptr[0]; + unsigned int new_port_index; + int err; + + if (!devlink->ops->port_new || !devlink->ops->port_del) + return -EOPNOTSUPP; + + if (!info->attrs[DEVLINK_ATTR_PORT_FLAVOUR] || + !info->attrs[DEVLINK_ATTR_PORT_PCI_PF_NUMBER]) { + NL_SET_ERR_MSG_MOD(extack, "Port flavour or PCI PF are not specified"); + return -EINVAL; + } + new_attrs.flavour = nla_get_u16(info->attrs[DEVLINK_ATTR_PORT_FLAVOUR]); + new_attrs.pfnum = + nla_get_u16(info->attrs[DEVLINK_ATTR_PORT_PCI_PF_NUMBER]); + + if (info->attrs[DEVLINK_ATTR_PORT_INDEX]) { + /* Port index of the new port being created by driver. */ + new_attrs.port_index = + nla_get_u32(info->attrs[DEVLINK_ATTR_PORT_INDEX]); + new_attrs.port_index_valid = true; + } + if (info->attrs[DEVLINK_ATTR_PORT_CONTROLLER_NUMBER]) { + new_attrs.controller = + nla_get_u16(info->attrs[DEVLINK_ATTR_PORT_CONTROLLER_NUMBER]); + new_attrs.controller_valid = true; + } + if (new_attrs.flavour == DEVLINK_PORT_FLAVOUR_PCI_SF && + info->attrs[DEVLINK_ATTR_PORT_PCI_SF_NUMBER]) { + new_attrs.sfnum = nla_get_u32(info->attrs[DEVLINK_ATTR_PORT_PCI_SF_NUMBER]); + new_attrs.sfnum_valid = true; + } + + err = devlink->ops->port_new(devlink, &new_attrs, extack, + &new_port_index); + if (err) + return err; + + err = devlink_port_new_notifiy(devlink, new_port_index, info); + if (err && err != -ENODEV) { + /* Fail to send the response; destroy newly created port. */ + devlink->ops->port_del(devlink, new_port_index, extack); + } + return err; +} + +static int devlink_nl_cmd_port_del_doit(struct sk_buff *skb, + struct genl_info *info) +{ + struct netlink_ext_ack *extack = info->extack; + struct devlink *devlink = info->user_ptr[0]; + unsigned int port_index; + + if (!devlink->ops->port_del) + return -EOPNOTSUPP; + + if (!info->attrs[DEVLINK_ATTR_PORT_INDEX]) { + NL_SET_ERR_MSG_MOD(extack, "Port index is not specified"); + return -EINVAL; + } + port_index = nla_get_u32(info->attrs[DEVLINK_ATTR_PORT_INDEX]); + + return devlink->ops->port_del(devlink, port_index, extack); +} + static int devlink_nl_sb_fill(struct sk_buff *msg, struct devlink *devlink, struct devlink_sb *devlink_sb, enum devlink_command cmd, u32 portid, @@ -7605,6 +7710,10 @@ static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = { [DEVLINK_ATTR_RELOAD_ACTION] = NLA_POLICY_RANGE(NLA_U8, DEVLINK_RELOAD_ACTION_DRIVER_REINIT, DEVLINK_RELOAD_ACTION_MAX), [DEVLINK_ATTR_RELOAD_LIMITS] = NLA_POLICY_BITFIELD32(DEVLINK_RELOAD_LIMITS_VALID_MASK), + [DEVLINK_ATTR_PORT_FLAVOUR] = { .type = NLA_U16 }, + [DEVLINK_ATTR_PORT_PCI_PF_NUMBER] = { .type = NLA_U16 }, + [DEVLINK_ATTR_PORT_PCI_SF_NUMBER] = { .type = NLA_U32 }, + [DEVLINK_ATTR_PORT_CONTROLLER_NUMBER] = { .type = NLA_U32 }, }; static const struct genl_small_ops devlink_nl_ops[] = { @@ -7644,6 +7753,18 @@ static const struct genl_small_ops devlink_nl_ops[] = { .flags = GENL_ADMIN_PERM, .internal_flags = DEVLINK_NL_FLAG_NO_LOCK, }, + { + .cmd = DEVLINK_CMD_PORT_NEW, + .doit = devlink_nl_cmd_port_new_doit, + .flags = GENL_ADMIN_PERM, + .internal_flags = DEVLINK_NL_FLAG_NO_LOCK, + }, + { + .cmd = DEVLINK_CMD_PORT_DEL, + .doit = devlink_nl_cmd_port_del_doit, + .flags = GENL_ADMIN_PERM, + .internal_flags = DEVLINK_NL_FLAG_NO_LOCK, + }, { .cmd = DEVLINK_CMD_SB_GET, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, -- GitLab From a556dded9c23c51c82654f1ebe389cbc0bc22057 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Fri, 11 Dec 2020 22:12:15 -0800 Subject: [PATCH 1263/2012] devlink: Support get and set state of port function devlink port function can be in active or inactive state. Allow users to get and set port function's state. When the port function it activated, its operational state may change after a while when the device is created and driver binds to it. Similarly on deactivation flow. To clearly describe the state of the port function and its device's operational state in the host system, define state and opstate attributes. Example of a PCI SF port which supports a port function: $ devlink dev eswitch set pci/0000:06:00.0 mode switchdev $ devlink port show pci/0000:06:00.0/65535: type eth netdev ens2f0np0 flavour physical port 0 splittable false $ devlink port add pci/0000:06:00.0 flavour pcisf pfnum 0 sfnum 88 pci/0000:08:00.0/32768: type eth netdev eth6 flavour pcisf controller 0 pfnum 0 sfnum 88 external false splittable false function: hw_addr 00:00:00:00:00:00 state inactive opstate detached $ devlink port show pci/0000:06:00.0/32768 pci/0000:06:00.0/32768: type eth netdev ens2f0npf0sf88 flavour pcisf controller 0 pfnum 0 sfnum 88 external false splittable false function: hw_addr 00:00:00:00:88:88 state inactive opstate detached $ devlink port function set pci/0000:06:00.0/32768 hw_addr 00:00:00:00:88:88 state active $ devlink port show pci/0000:06:00.0/32768 -jp { "port": { "pci/0000:06:00.0/32768": { "type": "eth", "netdev": "ens2f0npf0sf88", "flavour": "pcisf", "controller": 0, "pfnum": 0, "sfnum": 88, "external": false, "splittable": false, "function": { "hw_addr": "00:00:00:00:88:88", "state": "active", "opstate": "attached" } } } } Signed-off-by: Parav Pandit Reviewed-by: Jiri Pirko Reviewed-by: Vu Pham Signed-off-by: Saeed Mahameed --- include/net/devlink.h | 32 +++++++++++++ include/uapi/linux/devlink.h | 20 ++++++++ net/core/devlink.c | 90 +++++++++++++++++++++++++++++++++++- 3 files changed, 141 insertions(+), 1 deletion(-) diff --git a/include/net/devlink.h b/include/net/devlink.h index d8edd9a109078..691ee76ca5481 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -1414,6 +1414,38 @@ struct devlink_ops { */ int (*port_del)(struct devlink *devlink, unsigned int port_index, struct netlink_ext_ack *extack); + /** + * port_fn_state_get() - Get the state of a port function + * @devlink: Devlink instance + * @port: The devlink port + * @state: Admin configured state + * @opstate: Current operational state + * @extack: extack for reporting error messages + * + * Reports the admin and operational state of a devlink port function + * + * Return: 0 on success, negative value otherwise. + */ + int (*port_fn_state_get)(struct devlink *devlink, + struct devlink_port *port, + enum devlink_port_fn_state *state, + enum devlink_port_fn_opstate *opstate, + struct netlink_ext_ack *extack); + /** + * port_fn_state_set() - Set the admin state of a port function + * @devlink: Devlink instance + * @port: The devlink port + * @state: Admin state + * @extack: extack for reporting error messages + * + * Set the admin state of a devlink port function + * + * Return: 0 on success, negative value otherwise. + */ + int (*port_fn_state_set)(struct devlink *devlink, + struct devlink_port *port, + enum devlink_port_fn_state state, + struct netlink_ext_ack *extack); }; static inline void *devlink_priv(struct devlink *devlink) diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index 1a241b09a7f85..f6008b2fa60ff 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -583,9 +583,29 @@ enum devlink_resource_unit { enum devlink_port_function_attr { DEVLINK_PORT_FUNCTION_ATTR_UNSPEC, DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR, /* binary */ + DEVLINK_PORT_FN_ATTR_STATE, /* u8 */ + DEVLINK_PORT_FN_ATTR_OPSTATE, /* u8 */ __DEVLINK_PORT_FUNCTION_ATTR_MAX, DEVLINK_PORT_FUNCTION_ATTR_MAX = __DEVLINK_PORT_FUNCTION_ATTR_MAX - 1 }; +enum devlink_port_fn_state { + DEVLINK_PORT_FN_STATE_INACTIVE, + DEVLINK_PORT_FN_STATE_ACTIVE, +}; + +/** + * enum devlink_port_fn_opstate - indicates operational state of the function + * @DEVLINK_PORT_FN_OPSTATE_ATTACHED: Driver is attached to the function. + * For graceful tear down of the function, after inactivation of the + * function, user should wait for operational state to turn DETACHED. + * @DEVLINK_PORT_FN_OPSTATE_DETACHED: Driver is detached from the function. + * It is safe to delete the port. + */ +enum devlink_port_fn_opstate { + DEVLINK_PORT_FN_OPSTATE_DETACHED, + DEVLINK_PORT_FN_OPSTATE_ATTACHED, +}; + #endif /* _UAPI_LINUX_DEVLINK_H_ */ diff --git a/net/core/devlink.c b/net/core/devlink.c index 541b5f5492741..9f1be69bd5f8c 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -87,6 +87,9 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(devlink_trap_report); static const struct nla_policy devlink_function_nl_policy[DEVLINK_PORT_FUNCTION_ATTR_MAX + 1] = { [DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR] = { .type = NLA_BINARY }, + [DEVLINK_PORT_FN_ATTR_STATE] = + NLA_POLICY_RANGE(NLA_U8, DEVLINK_PORT_FN_STATE_INACTIVE, + DEVLINK_PORT_FN_STATE_ACTIVE), }; static LIST_HEAD(devlink_list); @@ -746,6 +749,58 @@ devlink_port_fn_hw_addr_fill(struct devlink *devlink, const struct devlink_ops * return 0; } +static bool +devlink_port_fn_state_valid(enum devlink_port_fn_state state) +{ + return state == DEVLINK_PORT_FN_STATE_INACTIVE || + state == DEVLINK_PORT_FN_STATE_ACTIVE; +} + +static bool +devlink_port_fn_opstate_valid(enum devlink_port_fn_opstate opstate) +{ + return opstate == DEVLINK_PORT_FN_OPSTATE_DETACHED || + opstate == DEVLINK_PORT_FN_OPSTATE_ATTACHED; +} + +static int +devlink_port_fn_state_fill(struct devlink *devlink, + const struct devlink_ops *ops, + struct devlink_port *port, struct sk_buff *msg, + struct netlink_ext_ack *extack, + bool *msg_updated) +{ + enum devlink_port_fn_opstate opstate; + enum devlink_port_fn_state state; + int err; + + if (!ops->port_fn_state_get) + return 0; + + err = ops->port_fn_state_get(devlink, port, &state, &opstate, extack); + if (err) { + if (err == -EOPNOTSUPP) + return 0; + return err; + } + if (!devlink_port_fn_state_valid(state)) { + WARN_ON_ONCE(1); + NL_SET_ERR_MSG_MOD(extack, "Invalid state read from driver"); + return -EINVAL; + } + if (!devlink_port_fn_opstate_valid(opstate)) { + WARN_ON_ONCE(1); + NL_SET_ERR_MSG_MOD(extack, + "Invalid operational state read from driver"); + return -EINVAL; + } + if (nla_put_u8(msg, DEVLINK_PORT_FN_ATTR_STATE, state) || + nla_put_u8(msg, DEVLINK_PORT_FN_ATTR_OPSTATE, opstate)) + return -EMSGSIZE; + *msg_updated = true; + return 0; +} + static int devlink_nl_port_function_attrs_put(struct sk_buff *msg, struct devlink_port *port, struct netlink_ext_ack *extack) @@ -763,6 +818,11 @@ devlink_nl_port_function_attrs_put(struct sk_buff *msg, struct devlink_port *por ops = devlink->ops; err = devlink_port_fn_hw_addr_fill(devlink, ops, port, msg, extack, &msg_updated); + if (err) + goto out; + err = devlink_port_fn_state_fill(devlink, ops, port, msg, extack, + &msg_updated); +out: if (err || !msg_updated) nla_nest_cancel(msg, function_attr); else @@ -1028,6 +1088,24 @@ devlink_port_function_hw_addr_set(struct devlink *devlink, struct devlink_port * return ops->port_function_hw_addr_set(devlink, port, hw_addr, hw_addr_len, extack); } +static int devlink_port_fn_state_set(struct devlink *devlink, + struct devlink_port *port, + const struct nlattr *attr, + struct netlink_ext_ack *extack) +{ + enum devlink_port_fn_state state; + const struct devlink_ops *ops; + + state = nla_get_u8(attr); + ops = devlink->ops; + if (!ops->port_fn_state_set) { + NL_SET_ERR_MSG_MOD(extack, + "Function does not support state setting"); + return -EOPNOTSUPP; + } + return ops->port_fn_state_set(devlink, port, state, extack); +} + static int devlink_port_function_set(struct devlink *devlink, struct devlink_port *port, const struct nlattr *attr, struct netlink_ext_ack *extack) @@ -1043,8 +1121,18 @@ devlink_port_function_set(struct devlink *devlink, struct devlink_port *port, } attr = tb[DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR]; - if (attr) + if (attr) { err = devlink_port_function_hw_addr_set(devlink, port, attr, extack); + if (err) + return err; + } + /* Keep this as the last function attribute set, so that when + * multiple port function attributes are set along with state, + * Those can be applied first before activating the state. + */ + attr = tb[DEVLINK_PORT_FN_ATTR_STATE]; + if (attr) + err = devlink_port_fn_state_set(devlink, port, attr, extack); if (!err) devlink_port_notify(port, DEVLINK_CMD_PORT_NEW); -- GitLab From f3196bb0f14c0ffb5089c15668bda196c98d3900 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Fri, 11 Dec 2020 22:12:16 -0800 Subject: [PATCH 1264/2012] net/mlx5: Introduce vhca state event notifier vhca state events indicates change in the state of the vhca that may occur due to a SF allocation, deallocation or enabling/disabling the SF HCA. Introduce vhca state event handler which will be used by SF devlink port manager and SF hardware id allocator in subsequent patches to act on the event. This enables single entity to subscribe, query and rearm the event for a function. Signed-off-by: Parav Pandit Reviewed-by: Vu Pham Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/Kconfig | 9 + .../net/ethernet/mellanox/mlx5/core/Makefile | 4 + drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 4 + drivers/net/ethernet/mellanox/mlx5/core/eq.c | 3 + .../net/ethernet/mellanox/mlx5/core/events.c | 7 + .../net/ethernet/mellanox/mlx5/core/main.c | 16 ++ .../ethernet/mellanox/mlx5/core/mlx5_core.h | 2 + .../mlx5/core/sf/mlx5_ifc_vhca_event.h | 82 ++++++++ .../net/ethernet/mellanox/mlx5/core/sf/sf.h | 45 +++++ .../mellanox/mlx5/core/sf/vhca_event.c | 189 ++++++++++++++++++ .../mellanox/mlx5/core/sf/vhca_event.h | 57 ++++++ include/linux/mlx5/driver.h | 4 + 12 files changed, 422 insertions(+) create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/sf/mlx5_ifc_vhca_event.h create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/sf/sf.h create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/sf/vhca_event.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/sf/vhca_event.h diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig index 6e4d7bb7fea2f..d6c48582e7a8f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig +++ b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig @@ -203,3 +203,12 @@ config MLX5_SW_STEERING default y help Build support for software-managed steering in the NIC. + +config MLX5_SF + bool "Mellanox Technologies subfunction device support using auxiliary device" + depends on MLX5_CORE && MLX5_CORE_EN + default n + help + Build support for subfuction device in the NIC. A Mellanox subfunction + device can support RDMA, netdevice and vdpa device. + It is similar to a SRIOV VF but it doesn't require SRIOV support. diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index 134bd038ae8af..22ef2ebbee966 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -86,3 +86,7 @@ mlx5_core-$(CONFIG_MLX5_SW_STEERING) += steering/dr_domain.o steering/dr_table.o steering/dr_ste_v0.o \ steering/dr_cmd.o steering/dr_fw.o \ steering/dr_action.o steering/fs_dr.o +# +# SF device +# +mlx5_core-$(CONFIG_MLX5_SF) += sf/vhca_event.o diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index 50c7b9ee80c31..47dcc3ac2cf04 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -464,6 +464,8 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op, case MLX5_CMD_OP_ALLOC_MEMIC: case MLX5_CMD_OP_MODIFY_XRQ: case MLX5_CMD_OP_RELEASE_XRQ_ERROR: + case MLX5_CMD_OP_QUERY_VHCA_STATE: + case MLX5_CMD_OP_MODIFY_VHCA_STATE: *status = MLX5_DRIVER_STATUS_ABORTED; *synd = MLX5_DRIVER_SYND; return -EIO; @@ -657,6 +659,8 @@ const char *mlx5_command_str(int command) MLX5_COMMAND_STR_CASE(DESTROY_UMEM); MLX5_COMMAND_STR_CASE(RELEASE_XRQ_ERROR); MLX5_COMMAND_STR_CASE(MODIFY_XRQ); + MLX5_COMMAND_STR_CASE(QUERY_VHCA_STATE); + MLX5_COMMAND_STR_CASE(MODIFY_VHCA_STATE); default: return "unknown command opcode"; } } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index fc0afa03d407b..421febebc658f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -595,6 +595,9 @@ static void gather_async_events_mask(struct mlx5_core_dev *dev, u64 mask[4]) async_event_mask |= (1ull << MLX5_EVENT_TYPE_ESW_FUNCTIONS_CHANGED); + if (MLX5_CAP_GEN_MAX(dev, vhca_state)) + async_event_mask |= (1ull << MLX5_EVENT_TYPE_VHCA_STATE_CHANGE); + mask[0] = async_event_mask; if (MLX5_CAP_GEN(dev, event_cap)) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/events.c b/drivers/net/ethernet/mellanox/mlx5/core/events.c index 3ce17c3d7a001..5523d218e5fb7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/events.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/events.c @@ -110,6 +110,8 @@ static const char *eqe_type_str(u8 type) return "MLX5_EVENT_TYPE_CMD"; case MLX5_EVENT_TYPE_ESW_FUNCTIONS_CHANGED: return "MLX5_EVENT_TYPE_ESW_FUNCTIONS_CHANGED"; + case MLX5_EVENT_TYPE_VHCA_STATE_CHANGE: + return "MLX5_EVENT_TYPE_VHCA_STATE_CHANGE"; case MLX5_EVENT_TYPE_PAGE_REQUEST: return "MLX5_EVENT_TYPE_PAGE_REQUEST"; case MLX5_EVENT_TYPE_PAGE_FAULT: @@ -403,3 +405,8 @@ int mlx5_notifier_call_chain(struct mlx5_events *events, unsigned int event, voi { return atomic_notifier_call_chain(&events->nh, event, data); } + +void mlx5_events_work_enqueue(struct mlx5_core_dev *dev, struct work_struct *work) +{ + queue_work(dev->priv.events->wq, work); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index ca6f2fc39ea0a..b16f57befe526 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -73,6 +73,7 @@ #include "ecpf.h" #include "lib/hv_vhca.h" #include "diag/rsc_dump.h" +#include "sf/vhca_event.h" MODULE_AUTHOR("Eli Cohen "); MODULE_DESCRIPTION("Mellanox 5th generation network adapters (ConnectX series) core driver"); @@ -567,6 +568,8 @@ static int handle_hca_cap(struct mlx5_core_dev *dev, void *set_ctx) if (MLX5_CAP_GEN_MAX(dev, mkey_by_name)) MLX5_SET(cmd_hca_cap, set_hca_cap, mkey_by_name, 1); + mlx5_vhca_state_cap_handle(dev, set_hca_cap); + return set_caps(dev, set_ctx, MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE); } @@ -884,6 +887,12 @@ static int mlx5_init_once(struct mlx5_core_dev *dev) goto err_eswitch_cleanup; } + err = mlx5_vhca_event_init(dev); + if (err) { + mlx5_core_err(dev, "Failed to init vhca event notifier %d\n", err); + goto err_fpga_cleanup; + } + dev->dm = mlx5_dm_create(dev); if (IS_ERR(dev->dm)) mlx5_core_warn(dev, "Failed to init device memory%d\n", err); @@ -894,6 +903,8 @@ static int mlx5_init_once(struct mlx5_core_dev *dev) return 0; +err_fpga_cleanup: + mlx5_fpga_cleanup(dev); err_eswitch_cleanup: mlx5_eswitch_cleanup(dev->priv.eswitch); err_sriov_cleanup: @@ -925,6 +936,7 @@ static void mlx5_cleanup_once(struct mlx5_core_dev *dev) mlx5_hv_vhca_destroy(dev->hv_vhca); mlx5_fw_tracer_destroy(dev->tracer); mlx5_dm_cleanup(dev); + mlx5_vhca_event_cleanup(dev); mlx5_fpga_cleanup(dev); mlx5_eswitch_cleanup(dev->priv.eswitch); mlx5_sriov_cleanup(dev); @@ -1129,6 +1141,8 @@ static int mlx5_load(struct mlx5_core_dev *dev) goto err_sriov; } + mlx5_vhca_event_start(dev); + err = mlx5_ec_init(dev); if (err) { mlx5_core_err(dev, "Failed to init embedded CPU\n"); @@ -1146,6 +1160,7 @@ static int mlx5_load(struct mlx5_core_dev *dev) err_sriov: mlx5_ec_cleanup(dev); err_ec: + mlx5_vhca_event_stop(dev); mlx5_cleanup_fs(dev); err_fs: mlx5_accel_tls_cleanup(dev); @@ -1173,6 +1188,7 @@ static void mlx5_unload(struct mlx5_core_dev *dev) { mlx5_sriov_detach(dev); mlx5_ec_cleanup(dev); + mlx5_vhca_event_stop(dev); mlx5_cleanup_fs(dev); mlx5_accel_ipsec_cleanup(dev); mlx5_accel_tls_cleanup(dev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index 0a0302ce7144d..a33b7496d748b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -259,4 +259,6 @@ void mlx5_set_nic_state(struct mlx5_core_dev *dev, u8 state); void mlx5_unload_one(struct mlx5_core_dev *dev, bool cleanup); int mlx5_load_one(struct mlx5_core_dev *dev, bool boot); + +void mlx5_events_work_enqueue(struct mlx5_core_dev *dev, struct work_struct *work); #endif /* __MLX5_CORE_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/mlx5_ifc_vhca_event.h b/drivers/net/ethernet/mellanox/mlx5/core/sf/mlx5_ifc_vhca_event.h new file mode 100644 index 0000000000000..1daf5a122ba30 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/mlx5_ifc_vhca_event.h @@ -0,0 +1,82 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2020 Mellanox Technologies Ltd */ + +#ifndef __MLX5_IFC_VHCA_EVENT_H__ +#define __MLX5_IFC_VHCA_EVENT_H__ + +enum mlx5_ifc_vhca_state { + MLX5_VHCA_STATE_INVALID = 0x0, + MLX5_VHCA_STATE_ALLOCATED = 0x1, + MLX5_VHCA_STATE_ACTIVE = 0x2, + MLX5_VHCA_STATE_IN_USE = 0x3, + MLX5_VHCA_STATE_TEARDOWN_REQUEST = 0x4, +}; + +struct mlx5_ifc_vhca_state_context_bits { + u8 arm_change_event[0x1]; + u8 reserved_at_1[0xb]; + u8 vhca_state[0x4]; + u8 reserved_at_10[0x10]; + + u8 sw_function_id[0x20]; + + u8 reserved_at_40[0x80]; +}; + +struct mlx5_ifc_query_vhca_state_out_bits { + u8 status[0x8]; + u8 reserved_at_8[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_at_40[0x40]; + + struct mlx5_ifc_vhca_state_context_bits vhca_state_context; +}; + +struct mlx5_ifc_query_vhca_state_in_bits { + u8 opcode[0x10]; + u8 uid[0x10]; + + u8 reserved_at_20[0x10]; + u8 op_mod[0x10]; + + u8 embedded_cpu_function[0x1]; + u8 reserved_at_41[0xf]; + u8 function_id[0x10]; + + u8 reserved_at_60[0x20]; +}; + +struct mlx5_ifc_vhca_state_field_select_bits { + u8 reserved_at_0[0x1e]; + u8 sw_function_id[0x1]; + u8 arm_change_event[0x1]; +}; + +struct mlx5_ifc_modify_vhca_state_out_bits { + u8 status[0x8]; + u8 reserved_at_8[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_at_40[0x40]; +}; + +struct mlx5_ifc_modify_vhca_state_in_bits { + u8 opcode[0x10]; + u8 uid[0x10]; + + u8 reserved_at_20[0x10]; + u8 op_mod[0x10]; + + u8 embedded_cpu_function[0x1]; + u8 reserved_at_41[0xf]; + u8 function_id[0x10]; + + struct mlx5_ifc_vhca_state_field_select_bits vhca_state_field_select; + + struct mlx5_ifc_vhca_state_context_bits vhca_state_context; +}; + +#endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/sf.h b/drivers/net/ethernet/mellanox/mlx5/core/sf/sf.h new file mode 100644 index 0000000000000..623191679b49c --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/sf.h @@ -0,0 +1,45 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2020 Mellanox Technologies Ltd */ + +#ifndef __MLX5_SF_H__ +#define __MLX5_SF_H__ + +#include + +static inline u16 mlx5_sf_start_function_id(const struct mlx5_core_dev *dev) +{ + return MLX5_CAP_GEN(dev, sf_base_id); +} + +#ifdef CONFIG_MLX5_SF + +static inline bool mlx5_sf_supported(const struct mlx5_core_dev *dev) +{ + return MLX5_CAP_GEN(dev, sf); +} + +static inline u16 mlx5_sf_max_functions(const struct mlx5_core_dev *dev) +{ + if (!mlx5_sf_supported(dev)) + return 0; + if (MLX5_CAP_GEN(dev, max_num_sf)) + return MLX5_CAP_GEN(dev, max_num_sf); + else + return 1 << MLX5_CAP_GEN(dev, log_max_sf); +} + +#else + +static inline bool mlx5_sf_supported(const struct mlx5_core_dev *dev) +{ + return false; +} + +static inline u16 mlx5_sf_max_functions(const struct mlx5_core_dev *dev) +{ + return 0; +} + +#endif + +#endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/vhca_event.c b/drivers/net/ethernet/mellanox/mlx5/core/sf/vhca_event.c new file mode 100644 index 0000000000000..af2f2dd9db250 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/vhca_event.c @@ -0,0 +1,189 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2020 Mellanox Technologies Ltd */ + +#include +#include "mlx5_ifc_vhca_event.h" +#include "mlx5_core.h" +#include "vhca_event.h" +#include "ecpf.h" + +struct mlx5_vhca_state_notifier { + struct mlx5_core_dev *dev; + struct mlx5_nb nb; + struct blocking_notifier_head n_head; +}; + +struct mlx5_vhca_event_work { + struct work_struct work; + struct mlx5_vhca_state_notifier *notifier; + struct mlx5_vhca_state_event event; +}; + +int mlx5_cmd_query_vhca_state(struct mlx5_core_dev *dev, u16 function_id, + bool ecpu, u32 *out, u32 outlen) +{ + u32 in[MLX5_ST_SZ_DW(query_vhca_state_in)] = {}; + + MLX5_SET(query_vhca_state_in, in, opcode, MLX5_CMD_OP_QUERY_VHCA_STATE); + MLX5_SET(query_vhca_state_in, in, function_id, function_id); + MLX5_SET(query_vhca_state_in, in, embedded_cpu_function, ecpu); + + return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen); +} + +static int mlx5_cmd_modify_vhca_state(struct mlx5_core_dev *dev, u16 function_id, + bool ecpu, u32 *in, u32 inlen) +{ + u32 out[MLX5_ST_SZ_DW(modify_vhca_state_out)] = {}; + + MLX5_SET(modify_vhca_state_in, in, opcode, MLX5_CMD_OP_MODIFY_VHCA_STATE); + MLX5_SET(modify_vhca_state_in, in, function_id, function_id); + MLX5_SET(modify_vhca_state_in, in, embedded_cpu_function, ecpu); + + return mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); +} + +int mlx5_modify_vhca_sw_id(struct mlx5_core_dev *dev, u16 function_id, bool ecpu, u32 sw_fn_id) +{ + u32 out[MLX5_ST_SZ_DW(modify_vhca_state_out)] = {}; + u32 in[MLX5_ST_SZ_DW(modify_vhca_state_in)] = {}; + + MLX5_SET(modify_vhca_state_in, in, opcode, MLX5_CMD_OP_MODIFY_VHCA_STATE); + MLX5_SET(modify_vhca_state_in, in, function_id, function_id); + MLX5_SET(modify_vhca_state_in, in, embedded_cpu_function, ecpu); + MLX5_SET(modify_vhca_state_in, in, vhca_state_field_select.sw_function_id, 1); + MLX5_SET(modify_vhca_state_in, in, vhca_state_context.sw_function_id, sw_fn_id); + + return mlx5_cmd_exec_inout(dev, modify_vhca_state, in, out); +} + +int mlx5_vhca_event_arm(struct mlx5_core_dev *dev, u16 function_id, bool ecpu) +{ + u32 in[MLX5_ST_SZ_DW(modify_vhca_state_in)] = {}; + + MLX5_SET(modify_vhca_state_in, in, vhca_state_context.arm_change_event, 1); + MLX5_SET(modify_vhca_state_in, in, vhca_state_field_select.arm_change_event, 1); + + return mlx5_cmd_modify_vhca_state(dev, function_id, ecpu, in, sizeof(in)); +} + +static void +mlx5_vhca_event_notify(struct mlx5_core_dev *dev, struct mlx5_vhca_state_event *event) +{ + u32 out[MLX5_ST_SZ_DW(query_vhca_state_out)] = {}; + int err; + + err = mlx5_cmd_query_vhca_state(dev, event->function_id, event->ecpu, out, sizeof(out)); + if (err) + return; + + event->sw_function_id = MLX5_GET(query_vhca_state_out, out, + vhca_state_context.sw_function_id); + event->new_vhca_state = MLX5_GET(query_vhca_state_out, out, + vhca_state_context.vhca_state); + + mlx5_vhca_event_arm(dev, event->function_id, event->ecpu); + + blocking_notifier_call_chain(&dev->priv.vhca_state_notifier->n_head, 0, event); +} + +static void mlx5_vhca_state_work_handler(struct work_struct *_work) +{ + struct mlx5_vhca_event_work *work = container_of(_work, struct mlx5_vhca_event_work, work); + struct mlx5_vhca_state_notifier *notifier = work->notifier; + struct mlx5_core_dev *dev = notifier->dev; + + mlx5_vhca_event_notify(dev, &work->event); +} + +static int +mlx5_vhca_state_change_notifier(struct notifier_block *nb, unsigned long type, void *data) +{ + struct mlx5_vhca_state_notifier *notifier = + mlx5_nb_cof(nb, struct mlx5_vhca_state_notifier, nb); + struct mlx5_vhca_event_work *work; + struct mlx5_eqe *eqe = data; + + work = kzalloc(sizeof(*work), GFP_ATOMIC); + if (!work) + return NOTIFY_DONE; + INIT_WORK(&work->work, &mlx5_vhca_state_work_handler); + work->notifier = notifier; + work->event.function_id = be16_to_cpu(eqe->data.vhca_state.function_id); + work->event.ecpu = be16_to_cpu(eqe->data.vhca_state.ec_function); + mlx5_events_work_enqueue(notifier->dev, &work->work); + return NOTIFY_OK; +} + +void mlx5_vhca_state_cap_handle(struct mlx5_core_dev *dev, void *set_hca_cap) +{ + if (!mlx5_vhca_event_supported(dev)) + return; + + MLX5_SET(cmd_hca_cap, set_hca_cap, vhca_state, 1); + MLX5_SET(cmd_hca_cap, set_hca_cap, event_on_vhca_state_allocated, 1); + MLX5_SET(cmd_hca_cap, set_hca_cap, event_on_vhca_state_active, 1); + MLX5_SET(cmd_hca_cap, set_hca_cap, event_on_vhca_state_in_use, 1); + MLX5_SET(cmd_hca_cap, set_hca_cap, event_on_vhca_state_teardown_request, 1); +} + +int mlx5_vhca_event_init(struct mlx5_core_dev *dev) +{ + struct mlx5_vhca_state_notifier *notifier; + + if (!mlx5_vhca_event_supported(dev)) + return 0; + + notifier = kzalloc(sizeof(*notifier), GFP_KERNEL); + if (!notifier) + return -ENOMEM; + + dev->priv.vhca_state_notifier = notifier; + notifier->dev = dev; + BLOCKING_INIT_NOTIFIER_HEAD(¬ifier->n_head); + MLX5_NB_INIT(¬ifier->nb, mlx5_vhca_state_change_notifier, VHCA_STATE_CHANGE); + return 0; +} + +void mlx5_vhca_event_cleanup(struct mlx5_core_dev *dev) +{ + if (!mlx5_vhca_event_supported(dev)) + return; + + kfree(dev->priv.vhca_state_notifier); + dev->priv.vhca_state_notifier = NULL; +} + +void mlx5_vhca_event_start(struct mlx5_core_dev *dev) +{ + struct mlx5_vhca_state_notifier *notifier; + + if (!dev->priv.vhca_state_notifier) + return; + + notifier = dev->priv.vhca_state_notifier; + mlx5_eq_notifier_register(dev, ¬ifier->nb); +} + +void mlx5_vhca_event_stop(struct mlx5_core_dev *dev) +{ + struct mlx5_vhca_state_notifier *notifier; + + if (!dev->priv.vhca_state_notifier) + return; + + notifier = dev->priv.vhca_state_notifier; + mlx5_eq_notifier_unregister(dev, ¬ifier->nb); +} + +int mlx5_vhca_event_notifier_register(struct mlx5_core_dev *dev, struct notifier_block *nb) +{ + if (!dev->priv.vhca_state_notifier) + return -EOPNOTSUPP; + return blocking_notifier_chain_register(&dev->priv.vhca_state_notifier->n_head, nb); +} + +void mlx5_vhca_event_notifier_unregister(struct mlx5_core_dev *dev, struct notifier_block *nb) +{ + blocking_notifier_chain_unregister(&dev->priv.vhca_state_notifier->n_head, nb); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/vhca_event.h b/drivers/net/ethernet/mellanox/mlx5/core/sf/vhca_event.h new file mode 100644 index 0000000000000..1fe1ec6f4d4b0 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/vhca_event.h @@ -0,0 +1,57 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2020 Mellanox Technologies Ltd */ + +#ifndef __MLX5_VHCA_EVENT_H__ +#define __MLX5_VHCA_EVENT_H__ + +#ifdef CONFIG_MLX5_SF + +struct mlx5_vhca_state_event { + u16 function_id; + u16 sw_function_id; + u8 new_vhca_state; + bool ecpu; +}; + +static inline bool mlx5_vhca_event_supported(const struct mlx5_core_dev *dev) +{ + return MLX5_CAP_GEN_MAX(dev, vhca_state); +} + +void mlx5_vhca_state_cap_handle(struct mlx5_core_dev *dev, void *set_hca_cap); +int mlx5_vhca_event_init(struct mlx5_core_dev *dev); +void mlx5_vhca_event_cleanup(struct mlx5_core_dev *dev); +void mlx5_vhca_event_start(struct mlx5_core_dev *dev); +void mlx5_vhca_event_stop(struct mlx5_core_dev *dev); +int mlx5_vhca_event_notifier_register(struct mlx5_core_dev *dev, struct notifier_block *nb); +void mlx5_vhca_event_notifier_unregister(struct mlx5_core_dev *dev, struct notifier_block *nb); +int mlx5_modify_vhca_sw_id(struct mlx5_core_dev *dev, u16 function_id, bool ecpu, u32 sw_fn_id); +int mlx5_vhca_event_arm(struct mlx5_core_dev *dev, u16 function_id, bool ecpu); +int mlx5_cmd_query_vhca_state(struct mlx5_core_dev *dev, u16 function_id, + bool ecpu, u32 *out, u32 outlen); +#else + +static inline void mlx5_vhca_state_cap_handle(struct mlx5_core_dev *dev, void *set_hca_cap) +{ +} + +static inline int mlx5_vhca_event_init(struct mlx5_core_dev *dev) +{ + return 0; +} + +static inline void mlx5_vhca_event_cleanup(struct mlx5_core_dev *dev) +{ +} + +static inline void mlx5_vhca_event_start(struct mlx5_core_dev *dev) +{ +} + +static inline void mlx5_vhca_event_stop(struct mlx5_core_dev *dev) +{ +} + +#endif + +#endif diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index f93bfe7473aa7..ffba0786051e6 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -507,6 +507,7 @@ struct mlx5_devcom; struct mlx5_fw_reset; struct mlx5_eq_table; struct mlx5_irq_table; +struct mlx5_vhca_state_notifier; struct mlx5_rate_limit { u32 rate; @@ -603,6 +604,9 @@ struct mlx5_priv { struct mlx5_bfreg_data bfregs; struct mlx5_uars_page *uar; +#ifdef CONFIG_MLX5_SF + struct mlx5_vhca_state_notifier *vhca_state_notifier; +#endif }; enum mlx5_device_state { -- GitLab From 90d010b8634b89a97ca3b7aa6a88fd566fc77717 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Fri, 11 Dec 2020 22:12:17 -0800 Subject: [PATCH 1265/2012] net/mlx5: SF, Add auxiliary device support Introduce API to add and delete an auxiliary device for an SF. Each SF has its own dedicated window in the PCI BAR 2. SF device is similar to PCI PF and VF that supports multiple class of devices such as net, rdma and vdpa. SF device will be added or removed in subsequent patch during SF devlink port function state change command. A subfunction device exposes user supplied subfunction number which will be further used by systemd/udev to have deterministic name for its netdevice and rdma device. An mlx5 subfunction auxiliary device example: $ devlink dev eswitch set pci/0000:06:00.0 mode switchdev $ devlink port show pci/0000:06:00.0/65535: type eth netdev ens2f0np0 flavour physical port 0 splittable false $ devlink port add pci/0000:06:00.0 flavour pcisf pfnum 0 sfnum 88 pci/0000:08:00.0/32768: type eth netdev eth6 flavour pcisf controller 0 pfnum 0 sfnum 88 external false splittable false function: hw_addr 00:00:00:00:00:00 state inactive opstate detached $ devlink port show ens2f0npf0sf88 pci/0000:06:00.0/32768: type eth netdev ens2f0npf0sf88 flavour pcisf controller 0 pfnum 0 sfnum 88 external false splittable false function: hw_addr 00:00:00:00:88:88 state inactive opstate detached $ devlink port function set ens2f0npf0sf88 hw_addr 00:00:00:00:88:88 state active On activation, $ ls -l /sys/bus/auxiliary/devices/ mlx5_core.sf.4 -> ../../../devices/pci0000:00/0000:00:03.0/0000:06:00.0/mlx5_core.sf.4 $ cat /sys/bus/auxiliary/devices/mlx5_core.sf.4/sfnum 88 Signed-off-by: Parav Pandit Reviewed-by: Vu Pham Signed-off-by: Saeed Mahameed --- .../device_drivers/ethernet/mellanox/mlx5.rst | 5 + .../net/ethernet/mellanox/mlx5/core/Makefile | 2 +- .../net/ethernet/mellanox/mlx5/core/main.c | 4 + .../ethernet/mellanox/mlx5/core/sf/dev/dev.c | 265 ++++++++++++++++++ .../ethernet/mellanox/mlx5/core/sf/dev/dev.h | 35 +++ include/linux/mlx5/driver.h | 2 + 6 files changed, 312 insertions(+), 1 deletion(-) create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.h diff --git a/Documentation/networking/device_drivers/ethernet/mellanox/mlx5.rst b/Documentation/networking/device_drivers/ethernet/mellanox/mlx5.rst index e9b65035cd472..a5eb22793bb95 100644 --- a/Documentation/networking/device_drivers/ethernet/mellanox/mlx5.rst +++ b/Documentation/networking/device_drivers/ethernet/mellanox/mlx5.rst @@ -97,6 +97,11 @@ Enabling the driver and kconfig options | Provides low-level InfiniBand/RDMA and `RoCE `_ support. +**CONFIG_MLX5_SF=(y/n)** + +| Build support for subfunction. +| Subfunctons are more light weight than PCI SRIOV VFs. Choosing this option +| will enable support for creating subfunction devices. **External options** ( Choose if the corresponding mlx5 feature is required ) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index 22ef2ebbee966..f5b2e91013480 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -89,4 +89,4 @@ mlx5_core-$(CONFIG_MLX5_SW_STEERING) += steering/dr_domain.o steering/dr_table.o # # SF device # -mlx5_core-$(CONFIG_MLX5_SF) += sf/vhca_event.o +mlx5_core-$(CONFIG_MLX5_SF) += sf/vhca_event.o sf/dev/dev.o diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index b16f57befe526..26b5502712a62 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -74,6 +74,7 @@ #include "lib/hv_vhca.h" #include "diag/rsc_dump.h" #include "sf/vhca_event.h" +#include "sf/dev/dev.h" MODULE_AUTHOR("Eli Cohen "); MODULE_DESCRIPTION("Mellanox 5th generation network adapters (ConnectX series) core driver"); @@ -1155,6 +1156,8 @@ static int mlx5_load(struct mlx5_core_dev *dev) goto err_sriov; } + mlx5_sf_dev_table_create(dev); + return 0; err_sriov: @@ -1186,6 +1189,7 @@ err_irq_table: static void mlx5_unload(struct mlx5_core_dev *dev) { + mlx5_sf_dev_table_destroy(dev); mlx5_sriov_detach(dev); mlx5_ec_cleanup(dev); mlx5_vhca_event_stop(dev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.c b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.c new file mode 100644 index 0000000000000..4a8eeb7c853ee --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.c @@ -0,0 +1,265 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2020 Mellanox Technologies Ltd */ + +#include +#include +#include "mlx5_core.h" +#include "dev.h" +#include "sf/vhca_event.h" +#include "sf/sf.h" +#include "sf/mlx5_ifc_vhca_event.h" +#include "ecpf.h" + +struct mlx5_sf_dev_table { + struct xarray devices; + unsigned int max_sfs; + phys_addr_t base_address; + u64 sf_bar_length; + struct notifier_block nb; + struct mlx5_core_dev *dev; +}; + +static bool mlx5_sf_dev_supported(const struct mlx5_core_dev *dev) +{ + return MLX5_CAP_GEN(dev, sf) && mlx5_vhca_event_supported(dev); +} + +static ssize_t sfnum_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct auxiliary_device *adev = container_of(dev, struct auxiliary_device, dev); + struct mlx5_sf_dev *sf_dev = container_of(adev, struct mlx5_sf_dev, adev); + + return scnprintf(buf, PAGE_SIZE, "%u\n", sf_dev->sfnum); +} +static DEVICE_ATTR_RO(sfnum); + +static struct attribute *sf_device_attrs[] = { + &dev_attr_sfnum.attr, + NULL, +}; + +static const struct attribute_group sf_attr_group = { + .attrs = sf_device_attrs, +}; + +static const struct attribute_group *sf_attr_groups[2] = { + &sf_attr_group, + NULL +}; + +static void mlx5_sf_dev_release(struct device *device) +{ + struct auxiliary_device *adev = container_of(device, struct auxiliary_device, dev); + struct mlx5_sf_dev *sf_dev = container_of(adev, struct mlx5_sf_dev, adev); + + mlx5_adev_idx_free(adev->id); + kfree(sf_dev); +} + +static void mlx5_sf_dev_remove(struct mlx5_sf_dev *sf_dev) +{ + auxiliary_device_delete(&sf_dev->adev); + auxiliary_device_uninit(&sf_dev->adev); +} + +static void mlx5_sf_dev_add(struct mlx5_core_dev *dev, u16 sf_index, u32 sfnum) +{ + struct mlx5_sf_dev_table *table = dev->priv.sf_dev_table; + struct mlx5_sf_dev *sf_dev; + struct pci_dev *pdev; + int err; + int id; + + id = mlx5_adev_idx_alloc(); + if (id < 0) { + err = id; + goto add_err; + } + + sf_dev = kzalloc(sizeof(*sf_dev), GFP_KERNEL); + if (!sf_dev) { + mlx5_adev_idx_free(id); + err = -ENOMEM; + goto add_err; + } + pdev = dev->pdev; + sf_dev->adev.id = id; + sf_dev->adev.name = MLX5_SF_DEV_ID_NAME; + sf_dev->adev.dev.release = mlx5_sf_dev_release; + sf_dev->adev.dev.parent = &pdev->dev; + sf_dev->adev.dev.groups = sf_attr_groups; + sf_dev->sfnum = sfnum; + sf_dev->parent_mdev = dev; + + if (!table->max_sfs) { + mlx5_adev_idx_free(id); + kfree(sf_dev); + err = -EOPNOTSUPP; + goto add_err; + } + sf_dev->bar_base_addr = table->base_address + (sf_index * table->sf_bar_length); + + err = auxiliary_device_init(&sf_dev->adev); + if (err) { + mlx5_adev_idx_free(id); + kfree(sf_dev); + goto add_err; + } + + err = auxiliary_device_add(&sf_dev->adev); + if (err) { + put_device(&sf_dev->adev.dev); + goto add_err; + } + + err = xa_insert(&table->devices, sf_index, sf_dev, GFP_KERNEL); + if (err) + goto xa_err; + return; + +xa_err: + mlx5_sf_dev_remove(sf_dev); +add_err: + mlx5_core_err(dev, "SF DEV: fail device add for index=%d sfnum=%d err=%d\n", + sf_index, sfnum, err); +} + +static void mlx5_sf_dev_del(struct mlx5_core_dev *dev, struct mlx5_sf_dev *sf_dev, u16 sf_index) +{ + struct mlx5_sf_dev_table *table = dev->priv.sf_dev_table; + + xa_erase(&table->devices, sf_index); + mlx5_sf_dev_remove(sf_dev); +} + +static int +mlx5_sf_dev_state_change_handler(struct notifier_block *nb, unsigned long event_code, void *data) +{ + struct mlx5_sf_dev_table *table = container_of(nb, struct mlx5_sf_dev_table, nb); + const struct mlx5_vhca_state_event *event = data; + struct mlx5_sf_dev *sf_dev; + u16 sf_index; + + sf_index = event->function_id - MLX5_CAP_GEN(table->dev, sf_base_id); + sf_dev = xa_load(&table->devices, sf_index); + switch (event->new_vhca_state) { + case MLX5_VHCA_STATE_ALLOCATED: + if (sf_dev) + mlx5_sf_dev_del(table->dev, sf_dev, sf_index); + break; + case MLX5_VHCA_STATE_TEARDOWN_REQUEST: + if (sf_dev) + mlx5_sf_dev_del(table->dev, sf_dev, sf_index); + else + mlx5_core_err(table->dev, + "SF DEV: teardown state for invalid dev index=%d fn_id=0x%x\n", + sf_index, event->sw_function_id); + break; + case MLX5_VHCA_STATE_ACTIVE: + if (!sf_dev) + mlx5_sf_dev_add(table->dev, sf_index, event->sw_function_id); + break; + default: + break; + } + return 0; +} + +static int mlx5_sf_dev_vhca_arm_all(struct mlx5_sf_dev_table *table) +{ + struct mlx5_core_dev *dev = table->dev; + u16 max_functions; + u16 function_id; + int err = 0; + bool ecpu; + int i; + + max_functions = mlx5_sf_max_functions(dev); + function_id = MLX5_CAP_GEN(dev, sf_base_id); + ecpu = mlx5_read_embedded_cpu(dev); + /* Arm the vhca context as the vhca event notifier */ + for (i = 0; i < max_functions; i++) { + err = mlx5_vhca_event_arm(dev, function_id, ecpu); + if (err) + return err; + + function_id++; + } + return 0; +} + +void mlx5_sf_dev_table_create(struct mlx5_core_dev *dev) +{ + struct mlx5_sf_dev_table *table; + unsigned int max_sfs; + int err; + + if (!mlx5_sf_dev_supported(dev) || !mlx5_vhca_event_supported(dev)) + return; + + table = kzalloc(sizeof(*table), GFP_KERNEL); + if (!table) { + err = -ENOMEM; + goto table_err; + } + + table->nb.notifier_call = mlx5_sf_dev_state_change_handler; + table->dev = dev; + if (MLX5_CAP_GEN(dev, max_num_sf)) + max_sfs = MLX5_CAP_GEN(dev, max_num_sf); + else + max_sfs = 1 << MLX5_CAP_GEN(dev, log_max_sf); + table->sf_bar_length = 1 << (MLX5_CAP_GEN(dev, log_min_sf_size) + 12); + table->base_address = pci_resource_start(dev->pdev, 2); + table->max_sfs = max_sfs; + xa_init(&table->devices); + dev->priv.sf_dev_table = table; + + err = mlx5_vhca_event_notifier_register(dev, &table->nb); + if (err) + goto vhca_err; + err = mlx5_sf_dev_vhca_arm_all(table); + if (err) + goto arm_err; + mlx5_core_dbg(dev, "SF DEV: max sf devices=%d\n", max_sfs); + return; + +arm_err: + mlx5_vhca_event_notifier_unregister(dev, &table->nb); +vhca_err: + table->max_sfs = 0; + kfree(table); + dev->priv.sf_dev_table = NULL; +table_err: + mlx5_core_err(dev, "SF DEV table create err = %d\n", err); +} + +static void mlx5_sf_dev_destroy_all(struct mlx5_sf_dev_table *table) +{ + struct mlx5_sf_dev *sf_dev; + unsigned long index; + + xa_for_each(&table->devices, index, sf_dev) { + xa_erase(&table->devices, index); + mlx5_sf_dev_remove(sf_dev); + } +} + +void mlx5_sf_dev_table_destroy(struct mlx5_core_dev *dev) +{ + struct mlx5_sf_dev_table *table = dev->priv.sf_dev_table; + + if (!table) + return; + + mlx5_vhca_event_notifier_unregister(dev, &table->nb); + + /* Now that event handler is not running, it is safe to destroy + * the sf device without race. + */ + mlx5_sf_dev_destroy_all(table); + + WARN_ON(!xa_empty(&table->devices)); + kfree(table); + dev->priv.sf_dev_table = NULL; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.h b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.h new file mode 100644 index 0000000000000..a6fb7289ba2c8 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.h @@ -0,0 +1,35 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2020 Mellanox Technologies Ltd */ + +#ifndef __MLX5_SF_DEV_H__ +#define __MLX5_SF_DEV_H__ + +#ifdef CONFIG_MLX5_SF + +#include + +#define MLX5_SF_DEV_ID_NAME "sf" + +struct mlx5_sf_dev { + struct auxiliary_device adev; + struct mlx5_core_dev *parent_mdev; + phys_addr_t bar_base_addr; + u32 sfnum; +}; + +void mlx5_sf_dev_table_create(struct mlx5_core_dev *dev); +void mlx5_sf_dev_table_destroy(struct mlx5_core_dev *dev); + +#else + +static inline void mlx5_sf_dev_table_create(struct mlx5_core_dev *dev) +{ +} + +static inline void mlx5_sf_dev_table_destroy(struct mlx5_core_dev *dev) +{ +} + +#endif + +#endif diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index ffba0786051e6..08e5fbe97df06 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -508,6 +508,7 @@ struct mlx5_fw_reset; struct mlx5_eq_table; struct mlx5_irq_table; struct mlx5_vhca_state_notifier; +struct mlx5_sf_dev_table; struct mlx5_rate_limit { u32 rate; @@ -606,6 +607,7 @@ struct mlx5_priv { struct mlx5_uars_page *uar; #ifdef CONFIG_MLX5_SF struct mlx5_vhca_state_notifier *vhca_state_notifier; + struct mlx5_sf_dev_table *sf_dev_table; #endif }; -- GitLab From 1958fc2f0712ae771bfd2351b55e5a5b6b0bcfa4 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Fri, 11 Dec 2020 22:12:18 -0800 Subject: [PATCH 1266/2012] net/mlx5: SF, Add auxiliary device driver Add auxiliary device driver for mlx5 subfunction auxiliary device. A mlx5 subfunction is similar to PCI PF and VF. For a subfunction an auxiliary device is created. As a result, when mlx5 SF auxiliary device binds to the driver, its netdev and rdma device are created, they appear as $ ls -l /sys/bus/auxiliary/devices/ mlx5_core.sf.4 -> ../../../devices/pci0000:00/0000:00:03.0/0000:06:00.0/mlx5_core.sf.4 $ ls -l /sys/class/net/eth1/device /sys/class/net/eth1/device -> ../../../mlx5_core.sf.4 $ cat /sys/bus/auxiliary/devices/mlx5_core.sf.4/sfnum 88 $ devlink dev show pci/0000:06:00.0 auxiliary/mlx5_core.sf.4 $ devlink port show auxiliary/mlx5_core.sf.4/1 auxiliary/mlx5_core.sf.4/1: type eth netdev p0sf88 flavour virtual port 0 splittable false $ rdma link show mlx5_0/1 link mlx5_0/1 state ACTIVE physical_state LINK_UP netdev p0sf88 $ rdma dev show 8: rocep6s0f1: node_type ca fw 16.29.0550 node_guid 248a:0703:00b3:d113 sys_image_guid 248a:0703:00b3:d112 13: mlx5_0: node_type ca fw 16.29.0550 node_guid 0000:00ff:fe00:8888 sys_image_guid 248a:0703:00b3:d112 In future, devlink device instance name will adapt to have sfnum annotation using either an alias or as devlink instance name described in RFC [1]. [1] https://lore.kernel.org/netdev/20200519092258.GF4655@nanopsycho/ Signed-off-by: Parav Pandit Reviewed-by: Vu Pham Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/Makefile | 2 +- .../net/ethernet/mellanox/mlx5/core/devlink.c | 12 +++ drivers/net/ethernet/mellanox/mlx5/core/eq.c | 2 +- .../net/ethernet/mellanox/mlx5/core/main.c | 12 ++- .../ethernet/mellanox/mlx5/core/mlx5_core.h | 10 ++ .../net/ethernet/mellanox/mlx5/core/pci_irq.c | 20 ++++ .../ethernet/mellanox/mlx5/core/sf/dev/dev.c | 10 ++ .../ethernet/mellanox/mlx5/core/sf/dev/dev.h | 20 ++++ .../mellanox/mlx5/core/sf/dev/driver.c | 101 ++++++++++++++++++ include/linux/mlx5/driver.h | 4 +- 10 files changed, 187 insertions(+), 6 deletions(-) create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index f5b2e91013480..43fa5efd403cb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -89,4 +89,4 @@ mlx5_core-$(CONFIG_MLX5_SW_STEERING) += steering/dr_domain.o steering/dr_table.o # # SF device # -mlx5_core-$(CONFIG_MLX5_SF) += sf/vhca_event.o sf/dev/dev.o +mlx5_core-$(CONFIG_MLX5_SF) += sf/vhca_event.o sf/dev/dev.o sf/dev/driver.o diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c index 3261d0dc11044..9afe918c5827c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c @@ -7,6 +7,7 @@ #include "fw_reset.h" #include "fs_core.h" #include "eswitch.h" +#include "sf/dev/dev.h" static int mlx5_devlink_flash_update(struct devlink *devlink, struct devlink_flash_update_params *params, @@ -127,6 +128,17 @@ static int mlx5_devlink_reload_down(struct devlink *devlink, bool netns_change, struct netlink_ext_ack *extack) { struct mlx5_core_dev *dev = devlink_priv(devlink); + bool sf_dev_allocated; + + sf_dev_allocated = mlx5_sf_dev_allocated(dev); + if (sf_dev_allocated) { + /* Reload results in deleting SF device which further results in + * unregistering devlink instance while holding devlink_mutext. + * Hence, do not support reload. + */ + NL_SET_ERR_MSG_MOD(extack, "reload is unsupported when SFs are allocated\n"); + return -EOPNOTSUPP; + } switch (action) { case DEVLINK_RELOAD_ACTION_DRIVER_REINIT: diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index 421febebc658f..174dfbc996c61 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -467,7 +467,7 @@ int mlx5_eq_table_init(struct mlx5_core_dev *dev) for (i = 0; i < MLX5_EVENT_TYPE_MAX; i++) ATOMIC_INIT_NOTIFIER_HEAD(&eq_table->nh[i]); - eq_table->irq_table = dev->priv.irq_table; + eq_table->irq_table = mlx5_irq_table_get(dev); return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 26b5502712a62..ab68320e55167 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -84,7 +84,6 @@ unsigned int mlx5_core_debug_mask; module_param_named(debug_mask, mlx5_core_debug_mask, uint, 0644); MODULE_PARM_DESC(debug_mask, "debug mask: 1 = dump cmd data, 2 = dump cmd exec time, 3 = both. Default=0"); -#define MLX5_DEFAULT_PROF 2 static unsigned int prof_sel = MLX5_DEFAULT_PROF; module_param_named(prof_sel, prof_sel, uint, 0444); MODULE_PARM_DESC(prof_sel, "profile selector. Valid range 0 - 2"); @@ -1303,7 +1302,7 @@ out: mutex_unlock(&dev->intf_state_mutex); } -static int mlx5_mdev_init(struct mlx5_core_dev *dev, int profile_idx) +int mlx5_mdev_init(struct mlx5_core_dev *dev, int profile_idx) { struct mlx5_priv *priv = &dev->priv; int err; @@ -1353,7 +1352,7 @@ err_health_init: return err; } -static void mlx5_mdev_uninit(struct mlx5_core_dev *dev) +void mlx5_mdev_uninit(struct mlx5_core_dev *dev) { struct mlx5_priv *priv = &dev->priv; @@ -1696,6 +1695,10 @@ static int __init init(void) if (err) goto err_debug; + err = mlx5_sf_driver_register(); + if (err) + goto err_sf; + #ifdef CONFIG_MLX5_CORE_EN err = mlx5e_init(); if (err) { @@ -1706,6 +1709,8 @@ static int __init init(void) return 0; +err_sf: + pci_unregister_driver(&mlx5_core_driver); err_debug: mlx5_unregister_debugfs(); return err; @@ -1716,6 +1721,7 @@ static void __exit cleanup(void) #ifdef CONFIG_MLX5_CORE_EN mlx5e_cleanup(); #endif + mlx5_sf_driver_unregister(); pci_unregister_driver(&mlx5_core_driver); mlx5_unregister_debugfs(); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index a33b7496d748b..3754ef98554f4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -117,6 +117,8 @@ enum mlx5_semaphore_space_address { MLX5_SEMAPHORE_SW_RESET = 0x20, }; +#define MLX5_DEFAULT_PROF 2 + int mlx5_query_hca_caps(struct mlx5_core_dev *dev); int mlx5_query_board_id(struct mlx5_core_dev *dev); int mlx5_cmd_init(struct mlx5_core_dev *dev); @@ -176,6 +178,7 @@ struct cpumask * mlx5_irq_get_affinity_mask(struct mlx5_irq_table *irq_table, int vecidx); struct cpu_rmap *mlx5_irq_get_rmap(struct mlx5_irq_table *table); int mlx5_irq_get_num_comp(struct mlx5_irq_table *table); +struct mlx5_irq_table *mlx5_irq_table_get(struct mlx5_core_dev *dev); int mlx5_events_init(struct mlx5_core_dev *dev); void mlx5_events_cleanup(struct mlx5_core_dev *dev); @@ -257,6 +260,13 @@ enum { u8 mlx5_get_nic_state(struct mlx5_core_dev *dev); void mlx5_set_nic_state(struct mlx5_core_dev *dev, u8 state); +static inline bool mlx5_core_is_sf(const struct mlx5_core_dev *dev) +{ + return dev->coredev_type == MLX5_COREDEV_SF; +} + +int mlx5_mdev_init(struct mlx5_core_dev *dev, int profile_idx); +void mlx5_mdev_uninit(struct mlx5_core_dev *dev); void mlx5_unload_one(struct mlx5_core_dev *dev, bool cleanup); int mlx5_load_one(struct mlx5_core_dev *dev, bool boot); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c index 6fd9749203944..a61e09aff1523 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c @@ -30,6 +30,9 @@ int mlx5_irq_table_init(struct mlx5_core_dev *dev) { struct mlx5_irq_table *irq_table; + if (mlx5_core_is_sf(dev)) + return 0; + irq_table = kvzalloc(sizeof(*irq_table), GFP_KERNEL); if (!irq_table) return -ENOMEM; @@ -40,6 +43,9 @@ int mlx5_irq_table_init(struct mlx5_core_dev *dev) void mlx5_irq_table_cleanup(struct mlx5_core_dev *dev) { + if (mlx5_core_is_sf(dev)) + return; + kvfree(dev->priv.irq_table); } @@ -268,6 +274,9 @@ int mlx5_irq_table_create(struct mlx5_core_dev *dev) int nvec; int err; + if (mlx5_core_is_sf(dev)) + return 0; + nvec = MLX5_CAP_GEN(dev, num_ports) * num_online_cpus() + MLX5_IRQ_VEC_COMP_BASE; nvec = min_t(int, nvec, num_eqs); @@ -319,6 +328,9 @@ void mlx5_irq_table_destroy(struct mlx5_core_dev *dev) struct mlx5_irq_table *table = dev->priv.irq_table; int i; + if (mlx5_core_is_sf(dev)) + return; + /* free_irq requires that affinity and rmap will be cleared * before calling it. This is why there is asymmetry with set_rmap * which should be called after alloc_irq but before request_irq. @@ -332,3 +344,11 @@ void mlx5_irq_table_destroy(struct mlx5_core_dev *dev) kfree(table->irq); } +struct mlx5_irq_table *mlx5_irq_table_get(struct mlx5_core_dev *dev) +{ +#ifdef CONFIG_MLX5_SF + if (mlx5_core_is_sf(dev)) + return dev->priv.parent_mdev->priv.irq_table; +#endif + return dev->priv.irq_table; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.c b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.c index 4a8eeb7c853ee..b265f27b2166d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.c @@ -24,6 +24,16 @@ static bool mlx5_sf_dev_supported(const struct mlx5_core_dev *dev) return MLX5_CAP_GEN(dev, sf) && mlx5_vhca_event_supported(dev); } +bool mlx5_sf_dev_allocated(const struct mlx5_core_dev *dev) +{ + struct mlx5_sf_dev_table *table = dev->priv.sf_dev_table; + + if (!mlx5_sf_dev_supported(dev)) + return false; + + return !xa_empty(&table->devices); +} + static ssize_t sfnum_show(struct device *dev, struct device_attribute *attr, char *buf) { struct auxiliary_device *adev = container_of(dev, struct auxiliary_device, dev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.h b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.h index a6fb7289ba2c8..4de02902aef11 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.h @@ -13,6 +13,7 @@ struct mlx5_sf_dev { struct auxiliary_device adev; struct mlx5_core_dev *parent_mdev; + struct mlx5_core_dev *mdev; phys_addr_t bar_base_addr; u32 sfnum; }; @@ -20,6 +21,11 @@ struct mlx5_sf_dev { void mlx5_sf_dev_table_create(struct mlx5_core_dev *dev); void mlx5_sf_dev_table_destroy(struct mlx5_core_dev *dev); +int mlx5_sf_driver_register(void); +void mlx5_sf_driver_unregister(void); + +bool mlx5_sf_dev_allocated(const struct mlx5_core_dev *dev); + #else static inline void mlx5_sf_dev_table_create(struct mlx5_core_dev *dev) @@ -30,6 +36,20 @@ static inline void mlx5_sf_dev_table_destroy(struct mlx5_core_dev *dev) { } +static inline int mlx5_sf_driver_register(void) +{ + return 0; +} + +static inline void mlx5_sf_driver_unregister(void) +{ +} + +static inline bool mlx5_sf_dev_allocated(const struct mlx5_core_dev *dev) +{ + return 0; +} + #endif #endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c new file mode 100644 index 0000000000000..daf63a8115e07 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c @@ -0,0 +1,101 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2020 Mellanox Technologies Ltd */ + +#include +#include +#include "mlx5_core.h" +#include "dev.h" +#include "devlink.h" + +static int mlx5_sf_dev_probe(struct auxiliary_device *adev, const struct auxiliary_device_id *id) +{ + struct mlx5_sf_dev *sf_dev = container_of(adev, struct mlx5_sf_dev, adev); + struct mlx5_core_dev *mdev; + struct devlink *devlink; + int err; + + devlink = mlx5_devlink_alloc(); + if (!devlink) + return -ENOMEM; + + mdev = devlink_priv(devlink); + mdev->device = &adev->dev; + mdev->pdev = sf_dev->parent_mdev->pdev; + mdev->bar_addr = sf_dev->bar_base_addr; + mdev->iseg_base = sf_dev->bar_base_addr; + mdev->coredev_type = MLX5_COREDEV_SF; + mdev->priv.parent_mdev = sf_dev->parent_mdev; + mdev->priv.adev_idx = adev->id; + sf_dev->mdev = mdev; + + err = mlx5_mdev_init(mdev, MLX5_DEFAULT_PROF); + if (err) { + mlx5_core_warn(mdev, "mlx5_mdev_init on err=%d\n", err); + goto mdev_err; + } + + mdev->iseg = ioremap(mdev->iseg_base, sizeof(*mdev->iseg)); + if (!mdev->iseg) { + mlx5_core_warn(mdev, "remap error\n"); + goto remap_err; + } + + err = mlx5_load_one(mdev, true); + if (err) { + mlx5_core_warn(mdev, "mlx5_load_one err=%d\n", err); + goto load_one_err; + } + return 0; + +load_one_err: + iounmap(mdev->iseg); +remap_err: + mlx5_mdev_uninit(mdev); +mdev_err: + mlx5_devlink_free(devlink); + return err; +} + +static void mlx5_sf_dev_remove(struct auxiliary_device *adev) +{ + struct mlx5_sf_dev *sf_dev = container_of(adev, struct mlx5_sf_dev, adev); + struct devlink *devlink; + + devlink = priv_to_devlink(sf_dev->mdev); + mlx5_unload_one(sf_dev->mdev, true); + iounmap(sf_dev->mdev->iseg); + mlx5_mdev_uninit(sf_dev->mdev); + mlx5_devlink_free(devlink); +} + +static void mlx5_sf_dev_shutdown(struct auxiliary_device *adev) +{ + struct mlx5_sf_dev *sf_dev = container_of(adev, struct mlx5_sf_dev, adev); + + mlx5_unload_one(sf_dev->mdev, false); +} + +static const struct auxiliary_device_id mlx5_sf_dev_id_table[] = { + { .name = MLX5_ADEV_NAME "." MLX5_SF_DEV_ID_NAME, }, + { }, +}; + +MODULE_DEVICE_TABLE(auxiliary, mlx5_sf_dev_id_table); + +static struct auxiliary_driver mlx5_sf_driver = { + .name = MLX5_SF_DEV_ID_NAME, + .probe = mlx5_sf_dev_probe, + .remove = mlx5_sf_dev_remove, + .shutdown = mlx5_sf_dev_shutdown, + .id_table = mlx5_sf_dev_id_table, +}; + +int mlx5_sf_driver_register(void) +{ + return auxiliary_driver_register(&mlx5_sf_driver); +} + +void mlx5_sf_driver_unregister(void) +{ + auxiliary_driver_unregister(&mlx5_sf_driver); +} diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 08e5fbe97df06..48e3638b1185b 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -193,7 +193,8 @@ enum port_state_policy { enum mlx5_coredev_type { MLX5_COREDEV_PF, - MLX5_COREDEV_VF + MLX5_COREDEV_VF, + MLX5_COREDEV_SF, }; struct mlx5_field_desc { @@ -608,6 +609,7 @@ struct mlx5_priv { #ifdef CONFIG_MLX5_SF struct mlx5_vhca_state_notifier *vhca_state_notifier; struct mlx5_sf_dev_table *sf_dev_table; + struct mlx5_core_dev *parent_mdev; #endif }; -- GitLab From d7f33a457beef8d522f346d18ab0a1e3366dc20f Mon Sep 17 00:00:00 2001 From: Vu Pham Date: Fri, 11 Dec 2020 22:12:19 -0800 Subject: [PATCH 1267/2012] net/mlx5: E-switch, Prepare eswitch to handle SF vport Prepare eswitch to handle SF vport during (a) querying eswitch functions (b) egress ACL creation (c) account for SF vports in total vports calculation Assign a dedicated placeholder for SFs vports and their representors. They are placed after VFs vports and before ECPF vports as below: [PF,VF0,...,VFn,SF0,...SFm,ECPF,UPLINK]. Change functions to map SF's vport numbers to indices when accessing the vports or representors arrays, and vice versa. Signed-off-by: Vu Pham Signed-off-by: Parav Pandit Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- .../mellanox/mlx5/core/esw/acl/egress_ofld.c | 2 +- .../net/ethernet/mellanox/mlx5/core/eswitch.c | 11 +++- .../net/ethernet/mellanox/mlx5/core/eswitch.h | 50 +++++++++++++++++++ .../mellanox/mlx5/core/eswitch_offloads.c | 11 ++++ .../net/ethernet/mellanox/mlx5/core/vport.c | 3 +- 5 files changed, 73 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_ofld.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_ofld.c index 4c74e2690d57b..26b37a0f87629 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_ofld.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_ofld.c @@ -150,7 +150,7 @@ static void esw_acl_egress_ofld_groups_destroy(struct mlx5_vport *vport) static bool esw_acl_egress_needed(const struct mlx5_eswitch *esw, u16 vport_num) { - return mlx5_eswitch_is_vf_vport(esw, vport_num); + return mlx5_eswitch_is_vf_vport(esw, vport_num) || mlx5_esw_is_sf_vport(esw, vport_num); } int esw_acl_egress_ofld_setup(struct mlx5_eswitch *esw, struct mlx5_vport *vport) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 876e6449edb33..55ebd9474d97d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -1365,9 +1365,15 @@ const u32 *mlx5_esw_query_functions(struct mlx5_core_dev *dev) { int outlen = MLX5_ST_SZ_BYTES(query_esw_functions_out); u32 in[MLX5_ST_SZ_DW(query_esw_functions_in)] = {}; + u16 max_sf_vports; u32 *out; int err; + max_sf_vports = mlx5_sf_max_functions(dev); + /* Device interface is array of 64-bits */ + if (max_sf_vports) + outlen += DIV_ROUND_UP(max_sf_vports, BITS_PER_TYPE(__be64)) * sizeof(__be64); + out = kvzalloc(outlen, GFP_KERNEL); if (!out) return ERR_PTR(-ENOMEM); @@ -1375,7 +1381,7 @@ const u32 *mlx5_esw_query_functions(struct mlx5_core_dev *dev) MLX5_SET(query_esw_functions_in, in, opcode, MLX5_CMD_OP_QUERY_ESW_FUNCTIONS); - err = mlx5_cmd_exec_inout(dev, query_esw_functions, in, out); + err = mlx5_cmd_exec(dev, in, sizeof(in), out, outlen); if (!err) return out; @@ -1898,7 +1904,8 @@ static bool is_port_function_supported(const struct mlx5_eswitch *esw, u16 vport_num) { return vport_num == MLX5_VPORT_PF || - mlx5_eswitch_is_vf_vport(esw, vport_num); + mlx5_eswitch_is_vf_vport(esw, vport_num) || + mlx5_esw_is_sf_vport(esw, vport_num); } int mlx5_devlink_port_function_hw_addr_get(struct devlink *devlink, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index cf87de94418ff..4e3ed878ff031 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -43,6 +43,7 @@ #include #include "lib/mpfs.h" #include "lib/fs_chains.h" +#include "sf/sf.h" #include "en/tc_ct.h" #ifdef CONFIG_MLX5_ESWITCH @@ -499,6 +500,40 @@ static inline u16 mlx5_eswitch_first_host_vport_num(struct mlx5_core_dev *dev) MLX5_VPORT_PF : MLX5_VPORT_FIRST_VF; } +static inline int mlx5_esw_sf_start_idx(const struct mlx5_eswitch *esw) +{ + /* PF and VF vports indices start from 0 to max_vfs */ + return MLX5_VPORT_PF_PLACEHOLDER + mlx5_core_max_vfs(esw->dev); +} + +static inline int mlx5_esw_sf_end_idx(const struct mlx5_eswitch *esw) +{ + return mlx5_esw_sf_start_idx(esw) + mlx5_sf_max_functions(esw->dev); +} + +static inline int +mlx5_esw_sf_vport_num_to_index(const struct mlx5_eswitch *esw, u16 vport_num) +{ + return vport_num - mlx5_sf_start_function_id(esw->dev) + + MLX5_VPORT_PF_PLACEHOLDER + mlx5_core_max_vfs(esw->dev); +} + +static inline u16 +mlx5_esw_sf_vport_index_to_num(const struct mlx5_eswitch *esw, int idx) +{ + return mlx5_sf_start_function_id(esw->dev) + idx - + (MLX5_VPORT_PF_PLACEHOLDER + mlx5_core_max_vfs(esw->dev)); +} + +static inline bool +mlx5_esw_is_sf_vport(const struct mlx5_eswitch *esw, u16 vport_num) +{ + return mlx5_sf_supported(esw->dev) && + vport_num >= mlx5_sf_start_function_id(esw->dev) && + (vport_num < (mlx5_sf_start_function_id(esw->dev) + + mlx5_sf_max_functions(esw->dev))); +} + static inline bool mlx5_eswitch_is_funcs_handler(const struct mlx5_core_dev *dev) { return mlx5_core_is_ecpf_esw_manager(dev); @@ -527,6 +562,10 @@ static inline int mlx5_eswitch_vport_num_to_index(struct mlx5_eswitch *esw, if (vport_num == MLX5_VPORT_UPLINK) return mlx5_eswitch_uplink_idx(esw); + if (mlx5_esw_is_sf_vport(esw, vport_num)) + return mlx5_esw_sf_vport_num_to_index(esw, vport_num); + + /* PF and VF vports start from 0 to max_vfs */ return vport_num; } @@ -540,6 +579,12 @@ static inline u16 mlx5_eswitch_index_to_vport_num(struct mlx5_eswitch *esw, if (index == mlx5_eswitch_uplink_idx(esw)) return MLX5_VPORT_UPLINK; + /* SF vports indices are after VFs and before ECPF */ + if (mlx5_sf_supported(esw->dev) && + index > mlx5_core_max_vfs(esw->dev)) + return mlx5_esw_sf_vport_index_to_num(esw, index); + + /* PF and VF vports start from 0 to max_vfs */ return index; } @@ -625,6 +670,11 @@ void mlx5e_tc_clean_fdb_peer_flows(struct mlx5_eswitch *esw); for ((vport) = (nvfs); \ (vport) >= (esw)->first_host_vport; (vport)--) +#define mlx5_esw_for_each_sf_rep(esw, i, rep) \ + for ((i) = mlx5_esw_sf_start_idx(esw); \ + (rep) = &(esw)->offloads.vport_reps[(i)], \ + (i) < mlx5_esw_sf_end_idx(esw); (i++)) + struct mlx5_eswitch *mlx5_devlink_eswitch_get(struct devlink *devlink); struct mlx5_vport *__must_check mlx5_eswitch_get_vport(struct mlx5_eswitch *esw, u16 vport_num); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 2f6a0ae206503..2d241f7351b5a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -1800,11 +1800,22 @@ static void __esw_offloads_unload_rep(struct mlx5_eswitch *esw, esw->offloads.rep_ops[rep_type]->unload(rep); } +static void __unload_reps_sf_vport(struct mlx5_eswitch *esw, u8 rep_type) +{ + struct mlx5_eswitch_rep *rep; + int i; + + mlx5_esw_for_each_sf_rep(esw, i, rep) + __esw_offloads_unload_rep(esw, rep, rep_type); +} + static void __unload_reps_all_vport(struct mlx5_eswitch *esw, u8 rep_type) { struct mlx5_eswitch_rep *rep; int i; + __unload_reps_sf_vport(esw, rep_type); + mlx5_esw_for_each_vf_rep_reverse(esw, i, rep, esw->esw_funcs.num_vfs) __esw_offloads_unload_rep(esw, rep, rep_type); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c index bdafc85fd874d..ba78e0660523c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c @@ -36,6 +36,7 @@ #include #include #include "mlx5_core.h" +#include "sf/sf.h" /* Mutex to hold while enabling or disabling RoCE */ static DEFINE_MUTEX(mlx5_roce_en_lock); @@ -1160,6 +1161,6 @@ EXPORT_SYMBOL_GPL(mlx5_query_nic_system_image_guid); */ u16 mlx5_eswitch_get_total_vports(const struct mlx5_core_dev *dev) { - return MLX5_SPECIAL_VPORTS(dev) + mlx5_core_max_vfs(dev); + return MLX5_SPECIAL_VPORTS(dev) + mlx5_core_max_vfs(dev) + mlx5_sf_max_functions(dev); } EXPORT_SYMBOL_GPL(mlx5_eswitch_get_total_vports); -- GitLab From d970812b91d0fa685cde35e9b3f46a48d049f4e3 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Fri, 11 Dec 2020 22:12:20 -0800 Subject: [PATCH 1268/2012] net/mlx5: E-switch, Add eswitch helpers for SF vport Add helpers to enable/disable eswitch port, register its devlink port and load its representor. Signed-off-by: Vu Pham Signed-off-by: Parav Pandit Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- .../mellanox/mlx5/core/esw/devlink_port.c | 41 +++++++++++++++++++ .../net/ethernet/mellanox/mlx5/core/eswitch.c | 12 +++--- .../net/ethernet/mellanox/mlx5/core/eswitch.h | 16 ++++++++ .../mellanox/mlx5/core/eswitch_offloads.c | 36 +++++++++++++++- 4 files changed, 97 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c index ffff11baa3d04..cb1e181f4c6ab 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c @@ -122,3 +122,44 @@ struct devlink_port *mlx5_esw_offloads_devlink_port(struct mlx5_eswitch *esw, u1 vport = mlx5_eswitch_get_vport(esw, vport_num); return vport->dl_port; } + +int mlx5_esw_devlink_sf_port_register(struct mlx5_eswitch *esw, struct devlink_port *dl_port, + u16 vport_num, u32 sfnum) +{ + struct mlx5_core_dev *dev = esw->dev; + struct netdev_phys_item_id ppid = {}; + unsigned int dl_port_index; + struct mlx5_vport *vport; + struct devlink *devlink; + u16 pfnum; + int err; + + vport = mlx5_eswitch_get_vport(esw, vport_num); + if (IS_ERR(vport)) + return PTR_ERR(vport); + + pfnum = PCI_FUNC(dev->pdev->devfn); + mlx5_esw_get_port_parent_id(dev, &ppid); + memcpy(dl_port->attrs.switch_id.id, &ppid.id[0], ppid.id_len); + dl_port->attrs.switch_id.id_len = ppid.id_len; + devlink_port_attrs_pci_sf_set(dl_port, 0, pfnum, sfnum); + devlink = priv_to_devlink(dev); + dl_port_index = mlx5_esw_vport_to_devlink_port_index(dev, vport_num); + err = devlink_port_register(devlink, dl_port, dl_port_index); + if (err) + return err; + + vport->dl_port = dl_port; + return 0; +} + +void mlx5_esw_devlink_sf_port_unregister(struct mlx5_eswitch *esw, u16 vport_num) +{ + struct mlx5_vport *vport; + + vport = mlx5_eswitch_get_vport(esw, vport_num); + if (IS_ERR(vport)) + return; + devlink_port_unregister(vport->dl_port); + vport->dl_port = NULL; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 55ebd9474d97d..538d2e44a589f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -1272,8 +1272,8 @@ static void esw_vport_cleanup(struct mlx5_eswitch *esw, struct mlx5_vport *vport esw_vport_cleanup_acl(esw, vport); } -static int esw_enable_vport(struct mlx5_eswitch *esw, u16 vport_num, - enum mlx5_eswitch_vport_event enabled_events) +int mlx5_esw_vport_enable(struct mlx5_eswitch *esw, u16 vport_num, + enum mlx5_eswitch_vport_event enabled_events) { struct mlx5_vport *vport; int ret; @@ -1309,7 +1309,7 @@ done: return ret; } -static void esw_disable_vport(struct mlx5_eswitch *esw, u16 vport_num) +void mlx5_esw_vport_disable(struct mlx5_eswitch *esw, u16 vport_num) { struct mlx5_vport *vport; @@ -1431,7 +1431,7 @@ int mlx5_eswitch_load_vport(struct mlx5_eswitch *esw, u16 vport_num, { int err; - err = esw_enable_vport(esw, vport_num, enabled_events); + err = mlx5_esw_vport_enable(esw, vport_num, enabled_events); if (err) return err; @@ -1442,14 +1442,14 @@ int mlx5_eswitch_load_vport(struct mlx5_eswitch *esw, u16 vport_num, return err; err_rep: - esw_disable_vport(esw, vport_num); + mlx5_esw_vport_disable(esw, vport_num); return err; } void mlx5_eswitch_unload_vport(struct mlx5_eswitch *esw, u16 vport_num) { esw_offloads_unload_rep(esw, vport_num); - esw_disable_vport(esw, vport_num); + mlx5_esw_vport_disable(esw, vport_num); } void mlx5_eswitch_unload_vf_vports(struct mlx5_eswitch *esw, u16 num_vfs) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index 4e3ed878ff031..54514b04808d0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -688,6 +688,10 @@ mlx5_eswitch_enable_pf_vf_vports(struct mlx5_eswitch *esw, enum mlx5_eswitch_vport_event enabled_events); void mlx5_eswitch_disable_pf_vf_vports(struct mlx5_eswitch *esw); +int mlx5_esw_vport_enable(struct mlx5_eswitch *esw, u16 vport_num, + enum mlx5_eswitch_vport_event enabled_events); +void mlx5_esw_vport_disable(struct mlx5_eswitch *esw, u16 vport_num); + int esw_vport_create_offloads_acl_tables(struct mlx5_eswitch *esw, struct mlx5_vport *vport); @@ -706,6 +710,9 @@ esw_get_max_restore_tag(struct mlx5_eswitch *esw); int esw_offloads_load_rep(struct mlx5_eswitch *esw, u16 vport_num); void esw_offloads_unload_rep(struct mlx5_eswitch *esw, u16 vport_num); +int mlx5_esw_offloads_rep_load(struct mlx5_eswitch *esw, u16 vport_num); +void mlx5_esw_offloads_rep_unload(struct mlx5_eswitch *esw, u16 vport_num); + int mlx5_eswitch_load_vport(struct mlx5_eswitch *esw, u16 vport_num, enum mlx5_eswitch_vport_event enabled_events); void mlx5_eswitch_unload_vport(struct mlx5_eswitch *esw, u16 vport_num); @@ -717,6 +724,15 @@ void mlx5_eswitch_unload_vf_vports(struct mlx5_eswitch *esw, u16 num_vfs); int mlx5_esw_offloads_devlink_port_register(struct mlx5_eswitch *esw, u16 vport_num); void mlx5_esw_offloads_devlink_port_unregister(struct mlx5_eswitch *esw, u16 vport_num); struct devlink_port *mlx5_esw_offloads_devlink_port(struct mlx5_eswitch *esw, u16 vport_num); + +int mlx5_esw_devlink_sf_port_register(struct mlx5_eswitch *esw, struct devlink_port *dl_port, + u16 vport_num, u32 sfnum); +void mlx5_esw_devlink_sf_port_unregister(struct mlx5_eswitch *esw, u16 vport_num); + +int mlx5_esw_offloads_sf_vport_enable(struct mlx5_eswitch *esw, struct devlink_port *dl_port, + u16 vport_num, u32 sfnum); +void mlx5_esw_offloads_sf_vport_disable(struct mlx5_eswitch *esw, u16 vport_num); + #else /* CONFIG_MLX5_ESWITCH */ /* eswitch API stubs */ static inline int mlx5_eswitch_init(struct mlx5_core_dev *dev) { return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 2d241f7351b5a..7f09f2bbf7c1d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -1833,7 +1833,7 @@ static void __unload_reps_all_vport(struct mlx5_eswitch *esw, u8 rep_type) __esw_offloads_unload_rep(esw, rep, rep_type); } -static int mlx5_esw_offloads_rep_load(struct mlx5_eswitch *esw, u16 vport_num) +int mlx5_esw_offloads_rep_load(struct mlx5_eswitch *esw, u16 vport_num) { struct mlx5_eswitch_rep *rep; int rep_type; @@ -1857,7 +1857,7 @@ err_reps: return err; } -static void mlx5_esw_offloads_rep_unload(struct mlx5_eswitch *esw, u16 vport_num) +void mlx5_esw_offloads_rep_unload(struct mlx5_eswitch *esw, u16 vport_num) { struct mlx5_eswitch_rep *rep; int rep_type; @@ -2835,3 +2835,35 @@ u32 mlx5_eswitch_get_vport_metadata_for_match(struct mlx5_eswitch *esw, return vport->metadata << (32 - ESW_SOURCE_PORT_METADATA_BITS); } EXPORT_SYMBOL(mlx5_eswitch_get_vport_metadata_for_match); + +int mlx5_esw_offloads_sf_vport_enable(struct mlx5_eswitch *esw, struct devlink_port *dl_port, + u16 vport_num, u32 sfnum) +{ + int err; + + err = mlx5_esw_vport_enable(esw, vport_num, MLX5_VPORT_UC_ADDR_CHANGE); + if (err) + return err; + + err = mlx5_esw_devlink_sf_port_register(esw, dl_port, vport_num, sfnum); + if (err) + goto devlink_err; + + err = mlx5_esw_offloads_rep_load(esw, vport_num); + if (err) + goto rep_err; + return 0; + +rep_err: + mlx5_esw_devlink_sf_port_unregister(esw, vport_num); +devlink_err: + mlx5_esw_vport_disable(esw, vport_num); + return err; +} + +void mlx5_esw_offloads_sf_vport_disable(struct mlx5_eswitch *esw, u16 vport_num) +{ + mlx5_esw_offloads_rep_unload(esw, vport_num); + mlx5_esw_devlink_sf_port_unregister(esw, vport_num); + mlx5_esw_vport_disable(esw, vport_num); +} -- GitLab From 8f01054186683fe0986d54d584aa13723d51edce Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Fri, 11 Dec 2020 22:12:21 -0800 Subject: [PATCH 1269/2012] net/mlx5: SF, Add port add delete functionality To handle SF port management outside of the eswitch as independent software layer, introduce eswitch notifier APIs so that mlx5 upper layer who wish to support sf port management in switchdev mode can perform its task whenever eswitch mode is set to switchdev or before eswitch is disabled. Initialize sf port table on such eswitch event. Add SF port add and delete functionality in switchdev mode. Destroy all SF ports when eswitch is disabled. Expose SF port add and delete to user via devlink commands. $ devlink dev eswitch set pci/0000:06:00.0 mode switchdev $ devlink port show pci/0000:06:00.0/65535: type eth netdev ens2f0np0 flavour physical port 0 splittable false $ devlink port add pci/0000:06:00.0 flavour pcisf pfnum 0 sfnum 88 pci/0000:06:00.0/32768: type eth netdev eth6 flavour pcisf controller 0 pfnum 0 sfnum 88 external false splittable false function: hw_addr 00:00:00:00:00:00 state inactive opstate detached $ devlink port show ens2f0npf0sf88 pci/0000:06:00.0/32768: type eth netdev ens2f0npf0sf88 flavour pcisf controller 0 pfnum 0 sfnum 88 external false splittable false function: hw_addr 00:00:00:00:00:00 state inactive opstate detached or by its unique port index: $ devlink port show pci/0000:06:00.0/32768 pci/0000:06:00.0/32768: type eth netdev ens2f0npf0sf88 flavour pcisf controller 0 pfnum 0 sfnum 88 external false splittable false function: hw_addr 00:00:00:00:00:00 state inactive opstate detached $ devlink port show ens2f0npf0sf88 -jp { "port": { "pci/0000:06:00.0/32768": { "type": "eth", "netdev": "ens2f0npf0sf88", "flavour": "pcisf", "controller": 0, "pfnum": 0, "sfnum": 88, "external": false, "splittable": false, "function": { "hw_addr": "00:00:00:00:00:00", "state": "inactive", "opstate": "detached" } } } } Signed-off-by: Parav Pandit Reviewed-by: Vu Pham Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/Kconfig | 10 + .../net/ethernet/mellanox/mlx5/core/Makefile | 5 + drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 4 + .../net/ethernet/mellanox/mlx5/core/devlink.c | 5 + .../net/ethernet/mellanox/mlx5/core/eswitch.c | 25 ++ .../net/ethernet/mellanox/mlx5/core/eswitch.h | 12 + .../net/ethernet/mellanox/mlx5/core/main.c | 18 + .../net/ethernet/mellanox/mlx5/core/sf/cmd.c | 27 ++ .../ethernet/mellanox/mlx5/core/sf/devlink.c | 316 ++++++++++++++++++ .../ethernet/mellanox/mlx5/core/sf/hw_table.c | 125 +++++++ .../net/ethernet/mellanox/mlx5/core/sf/priv.h | 17 + .../net/ethernet/mellanox/mlx5/core/sf/sf.h | 37 ++ include/linux/mlx5/driver.h | 6 + 13 files changed, 607 insertions(+) create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/sf/cmd.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/sf/hw_table.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/sf/priv.h diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig index d6c48582e7a8f..ad45d20f9d44d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig +++ b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig @@ -212,3 +212,13 @@ config MLX5_SF Build support for subfuction device in the NIC. A Mellanox subfunction device can support RDMA, netdevice and vdpa device. It is similar to a SRIOV VF but it doesn't require SRIOV support. + +config MLX5_SF_MANAGER + bool + depends on MLX5_SF && MLX5_ESWITCH + default y + help + Build support for subfuction port in the NIC. A Mellanox subfunction + port is managed through devlink. A subfunction supports RDMA, netdevice + and vdpa device. It is similar to a SRIOV VF but it doesn't require + SRIOV support. diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index 43fa5efd403cb..40ed31574c80c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -90,3 +90,8 @@ mlx5_core-$(CONFIG_MLX5_SW_STEERING) += steering/dr_domain.o steering/dr_table.o # SF device # mlx5_core-$(CONFIG_MLX5_SF) += sf/vhca_event.o sf/dev/dev.o sf/dev/driver.o + +# +# SF manager +# +mlx5_core-$(CONFIG_MLX5_SF_MANAGER) += sf/cmd.o sf/hw_table.o sf/devlink.o diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index 47dcc3ac2cf04..e8cecd50558d4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -333,6 +333,7 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op, case MLX5_CMD_OP_DEALLOC_MEMIC: case MLX5_CMD_OP_PAGE_FAULT_RESUME: case MLX5_CMD_OP_QUERY_ESW_FUNCTIONS: + case MLX5_CMD_OP_DEALLOC_SF: return MLX5_CMD_STAT_OK; case MLX5_CMD_OP_QUERY_HCA_CAP: @@ -466,6 +467,7 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op, case MLX5_CMD_OP_RELEASE_XRQ_ERROR: case MLX5_CMD_OP_QUERY_VHCA_STATE: case MLX5_CMD_OP_MODIFY_VHCA_STATE: + case MLX5_CMD_OP_ALLOC_SF: *status = MLX5_DRIVER_STATUS_ABORTED; *synd = MLX5_DRIVER_SYND; return -EIO; @@ -661,6 +663,8 @@ const char *mlx5_command_str(int command) MLX5_COMMAND_STR_CASE(MODIFY_XRQ); MLX5_COMMAND_STR_CASE(QUERY_VHCA_STATE); MLX5_COMMAND_STR_CASE(MODIFY_VHCA_STATE); + MLX5_COMMAND_STR_CASE(ALLOC_SF); + MLX5_COMMAND_STR_CASE(DEALLOC_SF); default: return "unknown command opcode"; } } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c index 9afe918c5827c..d4c0cdf5edd9d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c @@ -8,6 +8,7 @@ #include "fs_core.h" #include "eswitch.h" #include "sf/dev/dev.h" +#include "sf/sf.h" static int mlx5_devlink_flash_update(struct devlink *devlink, struct devlink_flash_update_params *params, @@ -190,6 +191,10 @@ static const struct devlink_ops mlx5_devlink_ops = { .eswitch_encap_mode_get = mlx5_devlink_eswitch_encap_mode_get, .port_function_hw_addr_get = mlx5_devlink_port_function_hw_addr_get, .port_function_hw_addr_set = mlx5_devlink_port_function_hw_addr_set, +#endif +#ifdef CONFIG_MLX5_SF_MANAGER + .port_new = mlx5_devlink_sf_port_new, + .port_del = mlx5_devlink_sf_port_del, #endif .flash_update = mlx5_devlink_flash_update, .info_get = mlx5_devlink_info_get, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 538d2e44a589f..820305b1664e3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -1599,6 +1599,15 @@ mlx5_eswitch_update_num_of_vfs(struct mlx5_eswitch *esw, int num_vfs) kvfree(out); } +static void mlx5_esw_mode_change_notify(struct mlx5_eswitch *esw, u16 mode) +{ + struct mlx5_esw_event_info info = {}; + + info.new_mode = mode; + + blocking_notifier_call_chain(&esw->n_head, 0, &info); +} + /** * mlx5_eswitch_enable_locked - Enable eswitch * @esw: Pointer to eswitch @@ -1659,6 +1668,8 @@ int mlx5_eswitch_enable_locked(struct mlx5_eswitch *esw, int mode, int num_vfs) mode == MLX5_ESWITCH_LEGACY ? "LEGACY" : "OFFLOADS", esw->esw_funcs.num_vfs, esw->enabled_vports); + mlx5_esw_mode_change_notify(esw, mode); + return 0; abort: @@ -1715,6 +1726,11 @@ void mlx5_eswitch_disable_locked(struct mlx5_eswitch *esw, bool clear_vf) esw->mode == MLX5_ESWITCH_LEGACY ? "LEGACY" : "OFFLOADS", esw->esw_funcs.num_vfs, esw->enabled_vports); + /* Notify eswitch users that it is exiting from current mode. + * So that it can do necessary cleanup before the eswitch is disabled. + */ + mlx5_esw_mode_change_notify(esw, MLX5_ESWITCH_NONE); + mlx5_eswitch_event_handlers_unregister(esw); if (esw->mode == MLX5_ESWITCH_LEGACY) @@ -1815,6 +1831,7 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev) esw->offloads.inline_mode = MLX5_INLINE_MODE_NONE; dev->priv.eswitch = esw; + BLOCKING_INIT_NOTIFIER_HEAD(&esw->n_head); return 0; abort: if (esw->work_queue) @@ -2506,4 +2523,12 @@ bool mlx5_esw_multipath_prereq(struct mlx5_core_dev *dev0, dev1->priv.eswitch->mode == MLX5_ESWITCH_OFFLOADS); } +int mlx5_esw_event_notifier_register(struct mlx5_eswitch *esw, struct notifier_block *nb) +{ + return blocking_notifier_chain_register(&esw->n_head, nb); +} +void mlx5_esw_event_notifier_unregister(struct mlx5_eswitch *esw, struct notifier_block *nb) +{ + blocking_notifier_chain_unregister(&esw->n_head, nb); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index 54514b04808d0..479d2ac2cd855 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -278,6 +278,7 @@ struct mlx5_eswitch { struct { u32 large_group_num; } params; + struct blocking_notifier_head n_head; }; void esw_offloads_disable(struct mlx5_eswitch *esw); @@ -733,6 +734,17 @@ int mlx5_esw_offloads_sf_vport_enable(struct mlx5_eswitch *esw, struct devlink_p u16 vport_num, u32 sfnum); void mlx5_esw_offloads_sf_vport_disable(struct mlx5_eswitch *esw, u16 vport_num); +/** + * mlx5_esw_event_info - Indicates eswitch mode changed/changing. + * + * @new_mode: New mode of eswitch. + */ +struct mlx5_esw_event_info { + u16 new_mode; +}; + +int mlx5_esw_event_notifier_register(struct mlx5_eswitch *esw, struct notifier_block *n); +void mlx5_esw_event_notifier_unregister(struct mlx5_eswitch *esw, struct notifier_block *n); #else /* CONFIG_MLX5_ESWITCH */ /* eswitch API stubs */ static inline int mlx5_eswitch_init(struct mlx5_core_dev *dev) { return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index ab68320e55167..d6bd09dd7490f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -893,6 +893,18 @@ static int mlx5_init_once(struct mlx5_core_dev *dev) goto err_fpga_cleanup; } + err = mlx5_sf_hw_table_init(dev); + if (err) { + mlx5_core_err(dev, "Failed to init SF HW table %d\n", err); + goto err_sf_hw_table_cleanup; + } + + err = mlx5_sf_table_init(dev); + if (err) { + mlx5_core_err(dev, "Failed to init SF table %d\n", err); + goto err_sf_table_cleanup; + } + dev->dm = mlx5_dm_create(dev); if (IS_ERR(dev->dm)) mlx5_core_warn(dev, "Failed to init device memory%d\n", err); @@ -903,6 +915,10 @@ static int mlx5_init_once(struct mlx5_core_dev *dev) return 0; +err_sf_table_cleanup: + mlx5_sf_hw_table_cleanup(dev); +err_sf_hw_table_cleanup: + mlx5_vhca_event_cleanup(dev); err_fpga_cleanup: mlx5_fpga_cleanup(dev); err_eswitch_cleanup: @@ -936,6 +952,8 @@ static void mlx5_cleanup_once(struct mlx5_core_dev *dev) mlx5_hv_vhca_destroy(dev->hv_vhca); mlx5_fw_tracer_destroy(dev->tracer); mlx5_dm_cleanup(dev); + mlx5_sf_table_cleanup(dev); + mlx5_sf_hw_table_cleanup(dev); mlx5_vhca_event_cleanup(dev); mlx5_fpga_cleanup(dev); mlx5_eswitch_cleanup(dev->priv.eswitch); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/sf/cmd.c new file mode 100644 index 0000000000000..0bc3075f34faa --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/cmd.c @@ -0,0 +1,27 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2020 Mellanox Technologies Ltd */ + +#include +#include "priv.h" + +int mlx5_cmd_alloc_sf(struct mlx5_core_dev *dev, u16 function_id) +{ + u32 out[MLX5_ST_SZ_DW(alloc_sf_out)] = {}; + u32 in[MLX5_ST_SZ_DW(alloc_sf_in)] = {}; + + MLX5_SET(alloc_sf_in, in, opcode, MLX5_CMD_OP_ALLOC_SF); + MLX5_SET(alloc_sf_in, in, function_id, function_id); + + return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); +} + +int mlx5_cmd_dealloc_sf(struct mlx5_core_dev *dev, u16 function_id) +{ + u32 out[MLX5_ST_SZ_DW(dealloc_sf_out)] = {}; + u32 in[MLX5_ST_SZ_DW(dealloc_sf_in)] = {}; + + MLX5_SET(dealloc_sf_in, in, opcode, MLX5_CMD_OP_DEALLOC_SF); + MLX5_SET(dealloc_sf_in, in, function_id, function_id); + + return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c new file mode 100644 index 0000000000000..7ad0d210ec300 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c @@ -0,0 +1,316 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2020 Mellanox Technologies Ltd */ + +#include +#include "eswitch.h" +#include "priv.h" + +struct mlx5_sf { + struct devlink_port dl_port; + unsigned int port_index; + u16 id; +}; + +struct mlx5_sf_table { + struct mlx5_core_dev *dev; /* To refer from notifier context. */ + struct xarray port_indices; /* port index based lookup. */ + refcount_t refcount; + struct completion disable_complete; + struct notifier_block esw_nb; +}; + +static struct mlx5_sf * +mlx5_sf_lookup_by_index(struct mlx5_sf_table *table, unsigned int port_index) +{ + return xa_load(&table->port_indices, port_index); +} + +static int mlx5_sf_id_insert(struct mlx5_sf_table *table, struct mlx5_sf *sf) +{ + return xa_insert(&table->port_indices, sf->port_index, sf, GFP_KERNEL); +} + +static void mlx5_sf_id_erase(struct mlx5_sf_table *table, struct mlx5_sf *sf) +{ + xa_erase(&table->port_indices, sf->port_index); +} + +static struct mlx5_sf * +mlx5_sf_alloc(struct mlx5_sf_table *table, u32 sfnum, struct netlink_ext_ack *extack) +{ + unsigned int dl_port_index; + struct mlx5_sf *sf; + u16 hw_fn_id; + int id_err; + int err; + + id_err = mlx5_sf_hw_table_sf_alloc(table->dev, sfnum); + if (id_err < 0) { + err = id_err; + goto id_err; + } + + sf = kzalloc(sizeof(*sf), GFP_KERNEL); + if (!sf) { + err = -ENOMEM; + goto alloc_err; + } + sf->id = id_err; + hw_fn_id = mlx5_sf_sw_to_hw_id(table->dev, sf->id); + dl_port_index = mlx5_esw_vport_to_devlink_port_index(table->dev, hw_fn_id); + sf->port_index = dl_port_index; + + err = mlx5_sf_id_insert(table, sf); + if (err) + goto insert_err; + + return sf; + +insert_err: + kfree(sf); +alloc_err: + mlx5_sf_hw_table_sf_free(table->dev, id_err); +id_err: + if (err == -EEXIST) + NL_SET_ERR_MSG_MOD(extack, "SF already exist. Choose different sfnum"); + return ERR_PTR(err); +} + +static void mlx5_sf_free(struct mlx5_sf_table *table, struct mlx5_sf *sf) +{ + mlx5_sf_id_erase(table, sf); + mlx5_sf_hw_table_sf_free(table->dev, sf->id); + kfree(sf); +} + +static struct mlx5_sf_table *mlx5_sf_table_try_get(struct mlx5_core_dev *dev) +{ + struct mlx5_sf_table *table = dev->priv.sf_table; + + if (!table) + return NULL; + + return refcount_inc_not_zero(&table->refcount) ? table : NULL; +} + +static void mlx5_sf_table_put(struct mlx5_sf_table *table) +{ + if (refcount_dec_and_test(&table->refcount)) + complete(&table->disable_complete); +} + +static int mlx5_sf_add(struct mlx5_core_dev *dev, struct mlx5_sf_table *table, + const struct devlink_port_new_attrs *new_attr, + struct netlink_ext_ack *extack, + unsigned int *new_port_index) +{ + struct mlx5_eswitch *esw = dev->priv.eswitch; + struct mlx5_sf *sf; + u16 hw_fn_id; + int err; + + sf = mlx5_sf_alloc(table, new_attr->sfnum, extack); + if (IS_ERR(sf)) + return PTR_ERR(sf); + + hw_fn_id = mlx5_sf_sw_to_hw_id(dev, sf->id); + err = mlx5_esw_offloads_sf_vport_enable(esw, &sf->dl_port, hw_fn_id, new_attr->sfnum); + if (err) + goto esw_err; + *new_port_index = sf->port_index; + return 0; + +esw_err: + mlx5_sf_free(table, sf); + return err; +} + +static void mlx5_sf_del(struct mlx5_core_dev *dev, struct mlx5_sf_table *table, struct mlx5_sf *sf) +{ + struct mlx5_eswitch *esw = dev->priv.eswitch; + u16 hw_fn_id; + + hw_fn_id = mlx5_sf_sw_to_hw_id(dev, sf->id); + mlx5_esw_offloads_sf_vport_disable(esw, hw_fn_id); + mlx5_sf_free(table, sf); +} + +static int +mlx5_sf_new_check_attr(struct mlx5_core_dev *dev, const struct devlink_port_new_attrs *new_attr, + struct netlink_ext_ack *extack) +{ + if (new_attr->flavour != DEVLINK_PORT_FLAVOUR_PCI_SF) { + NL_SET_ERR_MSG_MOD(extack, "Driver supports only SF port addition"); + return -EOPNOTSUPP; + } + if (new_attr->port_index_valid) { + NL_SET_ERR_MSG_MOD(extack, + "Driver does not support user defined port index assignment"); + return -EOPNOTSUPP; + } + if (!new_attr->sfnum_valid) { + NL_SET_ERR_MSG_MOD(extack, + "User must provide unique sfnum. Driver does not support auto assignment"); + return -EOPNOTSUPP; + } + if (new_attr->controller_valid && new_attr->controller) { + NL_SET_ERR_MSG_MOD(extack, "External controller is unsupported"); + return -EOPNOTSUPP; + } + if (new_attr->pfnum != PCI_FUNC(dev->pdev->devfn)) { + NL_SET_ERR_MSG_MOD(extack, "Invalid pfnum supplied"); + return -EOPNOTSUPP; + } + return 0; +} + +int mlx5_devlink_sf_port_new(struct devlink *devlink, + const struct devlink_port_new_attrs *new_attr, + struct netlink_ext_ack *extack, + unsigned int *new_port_index) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + struct mlx5_sf_table *table; + int err; + + err = mlx5_sf_new_check_attr(dev, new_attr, extack); + if (err) + return err; + + table = mlx5_sf_table_try_get(dev); + if (!table) { + NL_SET_ERR_MSG_MOD(extack, + "Port add is only supported in eswitch switchdev mode or SF ports are disabled."); + return -EOPNOTSUPP; + } + err = mlx5_sf_add(dev, table, new_attr, extack, new_port_index); + mlx5_sf_table_put(table); + return err; +} + +int mlx5_devlink_sf_port_del(struct devlink *devlink, unsigned int port_index, + struct netlink_ext_ack *extack) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + struct mlx5_sf_table *table; + struct mlx5_sf *sf; + int err = 0; + + table = mlx5_sf_table_try_get(dev); + if (!table) { + NL_SET_ERR_MSG_MOD(extack, + "Port del is only supported in eswitch switchdev mode or SF ports are disabled."); + return -EOPNOTSUPP; + } + sf = mlx5_sf_lookup_by_index(table, port_index); + if (!sf) { + err = -ENODEV; + goto sf_err; + } + + mlx5_sf_del(dev, table, sf); +sf_err: + mlx5_sf_table_put(table); + return err; +} + +static void mlx5_sf_destroy_all(struct mlx5_sf_table *table) +{ + struct mlx5_core_dev *dev = table->dev; + unsigned long index; + struct mlx5_sf *sf; + + xa_for_each(&table->port_indices, index, sf) + mlx5_sf_del(dev, table, sf); +} + +static void mlx5_sf_table_enable(struct mlx5_sf_table *table) +{ + if (!mlx5_sf_max_functions(table->dev)) + return; + + init_completion(&table->disable_complete); + refcount_set(&table->refcount, 1); +} + +static void mlx5_sf_table_disable(struct mlx5_sf_table *table) +{ + if (!mlx5_sf_max_functions(table->dev)) + return; + + if (!refcount_read(&table->refcount)) + return; + + /* Balances with refcount_set; drop the reference so that new user cmd cannot start. */ + mlx5_sf_table_put(table); + wait_for_completion(&table->disable_complete); + + /* At this point, no new user commands can start. + * It is safe to destroy all user created SFs. + */ + mlx5_sf_destroy_all(table); +} + +static int mlx5_sf_esw_event(struct notifier_block *nb, unsigned long event, void *data) +{ + struct mlx5_sf_table *table = container_of(nb, struct mlx5_sf_table, esw_nb); + const struct mlx5_esw_event_info *mode = data; + + switch (mode->new_mode) { + case MLX5_ESWITCH_OFFLOADS: + mlx5_sf_table_enable(table); + break; + case MLX5_ESWITCH_NONE: + mlx5_sf_table_disable(table); + break; + default: + break; + }; + + return 0; +} + +static bool mlx5_sf_table_supported(const struct mlx5_core_dev *dev) +{ + return dev->priv.eswitch && MLX5_ESWITCH_MANAGER(dev) && mlx5_sf_supported(dev); +} + +int mlx5_sf_table_init(struct mlx5_core_dev *dev) +{ + struct mlx5_sf_table *table; + int err; + + if (!mlx5_sf_table_supported(dev)) + return 0; + + table = kzalloc(sizeof(*table), GFP_KERNEL); + if (!table) + return -ENOMEM; + + table->dev = dev; + xa_init(&table->port_indices); + dev->priv.sf_table = table; + table->esw_nb.notifier_call = mlx5_sf_esw_event; + err = mlx5_esw_event_notifier_register(dev->priv.eswitch, &table->esw_nb); + if (err) + goto reg_err; + return 0; + +reg_err: + kfree(table); + dev->priv.sf_table = NULL; + return err; +} + +void mlx5_sf_table_cleanup(struct mlx5_core_dev *dev) +{ + struct mlx5_sf_table *table = dev->priv.sf_table; + + if (!table) + return; + + mlx5_esw_event_notifier_unregister(dev->priv.eswitch, &table->esw_nb); + WARN_ON(refcount_read(&table->refcount)); + WARN_ON(!xa_empty(&table->port_indices)); + kfree(table); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/hw_table.c b/drivers/net/ethernet/mellanox/mlx5/core/sf/hw_table.c new file mode 100644 index 0000000000000..c7757f399e8a7 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/hw_table.c @@ -0,0 +1,125 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2020 Mellanox Technologies Ltd */ +#include +#include "vhca_event.h" +#include "priv.h" +#include "sf.h" +#include "ecpf.h" + +struct mlx5_sf_hw { + u32 usr_sfnum; + u8 allocated: 1; +}; + +struct mlx5_sf_hw_table { + struct mlx5_core_dev *dev; + struct mlx5_sf_hw *sfs; + int max_local_functions; + u8 ecpu: 1; +}; + +u16 mlx5_sf_sw_to_hw_id(const struct mlx5_core_dev *dev, u16 sw_id) +{ + return sw_id + mlx5_sf_start_function_id(dev); +} + +int mlx5_sf_hw_table_sf_alloc(struct mlx5_core_dev *dev, u32 usr_sfnum) +{ + struct mlx5_sf_hw_table *table = dev->priv.sf_hw_table; + int sw_id = -ENOSPC; + u16 hw_fn_id; + int err; + int i; + + if (!table->max_local_functions) + return -EOPNOTSUPP; + + /* Check if sf with same sfnum already exists or not. */ + for (i = 0; i < table->max_local_functions; i++) { + if (table->sfs[i].allocated && table->sfs[i].usr_sfnum == usr_sfnum) + return -EEXIST; + } + + /* Find the free entry and allocate the entry from the array */ + for (i = 0; i < table->max_local_functions; i++) { + if (!table->sfs[i].allocated) { + table->sfs[i].usr_sfnum = usr_sfnum; + table->sfs[i].allocated = true; + sw_id = i; + break; + } + } + if (sw_id == -ENOSPC) { + err = -ENOSPC; + goto err; + } + + hw_fn_id = mlx5_sf_sw_to_hw_id(table->dev, sw_id); + err = mlx5_cmd_alloc_sf(table->dev, hw_fn_id); + if (err) + goto err; + + err = mlx5_modify_vhca_sw_id(dev, hw_fn_id, table->ecpu, usr_sfnum); + if (err) + goto vhca_err; + + return sw_id; + +vhca_err: + mlx5_cmd_dealloc_sf(table->dev, hw_fn_id); +err: + table->sfs[i].allocated = false; + return err; +} + +void mlx5_sf_hw_table_sf_free(struct mlx5_core_dev *dev, u16 id) +{ + struct mlx5_sf_hw_table *table = dev->priv.sf_hw_table; + u16 hw_fn_id; + + hw_fn_id = mlx5_sf_sw_to_hw_id(table->dev, id); + mlx5_cmd_dealloc_sf(table->dev, hw_fn_id); + table->sfs[id].allocated = false; +} + +int mlx5_sf_hw_table_init(struct mlx5_core_dev *dev) +{ + struct mlx5_sf_hw_table *table; + struct mlx5_sf_hw *sfs; + int max_functions; + + if (!mlx5_sf_supported(dev)) + return 0; + + max_functions = mlx5_sf_max_functions(dev); + table = kzalloc(sizeof(*table), GFP_KERNEL); + if (!table) + return -ENOMEM; + + sfs = kcalloc(max_functions, sizeof(*sfs), GFP_KERNEL); + if (!sfs) + goto table_err; + + table->dev = dev; + table->sfs = sfs; + table->max_local_functions = max_functions; + table->ecpu = mlx5_read_embedded_cpu(dev); + dev->priv.sf_hw_table = table; + mlx5_core_dbg(dev, "SF HW table: max sfs = %d\n", max_functions); + return 0; + +table_err: + kfree(table); + return -ENOMEM; +} + +void mlx5_sf_hw_table_cleanup(struct mlx5_core_dev *dev) +{ + struct mlx5_sf_hw_table *table = dev->priv.sf_hw_table; + + if (!table) + return; + + kfree(table->sfs); + kfree(table); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/priv.h b/drivers/net/ethernet/mellanox/mlx5/core/sf/priv.h new file mode 100644 index 0000000000000..7f3622375a9c1 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/priv.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2020 Mellanox Technologies Ltd */ + +#ifndef __MLX5_SF_PRIV_H__ +#define __MLX5_SF_PRIV_H__ + +#include + +int mlx5_cmd_alloc_sf(struct mlx5_core_dev *dev, u16 function_id); +int mlx5_cmd_dealloc_sf(struct mlx5_core_dev *dev, u16 function_id); + +u16 mlx5_sf_sw_to_hw_id(const struct mlx5_core_dev *dev, u16 sw_id); + +int mlx5_sf_hw_table_sf_alloc(struct mlx5_core_dev *dev, u32 usr_sfnum); +void mlx5_sf_hw_table_sf_free(struct mlx5_core_dev *dev, u16 id); + +#endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/sf.h b/drivers/net/ethernet/mellanox/mlx5/core/sf/sf.h index 623191679b49c..31278dc42e729 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sf/sf.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/sf.h @@ -42,4 +42,41 @@ static inline u16 mlx5_sf_max_functions(const struct mlx5_core_dev *dev) #endif +#ifdef CONFIG_MLX5_SF_MANAGER + +int mlx5_sf_hw_table_init(struct mlx5_core_dev *dev); +void mlx5_sf_hw_table_cleanup(struct mlx5_core_dev *dev); + +int mlx5_sf_table_init(struct mlx5_core_dev *dev); +void mlx5_sf_table_cleanup(struct mlx5_core_dev *dev); + +int mlx5_devlink_sf_port_new(struct devlink *devlink, + const struct devlink_port_new_attrs *add_attr, + struct netlink_ext_ack *extack, + unsigned int *new_port_index); +int mlx5_devlink_sf_port_del(struct devlink *devlink, unsigned int port_index, + struct netlink_ext_ack *extack); + +#else + +static inline int mlx5_sf_hw_table_init(struct mlx5_core_dev *dev) +{ + return 0; +} + +static inline void mlx5_sf_hw_table_cleanup(struct mlx5_core_dev *dev) +{ +} + +static inline int mlx5_sf_table_init(struct mlx5_core_dev *dev) +{ + return 0; +} + +static inline void mlx5_sf_table_cleanup(struct mlx5_core_dev *dev) +{ +} + +#endif + #endif diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 48e3638b1185b..7e357c7f0d5e9 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -510,6 +510,8 @@ struct mlx5_eq_table; struct mlx5_irq_table; struct mlx5_vhca_state_notifier; struct mlx5_sf_dev_table; +struct mlx5_sf_hw_table; +struct mlx5_sf_table; struct mlx5_rate_limit { u32 rate; @@ -611,6 +613,10 @@ struct mlx5_priv { struct mlx5_sf_dev_table *sf_dev_table; struct mlx5_core_dev *parent_mdev; #endif +#ifdef CONFIG_MLX5_SF_MANAGER + struct mlx5_sf_hw_table *sf_hw_table; + struct mlx5_sf_table *sf_table; +#endif }; enum mlx5_device_state { -- GitLab From 6a3273217469790e6d0abc73893d0ebe6b69180d Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Fri, 11 Dec 2020 22:12:22 -0800 Subject: [PATCH 1270/2012] net/mlx5: SF, Port function state change support Support changing the state of the SF port's function through devlink. When activating the SF port's function, enable the hca in the device followed by adding its auxiliary device. When deactivating the SF port's function, delete its auxiliary device followed by disabling the vHCA. Port function attributes get/set callbacks are invoked with devlink instance lock held. Such callbacks need to synchronize with sf port table getting disabled either via sriov sysfs callback. Such callbacks synchronize with table disable context holding table refcount. $ devlink dev eswitch set pci/0000:06:00.0 mode switchdev $ devlink port show pci/0000:06:00.0/65535: type eth netdev ens2f0np0 flavour physical port 0 splittable false $ devlink port add pci/0000:06:00.0 flavour pcisf pfnum 0 sfnum 88 pci/0000:06:00.0/32768: type eth netdev eth6 flavour pcisf controller 0 pfnum 0 sfnum 88 external false splittable false function: hw_addr 00:00:00:00:00:00 state inactive opstate detached $ devlink port show ens2f0npf0sf88 pci/0000:06:00.0/32768: type eth netdev ens2f0npf0sf88 flavour pcisf controller 0 pfnum 0 sfnum 88 external false splittable false function: hw_addr 00:00:00:00:88:88 state inactive opstate detached $ devlink port function set pci/0000:06:00.0/32768 hw_addr 00:00:00:00:88:88 state active $ devlink port show ens2f0npf0sf88 -jp { "port": { "pci/0000:06:00.0/32768": { "type": "eth", "netdev": "ens2f0npf0sf88", "flavour": "pcisf", "controller": 0, "pfnum": 0, "sfnum": 88, "external": false, "splittable": false, "function": { "hw_addr": "00:00:00:00:88:88", "state": "active", "opstate": "attached" } } } } On port function activation, an auxiliary device is created in below example. $ devlink dev show devlink dev show auxiliary/mlx5_core.sf.4 $ devlink port show auxiliary/mlx5_core.sf.4/1 auxiliary/mlx5_core.sf.4/1: type eth netdev p0sf88 flavour virtual port 0 splittable false Signed-off-by: Parav Pandit Reviewed-by: Vu Pham Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/devlink.c | 2 + .../net/ethernet/mellanox/mlx5/core/main.c | 10 + .../net/ethernet/mellanox/mlx5/core/sf/cmd.c | 22 ++ .../ethernet/mellanox/mlx5/core/sf/devlink.c | 284 ++++++++++++++++-- .../ethernet/mellanox/mlx5/core/sf/hw_table.c | 116 ++++++- .../net/ethernet/mellanox/mlx5/core/sf/priv.h | 4 + .../net/ethernet/mellanox/mlx5/core/sf/sf.h | 20 +- 7 files changed, 431 insertions(+), 27 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c index d4c0cdf5edd9d..7712311e86843 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c @@ -195,6 +195,8 @@ static const struct devlink_ops mlx5_devlink_ops = { #ifdef CONFIG_MLX5_SF_MANAGER .port_new = mlx5_devlink_sf_port_new, .port_del = mlx5_devlink_sf_port_del, + .port_fn_state_get = mlx5_devlink_sf_port_fn_state_get, + .port_fn_state_set = mlx5_devlink_sf_port_fn_state_set, #endif .flash_update = mlx5_devlink_flash_update, .info_get = mlx5_devlink_info_get, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index d6bd09dd7490f..604ac8bdebe05 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -75,6 +75,7 @@ #include "diag/rsc_dump.h" #include "sf/vhca_event.h" #include "sf/dev/dev.h" +#include "sf/sf.h" MODULE_AUTHOR("Eli Cohen "); MODULE_DESCRIPTION("Mellanox 5th generation network adapters (ConnectX series) core driver"); @@ -1161,6 +1162,12 @@ static int mlx5_load(struct mlx5_core_dev *dev) mlx5_vhca_event_start(dev); + err = mlx5_sf_hw_table_create(dev); + if (err) { + mlx5_core_err(dev, "sf table create failed %d\n", err); + goto err_vhca; + } + err = mlx5_ec_init(dev); if (err) { mlx5_core_err(dev, "Failed to init embedded CPU\n"); @@ -1180,6 +1187,8 @@ static int mlx5_load(struct mlx5_core_dev *dev) err_sriov: mlx5_ec_cleanup(dev); err_ec: + mlx5_sf_hw_table_destroy(dev); +err_vhca: mlx5_vhca_event_stop(dev); mlx5_cleanup_fs(dev); err_fs: @@ -1209,6 +1218,7 @@ static void mlx5_unload(struct mlx5_core_dev *dev) mlx5_sf_dev_table_destroy(dev); mlx5_sriov_detach(dev); mlx5_ec_cleanup(dev); + mlx5_sf_hw_table_destroy(dev); mlx5_vhca_event_stop(dev); mlx5_cleanup_fs(dev); mlx5_accel_ipsec_cleanup(dev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/sf/cmd.c index 0bc3075f34faa..a8d75c2f02754 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sf/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/cmd.c @@ -25,3 +25,25 @@ int mlx5_cmd_dealloc_sf(struct mlx5_core_dev *dev, u16 function_id) return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); } + +int mlx5_cmd_sf_enable_hca(struct mlx5_core_dev *dev, u16 func_id) +{ + u32 out[MLX5_ST_SZ_DW(enable_hca_out)] = {}; + u32 in[MLX5_ST_SZ_DW(enable_hca_in)] = {}; + + MLX5_SET(enable_hca_in, in, opcode, MLX5_CMD_OP_ENABLE_HCA); + MLX5_SET(enable_hca_in, in, function_id, func_id); + MLX5_SET(enable_hca_in, in, embedded_cpu_function, 0); + return mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out)); +} + +int mlx5_cmd_sf_disable_hca(struct mlx5_core_dev *dev, u16 func_id) +{ + u32 out[MLX5_ST_SZ_DW(disable_hca_out)] = {}; + u32 in[MLX5_ST_SZ_DW(disable_hca_in)] = {}; + + MLX5_SET(disable_hca_in, in, opcode, MLX5_CMD_OP_DISABLE_HCA); + MLX5_SET(disable_hca_in, in, function_id, func_id); + MLX5_SET(enable_hca_in, in, embedded_cpu_function, 0); + return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c index 7ad0d210ec300..c2ba41bb7a701 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c @@ -4,11 +4,17 @@ #include #include "eswitch.h" #include "priv.h" +#include "sf/dev/dev.h" +#include "mlx5_ifc_vhca_event.h" +#include "vhca_event.h" +#include "ecpf.h" struct mlx5_sf { struct devlink_port dl_port; unsigned int port_index; u16 id; + u16 hw_fn_id; + u16 hw_state; }; struct mlx5_sf_table { @@ -16,7 +22,10 @@ struct mlx5_sf_table { struct xarray port_indices; /* port index based lookup. */ refcount_t refcount; struct completion disable_complete; + struct mutex sf_state_lock; /* Serializes sf state among user cmds & vhca event handler. */ struct notifier_block esw_nb; + struct notifier_block vhca_nb; + u8 ecpu: 1; }; static struct mlx5_sf * @@ -25,6 +34,19 @@ mlx5_sf_lookup_by_index(struct mlx5_sf_table *table, unsigned int port_index) return xa_load(&table->port_indices, port_index); } +static struct mlx5_sf * +mlx5_sf_lookup_by_function_id(struct mlx5_sf_table *table, unsigned int fn_id) +{ + unsigned long index; + struct mlx5_sf *sf; + + xa_for_each(&table->port_indices, index, sf) { + if (sf->hw_fn_id == fn_id) + return sf; + } + return NULL; +} + static int mlx5_sf_id_insert(struct mlx5_sf_table *table, struct mlx5_sf *sf) { return xa_insert(&table->port_indices, sf->port_index, sf, GFP_KERNEL); @@ -59,6 +81,8 @@ mlx5_sf_alloc(struct mlx5_sf_table *table, u32 sfnum, struct netlink_ext_ack *ex hw_fn_id = mlx5_sf_sw_to_hw_id(table->dev, sf->id); dl_port_index = mlx5_esw_vport_to_devlink_port_index(table->dev, hw_fn_id); sf->port_index = dl_port_index; + sf->hw_fn_id = hw_fn_id; + sf->hw_state = MLX5_VHCA_STATE_ALLOCATED; err = mlx5_sf_id_insert(table, sf); if (err) @@ -99,6 +123,146 @@ static void mlx5_sf_table_put(struct mlx5_sf_table *table) complete(&table->disable_complete); } +static enum devlink_port_fn_state mlx5_sf_to_devlink_state(u8 hw_state) +{ + switch (hw_state) { + case MLX5_VHCA_STATE_ACTIVE: + case MLX5_VHCA_STATE_IN_USE: + case MLX5_VHCA_STATE_TEARDOWN_REQUEST: + return DEVLINK_PORT_FN_STATE_ACTIVE; + case MLX5_VHCA_STATE_INVALID: + case MLX5_VHCA_STATE_ALLOCATED: + default: + return DEVLINK_PORT_FN_STATE_INACTIVE; + } +} + +static enum devlink_port_fn_opstate mlx5_sf_to_devlink_opstate(u8 hw_state) +{ + switch (hw_state) { + case MLX5_VHCA_STATE_IN_USE: + case MLX5_VHCA_STATE_TEARDOWN_REQUEST: + return DEVLINK_PORT_FN_OPSTATE_ATTACHED; + case MLX5_VHCA_STATE_INVALID: + case MLX5_VHCA_STATE_ALLOCATED: + case MLX5_VHCA_STATE_ACTIVE: + default: + return DEVLINK_PORT_FN_OPSTATE_DETACHED; + } +} + +static bool mlx5_sf_is_active(const struct mlx5_sf *sf) +{ + return sf->hw_state == MLX5_VHCA_STATE_ACTIVE || sf->hw_state == MLX5_VHCA_STATE_IN_USE; +} + +int mlx5_devlink_sf_port_fn_state_get(struct devlink *devlink, struct devlink_port *dl_port, + enum devlink_port_fn_state *state, + enum devlink_port_fn_opstate *opstate, + struct netlink_ext_ack *extack) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + struct mlx5_sf_table *table; + struct mlx5_sf *sf; + int err = 0; + + table = mlx5_sf_table_try_get(dev); + if (!table) + return -EOPNOTSUPP; + + sf = mlx5_sf_lookup_by_index(table, dl_port->index); + if (!sf) { + err = -EOPNOTSUPP; + goto sf_err; + } + mutex_lock(&table->sf_state_lock); + *state = mlx5_sf_to_devlink_state(sf->hw_state); + *opstate = mlx5_sf_to_devlink_opstate(sf->hw_state); + mutex_unlock(&table->sf_state_lock); +sf_err: + mlx5_sf_table_put(table); + return err; +} + +static int mlx5_sf_activate(struct mlx5_core_dev *dev, struct mlx5_sf *sf) +{ + int err; + + if (mlx5_sf_is_active(sf)) + return 0; + if (sf->hw_state != MLX5_VHCA_STATE_ALLOCATED) + return -EINVAL; + + err = mlx5_cmd_sf_enable_hca(dev, sf->hw_fn_id); + if (err) + return err; + + sf->hw_state = MLX5_VHCA_STATE_ACTIVE; + return 0; +} + +static int mlx5_sf_deactivate(struct mlx5_core_dev *dev, struct mlx5_sf *sf) +{ + int err; + + if (!mlx5_sf_is_active(sf)) + return 0; + + err = mlx5_cmd_sf_disable_hca(dev, sf->hw_fn_id); + if (err) + return err; + + sf->hw_state = MLX5_VHCA_STATE_TEARDOWN_REQUEST; + return 0; +} + +static int mlx5_sf_state_set(struct mlx5_core_dev *dev, struct mlx5_sf_table *table, + struct mlx5_sf *sf, + enum devlink_port_fn_state state) +{ + int err = 0; + + mutex_lock(&table->sf_state_lock); + if (state == mlx5_sf_to_devlink_state(sf->hw_state)) + goto out; + if (state == DEVLINK_PORT_FN_STATE_ACTIVE) + err = mlx5_sf_activate(dev, sf); + else if (state == DEVLINK_PORT_FN_STATE_INACTIVE) + err = mlx5_sf_deactivate(dev, sf); + else + err = -EINVAL; +out: + mutex_unlock(&table->sf_state_lock); + return err; +} + +int mlx5_devlink_sf_port_fn_state_set(struct devlink *devlink, struct devlink_port *dl_port, + enum devlink_port_fn_state state, + struct netlink_ext_ack *extack) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + struct mlx5_sf_table *table; + struct mlx5_sf *sf; + int err; + + table = mlx5_sf_table_try_get(dev); + if (!table) { + NL_SET_ERR_MSG_MOD(extack, + "Port state set is only supported in eswitch switchdev mode or SF ports are disabled."); + return -EOPNOTSUPP; + } + sf = mlx5_sf_lookup_by_index(table, dl_port->index); + if (!sf) { + err = -ENODEV; + goto out; + } + + err = mlx5_sf_state_set(dev, table, sf, state); +out: + mlx5_sf_table_put(table); + return err; +} + static int mlx5_sf_add(struct mlx5_core_dev *dev, struct mlx5_sf_table *table, const struct devlink_port_new_attrs *new_attr, struct netlink_ext_ack *extack, @@ -125,16 +289,6 @@ esw_err: return err; } -static void mlx5_sf_del(struct mlx5_core_dev *dev, struct mlx5_sf_table *table, struct mlx5_sf *sf) -{ - struct mlx5_eswitch *esw = dev->priv.eswitch; - u16 hw_fn_id; - - hw_fn_id = mlx5_sf_sw_to_hw_id(dev, sf->id); - mlx5_esw_offloads_sf_vport_disable(esw, hw_fn_id); - mlx5_sf_free(table, sf); -} - static int mlx5_sf_new_check_attr(struct mlx5_core_dev *dev, const struct devlink_port_new_attrs *new_attr, struct netlink_ext_ack *extack) @@ -188,10 +342,30 @@ int mlx5_devlink_sf_port_new(struct devlink *devlink, return err; } +static void mlx5_sf_dealloc(struct mlx5_sf_table *table, struct mlx5_sf *sf) +{ + if (sf->hw_state == MLX5_VHCA_STATE_ALLOCATED) { + mlx5_sf_free(table, sf); + } else if (mlx5_sf_is_active(sf)) { + /* Even if its active, it is treated as in_use because by the time, + * it is disabled here, it may getting used. So it is safe to + * always look for the event to ensure that it is recycled only after + * firmware gives confirmation that it is detached by the driver. + */ + mlx5_cmd_sf_disable_hca(table->dev, sf->hw_fn_id); + mlx5_sf_hw_table_sf_deferred_free(table->dev, sf->id); + kfree(sf); + } else { + mlx5_sf_hw_table_sf_deferred_free(table->dev, sf->id); + kfree(sf); + } +} + int mlx5_devlink_sf_port_del(struct devlink *devlink, unsigned int port_index, struct netlink_ext_ack *extack) { struct mlx5_core_dev *dev = devlink_priv(devlink); + struct mlx5_eswitch *esw = dev->priv.eswitch; struct mlx5_sf_table *table; struct mlx5_sf *sf; int err = 0; @@ -208,20 +382,58 @@ int mlx5_devlink_sf_port_del(struct devlink *devlink, unsigned int port_index, goto sf_err; } - mlx5_sf_del(dev, table, sf); + mlx5_esw_offloads_sf_vport_disable(esw, sf->hw_fn_id); + mlx5_sf_id_erase(table, sf); + + mutex_lock(&table->sf_state_lock); + mlx5_sf_dealloc(table, sf); + mutex_unlock(&table->sf_state_lock); sf_err: mlx5_sf_table_put(table); return err; } -static void mlx5_sf_destroy_all(struct mlx5_sf_table *table) +static bool mlx5_sf_state_update_check(const struct mlx5_sf *sf, u8 new_state) { - struct mlx5_core_dev *dev = table->dev; - unsigned long index; + if (sf->hw_state == MLX5_VHCA_STATE_ACTIVE && new_state == MLX5_VHCA_STATE_IN_USE) + return true; + + if (sf->hw_state == MLX5_VHCA_STATE_IN_USE && new_state == MLX5_VHCA_STATE_ACTIVE) + return true; + + if (sf->hw_state == MLX5_VHCA_STATE_TEARDOWN_REQUEST && + new_state == MLX5_VHCA_STATE_ALLOCATED) + return true; + + return false; +} + +static int mlx5_sf_vhca_event(struct notifier_block *nb, unsigned long opcode, void *data) +{ + struct mlx5_sf_table *table = container_of(nb, struct mlx5_sf_table, vhca_nb); + const struct mlx5_vhca_state_event *event = data; + bool update = false; struct mlx5_sf *sf; - xa_for_each(&table->port_indices, index, sf) - mlx5_sf_del(dev, table, sf); + table = mlx5_sf_table_try_get(table->dev); + if (!table) + return 0; + + mutex_lock(&table->sf_state_lock); + sf = mlx5_sf_lookup_by_function_id(table, event->function_id); + if (!sf) + goto sf_err; + + /* When driver is attached or detached to a function, an event + * notifies such state change. + */ + update = mlx5_sf_state_update_check(sf, event->new_vhca_state); + if (update) + sf->hw_state = event->new_vhca_state; +sf_err: + mutex_unlock(&table->sf_state_lock); + mlx5_sf_table_put(table); + return 0; } static void mlx5_sf_table_enable(struct mlx5_sf_table *table) @@ -233,6 +445,22 @@ static void mlx5_sf_table_enable(struct mlx5_sf_table *table) refcount_set(&table->refcount, 1); } +static void mlx5_sf_deactivate_all(struct mlx5_sf_table *table) +{ + struct mlx5_eswitch *esw = table->dev->priv.eswitch; + unsigned long index; + struct mlx5_sf *sf; + + /* At this point, no new user commands can start and no vhca event can + * arrive. It is safe to destroy all user created SFs. + */ + xa_for_each(&table->port_indices, index, sf) { + mlx5_esw_offloads_sf_vport_disable(esw, sf->hw_fn_id); + mlx5_sf_id_erase(table, sf); + mlx5_sf_dealloc(table, sf); + } +} + static void mlx5_sf_table_disable(struct mlx5_sf_table *table) { if (!mlx5_sf_max_functions(table->dev)) @@ -241,14 +469,13 @@ static void mlx5_sf_table_disable(struct mlx5_sf_table *table) if (!refcount_read(&table->refcount)) return; - /* Balances with refcount_set; drop the reference so that new user cmd cannot start. */ + /* Balances with refcount_set; drop the reference so that new user cmd cannot start + * and new vhca event handler cannnot run. + */ mlx5_sf_table_put(table); wait_for_completion(&table->disable_complete); - /* At this point, no new user commands can start. - * It is safe to destroy all user created SFs. - */ - mlx5_sf_destroy_all(table); + mlx5_sf_deactivate_all(table); } static int mlx5_sf_esw_event(struct notifier_block *nb, unsigned long event, void *data) @@ -280,23 +507,34 @@ int mlx5_sf_table_init(struct mlx5_core_dev *dev) struct mlx5_sf_table *table; int err; - if (!mlx5_sf_table_supported(dev)) + if (!mlx5_sf_table_supported(dev) || !mlx5_vhca_event_supported(dev)) return 0; table = kzalloc(sizeof(*table), GFP_KERNEL); if (!table) return -ENOMEM; + mutex_init(&table->sf_state_lock); table->dev = dev; xa_init(&table->port_indices); dev->priv.sf_table = table; + refcount_set(&table->refcount, 0); table->esw_nb.notifier_call = mlx5_sf_esw_event; err = mlx5_esw_event_notifier_register(dev->priv.eswitch, &table->esw_nb); if (err) goto reg_err; + + table->vhca_nb.notifier_call = mlx5_sf_vhca_event; + err = mlx5_vhca_event_notifier_register(table->dev, &table->vhca_nb); + if (err) + goto vhca_err; + return 0; +vhca_err: + mlx5_esw_event_notifier_unregister(dev->priv.eswitch, &table->esw_nb); reg_err: + mutex_destroy(&table->sf_state_lock); kfree(table); dev->priv.sf_table = NULL; return err; @@ -309,8 +547,10 @@ void mlx5_sf_table_cleanup(struct mlx5_core_dev *dev) if (!table) return; + mlx5_vhca_event_notifier_unregister(table->dev, &table->vhca_nb); mlx5_esw_event_notifier_unregister(dev->priv.eswitch, &table->esw_nb); WARN_ON(refcount_read(&table->refcount)); + mutex_destroy(&table->sf_state_lock); WARN_ON(!xa_empty(&table->port_indices)); kfree(table); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/hw_table.c b/drivers/net/ethernet/mellanox/mlx5/core/sf/hw_table.c index c7757f399e8a7..58b6be0b03d7f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sf/hw_table.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/hw_table.c @@ -4,11 +4,14 @@ #include "vhca_event.h" #include "priv.h" #include "sf.h" +#include "mlx5_ifc_vhca_event.h" +#include "vhca_event.h" #include "ecpf.h" struct mlx5_sf_hw { u32 usr_sfnum; u8 allocated: 1; + u8 pending_delete: 1; }; struct mlx5_sf_hw_table { @@ -16,6 +19,8 @@ struct mlx5_sf_hw_table { struct mlx5_sf_hw *sfs; int max_local_functions; u8 ecpu: 1; + struct mutex table_lock; /* Serializes sf deletion and vhca state change handler. */ + struct notifier_block vhca_nb; }; u16 mlx5_sf_sw_to_hw_id(const struct mlx5_core_dev *dev, u16 sw_id) @@ -23,6 +28,11 @@ u16 mlx5_sf_sw_to_hw_id(const struct mlx5_core_dev *dev, u16 sw_id) return sw_id + mlx5_sf_start_function_id(dev); } +static u16 mlx5_sf_hw_to_sw_id(const struct mlx5_core_dev *dev, u16 hw_id) +{ + return hw_id - mlx5_sf_start_function_id(dev); +} + int mlx5_sf_hw_table_sf_alloc(struct mlx5_core_dev *dev, u32 usr_sfnum) { struct mlx5_sf_hw_table *table = dev->priv.sf_hw_table; @@ -34,10 +44,13 @@ int mlx5_sf_hw_table_sf_alloc(struct mlx5_core_dev *dev, u32 usr_sfnum) if (!table->max_local_functions) return -EOPNOTSUPP; + mutex_lock(&table->table_lock); /* Check if sf with same sfnum already exists or not. */ for (i = 0; i < table->max_local_functions; i++) { - if (table->sfs[i].allocated && table->sfs[i].usr_sfnum == usr_sfnum) - return -EEXIST; + if (table->sfs[i].allocated && table->sfs[i].usr_sfnum == usr_sfnum) { + err = -EEXIST; + goto exist_err; + } } /* Find the free entry and allocate the entry from the array */ @@ -63,16 +76,19 @@ int mlx5_sf_hw_table_sf_alloc(struct mlx5_core_dev *dev, u32 usr_sfnum) if (err) goto vhca_err; + mutex_unlock(&table->table_lock); return sw_id; vhca_err: mlx5_cmd_dealloc_sf(table->dev, hw_fn_id); err: table->sfs[i].allocated = false; +exist_err: + mutex_unlock(&table->table_lock); return err; } -void mlx5_sf_hw_table_sf_free(struct mlx5_core_dev *dev, u16 id) +static void _mlx5_sf_hw_id_free(struct mlx5_core_dev *dev, u16 id) { struct mlx5_sf_hw_table *table = dev->priv.sf_hw_table; u16 hw_fn_id; @@ -80,6 +96,50 @@ void mlx5_sf_hw_table_sf_free(struct mlx5_core_dev *dev, u16 id) hw_fn_id = mlx5_sf_sw_to_hw_id(table->dev, id); mlx5_cmd_dealloc_sf(table->dev, hw_fn_id); table->sfs[id].allocated = false; + table->sfs[id].pending_delete = false; +} + +void mlx5_sf_hw_table_sf_free(struct mlx5_core_dev *dev, u16 id) +{ + struct mlx5_sf_hw_table *table = dev->priv.sf_hw_table; + + mutex_lock(&table->table_lock); + _mlx5_sf_hw_id_free(dev, id); + mutex_unlock(&table->table_lock); +} + +void mlx5_sf_hw_table_sf_deferred_free(struct mlx5_core_dev *dev, u16 id) +{ + struct mlx5_sf_hw_table *table = dev->priv.sf_hw_table; + u32 out[MLX5_ST_SZ_DW(query_vhca_state_out)] = {}; + u16 hw_fn_id; + u8 state; + int err; + + hw_fn_id = mlx5_sf_sw_to_hw_id(dev, id); + mutex_lock(&table->table_lock); + err = mlx5_cmd_query_vhca_state(dev, hw_fn_id, table->ecpu, out, sizeof(out)); + if (err) + goto err; + state = MLX5_GET(query_vhca_state_out, out, vhca_state_context.vhca_state); + if (state == MLX5_VHCA_STATE_ALLOCATED) { + mlx5_cmd_dealloc_sf(table->dev, hw_fn_id); + table->sfs[id].allocated = false; + } else { + table->sfs[id].pending_delete = true; + } +err: + mutex_unlock(&table->table_lock); +} + +static void mlx5_sf_hw_dealloc_all(struct mlx5_sf_hw_table *table) +{ + int i; + + for (i = 0; i < table->max_local_functions; i++) { + if (table->sfs[i].allocated) + _mlx5_sf_hw_id_free(table->dev, i); + } } int mlx5_sf_hw_table_init(struct mlx5_core_dev *dev) @@ -88,7 +148,7 @@ int mlx5_sf_hw_table_init(struct mlx5_core_dev *dev) struct mlx5_sf_hw *sfs; int max_functions; - if (!mlx5_sf_supported(dev)) + if (!mlx5_sf_supported(dev) || !mlx5_vhca_event_supported(dev)) return 0; max_functions = mlx5_sf_max_functions(dev); @@ -100,6 +160,7 @@ int mlx5_sf_hw_table_init(struct mlx5_core_dev *dev) if (!sfs) goto table_err; + mutex_init(&table->table_lock); table->dev = dev; table->sfs = sfs; table->max_local_functions = max_functions; @@ -120,6 +181,53 @@ void mlx5_sf_hw_table_cleanup(struct mlx5_core_dev *dev) if (!table) return; + mutex_destroy(&table->table_lock); kfree(table->sfs); kfree(table); } + +static int mlx5_sf_hw_vhca_event(struct notifier_block *nb, unsigned long opcode, void *data) +{ + struct mlx5_sf_hw_table *table = container_of(nb, struct mlx5_sf_hw_table, vhca_nb); + const struct mlx5_vhca_state_event *event = data; + struct mlx5_sf_hw *sf_hw; + u16 sw_id; + + if (event->new_vhca_state != MLX5_VHCA_STATE_ALLOCATED) + return 0; + + sw_id = mlx5_sf_hw_to_sw_id(table->dev, event->function_id); + sf_hw = &table->sfs[sw_id]; + + mutex_lock(&table->table_lock); + /* SF driver notified through firmware that SF is finally detached. + * Hence recycle the sf hardware id for reuse. + */ + if (sf_hw->allocated && sf_hw->pending_delete) + _mlx5_sf_hw_id_free(table->dev, sw_id); + mutex_unlock(&table->table_lock); + return 0; +} + +int mlx5_sf_hw_table_create(struct mlx5_core_dev *dev) +{ + struct mlx5_sf_hw_table *table = dev->priv.sf_hw_table; + + if (!table) + return 0; + + table->vhca_nb.notifier_call = mlx5_sf_hw_vhca_event; + return mlx5_vhca_event_notifier_register(table->dev, &table->vhca_nb); +} + +void mlx5_sf_hw_table_destroy(struct mlx5_core_dev *dev) +{ + struct mlx5_sf_hw_table *table = dev->priv.sf_hw_table; + + if (!table) + return; + + mlx5_vhca_event_notifier_unregister(table->dev, &table->vhca_nb); + /* Dealloc SFs whose firmware event has been missed. */ + mlx5_sf_hw_dealloc_all(table); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/priv.h b/drivers/net/ethernet/mellanox/mlx5/core/sf/priv.h index 7f3622375a9c1..cb02a51d09861 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sf/priv.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/priv.h @@ -9,9 +9,13 @@ int mlx5_cmd_alloc_sf(struct mlx5_core_dev *dev, u16 function_id); int mlx5_cmd_dealloc_sf(struct mlx5_core_dev *dev, u16 function_id); +int mlx5_cmd_sf_enable_hca(struct mlx5_core_dev *dev, u16 func_id); +int mlx5_cmd_sf_disable_hca(struct mlx5_core_dev *dev, u16 func_id); + u16 mlx5_sf_sw_to_hw_id(const struct mlx5_core_dev *dev, u16 sw_id); int mlx5_sf_hw_table_sf_alloc(struct mlx5_core_dev *dev, u32 usr_sfnum); void mlx5_sf_hw_table_sf_free(struct mlx5_core_dev *dev, u16 id); +void mlx5_sf_hw_table_sf_deferred_free(struct mlx5_core_dev *dev, u16 id); #endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/sf.h b/drivers/net/ethernet/mellanox/mlx5/core/sf/sf.h index 31278dc42e729..0b6aea1e6a947 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sf/sf.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/sf.h @@ -47,6 +47,9 @@ static inline u16 mlx5_sf_max_functions(const struct mlx5_core_dev *dev) int mlx5_sf_hw_table_init(struct mlx5_core_dev *dev); void mlx5_sf_hw_table_cleanup(struct mlx5_core_dev *dev); +int mlx5_sf_hw_table_create(struct mlx5_core_dev *dev); +void mlx5_sf_hw_table_destroy(struct mlx5_core_dev *dev); + int mlx5_sf_table_init(struct mlx5_core_dev *dev); void mlx5_sf_table_cleanup(struct mlx5_core_dev *dev); @@ -56,7 +59,13 @@ int mlx5_devlink_sf_port_new(struct devlink *devlink, unsigned int *new_port_index); int mlx5_devlink_sf_port_del(struct devlink *devlink, unsigned int port_index, struct netlink_ext_ack *extack); - +int mlx5_devlink_sf_port_fn_state_get(struct devlink *devlink, struct devlink_port *dl_port, + enum devlink_port_fn_state *state, + enum devlink_port_fn_opstate *opstate, + struct netlink_ext_ack *extack); +int mlx5_devlink_sf_port_fn_state_set(struct devlink *devlink, struct devlink_port *dl_port, + enum devlink_port_fn_state state, + struct netlink_ext_ack *extack); #else static inline int mlx5_sf_hw_table_init(struct mlx5_core_dev *dev) @@ -68,6 +77,15 @@ static inline void mlx5_sf_hw_table_cleanup(struct mlx5_core_dev *dev) { } +static inline int mlx5_sf_hw_table_create(struct mlx5_core_dev *dev) +{ + return 0; +} + +static inline void mlx5_sf_hw_table_destroy(struct mlx5_core_dev *dev) +{ +} + static inline int mlx5_sf_table_init(struct mlx5_core_dev *dev) { return 0; -- GitLab From c736111cf8d519d46ac62af36378aed472faaa12 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Fri, 11 Dec 2020 22:12:23 -0800 Subject: [PATCH 1271/2012] devlink: Add devlink port documentation Added documentation for devlink port and port function related commands. Signed-off-by: Parav Pandit Reviewed-by: Jiri Pirko Reviewed-by: Jacob Keller Signed-off-by: Saeed Mahameed --- .../networking/devlink/devlink-port.rst | 118 ++++++++++++++++++ Documentation/networking/devlink/index.rst | 1 + 2 files changed, 119 insertions(+) create mode 100644 Documentation/networking/devlink/devlink-port.rst diff --git a/Documentation/networking/devlink/devlink-port.rst b/Documentation/networking/devlink/devlink-port.rst new file mode 100644 index 0000000000000..c564b557e757b --- /dev/null +++ b/Documentation/networking/devlink/devlink-port.rst @@ -0,0 +1,118 @@ +.. SPDX-License-Identifier: GPL-2.0 + +.. _devlink_port: + +============ +Devlink Port +============ + +``devlink-port`` is a port that exists on the device. It has a logically +separate ingress/egress point of the device. A devlink port can be any one +of many flavours. A devlink port flavour along with port attributes +describe what a port represents. + +A device driver that intends to publish a devlink port sets the +devlink port attributes and registers the devlink port. + +Devlink port flavours are described below. + +.. list-table:: List of devlink port flavours + :widths: 33 90 + + * - Flavour + - Description + * - ``DEVLINK_PORT_FLAVOUR_PHYSICAL`` + - Any kind of physical port. This can be an eswitch physical port or any + other physical port on the device. + * - ``DEVLINK_PORT_FLAVOUR_DSA`` + - This indicates a DSA interconnect port. + * - ``DEVLINK_PORT_FLAVOUR_CPU`` + - This indicates a CPU port applicable only to DSA. + * - ``DEVLINK_PORT_FLAVOUR_PCI_PF`` + - This indicates an eswitch port representing a port of PCI + physical function (PF). + * - ``DEVLINK_PORT_FLAVOUR_PCI_VF`` + - This indicates an eswitch port representing a port of PCI + virtual function (VF). + * - ``DEVLINK_PORT_FLAVOUR_VIRTUAL`` + - This indicates a virtual port for the PCI virtual function. + +Devlink port can have a different type based on the link layer described below. + +.. list-table:: List of devlink port types + :widths: 23 90 + + * - Type + - Description + * - ``DEVLINK_PORT_TYPE_ETH`` + - Driver should set this port type when a link layer of the port is + Ethernet. + * - ``DEVLINK_PORT_TYPE_IB`` + - Driver should set this port type when a link layer of the port is + InfiniBand. + * - ``DEVLINK_PORT_TYPE_AUTO`` + - This type is indicated by the user when driver should detect the port + type automatically. + +PCI controllers +--------------- +In most cases a PCI device has only one controller. A controller consists of +potentially multiple physical and virtual functions. A function consists +of one or more ports. This port is represented by the devlink eswitch port. + +A PCI device connected to multiple CPUs or multiple PCI root complexes or a +SmartNIC, however, may have multiple controllers. For a device with multiple +controllers, each controller is distinguished by a unique controller number. +An eswitch is on the PCI device which supports ports of multiple controllers. + +An example view of a system with two controllers:: + + --------------------------------------------------------- + | | + | --------- --------- ------- ------- | + ----------- | | vf(s) | | sf(s) | |vf(s)| |sf(s)| | + | server | | ------- ----/---- ---/----- ------- ---/--- ---/--- | + | pci rc |=== | pf0 |______/________/ | pf1 |___/_______/ | + | connect | | ------- ------- | + ----------- | | controller_num=1 (no eswitch) | + ------|-------------------------------------------------- + (internal wire) + | + --------------------------------------------------------- + | devlink eswitch ports and reps | + | ----------------------------------------------------- | + | |ctrl-0 | ctrl-0 | ctrl-0 | ctrl-0 | ctrl-0 |ctrl-0 | | + | |pf0 | pf0vfN | pf0sfN | pf1 | pf1vfN |pf1sfN | | + | ----------------------------------------------------- | + | |ctrl-1 | ctrl-1 | ctrl-1 | ctrl-1 | ctrl-1 |ctrl-1 | | + | |pf0 | pf0vfN | pf0sfN | pf1 | pf1vfN |pf1sfN | | + | ----------------------------------------------------- | + | | + | | + ----------- | --------- --------- ------- ------- | + | smartNIC| | | vf(s) | | sf(s) | |vf(s)| |sf(s)| | + | pci rc |==| ------- ----/---- ---/----- ------- ---/--- ---/--- | + | connect | | | pf0 |______/________/ | pf1 |___/_______/ | + ----------- | ------- ------- | + | | + | local controller_num=0 (eswitch) | + --------------------------------------------------------- + +In the above example, the external controller (identified by controller number = 1) +doesn't have the eswitch. Local controller (identified by controller number = 0) +has the eswitch. The Devlink instance on the local controller has eswitch +devlink ports for both the controllers. + +Function configuration +====================== + +A user can configure the function attribute before enumerating the PCI +function. Usually it means, user should configure function attribute +before a bus specific device for the function is created. However, when +SRIOV is enabled, virtual function devices are created on the PCI bus. +Hence, function attribute should be configured before binding virtual +function device to the driver. + +A user may set the hardware address of the function using +'devlink port function set hw_addr' command. For Ethernet port function +this means a MAC address. diff --git a/Documentation/networking/devlink/index.rst b/Documentation/networking/devlink/index.rst index d82874760ae26..aab79667f97b5 100644 --- a/Documentation/networking/devlink/index.rst +++ b/Documentation/networking/devlink/index.rst @@ -18,6 +18,7 @@ general. devlink-info devlink-flash devlink-params + devlink-port devlink-region devlink-resource devlink-reload -- GitLab From 6474ce7ecd80c5071861ba96c864f03d84319e73 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Fri, 11 Dec 2020 22:12:24 -0800 Subject: [PATCH 1272/2012] devlink: Extend devlink port documentation for subfunctions Add devlink port documentation for subfunction management. Signed-off-by: Parav Pandit Signed-off-by: Saeed Mahameed --- Documentation/driver-api/auxiliary_bus.rst | 2 + .../networking/devlink/devlink-port.rst | 87 ++++++++++++++++++- 2 files changed, 86 insertions(+), 3 deletions(-) diff --git a/Documentation/driver-api/auxiliary_bus.rst b/Documentation/driver-api/auxiliary_bus.rst index 2312506b06740..fff96c7ba7a85 100644 --- a/Documentation/driver-api/auxiliary_bus.rst +++ b/Documentation/driver-api/auxiliary_bus.rst @@ -1,5 +1,7 @@ .. SPDX-License-Identifier: GPL-2.0-only +.. _auxiliary_bus: + ============= Auxiliary Bus ============= diff --git a/Documentation/networking/devlink/devlink-port.rst b/Documentation/networking/devlink/devlink-port.rst index c564b557e757b..e99b415994651 100644 --- a/Documentation/networking/devlink/devlink-port.rst +++ b/Documentation/networking/devlink/devlink-port.rst @@ -34,6 +34,9 @@ Devlink port flavours are described below. * - ``DEVLINK_PORT_FLAVOUR_PCI_VF`` - This indicates an eswitch port representing a port of PCI virtual function (VF). + * - ``DEVLINK_PORT_FLAVOUR_PCI_SF`` + - This indicates an eswitch port representing a port of PCI + subfunction (SF). * - ``DEVLINK_PORT_FLAVOUR_VIRTUAL`` - This indicates a virtual port for the PCI virtual function. @@ -57,8 +60,9 @@ Devlink port can have a different type based on the link layer described below. PCI controllers --------------- In most cases a PCI device has only one controller. A controller consists of -potentially multiple physical and virtual functions. A function consists -of one or more ports. This port is represented by the devlink eswitch port. +potentially multiple physical, virtual functions and subfunctions. A function +consists of one or more ports. This port is represented by the devlink eswitch +port. A PCI device connected to multiple CPUs or multiple PCI root complexes or a SmartNIC, however, may have multiple controllers. For a device with multiple @@ -111,8 +115,85 @@ function. Usually it means, user should configure function attribute before a bus specific device for the function is created. However, when SRIOV is enabled, virtual function devices are created on the PCI bus. Hence, function attribute should be configured before binding virtual -function device to the driver. +function device to the driver. For subfunctions, this means user should +configure port function attribute before activating the port function. A user may set the hardware address of the function using 'devlink port function set hw_addr' command. For Ethernet port function this means a MAC address. + +Subfunction +============ + +Subfunction is a lightweight function that has a parent PCI function on which +it is deployed. Subfunction is created and deployed in unit of 1. Unlike +SRIOV VFs, a subfunction doesn't require its own PCI virtual function. +A subfunction communicates with the hardware through the parent PCI function. + +To use a subfunction, 3 steps setup sequence is followed. +(1) create - create a subfunction; +(2) configure - configure subfunction attributes; +(3) deploy - deploy the subfunction; + +Subfunction management is done using devlink port user interface. +User performs setup on the subfunction management device. + +(1) Create +---------- +A subfunction is created using a devlink port interface. A user adds the +subfunction by adding a devlink port of subfunction flavour. The devlink +kernel code calls down to subfunction management driver (devlink ops) and asks +it to create a subfunction devlink port. Driver then instantiates the +subfunction port and any associated objects such as health reporters and +representor netdevice. + +(2) Configure +------------- +A subfunction devlink port is created but it is not active yet. That means the +entities are created on devlink side, the e-switch port representor is created, +but the subfunction device itself it not created. A user might use e-switch port +representor to do settings, putting it into bridge, adding TC rules, etc. A user +might as well configure the hardware address (such as MAC address) of the +subfunction while subfunction is inactive. + +(3) Deploy +---------- +Once a subfunction is configured, user must activate it to use it. Upon +activation, subfunction management driver asks the subfunction management +device to instantiate the subfunction device on particular PCI function. +A subfunction device is created on the :ref:`Documentation/driver-api/auxiliary_bus.rst `. +At this point a matching subfunction driver binds to the subfunction's auxiliary device. + +Terms and Definitions +===================== + +.. list-table:: Terms and Definitions + :widths: 22 90 + + * - Term + - Definitions + * - ``PCI device`` + - A physical PCI device having one or more PCI bus consists of one or + more PCI controllers. + * - ``PCI controller`` + - A controller consists of potentially multiple physical functions, + virtual functions and subfunctions. + * - ``Port function`` + - An object to manage the function of a port. + * - ``Subfunction`` + - A lightweight function that has parent PCI function on which it is + deployed. + * - ``Subfunction device`` + - A bus device of the subfunction, usually on a auxiliary bus. + * - ``Subfunction driver`` + - A device driver for the subfunction auxiliary device. + * - ``Subfunction management device`` + - A PCI physical function that supports subfunction management. + * - ``Subfunction management driver`` + - A device driver for PCI physical function that supports + subfunction management using devlink port interface. + * - ``Subfunction host driver`` + - A device driver for PCI physical function that hosts subfunction + devices. In most cases it is same as subfunction management driver. When + subfunction is used on external controller, subfunction management and + host drivers are different. -- GitLab From 142d93d12dc187f6a32aae2048da0c8230636b86 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Fri, 11 Dec 2020 22:12:25 -0800 Subject: [PATCH 1273/2012] net/mlx5: Add devlink subfunction port documentation Add documentation for subfunction management using devlink port. Signed-off-by: Parav Pandit Signed-off-by: Saeed Mahameed --- .../device_drivers/ethernet/mellanox/mlx5.rst | 210 ++++++++++++++++++ 1 file changed, 210 insertions(+) diff --git a/Documentation/networking/device_drivers/ethernet/mellanox/mlx5.rst b/Documentation/networking/device_drivers/ethernet/mellanox/mlx5.rst index a5eb22793bb95..a1b32fcd0d76f 100644 --- a/Documentation/networking/device_drivers/ethernet/mellanox/mlx5.rst +++ b/Documentation/networking/device_drivers/ethernet/mellanox/mlx5.rst @@ -12,6 +12,8 @@ Contents - `Enabling the driver and kconfig options`_ - `Devlink info`_ - `Devlink parameters`_ +- `mlx5 subfunction`_ +- `mlx5 port function`_ - `Devlink health reporters`_ - `mlx5 tracepoints`_ @@ -181,6 +183,214 @@ User command examples: values: cmode driverinit value true +mlx5 subfunction +================ +mlx5 supports subfunction management using devlink port (see :ref:`Documentation/networking/devlink/devlink-port.rst `) interface. + +A Subfunction has its own function capabilities and its own resources. This +means a subfunction has its own dedicated queues (txq, rxq, cq, eq). These +queues are neither shared nor stolen from the parent PCI function. + +When a subfunction is RDMA capable, it has its own QP1, GID table and rdma +resources neither shared nor stolen from the parent PCI function. + +A subfunction has a dedicated window in PCI BAR space that is not shared +with ther other subfunctions or the parent PCI function. This ensures that all +devices (netdev, rdma, vdpa etc.) of the subfunction accesses only assigned +PCI BAR space. + +A Subfunction supports eswitch representation through which it supports tc +offloads. The user configures eswitch to send/receive packets from/to +the subfunction port. + +Subfunctions share PCI level resources such as PCI MSI-X IRQs with +other subfunctions and/or with its parent PCI function. + +Example mlx5 software, system and device view:: + + _______ + | admin | + | user |---------- + |_______| | + | | + ____|____ __|______ _________________ + | | | | | | + | devlink | | tc tool | | user | + | tool | |_________| | applications | + |_________| | |_________________| + | | | | + | | | | Userspace + +---------|-------------|-------------------|----------|--------------------+ + | | +----------+ +----------+ Kernel + | | | netdev | | rdma dev | + | | +----------+ +----------+ + (devlink port add/del | ^ ^ + port function set) | | | + | | +---------------| + _____|___ | | _______|_______ + | | | | | mlx5 class | + | devlink | +------------+ | | drivers | + | kernel | | rep netdev | | |(mlx5_core,ib) | + |_________| +------------+ | |_______________| + | | | ^ + (devlink ops) | | (probe/remove) + _________|________ | | ____|________ + | subfunction | | +---------------+ | subfunction | + | management driver|----- | subfunction |---| driver | + | (mlx5_core) | | auxiliary dev | | (mlx5_core) | + |__________________| +---------------+ |_____________| + | ^ + (sf add/del, vhca events) | + | (device add/del) + _____|____ ____|________ + | | | subfunction | + | PCI NIC |---- activate/deactive events---->| host driver | + |__________| | (mlx5_core) | + |_____________| + +Subfunction is created using devlink port interface. + +- Change device to switchdev mode:: + + $ devlink dev eswitch set pci/0000:06:00.0 mode switchdev + +- Add a devlink port of subfunction flaovur:: + + $ devlink port add pci/0000:06:00.0 flavour pcisf pfnum 0 sfnum 88 + pci/0000:06:00.0/32768: type eth netdev eth6 flavour pcisf controller 0 pfnum 0 sfnum 88 external false splittable false + function: + hw_addr 00:00:00:00:00:00 state inactive opstate detached + +- Show a devlink port of the subfunction:: + + $ devlink port show pci/0000:06:00.0/32768 + pci/0000:06:00.0/32768: type eth netdev enp6s0pf0sf88 flavour pcisf pfnum 0 sfnum 88 + function: + hw_addr 00:00:00:00:00:00 state inactive opstate detached + +- Delete a devlink port of subfunction after use:: + + $ devlink port del pci/0000:06:00.0/32768 + +mlx5 function attributes +======================== +The mlx5 driver provides a mechanism to setup PCI VF/SF function attributes in +a unified way for SmartNIC and non-SmartNIC. + +This is supported only when the eswitch mode is set to switchdev. Port function +configuration of the PCI VF/SF is supported through devlink eswitch port. + +Port function attributes should be set before PCI VF/SF is enumerated by the +driver. + +MAC address setup +----------------- +mlx5 driver provides mechanism to setup the MAC address of the PCI VF/SF. + +The configured MAC address of the PCI VF/SF will be used by netdevice and rdma +device created for the PCI VF/SF. + +- Get the MAC address of the VF identified by its unique devlink port index:: + + $ devlink port show pci/0000:06:00.0/2 + pci/0000:06:00.0/2: type eth netdev enp6s0pf0vf1 flavour pcivf pfnum 0 vfnum 1 + function: + hw_addr 00:00:00:00:00:00 + +- Set the MAC address of the VF identified by its unique devlink port index:: + + $ devlink port function set pci/0000:06:00.0/2 hw_addr 00:11:22:33:44:55 + + $ devlink port show pci/0000:06:00.0/2 + pci/0000:06:00.0/2: type eth netdev enp6s0pf0vf1 flavour pcivf pfnum 0 vfnum 1 + function: + hw_addr 00:11:22:33:44:55 + +- Get the MAC address of the SF identified by its unique devlink port index:: + + $ devlink port show pci/0000:06:00.0/32768 + pci/0000:06:00.0/32768: type eth netdev enp6s0pf0sf88 flavour pcisf pfnum 0 sfnum 88 + function: + hw_addr 00:00:00:00:00:00 + +- Set the MAC address of the VF identified by its unique devlink port index:: + + $ devlink port function set pci/0000:06:00.0/32768 hw_addr 00:00:00:00:88:88 + + $ devlink port show pci/0000:06:00.0/32768 + pci/0000:06:00.0/32768: type eth netdev enp6s0pf0sf88 flavour pcivf pfnum 0 sfnum 88 + function: + hw_addr 00:00:00:00:88:88 + +SF state setup +-------------- +To use the SF, the user must active the SF using the SF function state +attribute. + +- Get the state of the SF identified by its unique devlink port index:: + + $ devlink port show ens2f0npf0sf88 + pci/0000:06:00.0/32768: type eth netdev ens2f0npf0sf88 flavour pcisf controller 0 pfnum 0 sfnum 88 external false splittable false + function: + hw_addr 00:00:00:00:88:88 state inactive opstate detached + +- Activate the function and verify its state is active:: + + $ devlink port function set ens2f0npf0sf88 state active + + $ devlink port show ens2f0npf0sf88 + pci/0000:06:00.0/32768: type eth netdev ens2f0npf0sf88 flavour pcisf controller 0 pfnum 0 sfnum 88 external false splittable false + function: + hw_addr 00:00:00:00:88:88 state active opstate detached + +Upon function activation, the PF driver instance gets the event from the device +that a particular SF was activated. It's the cue to put the device on bus, probe +it and instantiate the devlink instance and class specific auxiliary devices +for it. + +- Show the auxiliary device and port of the subfunction:: + + $ devlink dev show + devlink dev show auxiliary/mlx5_core.sf.4 + + $ devlink port show auxiliary/mlx5_core.sf.4/1 + auxiliary/mlx5_core.sf.4/1: type eth netdev p0sf88 flavour virtual port 0 splittable false + + $ rdma link show mlx5_0/1 + link mlx5_0/1 state ACTIVE physical_state LINK_UP netdev p0sf88 + + $ rdma dev show + 8: rocep6s0f1: node_type ca fw 16.29.0550 node_guid 248a:0703:00b3:d113 sys_image_guid 248a:0703:00b3:d112 + 13: mlx5_0: node_type ca fw 16.29.0550 node_guid 0000:00ff:fe00:8888 sys_image_guid 248a:0703:00b3:d112 + +- Subfunction auxiliary device and class device hierarchy:: + + mlx5_core.sf.4 + (subfunction auxiliary device) + /\ + / \ + / \ + / \ + / \ + mlx5_core.eth.4 mlx5_core.rdma.4 + (sf eth aux dev) (sf rdma aux dev) + | | + | | + p0sf88 mlx5_0 + (sf netdev) (sf rdma device) + +Additionally, the SF port also gets the event when the driver attaches to the +auxiliary device of the subfunction. This results in changing the operational +state of the function. This provides visiblity to the user to decide when is it +safe to delete the SF port for graceful termination of the subfunction. + +- Show the SF port operational state:: + + $ devlink port show ens2f0npf0sf88 + pci/0000:06:00.0/32768: type eth netdev ens2f0npf0sf88 flavour pcisf controller 0 pfnum 0 sfnum 88 external false splittable false + function: + hw_addr 00:00:00:00:88:88 state active opstate attached + Devlink health reporters ======================== -- GitLab From 607ec89ed18f49ca59689572659b9c0076f1991f Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 19 Jan 2021 10:10:54 -0700 Subject: [PATCH 1274/2012] io_uring: fix SQPOLL IORING_OP_CLOSE cancelation state IORING_OP_CLOSE is special in terms of cancelation, since it has an intermediate state where we've removed the file descriptor but hasn't closed the file yet. For that reason, it's currently marked with IO_WQ_WORK_NO_CANCEL to prevent cancelation. This ensures that the op is always run even if canceled, to prevent leaving us with a live file but an fd that is gone. However, with SQPOLL, since a cancel request doesn't carry any resources on behalf of the request being canceled, if we cancel before any of the close op has been run, we can end up with io-wq not having the ->files assigned. This can result in the following oops reported by Joseph: BUG: kernel NULL pointer dereference, address: 00000000000000d8 PGD 800000010b76f067 P4D 800000010b76f067 PUD 10b462067 PMD 0 Oops: 0000 [#1] SMP PTI CPU: 1 PID: 1788 Comm: io_uring-sq Not tainted 5.11.0-rc4 #1 Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2011 RIP: 0010:__lock_acquire+0x19d/0x18c0 Code: 00 00 8b 1d fd 56 dd 08 85 db 0f 85 43 05 00 00 48 c7 c6 98 7b 95 82 48 c7 c7 57 96 93 82 e8 9a bc f5 ff 0f 0b e9 2b 05 00 00 <48> 81 3f c0 ca 67 8a b8 00 00 00 00 41 0f 45 c0 89 04 24 e9 81 fe RSP: 0018:ffffc90001933828 EFLAGS: 00010002 RAX: 0000000000000001 RBX: 0000000000000001 RCX: 0000000000000000 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 00000000000000d8 RBP: 0000000000000246 R08: 0000000000000001 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000 R13: 0000000000000000 R14: ffff888106e8a140 R15: 00000000000000d8 FS: 0000000000000000(0000) GS:ffff88813bd00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00000000000000d8 CR3: 0000000106efa004 CR4: 00000000003706e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: lock_acquire+0x31a/0x440 ? close_fd_get_file+0x39/0x160 ? __lock_acquire+0x647/0x18c0 _raw_spin_lock+0x2c/0x40 ? close_fd_get_file+0x39/0x160 close_fd_get_file+0x39/0x160 io_issue_sqe+0x1334/0x14e0 ? lock_acquire+0x31a/0x440 ? __io_free_req+0xcf/0x2e0 ? __io_free_req+0x175/0x2e0 ? find_held_lock+0x28/0xb0 ? io_wq_submit_work+0x7f/0x240 io_wq_submit_work+0x7f/0x240 io_wq_cancel_cb+0x161/0x580 ? io_wqe_wake_worker+0x114/0x360 ? io_uring_get_socket+0x40/0x40 io_async_find_and_cancel+0x3b/0x140 io_issue_sqe+0xbe1/0x14e0 ? __lock_acquire+0x647/0x18c0 ? __io_queue_sqe+0x10b/0x5f0 __io_queue_sqe+0x10b/0x5f0 ? io_req_prep+0xdb/0x1150 ? mark_held_locks+0x6d/0xb0 ? mark_held_locks+0x6d/0xb0 ? io_queue_sqe+0x235/0x4b0 io_queue_sqe+0x235/0x4b0 io_submit_sqes+0xd7e/0x12a0 ? _raw_spin_unlock_irq+0x24/0x30 ? io_sq_thread+0x3ae/0x940 io_sq_thread+0x207/0x940 ? do_wait_intr_irq+0xc0/0xc0 ? __ia32_sys_io_uring_enter+0x650/0x650 kthread+0x134/0x180 ? kthread_create_worker_on_cpu+0x90/0x90 ret_from_fork+0x1f/0x30 Fix this by moving the IO_WQ_WORK_NO_CANCEL until _after_ we've modified the fdtable. Canceling before this point is totally fine, and running it in the io-wq context _after_ that point is also fine. For 5.12, we'll handle this internally and get rid of the no-cancel flag, as IORING_OP_CLOSE is the only user of it. Cc: stable@vger.kernel.org Fixes: b5dba59e0cf7 ("io_uring: add support for IORING_OP_CLOSE") Reported-by: "Abaci " Reviewed-and-tested-by: Joseph Qi Signed-off-by: Jens Axboe --- fs/io_uring.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 383ff6ed37342..e4c1cdf0325d5 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -4472,7 +4472,6 @@ static int io_close_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) * io_wq_work.flags, so initialize io_wq_work firstly. */ io_req_init_async(req); - req->work.flags |= IO_WQ_WORK_NO_CANCEL; if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) return -EINVAL; @@ -4505,6 +4504,8 @@ static int io_close(struct io_kiocb *req, bool force_nonblock, /* if the file has a flush method, be safe and punt to async */ if (close->put_file->f_op->flush && force_nonblock) { + /* not safe to cancel at this point */ + req->work.flags |= IO_WQ_WORK_NO_CANCEL; /* was never set, but play safe */ req->flags &= ~REQ_F_NOWAIT; /* avoid grabbing files - we don't need the files */ -- GitLab From 9a173346bd9e16ab19c7addb8862d95a5cea9feb Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Thu, 21 Jan 2021 12:01:08 +0000 Subject: [PATCH 1275/2012] io_uring: fix short read retries for non-reg files Sockets and other non-regular files may actually expect short reads to happen, don't retry reads for them. Because non-reg files don't set FMODE_BUF_RASYNC and so it won't do second/retry do_read, we can filter out those cases after first do_read() attempt with ret>0. Cc: stable@vger.kernel.org # 5.9+ Suggested-by: Jens Axboe Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io_uring.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index e4c1cdf0325d5..862113a9364f8 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -3552,7 +3552,7 @@ static int io_read(struct io_kiocb *req, bool force_nonblock, /* read it all, or we did blocking attempt. no retry. */ if (!iov_iter_count(iter) || !force_nonblock || - (req->file->f_flags & O_NONBLOCK)) + (req->file->f_flags & O_NONBLOCK) || !(req->flags & REQ_F_ISREG)) goto done; io_size -= ret; -- GitLab From bb8b81e396f7afbe7c50d789e2107512274d2a35 Mon Sep 17 00:00:00 2001 From: Loris Reiff Date: Fri, 22 Jan 2021 17:42:31 +0100 Subject: [PATCH 1276/2012] bpf, cgroup: Fix optlen WARN_ON_ONCE toctou A toctou issue in `__cgroup_bpf_run_filter_getsockopt` can trigger a WARN_ON_ONCE in a check of `copy_from_user`. `*optlen` is checked to be non-negative in the individual getsockopt functions beforehand. Changing `*optlen` in a race to a negative value will result in a `copy_from_user(ctx.optval, optval, ctx.optlen)` with `ctx.optlen` being a negative integer. Fixes: 0d01da6afc54 ("bpf: implement getsockopt and setsockopt hooks") Signed-off-by: Loris Reiff Signed-off-by: Daniel Borkmann Reviewed-by: Stanislav Fomichev Link: https://lore.kernel.org/bpf/20210122164232.61770-1-loris.reiff@liblor.ch --- kernel/bpf/cgroup.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c index 96555a8a2c545..6ec8f02f463b6 100644 --- a/kernel/bpf/cgroup.c +++ b/kernel/bpf/cgroup.c @@ -1442,6 +1442,11 @@ int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level, goto out; } + if (ctx.optlen < 0) { + ret = -EFAULT; + goto out; + } + if (copy_from_user(ctx.optval, optval, min(ctx.optlen, max_optlen)) != 0) { ret = -EFAULT; -- GitLab From f4a2da755a7e1f5d845c52aee71336cee289935a Mon Sep 17 00:00:00 2001 From: Loris Reiff Date: Fri, 22 Jan 2021 17:42:32 +0100 Subject: [PATCH 1277/2012] bpf, cgroup: Fix problematic bounds check Since ctx.optlen is signed, a larger value than max_value could be passed, as it is later on used as unsigned, which causes a WARN_ON_ONCE in the copy_to_user. Fixes: 0d01da6afc54 ("bpf: implement getsockopt and setsockopt hooks") Signed-off-by: Loris Reiff Signed-off-by: Daniel Borkmann Reviewed-by: Stanislav Fomichev Link: https://lore.kernel.org/bpf/20210122164232.61770-2-loris.reiff@liblor.ch --- kernel/bpf/cgroup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c index 6ec8f02f463b6..6aa9e10c6335a 100644 --- a/kernel/bpf/cgroup.c +++ b/kernel/bpf/cgroup.c @@ -1464,7 +1464,7 @@ int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level, goto out; } - if (ctx.optlen > max_optlen) { + if (ctx.optlen > max_optlen || ctx.optlen < 0) { ret = -EFAULT; goto out; } -- GitLab From b9557caaf872271671bdc1ef003d72f421eb72f6 Mon Sep 17 00:00:00 2001 From: Pan Bian Date: Wed, 20 Jan 2021 18:08:56 -0800 Subject: [PATCH 1278/2012] bpf, inode_storage: Put file handler if no storage was found Put file f if inode_storage_ptr() returns NULL. Fixes: 8ea636848aca ("bpf: Implement bpf_local_storage for inodes") Signed-off-by: Pan Bian Signed-off-by: Daniel Borkmann Acked-by: KP Singh Link: https://lore.kernel.org/bpf/20210121020856.25507-1-bianpan2016@163.com --- kernel/bpf/bpf_inode_storage.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/kernel/bpf/bpf_inode_storage.c b/kernel/bpf/bpf_inode_storage.c index 2f0597320b6d2..6639640523c0b 100644 --- a/kernel/bpf/bpf_inode_storage.c +++ b/kernel/bpf/bpf_inode_storage.c @@ -125,8 +125,12 @@ static int bpf_fd_inode_storage_update_elem(struct bpf_map *map, void *key, fd = *(int *)key; f = fget_raw(fd); - if (!f || !inode_storage_ptr(f->f_inode)) + if (!f) + return -EBADF; + if (!inode_storage_ptr(f->f_inode)) { + fput(f); return -EBADF; + } sdata = bpf_local_storage_update(f->f_inode, (struct bpf_local_storage_map *)map, -- GitLab From caab13b4960416b9fee83169a758eb0f31e65109 Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Fri, 11 Dec 2020 13:58:46 +0000 Subject: [PATCH 1279/2012] drivers: soc: atmel: Avoid calling at91_soc_init on non AT91 SoCs Since at91_soc_init is called unconditionally from atmel_soc_device_init, we get the following warning on all non AT91 SoCs: " AT91: Could not find identification node" Fix the same by filtering with allowed AT91 SoC list. Cc: Nicolas Ferre Cc: Alexandre Belloni Cc: Ludovic Desroches Cc: stable@vger.kernel.org #4.12+ Signed-off-by: Sudeep Holla Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20201211135846.1334322-1-sudeep.holla@arm.com Signed-off-by: Arnd Bergmann --- drivers/soc/atmel/soc.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/soc/atmel/soc.c b/drivers/soc/atmel/soc.c index c4472b68b7c2a..728d461ad6d65 100644 --- a/drivers/soc/atmel/soc.c +++ b/drivers/soc/atmel/soc.c @@ -271,8 +271,20 @@ struct soc_device * __init at91_soc_init(const struct at91_soc *socs) return soc_dev; } +static const struct of_device_id at91_soc_allowed_list[] __initconst = { + { .compatible = "atmel,at91rm9200", }, + { .compatible = "atmel,at91sam9", }, + { .compatible = "atmel,sama5", }, + { .compatible = "atmel,samv7", } +}; + static int __init atmel_soc_device_init(void) { + struct device_node *np = of_find_node_by_path("/"); + + if (!of_match_node(at91_soc_allowed_list, np)) + return 0; + at91_soc_init(socs); return 0; -- GitLab From 680896556805d3ad3fa47f6002b87b3041a45ac2 Mon Sep 17 00:00:00 2001 From: Claudiu Beznea Date: Fri, 22 Jan 2021 14:21:34 +0200 Subject: [PATCH 1280/2012] drivers: soc: atmel: add null entry at the end of at91_soc_allowed_list[] of_match_node() calls __of_match_node() which loops though the entries of matches array. It stops when condition: (matches->name[0] || matches->type[0] || matches->compatible[0]) is false. Thus, add a null entry at the end of at91_soc_allowed_list[] array. Fixes: caab13b49604 ("drivers: soc: atmel: Avoid calling at91_soc_init on non AT91 SoCs") Cc: stable@vger.kernel.org #4.12+ Signed-off-by: Claudiu Beznea Signed-off-by: Arnd Bergmann --- drivers/soc/atmel/soc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/soc/atmel/soc.c b/drivers/soc/atmel/soc.c index 728d461ad6d65..698d21f505165 100644 --- a/drivers/soc/atmel/soc.c +++ b/drivers/soc/atmel/soc.c @@ -275,7 +275,8 @@ static const struct of_device_id at91_soc_allowed_list[] __initconst = { { .compatible = "atmel,at91rm9200", }, { .compatible = "atmel,at91sam9", }, { .compatible = "atmel,sama5", }, - { .compatible = "atmel,samv7", } + { .compatible = "atmel,samv7", }, + { } }; static int __init atmel_soc_device_init(void) -- GitLab From da8ee66f56071aef0b5b0de41d2c2a97fa30c8a1 Mon Sep 17 00:00:00 2001 From: Bharat Gooty Date: Tue, 19 Jan 2021 11:04:44 +0530 Subject: [PATCH 1281/2012] arm64: dts: broadcom: Fix USB DMA address translation for Stingray Add a non-empty dma-ranges so that DMA address translation happens. Fixes: 2013a4b684b6 ("arm64: dts: broadcom: clear the warnings caused by empty dma-ranges") Signed-off-by: Bharat Gooty Signed-off-by: Rayagonda Kokatanur Reviewed-by: Arnd Bergmann Acked-by: Ray Jui Signed-off-by: Florian Fainelli Signed-off-by: Arnd Bergmann --- arch/arm64/boot/dts/broadcom/stingray/stingray-usb.dtsi | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/broadcom/stingray/stingray-usb.dtsi b/arch/arm64/boot/dts/broadcom/stingray/stingray-usb.dtsi index aef8f2b00778d..5401a646c8406 100644 --- a/arch/arm64/boot/dts/broadcom/stingray/stingray-usb.dtsi +++ b/arch/arm64/boot/dts/broadcom/stingray/stingray-usb.dtsi @@ -4,11 +4,16 @@ */ usb { compatible = "simple-bus"; - dma-ranges; #address-cells = <2>; #size-cells = <2>; ranges = <0x0 0x0 0x0 0x68500000 0x0 0x00400000>; + /* + * Internally, USB bus to the interconnect can only address up + * to 40-bit + */ + dma-ranges = <0 0 0 0 0x100 0x0>; + usbphy0: usb-phy@0 { compatible = "brcm,sr-usb-combo-phy"; reg = <0x0 0x00000000 0x0 0x100>; -- GitLab From 68e89bc868e190365930f914fdbe154064851ec9 Mon Sep 17 00:00:00 2001 From: Stefan Wahren Date: Sun, 3 Jan 2021 13:24:35 +0100 Subject: [PATCH 1282/2012] MAINTAINERS: Include bcm2835 subsequents into search Change the bcm2835 maintainer info in order to handle subsequent SoCs. After this get_maintainers.pl provides the proper maintainers for irqchip-bcm2836. Signed-off-by: Stefan Wahren Signed-off-by: Florian Fainelli Signed-off-by: Arnd Bergmann --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index cc1e6a5ee6e67..f545261885f10 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3413,7 +3413,7 @@ F: Documentation/devicetree/bindings/pci/brcm,stb-pcie.yaml F: drivers/pci/controller/pcie-brcmstb.c F: drivers/staging/vc04_services N: bcm2711 -N: bcm2835 +N: bcm283* BROADCOM BCM281XX/BCM11XXX/BCM216XX ARM ARCHITECTURE M: Florian Fainelli -- GitLab From 7e0e63d09516e96994c879f07c5a3c3269d7015e Mon Sep 17 00:00:00 2001 From: Giacinto Cifelli Date: Wed, 20 Jan 2021 05:56:50 +0100 Subject: [PATCH 1283/2012] net: usb: qmi_wwan: added support for Thales Cinterion PLSx3 modem family MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bus 003 Device 009: ID 1e2d:006f Device Descriptor: bLength 18 bDescriptorType 1 bcdUSB 2.00 bDeviceClass 239 Miscellaneous Device bDeviceSubClass 2 ? bDeviceProtocol 1 Interface Association bMaxPacketSize0 64 idVendor 0x1e2d idProduct 0x006f bcdDevice 0.00 iManufacturer 3 Cinterion Wireless Modules iProduct 2 PLSx3 iSerial 4 fa3c1419 bNumConfigurations 1 Configuration Descriptor: bLength 9 bDescriptorType 2 wTotalLength 303 bNumInterfaces 9 bConfigurationValue 1 iConfiguration 1 Cinterion Configuration bmAttributes 0xe0 Self Powered Remote Wakeup MaxPower 500mA Interface Association: bLength 8 bDescriptorType 11 bFirstInterface 0 bInterfaceCount 2 bFunctionClass 2 Communications bFunctionSubClass 2 Abstract (modem) bFunctionProtocol 1 AT-commands (v.25ter) iFunction 0 Interface Descriptor: bLength 9 bDescriptorType 4 bInterfaceNumber 0 bAlternateSetting 0 bNumEndpoints 1 bInterfaceClass 2 Communications bInterfaceSubClass 2 Abstract (modem) bInterfaceProtocol 1 AT-commands (v.25ter) iInterface 0 CDC Header: bcdCDC 1.10 CDC ACM: bmCapabilities 0x02 line coding and serial state CDC Call Management: bmCapabilities 0x03 call management use DataInterface bDataInterface 1 CDC Union: bMasterInterface 0 bSlaveInterface 1 Endpoint Descriptor: bLength 7 bDescriptorType 5 bEndpointAddress 0x81 EP 1 IN bmAttributes 3 Transfer Type Interrupt Synch Type None Usage Type Data wMaxPacketSize 0x0040 1x 64 bytes bInterval 5 Interface Descriptor: bLength 9 bDescriptorType 4 bInterfaceNumber 1 bAlternateSetting 0 bNumEndpoints 2 bInterfaceClass 10 CDC Data bInterfaceSubClass 0 Unused bInterfaceProtocol 0 iInterface 0 Endpoint Descriptor: bLength 7 bDescriptorType 5 bEndpointAddress 0x82 EP 2 IN bmAttributes 2 Transfer Type Bulk Synch Type None Usage Type Data wMaxPacketSize 0x0200 1x 512 bytes bInterval 0 Endpoint Descriptor: bLength 7 bDescriptorType 5 bEndpointAddress 0x01 EP 1 OUT bmAttributes 2 Transfer Type Bulk Synch Type None Usage Type Data wMaxPacketSize 0x0200 1x 512 bytes bInterval 0 Interface Association: bLength 8 bDescriptorType 11 bFirstInterface 2 bInterfaceCount 2 bFunctionClass 2 Communications bFunctionSubClass 2 Abstract (modem) bFunctionProtocol 1 AT-commands (v.25ter) iFunction 0 Interface Descriptor: bLength 9 bDescriptorType 4 bInterfaceNumber 2 bAlternateSetting 0 bNumEndpoints 1 bInterfaceClass 2 Communications bInterfaceSubClass 2 Abstract (modem) bInterfaceProtocol 1 AT-commands (v.25ter) iInterface 0 CDC Header: bcdCDC 1.10 CDC ACM: bmCapabilities 0x02 line coding and serial state CDC Call Management: bmCapabilities 0x03 call management use DataInterface bDataInterface 3 CDC Union: bMasterInterface 2 bSlaveInterface 3 Endpoint Descriptor: bLength 7 bDescriptorType 5 bEndpointAddress 0x83 EP 3 IN bmAttributes 3 Transfer Type Interrupt Synch Type None Usage Type Data wMaxPacketSize 0x0040 1x 64 bytes bInterval 5 Interface Descriptor: bLength 9 bDescriptorType 4 bInterfaceNumber 3 bAlternateSetting 0 bNumEndpoints 2 bInterfaceClass 10 CDC Data bInterfaceSubClass 0 Unused bInterfaceProtocol 0 iInterface 0 Endpoint Descriptor: bLength 7 bDescriptorType 5 bEndpointAddress 0x84 EP 4 IN bmAttributes 2 Transfer Type Bulk Synch Type None Usage Type Data wMaxPacketSize 0x0200 1x 512 bytes bInterval 0 Endpoint Descriptor: bLength 7 bDescriptorType 5 bEndpointAddress 0x02 EP 2 OUT bmAttributes 2 Transfer Type Bulk Synch Type None Usage Type Data wMaxPacketSize 0x0200 1x 512 bytes bInterval 0 Interface Association: bLength 8 bDescriptorType 11 bFirstInterface 4 bInterfaceCount 2 bFunctionClass 2 Communications bFunctionSubClass 2 Abstract (modem) bFunctionProtocol 1 AT-commands (v.25ter) iFunction 0 Interface Descriptor: bLength 9 bDescriptorType 4 bInterfaceNumber 4 bAlternateSetting 0 bNumEndpoints 1 bInterfaceClass 2 Communications bInterfaceSubClass 2 Abstract (modem) bInterfaceProtocol 1 AT-commands (v.25ter) iInterface 0 CDC Header: bcdCDC 1.10 CDC ACM: bmCapabilities 0x02 line coding and serial state CDC Call Management: bmCapabilities 0x03 call management use DataInterface bDataInterface 5 CDC Union: bMasterInterface 4 bSlaveInterface 5 Endpoint Descriptor: bLength 7 bDescriptorType 5 bEndpointAddress 0x85 EP 5 IN bmAttributes 3 Transfer Type Interrupt Synch Type None Usage Type Data wMaxPacketSize 0x0040 1x 64 bytes bInterval 5 Interface Descriptor: bLength 9 bDescriptorType 4 bInterfaceNumber 5 bAlternateSetting 0 bNumEndpoints 2 bInterfaceClass 10 CDC Data bInterfaceSubClass 0 Unused bInterfaceProtocol 0 iInterface 0 Endpoint Descriptor: bLength 7 bDescriptorType 5 bEndpointAddress 0x86 EP 6 IN bmAttributes 2 Transfer Type Bulk Synch Type None Usage Type Data wMaxPacketSize 0x0200 1x 512 bytes bInterval 0 Endpoint Descriptor: bLength 7 bDescriptorType 5 bEndpointAddress 0x03 EP 3 OUT bmAttributes 2 Transfer Type Bulk Synch Type None Usage Type Data wMaxPacketSize 0x0200 1x 512 bytes bInterval 0 Interface Association: bLength 8 bDescriptorType 11 bFirstInterface 6 bInterfaceCount 2 bFunctionClass 2 Communications bFunctionSubClass 2 Abstract (modem) bFunctionProtocol 1 AT-commands (v.25ter) iFunction 0 Interface Descriptor: bLength 9 bDescriptorType 4 bInterfaceNumber 6 bAlternateSetting 0 bNumEndpoints 1 bInterfaceClass 2 Communications bInterfaceSubClass 2 Abstract (modem) bInterfaceProtocol 1 AT-commands (v.25ter) iInterface 0 CDC Header: bcdCDC 1.10 CDC ACM: bmCapabilities 0x02 line coding and serial state CDC Call Management: bmCapabilities 0x03 call management use DataInterface bDataInterface 7 CDC Union: bMasterInterface 6 bSlaveInterface 7 Endpoint Descriptor: bLength 7 bDescriptorType 5 bEndpointAddress 0x87 EP 7 IN bmAttributes 3 Transfer Type Interrupt Synch Type None Usage Type Data wMaxPacketSize 0x0040 1x 64 bytes bInterval 5 Interface Descriptor: bLength 9 bDescriptorType 4 bInterfaceNumber 7 bAlternateSetting 0 bNumEndpoints 2 bInterfaceClass 10 CDC Data bInterfaceSubClass 0 Unused bInterfaceProtocol 0 iInterface 0 Endpoint Descriptor: bLength 7 bDescriptorType 5 bEndpointAddress 0x88 EP 8 IN bmAttributes 2 Transfer Type Bulk Synch Type None Usage Type Data wMaxPacketSize 0x0200 1x 512 bytes bInterval 0 Endpoint Descriptor: bLength 7 bDescriptorType 5 bEndpointAddress 0x04 EP 4 OUT bmAttributes 2 Transfer Type Bulk Synch Type None Usage Type Data wMaxPacketSize 0x0200 1x 512 bytes bInterval 0 Interface Descriptor: bLength 9 bDescriptorType 4 bInterfaceNumber 8 bAlternateSetting 0 bNumEndpoints 3 bInterfaceClass 255 Vendor Specific Class bInterfaceSubClass 255 Vendor Specific Subclass bInterfaceProtocol 255 Vendor Specific Protocol iInterface 0 Endpoint Descriptor: bLength 7 bDescriptorType 5 bEndpointAddress 0x89 EP 9 IN bmAttributes 3 Transfer Type Interrupt Synch Type None Usage Type Data wMaxPacketSize 0x0040 1x 64 bytes bInterval 5 Endpoint Descriptor: bLength 7 bDescriptorType 5 bEndpointAddress 0x8a EP 10 IN bmAttributes 2 Transfer Type Bulk Synch Type None Usage Type Data wMaxPacketSize 0x0200 1x 512 bytes bInterval 0 Endpoint Descriptor: bLength 7 bDescriptorType 5 bEndpointAddress 0x05 EP 5 OUT bmAttributes 2 Transfer Type Bulk Synch Type None Usage Type Data wMaxPacketSize 0x0200 1x 512 bytes bInterval 0 Device Qualifier (for other device speed): bLength 10 bDescriptorType 6 bcdUSB 2.00 bDeviceClass 239 Miscellaneous Device bDeviceSubClass 2 ? bDeviceProtocol 1 Interface Association bMaxPacketSize0 64 bNumConfigurations 1 Device Status: 0x0000 (Bus Powered) Cc: stable@vger.kernel.org Signed-off-by: Giacinto Cifelli Acked-by: Bjørn Mork Link: https://lore.kernel.org/r/20210120045650.10855-1-gciofono@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/usb/qmi_wwan.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index af19513a9f75b..cc4819282820b 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -1302,6 +1302,7 @@ static const struct usb_device_id products[] = { {QMI_FIXED_INTF(0x0b3c, 0xc00a, 6)}, /* Olivetti Olicard 160 */ {QMI_FIXED_INTF(0x0b3c, 0xc00b, 4)}, /* Olivetti Olicard 500 */ {QMI_FIXED_INTF(0x1e2d, 0x0060, 4)}, /* Cinterion PLxx */ + {QMI_QUIRK_SET_DTR(0x1e2d, 0x006f, 8)}, /* Cinterion PLS83/PLS63 */ {QMI_FIXED_INTF(0x1e2d, 0x0053, 4)}, /* Cinterion PHxx,PXxx */ {QMI_FIXED_INTF(0x1e2d, 0x0063, 10)}, /* Cinterion ALASxx (1 RmNet) */ {QMI_FIXED_INTF(0x1e2d, 0x0082, 4)}, /* Cinterion PHxx,PXxx (2 RmNet) */ -- GitLab From db2805150a0f27c00ad286a29109397a7723adad Mon Sep 17 00:00:00 2001 From: Kevin Hao Date: Thu, 21 Jan 2021 15:09:06 +0800 Subject: [PATCH 1284/2012] net: octeontx2: Make sure the buffer is 128 byte aligned The octeontx2 hardware needs the buffer to be 128 byte aligned. But in the current implementation of napi_alloc_frag(), it can't guarantee the return address is 128 byte aligned even the request size is a multiple of 128 bytes, so we have to request an extra 128 bytes and use the PTR_ALIGN() to make sure that the buffer is aligned correctly. Fixes: 7a36e4918e30 ("octeontx2-pf: Use the napi_alloc_frag() to alloc the pool buffers") Reported-by: Subbaraya Sundeep Signed-off-by: Kevin Hao Tested-by: Subbaraya Sundeep Link: https://lore.kernel.org/r/20210121070906.25380-1-haokexin@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c index 73fb94dd5fbcc..e6869435e1f32 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c @@ -478,10 +478,11 @@ dma_addr_t __otx2_alloc_rbuf(struct otx2_nic *pfvf, struct otx2_pool *pool) dma_addr_t iova; u8 *buf; - buf = napi_alloc_frag(pool->rbsize); + buf = napi_alloc_frag(pool->rbsize + OTX2_ALIGN); if (unlikely(!buf)) return -ENOMEM; + buf = PTR_ALIGN(buf, OTX2_ALIGN); iova = dma_map_single_attrs(pfvf->dev, buf, pool->rbsize, DMA_FROM_DEVICE, DMA_ATTR_SKIP_CPU_SYNC); if (unlikely(dma_mapping_error(pfvf->dev, iova))) { -- GitLab From ca649ccae45d64b3b8e22a9fee9af7d796494e42 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Thu, 21 Jan 2021 11:06:15 +0100 Subject: [PATCH 1285/2012] dt-bindings: net: renesas,etheravb: Add r8a779a0 support Document the compatible value for the RAVB block in the Renesas R-Car V3U (R8A779A0) SoC. This variant has no stream buffer, so we only need to add the new compatible and add it to the TX delay block. Reviewed-by: Geert Uytterhoeven Acked-by: Rob Herring Signed-off-by: Wolfram Sang Link: https://lore.kernel.org/r/20210121100619.5653-2-wsa+renesas@sang-engineering.com Signed-off-by: Jakub Kicinski --- Documentation/devicetree/bindings/net/renesas,etheravb.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Documentation/devicetree/bindings/net/renesas,etheravb.yaml b/Documentation/devicetree/bindings/net/renesas,etheravb.yaml index de9dd574a2f95..91ba96d43c6c1 100644 --- a/Documentation/devicetree/bindings/net/renesas,etheravb.yaml +++ b/Documentation/devicetree/bindings/net/renesas,etheravb.yaml @@ -40,6 +40,7 @@ properties: - renesas,etheravb-r8a77980 # R-Car V3H - renesas,etheravb-r8a77990 # R-Car E3 - renesas,etheravb-r8a77995 # R-Car D3 + - renesas,etheravb-r8a779a0 # R-Car V3U - const: renesas,etheravb-rcar-gen3 # R-Car Gen3 and RZ/G2 reg: true @@ -170,6 +171,7 @@ allOf: - renesas,etheravb-r8a77965 - renesas,etheravb-r8a77970 - renesas,etheravb-r8a77980 + - renesas,etheravb-r8a779a0 then: required: - tx-internal-delay-ps -- GitLab From 2d8983f9246ed197ae5737344190b6bc35fb155b Mon Sep 17 00:00:00 2001 From: Yuusuke Ashizuka Date: Thu, 21 Jan 2021 17:02:54 +0900 Subject: [PATCH 1286/2012] net: phy: realtek: Add support for RTL9000AA/AN RTL9000AA/AN as 100BASE-T1 is following: - 100 Mbps - Full duplex - Link Status Change Interrupt - Master/Slave configuration Signed-off-by: Yuusuke Ashizuka Signed-off-by: Torii Kenichi Reviewed-by: Andrew Lunn Link: https://lore.kernel.org/r/20210121080254.21286-1-ashiduka@fujitsu.com Signed-off-by: Jakub Kicinski --- drivers/net/phy/realtek.c | 132 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 132 insertions(+) diff --git a/drivers/net/phy/realtek.c b/drivers/net/phy/realtek.c index 99ecd6c4c15a0..821e85a973679 100644 --- a/drivers/net/phy/realtek.c +++ b/drivers/net/phy/realtek.c @@ -60,6 +60,9 @@ #define RTL_LPADV_5000FULL BIT(6) #define RTL_LPADV_2500FULL BIT(5) +#define RTL9000A_GINMR 0x14 +#define RTL9000A_GINMR_LINK_STATUS BIT(4) + #define RTLGEN_SPEED_MASK 0x0630 #define RTL_GENERIC_PHYID 0x001cc800 @@ -655,6 +658,122 @@ static int rtlgen_resume(struct phy_device *phydev) return ret; } +static int rtl9000a_config_init(struct phy_device *phydev) +{ + phydev->autoneg = AUTONEG_DISABLE; + phydev->speed = SPEED_100; + phydev->duplex = DUPLEX_FULL; + + return 0; +} + +static int rtl9000a_config_aneg(struct phy_device *phydev) +{ + int ret; + u16 ctl = 0; + + switch (phydev->master_slave_set) { + case MASTER_SLAVE_CFG_MASTER_FORCE: + ctl |= CTL1000_AS_MASTER; + break; + case MASTER_SLAVE_CFG_SLAVE_FORCE: + break; + case MASTER_SLAVE_CFG_UNKNOWN: + case MASTER_SLAVE_CFG_UNSUPPORTED: + return 0; + default: + phydev_warn(phydev, "Unsupported Master/Slave mode\n"); + return -EOPNOTSUPP; + } + + ret = phy_modify_changed(phydev, MII_CTRL1000, CTL1000_AS_MASTER, ctl); + if (ret == 1) + ret = genphy_soft_reset(phydev); + + return ret; +} + +static int rtl9000a_read_status(struct phy_device *phydev) +{ + int ret; + + phydev->master_slave_get = MASTER_SLAVE_CFG_UNKNOWN; + phydev->master_slave_state = MASTER_SLAVE_STATE_UNKNOWN; + + ret = genphy_update_link(phydev); + if (ret) + return ret; + + ret = phy_read(phydev, MII_CTRL1000); + if (ret < 0) + return ret; + if (ret & CTL1000_AS_MASTER) + phydev->master_slave_get = MASTER_SLAVE_CFG_MASTER_FORCE; + else + phydev->master_slave_get = MASTER_SLAVE_CFG_SLAVE_FORCE; + + ret = phy_read(phydev, MII_STAT1000); + if (ret < 0) + return ret; + if (ret & LPA_1000MSRES) + phydev->master_slave_state = MASTER_SLAVE_STATE_MASTER; + else + phydev->master_slave_state = MASTER_SLAVE_STATE_SLAVE; + + return 0; +} + +static int rtl9000a_ack_interrupt(struct phy_device *phydev) +{ + int err; + + err = phy_read(phydev, RTL8211F_INSR); + + return (err < 0) ? err : 0; +} + +static int rtl9000a_config_intr(struct phy_device *phydev) +{ + u16 val; + int err; + + if (phydev->interrupts == PHY_INTERRUPT_ENABLED) { + err = rtl9000a_ack_interrupt(phydev); + if (err) + return err; + + val = (u16)~RTL9000A_GINMR_LINK_STATUS; + err = phy_write_paged(phydev, 0xa42, RTL9000A_GINMR, val); + } else { + val = ~0; + err = phy_write_paged(phydev, 0xa42, RTL9000A_GINMR, val); + if (err) + return err; + + err = rtl9000a_ack_interrupt(phydev); + } + + return phy_write_paged(phydev, 0xa42, RTL9000A_GINMR, val); +} + +static irqreturn_t rtl9000a_handle_interrupt(struct phy_device *phydev) +{ + int irq_status; + + irq_status = phy_read(phydev, RTL8211F_INSR); + if (irq_status < 0) { + phy_error(phydev); + return IRQ_NONE; + } + + if (!(irq_status & RTL8211F_INER_LINK_STATUS)) + return IRQ_NONE; + + phy_trigger_machine(phydev); + + return IRQ_HANDLED; +} + static struct phy_driver realtek_drvs[] = { { PHY_ID_MATCH_EXACT(0x00008201), @@ -823,6 +942,19 @@ static struct phy_driver realtek_drvs[] = { .handle_interrupt = genphy_handle_interrupt_no_ack, .suspend = genphy_suspend, .resume = genphy_resume, + }, { + PHY_ID_MATCH_EXACT(0x001ccb00), + .name = "RTL9000AA_RTL9000AN Ethernet", + .features = PHY_BASIC_T1_FEATURES, + .config_init = rtl9000a_config_init, + .config_aneg = rtl9000a_config_aneg, + .read_status = rtl9000a_read_status, + .config_intr = rtl9000a_config_intr, + .handle_interrupt = rtl9000a_handle_interrupt, + .suspend = genphy_suspend, + .resume = genphy_resume, + .read_page = rtl821x_read_page, + .write_page = rtl821x_write_page, }, }; -- GitLab From a05a7280f5453ed24c2001eb66b359776ab18cb5 Mon Sep 17 00:00:00 2001 From: Pengcheng Yang Date: Thu, 21 Jan 2021 22:31:13 +0800 Subject: [PATCH 1287/2012] tcp: remove unused ICSK_TIME_EARLY_RETRANS Since the early retransmit has been removed by commit bec41a11dd3d ("tcp: remove early retransmit"), we also remove the unused ICSK_TIME_EARLY_RETRANS macro. Signed-off-by: Pengcheng Yang Signed-off-by: Eric Dumazet Link: https://lore.kernel.org/r/1611239473-27304-1-git-send-email-yangpc@wangsu.com Signed-off-by: Jakub Kicinski --- include/net/inet_connection_sock.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index 111d7771b2081..c11f80f328f1d 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -141,7 +141,6 @@ struct inet_connection_sock { #define ICSK_TIME_RETRANS 1 /* Retransmit timer */ #define ICSK_TIME_DACK 2 /* Delayed ack timer */ #define ICSK_TIME_PROBE0 3 /* Zero window probe timer */ -#define ICSK_TIME_EARLY_RETRANS 4 /* Early retransmit timer */ #define ICSK_TIME_LOSS_PROBE 5 /* Tail loss probe timer */ #define ICSK_TIME_REO_TIMEOUT 6 /* Reordering timer */ @@ -227,8 +226,7 @@ static inline void inet_csk_reset_xmit_timer(struct sock *sk, const int what, } if (what == ICSK_TIME_RETRANS || what == ICSK_TIME_PROBE0 || - what == ICSK_TIME_EARLY_RETRANS || what == ICSK_TIME_LOSS_PROBE || - what == ICSK_TIME_REO_TIMEOUT) { + what == ICSK_TIME_LOSS_PROBE || what == ICSK_TIME_REO_TIMEOUT) { icsk->icsk_pending = what; icsk->icsk_timeout = jiffies + when; sk_reset_timer(sk, &icsk->icsk_retransmit_timer, icsk->icsk_timeout); -- GitLab From e7ed11ee945438b737e2ae2370e35591e16ec371 Mon Sep 17 00:00:00 2001 From: Yousuk Seung Date: Wed, 20 Jan 2021 12:41:55 -0800 Subject: [PATCH 1288/2012] tcp: add TTL to SCM_TIMESTAMPING_OPT_STATS This patch adds TCP_NLA_TTL to SCM_TIMESTAMPING_OPT_STATS that exports the time-to-live or hop limit of the latest incoming packet with SCM_TSTAMP_ACK. The value exported may not be from the packet that acks the sequence when incoming packets are aggregated. Exporting the time-to-live or hop limit value of incoming packets helps to estimate the hop count of the path of the flow that may change over time. Signed-off-by: Yousuk Seung Signed-off-by: Eric Dumazet Signed-off-by: Neal Cardwell Link: https://lore.kernel.org/r/20210120204155.552275-1-ysseung@google.com Signed-off-by: Jakub Kicinski --- include/linux/skbuff.h | 2 +- include/linux/tcp.h | 3 ++- include/uapi/linux/tcp.h | 1 + net/core/dev.c | 2 +- net/core/skbuff.c | 6 ++++-- net/ipv4/tcp.c | 18 +++++++++++++++++- net/ipv4/tcp_input.c | 16 ++++++++-------- 7 files changed, 34 insertions(+), 14 deletions(-) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 186dad231e302..9313b5aaf45b6 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -3859,7 +3859,7 @@ static inline bool skb_defer_rx_timestamp(struct sk_buff *skb) void skb_complete_tx_timestamp(struct sk_buff *skb, struct skb_shared_hwtstamps *hwtstamps); -void __skb_tstamp_tx(struct sk_buff *orig_skb, +void __skb_tstamp_tx(struct sk_buff *orig_skb, const struct sk_buff *ack_skb, struct skb_shared_hwtstamps *hwtstamps, struct sock *sk, int tstype); diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 2f87377e9af70..48d8a363319e5 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -496,7 +496,8 @@ static inline u32 tcp_saved_syn_len(const struct saved_syn *saved_syn) } struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk, - const struct sk_buff *orig_skb); + const struct sk_buff *orig_skb, + const struct sk_buff *ack_skb); static inline u16 tcp_mss_clamp(const struct tcp_sock *tp, u16 mss) { diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h index 768e93bd5b511..16dfa40bdac38 100644 --- a/include/uapi/linux/tcp.h +++ b/include/uapi/linux/tcp.h @@ -314,6 +314,7 @@ enum { TCP_NLA_TIMEOUT_REHASH, /* Timeout-triggered rehash attempts */ TCP_NLA_BYTES_NOTSENT, /* Bytes in write queue not yet sent */ TCP_NLA_EDT, /* Earliest departure time (CLOCK_MONOTONIC) */ + TCP_NLA_TTL, /* TTL or hop limit of a packet received */ }; /* for TCP_MD5SIG socket option */ diff --git a/net/core/dev.c b/net/core/dev.c index d9ce02e959926..6df3f1bcdc686 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4084,7 +4084,7 @@ static int __dev_queue_xmit(struct sk_buff *skb, struct net_device *sb_dev) skb_reset_mac_header(skb); if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_SCHED_TSTAMP)) - __skb_tstamp_tx(skb, NULL, skb->sk, SCM_TSTAMP_SCHED); + __skb_tstamp_tx(skb, NULL, NULL, skb->sk, SCM_TSTAMP_SCHED); /* Disable soft irqs for various locks below. Also * stops preemption for RCU. diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 145503d3f06b3..2af12f7e170c7 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -4721,6 +4721,7 @@ err: EXPORT_SYMBOL_GPL(skb_complete_tx_timestamp); void __skb_tstamp_tx(struct sk_buff *orig_skb, + const struct sk_buff *ack_skb, struct skb_shared_hwtstamps *hwtstamps, struct sock *sk, int tstype) { @@ -4743,7 +4744,8 @@ void __skb_tstamp_tx(struct sk_buff *orig_skb, if ((sk->sk_tsflags & SOF_TIMESTAMPING_OPT_STATS) && sk->sk_protocol == IPPROTO_TCP && sk->sk_type == SOCK_STREAM) { - skb = tcp_get_timestamping_opt_stats(sk, orig_skb); + skb = tcp_get_timestamping_opt_stats(sk, orig_skb, + ack_skb); opt_stats = true; } else #endif @@ -4772,7 +4774,7 @@ EXPORT_SYMBOL_GPL(__skb_tstamp_tx); void skb_tstamp_tx(struct sk_buff *orig_skb, struct skb_shared_hwtstamps *hwtstamps) { - return __skb_tstamp_tx(orig_skb, hwtstamps, orig_skb->sk, + return __skb_tstamp_tx(orig_skb, NULL, hwtstamps, orig_skb->sk, SCM_TSTAMP_SND); } EXPORT_SYMBOL_GPL(skb_tstamp_tx); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 856ae516ac18b..a1a17b64f8cd6 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -3767,11 +3767,24 @@ static size_t tcp_opt_stats_get_size(void) nla_total_size(sizeof(u16)) + /* TCP_NLA_TIMEOUT_REHASH */ nla_total_size(sizeof(u32)) + /* TCP_NLA_BYTES_NOTSENT */ nla_total_size_64bit(sizeof(u64)) + /* TCP_NLA_EDT */ + nla_total_size(sizeof(u8)) + /* TCP_NLA_TTL */ 0; } +/* Returns TTL or hop limit of an incoming packet from skb. */ +static u8 tcp_skb_ttl_or_hop_limit(const struct sk_buff *skb) +{ + if (skb->protocol == htons(ETH_P_IP)) + return ip_hdr(skb)->ttl; + else if (skb->protocol == htons(ETH_P_IPV6)) + return ipv6_hdr(skb)->hop_limit; + else + return 0; +} + struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk, - const struct sk_buff *orig_skb) + const struct sk_buff *orig_skb, + const struct sk_buff *ack_skb) { const struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *stats; @@ -3827,6 +3840,9 @@ struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk, max_t(int, 0, tp->write_seq - tp->snd_nxt)); nla_put_u64_64bit(stats, TCP_NLA_EDT, orig_skb->skb_mstamp_ns, TCP_NLA_PAD); + if (ack_skb) + nla_put_u8(stats, TCP_NLA_TTL, + tcp_skb_ttl_or_hop_limit(ack_skb)); return stats; } diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index a7dfca0a38cd7..d4f66aba9fd87 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3145,7 +3145,7 @@ static u32 tcp_tso_acked(struct sock *sk, struct sk_buff *skb) } static void tcp_ack_tstamp(struct sock *sk, struct sk_buff *skb, - u32 prior_snd_una) + const struct sk_buff *ack_skb, u32 prior_snd_una) { const struct skb_shared_info *shinfo; @@ -3157,7 +3157,7 @@ static void tcp_ack_tstamp(struct sock *sk, struct sk_buff *skb, if (!before(shinfo->tskey, prior_snd_una) && before(shinfo->tskey, tcp_sk(sk)->snd_una)) { tcp_skb_tsorted_save(skb) { - __skb_tstamp_tx(skb, NULL, sk, SCM_TSTAMP_ACK); + __skb_tstamp_tx(skb, ack_skb, NULL, sk, SCM_TSTAMP_ACK); } tcp_skb_tsorted_restore(skb); } } @@ -3166,8 +3166,8 @@ static void tcp_ack_tstamp(struct sock *sk, struct sk_buff *skb, * is before the ack sequence we can discard it as it's confirmed to have * arrived at the other end. */ -static int tcp_clean_rtx_queue(struct sock *sk, u32 prior_fack, - u32 prior_snd_una, +static int tcp_clean_rtx_queue(struct sock *sk, const struct sk_buff *ack_skb, + u32 prior_fack, u32 prior_snd_una, struct tcp_sacktag_state *sack, bool ece_ack) { const struct inet_connection_sock *icsk = inet_csk(sk); @@ -3256,7 +3256,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, u32 prior_fack, if (!fully_acked) break; - tcp_ack_tstamp(sk, skb, prior_snd_una); + tcp_ack_tstamp(sk, skb, ack_skb, prior_snd_una); next = skb_rb_next(skb); if (unlikely(skb == tp->retransmit_skb_hint)) @@ -3274,7 +3274,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, u32 prior_fack, tp->snd_up = tp->snd_una; if (skb) { - tcp_ack_tstamp(sk, skb, prior_snd_una); + tcp_ack_tstamp(sk, skb, ack_skb, prior_snd_una); if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED) flag |= FLAG_SACK_RENEGING; } @@ -3809,8 +3809,8 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) goto no_queue; /* See if we can take anything off of the retransmit queue. */ - flag |= tcp_clean_rtx_queue(sk, prior_fack, prior_snd_una, &sack_state, - flag & FLAG_ECE); + flag |= tcp_clean_rtx_queue(sk, skb, prior_fack, prior_snd_una, + &sack_state, flag & FLAG_ECE); tcp_rack_update_reo_wnd(sk, &rs); -- GitLab From 3765d86ffcd346913c372d69cdc05dc8d56119ac Mon Sep 17 00:00:00 2001 From: Pan Bian Date: Wed, 20 Jan 2021 03:07:44 -0800 Subject: [PATCH 1289/2012] net: stmmac: dwmac-intel-plat: remove config data on error Remove the config data when rate setting fails. Fixes: 9efc9b2b04c7 ("net: stmmac: Add dwmac-intel-plat for GBE driver") Signed-off-by: Pan Bian Link: https://lore.kernel.org/r/20210120110745.36412-1-bianpan2016@163.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/dwmac-intel-plat.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel-plat.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel-plat.c index 82b1c7a5a7a94..ba0e4d2b256a4 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel-plat.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel-plat.c @@ -129,7 +129,7 @@ static int intel_eth_plat_probe(struct platform_device *pdev) if (ret) { dev_err(&pdev->dev, "Failed to set tx_clk\n"); - return ret; + goto err_remove_config_dt; } } } @@ -143,7 +143,7 @@ static int intel_eth_plat_probe(struct platform_device *pdev) if (ret) { dev_err(&pdev->dev, "Failed to set clk_ptp_ref\n"); - return ret; + goto err_remove_config_dt; } } } -- GitLab From 0607a2cddb60f4548b55e28ac56a8d73493a45bb Mon Sep 17 00:00:00 2001 From: Pan Bian Date: Wed, 20 Jan 2021 04:20:37 -0800 Subject: [PATCH 1290/2012] net: fec: put child node on error path Also decrement the reference count of child device on error path. Fixes: 3e782985cb3c ("net: ethernet: fec: Allow configuration of MDIO bus speed") Signed-off-by: Pan Bian Link: https://lore.kernel.org/r/20210120122037.83897-1-bianpan2016@163.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/freescale/fec_main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 04f24c66cf366..55c28fbc5f9ea 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -2165,9 +2165,9 @@ static int fec_enet_mii_init(struct platform_device *pdev) fep->mii_bus->parent = &pdev->dev; err = of_mdiobus_register(fep->mii_bus, node); - of_node_put(node); if (err) goto err_out_free_mdiobus; + of_node_put(node); mii_cnt++; @@ -2180,6 +2180,7 @@ static int fec_enet_mii_init(struct platform_device *pdev) err_out_free_mdiobus: mdiobus_free(fep->mii_bus); err_out: + of_node_put(node); return err; } -- GitLab From e26ca4b535820b1445dcef3c0f82b3fb5b45108b Mon Sep 17 00:00:00 2001 From: Ivan Babrou Date: Wed, 20 Jan 2021 13:27:59 -0800 Subject: [PATCH 1291/2012] sfc: reduce the number of requested xdp ev queues Without this change the driver tries to allocate too many queues, breaching the number of available msi-x interrupts on machines with many logical cpus and default adapter settings: Insufficient resources for 12 XDP event queues (24 other channels, max 32) Which in turn triggers EINVAL on XDP processing: sfc 0000:86:00.0 ext0: XDP TX failed (-22) Signed-off-by: Ivan Babrou Acked-by: Edward Cree Link: https://lore.kernel.org/r/20210120212759.81548-1-ivan@cloudflare.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/sfc/efx_channels.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/sfc/efx_channels.c b/drivers/net/ethernet/sfc/efx_channels.c index a4a626e9cd9a1..1bfeee283ea90 100644 --- a/drivers/net/ethernet/sfc/efx_channels.c +++ b/drivers/net/ethernet/sfc/efx_channels.c @@ -17,6 +17,7 @@ #include "rx_common.h" #include "nic.h" #include "sriov.h" +#include "workarounds.h" /* This is the first interrupt mode to try out of: * 0 => MSI-X @@ -137,6 +138,7 @@ static int efx_allocate_msix_channels(struct efx_nic *efx, { unsigned int n_channels = parallelism; int vec_count; + int tx_per_ev; int n_xdp_tx; int n_xdp_ev; @@ -149,9 +151,9 @@ static int efx_allocate_msix_channels(struct efx_nic *efx, * multiple tx queues, assuming tx and ev queues are both * maximum size. */ - + tx_per_ev = EFX_MAX_EVQ_SIZE / EFX_TXQ_MAX_ENT(efx); n_xdp_tx = num_possible_cpus(); - n_xdp_ev = DIV_ROUND_UP(n_xdp_tx, EFX_MAX_TXQ_PER_CHANNEL); + n_xdp_ev = DIV_ROUND_UP(n_xdp_tx, tx_per_ev); vec_count = pci_msix_vec_count(efx->pci_dev); if (vec_count < 0) -- GitLab From 866f26f2a9c33bc70eb0f07ffc37fd9424ffe501 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Wed, 20 Jan 2021 15:39:10 +0100 Subject: [PATCH 1292/2012] mptcp: always graft subflow socket to parent Currently, incoming subflows link to the parent socket, while outgoing ones link to a per subflow socket. The latter is not really needed, except at the initial connect() time and for the first subflow. Always graft the outgoing subflow to the parent socket and free the unneeded ones early. This allows some code cleanup, reduces the amount of memory used and will simplify the next patch Reviewed-by: Mat Martineau Signed-off-by: Paolo Abeni Signed-off-by: Jakub Kicinski --- net/mptcp/protocol.c | 36 ++++++++++-------------------------- net/mptcp/protocol.h | 1 + net/mptcp/subflow.c | 3 +++ 3 files changed, 14 insertions(+), 26 deletions(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index f998a077c7dd0..c5c80f9253832 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -114,11 +114,7 @@ static int __mptcp_socket_create(struct mptcp_sock *msk) list_add(&subflow->node, &msk->conn_list); sock_hold(ssock->sk); subflow->request_mptcp = 1; - - /* accept() will wait on first subflow sk_wq, and we always wakes up - * via msk->sk_socket - */ - RCU_INIT_POINTER(msk->first->sk_wq, &sk->sk_socket->wq); + mptcp_sock_graft(msk->first, sk->sk_socket); return 0; } @@ -2116,9 +2112,6 @@ static struct sock *mptcp_subflow_get_retrans(const struct mptcp_sock *msk) void __mptcp_close_ssk(struct sock *sk, struct sock *ssk, struct mptcp_subflow_context *subflow) { - bool dispose_socket = false; - struct socket *sock; - list_del(&subflow->node); lock_sock_nested(ssk, SINGLE_DEPTH_NESTING); @@ -2126,11 +2119,8 @@ void __mptcp_close_ssk(struct sock *sk, struct sock *ssk, /* if we are invoked by the msk cleanup code, the subflow is * already orphaned */ - sock = ssk->sk_socket; - if (sock) { - dispose_socket = sock != sk->sk_socket; + if (ssk->sk_socket) sock_orphan(ssk); - } subflow->disposable = 1; @@ -2148,8 +2138,6 @@ void __mptcp_close_ssk(struct sock *sk, struct sock *ssk, __sock_put(ssk); } release_sock(ssk); - if (dispose_socket) - iput(SOCK_INODE(sock)); sock_put(ssk); } @@ -2536,6 +2524,12 @@ static void __mptcp_destroy_sock(struct sock *sk) pr_debug("msk=%p", msk); + /* dispose the ancillatory tcp socket, if any */ + if (msk->subflow) { + iput(SOCK_INODE(msk->subflow)); + msk->subflow = NULL; + } + /* be sure to always acquire the join list lock, to sync vs * mptcp_finish_join(). */ @@ -2586,20 +2580,10 @@ cleanup: inet_csk(sk)->icsk_mtup.probe_timestamp = tcp_jiffies32; list_for_each_entry(subflow, &mptcp_sk(sk)->conn_list, node) { struct sock *ssk = mptcp_subflow_tcp_sock(subflow); - bool slow, dispose_socket; - struct socket *sock; + bool slow = lock_sock_fast(ssk); - slow = lock_sock_fast(ssk); - sock = ssk->sk_socket; - dispose_socket = sock && sock != sk->sk_socket; sock_orphan(ssk); unlock_sock_fast(ssk, slow); - - /* for the outgoing subflows we additionally need to free - * the associated socket - */ - if (dispose_socket) - iput(SOCK_INODE(sock)); } sock_orphan(sk); @@ -3041,7 +3025,7 @@ void mptcp_finish_connect(struct sock *ssk) mptcp_rcv_space_init(msk, ssk); } -static void mptcp_sock_graft(struct sock *sk, struct socket *parent) +void mptcp_sock_graft(struct sock *sk, struct socket *parent) { write_lock_bh(&sk->sk_callback_lock); rcu_assign_pointer(sk->sk_wq, &parent->wq); diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index d6400ad2d6156..65d200a1072bf 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -473,6 +473,7 @@ void mptcp_subflow_shutdown(struct sock *sk, struct sock *ssk, int how); void __mptcp_close_ssk(struct sock *sk, struct sock *ssk, struct mptcp_subflow_context *subflow); void mptcp_subflow_reset(struct sock *ssk); +void mptcp_sock_graft(struct sock *sk, struct socket *parent); /* called with sk socket lock held */ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc, diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 278cbe3e539ea..22313710d7696 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -1159,6 +1159,9 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc, if (err && err != -EINPROGRESS) goto failed_unlink; + /* discard the subflow socket */ + mptcp_sock_graft(ssk, sk->sk_socket); + iput(SOCK_INODE(sf)); return err; failed_unlink: -- GitLab From 5cf92bbadc585e1bcb710df75293e07b7c846bb6 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Wed, 20 Jan 2021 15:39:11 +0100 Subject: [PATCH 1293/2012] mptcp: re-enable sndbuf autotune After commit 6e628cd3a8f7 ("mptcp: use mptcp release_cb for delayed tasks"), MPTCP never sets the flag bit SOCK_NOSPACE on its subflow. As a side effect, autotune never takes place, as it happens inside tcp_new_space(), which in turn is called only when the mentioned bit is set. Let's sendmsg() set the subflows NOSPACE bit when looking for more memory and use the subflow write_space callback to propagate the snd buf update and wake-up the user-space. Additionally, this allows dropping a bunch of duplicate code and makes the SNDBUF_LIMITED chrono relevant again for MPTCP subflows. Fixes: 6e628cd3a8f7 ("mptcp: use mptcp release_cb for delayed tasks") Reviewed-by: Mat Martineau Signed-off-by: Paolo Abeni Signed-off-by: Jakub Kicinski --- net/mptcp/protocol.c | 52 +++++++++++++++++--------------------------- net/mptcp/protocol.h | 19 ++++++++++++++++ net/mptcp/subflow.c | 7 +++++- 3 files changed, 45 insertions(+), 33 deletions(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index c5c80f9253832..d07e60330df56 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -730,10 +730,14 @@ wake: void __mptcp_flush_join_list(struct mptcp_sock *msk) { + struct mptcp_subflow_context *subflow; + if (likely(list_empty(&msk->join_list))) return; spin_lock_bh(&msk->join_list_lock); + list_for_each_entry(subflow, &msk->join_list, node) + mptcp_propagate_sndbuf((struct sock *)msk, mptcp_subflow_tcp_sock(subflow)); list_splice_tail_init(&msk->join_list, &msk->conn_list); spin_unlock_bh(&msk->join_list_lock); } @@ -1033,13 +1037,6 @@ out: __mptcp_update_wmem(sk); sk_mem_reclaim_partial(sk); } - - if (sk_stream_is_writeable(sk)) { - /* pairs with memory barrier in mptcp_poll */ - smp_mb(); - if (test_and_clear_bit(MPTCP_NOSPACE, &msk->flags)) - sk_stream_write_space(sk); - } } if (snd_una == READ_ONCE(msk->snd_nxt)) { @@ -1358,8 +1355,7 @@ struct subflow_send_info { u64 ratio; }; -static struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk, - u32 *sndbuf) +static struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk) { struct subflow_send_info send_info[2]; struct mptcp_subflow_context *subflow; @@ -1370,24 +1366,17 @@ static struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk, sock_owned_by_me((struct sock *)msk); - *sndbuf = 0; if (__mptcp_check_fallback(msk)) { if (!msk->first) return NULL; - *sndbuf = msk->first->sk_sndbuf; return sk_stream_memory_free(msk->first) ? msk->first : NULL; } /* re-use last subflow, if the burst allow that */ if (msk->last_snd && msk->snd_burst > 0 && sk_stream_memory_free(msk->last_snd) && - mptcp_subflow_active(mptcp_subflow_ctx(msk->last_snd))) { - mptcp_for_each_subflow(msk, subflow) { - ssk = mptcp_subflow_tcp_sock(subflow); - *sndbuf = max(tcp_sk(ssk)->snd_wnd, *sndbuf); - } + mptcp_subflow_active(mptcp_subflow_ctx(msk->last_snd))) return msk->last_snd; - } /* pick the subflow with the lower wmem/wspace ratio */ for (i = 0; i < 2; ++i) { @@ -1400,7 +1389,6 @@ static struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk, continue; nr_active += !subflow->backup; - *sndbuf = max(tcp_sk(ssk)->snd_wnd, *sndbuf); if (!sk_stream_memory_free(subflow->tcp_sock)) continue; @@ -1430,6 +1418,7 @@ static struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk, sk_stream_wspace(msk->last_snd)); return msk->last_snd; } + return NULL; } @@ -1450,7 +1439,6 @@ static void mptcp_push_pending(struct sock *sk, unsigned int flags) }; struct mptcp_data_frag *dfrag; int len, copied = 0; - u32 sndbuf; while ((dfrag = mptcp_send_head(sk))) { info.sent = dfrag->already_sent; @@ -1461,12 +1449,7 @@ static void mptcp_push_pending(struct sock *sk, unsigned int flags) prev_ssk = ssk; __mptcp_flush_join_list(msk); - ssk = mptcp_subflow_get_send(msk, &sndbuf); - - /* do auto tuning */ - if (!(sk->sk_userlocks & SOCK_SNDBUF_LOCK) && - sndbuf > READ_ONCE(sk->sk_sndbuf)) - WRITE_ONCE(sk->sk_sndbuf, sndbuf); + ssk = mptcp_subflow_get_send(msk); /* try to keep the subflow socket lock across * consecutive xmit on the same socket @@ -1533,11 +1516,6 @@ static void __mptcp_subflow_push_pending(struct sock *sk, struct sock *ssk) while (len > 0) { int ret = 0; - /* do auto tuning */ - if (!(sk->sk_userlocks & SOCK_SNDBUF_LOCK) && - ssk->sk_sndbuf > READ_ONCE(sk->sk_sndbuf)) - WRITE_ONCE(sk->sk_sndbuf, ssk->sk_sndbuf); - if (unlikely(mptcp_must_reclaim_memory(sk, ssk))) { __mptcp_update_wmem(sk); sk_mem_reclaim_partial(sk); @@ -1575,6 +1553,15 @@ out: } } +static void mptcp_set_nospace(struct sock *sk) +{ + /* enable autotune */ + set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); + + /* will be cleared on avail space */ + set_bit(MPTCP_NOSPACE, &mptcp_sk(sk)->flags); +} + static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) { struct mptcp_sock *msk = mptcp_sk(sk); @@ -1676,7 +1663,7 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) continue; wait_for_memory: - set_bit(MPTCP_NOSPACE, &msk->flags); + mptcp_set_nospace(sk); mptcp_push_pending(sk, msg->msg_flags); ret = sk_stream_wait_memory(sk, &timeo); if (ret) @@ -3268,6 +3255,7 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock, mptcp_copy_inaddrs(newsk, msk->first); mptcp_rcv_space_init(msk, msk->first); + mptcp_propagate_sndbuf(newsk, msk->first); /* set ssk->sk_socket of accept()ed flows to mptcp socket. * This is needed so NOSPACE flag can be set from tcp stack. @@ -3308,7 +3296,7 @@ static __poll_t mptcp_check_writeable(struct mptcp_sock *msk) if (sk_stream_is_writeable(sk)) return EPOLLOUT | EPOLLWRNORM; - set_bit(MPTCP_NOSPACE, &msk->flags); + mptcp_set_nospace(sk); smp_mb__after_atomic(); /* msk->flags is changed by write_space cb */ if (sk_stream_is_writeable(sk)) return EPOLLOUT | EPOLLWRNORM; diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 65d200a1072bf..871534df6140f 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -522,6 +522,25 @@ static inline bool mptcp_data_fin_enabled(const struct mptcp_sock *msk) READ_ONCE(msk->write_seq) == READ_ONCE(msk->snd_nxt); } +static inline bool mptcp_propagate_sndbuf(struct sock *sk, struct sock *ssk) +{ + if ((sk->sk_userlocks & SOCK_SNDBUF_LOCK) || ssk->sk_sndbuf <= READ_ONCE(sk->sk_sndbuf)) + return false; + + WRITE_ONCE(sk->sk_sndbuf, ssk->sk_sndbuf); + return true; +} + +static inline void mptcp_write_space(struct sock *sk) +{ + if (sk_stream_is_writeable(sk)) { + /* pairs with memory barrier in mptcp_poll */ + smp_mb(); + if (test_and_clear_bit(MPTCP_NOSPACE, &mptcp_sk(sk)->flags)) + sk_stream_write_space(sk); + } +} + void mptcp_destroy_common(struct mptcp_sock *msk); void __init mptcp_token_init(void); diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 22313710d7696..1ca0c82b0dbde 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -343,6 +343,7 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb) if (subflow->conn_finished) return; + mptcp_propagate_sndbuf(parent, sk); subflow->rel_write_seq = 1; subflow->conn_finished = 1; subflow->ssn_offset = TCP_SKB_CB(skb)->seq; @@ -1040,7 +1041,10 @@ static void subflow_data_ready(struct sock *sk) static void subflow_write_space(struct sock *ssk) { - /* we take action in __mptcp_clean_una() */ + struct sock *sk = mptcp_subflow_ctx(ssk)->conn; + + mptcp_propagate_sndbuf(sk, ssk); + mptcp_write_space(sk); } static struct inet_connection_sock_af_ops * @@ -1302,6 +1306,7 @@ static void subflow_state_change(struct sock *sk) __subflow_state_change(sk); if (subflow_simultaneous_connect(sk)) { + mptcp_propagate_sndbuf(parent, sk); mptcp_do_fallback(sk); mptcp_rcv_space_init(mptcp_sk(parent), sk); pr_fallback(mptcp_sk(parent)); -- GitLab From ec369c3a337fe075a7bd4da88d163d44c62ccbb1 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Wed, 20 Jan 2021 15:39:12 +0100 Subject: [PATCH 1294/2012] mptcp: do not queue excessive data on subflows The current packet scheduler can enqueue up to sndbuf data on each subflow. If the send buffer is large and the subflows are not symmetric, this could lead to suboptimal aggregate bandwidth utilization. Limit the amount of queued data to the maximum send window. Reviewed-by: Mat Martineau Signed-off-by: Paolo Abeni Signed-off-by: Jakub Kicinski --- net/mptcp/protocol.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index d07e60330df56..e741201acc98f 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -1389,7 +1389,7 @@ static struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk) continue; nr_active += !subflow->backup; - if (!sk_stream_memory_free(subflow->tcp_sock)) + if (!sk_stream_memory_free(subflow->tcp_sock) || !tcp_sk(ssk)->snd_wnd) continue; pace = READ_ONCE(ssk->sk_pacing_rate); @@ -1415,7 +1415,7 @@ static struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk) if (send_info[0].ssk) { msk->last_snd = send_info[0].ssk; msk->snd_burst = min_t(int, MPTCP_SEND_BURST_SIZE, - sk_stream_wspace(msk->last_snd)); + tcp_sk(msk->last_snd)->snd_wnd); return msk->last_snd; } -- GitLab From 40dc9416cc957ac8b74d09550a808fabfd4435f8 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Wed, 20 Jan 2021 15:39:13 +0100 Subject: [PATCH 1295/2012] mptcp: schedule work for better snd subflow selection Otherwise the packet scheduler policy will not be enforced when pushing pending data at MPTCP-level ack reception time. Reviewed-by: Mat Martineau Signed-off-by: Paolo Abeni Signed-off-by: Jakub Kicinski --- net/mptcp/protocol.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index e741201acc98f..8cb582eee2862 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -2242,6 +2242,7 @@ static void mptcp_worker(struct work_struct *work) if (unlikely(state == TCP_CLOSE)) goto unlock; + mptcp_push_pending(sk, 0); mptcp_check_data_fin_ack(sk); __mptcp_flush_join_list(msk); @@ -2899,10 +2900,14 @@ void __mptcp_check_push(struct sock *sk, struct sock *ssk) if (!mptcp_send_head(sk)) return; - if (!sock_owned_by_user(sk)) - __mptcp_subflow_push_pending(sk, ssk); - else + if (!sock_owned_by_user(sk)) { + if (mptcp_subflow_get_send(mptcp_sk(sk)) == ssk) + __mptcp_subflow_push_pending(sk, ssk); + else + mptcp_schedule_work(sk); + } else { set_bit(MPTCP_PUSH_PENDING, &mptcp_sk(sk)->flags); + } } #define MPTCP_DEFERRED_ALL (TCPF_WRITE_TIMER_DEFERRED) -- GitLab From b19bc2945b40b9fd38e835700907ffe8534ef0de Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Wed, 20 Jan 2021 15:39:14 +0100 Subject: [PATCH 1296/2012] mptcp: implement delegated actions On MPTCP-level ack reception, the packet scheduler may select a subflow other then the current one. Prior to this commit we rely on the workqueue to trigger action on such subflow. This changeset introduces an infrastructure that allows any MPTCP subflow to schedule actions (MPTCP xmit) on others subflows without resorting to (multiple) process reschedule. A dummy NAPI instance is used instead. When MPTCP needs to trigger action an a different subflow, it enqueues the target subflow on the NAPI backlog and schedule such instance as needed. The dummy NAPI poll method walks the sockets backlog and tries to acquire the (BH) socket lock on each of them. If the socket is owned by the user space, the action will be completed by the sock release cb, otherwise push is started. This change leverages the delegated action infrastructure to avoid invoking the MPTCP worker to spool the pending data, when the packet scheduler picks a subflow other then the one currently processing the incoming MPTCP-level ack. Additionally we further refine the subflow selection invoking the packet scheduler for each chunk of data even inside __mptcp_subflow_push_pending(). v1 -> v2: - fix possible UaF at shutdown time, resetting sock ops after removing the ulp context Reviewed-by: Mat Martineau Signed-off-by: Paolo Abeni Signed-off-by: Jakub Kicinski --- net/mptcp/protocol.c | 86 +++++++++++++++++++++++++++++++++++++++++--- net/mptcp/protocol.h | 67 ++++++++++++++++++++++++++++++++++ net/mptcp/subflow.c | 45 +++++++++++++++++++++++ 3 files changed, 194 insertions(+), 4 deletions(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 8cb582eee2862..a033bf9c26ee1 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -45,6 +45,9 @@ static struct percpu_counter mptcp_sockets_allocated; static void __mptcp_destroy_sock(struct sock *sk); static void __mptcp_check_send_data_fin(struct sock *sk); +DEFINE_PER_CPU(struct mptcp_delegated_action, mptcp_delegated_actions); +static struct net_device mptcp_napi_dev; + /* If msk has an initial subflow socket, and the MP_CAPABLE handshake has not * completed yet or has failed, return the subflow socket. * Otherwise return NULL. @@ -1506,7 +1509,9 @@ static void __mptcp_subflow_push_pending(struct sock *sk, struct sock *ssk) struct mptcp_sock *msk = mptcp_sk(sk); struct mptcp_sendmsg_info info; struct mptcp_data_frag *dfrag; + struct sock *xmit_ssk; int len, copied = 0; + bool first = true; info.flags = 0; while ((dfrag = mptcp_send_head(sk))) { @@ -1516,6 +1521,18 @@ static void __mptcp_subflow_push_pending(struct sock *sk, struct sock *ssk) while (len > 0) { int ret = 0; + /* the caller already invoked the packet scheduler, + * check for a different subflow usage only after + * spooling the first chunk of data + */ + xmit_ssk = first ? ssk : mptcp_subflow_get_send(mptcp_sk(sk)); + if (!xmit_ssk) + goto out; + if (xmit_ssk != ssk) { + mptcp_subflow_delegate(mptcp_subflow_ctx(xmit_ssk)); + goto out; + } + if (unlikely(mptcp_must_reclaim_memory(sk, ssk))) { __mptcp_update_wmem(sk); sk_mem_reclaim_partial(sk); @@ -1534,6 +1551,7 @@ static void __mptcp_subflow_push_pending(struct sock *sk, struct sock *ssk) msk->tx_pending_data -= ret; copied += ret; len -= ret; + first = false; } WRITE_ONCE(msk->first_pending, mptcp_send_next(sk)); } @@ -2242,7 +2260,6 @@ static void mptcp_worker(struct work_struct *work) if (unlikely(state == TCP_CLOSE)) goto unlock; - mptcp_push_pending(sk, 0); mptcp_check_data_fin_ack(sk); __mptcp_flush_join_list(msk); @@ -2901,10 +2918,12 @@ void __mptcp_check_push(struct sock *sk, struct sock *ssk) return; if (!sock_owned_by_user(sk)) { - if (mptcp_subflow_get_send(mptcp_sk(sk)) == ssk) + struct sock *xmit_ssk = mptcp_subflow_get_send(mptcp_sk(sk)); + + if (xmit_ssk == ssk) __mptcp_subflow_push_pending(sk, ssk); - else - mptcp_schedule_work(sk); + else if (xmit_ssk) + mptcp_subflow_delegate(mptcp_subflow_ctx(xmit_ssk)); } else { set_bit(MPTCP_PUSH_PENDING, &mptcp_sk(sk)->flags); } @@ -2955,6 +2974,20 @@ static void mptcp_release_cb(struct sock *sk) } } +void mptcp_subflow_process_delegated(struct sock *ssk) +{ + struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); + struct sock *sk = subflow->conn; + + mptcp_data_lock(sk); + if (!sock_owned_by_user(sk)) + __mptcp_subflow_push_pending(sk, ssk); + else + set_bit(MPTCP_PUSH_PENDING, &mptcp_sk(sk)->flags); + mptcp_data_unlock(sk); + mptcp_subflow_delegated_done(subflow); +} + static int mptcp_hash(struct sock *sk) { /* should never be called, @@ -3365,13 +3398,58 @@ static struct inet_protosw mptcp_protosw = { .flags = INET_PROTOSW_ICSK, }; +static int mptcp_napi_poll(struct napi_struct *napi, int budget) +{ + struct mptcp_delegated_action *delegated; + struct mptcp_subflow_context *subflow; + int work_done = 0; + + delegated = container_of(napi, struct mptcp_delegated_action, napi); + while ((subflow = mptcp_subflow_delegated_next(delegated)) != NULL) { + struct sock *ssk = mptcp_subflow_tcp_sock(subflow); + + bh_lock_sock_nested(ssk); + if (!sock_owned_by_user(ssk) && + mptcp_subflow_has_delegated_action(subflow)) + mptcp_subflow_process_delegated(ssk); + /* ... elsewhere tcp_release_cb_override already processed + * the action or will do at next release_sock(). + * In both case must dequeue the subflow here - on the same + * CPU that scheduled it. + */ + bh_unlock_sock(ssk); + sock_put(ssk); + + if (++work_done == budget) + return budget; + } + + /* always provide a 0 'work_done' argument, so that napi_complete_done + * will not try accessing the NULL napi->dev ptr + */ + napi_complete_done(napi, 0); + return work_done; +} + void __init mptcp_proto_init(void) { + struct mptcp_delegated_action *delegated; + int cpu; + mptcp_prot.h.hashinfo = tcp_prot.h.hashinfo; if (percpu_counter_init(&mptcp_sockets_allocated, 0, GFP_KERNEL)) panic("Failed to allocate MPTCP pcpu counter\n"); + init_dummy_netdev(&mptcp_napi_dev); + for_each_possible_cpu(cpu) { + delegated = per_cpu_ptr(&mptcp_delegated_actions, cpu); + INIT_LIST_HEAD(&delegated->head); + netif_tx_napi_add(&mptcp_napi_dev, &delegated->napi, mptcp_napi_poll, + NAPI_POLL_WEIGHT); + napi_enable(&delegated->napi); + } + mptcp_subflow_init(); mptcp_pm_init(); mptcp_token_init(); diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 871534df6140f..1460705aaad05 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -378,6 +378,15 @@ enum mptcp_data_avail { MPTCP_SUBFLOW_OOO_DATA }; +struct mptcp_delegated_action { + struct napi_struct napi; + struct list_head head; +}; + +DECLARE_PER_CPU(struct mptcp_delegated_action, mptcp_delegated_actions); + +#define MPTCP_DELEGATE_SEND 0 + /* MPTCP subflow context */ struct mptcp_subflow_context { struct list_head node;/* conn_list of subflows */ @@ -415,6 +424,9 @@ struct mptcp_subflow_context { u8 local_id; u8 remote_id; + long delegated_status; + struct list_head delegated_node; /* link into delegated_action, protected by local BH */ + struct sock *tcp_sock; /* tcp sk backpointer */ struct sock *conn; /* parent mptcp_sock */ const struct inet_connection_sock_af_ops *icsk_af_ops; @@ -463,6 +475,61 @@ static inline void mptcp_add_pending_subflow(struct mptcp_sock *msk, spin_unlock_bh(&msk->join_list_lock); } +void mptcp_subflow_process_delegated(struct sock *ssk); + +static inline void mptcp_subflow_delegate(struct mptcp_subflow_context *subflow) +{ + struct mptcp_delegated_action *delegated; + bool schedule; + + /* The implied barrier pairs with mptcp_subflow_delegated_done(), and + * ensures the below list check sees list updates done prior to status + * bit changes + */ + if (!test_and_set_bit(MPTCP_DELEGATE_SEND, &subflow->delegated_status)) { + /* still on delegated list from previous scheduling */ + if (!list_empty(&subflow->delegated_node)) + return; + + /* the caller held the subflow bh socket lock */ + lockdep_assert_in_softirq(); + + delegated = this_cpu_ptr(&mptcp_delegated_actions); + schedule = list_empty(&delegated->head); + list_add_tail(&subflow->delegated_node, &delegated->head); + sock_hold(mptcp_subflow_tcp_sock(subflow)); + if (schedule) + napi_schedule(&delegated->napi); + } +} + +static inline struct mptcp_subflow_context * +mptcp_subflow_delegated_next(struct mptcp_delegated_action *delegated) +{ + struct mptcp_subflow_context *ret; + + if (list_empty(&delegated->head)) + return NULL; + + ret = list_first_entry(&delegated->head, struct mptcp_subflow_context, delegated_node); + list_del_init(&ret->delegated_node); + return ret; +} + +static inline bool mptcp_subflow_has_delegated_action(const struct mptcp_subflow_context *subflow) +{ + return test_bit(MPTCP_DELEGATE_SEND, &subflow->delegated_status); +} + +static inline void mptcp_subflow_delegated_done(struct mptcp_subflow_context *subflow) +{ + /* pairs with mptcp_subflow_delegate, ensures delegate_node is updated before + * touching the status bit + */ + smp_wmb(); + clear_bit(MPTCP_DELEGATE_SEND, &subflow->delegated_status); +} + int mptcp_is_enabled(struct net *net); unsigned int mptcp_get_add_addr_timeout(struct net *net); void mptcp_subflow_fully_established(struct mptcp_subflow_context *subflow, diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 1ca0c82b0dbde..721059916c968 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -18,12 +18,15 @@ #include #if IS_ENABLED(CONFIG_MPTCP_IPV6) #include +#include #endif #include #include #include "protocol.h" #include "mib.h" +static void mptcp_subflow_ops_undo_override(struct sock *ssk); + static void SUBFLOW_REQ_INC_STATS(struct request_sock *req, enum linux_mptcp_mib_field field) { @@ -428,6 +431,7 @@ drop: static struct tcp_request_sock_ops subflow_request_sock_ipv6_ops; static struct inet_connection_sock_af_ops subflow_v6_specific; static struct inet_connection_sock_af_ops subflow_v6m_specific; +static struct proto tcpv6_prot_override; static int subflow_v6_conn_request(struct sock *sk, struct sk_buff *skb) { @@ -509,6 +513,8 @@ static void subflow_ulp_fallback(struct sock *sk, icsk->icsk_ulp_ops = NULL; rcu_assign_pointer(icsk->icsk_ulp_data, NULL); tcp_sk(sk)->is_mptcp = 0; + + mptcp_subflow_ops_undo_override(sk); } static void subflow_drop_ctx(struct sock *ssk) @@ -682,6 +688,7 @@ dispose_child: } static struct inet_connection_sock_af_ops subflow_specific; +static struct proto tcp_prot_override; enum mapping_status { MAPPING_OK, @@ -1203,6 +1210,25 @@ static void mptcp_attach_cgroup(struct sock *parent, struct sock *child) #endif /* CONFIG_SOCK_CGROUP_DATA */ } +static void mptcp_subflow_ops_override(struct sock *ssk) +{ +#if IS_ENABLED(CONFIG_MPTCP_IPV6) + if (ssk->sk_prot == &tcpv6_prot) + ssk->sk_prot = &tcpv6_prot_override; + else +#endif + ssk->sk_prot = &tcp_prot_override; +} + +static void mptcp_subflow_ops_undo_override(struct sock *ssk) +{ +#if IS_ENABLED(CONFIG_MPTCP_IPV6) + if (ssk->sk_prot == &tcpv6_prot_override) + ssk->sk_prot = &tcpv6_prot; + else +#endif + ssk->sk_prot = &tcp_prot; +} int mptcp_subflow_create_socket(struct sock *sk, struct socket **new_sock) { struct mptcp_subflow_context *subflow; @@ -1258,6 +1284,7 @@ int mptcp_subflow_create_socket(struct sock *sk, struct socket **new_sock) *new_sock = sf; sock_hold(sk); subflow->conn = sk; + mptcp_subflow_ops_override(sf->sk); return 0; } @@ -1274,6 +1301,7 @@ static struct mptcp_subflow_context *subflow_create_ctx(struct sock *sk, rcu_assign_pointer(icsk->icsk_ulp_data, ctx); INIT_LIST_HEAD(&ctx->node); + INIT_LIST_HEAD(&ctx->delegated_node); pr_debug("subflow=%p", ctx); @@ -1386,6 +1414,7 @@ static void subflow_ulp_release(struct sock *ssk) sock_put(sk); } + mptcp_subflow_ops_undo_override(ssk); if (release) kfree_rcu(ctx, rcu); } @@ -1439,6 +1468,16 @@ static void subflow_ulp_clone(const struct request_sock *req, } } +static void tcp_release_cb_override(struct sock *ssk) +{ + struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); + + if (mptcp_subflow_has_delegated_action(subflow)) + mptcp_subflow_process_delegated(ssk); + + tcp_release_cb(ssk); +} + static struct tcp_ulp_ops subflow_ulp_ops __read_mostly = { .name = "mptcp", .owner = THIS_MODULE, @@ -1479,6 +1518,9 @@ void __init mptcp_subflow_init(void) subflow_specific.syn_recv_sock = subflow_syn_recv_sock; subflow_specific.sk_rx_dst_set = subflow_finish_connect; + tcp_prot_override = tcp_prot; + tcp_prot_override.release_cb = tcp_release_cb_override; + #if IS_ENABLED(CONFIG_MPTCP_IPV6) subflow_request_sock_ipv6_ops = tcp_request_sock_ipv6_ops; subflow_request_sock_ipv6_ops.route_req = subflow_v6_route_req; @@ -1494,6 +1536,9 @@ void __init mptcp_subflow_init(void) subflow_v6m_specific.net_header_len = ipv4_specific.net_header_len; subflow_v6m_specific.mtu_reduced = ipv4_specific.mtu_reduced; subflow_v6m_specific.net_frag_header_len = 0; + + tcpv6_prot_override = tcpv6_prot; + tcpv6_prot_override.release_cb = tcp_release_cb_override; #endif mptcp_diag_subflow_init(&subflow_ulp_ops); -- GitLab From b9046e88f6be56f420052822a3a7ac80e3c4d98a Mon Sep 17 00:00:00 2001 From: Xin Long Date: Fri, 22 Jan 2021 17:31:01 +0800 Subject: [PATCH 1297/2012] net: hns3: replace skb->csum_not_inet with skb_csum_is_sctp Commit fa8211701043 ("net: add inline function skb_csum_is_sctp") missed replacing skb->csum_not_inet check in hns3. This patch is to replace it with skb_csum_is_sctp(). Reported-by: Jakub Kicinski Signed-off-by: Xin Long Link: https://lore.kernel.org/r/3ad3c22c08beb0947f5978e790bd98d2aa063df9.1611307861.git.lucien.xin@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/hisilicon/hns3/hns3_enet.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c index 405e490334178..512080640cbc2 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c @@ -1070,7 +1070,7 @@ static bool hns3_check_hw_tx_csum(struct sk_buff *skb) * HW checksum of the non-IP packets and GSO packets is handled at * different place in the following code */ - if (skb->csum_not_inet || skb_is_gso(skb) || + if (skb_csum_is_sctp(skb) || skb_is_gso(skb) || !test_bit(HNS3_NIC_STATE_HW_TX_CSUM_ENABLE, &priv->state)) return false; -- GitLab From 9e10b9e65699394bae44429dc2958132738736a3 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Wed, 20 Jan 2021 16:51:50 +0200 Subject: [PATCH 1298/2012] net: bridge: multicast: rename src_size to addr_size Rename src_size argument to addr_size in preparation for passing host address as an argument to IGMPv3/MLDv2 functions. No functional change. Signed-off-by: Nikolay Aleksandrov Signed-off-by: Jakub Kicinski --- net/bridge/br_multicast.c | 78 +++++++++++++++++++-------------------- 1 file changed, 39 insertions(+), 39 deletions(-) diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 257ac4e25f6d9..3ae2cef6f7ec3 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -1800,7 +1800,7 @@ static void __grp_send_query_and_rexmit(struct net_bridge_port_group *pg) * EXCLUDE (X,Y) ALLOW (A) EXCLUDE (X+A,Y-A) (A)=GMI */ static bool br_multicast_isinc_allow(struct net_bridge_port_group *pg, - void *srcs, u32 nsrcs, size_t src_size) + void *srcs, u32 nsrcs, size_t addr_size) { struct net_bridge *br = pg->key.port->br; struct net_bridge_group_src *ent; @@ -1812,7 +1812,7 @@ static bool br_multicast_isinc_allow(struct net_bridge_port_group *pg, memset(&src_ip, 0, sizeof(src_ip)); src_ip.proto = pg->key.addr.proto; for (src_idx = 0; src_idx < nsrcs; src_idx++) { - memcpy(&src_ip.src, srcs, src_size); + memcpy(&src_ip.src, srcs, addr_size); ent = br_multicast_find_group_src(pg, &src_ip); if (!ent) { ent = br_multicast_new_group_src(pg, &src_ip); @@ -1822,7 +1822,7 @@ static bool br_multicast_isinc_allow(struct net_bridge_port_group *pg, if (ent) __grp_src_mod_timer(ent, now + br_multicast_gmi(br)); - srcs += src_size; + srcs += addr_size; } return changed; @@ -1834,7 +1834,7 @@ static bool br_multicast_isinc_allow(struct net_bridge_port_group *pg, * Group Timer=GMI */ static void __grp_src_isexc_incl(struct net_bridge_port_group *pg, - void *srcs, u32 nsrcs, size_t src_size) + void *srcs, u32 nsrcs, size_t addr_size) { struct net_bridge_group_src *ent; struct br_ip src_ip; @@ -1846,7 +1846,7 @@ static void __grp_src_isexc_incl(struct net_bridge_port_group *pg, memset(&src_ip, 0, sizeof(src_ip)); src_ip.proto = pg->key.addr.proto; for (src_idx = 0; src_idx < nsrcs; src_idx++) { - memcpy(&src_ip.src, srcs, src_size); + memcpy(&src_ip.src, srcs, addr_size); ent = br_multicast_find_group_src(pg, &src_ip); if (ent) ent->flags &= ~BR_SGRP_F_DELETE; @@ -1854,7 +1854,7 @@ static void __grp_src_isexc_incl(struct net_bridge_port_group *pg, ent = br_multicast_new_group_src(pg, &src_ip); if (ent) br_multicast_fwd_src_handle(ent); - srcs += src_size; + srcs += addr_size; } __grp_src_delete_marked(pg); @@ -1867,7 +1867,7 @@ static void __grp_src_isexc_incl(struct net_bridge_port_group *pg, * Group Timer=GMI */ static bool __grp_src_isexc_excl(struct net_bridge_port_group *pg, - void *srcs, u32 nsrcs, size_t src_size) + void *srcs, u32 nsrcs, size_t addr_size) { struct net_bridge *br = pg->key.port->br; struct net_bridge_group_src *ent; @@ -1882,7 +1882,7 @@ static bool __grp_src_isexc_excl(struct net_bridge_port_group *pg, memset(&src_ip, 0, sizeof(src_ip)); src_ip.proto = pg->key.addr.proto; for (src_idx = 0; src_idx < nsrcs; src_idx++) { - memcpy(&src_ip.src, srcs, src_size); + memcpy(&src_ip.src, srcs, addr_size); ent = br_multicast_find_group_src(pg, &src_ip); if (ent) { ent->flags &= ~BR_SGRP_F_DELETE; @@ -1894,7 +1894,7 @@ static bool __grp_src_isexc_excl(struct net_bridge_port_group *pg, changed = true; } } - srcs += src_size; + srcs += addr_size; } if (__grp_src_delete_marked(pg)) @@ -1904,19 +1904,19 @@ static bool __grp_src_isexc_excl(struct net_bridge_port_group *pg, } static bool br_multicast_isexc(struct net_bridge_port_group *pg, - void *srcs, u32 nsrcs, size_t src_size) + void *srcs, u32 nsrcs, size_t addr_size) { struct net_bridge *br = pg->key.port->br; bool changed = false; switch (pg->filter_mode) { case MCAST_INCLUDE: - __grp_src_isexc_incl(pg, srcs, nsrcs, src_size); + __grp_src_isexc_incl(pg, srcs, nsrcs, addr_size); br_multicast_star_g_handle_mode(pg, MCAST_EXCLUDE); changed = true; break; case MCAST_EXCLUDE: - changed = __grp_src_isexc_excl(pg, srcs, nsrcs, src_size); + changed = __grp_src_isexc_excl(pg, srcs, nsrcs, addr_size); break; } @@ -1931,7 +1931,7 @@ static bool br_multicast_isexc(struct net_bridge_port_group *pg, * Send Q(G,A-B) */ static bool __grp_src_toin_incl(struct net_bridge_port_group *pg, - void *srcs, u32 nsrcs, size_t src_size) + void *srcs, u32 nsrcs, size_t addr_size) { struct net_bridge *br = pg->key.port->br; u32 src_idx, to_send = pg->src_ents; @@ -1946,7 +1946,7 @@ static bool __grp_src_toin_incl(struct net_bridge_port_group *pg, memset(&src_ip, 0, sizeof(src_ip)); src_ip.proto = pg->key.addr.proto; for (src_idx = 0; src_idx < nsrcs; src_idx++) { - memcpy(&src_ip.src, srcs, src_size); + memcpy(&src_ip.src, srcs, addr_size); ent = br_multicast_find_group_src(pg, &src_ip); if (ent) { ent->flags &= ~BR_SGRP_F_SEND; @@ -1958,7 +1958,7 @@ static bool __grp_src_toin_incl(struct net_bridge_port_group *pg, } if (ent) __grp_src_mod_timer(ent, now + br_multicast_gmi(br)); - srcs += src_size; + srcs += addr_size; } if (to_send) @@ -1973,7 +1973,7 @@ static bool __grp_src_toin_incl(struct net_bridge_port_group *pg, * Send Q(G) */ static bool __grp_src_toin_excl(struct net_bridge_port_group *pg, - void *srcs, u32 nsrcs, size_t src_size) + void *srcs, u32 nsrcs, size_t addr_size) { struct net_bridge *br = pg->key.port->br; u32 src_idx, to_send = pg->src_ents; @@ -1989,7 +1989,7 @@ static bool __grp_src_toin_excl(struct net_bridge_port_group *pg, memset(&src_ip, 0, sizeof(src_ip)); src_ip.proto = pg->key.addr.proto; for (src_idx = 0; src_idx < nsrcs; src_idx++) { - memcpy(&src_ip.src, srcs, src_size); + memcpy(&src_ip.src, srcs, addr_size); ent = br_multicast_find_group_src(pg, &src_ip); if (ent) { if (timer_pending(&ent->timer)) { @@ -2003,7 +2003,7 @@ static bool __grp_src_toin_excl(struct net_bridge_port_group *pg, } if (ent) __grp_src_mod_timer(ent, now + br_multicast_gmi(br)); - srcs += src_size; + srcs += addr_size; } if (to_send) @@ -2015,16 +2015,16 @@ static bool __grp_src_toin_excl(struct net_bridge_port_group *pg, } static bool br_multicast_toin(struct net_bridge_port_group *pg, - void *srcs, u32 nsrcs, size_t src_size) + void *srcs, u32 nsrcs, size_t addr_size) { bool changed = false; switch (pg->filter_mode) { case MCAST_INCLUDE: - changed = __grp_src_toin_incl(pg, srcs, nsrcs, src_size); + changed = __grp_src_toin_incl(pg, srcs, nsrcs, addr_size); break; case MCAST_EXCLUDE: - changed = __grp_src_toin_excl(pg, srcs, nsrcs, src_size); + changed = __grp_src_toin_excl(pg, srcs, nsrcs, addr_size); break; } @@ -2038,7 +2038,7 @@ static bool br_multicast_toin(struct net_bridge_port_group *pg, * Group Timer=GMI */ static void __grp_src_toex_incl(struct net_bridge_port_group *pg, - void *srcs, u32 nsrcs, size_t src_size) + void *srcs, u32 nsrcs, size_t addr_size) { struct net_bridge_group_src *ent; u32 src_idx, to_send = 0; @@ -2050,7 +2050,7 @@ static void __grp_src_toex_incl(struct net_bridge_port_group *pg, memset(&src_ip, 0, sizeof(src_ip)); src_ip.proto = pg->key.addr.proto; for (src_idx = 0; src_idx < nsrcs; src_idx++) { - memcpy(&src_ip.src, srcs, src_size); + memcpy(&src_ip.src, srcs, addr_size); ent = br_multicast_find_group_src(pg, &src_ip); if (ent) { ent->flags = (ent->flags & ~BR_SGRP_F_DELETE) | @@ -2061,7 +2061,7 @@ static void __grp_src_toex_incl(struct net_bridge_port_group *pg, } if (ent) br_multicast_fwd_src_handle(ent); - srcs += src_size; + srcs += addr_size; } __grp_src_delete_marked(pg); @@ -2077,7 +2077,7 @@ static void __grp_src_toex_incl(struct net_bridge_port_group *pg, * Group Timer=GMI */ static bool __grp_src_toex_excl(struct net_bridge_port_group *pg, - void *srcs, u32 nsrcs, size_t src_size) + void *srcs, u32 nsrcs, size_t addr_size) { struct net_bridge_group_src *ent; u32 src_idx, to_send = 0; @@ -2090,7 +2090,7 @@ static bool __grp_src_toex_excl(struct net_bridge_port_group *pg, memset(&src_ip, 0, sizeof(src_ip)); src_ip.proto = pg->key.addr.proto; for (src_idx = 0; src_idx < nsrcs; src_idx++) { - memcpy(&src_ip.src, srcs, src_size); + memcpy(&src_ip.src, srcs, addr_size); ent = br_multicast_find_group_src(pg, &src_ip); if (ent) { ent->flags &= ~BR_SGRP_F_DELETE; @@ -2105,7 +2105,7 @@ static bool __grp_src_toex_excl(struct net_bridge_port_group *pg, ent->flags |= BR_SGRP_F_SEND; to_send++; } - srcs += src_size; + srcs += addr_size; } if (__grp_src_delete_marked(pg)) @@ -2117,19 +2117,19 @@ static bool __grp_src_toex_excl(struct net_bridge_port_group *pg, } static bool br_multicast_toex(struct net_bridge_port_group *pg, - void *srcs, u32 nsrcs, size_t src_size) + void *srcs, u32 nsrcs, size_t addr_size) { struct net_bridge *br = pg->key.port->br; bool changed = false; switch (pg->filter_mode) { case MCAST_INCLUDE: - __grp_src_toex_incl(pg, srcs, nsrcs, src_size); + __grp_src_toex_incl(pg, srcs, nsrcs, addr_size); br_multicast_star_g_handle_mode(pg, MCAST_EXCLUDE); changed = true; break; case MCAST_EXCLUDE: - changed = __grp_src_toex_excl(pg, srcs, nsrcs, src_size); + changed = __grp_src_toex_excl(pg, srcs, nsrcs, addr_size); break; } @@ -2143,7 +2143,7 @@ static bool br_multicast_toex(struct net_bridge_port_group *pg, * INCLUDE (A) BLOCK (B) INCLUDE (A) Send Q(G,A*B) */ static void __grp_src_block_incl(struct net_bridge_port_group *pg, - void *srcs, u32 nsrcs, size_t src_size) + void *srcs, u32 nsrcs, size_t addr_size) { struct net_bridge_group_src *ent; u32 src_idx, to_send = 0; @@ -2155,13 +2155,13 @@ static void __grp_src_block_incl(struct net_bridge_port_group *pg, memset(&src_ip, 0, sizeof(src_ip)); src_ip.proto = pg->key.addr.proto; for (src_idx = 0; src_idx < nsrcs; src_idx++) { - memcpy(&src_ip.src, srcs, src_size); + memcpy(&src_ip.src, srcs, addr_size); ent = br_multicast_find_group_src(pg, &src_ip); if (ent) { ent->flags |= BR_SGRP_F_SEND; to_send++; } - srcs += src_size; + srcs += addr_size; } if (to_send) @@ -2176,7 +2176,7 @@ static void __grp_src_block_incl(struct net_bridge_port_group *pg, * Send Q(G,A-Y) */ static bool __grp_src_block_excl(struct net_bridge_port_group *pg, - void *srcs, u32 nsrcs, size_t src_size) + void *srcs, u32 nsrcs, size_t addr_size) { struct net_bridge_group_src *ent; u32 src_idx, to_send = 0; @@ -2189,7 +2189,7 @@ static bool __grp_src_block_excl(struct net_bridge_port_group *pg, memset(&src_ip, 0, sizeof(src_ip)); src_ip.proto = pg->key.addr.proto; for (src_idx = 0; src_idx < nsrcs; src_idx++) { - memcpy(&src_ip.src, srcs, src_size); + memcpy(&src_ip.src, srcs, addr_size); ent = br_multicast_find_group_src(pg, &src_ip); if (!ent) { ent = br_multicast_new_group_src(pg, &src_ip); @@ -2202,7 +2202,7 @@ static bool __grp_src_block_excl(struct net_bridge_port_group *pg, ent->flags |= BR_SGRP_F_SEND; to_send++; } - srcs += src_size; + srcs += addr_size; } if (to_send) @@ -2212,16 +2212,16 @@ static bool __grp_src_block_excl(struct net_bridge_port_group *pg, } static bool br_multicast_block(struct net_bridge_port_group *pg, - void *srcs, u32 nsrcs, size_t src_size) + void *srcs, u32 nsrcs, size_t addr_size) { bool changed = false; switch (pg->filter_mode) { case MCAST_INCLUDE: - __grp_src_block_incl(pg, srcs, nsrcs, src_size); + __grp_src_block_incl(pg, srcs, nsrcs, addr_size); break; case MCAST_EXCLUDE: - changed = __grp_src_block_excl(pg, srcs, nsrcs, src_size); + changed = __grp_src_block_excl(pg, srcs, nsrcs, addr_size); break; } -- GitLab From 54bea721964166cbb26490faa0bd414a1c75b406 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Wed, 20 Jan 2021 16:51:51 +0200 Subject: [PATCH 1299/2012] net: bridge: multicast: pass host src address to IGMPv3/MLDv2 functions We need to pass the host address so later it can be used for explicit host tracking. No functional change. Signed-off-by: Nikolay Aleksandrov Signed-off-by: Jakub Kicinski --- net/bridge/br_multicast.c | 90 +++++++++++++++++++++------------------ 1 file changed, 49 insertions(+), 41 deletions(-) diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 3ae2cef6f7ec3..861545094d673 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -1799,7 +1799,7 @@ static void __grp_send_query_and_rexmit(struct net_bridge_port_group *pg) * INCLUDE (A) ALLOW (B) INCLUDE (A+B) (B)=GMI * EXCLUDE (X,Y) ALLOW (A) EXCLUDE (X+A,Y-A) (A)=GMI */ -static bool br_multicast_isinc_allow(struct net_bridge_port_group *pg, +static bool br_multicast_isinc_allow(struct net_bridge_port_group *pg, void *h_addr, void *srcs, u32 nsrcs, size_t addr_size) { struct net_bridge *br = pg->key.port->br; @@ -1833,7 +1833,7 @@ static bool br_multicast_isinc_allow(struct net_bridge_port_group *pg, * Delete (A-B) * Group Timer=GMI */ -static void __grp_src_isexc_incl(struct net_bridge_port_group *pg, +static void __grp_src_isexc_incl(struct net_bridge_port_group *pg, void *h_addr, void *srcs, u32 nsrcs, size_t addr_size) { struct net_bridge_group_src *ent; @@ -1866,7 +1866,7 @@ static void __grp_src_isexc_incl(struct net_bridge_port_group *pg, * Delete (Y-A) * Group Timer=GMI */ -static bool __grp_src_isexc_excl(struct net_bridge_port_group *pg, +static bool __grp_src_isexc_excl(struct net_bridge_port_group *pg, void *h_addr, void *srcs, u32 nsrcs, size_t addr_size) { struct net_bridge *br = pg->key.port->br; @@ -1903,7 +1903,7 @@ static bool __grp_src_isexc_excl(struct net_bridge_port_group *pg, return changed; } -static bool br_multicast_isexc(struct net_bridge_port_group *pg, +static bool br_multicast_isexc(struct net_bridge_port_group *pg, void *h_addr, void *srcs, u32 nsrcs, size_t addr_size) { struct net_bridge *br = pg->key.port->br; @@ -1911,12 +1911,12 @@ static bool br_multicast_isexc(struct net_bridge_port_group *pg, switch (pg->filter_mode) { case MCAST_INCLUDE: - __grp_src_isexc_incl(pg, srcs, nsrcs, addr_size); + __grp_src_isexc_incl(pg, h_addr, srcs, nsrcs, addr_size); br_multicast_star_g_handle_mode(pg, MCAST_EXCLUDE); changed = true; break; case MCAST_EXCLUDE: - changed = __grp_src_isexc_excl(pg, srcs, nsrcs, addr_size); + changed = __grp_src_isexc_excl(pg, h_addr, srcs, nsrcs, addr_size); break; } @@ -1930,7 +1930,7 @@ static bool br_multicast_isexc(struct net_bridge_port_group *pg, * INCLUDE (A) TO_IN (B) INCLUDE (A+B) (B)=GMI * Send Q(G,A-B) */ -static bool __grp_src_toin_incl(struct net_bridge_port_group *pg, +static bool __grp_src_toin_incl(struct net_bridge_port_group *pg, void *h_addr, void *srcs, u32 nsrcs, size_t addr_size) { struct net_bridge *br = pg->key.port->br; @@ -1972,7 +1972,7 @@ static bool __grp_src_toin_incl(struct net_bridge_port_group *pg, * Send Q(G,X-A) * Send Q(G) */ -static bool __grp_src_toin_excl(struct net_bridge_port_group *pg, +static bool __grp_src_toin_excl(struct net_bridge_port_group *pg, void *h_addr, void *srcs, u32 nsrcs, size_t addr_size) { struct net_bridge *br = pg->key.port->br; @@ -2014,17 +2014,17 @@ static bool __grp_src_toin_excl(struct net_bridge_port_group *pg, return changed; } -static bool br_multicast_toin(struct net_bridge_port_group *pg, +static bool br_multicast_toin(struct net_bridge_port_group *pg, void *h_addr, void *srcs, u32 nsrcs, size_t addr_size) { bool changed = false; switch (pg->filter_mode) { case MCAST_INCLUDE: - changed = __grp_src_toin_incl(pg, srcs, nsrcs, addr_size); + changed = __grp_src_toin_incl(pg, h_addr, srcs, nsrcs, addr_size); break; case MCAST_EXCLUDE: - changed = __grp_src_toin_excl(pg, srcs, nsrcs, addr_size); + changed = __grp_src_toin_excl(pg, h_addr, srcs, nsrcs, addr_size); break; } @@ -2037,7 +2037,7 @@ static bool br_multicast_toin(struct net_bridge_port_group *pg, * Send Q(G,A*B) * Group Timer=GMI */ -static void __grp_src_toex_incl(struct net_bridge_port_group *pg, +static void __grp_src_toex_incl(struct net_bridge_port_group *pg, void *h_addr, void *srcs, u32 nsrcs, size_t addr_size) { struct net_bridge_group_src *ent; @@ -2076,7 +2076,7 @@ static void __grp_src_toex_incl(struct net_bridge_port_group *pg, * Send Q(G,A-Y) * Group Timer=GMI */ -static bool __grp_src_toex_excl(struct net_bridge_port_group *pg, +static bool __grp_src_toex_excl(struct net_bridge_port_group *pg, void *h_addr, void *srcs, u32 nsrcs, size_t addr_size) { struct net_bridge_group_src *ent; @@ -2116,7 +2116,7 @@ static bool __grp_src_toex_excl(struct net_bridge_port_group *pg, return changed; } -static bool br_multicast_toex(struct net_bridge_port_group *pg, +static bool br_multicast_toex(struct net_bridge_port_group *pg, void *h_addr, void *srcs, u32 nsrcs, size_t addr_size) { struct net_bridge *br = pg->key.port->br; @@ -2124,12 +2124,12 @@ static bool br_multicast_toex(struct net_bridge_port_group *pg, switch (pg->filter_mode) { case MCAST_INCLUDE: - __grp_src_toex_incl(pg, srcs, nsrcs, addr_size); + __grp_src_toex_incl(pg, h_addr, srcs, nsrcs, addr_size); br_multicast_star_g_handle_mode(pg, MCAST_EXCLUDE); changed = true; break; case MCAST_EXCLUDE: - changed = __grp_src_toex_excl(pg, srcs, nsrcs, addr_size); + changed = __grp_src_toex_excl(pg, h_addr, srcs, nsrcs, addr_size); break; } @@ -2142,7 +2142,7 @@ static bool br_multicast_toex(struct net_bridge_port_group *pg, /* State Msg type New state Actions * INCLUDE (A) BLOCK (B) INCLUDE (A) Send Q(G,A*B) */ -static void __grp_src_block_incl(struct net_bridge_port_group *pg, +static void __grp_src_block_incl(struct net_bridge_port_group *pg, void *h_addr, void *srcs, u32 nsrcs, size_t addr_size) { struct net_bridge_group_src *ent; @@ -2175,7 +2175,7 @@ static void __grp_src_block_incl(struct net_bridge_port_group *pg, * EXCLUDE (X,Y) BLOCK (A) EXCLUDE (X+(A-Y),Y) (A-X-Y)=Group Timer * Send Q(G,A-Y) */ -static bool __grp_src_block_excl(struct net_bridge_port_group *pg, +static bool __grp_src_block_excl(struct net_bridge_port_group *pg, void *h_addr, void *srcs, u32 nsrcs, size_t addr_size) { struct net_bridge_group_src *ent; @@ -2211,17 +2211,17 @@ static bool __grp_src_block_excl(struct net_bridge_port_group *pg, return changed; } -static bool br_multicast_block(struct net_bridge_port_group *pg, +static bool br_multicast_block(struct net_bridge_port_group *pg, void *h_addr, void *srcs, u32 nsrcs, size_t addr_size) { bool changed = false; switch (pg->filter_mode) { case MCAST_INCLUDE: - __grp_src_block_incl(pg, srcs, nsrcs, addr_size); + __grp_src_block_incl(pg, h_addr, srcs, nsrcs, addr_size); break; case MCAST_EXCLUDE: - changed = __grp_src_block_excl(pg, srcs, nsrcs, addr_size); + changed = __grp_src_block_excl(pg, h_addr, srcs, nsrcs, addr_size); break; } @@ -2257,8 +2257,8 @@ static int br_ip4_multicast_igmp3_report(struct net_bridge *br, struct igmpv3_report *ih; struct igmpv3_grec *grec; int i, len, num, type; + __be32 group, *h_addr; bool changed = false; - __be32 group; int err = 0; u16 nsrcs; @@ -2318,32 +2318,33 @@ static int br_ip4_multicast_igmp3_report(struct net_bridge *br, pg = br_multicast_find_port(mdst, port, src); if (!pg || (pg->flags & MDB_PG_FLAGS_PERMANENT)) goto unlock_continue; - /* reload grec */ + /* reload grec and host addr */ grec = (void *)(skb->data + len - sizeof(*grec) - (nsrcs * 4)); + h_addr = &ip_hdr(skb)->saddr; switch (type) { case IGMPV3_ALLOW_NEW_SOURCES: - changed = br_multicast_isinc_allow(pg, grec->grec_src, + changed = br_multicast_isinc_allow(pg, h_addr, grec->grec_src, nsrcs, sizeof(__be32)); break; case IGMPV3_MODE_IS_INCLUDE: - changed = br_multicast_isinc_allow(pg, grec->grec_src, nsrcs, - sizeof(__be32)); + changed = br_multicast_isinc_allow(pg, h_addr, grec->grec_src, + nsrcs, sizeof(__be32)); break; case IGMPV3_MODE_IS_EXCLUDE: - changed = br_multicast_isexc(pg, grec->grec_src, nsrcs, - sizeof(__be32)); + changed = br_multicast_isexc(pg, h_addr, grec->grec_src, + nsrcs, sizeof(__be32)); break; case IGMPV3_CHANGE_TO_INCLUDE: - changed = br_multicast_toin(pg, grec->grec_src, nsrcs, - sizeof(__be32)); + changed = br_multicast_toin(pg, h_addr, grec->grec_src, + nsrcs, sizeof(__be32)); break; case IGMPV3_CHANGE_TO_EXCLUDE: - changed = br_multicast_toex(pg, grec->grec_src, nsrcs, - sizeof(__be32)); + changed = br_multicast_toex(pg, h_addr, grec->grec_src, + nsrcs, sizeof(__be32)); break; case IGMPV3_BLOCK_OLD_SOURCES: - changed = br_multicast_block(pg, grec->grec_src, nsrcs, - sizeof(__be32)); + changed = br_multicast_block(pg, h_addr, grec->grec_src, + nsrcs, sizeof(__be32)); break; } if (changed) @@ -2367,6 +2368,7 @@ static int br_ip6_multicast_mld2_report(struct net_bridge *br, unsigned int nsrcs_offset; const unsigned char *src; struct icmp6hdr *icmp6h; + struct in6_addr *h_addr; struct mld2_grec *grec; unsigned int grec_len; bool changed = false; @@ -2445,30 +2447,36 @@ static int br_ip6_multicast_mld2_report(struct net_bridge *br, pg = br_multicast_find_port(mdst, port, src); if (!pg || (pg->flags & MDB_PG_FLAGS_PERMANENT)) goto unlock_continue; + h_addr = &ipv6_hdr(skb)->saddr; switch (grec->grec_type) { case MLD2_ALLOW_NEW_SOURCES: - changed = br_multicast_isinc_allow(pg, grec->grec_src, - nsrcs, + changed = br_multicast_isinc_allow(pg, h_addr, + grec->grec_src, nsrcs, sizeof(struct in6_addr)); break; case MLD2_MODE_IS_INCLUDE: - changed = br_multicast_isinc_allow(pg, grec->grec_src, nsrcs, + changed = br_multicast_isinc_allow(pg, h_addr, + grec->grec_src, nsrcs, sizeof(struct in6_addr)); break; case MLD2_MODE_IS_EXCLUDE: - changed = br_multicast_isexc(pg, grec->grec_src, nsrcs, + changed = br_multicast_isexc(pg, h_addr, + grec->grec_src, nsrcs, sizeof(struct in6_addr)); break; case MLD2_CHANGE_TO_INCLUDE: - changed = br_multicast_toin(pg, grec->grec_src, nsrcs, + changed = br_multicast_toin(pg, h_addr, + grec->grec_src, nsrcs, sizeof(struct in6_addr)); break; case MLD2_CHANGE_TO_EXCLUDE: - changed = br_multicast_toex(pg, grec->grec_src, nsrcs, + changed = br_multicast_toex(pg, h_addr, + grec->grec_src, nsrcs, sizeof(struct in6_addr)); break; case MLD2_BLOCK_OLD_SOURCES: - changed = br_multicast_block(pg, grec->grec_src, nsrcs, + changed = br_multicast_block(pg, h_addr, + grec->grec_src, nsrcs, sizeof(struct in6_addr)); break; } -- GitLab From 0ad57c99e857f4c7354c3629d4168061fba4a22a Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Wed, 20 Jan 2021 16:51:52 +0200 Subject: [PATCH 1300/2012] net: bridge: multicast: __grp_src_block_incl can modify pg Prepare __grp_src_block_incl() for being able to cause a notification due to changes. Currently it cannot happen, but EHT would change that since we'll be deleting sources immediately. Make sure that if the pg is deleted we don't return true as that would cause the caller to access freed pg. This patch shouldn't cause any functional change. Signed-off-by: Nikolay Aleksandrov Signed-off-by: Jakub Kicinski --- net/bridge/br_multicast.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 861545094d673..79569a3986699 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -2142,11 +2142,12 @@ static bool br_multicast_toex(struct net_bridge_port_group *pg, void *h_addr, /* State Msg type New state Actions * INCLUDE (A) BLOCK (B) INCLUDE (A) Send Q(G,A*B) */ -static void __grp_src_block_incl(struct net_bridge_port_group *pg, void *h_addr, +static bool __grp_src_block_incl(struct net_bridge_port_group *pg, void *h_addr, void *srcs, u32 nsrcs, size_t addr_size) { struct net_bridge_group_src *ent; u32 src_idx, to_send = 0; + bool changed = false; struct br_ip src_ip; hlist_for_each_entry(ent, &pg->src_list, node) @@ -2167,8 +2168,15 @@ static void __grp_src_block_incl(struct net_bridge_port_group *pg, void *h_addr, if (to_send) __grp_src_query_marked_and_rexmit(pg); - if (pg->filter_mode == MCAST_INCLUDE && hlist_empty(&pg->src_list)) + if (pg->filter_mode == MCAST_INCLUDE && hlist_empty(&pg->src_list)) { br_multicast_find_del_pg(pg->key.port->br, pg); + /* a notification has already been sent and we shouldn't access + * pg after the delete thus we have to return false + */ + changed = false; + } + + return changed; } /* State Msg type New state Actions @@ -2218,7 +2226,7 @@ static bool br_multicast_block(struct net_bridge_port_group *pg, void *h_addr, switch (pg->filter_mode) { case MCAST_INCLUDE: - __grp_src_block_incl(pg, h_addr, srcs, nsrcs, addr_size); + changed = __grp_src_block_incl(pg, h_addr, srcs, nsrcs, addr_size); break; case MCAST_EXCLUDE: changed = __grp_src_block_excl(pg, h_addr, srcs, nsrcs, addr_size); -- GitLab From e7cfcf2c18c5fd96320a69e468fdec8ed1c55443 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Wed, 20 Jan 2021 16:51:53 +0200 Subject: [PATCH 1301/2012] net: bridge: multicast: calculate idx position without changing ptr We need to preserve the srcs pointer since we'll be passing it for EHT handling later. Signed-off-by: Nikolay Aleksandrov Signed-off-by: Jakub Kicinski --- net/bridge/br_multicast.c | 27 +++++++++------------------ 1 file changed, 9 insertions(+), 18 deletions(-) diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 79569a3986699..f8b685ae56d47 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -1812,7 +1812,7 @@ static bool br_multicast_isinc_allow(struct net_bridge_port_group *pg, void *h_a memset(&src_ip, 0, sizeof(src_ip)); src_ip.proto = pg->key.addr.proto; for (src_idx = 0; src_idx < nsrcs; src_idx++) { - memcpy(&src_ip.src, srcs, addr_size); + memcpy(&src_ip.src, srcs + (src_idx * addr_size), addr_size); ent = br_multicast_find_group_src(pg, &src_ip); if (!ent) { ent = br_multicast_new_group_src(pg, &src_ip); @@ -1822,7 +1822,6 @@ static bool br_multicast_isinc_allow(struct net_bridge_port_group *pg, void *h_a if (ent) __grp_src_mod_timer(ent, now + br_multicast_gmi(br)); - srcs += addr_size; } return changed; @@ -1846,7 +1845,7 @@ static void __grp_src_isexc_incl(struct net_bridge_port_group *pg, void *h_addr, memset(&src_ip, 0, sizeof(src_ip)); src_ip.proto = pg->key.addr.proto; for (src_idx = 0; src_idx < nsrcs; src_idx++) { - memcpy(&src_ip.src, srcs, addr_size); + memcpy(&src_ip.src, srcs + (src_idx * addr_size), addr_size); ent = br_multicast_find_group_src(pg, &src_ip); if (ent) ent->flags &= ~BR_SGRP_F_DELETE; @@ -1854,7 +1853,6 @@ static void __grp_src_isexc_incl(struct net_bridge_port_group *pg, void *h_addr, ent = br_multicast_new_group_src(pg, &src_ip); if (ent) br_multicast_fwd_src_handle(ent); - srcs += addr_size; } __grp_src_delete_marked(pg); @@ -1882,7 +1880,7 @@ static bool __grp_src_isexc_excl(struct net_bridge_port_group *pg, void *h_addr, memset(&src_ip, 0, sizeof(src_ip)); src_ip.proto = pg->key.addr.proto; for (src_idx = 0; src_idx < nsrcs; src_idx++) { - memcpy(&src_ip.src, srcs, addr_size); + memcpy(&src_ip.src, srcs + (src_idx * addr_size), addr_size); ent = br_multicast_find_group_src(pg, &src_ip); if (ent) { ent->flags &= ~BR_SGRP_F_DELETE; @@ -1894,7 +1892,6 @@ static bool __grp_src_isexc_excl(struct net_bridge_port_group *pg, void *h_addr, changed = true; } } - srcs += addr_size; } if (__grp_src_delete_marked(pg)) @@ -1946,7 +1943,7 @@ static bool __grp_src_toin_incl(struct net_bridge_port_group *pg, void *h_addr, memset(&src_ip, 0, sizeof(src_ip)); src_ip.proto = pg->key.addr.proto; for (src_idx = 0; src_idx < nsrcs; src_idx++) { - memcpy(&src_ip.src, srcs, addr_size); + memcpy(&src_ip.src, srcs + (src_idx * addr_size), addr_size); ent = br_multicast_find_group_src(pg, &src_ip); if (ent) { ent->flags &= ~BR_SGRP_F_SEND; @@ -1958,7 +1955,6 @@ static bool __grp_src_toin_incl(struct net_bridge_port_group *pg, void *h_addr, } if (ent) __grp_src_mod_timer(ent, now + br_multicast_gmi(br)); - srcs += addr_size; } if (to_send) @@ -1989,7 +1985,7 @@ static bool __grp_src_toin_excl(struct net_bridge_port_group *pg, void *h_addr, memset(&src_ip, 0, sizeof(src_ip)); src_ip.proto = pg->key.addr.proto; for (src_idx = 0; src_idx < nsrcs; src_idx++) { - memcpy(&src_ip.src, srcs, addr_size); + memcpy(&src_ip.src, srcs + (src_idx * addr_size), addr_size); ent = br_multicast_find_group_src(pg, &src_ip); if (ent) { if (timer_pending(&ent->timer)) { @@ -2003,7 +1999,6 @@ static bool __grp_src_toin_excl(struct net_bridge_port_group *pg, void *h_addr, } if (ent) __grp_src_mod_timer(ent, now + br_multicast_gmi(br)); - srcs += addr_size; } if (to_send) @@ -2050,7 +2045,7 @@ static void __grp_src_toex_incl(struct net_bridge_port_group *pg, void *h_addr, memset(&src_ip, 0, sizeof(src_ip)); src_ip.proto = pg->key.addr.proto; for (src_idx = 0; src_idx < nsrcs; src_idx++) { - memcpy(&src_ip.src, srcs, addr_size); + memcpy(&src_ip.src, srcs + (src_idx * addr_size), addr_size); ent = br_multicast_find_group_src(pg, &src_ip); if (ent) { ent->flags = (ent->flags & ~BR_SGRP_F_DELETE) | @@ -2061,7 +2056,6 @@ static void __grp_src_toex_incl(struct net_bridge_port_group *pg, void *h_addr, } if (ent) br_multicast_fwd_src_handle(ent); - srcs += addr_size; } __grp_src_delete_marked(pg); @@ -2090,7 +2084,7 @@ static bool __grp_src_toex_excl(struct net_bridge_port_group *pg, void *h_addr, memset(&src_ip, 0, sizeof(src_ip)); src_ip.proto = pg->key.addr.proto; for (src_idx = 0; src_idx < nsrcs; src_idx++) { - memcpy(&src_ip.src, srcs, addr_size); + memcpy(&src_ip.src, srcs + (src_idx * addr_size), addr_size); ent = br_multicast_find_group_src(pg, &src_ip); if (ent) { ent->flags &= ~BR_SGRP_F_DELETE; @@ -2105,7 +2099,6 @@ static bool __grp_src_toex_excl(struct net_bridge_port_group *pg, void *h_addr, ent->flags |= BR_SGRP_F_SEND; to_send++; } - srcs += addr_size; } if (__grp_src_delete_marked(pg)) @@ -2156,13 +2149,12 @@ static bool __grp_src_block_incl(struct net_bridge_port_group *pg, void *h_addr, memset(&src_ip, 0, sizeof(src_ip)); src_ip.proto = pg->key.addr.proto; for (src_idx = 0; src_idx < nsrcs; src_idx++) { - memcpy(&src_ip.src, srcs, addr_size); + memcpy(&src_ip.src, srcs + (src_idx * addr_size), addr_size); ent = br_multicast_find_group_src(pg, &src_ip); if (ent) { ent->flags |= BR_SGRP_F_SEND; to_send++; } - srcs += addr_size; } if (to_send) @@ -2197,7 +2189,7 @@ static bool __grp_src_block_excl(struct net_bridge_port_group *pg, void *h_addr, memset(&src_ip, 0, sizeof(src_ip)); src_ip.proto = pg->key.addr.proto; for (src_idx = 0; src_idx < nsrcs; src_idx++) { - memcpy(&src_ip.src, srcs, addr_size); + memcpy(&src_ip.src, srcs + (src_idx * addr_size), addr_size); ent = br_multicast_find_group_src(pg, &src_ip); if (!ent) { ent = br_multicast_new_group_src(pg, &src_ip); @@ -2210,7 +2202,6 @@ static bool __grp_src_block_excl(struct net_bridge_port_group *pg, void *h_addr, ent->flags |= BR_SGRP_F_SEND; to_send++; } - srcs += addr_size; } if (to_send) -- GitLab From 8f07b831197e0809e59f16149b878e8334c3433f Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Wed, 20 Jan 2021 16:51:54 +0200 Subject: [PATCH 1302/2012] net: bridge: multicast: add EHT structures and definitions Add EHT structures for tracking hosts and sources per group. We keep one set for each host which has all of the host's S,G entries, and one set for each multicast source which has all hosts that have joined that S,G. For each host, source entry we record the filter_mode and we keep an expiry timer. There is also one global expiry timer per source set, it is updated with each set entry update, it will be later used to lower the set's timer instead of lowering each entry's timer separately. Signed-off-by: Nikolay Aleksandrov Signed-off-by: Jakub Kicinski --- net/bridge/br_multicast.c | 1 + net/bridge/br_private.h | 2 ++ net/bridge/br_private_mcast_eht.h | 50 +++++++++++++++++++++++++++++++ 3 files changed, 53 insertions(+) create mode 100644 net/bridge/br_private_mcast_eht.h diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index f8b685ae56d47..3aaa6adbff828 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -33,6 +33,7 @@ #endif #include "br_private.h" +#include "br_private_mcast_eht.h" static const struct rhashtable_params br_mdb_rht_params = { .head_offset = offsetof(struct net_bridge_mdb_entry, rhnode), diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index d62c6e1af64a1..0bf4c544a5da4 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -252,6 +252,8 @@ struct net_bridge_port_group { struct timer_list timer; struct timer_list rexmit_timer; struct hlist_node mglist; + struct rb_root eht_set_tree; + struct rb_root eht_host_tree; struct rhash_head rhnode; struct net_bridge_mcast_gc mcast_gc; diff --git a/net/bridge/br_private_mcast_eht.h b/net/bridge/br_private_mcast_eht.h new file mode 100644 index 0000000000000..0c9c4267969dc --- /dev/null +++ b/net/bridge/br_private_mcast_eht.h @@ -0,0 +1,50 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later + * Copyright (c) 2020, Nikolay Aleksandrov + */ +#ifndef _BR_PRIVATE_MCAST_EHT_H_ +#define _BR_PRIVATE_MCAST_EHT_H_ + +union net_bridge_eht_addr { + __be32 ip4; +#if IS_ENABLED(CONFIG_IPV6) + struct in6_addr ip6; +#endif +}; + +/* single host's list of set entries and filter_mode */ +struct net_bridge_group_eht_host { + struct rb_node rb_node; + + union net_bridge_eht_addr h_addr; + struct hlist_head set_entries; + unsigned int num_entries; + unsigned char filter_mode; + struct net_bridge_port_group *pg; +}; + +/* (host, src entry) added to a per-src set and host's list */ +struct net_bridge_group_eht_set_entry { + struct rb_node rb_node; + struct hlist_node host_list; + + union net_bridge_eht_addr h_addr; + struct timer_list timer; + struct net_bridge *br; + struct net_bridge_group_eht_set *eht_set; + struct net_bridge_group_eht_host *h_parent; + struct net_bridge_mcast_gc mcast_gc; +}; + +/* per-src set */ +struct net_bridge_group_eht_set { + struct rb_node rb_node; + + union net_bridge_eht_addr src_addr; + struct rb_root entry_tree; + struct timer_list timer; + struct net_bridge_port_group *pg; + struct net_bridge *br; + struct net_bridge_mcast_gc mcast_gc; +}; + +#endif /* _BR_PRIVATE_MCAST_EHT_H_ */ -- GitLab From 5b16328879302695101f403f261ff7c5f1ee4b84 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Wed, 20 Jan 2021 16:51:55 +0200 Subject: [PATCH 1303/2012] net: bridge: multicast: add EHT host handling functions Add functions to create, destroy and lookup an EHT host. These are per-host entries contained in the eht_host_tree in net_bridge_port_group which are used to store a list of all sources (S,G) entries joined for that group by each host, the host's current filter mode and total number of joined entries. No functional changes yet, these would be used in later patches. Signed-off-by: Nikolay Aleksandrov Signed-off-by: Jakub Kicinski --- net/bridge/Makefile | 2 +- net/bridge/br_multicast.c | 1 + net/bridge/br_multicast_eht.c | 115 ++++++++++++++++++++++++++++++++++ 3 files changed, 117 insertions(+), 1 deletion(-) create mode 100644 net/bridge/br_multicast_eht.c diff --git a/net/bridge/Makefile b/net/bridge/Makefile index 4702702a74d34..7fb9a021873be 100644 --- a/net/bridge/Makefile +++ b/net/bridge/Makefile @@ -18,7 +18,7 @@ br_netfilter-y := br_netfilter_hooks.o br_netfilter-$(subst m,y,$(CONFIG_IPV6)) += br_netfilter_ipv6.o obj-$(CONFIG_BRIDGE_NETFILTER) += br_netfilter.o -bridge-$(CONFIG_BRIDGE_IGMP_SNOOPING) += br_multicast.o br_mdb.o +bridge-$(CONFIG_BRIDGE_IGMP_SNOOPING) += br_multicast.o br_mdb.o br_multicast_eht.o bridge-$(CONFIG_BRIDGE_VLAN_FILTERING) += br_vlan.o br_vlan_tunnel.o br_vlan_options.o diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 3aaa6adbff828..dc6e879dc8407 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -1173,6 +1173,7 @@ struct net_bridge_port_group *br_multicast_new_port_group( p->flags = flags; p->filter_mode = filter_mode; p->rt_protocol = rt_protocol; + p->eht_host_tree = RB_ROOT; p->mcast_gc.destroy = br_multicast_destroy_port_group; INIT_HLIST_HEAD(&p->src_list); diff --git a/net/bridge/br_multicast_eht.c b/net/bridge/br_multicast_eht.c new file mode 100644 index 0000000000000..5cebca45e72c1 --- /dev/null +++ b/net/bridge/br_multicast_eht.c @@ -0,0 +1,115 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +// Copyright (c) 2020, Nikolay Aleksandrov +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#if IS_ENABLED(CONFIG_IPV6) +#include +#include +#include +#include +#include +#endif + +#include "br_private.h" +#include "br_private_mcast_eht.h" + +static struct net_bridge_group_eht_host * +br_multicast_eht_host_lookup(struct net_bridge_port_group *pg, + union net_bridge_eht_addr *h_addr) +{ + struct rb_node *node = pg->eht_host_tree.rb_node; + + while (node) { + struct net_bridge_group_eht_host *this; + int result; + + this = rb_entry(node, struct net_bridge_group_eht_host, + rb_node); + result = memcmp(h_addr, &this->h_addr, sizeof(*h_addr)); + if (result < 0) + node = node->rb_left; + else if (result > 0) + node = node->rb_right; + else + return this; + } + + return NULL; +} + +static int br_multicast_eht_host_filter_mode(struct net_bridge_port_group *pg, + union net_bridge_eht_addr *h_addr) +{ + struct net_bridge_group_eht_host *eht_host; + + eht_host = br_multicast_eht_host_lookup(pg, h_addr); + if (!eht_host) + return MCAST_INCLUDE; + + return eht_host->filter_mode; +} + +static void __eht_destroy_host(struct net_bridge_group_eht_host *eht_host) +{ + WARN_ON(!hlist_empty(&eht_host->set_entries)); + + rb_erase(&eht_host->rb_node, &eht_host->pg->eht_host_tree); + RB_CLEAR_NODE(&eht_host->rb_node); + kfree(eht_host); +} + +static struct net_bridge_group_eht_host * +__eht_lookup_create_host(struct net_bridge_port_group *pg, + union net_bridge_eht_addr *h_addr, + unsigned char filter_mode) +{ + struct rb_node **link = &pg->eht_host_tree.rb_node, *parent = NULL; + struct net_bridge_group_eht_host *eht_host; + + while (*link) { + struct net_bridge_group_eht_host *this; + int result; + + this = rb_entry(*link, struct net_bridge_group_eht_host, + rb_node); + result = memcmp(h_addr, &this->h_addr, sizeof(*h_addr)); + parent = *link; + if (result < 0) + link = &((*link)->rb_left); + else if (result > 0) + link = &((*link)->rb_right); + else + return this; + } + + eht_host = kzalloc(sizeof(*eht_host), GFP_ATOMIC); + if (!eht_host) + return NULL; + + memcpy(&eht_host->h_addr, h_addr, sizeof(*h_addr)); + INIT_HLIST_HEAD(&eht_host->set_entries); + eht_host->pg = pg; + eht_host->filter_mode = filter_mode; + + rb_link_node(&eht_host->rb_node, parent, link); + rb_insert_color(&eht_host->rb_node, &pg->eht_host_tree); + + return eht_host; +} -- GitLab From baa74d39ca39f2b22eeac5c3b069b58491ecd418 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Wed, 20 Jan 2021 16:51:56 +0200 Subject: [PATCH 1304/2012] net: bridge: multicast: add EHT source set handling functions Add EHT source set and set-entry create, delete and lookup functions. These allow to manipulate source sets which contain their own host sets with entries which joined that S,G. We're limiting the maximum number of tracked S,G entries per host to PG_SRC_ENT_LIMIT (currently 32) which is the current maximum of S,G entries for a group. There's a per-set timer which will be used to destroy the whole set later. Signed-off-by: Nikolay Aleksandrov Signed-off-by: Jakub Kicinski --- net/bridge/br_multicast.c | 2 + net/bridge/br_multicast_eht.c | 321 ++++++++++++++++++++++++++++++ net/bridge/br_private_mcast_eht.h | 2 + 3 files changed, 325 insertions(+) diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index dc6e879dc8407..ac363b1042397 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -594,6 +594,7 @@ void br_multicast_del_pg(struct net_bridge_mdb_entry *mp, rcu_assign_pointer(*pp, pg->next); hlist_del_init(&pg->mglist); + br_multicast_eht_clean_sets(pg); hlist_for_each_entry_safe(ent, tmp, &pg->src_list, node) br_multicast_del_group_src(ent); br_mdb_notify(br->dev, mp, pg, RTM_DELMDB); @@ -1174,6 +1175,7 @@ struct net_bridge_port_group *br_multicast_new_port_group( p->filter_mode = filter_mode; p->rt_protocol = rt_protocol; p->eht_host_tree = RB_ROOT; + p->eht_set_tree = RB_ROOT; p->mcast_gc.destroy = br_multicast_destroy_port_group; INIT_HLIST_HEAD(&p->src_list); diff --git a/net/bridge/br_multicast_eht.c b/net/bridge/br_multicast_eht.c index 5cebca45e72c1..f4bbf2dc9fc84 100644 --- a/net/bridge/br_multicast_eht.c +++ b/net/bridge/br_multicast_eht.c @@ -30,6 +30,15 @@ #include "br_private.h" #include "br_private_mcast_eht.h" +static bool br_multicast_del_eht_set_entry(struct net_bridge_port_group *pg, + union net_bridge_eht_addr *src_addr, + union net_bridge_eht_addr *h_addr); +static void br_multicast_create_eht_set_entry(struct net_bridge_port_group *pg, + union net_bridge_eht_addr *src_addr, + union net_bridge_eht_addr *h_addr, + int filter_mode, + bool allow_zero_src); + static struct net_bridge_group_eht_host * br_multicast_eht_host_lookup(struct net_bridge_port_group *pg, union net_bridge_eht_addr *h_addr) @@ -66,6 +75,54 @@ static int br_multicast_eht_host_filter_mode(struct net_bridge_port_group *pg, return eht_host->filter_mode; } +static struct net_bridge_group_eht_set_entry * +br_multicast_eht_set_entry_lookup(struct net_bridge_group_eht_set *eht_set, + union net_bridge_eht_addr *h_addr) +{ + struct rb_node *node = eht_set->entry_tree.rb_node; + + while (node) { + struct net_bridge_group_eht_set_entry *this; + int result; + + this = rb_entry(node, struct net_bridge_group_eht_set_entry, + rb_node); + result = memcmp(h_addr, &this->h_addr, sizeof(*h_addr)); + if (result < 0) + node = node->rb_left; + else if (result > 0) + node = node->rb_right; + else + return this; + } + + return NULL; +} + +static struct net_bridge_group_eht_set * +br_multicast_eht_set_lookup(struct net_bridge_port_group *pg, + union net_bridge_eht_addr *src_addr) +{ + struct rb_node *node = pg->eht_set_tree.rb_node; + + while (node) { + struct net_bridge_group_eht_set *this; + int result; + + this = rb_entry(node, struct net_bridge_group_eht_set, + rb_node); + result = memcmp(src_addr, &this->src_addr, sizeof(*src_addr)); + if (result < 0) + node = node->rb_left; + else if (result > 0) + node = node->rb_right; + else + return this; + } + + return NULL; +} + static void __eht_destroy_host(struct net_bridge_group_eht_host *eht_host) { WARN_ON(!hlist_empty(&eht_host->set_entries)); @@ -75,6 +132,107 @@ static void __eht_destroy_host(struct net_bridge_group_eht_host *eht_host) kfree(eht_host); } +static void br_multicast_destroy_eht_set_entry(struct net_bridge_mcast_gc *gc) +{ + struct net_bridge_group_eht_set_entry *set_h; + + set_h = container_of(gc, struct net_bridge_group_eht_set_entry, mcast_gc); + WARN_ON(!RB_EMPTY_NODE(&set_h->rb_node)); + + del_timer_sync(&set_h->timer); + kfree(set_h); +} + +static void br_multicast_destroy_eht_set(struct net_bridge_mcast_gc *gc) +{ + struct net_bridge_group_eht_set *eht_set; + + eht_set = container_of(gc, struct net_bridge_group_eht_set, mcast_gc); + WARN_ON(!RB_EMPTY_NODE(&eht_set->rb_node)); + WARN_ON(!RB_EMPTY_ROOT(&eht_set->entry_tree)); + + del_timer_sync(&eht_set->timer); + kfree(eht_set); +} + +static void __eht_del_set_entry(struct net_bridge_group_eht_set_entry *set_h) +{ + struct net_bridge_group_eht_host *eht_host = set_h->h_parent; + union net_bridge_eht_addr zero_addr; + + rb_erase(&set_h->rb_node, &set_h->eht_set->entry_tree); + RB_CLEAR_NODE(&set_h->rb_node); + hlist_del_init(&set_h->host_list); + memset(&zero_addr, 0, sizeof(zero_addr)); + if (memcmp(&set_h->h_addr, &zero_addr, sizeof(zero_addr))) + eht_host->num_entries--; + hlist_add_head(&set_h->mcast_gc.gc_node, &set_h->br->mcast_gc_list); + queue_work(system_long_wq, &set_h->br->mcast_gc_work); + + if (hlist_empty(&eht_host->set_entries)) + __eht_destroy_host(eht_host); +} + +static void br_multicast_del_eht_set(struct net_bridge_group_eht_set *eht_set) +{ + struct net_bridge_group_eht_set_entry *set_h; + struct rb_node *node; + + while ((node = rb_first(&eht_set->entry_tree))) { + set_h = rb_entry(node, struct net_bridge_group_eht_set_entry, + rb_node); + __eht_del_set_entry(set_h); + } + + rb_erase(&eht_set->rb_node, &eht_set->pg->eht_set_tree); + RB_CLEAR_NODE(&eht_set->rb_node); + hlist_add_head(&eht_set->mcast_gc.gc_node, &eht_set->br->mcast_gc_list); + queue_work(system_long_wq, &eht_set->br->mcast_gc_work); +} + +void br_multicast_eht_clean_sets(struct net_bridge_port_group *pg) +{ + struct net_bridge_group_eht_set *eht_set; + struct rb_node *node; + + while ((node = rb_first(&pg->eht_set_tree))) { + eht_set = rb_entry(node, struct net_bridge_group_eht_set, + rb_node); + br_multicast_del_eht_set(eht_set); + } +} + +static void br_multicast_eht_set_entry_expired(struct timer_list *t) +{ + struct net_bridge_group_eht_set_entry *set_h = from_timer(set_h, t, timer); + struct net_bridge *br = set_h->br; + + spin_lock(&br->multicast_lock); + if (RB_EMPTY_NODE(&set_h->rb_node) || timer_pending(&set_h->timer)) + goto out; + + br_multicast_del_eht_set_entry(set_h->eht_set->pg, + &set_h->eht_set->src_addr, + &set_h->h_addr); +out: + spin_unlock(&br->multicast_lock); +} + +static void br_multicast_eht_set_expired(struct timer_list *t) +{ + struct net_bridge_group_eht_set *eht_set = from_timer(eht_set, t, + timer); + struct net_bridge *br = eht_set->br; + + spin_lock(&br->multicast_lock); + if (RB_EMPTY_NODE(&eht_set->rb_node) || timer_pending(&eht_set->timer)) + goto out; + + br_multicast_del_eht_set(eht_set); +out: + spin_unlock(&br->multicast_lock); +} + static struct net_bridge_group_eht_host * __eht_lookup_create_host(struct net_bridge_port_group *pg, union net_bridge_eht_addr *h_addr, @@ -113,3 +271,166 @@ __eht_lookup_create_host(struct net_bridge_port_group *pg, return eht_host; } + +static struct net_bridge_group_eht_set_entry * +__eht_lookup_create_set_entry(struct net_bridge *br, + struct net_bridge_group_eht_set *eht_set, + struct net_bridge_group_eht_host *eht_host, + bool allow_zero_src) +{ + struct rb_node **link = &eht_set->entry_tree.rb_node, *parent = NULL; + struct net_bridge_group_eht_set_entry *set_h; + + while (*link) { + struct net_bridge_group_eht_set_entry *this; + int result; + + this = rb_entry(*link, struct net_bridge_group_eht_set_entry, + rb_node); + result = memcmp(&eht_host->h_addr, &this->h_addr, + sizeof(union net_bridge_eht_addr)); + parent = *link; + if (result < 0) + link = &((*link)->rb_left); + else if (result > 0) + link = &((*link)->rb_right); + else + return this; + } + + /* always allow auto-created zero entry */ + if (!allow_zero_src && eht_host->num_entries >= PG_SRC_ENT_LIMIT) + return NULL; + + set_h = kzalloc(sizeof(*set_h), GFP_ATOMIC); + if (!set_h) + return NULL; + + memcpy(&set_h->h_addr, &eht_host->h_addr, + sizeof(union net_bridge_eht_addr)); + set_h->mcast_gc.destroy = br_multicast_destroy_eht_set_entry; + set_h->eht_set = eht_set; + set_h->h_parent = eht_host; + set_h->br = br; + timer_setup(&set_h->timer, br_multicast_eht_set_entry_expired, 0); + + hlist_add_head(&set_h->host_list, &eht_host->set_entries); + rb_link_node(&set_h->rb_node, parent, link); + rb_insert_color(&set_h->rb_node, &eht_set->entry_tree); + /* we must not count the auto-created zero entry otherwise we won't be + * able to track the full list of PG_SRC_ENT_LIMIT entries + */ + if (!allow_zero_src) + eht_host->num_entries++; + + return set_h; +} + +static struct net_bridge_group_eht_set * +__eht_lookup_create_set(struct net_bridge_port_group *pg, + union net_bridge_eht_addr *src_addr) +{ + struct rb_node **link = &pg->eht_set_tree.rb_node, *parent = NULL; + struct net_bridge_group_eht_set *eht_set; + + while (*link) { + struct net_bridge_group_eht_set *this; + int result; + + this = rb_entry(*link, struct net_bridge_group_eht_set, + rb_node); + result = memcmp(src_addr, &this->src_addr, sizeof(*src_addr)); + parent = *link; + if (result < 0) + link = &((*link)->rb_left); + else if (result > 0) + link = &((*link)->rb_right); + else + return this; + } + + eht_set = kzalloc(sizeof(*eht_set), GFP_ATOMIC); + if (!eht_set) + return NULL; + + memcpy(&eht_set->src_addr, src_addr, sizeof(*src_addr)); + eht_set->mcast_gc.destroy = br_multicast_destroy_eht_set; + eht_set->pg = pg; + eht_set->br = pg->key.port->br; + eht_set->entry_tree = RB_ROOT; + timer_setup(&eht_set->timer, br_multicast_eht_set_expired, 0); + + rb_link_node(&eht_set->rb_node, parent, link); + rb_insert_color(&eht_set->rb_node, &pg->eht_set_tree); + + return eht_set; +} + +static void br_multicast_create_eht_set_entry(struct net_bridge_port_group *pg, + union net_bridge_eht_addr *src_addr, + union net_bridge_eht_addr *h_addr, + int filter_mode, + bool allow_zero_src) +{ + struct net_bridge_group_eht_set_entry *set_h; + struct net_bridge_group_eht_host *eht_host; + struct net_bridge *br = pg->key.port->br; + struct net_bridge_group_eht_set *eht_set; + union net_bridge_eht_addr zero_addr; + + memset(&zero_addr, 0, sizeof(zero_addr)); + if (!allow_zero_src && !memcmp(src_addr, &zero_addr, sizeof(zero_addr))) + return; + + eht_set = __eht_lookup_create_set(pg, src_addr); + if (!eht_set) + return; + + eht_host = __eht_lookup_create_host(pg, h_addr, filter_mode); + if (!eht_host) + goto fail_host; + + set_h = __eht_lookup_create_set_entry(br, eht_set, eht_host, + allow_zero_src); + if (!set_h) + goto fail_set_entry; + + mod_timer(&set_h->timer, jiffies + br_multicast_gmi(br)); + mod_timer(&eht_set->timer, jiffies + br_multicast_gmi(br)); + + return; + +fail_set_entry: + if (hlist_empty(&eht_host->set_entries)) + __eht_destroy_host(eht_host); +fail_host: + if (RB_EMPTY_ROOT(&eht_set->entry_tree)) + br_multicast_del_eht_set(eht_set); +} + +static bool br_multicast_del_eht_set_entry(struct net_bridge_port_group *pg, + union net_bridge_eht_addr *src_addr, + union net_bridge_eht_addr *h_addr) +{ + struct net_bridge_group_eht_set_entry *set_h; + struct net_bridge_group_eht_set *eht_set; + bool set_deleted = false; + + eht_set = br_multicast_eht_set_lookup(pg, src_addr); + if (!eht_set) + goto out; + + set_h = br_multicast_eht_set_entry_lookup(eht_set, h_addr); + if (!set_h) + goto out; + + __eht_del_set_entry(set_h); + + if (RB_EMPTY_ROOT(&eht_set->entry_tree)) { + br_multicast_del_eht_set(eht_set); + set_deleted = true; + } + +out: + return set_deleted; +} diff --git a/net/bridge/br_private_mcast_eht.h b/net/bridge/br_private_mcast_eht.h index 0c9c4267969dc..bba507c9acb09 100644 --- a/net/bridge/br_private_mcast_eht.h +++ b/net/bridge/br_private_mcast_eht.h @@ -47,4 +47,6 @@ struct net_bridge_group_eht_set { struct net_bridge_mcast_gc mcast_gc; }; +void br_multicast_eht_clean_sets(struct net_bridge_port_group *pg); + #endif /* _BR_PRIVATE_MCAST_EHT_H_ */ -- GitLab From dba6b0a5ca21c7c270977879f5670c78823e0da2 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Wed, 20 Jan 2021 16:51:57 +0200 Subject: [PATCH 1305/2012] net: bridge: multicast: add EHT host delete function Now that we can delete set entries, we can use that to remove EHT hosts. Since the group's host set entries exist only when there are related source set entries we just have to flush all source set entries joined by the host set entry and it will be automatically removed. Signed-off-by: Nikolay Aleksandrov Signed-off-by: Jakub Kicinski --- net/bridge/br_multicast_eht.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/net/bridge/br_multicast_eht.c b/net/bridge/br_multicast_eht.c index f4bbf2dc9fc84..409fced7eae23 100644 --- a/net/bridge/br_multicast_eht.c +++ b/net/bridge/br_multicast_eht.c @@ -434,3 +434,20 @@ static bool br_multicast_del_eht_set_entry(struct net_bridge_port_group *pg, out: return set_deleted; } + +static void br_multicast_del_eht_host(struct net_bridge_port_group *pg, + union net_bridge_eht_addr *h_addr) +{ + struct net_bridge_group_eht_set_entry *set_h; + struct net_bridge_group_eht_host *eht_host; + struct hlist_node *tmp; + + eht_host = br_multicast_eht_host_lookup(pg, h_addr); + if (!eht_host) + return; + + hlist_for_each_entry_safe(set_h, tmp, &eht_host->set_entries, host_list) + br_multicast_del_eht_set_entry(set_h->eht_set->pg, + &set_h->eht_set->src_addr, + &set_h->h_addr); +} -- GitLab From 474ddb37fa3ad0454f8d07bb9fb53ceab190b667 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Wed, 20 Jan 2021 16:51:58 +0200 Subject: [PATCH 1306/2012] net: bridge: multicast: add EHT allow/block handling Add support for IGMPv3/MLDv2 allow/block EHT handling. Similar to how the reports are processed we have 2 cases when the group is in include or exclude mode, these are processed as follows: - group include - allow: create missing entries - block: remove existing matching entries and remove the corresponding S,G entries if there are no more set host entries, then possibly delete the whole group if there are no more S,G entries - group exclude - allow - host include: create missing entries - host exclude: remove existing matching entries and remove the corresponding S,G entries if there are no more set host entries - block - host include: remove existing matching entries and remove the corresponding S,G entries if there are no more set host entries, then possibly delete the whole group if there are no more S,G entries - host exclude: create missing entries Signed-off-by: Nikolay Aleksandrov Signed-off-by: Jakub Kicinski --- net/bridge/br_multicast.c | 43 +++-- net/bridge/br_multicast_eht.c | 252 ++++++++++++++++++++++++++++++ net/bridge/br_private.h | 3 + net/bridge/br_private_mcast_eht.h | 6 + 4 files changed, 290 insertions(+), 14 deletions(-) diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index ac363b1042397..3b8c5d1d0c552 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -560,7 +560,7 @@ static void br_multicast_destroy_group_src(struct net_bridge_mcast_gc *gc) kfree_rcu(src, rcu); } -static void br_multicast_del_group_src(struct net_bridge_group_src *src) +void br_multicast_del_group_src(struct net_bridge_group_src *src) { struct net_bridge *br = src->pg->key.port->br; @@ -1092,7 +1092,7 @@ out: spin_unlock(&br->multicast_lock); } -static struct net_bridge_group_src * +struct net_bridge_group_src * br_multicast_find_group_src(struct net_bridge_port_group *pg, struct br_ip *ip) { struct net_bridge_group_src *ent; @@ -1804,7 +1804,8 @@ static void __grp_send_query_and_rexmit(struct net_bridge_port_group *pg) * EXCLUDE (X,Y) ALLOW (A) EXCLUDE (X+A,Y-A) (A)=GMI */ static bool br_multicast_isinc_allow(struct net_bridge_port_group *pg, void *h_addr, - void *srcs, u32 nsrcs, size_t addr_size) + void *srcs, u32 nsrcs, size_t addr_size, + int grec_type) { struct net_bridge *br = pg->key.port->br; struct net_bridge_group_src *ent; @@ -1828,6 +1829,9 @@ static bool br_multicast_isinc_allow(struct net_bridge_port_group *pg, void *h_a __grp_src_mod_timer(ent, now + br_multicast_gmi(br)); } + if (br_multicast_eht_handle(pg, h_addr, srcs, nsrcs, addr_size, grec_type)) + changed = true; + return changed; } @@ -2140,7 +2144,7 @@ static bool br_multicast_toex(struct net_bridge_port_group *pg, void *h_addr, * INCLUDE (A) BLOCK (B) INCLUDE (A) Send Q(G,A*B) */ static bool __grp_src_block_incl(struct net_bridge_port_group *pg, void *h_addr, - void *srcs, u32 nsrcs, size_t addr_size) + void *srcs, u32 nsrcs, size_t addr_size, int grec_type) { struct net_bridge_group_src *ent; u32 src_idx, to_send = 0; @@ -2161,6 +2165,9 @@ static bool __grp_src_block_incl(struct net_bridge_port_group *pg, void *h_addr, } } + if (br_multicast_eht_handle(pg, h_addr, srcs, nsrcs, addr_size, grec_type)) + changed = true; + if (to_send) __grp_src_query_marked_and_rexmit(pg); @@ -2180,7 +2187,7 @@ static bool __grp_src_block_incl(struct net_bridge_port_group *pg, void *h_addr, * Send Q(G,A-Y) */ static bool __grp_src_block_excl(struct net_bridge_port_group *pg, void *h_addr, - void *srcs, u32 nsrcs, size_t addr_size) + void *srcs, u32 nsrcs, size_t addr_size, int grec_type) { struct net_bridge_group_src *ent; u32 src_idx, to_send = 0; @@ -2208,6 +2215,9 @@ static bool __grp_src_block_excl(struct net_bridge_port_group *pg, void *h_addr, } } + if (br_multicast_eht_handle(pg, h_addr, srcs, nsrcs, addr_size, grec_type)) + changed = true; + if (to_send) __grp_src_query_marked_and_rexmit(pg); @@ -2215,16 +2225,18 @@ static bool __grp_src_block_excl(struct net_bridge_port_group *pg, void *h_addr, } static bool br_multicast_block(struct net_bridge_port_group *pg, void *h_addr, - void *srcs, u32 nsrcs, size_t addr_size) + void *srcs, u32 nsrcs, size_t addr_size, int grec_type) { bool changed = false; switch (pg->filter_mode) { case MCAST_INCLUDE: - changed = __grp_src_block_incl(pg, h_addr, srcs, nsrcs, addr_size); + changed = __grp_src_block_incl(pg, h_addr, srcs, nsrcs, addr_size, + grec_type); break; case MCAST_EXCLUDE: - changed = __grp_src_block_excl(pg, h_addr, srcs, nsrcs, addr_size); + changed = __grp_src_block_excl(pg, h_addr, srcs, nsrcs, addr_size, + grec_type); break; } @@ -2327,11 +2339,11 @@ static int br_ip4_multicast_igmp3_report(struct net_bridge *br, switch (type) { case IGMPV3_ALLOW_NEW_SOURCES: changed = br_multicast_isinc_allow(pg, h_addr, grec->grec_src, - nsrcs, sizeof(__be32)); + nsrcs, sizeof(__be32), type); break; case IGMPV3_MODE_IS_INCLUDE: changed = br_multicast_isinc_allow(pg, h_addr, grec->grec_src, - nsrcs, sizeof(__be32)); + nsrcs, sizeof(__be32), type); break; case IGMPV3_MODE_IS_EXCLUDE: changed = br_multicast_isexc(pg, h_addr, grec->grec_src, @@ -2347,7 +2359,7 @@ static int br_ip4_multicast_igmp3_report(struct net_bridge *br, break; case IGMPV3_BLOCK_OLD_SOURCES: changed = br_multicast_block(pg, h_addr, grec->grec_src, - nsrcs, sizeof(__be32)); + nsrcs, sizeof(__be32), type); break; } if (changed) @@ -2455,12 +2467,14 @@ static int br_ip6_multicast_mld2_report(struct net_bridge *br, case MLD2_ALLOW_NEW_SOURCES: changed = br_multicast_isinc_allow(pg, h_addr, grec->grec_src, nsrcs, - sizeof(struct in6_addr)); + sizeof(struct in6_addr), + grec->grec_type); break; case MLD2_MODE_IS_INCLUDE: changed = br_multicast_isinc_allow(pg, h_addr, grec->grec_src, nsrcs, - sizeof(struct in6_addr)); + sizeof(struct in6_addr), + grec->grec_type); break; case MLD2_MODE_IS_EXCLUDE: changed = br_multicast_isexc(pg, h_addr, @@ -2480,7 +2494,8 @@ static int br_ip6_multicast_mld2_report(struct net_bridge *br, case MLD2_BLOCK_OLD_SOURCES: changed = br_multicast_block(pg, h_addr, grec->grec_src, nsrcs, - sizeof(struct in6_addr)); + sizeof(struct in6_addr), + grec->grec_type); break; } if (changed) diff --git a/net/bridge/br_multicast_eht.c b/net/bridge/br_multicast_eht.c index 409fced7eae23..43f60388df160 100644 --- a/net/bridge/br_multicast_eht.c +++ b/net/bridge/br_multicast_eht.c @@ -366,6 +366,21 @@ __eht_lookup_create_set(struct net_bridge_port_group *pg, return eht_set; } +static void br_multicast_ip_src_to_eht_addr(const struct br_ip *src, + union net_bridge_eht_addr *dest) +{ + switch (src->proto) { + case htons(ETH_P_IP): + dest->ip4 = src->src.ip4; + break; +#if IS_ENABLED(CONFIG_IPV6) + case htons(ETH_P_IPV6): + memcpy(&dest->ip6, &src->src.ip6, sizeof(struct in6_addr)); + break; +#endif + } +} + static void br_multicast_create_eht_set_entry(struct net_bridge_port_group *pg, union net_bridge_eht_addr *src_addr, union net_bridge_eht_addr *h_addr, @@ -451,3 +466,240 @@ static void br_multicast_del_eht_host(struct net_bridge_port_group *pg, &set_h->eht_set->src_addr, &set_h->h_addr); } + +static void __eht_allow_incl(struct net_bridge_port_group *pg, + union net_bridge_eht_addr *h_addr, + void *srcs, + u32 nsrcs, + size_t addr_size) +{ + union net_bridge_eht_addr eht_src_addr; + u32 src_idx; + + memset(&eht_src_addr, 0, sizeof(eht_src_addr)); + for (src_idx = 0; src_idx < nsrcs; src_idx++) { + memcpy(&eht_src_addr, srcs + (src_idx * addr_size), addr_size); + br_multicast_create_eht_set_entry(pg, &eht_src_addr, h_addr, + MCAST_INCLUDE, + false); + } +} + +static bool __eht_allow_excl(struct net_bridge_port_group *pg, + union net_bridge_eht_addr *h_addr, + void *srcs, + u32 nsrcs, + size_t addr_size) +{ + bool changed = false, host_excl = false; + union net_bridge_eht_addr eht_src_addr; + struct net_bridge_group_src *src_ent; + struct br_ip src_ip; + u32 src_idx; + + host_excl = !!(br_multicast_eht_host_filter_mode(pg, h_addr) == MCAST_EXCLUDE); + memset(&eht_src_addr, 0, sizeof(eht_src_addr)); + for (src_idx = 0; src_idx < nsrcs; src_idx++) { + memcpy(&eht_src_addr, srcs + (src_idx * addr_size), addr_size); + if (!host_excl) { + br_multicast_create_eht_set_entry(pg, &eht_src_addr, h_addr, + MCAST_INCLUDE, + false); + } else { + if (!br_multicast_del_eht_set_entry(pg, &eht_src_addr, + h_addr)) + continue; + memcpy(&src_ip, srcs + (src_idx * addr_size), addr_size); + src_ent = br_multicast_find_group_src(pg, &src_ip); + if (!src_ent) + continue; + br_multicast_del_group_src(src_ent); + changed = true; + } + } + + return changed; +} + +static bool br_multicast_eht_allow(struct net_bridge_port_group *pg, + union net_bridge_eht_addr *h_addr, + void *srcs, + u32 nsrcs, + size_t addr_size) +{ + bool changed = false; + + switch (br_multicast_eht_host_filter_mode(pg, h_addr)) { + case MCAST_INCLUDE: + __eht_allow_incl(pg, h_addr, srcs, nsrcs, addr_size); + break; + case MCAST_EXCLUDE: + changed = __eht_allow_excl(pg, h_addr, srcs, nsrcs, addr_size); + break; + } + + return changed; +} + +static bool __eht_block_incl(struct net_bridge_port_group *pg, + union net_bridge_eht_addr *h_addr, + void *srcs, + u32 nsrcs, + size_t addr_size) +{ + union net_bridge_eht_addr eht_src_addr; + struct net_bridge_group_src *src_ent; + bool changed = false; + struct br_ip src_ip; + u32 src_idx; + + memset(&eht_src_addr, 0, sizeof(eht_src_addr)); + memset(&src_ip, 0, sizeof(src_ip)); + src_ip.proto = pg->key.addr.proto; + for (src_idx = 0; src_idx < nsrcs; src_idx++) { + memcpy(&eht_src_addr, srcs + (src_idx * addr_size), addr_size); + if (!br_multicast_del_eht_set_entry(pg, &eht_src_addr, h_addr)) + continue; + memcpy(&src_ip, srcs + (src_idx * addr_size), addr_size); + src_ent = br_multicast_find_group_src(pg, &src_ip); + if (!src_ent) + continue; + br_multicast_del_group_src(src_ent); + changed = true; + } + + return changed; +} + +static bool __eht_block_excl(struct net_bridge_port_group *pg, + union net_bridge_eht_addr *h_addr, + void *srcs, + u32 nsrcs, + size_t addr_size) +{ + bool changed = false, host_excl = false; + union net_bridge_eht_addr eht_src_addr; + struct net_bridge_group_src *src_ent; + struct br_ip src_ip; + u32 src_idx; + + host_excl = !!(br_multicast_eht_host_filter_mode(pg, h_addr) == MCAST_EXCLUDE); + memset(&eht_src_addr, 0, sizeof(eht_src_addr)); + memset(&src_ip, 0, sizeof(src_ip)); + src_ip.proto = pg->key.addr.proto; + for (src_idx = 0; src_idx < nsrcs; src_idx++) { + memcpy(&eht_src_addr, srcs + (src_idx * addr_size), addr_size); + if (host_excl) { + br_multicast_create_eht_set_entry(pg, &eht_src_addr, h_addr, + MCAST_EXCLUDE, + false); + } else { + if (!br_multicast_del_eht_set_entry(pg, &eht_src_addr, + h_addr)) + continue; + memcpy(&src_ip, srcs + (src_idx * addr_size), addr_size); + src_ent = br_multicast_find_group_src(pg, &src_ip); + if (!src_ent) + continue; + br_multicast_del_group_src(src_ent); + changed = true; + } + } + + return changed; +} + +static bool br_multicast_eht_block(struct net_bridge_port_group *pg, + union net_bridge_eht_addr *h_addr, + void *srcs, + u32 nsrcs, + size_t addr_size) +{ + bool changed = false; + + switch (br_multicast_eht_host_filter_mode(pg, h_addr)) { + case MCAST_INCLUDE: + changed = __eht_block_incl(pg, h_addr, srcs, nsrcs, addr_size); + break; + case MCAST_EXCLUDE: + changed = __eht_block_excl(pg, h_addr, srcs, nsrcs, addr_size); + break; + } + + return changed; +} + +static bool __eht_ip4_handle(struct net_bridge_port_group *pg, + union net_bridge_eht_addr *h_addr, + void *srcs, + u32 nsrcs, + int grec_type) +{ + bool changed = false; + + switch (grec_type) { + case IGMPV3_ALLOW_NEW_SOURCES: + br_multicast_eht_allow(pg, h_addr, srcs, nsrcs, sizeof(__be32)); + break; + case IGMPV3_BLOCK_OLD_SOURCES: + changed = br_multicast_eht_block(pg, h_addr, srcs, nsrcs, + sizeof(__be32)); + break; + } + + return changed; +} + +#if IS_ENABLED(CONFIG_IPV6) +static bool __eht_ip6_handle(struct net_bridge_port_group *pg, + union net_bridge_eht_addr *h_addr, + void *srcs, + u32 nsrcs, + int grec_type) +{ + bool changed = false; + + switch (grec_type) { + case MLD2_ALLOW_NEW_SOURCES: + br_multicast_eht_allow(pg, h_addr, srcs, nsrcs, + sizeof(struct in6_addr)); + break; + case MLD2_BLOCK_OLD_SOURCES: + changed = br_multicast_eht_block(pg, h_addr, srcs, nsrcs, + sizeof(struct in6_addr)); + break; + } + + return changed; +} +#endif + +/* true means an entry was deleted */ +bool br_multicast_eht_handle(struct net_bridge_port_group *pg, + void *h_addr, + void *srcs, + u32 nsrcs, + size_t addr_size, + int grec_type) +{ + bool eht_enabled = !!(pg->key.port->flags & BR_MULTICAST_FAST_LEAVE); + union net_bridge_eht_addr eht_host_addr; + bool changed = false; + + if (!eht_enabled) + goto out; + + memset(&eht_host_addr, 0, sizeof(eht_host_addr)); + memcpy(&eht_host_addr, h_addr, addr_size); + if (addr_size == sizeof(__be32)) + changed = __eht_ip4_handle(pg, &eht_host_addr, srcs, nsrcs, + grec_type); +#if IS_ENABLED(CONFIG_IPV6) + else + changed = __eht_ip6_handle(pg, &eht_host_addr, srcs, nsrcs, + grec_type); +#endif + +out: + return changed; +} diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 0bf4c544a5da4..cad967690e9f7 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -848,6 +848,9 @@ void br_multicast_star_g_handle_mode(struct net_bridge_port_group *pg, u8 filter_mode); void br_multicast_sg_add_exclude_ports(struct net_bridge_mdb_entry *star_mp, struct net_bridge_port_group *sg); +struct net_bridge_group_src * +br_multicast_find_group_src(struct net_bridge_port_group *pg, struct br_ip *ip); +void br_multicast_del_group_src(struct net_bridge_group_src *src); static inline bool br_group_is_l2(const struct br_ip *group) { diff --git a/net/bridge/br_private_mcast_eht.h b/net/bridge/br_private_mcast_eht.h index bba507c9acb09..92933822301d8 100644 --- a/net/bridge/br_private_mcast_eht.h +++ b/net/bridge/br_private_mcast_eht.h @@ -48,5 +48,11 @@ struct net_bridge_group_eht_set { }; void br_multicast_eht_clean_sets(struct net_bridge_port_group *pg); +bool br_multicast_eht_handle(struct net_bridge_port_group *pg, + void *h_addr, + void *srcs, + u32 nsrcs, + size_t addr_size, + int grec_type); #endif /* _BR_PRIVATE_MCAST_EHT_H_ */ -- GitLab From ddc255d993d83bc13c2c8b239fd69cb87d12d03e Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Wed, 20 Jan 2021 16:51:59 +0200 Subject: [PATCH 1307/2012] net: bridge: multicast: add EHT include and exclude handling Add support for IGMPv3/MLDv2 include and exclude EHT handling. Similar to how the reports are processed we have 2 cases when the group is in include or exclude mode, these are processed as follows: - group include - is_include: create missing entries - to_include: flush existing entries and create a new set from the report, obviously if the src set is empty then we delete the group - group exclude - is_exclude: create missing entries - to_exclude: flush existing entries and create a new set from the report, any empty source set entries are removed If the group is in a different mode then we just flush all entries reported by the host and we create a new set with the new mode entries created from the report. If the report is include type, the source list is empty and the group has empty sources' set then we remove it. Any source set entries which are empty are removed as well. If the group is in exclude mode it can exist without any S,G entries (allowing for all traffic to pass). Signed-off-by: Nikolay Aleksandrov Signed-off-by: Jakub Kicinski --- net/bridge/br_multicast.c | 84 ++++++++++++++++++------- net/bridge/br_multicast_eht.c | 100 +++++++++++++++++++++++++++++- net/bridge/br_private_mcast_eht.h | 7 +++ 3 files changed, 168 insertions(+), 23 deletions(-) diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 3b8c5d1d0c552..9cfc004312aba 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -1841,7 +1841,8 @@ static bool br_multicast_isinc_allow(struct net_bridge_port_group *pg, void *h_a * Group Timer=GMI */ static void __grp_src_isexc_incl(struct net_bridge_port_group *pg, void *h_addr, - void *srcs, u32 nsrcs, size_t addr_size) + void *srcs, u32 nsrcs, size_t addr_size, + int grec_type) { struct net_bridge_group_src *ent; struct br_ip src_ip; @@ -1863,6 +1864,8 @@ static void __grp_src_isexc_incl(struct net_bridge_port_group *pg, void *h_addr, br_multicast_fwd_src_handle(ent); } + br_multicast_eht_handle(pg, h_addr, srcs, nsrcs, addr_size, grec_type); + __grp_src_delete_marked(pg); } @@ -1873,7 +1876,8 @@ static void __grp_src_isexc_incl(struct net_bridge_port_group *pg, void *h_addr, * Group Timer=GMI */ static bool __grp_src_isexc_excl(struct net_bridge_port_group *pg, void *h_addr, - void *srcs, u32 nsrcs, size_t addr_size) + void *srcs, u32 nsrcs, size_t addr_size, + int grec_type) { struct net_bridge *br = pg->key.port->br; struct net_bridge_group_src *ent; @@ -1902,6 +1906,9 @@ static bool __grp_src_isexc_excl(struct net_bridge_port_group *pg, void *h_addr, } } + if (br_multicast_eht_handle(pg, h_addr, srcs, nsrcs, addr_size, grec_type)) + changed = true; + if (__grp_src_delete_marked(pg)) changed = true; @@ -1909,19 +1916,22 @@ static bool __grp_src_isexc_excl(struct net_bridge_port_group *pg, void *h_addr, } static bool br_multicast_isexc(struct net_bridge_port_group *pg, void *h_addr, - void *srcs, u32 nsrcs, size_t addr_size) + void *srcs, u32 nsrcs, size_t addr_size, + int grec_type) { struct net_bridge *br = pg->key.port->br; bool changed = false; switch (pg->filter_mode) { case MCAST_INCLUDE: - __grp_src_isexc_incl(pg, h_addr, srcs, nsrcs, addr_size); + __grp_src_isexc_incl(pg, h_addr, srcs, nsrcs, addr_size, + grec_type); br_multicast_star_g_handle_mode(pg, MCAST_EXCLUDE); changed = true; break; case MCAST_EXCLUDE: - changed = __grp_src_isexc_excl(pg, h_addr, srcs, nsrcs, addr_size); + changed = __grp_src_isexc_excl(pg, h_addr, srcs, nsrcs, addr_size, + grec_type); break; } @@ -1936,7 +1946,8 @@ static bool br_multicast_isexc(struct net_bridge_port_group *pg, void *h_addr, * Send Q(G,A-B) */ static bool __grp_src_toin_incl(struct net_bridge_port_group *pg, void *h_addr, - void *srcs, u32 nsrcs, size_t addr_size) + void *srcs, u32 nsrcs, size_t addr_size, + int grec_type) { struct net_bridge *br = pg->key.port->br; u32 src_idx, to_send = pg->src_ents; @@ -1965,6 +1976,9 @@ static bool __grp_src_toin_incl(struct net_bridge_port_group *pg, void *h_addr, __grp_src_mod_timer(ent, now + br_multicast_gmi(br)); } + if (br_multicast_eht_handle(pg, h_addr, srcs, nsrcs, addr_size, grec_type)) + changed = true; + if (to_send) __grp_src_query_marked_and_rexmit(pg); @@ -1977,7 +1991,8 @@ static bool __grp_src_toin_incl(struct net_bridge_port_group *pg, void *h_addr, * Send Q(G) */ static bool __grp_src_toin_excl(struct net_bridge_port_group *pg, void *h_addr, - void *srcs, u32 nsrcs, size_t addr_size) + void *srcs, u32 nsrcs, size_t addr_size, + int grec_type) { struct net_bridge *br = pg->key.port->br; u32 src_idx, to_send = pg->src_ents; @@ -2009,6 +2024,9 @@ static bool __grp_src_toin_excl(struct net_bridge_port_group *pg, void *h_addr, __grp_src_mod_timer(ent, now + br_multicast_gmi(br)); } + if (br_multicast_eht_handle(pg, h_addr, srcs, nsrcs, addr_size, grec_type)) + changed = true; + if (to_send) __grp_src_query_marked_and_rexmit(pg); @@ -2018,19 +2036,30 @@ static bool __grp_src_toin_excl(struct net_bridge_port_group *pg, void *h_addr, } static bool br_multicast_toin(struct net_bridge_port_group *pg, void *h_addr, - void *srcs, u32 nsrcs, size_t addr_size) + void *srcs, u32 nsrcs, size_t addr_size, + int grec_type) { bool changed = false; switch (pg->filter_mode) { case MCAST_INCLUDE: - changed = __grp_src_toin_incl(pg, h_addr, srcs, nsrcs, addr_size); + changed = __grp_src_toin_incl(pg, h_addr, srcs, nsrcs, addr_size, + grec_type); break; case MCAST_EXCLUDE: - changed = __grp_src_toin_excl(pg, h_addr, srcs, nsrcs, addr_size); + changed = __grp_src_toin_excl(pg, h_addr, srcs, nsrcs, addr_size, + grec_type); break; } + if (br_multicast_eht_should_del_pg(pg)) { + br_multicast_find_del_pg(pg->key.port->br, pg); + /* a notification has already been sent and we shouldn't + * access pg after the delete so we have to return false + */ + changed = false; + } + return changed; } @@ -2041,7 +2070,8 @@ static bool br_multicast_toin(struct net_bridge_port_group *pg, void *h_addr, * Group Timer=GMI */ static void __grp_src_toex_incl(struct net_bridge_port_group *pg, void *h_addr, - void *srcs, u32 nsrcs, size_t addr_size) + void *srcs, u32 nsrcs, size_t addr_size, + int grec_type) { struct net_bridge_group_src *ent; u32 src_idx, to_send = 0; @@ -2066,6 +2096,8 @@ static void __grp_src_toex_incl(struct net_bridge_port_group *pg, void *h_addr, br_multicast_fwd_src_handle(ent); } + br_multicast_eht_handle(pg, h_addr, srcs, nsrcs, addr_size, grec_type); + __grp_src_delete_marked(pg); if (to_send) __grp_src_query_marked_and_rexmit(pg); @@ -2079,7 +2111,8 @@ static void __grp_src_toex_incl(struct net_bridge_port_group *pg, void *h_addr, * Group Timer=GMI */ static bool __grp_src_toex_excl(struct net_bridge_port_group *pg, void *h_addr, - void *srcs, u32 nsrcs, size_t addr_size) + void *srcs, u32 nsrcs, size_t addr_size, + int grec_type) { struct net_bridge_group_src *ent; u32 src_idx, to_send = 0; @@ -2109,6 +2142,9 @@ static bool __grp_src_toex_excl(struct net_bridge_port_group *pg, void *h_addr, } } + if (br_multicast_eht_handle(pg, h_addr, srcs, nsrcs, addr_size, grec_type)) + changed = true; + if (__grp_src_delete_marked(pg)) changed = true; if (to_send) @@ -2118,19 +2154,22 @@ static bool __grp_src_toex_excl(struct net_bridge_port_group *pg, void *h_addr, } static bool br_multicast_toex(struct net_bridge_port_group *pg, void *h_addr, - void *srcs, u32 nsrcs, size_t addr_size) + void *srcs, u32 nsrcs, size_t addr_size, + int grec_type) { struct net_bridge *br = pg->key.port->br; bool changed = false; switch (pg->filter_mode) { case MCAST_INCLUDE: - __grp_src_toex_incl(pg, h_addr, srcs, nsrcs, addr_size); + __grp_src_toex_incl(pg, h_addr, srcs, nsrcs, addr_size, + grec_type); br_multicast_star_g_handle_mode(pg, MCAST_EXCLUDE); changed = true; break; case MCAST_EXCLUDE: - changed = __grp_src_toex_excl(pg, h_addr, srcs, nsrcs, addr_size); + changed = __grp_src_toex_excl(pg, h_addr, srcs, nsrcs, addr_size, + grec_type); break; } @@ -2347,15 +2386,15 @@ static int br_ip4_multicast_igmp3_report(struct net_bridge *br, break; case IGMPV3_MODE_IS_EXCLUDE: changed = br_multicast_isexc(pg, h_addr, grec->grec_src, - nsrcs, sizeof(__be32)); + nsrcs, sizeof(__be32), type); break; case IGMPV3_CHANGE_TO_INCLUDE: changed = br_multicast_toin(pg, h_addr, grec->grec_src, - nsrcs, sizeof(__be32)); + nsrcs, sizeof(__be32), type); break; case IGMPV3_CHANGE_TO_EXCLUDE: changed = br_multicast_toex(pg, h_addr, grec->grec_src, - nsrcs, sizeof(__be32)); + nsrcs, sizeof(__be32), type); break; case IGMPV3_BLOCK_OLD_SOURCES: changed = br_multicast_block(pg, h_addr, grec->grec_src, @@ -2479,17 +2518,20 @@ static int br_ip6_multicast_mld2_report(struct net_bridge *br, case MLD2_MODE_IS_EXCLUDE: changed = br_multicast_isexc(pg, h_addr, grec->grec_src, nsrcs, - sizeof(struct in6_addr)); + sizeof(struct in6_addr), + grec->grec_type); break; case MLD2_CHANGE_TO_INCLUDE: changed = br_multicast_toin(pg, h_addr, grec->grec_src, nsrcs, - sizeof(struct in6_addr)); + sizeof(struct in6_addr), + grec->grec_type); break; case MLD2_CHANGE_TO_EXCLUDE: changed = br_multicast_toex(pg, h_addr, grec->grec_src, nsrcs, - sizeof(struct in6_addr)); + sizeof(struct in6_addr), + grec->grec_type); break; case MLD2_BLOCK_OLD_SOURCES: changed = br_multicast_block(pg, h_addr, diff --git a/net/bridge/br_multicast_eht.c b/net/bridge/br_multicast_eht.c index 43f60388df160..861ae63f4a1c1 100644 --- a/net/bridge/br_multicast_eht.c +++ b/net/bridge/br_multicast_eht.c @@ -629,13 +629,79 @@ static bool br_multicast_eht_block(struct net_bridge_port_group *pg, return changed; } +/* flush_entries is true when changing mode */ +static bool __eht_inc_exc(struct net_bridge_port_group *pg, + union net_bridge_eht_addr *h_addr, + void *srcs, + u32 nsrcs, + size_t addr_size, + unsigned char filter_mode, + bool to_report) +{ + bool changed = false, flush_entries = to_report; + union net_bridge_eht_addr eht_src_addr; + u32 src_idx; + + if (br_multicast_eht_host_filter_mode(pg, h_addr) != filter_mode) + flush_entries = true; + + memset(&eht_src_addr, 0, sizeof(eht_src_addr)); + /* if we're changing mode del host and its entries */ + if (flush_entries) + br_multicast_del_eht_host(pg, h_addr); + for (src_idx = 0; src_idx < nsrcs; src_idx++) { + memcpy(&eht_src_addr, srcs + (src_idx * addr_size), addr_size); + br_multicast_create_eht_set_entry(pg, &eht_src_addr, h_addr, + filter_mode, false); + } + /* we can be missing sets only if we've deleted some entries */ + if (flush_entries) { + struct net_bridge_group_src *src_ent; + struct hlist_node *tmp; + + hlist_for_each_entry_safe(src_ent, tmp, &pg->src_list, node) { + br_multicast_ip_src_to_eht_addr(&src_ent->addr, + &eht_src_addr); + if (!br_multicast_eht_set_lookup(pg, &eht_src_addr)) { + br_multicast_del_group_src(src_ent); + changed = true; + continue; + } + } + } + + return changed; +} + +static bool br_multicast_eht_inc(struct net_bridge_port_group *pg, + union net_bridge_eht_addr *h_addr, + void *srcs, + u32 nsrcs, + size_t addr_size, + bool to_report) +{ + return __eht_inc_exc(pg, h_addr, srcs, nsrcs, addr_size, MCAST_INCLUDE, + to_report); +} + +static bool br_multicast_eht_exc(struct net_bridge_port_group *pg, + union net_bridge_eht_addr *h_addr, + void *srcs, + u32 nsrcs, + size_t addr_size, + bool to_report) +{ + return __eht_inc_exc(pg, h_addr, srcs, nsrcs, addr_size, MCAST_EXCLUDE, + to_report); +} + static bool __eht_ip4_handle(struct net_bridge_port_group *pg, union net_bridge_eht_addr *h_addr, void *srcs, u32 nsrcs, int grec_type) { - bool changed = false; + bool changed = false, to_report = false; switch (grec_type) { case IGMPV3_ALLOW_NEW_SOURCES: @@ -645,6 +711,20 @@ static bool __eht_ip4_handle(struct net_bridge_port_group *pg, changed = br_multicast_eht_block(pg, h_addr, srcs, nsrcs, sizeof(__be32)); break; + case IGMPV3_CHANGE_TO_INCLUDE: + to_report = true; + fallthrough; + case IGMPV3_MODE_IS_INCLUDE: + changed = br_multicast_eht_inc(pg, h_addr, srcs, nsrcs, + sizeof(__be32), to_report); + break; + case IGMPV3_CHANGE_TO_EXCLUDE: + to_report = true; + fallthrough; + case IGMPV3_MODE_IS_EXCLUDE: + changed = br_multicast_eht_exc(pg, h_addr, srcs, nsrcs, + sizeof(__be32), to_report); + break; } return changed; @@ -657,7 +737,7 @@ static bool __eht_ip6_handle(struct net_bridge_port_group *pg, u32 nsrcs, int grec_type) { - bool changed = false; + bool changed = false, to_report = false; switch (grec_type) { case MLD2_ALLOW_NEW_SOURCES: @@ -668,6 +748,22 @@ static bool __eht_ip6_handle(struct net_bridge_port_group *pg, changed = br_multicast_eht_block(pg, h_addr, srcs, nsrcs, sizeof(struct in6_addr)); break; + case MLD2_CHANGE_TO_INCLUDE: + to_report = true; + fallthrough; + case MLD2_MODE_IS_INCLUDE: + changed = br_multicast_eht_inc(pg, h_addr, srcs, nsrcs, + sizeof(struct in6_addr), + to_report); + break; + case MLD2_CHANGE_TO_EXCLUDE: + to_report = true; + fallthrough; + case MLD2_MODE_IS_EXCLUDE: + changed = br_multicast_eht_exc(pg, h_addr, srcs, nsrcs, + sizeof(struct in6_addr), + to_report); + break; } return changed; diff --git a/net/bridge/br_private_mcast_eht.h b/net/bridge/br_private_mcast_eht.h index 92933822301d8..9daffa3ad8d5c 100644 --- a/net/bridge/br_private_mcast_eht.h +++ b/net/bridge/br_private_mcast_eht.h @@ -55,4 +55,11 @@ bool br_multicast_eht_handle(struct net_bridge_port_group *pg, size_t addr_size, int grec_type); +static inline bool +br_multicast_eht_should_del_pg(const struct net_bridge_port_group *pg) +{ + return !!((pg->key.port->flags & BR_MULTICAST_FAST_LEAVE) && + RB_EMPTY_ROOT(&pg->eht_host_tree)); +} + #endif /* _BR_PRIVATE_MCAST_EHT_H_ */ -- GitLab From b66bf55bbc1c1c985d136980fb21dfe9ffd6bf4c Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Wed, 20 Jan 2021 16:52:00 +0200 Subject: [PATCH 1308/2012] net: bridge: multicast: optimize TO_INCLUDE EHT timeouts This is an optimization specifically for TO_INCLUDE which sends queries for the older entries and thus lowers the S,G timers to LMQT. If we have the following situation for a group in either include or exclude mode: - host A was interested in srcs X and Y, but is timing out - host B sends TO_INCLUDE src Z, the bridge lowers X and Y's timeouts to LMQT - host B sends BLOCK src Z after LMQT time has passed => since host B is the last host we can delete the group, but if we still have host A's EHT entries for X and Y (i.e. if they weren't lowered to LMQT previously) then we'll have to wait another LMQT time before deleting the group, with this optimization we can directly remove it regardless of the group mode as there are no more interested hosts Signed-off-by: Nikolay Aleksandrov Signed-off-by: Jakub Kicinski --- net/bridge/br_multicast_eht.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/net/bridge/br_multicast_eht.c b/net/bridge/br_multicast_eht.c index 861ae63f4a1c1..fee3060d04959 100644 --- a/net/bridge/br_multicast_eht.c +++ b/net/bridge/br_multicast_eht.c @@ -656,6 +656,8 @@ static bool __eht_inc_exc(struct net_bridge_port_group *pg, } /* we can be missing sets only if we've deleted some entries */ if (flush_entries) { + struct net_bridge *br = pg->key.port->br; + struct net_bridge_group_eht_set *eht_set; struct net_bridge_group_src *src_ent; struct hlist_node *tmp; @@ -667,6 +669,25 @@ static bool __eht_inc_exc(struct net_bridge_port_group *pg, changed = true; continue; } + /* this is an optimization for TO_INCLUDE where we lower + * the set's timeout to LMQT to catch timeout hosts: + * - host A (timing out): set entries X, Y + * - host B: set entry Z (new from current TO_INCLUDE) + * sends BLOCK Z after LMQT but host A's EHT + * entries still exist (unless lowered to LMQT + * so they can timeout with the S,Gs) + * => we wait another LMQT, when we can just delete the + * group immediately + */ + if (!(src_ent->flags & BR_SGRP_F_SEND) || + filter_mode != MCAST_INCLUDE || + !to_report) + continue; + eht_set = br_multicast_eht_set_lookup(pg, + &eht_src_addr); + if (!eht_set) + continue; + mod_timer(&eht_set->timer, jiffies + br_multicast_lmqt(br)); } } -- GitLab From c9739016a03244e32f48c7f01176cd3b6ac1d916 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Wed, 20 Jan 2021 16:52:01 +0200 Subject: [PATCH 1309/2012] net: bridge: multicast: add EHT host filter_mode handling We should be able to handle host filter mode changing. For exclude mode we must create a zero-src entry so the group will be kept even without any S,G entries (non-zero source sets). That entry doesn't count to the entry limit and can always be created, its timer is refreshed on new exclude reports and if we change the host filter mode to include then it gets removed and we rely only on the non-zero source sets. Signed-off-by: Nikolay Aleksandrov Signed-off-by: Jakub Kicinski --- net/bridge/br_multicast_eht.c | 42 +++++++++++++++++++++++++++++++---- 1 file changed, 38 insertions(+), 4 deletions(-) diff --git a/net/bridge/br_multicast_eht.c b/net/bridge/br_multicast_eht.c index fee3060d04959..64ccbd4ae9d9e 100644 --- a/net/bridge/br_multicast_eht.c +++ b/net/bridge/br_multicast_eht.c @@ -381,6 +381,30 @@ static void br_multicast_ip_src_to_eht_addr(const struct br_ip *src, } } +static void br_eht_convert_host_filter_mode(struct net_bridge_port_group *pg, + union net_bridge_eht_addr *h_addr, + int filter_mode) +{ + struct net_bridge_group_eht_host *eht_host; + union net_bridge_eht_addr zero_addr; + + eht_host = br_multicast_eht_host_lookup(pg, h_addr); + if (eht_host) + eht_host->filter_mode = filter_mode; + + memset(&zero_addr, 0, sizeof(zero_addr)); + switch (filter_mode) { + case MCAST_INCLUDE: + br_multicast_del_eht_set_entry(pg, &zero_addr, h_addr); + break; + case MCAST_EXCLUDE: + br_multicast_create_eht_set_entry(pg, &zero_addr, h_addr, + MCAST_EXCLUDE, + true); + break; + } +} + static void br_multicast_create_eht_set_entry(struct net_bridge_port_group *pg, union net_bridge_eht_addr *src_addr, union net_bridge_eht_addr *h_addr, @@ -701,8 +725,13 @@ static bool br_multicast_eht_inc(struct net_bridge_port_group *pg, size_t addr_size, bool to_report) { - return __eht_inc_exc(pg, h_addr, srcs, nsrcs, addr_size, MCAST_INCLUDE, - to_report); + bool changed; + + changed = __eht_inc_exc(pg, h_addr, srcs, nsrcs, addr_size, + MCAST_INCLUDE, to_report); + br_eht_convert_host_filter_mode(pg, h_addr, MCAST_INCLUDE); + + return changed; } static bool br_multicast_eht_exc(struct net_bridge_port_group *pg, @@ -712,8 +741,13 @@ static bool br_multicast_eht_exc(struct net_bridge_port_group *pg, size_t addr_size, bool to_report) { - return __eht_inc_exc(pg, h_addr, srcs, nsrcs, addr_size, MCAST_EXCLUDE, - to_report); + bool changed; + + changed = __eht_inc_exc(pg, h_addr, srcs, nsrcs, addr_size, + MCAST_EXCLUDE, to_report); + br_eht_convert_host_filter_mode(pg, h_addr, MCAST_EXCLUDE); + + return changed; } static bool __eht_ip4_handle(struct net_bridge_port_group *pg, -- GitLab From e87e4b5caa5db4ab14508e75ec5926a1c05020ac Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Wed, 20 Jan 2021 16:52:02 +0200 Subject: [PATCH 1310/2012] net: bridge: multicast: handle block pg delete for all cases A block report can result in empty source and host sets for both include and exclude groups so if there are no hosts left we can safely remove the group. Pull the block group handling so it can cover both cases and add a check if EHT requires the delete. Signed-off-by: Nikolay Aleksandrov Signed-off-by: Jakub Kicinski --- net/bridge/br_multicast.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 9cfc004312aba..47afb1e11daf3 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -2210,14 +2210,6 @@ static bool __grp_src_block_incl(struct net_bridge_port_group *pg, void *h_addr, if (to_send) __grp_src_query_marked_and_rexmit(pg); - if (pg->filter_mode == MCAST_INCLUDE && hlist_empty(&pg->src_list)) { - br_multicast_find_del_pg(pg->key.port->br, pg); - /* a notification has already been sent and we shouldn't access - * pg after the delete thus we have to return false - */ - changed = false; - } - return changed; } @@ -2279,6 +2271,15 @@ static bool br_multicast_block(struct net_bridge_port_group *pg, void *h_addr, break; } + if ((pg->filter_mode == MCAST_INCLUDE && hlist_empty(&pg->src_list)) || + br_multicast_eht_should_del_pg(pg)) { + br_multicast_find_del_pg(pg->key.port->br, pg); + /* a notification has already been sent and we shouldn't + * access pg after the delete so we have to return false + */ + changed = false; + } + return changed; } -- GitLab From d5a1022283c3b0baa252506b34178266a4c0db4d Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Wed, 20 Jan 2021 16:52:03 +0200 Subject: [PATCH 1311/2012] net: bridge: multicast: mark IGMPv3/MLDv2 fast-leave deletes Mark groups which were deleted due to fast leave/EHT. Signed-off-by: Nikolay Aleksandrov Signed-off-by: Jakub Kicinski --- net/bridge/br_multicast.c | 21 ++++++++++++++------- net/bridge/br_multicast_eht.c | 8 ++++---- net/bridge/br_private.h | 3 ++- 3 files changed, 20 insertions(+), 12 deletions(-) diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 47afb1e11daf3..df5db6a58e952 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -442,7 +442,8 @@ static void br_multicast_fwd_src_add(struct net_bridge_group_src *src) br_multicast_sg_add_exclude_ports(star_mp, sg); } -static void br_multicast_fwd_src_remove(struct net_bridge_group_src *src) +static void br_multicast_fwd_src_remove(struct net_bridge_group_src *src, + bool fastleave) { struct net_bridge_port_group *p, *pg = src->pg; struct net_bridge_port_group __rcu **pp; @@ -467,6 +468,8 @@ static void br_multicast_fwd_src_remove(struct net_bridge_group_src *src) (p->flags & MDB_PG_FLAGS_PERMANENT)) break; + if (fastleave) + p->flags |= MDB_PG_FLAGS_FAST_LEAVE; br_multicast_del_pg(mp, p, pp); break; } @@ -560,11 +563,12 @@ static void br_multicast_destroy_group_src(struct net_bridge_mcast_gc *gc) kfree_rcu(src, rcu); } -void br_multicast_del_group_src(struct net_bridge_group_src *src) +void br_multicast_del_group_src(struct net_bridge_group_src *src, + bool fastleave) { struct net_bridge *br = src->pg->key.port->br; - br_multicast_fwd_src_remove(src); + br_multicast_fwd_src_remove(src, fastleave); hlist_del_init_rcu(&src->node); src->pg->src_ents--; hlist_add_head(&src->mcast_gc.gc_node, &br->mcast_gc_list); @@ -596,7 +600,7 @@ void br_multicast_del_pg(struct net_bridge_mdb_entry *mp, hlist_del_init(&pg->mglist); br_multicast_eht_clean_sets(pg); hlist_for_each_entry_safe(ent, tmp, &pg->src_list, node) - br_multicast_del_group_src(ent); + br_multicast_del_group_src(ent, false); br_mdb_notify(br->dev, mp, pg, RTM_DELMDB); if (!br_multicast_is_star_g(&mp->addr)) { rhashtable_remove_fast(&br->sg_port_tbl, &pg->rhnode, @@ -653,7 +657,7 @@ static void br_multicast_port_group_expired(struct timer_list *t) pg->filter_mode = MCAST_INCLUDE; hlist_for_each_entry_safe(src_ent, tmp, &pg->src_list, node) { if (!timer_pending(&src_ent->timer)) { - br_multicast_del_group_src(src_ent); + br_multicast_del_group_src(src_ent, false); changed = true; } } @@ -1080,7 +1084,7 @@ static void br_multicast_group_src_expired(struct timer_list *t) pg = src->pg; if (pg->filter_mode == MCAST_INCLUDE) { - br_multicast_del_group_src(src); + br_multicast_del_group_src(src, false); if (!hlist_empty(&pg->src_list)) goto out; br_multicast_find_del_pg(br, pg); @@ -1704,7 +1708,7 @@ static int __grp_src_delete_marked(struct net_bridge_port_group *pg) hlist_for_each_entry_safe(ent, tmp, &pg->src_list, node) if (ent->flags & BR_SGRP_F_DELETE) { - br_multicast_del_group_src(ent); + br_multicast_del_group_src(ent, false); deleted++; } @@ -2053,6 +2057,7 @@ static bool br_multicast_toin(struct net_bridge_port_group *pg, void *h_addr, } if (br_multicast_eht_should_del_pg(pg)) { + pg->flags |= MDB_PG_FLAGS_FAST_LEAVE; br_multicast_find_del_pg(pg->key.port->br, pg); /* a notification has already been sent and we shouldn't * access pg after the delete so we have to return false @@ -2273,6 +2278,8 @@ static bool br_multicast_block(struct net_bridge_port_group *pg, void *h_addr, if ((pg->filter_mode == MCAST_INCLUDE && hlist_empty(&pg->src_list)) || br_multicast_eht_should_del_pg(pg)) { + if (br_multicast_eht_should_del_pg(pg)) + pg->flags |= MDB_PG_FLAGS_FAST_LEAVE; br_multicast_find_del_pg(pg->key.port->br, pg); /* a notification has already been sent and we shouldn't * access pg after the delete so we have to return false diff --git a/net/bridge/br_multicast_eht.c b/net/bridge/br_multicast_eht.c index 64ccbd4ae9d9e..a4fa1760bc8a2 100644 --- a/net/bridge/br_multicast_eht.c +++ b/net/bridge/br_multicast_eht.c @@ -537,7 +537,7 @@ static bool __eht_allow_excl(struct net_bridge_port_group *pg, src_ent = br_multicast_find_group_src(pg, &src_ip); if (!src_ent) continue; - br_multicast_del_group_src(src_ent); + br_multicast_del_group_src(src_ent, true); changed = true; } } @@ -588,7 +588,7 @@ static bool __eht_block_incl(struct net_bridge_port_group *pg, src_ent = br_multicast_find_group_src(pg, &src_ip); if (!src_ent) continue; - br_multicast_del_group_src(src_ent); + br_multicast_del_group_src(src_ent, true); changed = true; } @@ -625,7 +625,7 @@ static bool __eht_block_excl(struct net_bridge_port_group *pg, src_ent = br_multicast_find_group_src(pg, &src_ip); if (!src_ent) continue; - br_multicast_del_group_src(src_ent); + br_multicast_del_group_src(src_ent, true); changed = true; } } @@ -689,7 +689,7 @@ static bool __eht_inc_exc(struct net_bridge_port_group *pg, br_multicast_ip_src_to_eht_addr(&src_ent->addr, &eht_src_addr); if (!br_multicast_eht_set_lookup(pg, &eht_src_addr)) { - br_multicast_del_group_src(src_ent); + br_multicast_del_group_src(src_ent, true); changed = true; continue; } diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index cad967690e9f7..0e26ba623006c 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -850,7 +850,8 @@ void br_multicast_sg_add_exclude_ports(struct net_bridge_mdb_entry *star_mp, struct net_bridge_port_group *sg); struct net_bridge_group_src * br_multicast_find_group_src(struct net_bridge_port_group *pg, struct br_ip *ip); -void br_multicast_del_group_src(struct net_bridge_group_src *src); +void br_multicast_del_group_src(struct net_bridge_group_src *src, + bool fastleave); static inline bool br_group_is_l2(const struct br_ip *group) { -- GitLab From 925bba24e68a97b9c7926f2df4f405af883e6d0c Mon Sep 17 00:00:00 2001 From: Arjun Roy Date: Wed, 20 Jan 2021 16:41:47 -0800 Subject: [PATCH 1312/2012] tcp: Remove CMSG magic numbers for tcp_recvmsg(). At present, tcp_recvmsg() uses flags to track if any CMSGs are pending and what those CMSGs are. These flags are currently magic numbers, used only within tcp_recvmsg(). To prepare for receive timestamp support in tcp receive zerocopy, gently refactor these magic numbers into enums. Signed-off-by: Arjun Roy Signed-off-by: Jakub Kicinski --- net/ipv4/tcp.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index a1a17b64f8cd6..7ba0bd0675cbd 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -280,6 +280,12 @@ #include #include +/* Track pending CMSGs. */ +enum { + TCP_CMSG_INQ = 1, + TCP_CMSG_TS = 2 +}; + struct percpu_counter tcp_orphan_count; EXPORT_SYMBOL_GPL(tcp_orphan_count); @@ -2272,7 +2278,7 @@ static int tcp_recvmsg_locked(struct sock *sk, struct msghdr *msg, size_t len, goto out; if (tp->recvmsg_inq) - *cmsg_flags = 1; + *cmsg_flags = TCP_CMSG_INQ; timeo = sock_rcvtimeo(sk, nonblock); /* Urgent data needs to be handled specially. */ @@ -2453,7 +2459,7 @@ skip_copy: if (TCP_SKB_CB(skb)->has_rxtstamp) { tcp_update_recv_tstamps(skb, tss); - *cmsg_flags |= 2; + *cmsg_flags |= TCP_CMSG_TS; } if (used + offset < skb->len) @@ -2513,9 +2519,9 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, release_sock(sk); if (cmsg_flags && ret >= 0) { - if (cmsg_flags & 2) + if (cmsg_flags & TCP_CMSG_TS) tcp_recv_timestamp(msg, sk, &tss); - if (cmsg_flags & 1) { + if (cmsg_flags & TCP_CMSG_INQ) { inq = tcp_inq_hint(sk); put_cmsg(msg, SOL_TCP, TCP_CM_INQ, sizeof(inq), &inq); } -- GitLab From 7eeba1706eba6def15f6cb2fc7b3c3b9a2651edc Mon Sep 17 00:00:00 2001 From: Arjun Roy Date: Wed, 20 Jan 2021 16:41:48 -0800 Subject: [PATCH 1313/2012] tcp: Add receive timestamp support for receive zerocopy. tcp_recvmsg() uses the CMSG mechanism to receive control information like packet receive timestamps. This patch adds CMSG fields to struct tcp_zerocopy_receive, and provides receive timestamps if available to the user. Signed-off-by: Arjun Roy Signed-off-by: Jakub Kicinski --- include/uapi/linux/tcp.h | 4 ++ net/ipv4/tcp.c | 116 ++++++++++++++++++++++++++++----------- 2 files changed, 88 insertions(+), 32 deletions(-) diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h index 16dfa40bdac38..42fc5a640df49 100644 --- a/include/uapi/linux/tcp.h +++ b/include/uapi/linux/tcp.h @@ -354,5 +354,9 @@ struct tcp_zerocopy_receive { __u64 copybuf_address; /* in: copybuf address (small reads) */ __s32 copybuf_len; /* in/out: copybuf bytes avail/used or error */ __u32 flags; /* in: flags */ + __u64 msg_control; /* ancillary data */ + __u64 msg_controllen; + __u32 msg_flags; + /* __u32 hole; Next we must add >1 u32 otherwise length checks fail. */ }; #endif /* _UAPI_LINUX_TCP_H */ diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 7ba0bd0675cbd..e1a17c6b473c1 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1745,6 +1745,20 @@ int tcp_set_rcvlowat(struct sock *sk, int val) } EXPORT_SYMBOL(tcp_set_rcvlowat); +static void tcp_update_recv_tstamps(struct sk_buff *skb, + struct scm_timestamping_internal *tss) +{ + if (skb->tstamp) + tss->ts[0] = ktime_to_timespec64(skb->tstamp); + else + tss->ts[0] = (struct timespec64) {0}; + + if (skb_hwtstamps(skb)->hwtstamp) + tss->ts[2] = ktime_to_timespec64(skb_hwtstamps(skb)->hwtstamp); + else + tss->ts[2] = (struct timespec64) {0}; +} + #ifdef CONFIG_MMU static const struct vm_operations_struct tcp_vm_ops = { }; @@ -1848,13 +1862,13 @@ static int tcp_recvmsg_locked(struct sock *sk, struct msghdr *msg, size_t len, struct scm_timestamping_internal *tss, int *cmsg_flags); static int receive_fallback_to_copy(struct sock *sk, - struct tcp_zerocopy_receive *zc, int inq) + struct tcp_zerocopy_receive *zc, int inq, + struct scm_timestamping_internal *tss) { unsigned long copy_address = (unsigned long)zc->copybuf_address; - struct scm_timestamping_internal tss_unused; - int err, cmsg_flags_unused; struct msghdr msg = {}; struct iovec iov; + int err; zc->length = 0; zc->recv_skip_hint = 0; @@ -1868,7 +1882,7 @@ static int receive_fallback_to_copy(struct sock *sk, return err; err = tcp_recvmsg_locked(sk, &msg, inq, /*nonblock=*/1, /*flags=*/0, - &tss_unused, &cmsg_flags_unused); + tss, &zc->msg_flags); if (err < 0) return err; @@ -1909,21 +1923,27 @@ static int tcp_copy_straggler_data(struct tcp_zerocopy_receive *zc, return (__s32)copylen; } -static int tcp_zerocopy_handle_leftover_data(struct tcp_zerocopy_receive *zc, - struct sock *sk, - struct sk_buff *skb, - u32 *seq, - s32 copybuf_len) +static int tcp_zc_handle_leftover(struct tcp_zerocopy_receive *zc, + struct sock *sk, + struct sk_buff *skb, + u32 *seq, + s32 copybuf_len, + struct scm_timestamping_internal *tss) { u32 offset, copylen = min_t(u32, copybuf_len, zc->recv_skip_hint); if (!copylen) return 0; /* skb is null if inq < PAGE_SIZE. */ - if (skb) + if (skb) { offset = *seq - TCP_SKB_CB(skb)->seq; - else + } else { skb = tcp_recv_skb(sk, *seq, &offset); + if (TCP_SKB_CB(skb)->has_rxtstamp) { + tcp_update_recv_tstamps(skb, tss); + zc->msg_flags |= TCP_CMSG_TS; + } + } zc->copybuf_len = tcp_copy_straggler_data(zc, skb, copylen, &offset, seq); @@ -2010,9 +2030,37 @@ static int tcp_zerocopy_vm_insert_batch(struct vm_area_struct *vma, err); } +static void tcp_recv_timestamp(struct msghdr *msg, const struct sock *sk, + struct scm_timestamping_internal *tss); +static void tcp_zc_finalize_rx_tstamp(struct sock *sk, + struct tcp_zerocopy_receive *zc, + struct scm_timestamping_internal *tss) +{ + unsigned long msg_control_addr; + struct msghdr cmsg_dummy; + + msg_control_addr = (unsigned long)zc->msg_control; + cmsg_dummy.msg_control = (void *)msg_control_addr; + cmsg_dummy.msg_controllen = + (__kernel_size_t)zc->msg_controllen; + cmsg_dummy.msg_flags = in_compat_syscall() + ? MSG_CMSG_COMPAT : 0; + zc->msg_flags = 0; + if (zc->msg_control == msg_control_addr && + zc->msg_controllen == cmsg_dummy.msg_controllen) { + tcp_recv_timestamp(&cmsg_dummy, sk, tss); + zc->msg_control = (__u64) + ((uintptr_t)cmsg_dummy.msg_control); + zc->msg_controllen = + (__u64)cmsg_dummy.msg_controllen; + zc->msg_flags = (__u32)cmsg_dummy.msg_flags; + } +} + #define TCP_ZEROCOPY_PAGE_BATCH_SIZE 32 static int tcp_zerocopy_receive(struct sock *sk, - struct tcp_zerocopy_receive *zc) + struct tcp_zerocopy_receive *zc, + struct scm_timestamping_internal *tss) { u32 length = 0, offset, vma_len, avail_len, copylen = 0; unsigned long address = (unsigned long)zc->address; @@ -2029,6 +2077,7 @@ static int tcp_zerocopy_receive(struct sock *sk, int ret; zc->copybuf_len = 0; + zc->msg_flags = 0; if (address & (PAGE_SIZE - 1) || address != zc->address) return -EINVAL; @@ -2039,7 +2088,7 @@ static int tcp_zerocopy_receive(struct sock *sk, sock_rps_record_flow(sk); if (inq && inq <= copybuf_len) - return receive_fallback_to_copy(sk, zc, inq); + return receive_fallback_to_copy(sk, zc, inq, tss); if (inq < PAGE_SIZE) { zc->length = 0; @@ -2084,6 +2133,11 @@ static int tcp_zerocopy_receive(struct sock *sk, } else { skb = tcp_recv_skb(sk, seq, &offset); } + + if (TCP_SKB_CB(skb)->has_rxtstamp) { + tcp_update_recv_tstamps(skb, tss); + zc->msg_flags |= TCP_CMSG_TS; + } zc->recv_skip_hint = skb->len - offset; frags = skb_advance_to_frag(skb, offset, &offset_frag); if (!frags || offset_frag) @@ -2126,8 +2180,7 @@ out: mmap_read_unlock(current->mm); /* Try to copy straggler data. */ if (!ret) - copylen = tcp_zerocopy_handle_leftover_data(zc, sk, skb, &seq, - copybuf_len); + copylen = tcp_zc_handle_leftover(zc, sk, skb, &seq, copybuf_len, tss); if (length + copylen) { WRITE_ONCE(tp->copied_seq, seq); @@ -2148,20 +2201,6 @@ out: } #endif -static void tcp_update_recv_tstamps(struct sk_buff *skb, - struct scm_timestamping_internal *tss) -{ - if (skb->tstamp) - tss->ts[0] = ktime_to_timespec64(skb->tstamp); - else - tss->ts[0] = (struct timespec64) {0}; - - if (skb_hwtstamps(skb)->hwtstamp) - tss->ts[2] = ktime_to_timespec64(skb_hwtstamps(skb)->hwtstamp); - else - tss->ts[2] = (struct timespec64) {0}; -} - /* Similar to __sock_recv_timestamp, but does not require an skb */ static void tcp_recv_timestamp(struct msghdr *msg, const struct sock *sk, struct scm_timestamping_internal *tss) @@ -4105,6 +4144,7 @@ static int do_tcp_getsockopt(struct sock *sk, int level, } #ifdef CONFIG_MMU case TCP_ZEROCOPY_RECEIVE: { + struct scm_timestamping_internal tss; struct tcp_zerocopy_receive zc = {}; int err; @@ -4120,11 +4160,18 @@ static int do_tcp_getsockopt(struct sock *sk, int level, if (copy_from_user(&zc, optval, len)) return -EFAULT; lock_sock(sk); - err = tcp_zerocopy_receive(sk, &zc); + err = tcp_zerocopy_receive(sk, &zc, &tss); release_sock(sk); - if (len >= offsetofend(struct tcp_zerocopy_receive, err)) - goto zerocopy_rcv_sk_err; + if (len >= offsetofend(struct tcp_zerocopy_receive, msg_flags)) + goto zerocopy_rcv_cmsg; switch (len) { + case offsetofend(struct tcp_zerocopy_receive, msg_flags): + goto zerocopy_rcv_cmsg; + case offsetofend(struct tcp_zerocopy_receive, msg_controllen): + case offsetofend(struct tcp_zerocopy_receive, msg_control): + case offsetofend(struct tcp_zerocopy_receive, flags): + case offsetofend(struct tcp_zerocopy_receive, copybuf_len): + case offsetofend(struct tcp_zerocopy_receive, copybuf_address): case offsetofend(struct tcp_zerocopy_receive, err): goto zerocopy_rcv_sk_err; case offsetofend(struct tcp_zerocopy_receive, inq): @@ -4133,6 +4180,11 @@ static int do_tcp_getsockopt(struct sock *sk, int level, default: goto zerocopy_rcv_out; } +zerocopy_rcv_cmsg: + if (zc.msg_flags & TCP_CMSG_TS) + tcp_zc_finalize_rx_tstamp(sk, &zc, &tss); + else + zc.msg_flags = 0; zerocopy_rcv_sk_err: if (!err) zc.err = sock_error(sk); -- GitLab From 336e8eb2a3cfe2285c314cd85630076da365f6c6 Mon Sep 17 00:00:00 2001 From: Guo Ren Date: Thu, 21 Jan 2021 14:31:17 +0800 Subject: [PATCH 1314/2012] riscv: Fixup pfn_valid error with wrong max_mapnr The max_mapnr is the number of PFNs, not absolute PFN offset. Signed-off-by: Guo Ren Fixes: d0d8aae64566 ("RISC-V: Set maximum number of mapped pages correctly") Cc: stable@vger.kernel.org Signed-off-by: Palmer Dabbelt --- arch/riscv/mm/init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index 7cd4993f4ff21..f9f9568d689ef 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -196,7 +196,7 @@ void __init setup_bootmem(void) max_pfn = PFN_DOWN(dram_end); max_low_pfn = max_pfn; dma32_phys_limit = min(4UL * SZ_1G, (unsigned long)PFN_PHYS(max_low_pfn)); - set_max_mapnr(max_low_pfn); + set_max_mapnr(max_low_pfn - ARCH_PFN_OFFSET); #ifdef CONFIG_BLK_DEV_INITRD setup_initrd(); -- GitLab From ca1e4ab199933e1af3f9a86d31060b7f9181c3fc Mon Sep 17 00:00:00 2001 From: Maxim Mikityanskiy Date: Tue, 19 Jan 2021 14:08:11 +0200 Subject: [PATCH 1315/2012] net: sched: Add multi-queue support to sch_tree_lock The existing qdiscs that set TCQ_F_MQROOT don't use sch_tree_lock. However, hardware-offloaded HTB will start setting this flag while also using sch_tree_lock. The current implementation of sch_tree_lock basically locks on qdisc->dev_queue->qdisc, and it works fine when the tree is attached to some queue. However, it's not the case for MQROOT qdiscs: such a qdisc is the root itself, and its dev_queue just points to queue 0, while not actually being used, because there are real per-queue qdiscs. This patch changes the logic of sch_tree_lock and sch_tree_unlock to lock the qdisc itself if it's the MQROOT. Signed-off-by: Maxim Mikityanskiy Reviewed-by: Tariq Toukan Signed-off-by: Jakub Kicinski --- include/net/sch_generic.h | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index e7bee99aebce4..2969190105523 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -552,14 +552,20 @@ static inline struct net_device *qdisc_dev(const struct Qdisc *qdisc) return qdisc->dev_queue->dev; } -static inline void sch_tree_lock(const struct Qdisc *q) +static inline void sch_tree_lock(struct Qdisc *q) { - spin_lock_bh(qdisc_root_sleeping_lock(q)); + if (q->flags & TCQ_F_MQROOT) + spin_lock_bh(qdisc_lock(q)); + else + spin_lock_bh(qdisc_root_sleeping_lock(q)); } -static inline void sch_tree_unlock(const struct Qdisc *q) +static inline void sch_tree_unlock(struct Qdisc *q) { - spin_unlock_bh(qdisc_root_sleeping_lock(q)); + if (q->flags & TCQ_F_MQROOT) + spin_unlock_bh(qdisc_lock(q)); + else + spin_unlock_bh(qdisc_root_sleeping_lock(q)); } extern struct Qdisc noop_qdisc; -- GitLab From 4dd78a73738afa92d33a226ec477b42938b31c83 Mon Sep 17 00:00:00 2001 From: Maxim Mikityanskiy Date: Tue, 19 Jan 2021 14:08:12 +0200 Subject: [PATCH 1316/2012] net: sched: Add extack to Qdisc_class_ops.delete In a following commit, sch_htb will start using extack in the delete class operation to pass hardware errors in offload mode. This commit prepares for that by adding the extack parameter to this callback and converting usage of the existing qdiscs. Signed-off-by: Maxim Mikityanskiy Reviewed-by: Tariq Toukan Signed-off-by: Jakub Kicinski --- include/net/sch_generic.h | 3 ++- net/sched/sch_api.c | 7 ++++--- net/sched/sch_atm.c | 3 ++- net/sched/sch_cbq.c | 3 ++- net/sched/sch_drr.c | 3 ++- net/sched/sch_dsmark.c | 3 ++- net/sched/sch_hfsc.c | 3 ++- net/sched/sch_htb.c | 3 ++- net/sched/sch_qfq.c | 3 ++- net/sched/sch_sfb.c | 3 ++- 10 files changed, 22 insertions(+), 12 deletions(-) diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 2969190105523..070f01bf17eb9 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -210,7 +210,8 @@ struct Qdisc_class_ops { int (*change)(struct Qdisc *, u32, u32, struct nlattr **, unsigned long *, struct netlink_ext_ack *); - int (*delete)(struct Qdisc *, unsigned long); + int (*delete)(struct Qdisc *, unsigned long, + struct netlink_ext_ack *); void (*walk)(struct Qdisc *, struct qdisc_walker * arg); /* Filter manipulation */ diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 6fe4e5cc807c9..e2e4353db8a70 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -1866,7 +1866,8 @@ static int tclass_notify(struct net *net, struct sk_buff *oskb, static int tclass_del_notify(struct net *net, const struct Qdisc_class_ops *cops, struct sk_buff *oskb, struct nlmsghdr *n, - struct Qdisc *q, unsigned long cl) + struct Qdisc *q, unsigned long cl, + struct netlink_ext_ack *extack) { u32 portid = oskb ? NETLINK_CB(oskb).portid : 0; struct sk_buff *skb; @@ -1885,7 +1886,7 @@ static int tclass_del_notify(struct net *net, return -EINVAL; } - err = cops->delete(q, cl); + err = cops->delete(q, cl, extack); if (err) { kfree_skb(skb); return err; @@ -2088,7 +2089,7 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, goto out; break; case RTM_DELTCLASS: - err = tclass_del_notify(net, cops, skb, n, q, cl); + err = tclass_del_notify(net, cops, skb, n, q, cl, extack); /* Unbind the class with flilters with 0 */ tc_bind_tclass(q, portid, clid, 0); goto out; diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c index 007bd2d9f1ff9..d0c9a57398fc2 100644 --- a/net/sched/sch_atm.c +++ b/net/sched/sch_atm.c @@ -320,7 +320,8 @@ err_out: return error; } -static int atm_tc_delete(struct Qdisc *sch, unsigned long arg) +static int atm_tc_delete(struct Qdisc *sch, unsigned long arg, + struct netlink_ext_ack *extack) { struct atm_qdisc_data *p = qdisc_priv(sch); struct atm_flow_data *flow = (struct atm_flow_data *)arg; diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index 53d45e029c36d..320b3d31fa97f 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -1675,7 +1675,8 @@ failure: return err; } -static int cbq_delete(struct Qdisc *sch, unsigned long arg) +static int cbq_delete(struct Qdisc *sch, unsigned long arg, + struct netlink_ext_ack *extack) { struct cbq_sched_data *q = qdisc_priv(sch); struct cbq_class *cl = (struct cbq_class *)arg; diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c index dde564670ad8c..fc1e470695936 100644 --- a/net/sched/sch_drr.c +++ b/net/sched/sch_drr.c @@ -146,7 +146,8 @@ static void drr_destroy_class(struct Qdisc *sch, struct drr_class *cl) kfree(cl); } -static int drr_delete_class(struct Qdisc *sch, unsigned long arg) +static int drr_delete_class(struct Qdisc *sch, unsigned long arg, + struct netlink_ext_ack *extack) { struct drr_sched *q = qdisc_priv(sch); struct drr_class *cl = (struct drr_class *)arg; diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c index 2b88710994d71..cd2748e2d4a20 100644 --- a/net/sched/sch_dsmark.c +++ b/net/sched/sch_dsmark.c @@ -150,7 +150,8 @@ errout: return err; } -static int dsmark_delete(struct Qdisc *sch, unsigned long arg) +static int dsmark_delete(struct Qdisc *sch, unsigned long arg, + struct netlink_ext_ack *extack) { struct dsmark_qdisc_data *p = qdisc_priv(sch); diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index d1902fca98447..bf0034c66e359 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -1090,7 +1090,8 @@ hfsc_destroy_class(struct Qdisc *sch, struct hfsc_class *cl) } static int -hfsc_delete_class(struct Qdisc *sch, unsigned long arg) +hfsc_delete_class(struct Qdisc *sch, unsigned long arg, + struct netlink_ext_ack *extack) { struct hfsc_sched *q = qdisc_priv(sch); struct hfsc_class *cl = (struct hfsc_class *)arg; diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index cd70dbcbd72fd..a8fc97b05bd87 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -1246,7 +1246,8 @@ static void htb_destroy(struct Qdisc *sch) __qdisc_reset_queue(&q->direct_queue); } -static int htb_delete(struct Qdisc *sch, unsigned long arg) +static int htb_delete(struct Qdisc *sch, unsigned long arg, + struct netlink_ext_ack *extack) { struct htb_sched *q = qdisc_priv(sch); struct htb_class *cl = (struct htb_class *)arg; diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c index 6335230a971e2..1db9d4a2ef5ef 100644 --- a/net/sched/sch_qfq.c +++ b/net/sched/sch_qfq.c @@ -529,7 +529,8 @@ static void qfq_destroy_class(struct Qdisc *sch, struct qfq_class *cl) kfree(cl); } -static int qfq_delete_class(struct Qdisc *sch, unsigned long arg) +static int qfq_delete_class(struct Qdisc *sch, unsigned long arg, + struct netlink_ext_ack *extack) { struct qfq_sched *q = qdisc_priv(sch); struct qfq_class *cl = (struct qfq_class *)arg; diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c index da047a37a3bf3..dde829d4b9f83 100644 --- a/net/sched/sch_sfb.c +++ b/net/sched/sch_sfb.c @@ -649,7 +649,8 @@ static int sfb_change_class(struct Qdisc *sch, u32 classid, u32 parentid, return -ENOSYS; } -static int sfb_delete(struct Qdisc *sch, unsigned long cl) +static int sfb_delete(struct Qdisc *sch, unsigned long cl, + struct netlink_ext_ack *extack) { return -ENOSYS; } -- GitLab From d03b195b5aa015f6c11988b86a3625f8d5dbac52 Mon Sep 17 00:00:00 2001 From: Maxim Mikityanskiy Date: Tue, 19 Jan 2021 14:08:13 +0200 Subject: [PATCH 1317/2012] sch_htb: Hierarchical QoS hardware offload HTB doesn't scale well because of contention on a single lock, and it also consumes CPU. This patch adds support for offloading HTB to hardware that supports hierarchical rate limiting. In the offload mode, HTB passes control commands to the driver using ndo_setup_tc. The driver has to replicate the whole hierarchy of classes and their settings (rate, ceil) in the NIC. Every modification of the HTB tree caused by the admin results in ndo_setup_tc being called. After this setup, the HTB algorithm is done completely in the NIC. An SQ (send queue) is created for every leaf class and attached to the hierarchy, so that the NIC can calculate and obey aggregated rate limits, too. In the future, it can be changed, so that multiple SQs will back a single leaf class. ndo_select_queue is responsible for selecting the right queue that serves the traffic class of each packet. The data path works as follows: a packet is classified by clsact, the driver selects a hardware queue according to its class, and the packet is enqueued into this queue's qdisc. This solution addresses two main problems of scaling HTB: 1. Contention by flow classification. Currently the filters are attached to the HTB instance as follows: # tc filter add dev eth0 parent 1:0 protocol ip flower dst_port 80 classid 1:10 It's possible to move classification to clsact egress hook, which is thread-safe and lock-free: # tc filter add dev eth0 egress protocol ip flower dst_port 80 action skbedit priority 1:10 This way classification still happens in software, but the lock contention is eliminated, and it happens before selecting the TX queue, allowing the driver to translate the class to the corresponding hardware queue in ndo_select_queue. Note that this is already compatible with non-offloaded HTB and doesn't require changes to the kernel nor iproute2. 2. Contention by handling packets. HTB is not multi-queue, it attaches to a whole net device, and handling of all packets takes the same lock. When HTB is offloaded, it registers itself as a multi-queue qdisc, similarly to mq: HTB is attached to the netdev, and each queue has its own qdisc. Some features of HTB may be not supported by some particular hardware, for example, the maximum number of classes may be limited, the granularity of rate and ceil parameters may be different, etc. - so, the offload is not enabled by default, a new parameter is used to enable it: # tc qdisc replace dev eth0 root handle 1: htb offload Signed-off-by: Maxim Mikityanskiy Reviewed-by: Tariq Toukan Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 1 + include/net/pkt_cls.h | 36 ++ include/uapi/linux/pkt_sched.h | 1 + net/sched/sch_htb.c | 501 +++++++++++++++++++++++++-- tools/include/uapi/linux/pkt_sched.h | 1 + 5 files changed, 512 insertions(+), 28 deletions(-) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index ef517254367d9..9e8572533d8e6 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -858,6 +858,7 @@ enum tc_setup_type { TC_SETUP_QDISC_ETS, TC_SETUP_QDISC_TBF, TC_SETUP_QDISC_FIFO, + TC_SETUP_QDISC_HTB, }; /* These structures hold the attributes of bpf state that are being passed diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index 0f2a9c44171c6..255e4f4b521f4 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -783,6 +783,42 @@ struct tc_mq_qopt_offload { }; }; +enum tc_htb_command { + /* Root */ + TC_HTB_CREATE, /* Initialize HTB offload. */ + TC_HTB_DESTROY, /* Destroy HTB offload. */ + + /* Classes */ + /* Allocate qid and create leaf. */ + TC_HTB_LEAF_ALLOC_QUEUE, + /* Convert leaf to inner, preserve and return qid, create new leaf. */ + TC_HTB_LEAF_TO_INNER, + /* Delete leaf, while siblings remain. */ + TC_HTB_LEAF_DEL, + /* Delete leaf, convert parent to leaf, preserving qid. */ + TC_HTB_LEAF_DEL_LAST, + /* TC_HTB_LEAF_DEL_LAST, but delete driver data on hardware errors. */ + TC_HTB_LEAF_DEL_LAST_FORCE, + /* Modify parameters of a node. */ + TC_HTB_NODE_MODIFY, + + /* Class qdisc */ + TC_HTB_LEAF_QUERY_QUEUE, /* Query qid by classid. */ +}; + +struct tc_htb_qopt_offload { + struct netlink_ext_ack *extack; + enum tc_htb_command command; + u16 classid; + u32 parent_classid; + u16 qid; + u16 moved_qid; + u64 rate; + u64 ceil; +}; + +#define TC_HTB_CLASSID_ROOT U32_MAX + enum tc_red_command { TC_RED_REPLACE, TC_RED_DESTROY, diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h index 9e7c2c6078456..79a699f106b14 100644 --- a/include/uapi/linux/pkt_sched.h +++ b/include/uapi/linux/pkt_sched.h @@ -434,6 +434,7 @@ enum { TCA_HTB_RATE64, TCA_HTB_CEIL64, TCA_HTB_PAD, + TCA_HTB_OFFLOAD, __TCA_HTB_MAX, }; diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index a8fc97b05bd87..d1b60fe3d311f 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -174,6 +174,11 @@ struct htb_sched { int row_mask[TC_HTB_MAXDEPTH]; struct htb_level hlevel[TC_HTB_MAXDEPTH]; + + struct Qdisc **direct_qdiscs; + unsigned int num_direct_qdiscs; + + bool offload; }; /* find class in global hash table using given handle */ @@ -957,7 +962,7 @@ static void htb_reset(struct Qdisc *sch) if (cl->level) memset(&cl->inner, 0, sizeof(cl->inner)); else { - if (cl->leaf.q) + if (cl->leaf.q && !q->offload) qdisc_reset(cl->leaf.q); } cl->prio_activity = 0; @@ -980,6 +985,7 @@ static const struct nla_policy htb_policy[TCA_HTB_MAX + 1] = { [TCA_HTB_DIRECT_QLEN] = { .type = NLA_U32 }, [TCA_HTB_RATE64] = { .type = NLA_U64 }, [TCA_HTB_CEIL64] = { .type = NLA_U64 }, + [TCA_HTB_OFFLOAD] = { .type = NLA_FLAG }, }; static void htb_work_func(struct work_struct *work) @@ -992,12 +998,27 @@ static void htb_work_func(struct work_struct *work) rcu_read_unlock(); } +static void htb_set_lockdep_class_child(struct Qdisc *q) +{ + static struct lock_class_key child_key; + + lockdep_set_class(qdisc_lock(q), &child_key); +} + +static int htb_offload(struct net_device *dev, struct tc_htb_qopt_offload *opt) +{ + return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_HTB, opt); +} + static int htb_init(struct Qdisc *sch, struct nlattr *opt, struct netlink_ext_ack *extack) { + struct net_device *dev = qdisc_dev(sch); + struct tc_htb_qopt_offload offload_opt; struct htb_sched *q = qdisc_priv(sch); struct nlattr *tb[TCA_HTB_MAX + 1]; struct tc_htb_glob *gopt; + unsigned int ntx; int err; qdisc_watchdog_init(&q->watchdog, sch); @@ -1022,9 +1043,26 @@ static int htb_init(struct Qdisc *sch, struct nlattr *opt, if (gopt->version != HTB_VER >> 16) return -EINVAL; + q->offload = nla_get_flag(tb[TCA_HTB_OFFLOAD]); + + if (q->offload) { + if (sch->parent != TC_H_ROOT) + return -EOPNOTSUPP; + + if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc) + return -EOPNOTSUPP; + + q->num_direct_qdiscs = dev->real_num_tx_queues; + q->direct_qdiscs = kcalloc(q->num_direct_qdiscs, + sizeof(*q->direct_qdiscs), + GFP_KERNEL); + if (!q->direct_qdiscs) + return -ENOMEM; + } + err = qdisc_class_hash_init(&q->clhash); if (err < 0) - return err; + goto err_free_direct_qdiscs; qdisc_skb_head_init(&q->direct_queue); @@ -1037,7 +1075,107 @@ static int htb_init(struct Qdisc *sch, struct nlattr *opt, q->rate2quantum = 1; q->defcls = gopt->defcls; + if (!q->offload) + return 0; + + for (ntx = 0; ntx < q->num_direct_qdiscs; ntx++) { + struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, ntx); + struct Qdisc *qdisc; + + qdisc = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops, + TC_H_MAKE(sch->handle, 0), extack); + if (!qdisc) { + err = -ENOMEM; + goto err_free_qdiscs; + } + + htb_set_lockdep_class_child(qdisc); + q->direct_qdiscs[ntx] = qdisc; + qdisc->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT; + } + + sch->flags |= TCQ_F_MQROOT; + + offload_opt = (struct tc_htb_qopt_offload) { + .command = TC_HTB_CREATE, + .parent_classid = TC_H_MAJ(sch->handle) >> 16, + .classid = TC_H_MIN(q->defcls), + .extack = extack, + }; + err = htb_offload(dev, &offload_opt); + if (err) + goto err_free_qdiscs; + return 0; + +err_free_qdiscs: + /* TC_HTB_CREATE call failed, avoid any further calls to the driver. */ + q->offload = false; + + for (ntx = 0; ntx < q->num_direct_qdiscs && q->direct_qdiscs[ntx]; + ntx++) + qdisc_put(q->direct_qdiscs[ntx]); + + qdisc_class_hash_destroy(&q->clhash); + /* Prevent use-after-free and double-free when htb_destroy gets called. + */ + q->clhash.hash = NULL; + q->clhash.hashsize = 0; + +err_free_direct_qdiscs: + kfree(q->direct_qdiscs); + q->direct_qdiscs = NULL; + return err; +} + +static void htb_attach_offload(struct Qdisc *sch) +{ + struct net_device *dev = qdisc_dev(sch); + struct htb_sched *q = qdisc_priv(sch); + unsigned int ntx; + + for (ntx = 0; ntx < q->num_direct_qdiscs; ntx++) { + struct Qdisc *old, *qdisc = q->direct_qdiscs[ntx]; + + old = dev_graft_qdisc(qdisc->dev_queue, qdisc); + qdisc_put(old); + qdisc_hash_add(qdisc, false); + } + for (ntx = q->num_direct_qdiscs; ntx < dev->num_tx_queues; ntx++) { + struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, ntx); + struct Qdisc *old = dev_graft_qdisc(dev_queue, NULL); + + qdisc_put(old); + } + + kfree(q->direct_qdiscs); + q->direct_qdiscs = NULL; +} + +static void htb_attach_software(struct Qdisc *sch) +{ + struct net_device *dev = qdisc_dev(sch); + unsigned int ntx; + + /* Resemble qdisc_graft behavior. */ + for (ntx = 0; ntx < dev->num_tx_queues; ntx++) { + struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, ntx); + struct Qdisc *old = dev_graft_qdisc(dev_queue, sch); + + qdisc_refcount_inc(sch); + + qdisc_put(old); + } +} + +static void htb_attach(struct Qdisc *sch) +{ + struct htb_sched *q = qdisc_priv(sch); + + if (q->offload) + htb_attach_offload(sch); + else + htb_attach_software(sch); } static int htb_dump(struct Qdisc *sch, struct sk_buff *skb) @@ -1046,6 +1184,11 @@ static int htb_dump(struct Qdisc *sch, struct sk_buff *skb) struct nlattr *nest; struct tc_htb_glob gopt; + if (q->offload) + sch->flags |= TCQ_F_OFFLOADED; + else + sch->flags &= ~TCQ_F_OFFLOADED; + sch->qstats.overlimits = q->overlimits; /* Its safe to not acquire qdisc lock. As we hold RTNL, * no change can happen on the qdisc parameters. @@ -1063,6 +1206,8 @@ static int htb_dump(struct Qdisc *sch, struct sk_buff *skb) if (nla_put(skb, TCA_HTB_INIT, sizeof(gopt), &gopt) || nla_put_u32(skb, TCA_HTB_DIRECT_QLEN, q->direct_qlen)) goto nla_put_failure; + if (q->offload && nla_put_flag(skb, TCA_HTB_OFFLOAD)) + goto nla_put_failure; return nla_nest_end(skb, nest); @@ -1144,19 +1289,97 @@ htb_dump_class_stats(struct Qdisc *sch, unsigned long arg, struct gnet_dump *d) return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats)); } +static struct netdev_queue * +htb_select_queue(struct Qdisc *sch, struct tcmsg *tcm) +{ + struct net_device *dev = qdisc_dev(sch); + struct tc_htb_qopt_offload offload_opt; + int err; + + offload_opt = (struct tc_htb_qopt_offload) { + .command = TC_HTB_LEAF_QUERY_QUEUE, + .classid = TC_H_MIN(tcm->tcm_parent), + }; + err = htb_offload(dev, &offload_opt); + if (err || offload_opt.qid >= dev->num_tx_queues) + return NULL; + return netdev_get_tx_queue(dev, offload_opt.qid); +} + +static struct Qdisc * +htb_graft_helper(struct netdev_queue *dev_queue, struct Qdisc *new_q) +{ + struct net_device *dev = dev_queue->dev; + struct Qdisc *old_q; + + if (dev->flags & IFF_UP) + dev_deactivate(dev); + old_q = dev_graft_qdisc(dev_queue, new_q); + if (new_q) + new_q->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT; + if (dev->flags & IFF_UP) + dev_activate(dev); + + return old_q; +} + +static void htb_offload_move_qdisc(struct Qdisc *sch, u16 qid_old, u16 qid_new) +{ + struct netdev_queue *queue_old, *queue_new; + struct net_device *dev = qdisc_dev(sch); + struct Qdisc *qdisc; + + queue_old = netdev_get_tx_queue(dev, qid_old); + queue_new = netdev_get_tx_queue(dev, qid_new); + + if (dev->flags & IFF_UP) + dev_deactivate(dev); + qdisc = dev_graft_qdisc(queue_old, NULL); + qdisc->dev_queue = queue_new; + qdisc = dev_graft_qdisc(queue_new, qdisc); + if (dev->flags & IFF_UP) + dev_activate(dev); + + WARN_ON(!(qdisc->flags & TCQ_F_BUILTIN)); +} + static int htb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, struct Qdisc **old, struct netlink_ext_ack *extack) { + struct netdev_queue *dev_queue = sch->dev_queue; struct htb_class *cl = (struct htb_class *)arg; + struct htb_sched *q = qdisc_priv(sch); + struct Qdisc *old_q; if (cl->level) return -EINVAL; - if (new == NULL && - (new = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, - cl->common.classid, extack)) == NULL) - return -ENOBUFS; + + if (q->offload) { + dev_queue = new->dev_queue; + WARN_ON(dev_queue != cl->leaf.q->dev_queue); + } + + if (!new) { + new = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops, + cl->common.classid, extack); + if (!new) + return -ENOBUFS; + } + + if (q->offload) { + htb_set_lockdep_class_child(new); + /* One ref for cl->leaf.q, the other for dev_queue->qdisc. */ + qdisc_refcount_inc(new); + old_q = htb_graft_helper(dev_queue, new); + } *old = qdisc_replace(sch, new, &cl->leaf.q); + + if (q->offload) { + WARN_ON(old_q != *old); + qdisc_put(old_q); + } + return 0; } @@ -1184,9 +1407,10 @@ static inline int htb_parent_last_child(struct htb_class *cl) return 1; } -static void htb_parent_to_leaf(struct htb_sched *q, struct htb_class *cl, +static void htb_parent_to_leaf(struct Qdisc *sch, struct htb_class *cl, struct Qdisc *new_q) { + struct htb_sched *q = qdisc_priv(sch); struct htb_class *parent = cl->parent; WARN_ON(cl->level || !cl->leaf.q || cl->prio_activity); @@ -1204,6 +1428,71 @@ static void htb_parent_to_leaf(struct htb_sched *q, struct htb_class *cl, parent->cmode = HTB_CAN_SEND; } +static void htb_parent_to_leaf_offload(struct Qdisc *sch, + struct netdev_queue *dev_queue, + struct Qdisc *new_q) +{ + struct Qdisc *old_q; + + /* One ref for cl->leaf.q, the other for dev_queue->qdisc. */ + qdisc_refcount_inc(new_q); + old_q = htb_graft_helper(dev_queue, new_q); + WARN_ON(!(old_q->flags & TCQ_F_BUILTIN)); +} + +static int htb_destroy_class_offload(struct Qdisc *sch, struct htb_class *cl, + bool last_child, bool destroying, + struct netlink_ext_ack *extack) +{ + struct tc_htb_qopt_offload offload_opt; + struct Qdisc *q = cl->leaf.q; + struct Qdisc *old = NULL; + int err; + + if (cl->level) + return -EINVAL; + + WARN_ON(!q); + if (!destroying) { + /* On destroy of HTB, two cases are possible: + * 1. q is a normal qdisc, but q->dev_queue has noop qdisc. + * 2. q is a noop qdisc (for nodes that were inner), + * q->dev_queue is noop_netdev_queue. + */ + old = htb_graft_helper(q->dev_queue, NULL); + WARN_ON(!old); + WARN_ON(old != q); + } + + offload_opt = (struct tc_htb_qopt_offload) { + .command = !last_child ? TC_HTB_LEAF_DEL : + destroying ? TC_HTB_LEAF_DEL_LAST_FORCE : + TC_HTB_LEAF_DEL_LAST, + .classid = cl->common.classid, + .extack = extack, + }; + err = htb_offload(qdisc_dev(sch), &offload_opt); + + if (!err || destroying) + qdisc_put(old); + else + htb_graft_helper(q->dev_queue, old); + + if (last_child) + return err; + + if (!err && offload_opt.moved_qid != 0) { + if (destroying) + q->dev_queue = netdev_get_tx_queue(qdisc_dev(sch), + offload_opt.qid); + else + htb_offload_move_qdisc(sch, offload_opt.moved_qid, + offload_opt.qid); + } + + return err; +} + static void htb_destroy_class(struct Qdisc *sch, struct htb_class *cl) { if (!cl->level) { @@ -1217,8 +1506,11 @@ static void htb_destroy_class(struct Qdisc *sch, struct htb_class *cl) static void htb_destroy(struct Qdisc *sch) { + struct net_device *dev = qdisc_dev(sch); + struct tc_htb_qopt_offload offload_opt; struct htb_sched *q = qdisc_priv(sch); struct hlist_node *next; + bool nonempty, changed; struct htb_class *cl; unsigned int i; @@ -1237,13 +1529,58 @@ static void htb_destroy(struct Qdisc *sch) cl->block = NULL; } } - for (i = 0; i < q->clhash.hashsize; i++) { - hlist_for_each_entry_safe(cl, next, &q->clhash.hash[i], - common.hnode) - htb_destroy_class(sch, cl); - } + + do { + nonempty = false; + changed = false; + for (i = 0; i < q->clhash.hashsize; i++) { + hlist_for_each_entry_safe(cl, next, &q->clhash.hash[i], + common.hnode) { + bool last_child; + + if (!q->offload) { + htb_destroy_class(sch, cl); + continue; + } + + nonempty = true; + + if (cl->level) + continue; + + changed = true; + + last_child = htb_parent_last_child(cl); + htb_destroy_class_offload(sch, cl, last_child, + true, NULL); + qdisc_class_hash_remove(&q->clhash, + &cl->common); + if (cl->parent) + cl->parent->children--; + if (last_child) + htb_parent_to_leaf(sch, cl, NULL); + htb_destroy_class(sch, cl); + } + } + } while (changed); + WARN_ON(nonempty); + qdisc_class_hash_destroy(&q->clhash); __qdisc_reset_queue(&q->direct_queue); + + if (!q->offload) + return; + + offload_opt = (struct tc_htb_qopt_offload) { + .command = TC_HTB_DESTROY, + }; + htb_offload(dev, &offload_opt); + + if (!q->direct_qdiscs) + return; + for (i = 0; i < q->num_direct_qdiscs && q->direct_qdiscs[i]; i++) + qdisc_put(q->direct_qdiscs[i]); + kfree(q->direct_qdiscs); } static int htb_delete(struct Qdisc *sch, unsigned long arg, @@ -1253,6 +1590,7 @@ static int htb_delete(struct Qdisc *sch, unsigned long arg, struct htb_class *cl = (struct htb_class *)arg; struct Qdisc *new_q = NULL; int last_child = 0; + int err; /* TODO: why don't allow to delete subtree ? references ? does * tc subsys guarantee us that in htb_destroy it holds no class @@ -1261,11 +1599,28 @@ static int htb_delete(struct Qdisc *sch, unsigned long arg, if (cl->children || cl->filter_cnt) return -EBUSY; - if (!cl->level && htb_parent_last_child(cl)) { - new_q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, + if (!cl->level && htb_parent_last_child(cl)) + last_child = 1; + + if (q->offload) { + err = htb_destroy_class_offload(sch, cl, last_child, false, + extack); + if (err) + return err; + } + + if (last_child) { + struct netdev_queue *dev_queue; + + dev_queue = q->offload ? cl->leaf.q->dev_queue : sch->dev_queue; + new_q = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops, cl->parent->common.classid, NULL); - last_child = 1; + if (q->offload) { + if (new_q) + htb_set_lockdep_class_child(new_q); + htb_parent_to_leaf_offload(sch, dev_queue, new_q); + } } sch_tree_lock(sch); @@ -1286,7 +1641,7 @@ static int htb_delete(struct Qdisc *sch, unsigned long arg, &q->hlevel[cl->level].wait_pq); if (last_child) - htb_parent_to_leaf(q, cl, new_q); + htb_parent_to_leaf(sch, cl, new_q); sch_tree_unlock(sch); @@ -1301,9 +1656,11 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, int err = -EINVAL; struct htb_sched *q = qdisc_priv(sch); struct htb_class *cl = (struct htb_class *)*arg, *parent; + struct tc_htb_qopt_offload offload_opt; struct nlattr *opt = tca[TCA_OPTIONS]; struct nlattr *tb[TCA_HTB_MAX + 1]; struct Qdisc *parent_qdisc = NULL; + struct netdev_queue *dev_queue; struct tc_htb_opt *hopt; u64 rate64, ceil64; int warn = 0; @@ -1336,8 +1693,12 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, qdisc_put_rtab(qdisc_get_rtab(&hopt->ceil, tb[TCA_HTB_CTAB], NULL)); + rate64 = tb[TCA_HTB_RATE64] ? nla_get_u64(tb[TCA_HTB_RATE64]) : 0; + ceil64 = tb[TCA_HTB_CEIL64] ? nla_get_u64(tb[TCA_HTB_CEIL64]) : 0; + if (!cl) { /* new class */ - struct Qdisc *new_q; + struct net_device *dev = qdisc_dev(sch); + struct Qdisc *new_q, *old_q; int prio; struct { struct nlattr nla; @@ -1380,11 +1741,8 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, NULL, qdisc_root_sleeping_running(sch), tca[TCA_RATE] ? : &est.nla); - if (err) { - tcf_block_put(cl->block); - kfree(cl); - goto failure; - } + if (err) + goto err_block_put; } cl->children = 0; @@ -1393,12 +1751,74 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, for (prio = 0; prio < TC_HTB_NUMPRIO; prio++) RB_CLEAR_NODE(&cl->node[prio]); + cl->common.classid = classid; + + /* Make sure nothing interrupts us in between of two + * ndo_setup_tc calls. + */ + ASSERT_RTNL(); + /* create leaf qdisc early because it uses kmalloc(GFP_KERNEL) * so that can't be used inside of sch_tree_lock * -- thanks to Karlis Peisenieks */ - new_q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, + if (!q->offload) { + dev_queue = sch->dev_queue; + } else if (!(parent && !parent->level)) { + /* Assign a dev_queue to this classid. */ + offload_opt = (struct tc_htb_qopt_offload) { + .command = TC_HTB_LEAF_ALLOC_QUEUE, + .classid = cl->common.classid, + .parent_classid = parent ? + TC_H_MIN(parent->common.classid) : + TC_HTB_CLASSID_ROOT, + .rate = max_t(u64, hopt->rate.rate, rate64), + .ceil = max_t(u64, hopt->ceil.rate, ceil64), + .extack = extack, + }; + err = htb_offload(dev, &offload_opt); + if (err) { + pr_err("htb: TC_HTB_LEAF_ALLOC_QUEUE failed with err = %d\n", + err); + goto err_kill_estimator; + } + dev_queue = netdev_get_tx_queue(dev, offload_opt.qid); + } else { /* First child. */ + dev_queue = parent->leaf.q->dev_queue; + old_q = htb_graft_helper(dev_queue, NULL); + WARN_ON(old_q != parent->leaf.q); + offload_opt = (struct tc_htb_qopt_offload) { + .command = TC_HTB_LEAF_TO_INNER, + .classid = cl->common.classid, + .parent_classid = + TC_H_MIN(parent->common.classid), + .rate = max_t(u64, hopt->rate.rate, rate64), + .ceil = max_t(u64, hopt->ceil.rate, ceil64), + .extack = extack, + }; + err = htb_offload(dev, &offload_opt); + if (err) { + pr_err("htb: TC_HTB_LEAF_TO_INNER failed with err = %d\n", + err); + htb_graft_helper(dev_queue, old_q); + goto err_kill_estimator; + } + qdisc_put(old_q); + } + new_q = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops, classid, NULL); + if (q->offload) { + if (new_q) { + htb_set_lockdep_class_child(new_q); + /* One ref for cl->leaf.q, the other for + * dev_queue->qdisc. + */ + qdisc_refcount_inc(new_q); + } + old_q = htb_graft_helper(dev_queue, new_q); + /* No qdisc_put needed. */ + WARN_ON(!(old_q->flags & TCQ_F_BUILTIN)); + } sch_tree_lock(sch); if (parent && !parent->level) { /* turn parent into inner node */ @@ -1416,10 +1836,10 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, : TC_HTB_MAXDEPTH) - 1; memset(&parent->inner, 0, sizeof(parent->inner)); } + /* leaf (we) needs elementary qdisc */ cl->leaf.q = new_q ? new_q : &noop_qdisc; - cl->common.classid = classid; cl->parent = parent; /* set class to be in HTB_CAN_SEND state */ @@ -1445,12 +1865,30 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, if (err) return err; } - sch_tree_lock(sch); - } - rate64 = tb[TCA_HTB_RATE64] ? nla_get_u64(tb[TCA_HTB_RATE64]) : 0; + if (q->offload) { + struct net_device *dev = qdisc_dev(sch); + + offload_opt = (struct tc_htb_qopt_offload) { + .command = TC_HTB_NODE_MODIFY, + .classid = cl->common.classid, + .rate = max_t(u64, hopt->rate.rate, rate64), + .ceil = max_t(u64, hopt->ceil.rate, ceil64), + .extack = extack, + }; + err = htb_offload(dev, &offload_opt); + if (err) + /* Estimator was replaced, and rollback may fail + * as well, so we don't try to recover it, and + * the estimator won't work property with the + * offload anyway, because bstats are updated + * only when the stats are queried. + */ + return err; + } - ceil64 = tb[TCA_HTB_CEIL64] ? nla_get_u64(tb[TCA_HTB_CEIL64]) : 0; + sch_tree_lock(sch); + } psched_ratecfg_precompute(&cl->rate, &hopt->rate, rate64); psched_ratecfg_precompute(&cl->ceil, &hopt->ceil, ceil64); @@ -1493,6 +1931,11 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, *arg = (unsigned long)cl; return 0; +err_kill_estimator: + gen_kill_estimator(&cl->rate_est); +err_block_put: + tcf_block_put(cl->block); + kfree(cl); failure: return err; } @@ -1558,6 +2001,7 @@ static void htb_walk(struct Qdisc *sch, struct qdisc_walker *arg) } static const struct Qdisc_class_ops htb_class_ops = { + .select_queue = htb_select_queue, .graft = htb_graft, .leaf = htb_leaf, .qlen_notify = htb_qlen_notify, @@ -1580,6 +2024,7 @@ static struct Qdisc_ops htb_qdisc_ops __read_mostly = { .dequeue = htb_dequeue, .peek = qdisc_peek_dequeued, .init = htb_init, + .attach = htb_attach, .reset = htb_reset, .destroy = htb_destroy, .dump = htb_dump, diff --git a/tools/include/uapi/linux/pkt_sched.h b/tools/include/uapi/linux/pkt_sched.h index 0d18b1d1fbbc8..5c903abc9fa52 100644 --- a/tools/include/uapi/linux/pkt_sched.h +++ b/tools/include/uapi/linux/pkt_sched.h @@ -414,6 +414,7 @@ enum { TCA_HTB_RATE64, TCA_HTB_CEIL64, TCA_HTB_PAD, + TCA_HTB_OFFLOAD, __TCA_HTB_MAX, }; -- GitLab From 83271586249c8ecf8458834864c827f67ad57773 Mon Sep 17 00:00:00 2001 From: Maxim Mikityanskiy Date: Tue, 19 Jan 2021 14:08:14 +0200 Subject: [PATCH 1318/2012] sch_htb: Stats for offloaded HTB This commit adds support for statistics of offloaded HTB. Bytes and packets counters for leaf and inner nodes are supported, the values are taken from per-queue qdiscs, and the numbers that the user sees should have the same behavior as the software (non-offloaded) HTB. Signed-off-by: Maxim Mikityanskiy Reviewed-by: Tariq Toukan Signed-off-by: Jakub Kicinski --- net/sched/sch_htb.c | 53 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index d1b60fe3d311f..dff3adf5a9156 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -114,6 +114,7 @@ struct htb_class { * Written often fields */ struct gnet_stats_basic_packed bstats; + struct gnet_stats_basic_packed bstats_bias; struct tc_htb_xstats xstats; /* our special stats */ /* token bucket parameters */ @@ -1220,6 +1221,7 @@ static int htb_dump_class(struct Qdisc *sch, unsigned long arg, struct sk_buff *skb, struct tcmsg *tcm) { struct htb_class *cl = (struct htb_class *)arg; + struct htb_sched *q = qdisc_priv(sch); struct nlattr *nest; struct tc_htb_opt opt; @@ -1246,6 +1248,8 @@ static int htb_dump_class(struct Qdisc *sch, unsigned long arg, opt.level = cl->level; if (nla_put(skb, TCA_HTB_PARMS, sizeof(opt), &opt)) goto nla_put_failure; + if (q->offload && nla_put_flag(skb, TCA_HTB_OFFLOAD)) + goto nla_put_failure; if ((cl->rate.rate_bytes_ps >= (1ULL << 32)) && nla_put_u64_64bit(skb, TCA_HTB_RATE64, cl->rate.rate_bytes_ps, TCA_HTB_PAD)) @@ -1262,10 +1266,39 @@ nla_put_failure: return -1; } +static void htb_offload_aggregate_stats(struct htb_sched *q, + struct htb_class *cl) +{ + struct htb_class *c; + unsigned int i; + + memset(&cl->bstats, 0, sizeof(cl->bstats)); + + for (i = 0; i < q->clhash.hashsize; i++) { + hlist_for_each_entry(c, &q->clhash.hash[i], common.hnode) { + struct htb_class *p = c; + + while (p && p->level < cl->level) + p = p->parent; + + if (p != cl) + continue; + + cl->bstats.bytes += c->bstats_bias.bytes; + cl->bstats.packets += c->bstats_bias.packets; + if (c->level == 0) { + cl->bstats.bytes += c->leaf.q->bstats.bytes; + cl->bstats.packets += c->leaf.q->bstats.packets; + } + } + } +} + static int htb_dump_class_stats(struct Qdisc *sch, unsigned long arg, struct gnet_dump *d) { struct htb_class *cl = (struct htb_class *)arg; + struct htb_sched *q = qdisc_priv(sch); struct gnet_stats_queue qs = { .drops = cl->drops, .overlimits = cl->overlimits, @@ -1280,6 +1313,19 @@ htb_dump_class_stats(struct Qdisc *sch, unsigned long arg, struct gnet_dump *d) cl->xstats.ctokens = clamp_t(s64, PSCHED_NS2TICKS(cl->ctokens), INT_MIN, INT_MAX); + if (q->offload) { + if (!cl->level) { + if (cl->leaf.q) + cl->bstats = cl->leaf.q->bstats; + else + memset(&cl->bstats, 0, sizeof(cl->bstats)); + cl->bstats.bytes += cl->bstats_bias.bytes; + cl->bstats.packets += cl->bstats_bias.packets; + } else { + htb_offload_aggregate_stats(q, cl); + } + } + if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch), d, NULL, &cl->bstats) < 0 || gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 || @@ -1464,6 +1510,11 @@ static int htb_destroy_class_offload(struct Qdisc *sch, struct htb_class *cl, WARN_ON(old != q); } + if (cl->parent) { + cl->parent->bstats_bias.bytes += q->bstats.bytes; + cl->parent->bstats_bias.packets += q->bstats.packets; + } + offload_opt = (struct tc_htb_qopt_offload) { .command = !last_child ? TC_HTB_LEAF_DEL : destroying ? TC_HTB_LEAF_DEL_LAST_FORCE : @@ -1803,6 +1854,8 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, htb_graft_helper(dev_queue, old_q); goto err_kill_estimator; } + parent->bstats_bias.bytes += old_q->bstats.bytes; + parent->bstats_bias.packets += old_q->bstats.packets; qdisc_put(old_q); } new_q = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops, -- GitLab From 214baf22870cfa437522f3bd4fbae56338674b04 Mon Sep 17 00:00:00 2001 From: Maxim Mikityanskiy Date: Tue, 19 Jan 2021 14:08:15 +0200 Subject: [PATCH 1319/2012] net/mlx5e: Support HTB offload This commit adds support for HTB offload in the mlx5e driver. Performance: NIC: Mellanox ConnectX-6 Dx CPU: Intel(R) Xeon(R) CPU E5-2680 v3 @ 2.50GHz (24 cores with HT) 100 Gbit/s line rate, 500 UDP streams @ ~200 Mbit/s each 48 traffic classes, flower used for steering No shaping (rate limits set to 4 Gbit/s per TC) - checking for max throughput. Baseline: 98.7 Gbps, 8.25 Mpps HTB: 6.7 Gbps, 0.56 Mpps HTB offload: 95.6 Gbps, 8.00 Mpps Limitations: 1. 256 leaf nodes, 3 levels of depth. 2. Granularity for ceil is 1 Mbit/s. Rates are converted to weights, and the bandwidth is split among the siblings according to these weights. Other parameters for classes are not supported. Ethtool statistics support for QoS SQs are also added. The counters are called qos_txN_*, where N is the QoS queue number (starting from 0, the numeration is separate from the normal SQs), and * is the counter name (the counters are the same as for the normal SQs). Signed-off-by: Maxim Mikityanskiy Reviewed-by: Tariq Toukan Signed-off-by: Jakub Kicinski --- .../net/ethernet/mellanox/mlx5/core/Makefile | 6 +- drivers/net/ethernet/mellanox/mlx5/core/en.h | 27 +- .../ethernet/mellanox/mlx5/core/en/params.h | 2 + .../net/ethernet/mellanox/mlx5/core/en/ptp.c | 2 +- .../net/ethernet/mellanox/mlx5/core/en/qos.c | 984 ++++++++++++++++++ .../net/ethernet/mellanox/mlx5/core/en/qos.h | 44 + .../ethernet/mellanox/mlx5/core/en_ethtool.c | 21 + .../net/ethernet/mellanox/mlx5/core/en_main.c | 176 +++- .../ethernet/mellanox/mlx5/core/en_stats.c | 100 ++ .../ethernet/mellanox/mlx5/core/en_stats.h | 2 + .../net/ethernet/mellanox/mlx5/core/en_tx.c | 47 +- .../net/ethernet/mellanox/mlx5/core/en_txrx.c | 26 + drivers/net/ethernet/mellanox/mlx5/core/qos.c | 85 ++ drivers/net/ethernet/mellanox/mlx5/core/qos.h | 30 + include/linux/mlx5/mlx5_ifc.h | 13 +- 15 files changed, 1516 insertions(+), 49 deletions(-) create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/qos.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/qos.h create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/qos.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/qos.h diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index 134bd038ae8af..fcfc0b1149852 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -16,7 +16,8 @@ mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \ transobj.o vport.o sriov.o fs_cmd.o fs_core.o pci_irq.o \ fs_counters.o rl.o lag.o dev.o events.o wq.o lib/gid.o \ lib/devcom.o lib/pci_vsc.o lib/dm.o diag/fs_tracepoint.o \ - diag/fw_tracer.o diag/crdump.o devlink.o diag/rsc_dump.o fw_reset.o + diag/fw_tracer.o diag/crdump.o devlink.o diag/rsc_dump.o \ + fw_reset.o qos.o # # Netdev basic @@ -25,7 +26,8 @@ mlx5_core-$(CONFIG_MLX5_CORE_EN) += en_main.o en_common.o en_fs.o en_ethtool.o \ en_tx.o en_rx.o en_dim.o en_txrx.o en/xdp.o en_stats.o \ en_selftest.o en/port.o en/monitor_stats.o en/health.o \ en/reporter_tx.o en/reporter_rx.o en/params.o en/xsk/pool.o \ - en/xsk/setup.o en/xsk/rx.o en/xsk/tx.o en/devlink.o en/ptp.o + en/xsk/setup.o en/xsk/rx.o en/xsk/tx.o en/devlink.o en/ptp.o \ + en/qos.o # # Netdev extra diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 055baf3b6cb10..26e578a973e57 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -55,6 +55,7 @@ #include "en_stats.h" #include "en/dcbnl.h" #include "en/fs.h" +#include "en/qos.h" #include "lib/hv_vhca.h" extern const struct net_device_ops mlx5e_netdev_ops; @@ -161,6 +162,9 @@ do { \ ##__VA_ARGS__); \ } while (0) +#define mlx5e_state_dereference(priv, p) \ + rcu_dereference_protected((p), lockdep_is_held(&(priv)->state_lock)) + enum mlx5e_rq_group { MLX5E_RQ_GROUP_REGULAR, MLX5E_RQ_GROUP_XSK, @@ -663,11 +667,13 @@ struct mlx5e_channel { struct mlx5e_xdpsq rq_xdpsq; struct mlx5e_txqsq sq[MLX5E_MAX_NUM_TC]; struct mlx5e_icosq icosq; /* internal control operations */ + struct mlx5e_txqsq __rcu * __rcu *qos_sqs; bool xdp; struct napi_struct napi; struct device *pdev; struct net_device *netdev; __be32 mkey_be; + u16 qos_sqs_size; u8 num_tc; u8 lag_port; @@ -756,6 +762,8 @@ struct mlx5e_modify_sq_param { int next_state; int rl_update; int rl_index; + bool qos_update; + u16 qos_queue_group_id; }; #if IS_ENABLED(CONFIG_PCI_HYPERV_INTERFACE) @@ -788,10 +796,20 @@ struct mlx5e_scratchpad { cpumask_var_t cpumask; }; +struct mlx5e_htb { + DECLARE_HASHTABLE(qos_tc2node, order_base_2(MLX5E_QOS_MAX_LEAF_NODES)); + DECLARE_BITMAP(qos_used_qids, MLX5E_QOS_MAX_LEAF_NODES); + struct mlx5e_sq_stats **qos_sq_stats; + u16 max_qos_sqs; + u16 maj_id; + u16 defcls; +}; + struct mlx5e_priv { /* priv data path fields - start */ /* +1 for port ptp ts */ - struct mlx5e_txqsq *txq2sq[(MLX5E_MAX_NUM_CHANNELS + 1) * MLX5E_MAX_NUM_TC]; + struct mlx5e_txqsq *txq2sq[(MLX5E_MAX_NUM_CHANNELS + 1) * MLX5E_MAX_NUM_TC + + MLX5E_QOS_MAX_LEAF_NODES]; int channel_tc2realtxq[MLX5E_MAX_NUM_CHANNELS][MLX5E_MAX_NUM_TC]; int port_ptp_tc2realtxq[MLX5E_MAX_NUM_TC]; #ifdef CONFIG_MLX5_CORE_EN_DCB @@ -859,6 +877,7 @@ struct mlx5e_priv { struct mlx5e_hv_vhca_stats_agent stats_agent; #endif struct mlx5e_scratchpad scratchpad; + struct mlx5e_htb htb; }; struct mlx5e_rx_handlers { @@ -986,6 +1005,7 @@ int mlx5e_safe_switch_channels(struct mlx5e_priv *priv, struct mlx5e_channels *new_chs, mlx5e_fp_preactivate preactivate, void *context); +int mlx5e_update_tx_netdev_queues(struct mlx5e_priv *priv); int mlx5e_num_channels_changed(struct mlx5e_priv *priv); int mlx5e_num_channels_changed_ctx(struct mlx5e_priv *priv, void *context); void mlx5e_activate_priv_channels(struct mlx5e_priv *priv); @@ -1010,6 +1030,9 @@ void mlx5e_deactivate_icosq(struct mlx5e_icosq *icosq); int mlx5e_modify_sq(struct mlx5_core_dev *mdev, u32 sqn, struct mlx5e_modify_sq_param *p); +int mlx5e_open_txqsq(struct mlx5e_channel *c, u32 tisn, int txq_ix, + struct mlx5e_params *params, struct mlx5e_sq_param *param, + struct mlx5e_txqsq *sq, int tc, u16 qos_queue_group_id, u16 qos_qid); void mlx5e_activate_txqsq(struct mlx5e_txqsq *sq); void mlx5e_deactivate_txqsq(struct mlx5e_txqsq *sq); void mlx5e_free_txqsq(struct mlx5e_txqsq *sq); @@ -1020,8 +1043,10 @@ struct mlx5e_create_sq_param; int mlx5e_create_sq_rdy(struct mlx5_core_dev *mdev, struct mlx5e_sq_param *param, struct mlx5e_create_sq_param *csp, + u16 qos_queue_group_id, u32 *sqn); void mlx5e_tx_err_cqe_work(struct work_struct *recover_work); +void mlx5e_close_txqsq(struct mlx5e_txqsq *sq); static inline bool mlx5_tx_swp_supported(struct mlx5_core_dev *mdev) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h index 807147d97a0fa..ea2cfb04b31ad 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h @@ -118,6 +118,8 @@ void mlx5e_build_rq_param(struct mlx5e_priv *priv, struct mlx5e_rq_param *param); void mlx5e_build_sq_param_common(struct mlx5e_priv *priv, struct mlx5e_sq_param *param); +void mlx5e_build_sq_param(struct mlx5e_priv *priv, struct mlx5e_params *params, + struct mlx5e_sq_param *param); void mlx5e_build_rx_cq_param(struct mlx5e_priv *priv, struct mlx5e_params *params, struct mlx5e_xsk_param *xsk, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c index 2a2bac30daaa7..eeddd1137dda5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c @@ -261,7 +261,7 @@ static int mlx5e_ptp_open_txqsq(struct mlx5e_port_ptp *c, u32 tisn, csp.min_inline_mode = txqsq->min_inline_mode; csp.ts_cqe_to_dest_cqn = ptpsq->ts_cq.mcq.cqn; - err = mlx5e_create_sq_rdy(c->mdev, sqp, &csp, &txqsq->sqn); + err = mlx5e_create_sq_rdy(c->mdev, sqp, &csp, 0, &txqsq->sqn); if (err) goto err_free_txqsq; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c b/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c new file mode 100644 index 0000000000000..12d7ad0612375 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c @@ -0,0 +1,984 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2020, Mellanox Technologies inc. All rights reserved. */ + +#include "en.h" +#include "params.h" +#include "../qos.h" + +#define BYTES_IN_MBIT 125000 + +int mlx5e_qos_max_leaf_nodes(struct mlx5_core_dev *mdev) +{ + return min(MLX5E_QOS_MAX_LEAF_NODES, mlx5_qos_max_leaf_nodes(mdev)); +} + +int mlx5e_qos_cur_leaf_nodes(struct mlx5e_priv *priv) +{ + int last = find_last_bit(priv->htb.qos_used_qids, mlx5e_qos_max_leaf_nodes(priv->mdev)); + + return last == mlx5e_qos_max_leaf_nodes(priv->mdev) ? 0 : last + 1; +} + +/* Software representation of the QoS tree (internal to this file) */ + +static int mlx5e_find_unused_qos_qid(struct mlx5e_priv *priv) +{ + int size = mlx5e_qos_max_leaf_nodes(priv->mdev); + int res; + + WARN_ONCE(!mutex_is_locked(&priv->state_lock), "%s: state_lock is not held\n", __func__); + res = find_first_zero_bit(priv->htb.qos_used_qids, size); + + return res == size ? -ENOSPC : res; +} + +struct mlx5e_qos_node { + struct hlist_node hnode; + struct rcu_head rcu; + struct mlx5e_qos_node *parent; + u64 rate; + u32 bw_share; + u32 max_average_bw; + u32 hw_id; + u32 classid; /* 16-bit, except root. */ + u16 qid; +}; + +#define MLX5E_QOS_QID_INNER 0xffff +#define MLX5E_HTB_CLASSID_ROOT 0xffffffff + +static struct mlx5e_qos_node * +mlx5e_sw_node_create_leaf(struct mlx5e_priv *priv, u16 classid, u16 qid, + struct mlx5e_qos_node *parent) +{ + struct mlx5e_qos_node *node; + + node = kzalloc(sizeof(*node), GFP_KERNEL); + if (!node) + return ERR_PTR(-ENOMEM); + + node->parent = parent; + + node->qid = qid; + __set_bit(qid, priv->htb.qos_used_qids); + + node->classid = classid; + hash_add_rcu(priv->htb.qos_tc2node, &node->hnode, classid); + + mlx5e_update_tx_netdev_queues(priv); + + return node; +} + +static struct mlx5e_qos_node *mlx5e_sw_node_create_root(struct mlx5e_priv *priv) +{ + struct mlx5e_qos_node *node; + + node = kzalloc(sizeof(*node), GFP_KERNEL); + if (!node) + return ERR_PTR(-ENOMEM); + + node->qid = MLX5E_QOS_QID_INNER; + node->classid = MLX5E_HTB_CLASSID_ROOT; + hash_add_rcu(priv->htb.qos_tc2node, &node->hnode, node->classid); + + return node; +} + +static struct mlx5e_qos_node *mlx5e_sw_node_find(struct mlx5e_priv *priv, u32 classid) +{ + struct mlx5e_qos_node *node = NULL; + + hash_for_each_possible(priv->htb.qos_tc2node, node, hnode, classid) { + if (node->classid == classid) + break; + } + + return node; +} + +static struct mlx5e_qos_node *mlx5e_sw_node_find_rcu(struct mlx5e_priv *priv, u32 classid) +{ + struct mlx5e_qos_node *node = NULL; + + hash_for_each_possible_rcu(priv->htb.qos_tc2node, node, hnode, classid) { + if (node->classid == classid) + break; + } + + return node; +} + +static void mlx5e_sw_node_delete(struct mlx5e_priv *priv, struct mlx5e_qos_node *node) +{ + hash_del_rcu(&node->hnode); + if (node->qid != MLX5E_QOS_QID_INNER) { + __clear_bit(node->qid, priv->htb.qos_used_qids); + mlx5e_update_tx_netdev_queues(priv); + } + kfree_rcu(node, rcu); +} + +/* TX datapath API */ + +static u16 mlx5e_qid_from_qos(struct mlx5e_channels *chs, u16 qid) +{ + /* These channel params are safe to access from the datapath, because: + * 1. This function is called only after checking priv->htb.maj_id != 0, + * and the number of queues can't change while HTB offload is active. + * 2. When priv->htb.maj_id becomes 0, synchronize_rcu waits for + * mlx5e_select_queue to finish while holding priv->state_lock, + * preventing other code from changing the number of queues. + */ + bool is_ptp = MLX5E_GET_PFLAG(&chs->params, MLX5E_PFLAG_TX_PORT_TS); + + return (chs->params.num_channels + is_ptp) * chs->params.num_tc + qid; +} + +int mlx5e_get_txq_by_classid(struct mlx5e_priv *priv, u16 classid) +{ + struct mlx5e_qos_node *node; + u16 qid; + int res; + + rcu_read_lock(); + + node = mlx5e_sw_node_find_rcu(priv, classid); + if (!node) { + res = -ENOENT; + goto out; + } + qid = READ_ONCE(node->qid); + if (qid == MLX5E_QOS_QID_INNER) { + res = -EINVAL; + goto out; + } + res = mlx5e_qid_from_qos(&priv->channels, qid); + +out: + rcu_read_unlock(); + return res; +} + +static struct mlx5e_txqsq *mlx5e_get_qos_sq(struct mlx5e_priv *priv, int qid) +{ + struct mlx5e_params *params = &priv->channels.params; + struct mlx5e_txqsq __rcu **qos_sqs; + struct mlx5e_channel *c; + int ix; + + ix = qid % params->num_channels; + qid /= params->num_channels; + c = priv->channels.c[ix]; + + qos_sqs = mlx5e_state_dereference(priv, c->qos_sqs); + return mlx5e_state_dereference(priv, qos_sqs[qid]); +} + +/* SQ lifecycle */ + +static int mlx5e_open_qos_sq(struct mlx5e_priv *priv, struct mlx5e_channels *chs, + struct mlx5e_qos_node *node) +{ + struct mlx5e_create_cq_param ccp = {}; + struct mlx5e_txqsq __rcu **qos_sqs; + struct mlx5e_sq_param param_sq; + struct mlx5e_cq_param param_cq; + int txq_ix, ix, qid, err = 0; + struct mlx5e_params *params; + struct mlx5e_channel *c; + struct mlx5e_txqsq *sq; + + params = &chs->params; + + txq_ix = mlx5e_qid_from_qos(chs, node->qid); + + WARN_ON(node->qid > priv->htb.max_qos_sqs); + if (node->qid == priv->htb.max_qos_sqs) { + struct mlx5e_sq_stats *stats, **stats_list = NULL; + + if (priv->htb.max_qos_sqs == 0) { + stats_list = kvcalloc(mlx5e_qos_max_leaf_nodes(priv->mdev), + sizeof(*stats_list), + GFP_KERNEL); + if (!stats_list) + return -ENOMEM; + } + stats = kzalloc(sizeof(*stats), GFP_KERNEL); + if (!stats) { + kvfree(stats_list); + return -ENOMEM; + } + if (stats_list) + WRITE_ONCE(priv->htb.qos_sq_stats, stats_list); + WRITE_ONCE(priv->htb.qos_sq_stats[node->qid], stats); + /* Order max_qos_sqs increment after writing the array pointer. + * Pairs with smp_load_acquire in en_stats.c. + */ + smp_store_release(&priv->htb.max_qos_sqs, priv->htb.max_qos_sqs + 1); + } + + ix = node->qid % params->num_channels; + qid = node->qid / params->num_channels; + c = chs->c[ix]; + + qos_sqs = mlx5e_state_dereference(priv, c->qos_sqs); + sq = kzalloc(sizeof(*sq), GFP_KERNEL); + + if (!sq) + return -ENOMEM; + + mlx5e_build_create_cq_param(&ccp, c); + + memset(¶m_sq, 0, sizeof(param_sq)); + memset(¶m_cq, 0, sizeof(param_cq)); + mlx5e_build_sq_param(priv, params, ¶m_sq); + mlx5e_build_tx_cq_param(priv, params, ¶m_cq); + err = mlx5e_open_cq(priv, params->tx_cq_moderation, ¶m_cq, &ccp, &sq->cq); + if (err) + goto err_free_sq; + err = mlx5e_open_txqsq(c, priv->tisn[c->lag_port][0], txq_ix, params, + ¶m_sq, sq, 0, node->hw_id, node->qid); + if (err) + goto err_close_cq; + + rcu_assign_pointer(qos_sqs[qid], sq); + + return 0; + +err_close_cq: + mlx5e_close_cq(&sq->cq); +err_free_sq: + kfree(sq); + return err; +} + +static void mlx5e_activate_qos_sq(struct mlx5e_priv *priv, struct mlx5e_qos_node *node) +{ + struct mlx5e_txqsq *sq; + + sq = mlx5e_get_qos_sq(priv, node->qid); + + WRITE_ONCE(priv->txq2sq[mlx5e_qid_from_qos(&priv->channels, node->qid)], sq); + + /* Make the change to txq2sq visible before the queue is started. + * As mlx5e_xmit runs under a spinlock, there is an implicit ACQUIRE, + * which pairs with this barrier. + */ + smp_wmb(); + + qos_dbg(priv->mdev, "Activate QoS SQ qid %u\n", node->qid); + mlx5e_activate_txqsq(sq); +} + +static void mlx5e_deactivate_qos_sq(struct mlx5e_priv *priv, u16 qid) +{ + struct mlx5e_txqsq *sq; + + sq = mlx5e_get_qos_sq(priv, qid); + if (!sq) /* Handle the case when the SQ failed to open. */ + return; + + qos_dbg(priv->mdev, "Deactivate QoS SQ qid %u\n", qid); + mlx5e_deactivate_txqsq(sq); + + /* The queue is disabled, no synchronization with datapath is needed. */ + priv->txq2sq[mlx5e_qid_from_qos(&priv->channels, qid)] = NULL; +} + +static void mlx5e_close_qos_sq(struct mlx5e_priv *priv, u16 qid) +{ + struct mlx5e_txqsq __rcu **qos_sqs; + struct mlx5e_params *params; + struct mlx5e_channel *c; + struct mlx5e_txqsq *sq; + int ix; + + params = &priv->channels.params; + + ix = qid % params->num_channels; + qid /= params->num_channels; + c = priv->channels.c[ix]; + qos_sqs = mlx5e_state_dereference(priv, c->qos_sqs); + sq = rcu_replace_pointer(qos_sqs[qid], NULL, lockdep_is_held(&priv->state_lock)); + if (!sq) /* Handle the case when the SQ failed to open. */ + return; + + synchronize_rcu(); /* Sync with NAPI. */ + + mlx5e_close_txqsq(sq); + mlx5e_close_cq(&sq->cq); + kfree(sq); +} + +void mlx5e_qos_close_queues(struct mlx5e_channel *c) +{ + struct mlx5e_txqsq __rcu **qos_sqs; + int i; + + qos_sqs = rcu_replace_pointer(c->qos_sqs, NULL, lockdep_is_held(&c->priv->state_lock)); + if (!qos_sqs) + return; + synchronize_rcu(); /* Sync with NAPI. */ + + for (i = 0; i < c->qos_sqs_size; i++) { + struct mlx5e_txqsq *sq; + + sq = mlx5e_state_dereference(c->priv, qos_sqs[i]); + if (!sq) /* Handle the case when the SQ failed to open. */ + continue; + + mlx5e_close_txqsq(sq); + mlx5e_close_cq(&sq->cq); + kfree(sq); + } + + kvfree(qos_sqs); +} + +static void mlx5e_qos_close_all_queues(struct mlx5e_channels *chs) +{ + int i; + + for (i = 0; i < chs->num; i++) + mlx5e_qos_close_queues(chs->c[i]); +} + +static int mlx5e_qos_alloc_queues(struct mlx5e_priv *priv, struct mlx5e_channels *chs) +{ + u16 qos_sqs_size; + int i; + + qos_sqs_size = DIV_ROUND_UP(mlx5e_qos_max_leaf_nodes(priv->mdev), chs->num); + + for (i = 0; i < chs->num; i++) { + struct mlx5e_txqsq **sqs; + + sqs = kvcalloc(qos_sqs_size, sizeof(struct mlx5e_txqsq *), GFP_KERNEL); + if (!sqs) + goto err_free; + + WRITE_ONCE(chs->c[i]->qos_sqs_size, qos_sqs_size); + smp_wmb(); /* Pairs with mlx5e_napi_poll. */ + rcu_assign_pointer(chs->c[i]->qos_sqs, sqs); + } + + return 0; + +err_free: + while (--i >= 0) { + struct mlx5e_txqsq **sqs; + + sqs = rcu_replace_pointer(chs->c[i]->qos_sqs, NULL, + lockdep_is_held(&priv->state_lock)); + + synchronize_rcu(); /* Sync with NAPI. */ + kvfree(sqs); + } + return -ENOMEM; +} + +int mlx5e_qos_open_queues(struct mlx5e_priv *priv, struct mlx5e_channels *chs) +{ + struct mlx5e_qos_node *node = NULL; + int bkt, err; + + if (!priv->htb.maj_id) + return 0; + + err = mlx5e_qos_alloc_queues(priv, chs); + if (err) + return err; + + hash_for_each(priv->htb.qos_tc2node, bkt, node, hnode) { + if (node->qid == MLX5E_QOS_QID_INNER) + continue; + err = mlx5e_open_qos_sq(priv, chs, node); + if (err) { + mlx5e_qos_close_all_queues(chs); + return err; + } + } + + return 0; +} + +void mlx5e_qos_activate_queues(struct mlx5e_priv *priv) +{ + struct mlx5e_qos_node *node = NULL; + int bkt; + + hash_for_each(priv->htb.qos_tc2node, bkt, node, hnode) { + if (node->qid == MLX5E_QOS_QID_INNER) + continue; + mlx5e_activate_qos_sq(priv, node); + } +} + +void mlx5e_qos_deactivate_queues(struct mlx5e_channel *c) +{ + struct mlx5e_params *params = &c->priv->channels.params; + struct mlx5e_txqsq __rcu **qos_sqs; + int i; + + qos_sqs = mlx5e_state_dereference(c->priv, c->qos_sqs); + if (!qos_sqs) + return; + + for (i = 0; i < c->qos_sqs_size; i++) { + u16 qid = params->num_channels * i + c->ix; + struct mlx5e_txqsq *sq; + + sq = mlx5e_state_dereference(c->priv, qos_sqs[i]); + if (!sq) /* Handle the case when the SQ failed to open. */ + continue; + + qos_dbg(c->mdev, "Deactivate QoS SQ qid %u\n", qid); + mlx5e_deactivate_txqsq(sq); + + /* The queue is disabled, no synchronization with datapath is needed. */ + c->priv->txq2sq[mlx5e_qid_from_qos(&c->priv->channels, qid)] = NULL; + } +} + +static void mlx5e_qos_deactivate_all_queues(struct mlx5e_channels *chs) +{ + int i; + + for (i = 0; i < chs->num; i++) + mlx5e_qos_deactivate_queues(chs->c[i]); +} + +/* HTB API */ + +int mlx5e_htb_root_add(struct mlx5e_priv *priv, u16 htb_maj_id, u16 htb_defcls, + struct netlink_ext_ack *extack) +{ + struct mlx5e_qos_node *root; + bool opened; + int err; + + qos_dbg(priv->mdev, "TC_HTB_CREATE handle %04x:, default :%04x\n", htb_maj_id, htb_defcls); + + if (!mlx5_qos_is_supported(priv->mdev)) { + NL_SET_ERR_MSG_MOD(extack, + "Missing QoS capabilities. Try disabling SRIOV or use a supported device."); + return -EOPNOTSUPP; + } + + opened = test_bit(MLX5E_STATE_OPENED, &priv->state); + if (opened) { + err = mlx5e_qos_alloc_queues(priv, &priv->channels); + if (err) + return err; + } + + root = mlx5e_sw_node_create_root(priv); + if (IS_ERR(root)) { + err = PTR_ERR(root); + goto err_free_queues; + } + + err = mlx5_qos_create_root_node(priv->mdev, &root->hw_id); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Firmware error. Try upgrading firmware."); + goto err_sw_node_delete; + } + + WRITE_ONCE(priv->htb.defcls, htb_defcls); + /* Order maj_id after defcls - pairs with + * mlx5e_select_queue/mlx5e_select_htb_queues. + */ + smp_store_release(&priv->htb.maj_id, htb_maj_id); + + return 0; + +err_sw_node_delete: + mlx5e_sw_node_delete(priv, root); + +err_free_queues: + if (opened) + mlx5e_qos_close_all_queues(&priv->channels); + return err; +} + +int mlx5e_htb_root_del(struct mlx5e_priv *priv) +{ + struct mlx5e_qos_node *root; + int err; + + qos_dbg(priv->mdev, "TC_HTB_DESTROY\n"); + + WRITE_ONCE(priv->htb.maj_id, 0); + synchronize_rcu(); /* Sync with mlx5e_select_htb_queue and TX data path. */ + + root = mlx5e_sw_node_find(priv, MLX5E_HTB_CLASSID_ROOT); + if (!root) { + qos_err(priv->mdev, "Failed to find the root node in the QoS tree\n"); + return -ENOENT; + } + err = mlx5_qos_destroy_node(priv->mdev, root->hw_id); + if (err) + qos_err(priv->mdev, "Failed to destroy root node %u, err = %d\n", + root->hw_id, err); + mlx5e_sw_node_delete(priv, root); + + mlx5e_qos_deactivate_all_queues(&priv->channels); + mlx5e_qos_close_all_queues(&priv->channels); + + return err; +} + +static int mlx5e_htb_convert_rate(struct mlx5e_priv *priv, u64 rate, + struct mlx5e_qos_node *parent, u32 *bw_share) +{ + u64 share = 0; + + while (parent->classid != MLX5E_HTB_CLASSID_ROOT && !parent->max_average_bw) + parent = parent->parent; + + if (parent->max_average_bw) + share = div64_u64(div_u64(rate * 100, BYTES_IN_MBIT), + parent->max_average_bw); + else + share = 101; + + *bw_share = share == 0 ? 1 : share > 100 ? 0 : share; + + qos_dbg(priv->mdev, "Convert: rate %llu, parent ceil %llu -> bw_share %u\n", + rate, (u64)parent->max_average_bw * BYTES_IN_MBIT, *bw_share); + + return 0; +} + +static void mlx5e_htb_convert_ceil(struct mlx5e_priv *priv, u64 ceil, u32 *max_average_bw) +{ + *max_average_bw = div_u64(ceil, BYTES_IN_MBIT); + + qos_dbg(priv->mdev, "Convert: ceil %llu -> max_average_bw %u\n", + ceil, *max_average_bw); +} + +int mlx5e_htb_leaf_alloc_queue(struct mlx5e_priv *priv, u16 classid, + u32 parent_classid, u64 rate, u64 ceil, + struct netlink_ext_ack *extack) +{ + struct mlx5e_qos_node *node, *parent; + int qid; + int err; + + qos_dbg(priv->mdev, "TC_HTB_LEAF_ALLOC_QUEUE classid %04x, parent %04x, rate %llu, ceil %llu\n", + classid, parent_classid, rate, ceil); + + qid = mlx5e_find_unused_qos_qid(priv); + if (qid < 0) { + NL_SET_ERR_MSG_MOD(extack, "Maximum amount of leaf classes is reached."); + return qid; + } + + parent = mlx5e_sw_node_find(priv, parent_classid); + if (!parent) + return -EINVAL; + + node = mlx5e_sw_node_create_leaf(priv, classid, qid, parent); + if (IS_ERR(node)) + return PTR_ERR(node); + + node->rate = rate; + mlx5e_htb_convert_rate(priv, rate, node->parent, &node->bw_share); + mlx5e_htb_convert_ceil(priv, ceil, &node->max_average_bw); + + err = mlx5_qos_create_leaf_node(priv->mdev, node->parent->hw_id, + node->bw_share, node->max_average_bw, + &node->hw_id); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Firmware error when creating a leaf node."); + qos_err(priv->mdev, "Failed to create a leaf node (class %04x), err = %d\n", + classid, err); + mlx5e_sw_node_delete(priv, node); + return err; + } + + if (test_bit(MLX5E_STATE_OPENED, &priv->state)) { + err = mlx5e_open_qos_sq(priv, &priv->channels, node); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Error creating an SQ."); + qos_warn(priv->mdev, "Failed to create a QoS SQ (class %04x), err = %d\n", + classid, err); + } else { + mlx5e_activate_qos_sq(priv, node); + } + } + + return mlx5e_qid_from_qos(&priv->channels, node->qid); +} + +int mlx5e_htb_leaf_to_inner(struct mlx5e_priv *priv, u16 classid, u16 child_classid, + u64 rate, u64 ceil, struct netlink_ext_ack *extack) +{ + struct mlx5e_qos_node *node, *child; + int err, tmp_err; + u32 new_hw_id; + u16 qid; + + qos_dbg(priv->mdev, "TC_HTB_LEAF_TO_INNER classid %04x, upcoming child %04x, rate %llu, ceil %llu\n", + classid, child_classid, rate, ceil); + + node = mlx5e_sw_node_find(priv, classid); + if (!node) + return -ENOENT; + + err = mlx5_qos_create_inner_node(priv->mdev, node->parent->hw_id, + node->bw_share, node->max_average_bw, + &new_hw_id); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Firmware error when creating an inner node."); + qos_err(priv->mdev, "Failed to create an inner node (class %04x), err = %d\n", + classid, err); + return err; + } + + /* Intentionally reuse the qid for the upcoming first child. */ + child = mlx5e_sw_node_create_leaf(priv, child_classid, node->qid, node); + if (IS_ERR(child)) { + err = PTR_ERR(child); + goto err_destroy_hw_node; + } + + child->rate = rate; + mlx5e_htb_convert_rate(priv, rate, node, &child->bw_share); + mlx5e_htb_convert_ceil(priv, ceil, &child->max_average_bw); + + err = mlx5_qos_create_leaf_node(priv->mdev, new_hw_id, child->bw_share, + child->max_average_bw, &child->hw_id); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Firmware error when creating a leaf node."); + qos_err(priv->mdev, "Failed to create a leaf node (class %04x), err = %d\n", + classid, err); + goto err_delete_sw_node; + } + + /* No fail point. */ + + qid = node->qid; + /* Pairs with mlx5e_get_txq_by_classid. */ + WRITE_ONCE(node->qid, MLX5E_QOS_QID_INNER); + + if (test_bit(MLX5E_STATE_OPENED, &priv->state)) { + mlx5e_deactivate_qos_sq(priv, qid); + mlx5e_close_qos_sq(priv, qid); + } + + err = mlx5_qos_destroy_node(priv->mdev, node->hw_id); + if (err) /* Not fatal. */ + qos_warn(priv->mdev, "Failed to destroy leaf node %u (class %04x), err = %d\n", + node->hw_id, classid, err); + + node->hw_id = new_hw_id; + + if (test_bit(MLX5E_STATE_OPENED, &priv->state)) { + err = mlx5e_open_qos_sq(priv, &priv->channels, child); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Error creating an SQ."); + qos_warn(priv->mdev, "Failed to create a QoS SQ (class %04x), err = %d\n", + classid, err); + } else { + mlx5e_activate_qos_sq(priv, child); + } + } + + return 0; + +err_delete_sw_node: + child->qid = MLX5E_QOS_QID_INNER; + mlx5e_sw_node_delete(priv, child); + +err_destroy_hw_node: + tmp_err = mlx5_qos_destroy_node(priv->mdev, new_hw_id); + if (tmp_err) /* Not fatal. */ + qos_warn(priv->mdev, "Failed to roll back creation of an inner node %u (class %04x), err = %d\n", + new_hw_id, classid, tmp_err); + return err; +} + +static struct mlx5e_qos_node *mlx5e_sw_node_find_by_qid(struct mlx5e_priv *priv, u16 qid) +{ + struct mlx5e_qos_node *node = NULL; + int bkt; + + hash_for_each(priv->htb.qos_tc2node, bkt, node, hnode) + if (node->qid == qid) + break; + + return node; +} + +static void mlx5e_reactivate_qos_sq(struct mlx5e_priv *priv, u16 qid, struct netdev_queue *txq) +{ + qos_dbg(priv->mdev, "Reactivate QoS SQ qid %u\n", qid); + netdev_tx_reset_queue(txq); + netif_tx_start_queue(txq); +} + +static void mlx5e_reset_qdisc(struct net_device *dev, u16 qid) +{ + struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, qid); + struct Qdisc *qdisc = dev_queue->qdisc_sleeping; + + if (!qdisc) + return; + + spin_lock_bh(qdisc_lock(qdisc)); + qdisc_reset(qdisc); + spin_unlock_bh(qdisc_lock(qdisc)); +} + +int mlx5e_htb_leaf_del(struct mlx5e_priv *priv, u16 classid, u16 *old_qid, + u16 *new_qid, struct netlink_ext_ack *extack) +{ + struct mlx5e_qos_node *node; + struct netdev_queue *txq; + u16 qid, moved_qid; + bool opened; + int err; + + qos_dbg(priv->mdev, "TC_HTB_LEAF_DEL classid %04x\n", classid); + + *old_qid = *new_qid = 0; + + node = mlx5e_sw_node_find(priv, classid); + if (!node) + return -ENOENT; + + /* Store qid for reuse. */ + qid = node->qid; + + opened = test_bit(MLX5E_STATE_OPENED, &priv->state); + if (opened) { + txq = netdev_get_tx_queue(priv->netdev, + mlx5e_qid_from_qos(&priv->channels, qid)); + mlx5e_deactivate_qos_sq(priv, qid); + mlx5e_close_qos_sq(priv, qid); + } + + err = mlx5_qos_destroy_node(priv->mdev, node->hw_id); + if (err) /* Not fatal. */ + qos_warn(priv->mdev, "Failed to destroy leaf node %u (class %04x), err = %d\n", + node->hw_id, classid, err); + + mlx5e_sw_node_delete(priv, node); + + moved_qid = mlx5e_qos_cur_leaf_nodes(priv); + + if (moved_qid == 0) { + /* The last QoS SQ was just destroyed. */ + if (opened) + mlx5e_reactivate_qos_sq(priv, qid, txq); + return 0; + } + moved_qid--; + + if (moved_qid < qid) { + /* The highest QoS SQ was just destroyed. */ + WARN(moved_qid != qid - 1, "Gaps in queue numeration: destroyed queue %u, the highest queue is %u", + qid, moved_qid); + if (opened) + mlx5e_reactivate_qos_sq(priv, qid, txq); + return 0; + } + + WARN(moved_qid == qid, "Can't move node with qid %u to itself", qid); + qos_dbg(priv->mdev, "Moving QoS SQ %u to %u\n", moved_qid, qid); + + node = mlx5e_sw_node_find_by_qid(priv, moved_qid); + WARN(!node, "Could not find a node with qid %u to move to queue %u", + moved_qid, qid); + + /* Stop traffic to the old queue. */ + WRITE_ONCE(node->qid, MLX5E_QOS_QID_INNER); + __clear_bit(moved_qid, priv->htb.qos_used_qids); + + if (opened) { + txq = netdev_get_tx_queue(priv->netdev, + mlx5e_qid_from_qos(&priv->channels, moved_qid)); + mlx5e_deactivate_qos_sq(priv, moved_qid); + mlx5e_close_qos_sq(priv, moved_qid); + } + + /* Prevent packets from the old class from getting into the new one. */ + mlx5e_reset_qdisc(priv->netdev, moved_qid); + + __set_bit(qid, priv->htb.qos_used_qids); + WRITE_ONCE(node->qid, qid); + + if (test_bit(MLX5E_STATE_OPENED, &priv->state)) { + err = mlx5e_open_qos_sq(priv, &priv->channels, node); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Error creating an SQ."); + qos_warn(priv->mdev, "Failed to create a QoS SQ (class %04x) while moving qid %u to %u, err = %d\n", + node->classid, moved_qid, qid, err); + } else { + mlx5e_activate_qos_sq(priv, node); + } + } + + mlx5e_update_tx_netdev_queues(priv); + if (opened) + mlx5e_reactivate_qos_sq(priv, moved_qid, txq); + + *old_qid = mlx5e_qid_from_qos(&priv->channels, moved_qid); + *new_qid = mlx5e_qid_from_qos(&priv->channels, qid); + return 0; +} + +int mlx5e_htb_leaf_del_last(struct mlx5e_priv *priv, u16 classid, bool force, + struct netlink_ext_ack *extack) +{ + struct mlx5e_qos_node *node, *parent; + u32 old_hw_id, new_hw_id; + int err, saved_err = 0; + u16 qid; + + qos_dbg(priv->mdev, "TC_HTB_LEAF_DEL_LAST%s classid %04x\n", + force ? "_FORCE" : "", classid); + + node = mlx5e_sw_node_find(priv, classid); + if (!node) + return -ENOENT; + + err = mlx5_qos_create_leaf_node(priv->mdev, node->parent->parent->hw_id, + node->parent->bw_share, + node->parent->max_average_bw, + &new_hw_id); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Firmware error when creating a leaf node."); + qos_err(priv->mdev, "Failed to create a leaf node (class %04x), err = %d\n", + classid, err); + if (!force) + return err; + saved_err = err; + } + + /* Store qid for reuse and prevent clearing the bit. */ + qid = node->qid; + /* Pairs with mlx5e_get_txq_by_classid. */ + WRITE_ONCE(node->qid, MLX5E_QOS_QID_INNER); + + if (test_bit(MLX5E_STATE_OPENED, &priv->state)) { + mlx5e_deactivate_qos_sq(priv, qid); + mlx5e_close_qos_sq(priv, qid); + } + + /* Prevent packets from the old class from getting into the new one. */ + mlx5e_reset_qdisc(priv->netdev, qid); + + err = mlx5_qos_destroy_node(priv->mdev, node->hw_id); + if (err) /* Not fatal. */ + qos_warn(priv->mdev, "Failed to destroy leaf node %u (class %04x), err = %d\n", + node->hw_id, classid, err); + + parent = node->parent; + mlx5e_sw_node_delete(priv, node); + + node = parent; + WRITE_ONCE(node->qid, qid); + + /* Early return on error in force mode. Parent will still be an inner + * node to be deleted by a following delete operation. + */ + if (saved_err) + return saved_err; + + old_hw_id = node->hw_id; + node->hw_id = new_hw_id; + + if (test_bit(MLX5E_STATE_OPENED, &priv->state)) { + err = mlx5e_open_qos_sq(priv, &priv->channels, node); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Error creating an SQ."); + qos_warn(priv->mdev, "Failed to create a QoS SQ (class %04x), err = %d\n", + classid, err); + } else { + mlx5e_activate_qos_sq(priv, node); + } + } + + err = mlx5_qos_destroy_node(priv->mdev, old_hw_id); + if (err) /* Not fatal. */ + qos_warn(priv->mdev, "Failed to destroy leaf node %u (class %04x), err = %d\n", + node->hw_id, classid, err); + + return 0; +} + +static int mlx5e_qos_update_children(struct mlx5e_priv *priv, struct mlx5e_qos_node *node, + struct netlink_ext_ack *extack) +{ + struct mlx5e_qos_node *child; + int err = 0; + int bkt; + + hash_for_each(priv->htb.qos_tc2node, bkt, child, hnode) { + u32 old_bw_share = child->bw_share; + int err_one; + + if (child->parent != node) + continue; + + mlx5e_htb_convert_rate(priv, child->rate, node, &child->bw_share); + if (child->bw_share == old_bw_share) + continue; + + err_one = mlx5_qos_update_node(priv->mdev, child->hw_id, child->bw_share, + child->max_average_bw, child->hw_id); + if (!err && err_one) { + err = err_one; + + NL_SET_ERR_MSG_MOD(extack, "Firmware error when modifying a child node."); + qos_err(priv->mdev, "Failed to modify a child node (class %04x), err = %d\n", + node->classid, err); + } + } + + return err; +} + +int mlx5e_htb_node_modify(struct mlx5e_priv *priv, u16 classid, u64 rate, u64 ceil, + struct netlink_ext_ack *extack) +{ + u32 bw_share, max_average_bw; + struct mlx5e_qos_node *node; + bool ceil_changed = false; + int err; + + qos_dbg(priv->mdev, "TC_HTB_LEAF_MODIFY classid %04x, rate %llu, ceil %llu\n", + classid, rate, ceil); + + node = mlx5e_sw_node_find(priv, classid); + if (!node) + return -ENOENT; + + node->rate = rate; + mlx5e_htb_convert_rate(priv, rate, node->parent, &bw_share); + mlx5e_htb_convert_ceil(priv, ceil, &max_average_bw); + + err = mlx5_qos_update_node(priv->mdev, node->parent->hw_id, bw_share, + max_average_bw, node->hw_id); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Firmware error when modifying a node."); + qos_err(priv->mdev, "Failed to modify a node (class %04x), err = %d\n", + classid, err); + return err; + } + + if (max_average_bw != node->max_average_bw) + ceil_changed = true; + + node->bw_share = bw_share; + node->max_average_bw = max_average_bw; + + if (ceil_changed) + err = mlx5e_qos_update_children(priv, node, extack); + + return err; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/qos.h b/drivers/net/ethernet/mellanox/mlx5/core/en/qos.h new file mode 100644 index 0000000000000..5af7991fcd194 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/qos.h @@ -0,0 +1,44 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2020, Mellanox Technologies inc. All rights reserved. */ + +#ifndef __MLX5E_EN_QOS_H +#define __MLX5E_EN_QOS_H + +#include + +#define MLX5E_QOS_MAX_LEAF_NODES 256 + +struct mlx5e_priv; +struct mlx5e_channels; +struct mlx5e_channel; + +int mlx5e_qos_max_leaf_nodes(struct mlx5_core_dev *mdev); +int mlx5e_qos_cur_leaf_nodes(struct mlx5e_priv *priv); + +/* TX datapath API */ +int mlx5e_get_txq_by_classid(struct mlx5e_priv *priv, u16 classid); +struct mlx5e_txqsq *mlx5e_get_sq(struct mlx5e_priv *priv, int qid); + +/* SQ lifecycle */ +int mlx5e_qos_open_queues(struct mlx5e_priv *priv, struct mlx5e_channels *chs); +void mlx5e_qos_activate_queues(struct mlx5e_priv *priv); +void mlx5e_qos_deactivate_queues(struct mlx5e_channel *c); +void mlx5e_qos_close_queues(struct mlx5e_channel *c); + +/* HTB API */ +int mlx5e_htb_root_add(struct mlx5e_priv *priv, u16 htb_maj_id, u16 htb_defcls, + struct netlink_ext_ack *extack); +int mlx5e_htb_root_del(struct mlx5e_priv *priv); +int mlx5e_htb_leaf_alloc_queue(struct mlx5e_priv *priv, u16 classid, + u32 parent_classid, u64 rate, u64 ceil, + struct netlink_ext_ack *extack); +int mlx5e_htb_leaf_to_inner(struct mlx5e_priv *priv, u16 classid, u16 child_classid, + u64 rate, u64 ceil, struct netlink_ext_ack *extack); +int mlx5e_htb_leaf_del(struct mlx5e_priv *priv, u16 classid, u16 *old_qid, + u16 *new_qid, struct netlink_ext_ack *extack); +int mlx5e_htb_leaf_del_last(struct mlx5e_priv *priv, u16 classid, bool force, + struct netlink_ext_ack *extack); +int mlx5e_htb_node_modify(struct mlx5e_priv *priv, u16 classid, u64 rate, u64 ceil, + struct netlink_ext_ack *extack); + +#endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index 2d37742a888c1..2e5a0696374ac 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -447,6 +447,17 @@ int mlx5e_ethtool_set_channels(struct mlx5e_priv *priv, goto out; } + /* Don't allow changing the number of channels if HTB offload is active, + * because the numeration of the QoS SQs will change, while per-queue + * qdiscs are attached. + */ + if (priv->htb.maj_id) { + err = -EINVAL; + netdev_err(priv->netdev, "%s: HTB offload is active, cannot change the number of channels\n", + __func__); + goto out; + } + new_channels.params = priv->channels.params; new_channels.params.num_channels = count; @@ -1966,6 +1977,16 @@ static int set_pflag_tx_port_ts(struct net_device *netdev, bool enable) if (!MLX5_CAP_GEN(mdev, ts_cqe_to_dest_cqn)) return -EOPNOTSUPP; + /* Don't allow changing the PTP state if HTB offload is active, because + * the numeration of the QoS SQs will change, while per-queue qdiscs are + * attached. + */ + if (priv->htb.maj_id) { + netdev_err(priv->netdev, "%s: HTB offload is active, cannot change the PTP state\n", + __func__); + return -EINVAL; + } + new_channels.params = priv->channels.params; MLX5E_SET_PFLAG(&new_channels.params, MLX5E_PFLAG_TX_PORT_TS, enable); /* No need to verify SQ stop room as diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index f33c38629886a..b9a175982801b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -65,6 +65,7 @@ #include "en/devlink.h" #include "lib/mlx5.h" #include "en/ptp.h" +#include "qos.h" bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev) { @@ -1143,7 +1144,6 @@ static int mlx5e_alloc_txqsq(struct mlx5e_channel *c, sq->uar_map = mdev->mlx5e_res.bfreg.map; sq->min_inline_mode = params->tx_min_inline_mode; sq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu); - sq->stats = &c->priv->channel_stats[c->ix].sq[tc]; INIT_WORK(&sq->recover_work, mlx5e_tx_err_cqe_work); if (!MLX5_CAP_ETH(mdev, wqe_vlan_insert)) set_bit(MLX5E_SQ_STATE_VLAN_NEED_L2_INLINE, &sq->state); @@ -1233,6 +1233,7 @@ static int mlx5e_create_sq(struct mlx5_core_dev *mdev, int mlx5e_modify_sq(struct mlx5_core_dev *mdev, u32 sqn, struct mlx5e_modify_sq_param *p) { + u64 bitmask = 0; void *in; void *sqc; int inlen; @@ -1248,9 +1249,14 @@ int mlx5e_modify_sq(struct mlx5_core_dev *mdev, u32 sqn, MLX5_SET(modify_sq_in, in, sq_state, p->curr_state); MLX5_SET(sqc, sqc, state, p->next_state); if (p->rl_update && p->next_state == MLX5_SQC_STATE_RDY) { - MLX5_SET64(modify_sq_in, in, modify_bitmask, 1); - MLX5_SET(sqc, sqc, packet_pacing_rate_limit_index, p->rl_index); + bitmask |= 1; + MLX5_SET(sqc, sqc, packet_pacing_rate_limit_index, p->rl_index); } + if (p->qos_update && p->next_state == MLX5_SQC_STATE_RDY) { + bitmask |= 1 << 2; + MLX5_SET(sqc, sqc, qos_queue_group_id, p->qos_queue_group_id); + } + MLX5_SET64(modify_sq_in, in, modify_bitmask, bitmask); err = mlx5_core_modify_sq(mdev, sqn, in); @@ -1267,6 +1273,7 @@ static void mlx5e_destroy_sq(struct mlx5_core_dev *mdev, u32 sqn) int mlx5e_create_sq_rdy(struct mlx5_core_dev *mdev, struct mlx5e_sq_param *param, struct mlx5e_create_sq_param *csp, + u16 qos_queue_group_id, u32 *sqn) { struct mlx5e_modify_sq_param msp = {0}; @@ -1278,6 +1285,10 @@ int mlx5e_create_sq_rdy(struct mlx5_core_dev *mdev, msp.curr_state = MLX5_SQC_STATE_RST; msp.next_state = MLX5_SQC_STATE_RDY; + if (qos_queue_group_id) { + msp.qos_update = true; + msp.qos_queue_group_id = qos_queue_group_id; + } err = mlx5e_modify_sq(mdev, *sqn, &msp); if (err) mlx5e_destroy_sq(mdev, *sqn); @@ -1288,13 +1299,9 @@ int mlx5e_create_sq_rdy(struct mlx5_core_dev *mdev, static int mlx5e_set_sq_maxrate(struct net_device *dev, struct mlx5e_txqsq *sq, u32 rate); -static int mlx5e_open_txqsq(struct mlx5e_channel *c, - u32 tisn, - int txq_ix, - struct mlx5e_params *params, - struct mlx5e_sq_param *param, - struct mlx5e_txqsq *sq, - int tc) +int mlx5e_open_txqsq(struct mlx5e_channel *c, u32 tisn, int txq_ix, + struct mlx5e_params *params, struct mlx5e_sq_param *param, + struct mlx5e_txqsq *sq, int tc, u16 qos_queue_group_id, u16 qos_qid) { struct mlx5e_create_sq_param csp = {}; u32 tx_rate; @@ -1304,12 +1311,17 @@ static int mlx5e_open_txqsq(struct mlx5e_channel *c, if (err) return err; + if (qos_queue_group_id) + sq->stats = c->priv->htb.qos_sq_stats[qos_qid]; + else + sq->stats = &c->priv->channel_stats[c->ix].sq[tc]; + csp.tisn = tisn; csp.tis_lst_sz = 1; csp.cqn = sq->cq.mcq.cqn; csp.wq_ctrl = &sq->wq_ctrl; csp.min_inline_mode = sq->min_inline_mode; - err = mlx5e_create_sq_rdy(c->mdev, param, &csp, &sq->sqn); + err = mlx5e_create_sq_rdy(c->mdev, param, &csp, qos_queue_group_id, &sq->sqn); if (err) goto err_free_txqsq; @@ -1366,7 +1378,7 @@ void mlx5e_deactivate_txqsq(struct mlx5e_txqsq *sq) } } -static void mlx5e_close_txqsq(struct mlx5e_txqsq *sq) +void mlx5e_close_txqsq(struct mlx5e_txqsq *sq) { struct mlx5_core_dev *mdev = sq->mdev; struct mlx5_rate_limit rl = {0}; @@ -1403,7 +1415,7 @@ int mlx5e_open_icosq(struct mlx5e_channel *c, struct mlx5e_params *params, csp.cqn = sq->cq.mcq.cqn; csp.wq_ctrl = &sq->wq_ctrl; csp.min_inline_mode = params->tx_min_inline_mode; - err = mlx5e_create_sq_rdy(c->mdev, param, &csp, &sq->sqn); + err = mlx5e_create_sq_rdy(c->mdev, param, &csp, 0, &sq->sqn); if (err) goto err_free_icosq; @@ -1452,7 +1464,7 @@ int mlx5e_open_xdpsq(struct mlx5e_channel *c, struct mlx5e_params *params, csp.wq_ctrl = &sq->wq_ctrl; csp.min_inline_mode = sq->min_inline_mode; set_bit(MLX5E_SQ_STATE_ENABLED, &sq->state); - err = mlx5e_create_sq_rdy(c->mdev, param, &csp, &sq->sqn); + err = mlx5e_create_sq_rdy(c->mdev, param, &csp, 0, &sq->sqn); if (err) goto err_free_xdpsq; @@ -1703,7 +1715,7 @@ static int mlx5e_open_sqs(struct mlx5e_channel *c, int txq_ix = c->ix + tc * params->num_channels; err = mlx5e_open_txqsq(c, c->priv->tisn[c->lag_port][tc], txq_ix, - params, &cparam->txq_sq, &c->sq[tc], tc); + params, &cparam->txq_sq, &c->sq[tc], tc, 0, 0); if (err) goto err_close_sqs; } @@ -2044,6 +2056,8 @@ static void mlx5e_deactivate_channel(struct mlx5e_channel *c) mlx5e_deactivate_icosq(&c->icosq); for (tc = 0; tc < c->num_tc; tc++) mlx5e_deactivate_txqsq(&c->sq[tc]); + + mlx5e_qos_deactivate_queues(c); } static void mlx5e_close_channel(struct mlx5e_channel *c) @@ -2051,6 +2065,7 @@ static void mlx5e_close_channel(struct mlx5e_channel *c) if (test_bit(MLX5E_CHANNEL_STATE_XSK, c->state)) mlx5e_close_xsk(c); mlx5e_close_queues(c); + mlx5e_qos_close_queues(c); netif_napi_del(&c->napi); kvfree(c); @@ -2198,9 +2213,8 @@ void mlx5e_build_sq_param_common(struct mlx5e_priv *priv, param->wq.buf_numa_node = dev_to_node(mlx5_core_dma_dev(priv->mdev)); } -static void mlx5e_build_sq_param(struct mlx5e_priv *priv, - struct mlx5e_params *params, - struct mlx5e_sq_param *param) +void mlx5e_build_sq_param(struct mlx5e_priv *priv, struct mlx5e_params *params, + struct mlx5e_sq_param *param) { void *sqc = param->sqc; void *wq = MLX5_ADDR_OF(sqc, sqc, wq); @@ -2379,10 +2393,18 @@ int mlx5e_open_channels(struct mlx5e_priv *priv, goto err_close_channels; } + err = mlx5e_qos_open_queues(priv, chs); + if (err) + goto err_close_ptp; + mlx5e_health_channels_update(priv); kvfree(cparam); return 0; +err_close_ptp: + if (chs->port_ptp) + mlx5e_port_ptp_close(chs->port_ptp); + err_close_channels: for (i--; i >= 0; i--) mlx5e_close_channel(chs->c[i]); @@ -2915,11 +2937,31 @@ static void mlx5e_netdev_set_tcs(struct net_device *netdev, u16 nch, u8 ntc) netdev_set_tc_queue(netdev, tc, nch, 0); } +int mlx5e_update_tx_netdev_queues(struct mlx5e_priv *priv) +{ + int qos_queues, nch, ntc, num_txqs, err; + + qos_queues = mlx5e_qos_cur_leaf_nodes(priv); + + nch = priv->channels.params.num_channels; + ntc = priv->channels.params.num_tc; + num_txqs = nch * ntc + qos_queues; + if (MLX5E_GET_PFLAG(&priv->channels.params, MLX5E_PFLAG_TX_PORT_TS)) + num_txqs += ntc; + + mlx5e_dbg(DRV, priv, "Setting num_txqs %d\n", num_txqs); + err = netif_set_real_num_tx_queues(priv->netdev, num_txqs); + if (err) + netdev_warn(priv->netdev, "netif_set_real_num_tx_queues failed, %d\n", err); + + return err; +} + static int mlx5e_update_netdev_queues(struct mlx5e_priv *priv) { struct net_device *netdev = priv->netdev; - int num_txqs, num_rxqs, nch, ntc; int old_num_txqs, old_ntc; + int num_rxqs, nch, ntc; int err; old_num_txqs = netdev->real_num_tx_queues; @@ -2927,18 +2969,13 @@ static int mlx5e_update_netdev_queues(struct mlx5e_priv *priv) nch = priv->channels.params.num_channels; ntc = priv->channels.params.num_tc; - num_txqs = nch * ntc; - if (MLX5E_GET_PFLAG(&priv->channels.params, MLX5E_PFLAG_TX_PORT_TS)) - num_txqs += ntc; num_rxqs = nch * priv->profile->rq_groups; mlx5e_netdev_set_tcs(netdev, nch, ntc); - err = netif_set_real_num_tx_queues(netdev, num_txqs); - if (err) { - netdev_warn(netdev, "netif_set_real_num_tx_queues failed, %d\n", err); + err = mlx5e_update_tx_netdev_queues(priv); + if (err) goto err_tcs; - } err = netif_set_real_num_rx_queues(netdev, num_rxqs); if (err) { netdev_warn(netdev, "netif_set_real_num_rx_queues failed, %d\n", err); @@ -3042,6 +3079,7 @@ void mlx5e_activate_priv_channels(struct mlx5e_priv *priv) mlx5e_update_num_tc_x_num_ch(priv); mlx5e_build_txq_maps(priv); mlx5e_activate_channels(&priv->channels); + mlx5e_qos_activate_queues(priv); mlx5e_xdp_tx_enable(priv); netif_tx_start_all_queues(priv->netdev); @@ -3608,6 +3646,14 @@ static int mlx5e_setup_tc_mqprio(struct mlx5e_priv *priv, mutex_lock(&priv->state_lock); + /* MQPRIO is another toplevel qdisc that can't be attached + * simultaneously with the offloaded HTB. + */ + if (WARN_ON(priv->htb.maj_id)) { + err = -EINVAL; + goto out; + } + new_channels.params = priv->channels.params; new_channels.params.num_tc = tc ? tc : 1; @@ -3628,12 +3674,55 @@ out: return err; } +static int mlx5e_setup_tc_htb(struct mlx5e_priv *priv, struct tc_htb_qopt_offload *htb) +{ + int res; + + switch (htb->command) { + case TC_HTB_CREATE: + return mlx5e_htb_root_add(priv, htb->parent_classid, htb->classid, + htb->extack); + case TC_HTB_DESTROY: + return mlx5e_htb_root_del(priv); + case TC_HTB_LEAF_ALLOC_QUEUE: + res = mlx5e_htb_leaf_alloc_queue(priv, htb->classid, htb->parent_classid, + htb->rate, htb->ceil, htb->extack); + if (res < 0) + return res; + htb->qid = res; + return 0; + case TC_HTB_LEAF_TO_INNER: + return mlx5e_htb_leaf_to_inner(priv, htb->parent_classid, htb->classid, + htb->rate, htb->ceil, htb->extack); + case TC_HTB_LEAF_DEL: + return mlx5e_htb_leaf_del(priv, htb->classid, &htb->moved_qid, &htb->qid, + htb->extack); + case TC_HTB_LEAF_DEL_LAST: + case TC_HTB_LEAF_DEL_LAST_FORCE: + return mlx5e_htb_leaf_del_last(priv, htb->classid, + htb->command == TC_HTB_LEAF_DEL_LAST_FORCE, + htb->extack); + case TC_HTB_NODE_MODIFY: + return mlx5e_htb_node_modify(priv, htb->classid, htb->rate, htb->ceil, + htb->extack); + case TC_HTB_LEAF_QUERY_QUEUE: + res = mlx5e_get_txq_by_classid(priv, htb->classid); + if (res < 0) + return res; + htb->qid = res; + return 0; + default: + return -EOPNOTSUPP; + } +} + static LIST_HEAD(mlx5e_block_cb_list); static int mlx5e_setup_tc(struct net_device *dev, enum tc_setup_type type, void *type_data) { struct mlx5e_priv *priv = netdev_priv(dev); + int err; switch (type) { case TC_SETUP_BLOCK: { @@ -3647,6 +3736,11 @@ static int mlx5e_setup_tc(struct net_device *dev, enum tc_setup_type type, } case TC_SETUP_QDISC_MQPRIO: return mlx5e_setup_tc_mqprio(priv, type_data); + case TC_SETUP_QDISC_HTB: + mutex_lock(&priv->state_lock); + err = mlx5e_setup_tc_htb(priv, type_data); + mutex_unlock(&priv->state_lock); + return err; default: return -EOPNOTSUPP; } @@ -3811,20 +3905,25 @@ static int set_feature_cvlan_filter(struct net_device *netdev, bool enable) return 0; } -#if IS_ENABLED(CONFIG_MLX5_CLS_ACT) -static int set_feature_tc_num_filters(struct net_device *netdev, bool enable) +static int set_feature_hw_tc(struct net_device *netdev, bool enable) { struct mlx5e_priv *priv = netdev_priv(netdev); +#if IS_ENABLED(CONFIG_MLX5_CLS_ACT) if (!enable && mlx5e_tc_num_filters(priv, MLX5_TC_FLAG(NIC_OFFLOAD))) { netdev_err(netdev, "Active offloaded tc filters, can't turn hw_tc_offload off\n"); return -EINVAL; } +#endif + + if (!enable && priv->htb.maj_id) { + netdev_err(netdev, "Active HTB offload, can't turn hw_tc_offload off\n"); + return -EINVAL; + } return 0; } -#endif static int set_feature_rx_all(struct net_device *netdev, bool enable) { @@ -3922,9 +4021,7 @@ int mlx5e_set_features(struct net_device *netdev, netdev_features_t features) err |= MLX5E_HANDLE_FEATURE(NETIF_F_LRO, set_feature_lro); err |= MLX5E_HANDLE_FEATURE(NETIF_F_HW_VLAN_CTAG_FILTER, set_feature_cvlan_filter); -#if IS_ENABLED(CONFIG_MLX5_CLS_ACT) - err |= MLX5E_HANDLE_FEATURE(NETIF_F_HW_TC, set_feature_tc_num_filters); -#endif + err |= MLX5E_HANDLE_FEATURE(NETIF_F_HW_TC, set_feature_hw_tc); err |= MLX5E_HANDLE_FEATURE(NETIF_F_RXALL, set_feature_rx_all); err |= MLX5E_HANDLE_FEATURE(NETIF_F_RXFCS, set_feature_rx_fcs); err |= MLX5E_HANDLE_FEATURE(NETIF_F_HW_VLAN_CTAG_RX, set_feature_rx_vlan); @@ -5028,6 +5125,8 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev) netdev->hw_features |= NETIF_F_NTUPLE; #endif } + if (mlx5_qos_is_supported(mdev)) + netdev->features |= NETIF_F_HW_TC; netdev->features |= NETIF_F_HIGHDMA; netdev->features |= NETIF_F_HW_VLAN_STAG_FILTER; @@ -5333,6 +5432,7 @@ int mlx5e_netdev_init(struct net_device *netdev, return -ENOMEM; mutex_init(&priv->state_lock); + hash_init(priv->htb.qos_tc2node); INIT_WORK(&priv->update_carrier_work, mlx5e_update_carrier_work); INIT_WORK(&priv->set_rx_mode_work, mlx5e_set_rx_mode_work); INIT_WORK(&priv->tx_timeout_work, mlx5e_tx_timeout_work); @@ -5355,8 +5455,14 @@ err_free_cpumask: void mlx5e_netdev_cleanup(struct net_device *netdev, struct mlx5e_priv *priv) { + int i; + destroy_workqueue(priv->wq); free_cpumask_var(priv->scratchpad.cpumask); + + for (i = 0; i < priv->htb.max_qos_sqs; i++) + kfree(priv->htb.qos_sq_stats[i]); + kvfree(priv->htb.qos_sq_stats); } struct net_device *mlx5e_create_netdev(struct mlx5_core_dev *mdev, @@ -5366,13 +5472,17 @@ struct net_device *mlx5e_create_netdev(struct mlx5_core_dev *mdev, { struct net_device *netdev; unsigned int ptp_txqs = 0; + int qos_sqs = 0; int err; if (MLX5_CAP_GEN(mdev, ts_cqe_to_dest_cqn)) ptp_txqs = profile->max_tc; + if (mlx5_qos_is_supported(mdev)) + qos_sqs = mlx5e_qos_max_leaf_nodes(mdev); + netdev = alloc_etherdev_mqs(sizeof(struct mlx5e_priv), - nch * profile->max_tc + ptp_txqs, + nch * profile->max_tc + ptp_txqs + qos_sqs, nch * profile->rq_groups); if (!netdev) { mlx5_core_err(mdev, "alloc_etherdev_mqs() failed\n"); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c index 2cf2042b37c76..92c5b81427b97 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c @@ -420,6 +420,25 @@ static void mlx5e_stats_grp_sw_update_stats_ptp(struct mlx5e_priv *priv, } } +static void mlx5e_stats_grp_sw_update_stats_qos(struct mlx5e_priv *priv, + struct mlx5e_sw_stats *s) +{ + struct mlx5e_sq_stats **stats; + u16 max_qos_sqs; + int i; + + /* Pairs with smp_store_release in mlx5e_open_qos_sq. */ + max_qos_sqs = smp_load_acquire(&priv->htb.max_qos_sqs); + stats = READ_ONCE(priv->htb.qos_sq_stats); + + for (i = 0; i < max_qos_sqs; i++) { + mlx5e_stats_grp_sw_update_stats_sq(s, READ_ONCE(stats[i])); + + /* https://gcc.gnu.org/bugzilla/show_bug.cgi?id=92657 */ + barrier(); + } +} + static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(sw) { struct mlx5e_sw_stats *s = &priv->stats.sw; @@ -449,6 +468,7 @@ static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(sw) } } mlx5e_stats_grp_sw_update_stats_ptp(priv, s); + mlx5e_stats_grp_sw_update_stats_qos(priv, s); } static const struct counter_desc q_stats_desc[] = { @@ -1740,6 +1760,41 @@ static const struct counter_desc ptp_cq_stats_desc[] = { { MLX5E_DECLARE_PTP_CQ_STAT(struct mlx5e_ptp_cq_stats, abort_abs_diff_ns) }, }; +static const struct counter_desc qos_sq_stats_desc[] = { + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, packets) }, + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, bytes) }, + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tso_packets) }, + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tso_bytes) }, + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tso_inner_packets) }, + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tso_inner_bytes) }, + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, csum_partial) }, + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, csum_partial_inner) }, + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, added_vlan_packets) }, + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, nop) }, + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, mpwqe_blks) }, + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, mpwqe_pkts) }, +#ifdef CONFIG_MLX5_EN_TLS + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tls_encrypted_packets) }, + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tls_encrypted_bytes) }, + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tls_ctx) }, + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tls_ooo) }, + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tls_dump_packets) }, + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tls_dump_bytes) }, + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tls_resync_bytes) }, + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tls_skip_no_sync_data) }, + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tls_drop_no_sync_data) }, + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tls_drop_bypass_req) }, +#endif + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, csum_none) }, + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, stopped) }, + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, dropped) }, + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, xmit_more) }, + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, recover) }, + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, cqes) }, + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, wake) }, + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, cqe_err) }, +}; + #define NUM_RQ_STATS ARRAY_SIZE(rq_stats_desc) #define NUM_SQ_STATS ARRAY_SIZE(sq_stats_desc) #define NUM_XDPSQ_STATS ARRAY_SIZE(xdpsq_stats_desc) @@ -1750,6 +1805,49 @@ static const struct counter_desc ptp_cq_stats_desc[] = { #define NUM_PTP_SQ_STATS ARRAY_SIZE(ptp_sq_stats_desc) #define NUM_PTP_CH_STATS ARRAY_SIZE(ptp_ch_stats_desc) #define NUM_PTP_CQ_STATS ARRAY_SIZE(ptp_cq_stats_desc) +#define NUM_QOS_SQ_STATS ARRAY_SIZE(qos_sq_stats_desc) + +static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(qos) +{ + /* Pairs with smp_store_release in mlx5e_open_qos_sq. */ + return NUM_QOS_SQ_STATS * smp_load_acquire(&priv->htb.max_qos_sqs); +} + +static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(qos) +{ + /* Pairs with smp_store_release in mlx5e_open_qos_sq. */ + u16 max_qos_sqs = smp_load_acquire(&priv->htb.max_qos_sqs); + int i, qid; + + for (qid = 0; qid < max_qos_sqs; qid++) + for (i = 0; i < NUM_QOS_SQ_STATS; i++) + sprintf(data + (idx++) * ETH_GSTRING_LEN, + qos_sq_stats_desc[i].format, qid); + + return idx; +} + +static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(qos) +{ + struct mlx5e_sq_stats **stats; + u16 max_qos_sqs; + int i, qid; + + /* Pairs with smp_store_release in mlx5e_open_qos_sq. */ + max_qos_sqs = smp_load_acquire(&priv->htb.max_qos_sqs); + stats = READ_ONCE(priv->htb.qos_sq_stats); + + for (qid = 0; qid < max_qos_sqs; qid++) { + struct mlx5e_sq_stats *s = READ_ONCE(stats[qid]); + + for (i = 0; i < NUM_QOS_SQ_STATS; i++) + data[idx++] = MLX5E_READ_CTR64_CPU(s, qos_sq_stats_desc, i); + } + + return idx; +} + +static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(qos) { return; } static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(ptp) { @@ -1932,6 +2030,7 @@ MLX5E_DEFINE_STATS_GRP(per_port_buff_congest, 0); MLX5E_DEFINE_STATS_GRP(eth_ext, 0); static MLX5E_DEFINE_STATS_GRP(tls, 0); static MLX5E_DEFINE_STATS_GRP(ptp, 0); +static MLX5E_DEFINE_STATS_GRP(qos, 0); /* The stats groups order is opposite to the update_stats() order calls */ mlx5e_stats_grp_t mlx5e_nic_stats_grps[] = { @@ -1955,6 +2054,7 @@ mlx5e_stats_grp_t mlx5e_nic_stats_grps[] = { &MLX5E_STATS_GRP(channels), &MLX5E_STATS_GRP(per_port_buff_congest), &MLX5E_STATS_GRP(ptp), + &MLX5E_STATS_GRP(qos), }; unsigned int mlx5e_nic_stats_grps_num(struct mlx5e_priv *priv) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h index e41fc11f2ce78..93c41312fb037 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h @@ -55,6 +55,8 @@ #define MLX5E_DECLARE_PTP_CH_STAT(type, fld) "ptp_ch_"#fld, offsetof(type, fld) #define MLX5E_DECLARE_PTP_CQ_STAT(type, fld) "ptp_cq%d_"#fld, offsetof(type, fld) +#define MLX5E_DECLARE_QOS_TX_STAT(type, fld) "qos_tx%d_"#fld, offsetof(type, fld) + struct counter_desc { char format[ETH_GSTRING_LEN]; size_t offset; /* Byte offset */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c index 74f233eece549..da6a358a8a103 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c @@ -106,28 +106,53 @@ return_txq: return priv->port_ptp_tc2realtxq[up]; } +static int mlx5e_select_htb_queue(struct mlx5e_priv *priv, struct sk_buff *skb, + u16 htb_maj_id) +{ + u16 classid; + + if ((TC_H_MAJ(skb->priority) >> 16) == htb_maj_id) + classid = TC_H_MIN(skb->priority); + else + classid = READ_ONCE(priv->htb.defcls); + + if (!classid) + return 0; + + return mlx5e_get_txq_by_classid(priv, classid); +} + u16 mlx5e_select_queue(struct net_device *dev, struct sk_buff *skb, struct net_device *sb_dev) { struct mlx5e_priv *priv = netdev_priv(dev); + int num_tc_x_num_ch; int txq_ix; int up = 0; int ch_ix; - if (unlikely(priv->channels.port_ptp)) { - int num_tc_x_num_ch; + /* Sync with mlx5e_update_num_tc_x_num_ch - avoid refetching. */ + num_tc_x_num_ch = READ_ONCE(priv->num_tc_x_num_ch); + if (unlikely(dev->real_num_tx_queues > num_tc_x_num_ch)) { + /* Order maj_id before defcls - pairs with mlx5e_htb_root_add. */ + u16 htb_maj_id = smp_load_acquire(&priv->htb.maj_id); - if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) && - mlx5e_use_ptpsq(skb)) - return mlx5e_select_ptpsq(dev, skb); + if (unlikely(htb_maj_id)) { + txq_ix = mlx5e_select_htb_queue(priv, skb, htb_maj_id); + if (txq_ix > 0) + return txq_ix; + } - /* Sync with mlx5e_update_num_tc_x_num_ch - avoid refetching. */ - num_tc_x_num_ch = READ_ONCE(priv->num_tc_x_num_ch); + if (unlikely(priv->channels.port_ptp)) + if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) && + mlx5e_use_ptpsq(skb)) + return mlx5e_select_ptpsq(dev, skb); txq_ix = netdev_pick_tx(dev, skb, NULL); - /* Fix netdev_pick_tx() not to choose ptp_channel txqs. + /* Fix netdev_pick_tx() not to choose ptp_channel and HTB txqs. * If they are selected, switch to regular queues. - * Driver to select these queues only at mlx5e_select_ptpsq(). + * Driver to select these queues only at mlx5e_select_ptpsq() + * and mlx5e_select_htb_queue(). */ if (unlikely(txq_ix >= num_tc_x_num_ch)) txq_ix %= num_tc_x_num_ch; @@ -702,6 +727,10 @@ netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct net_device *dev) u16 pi; sq = priv->txq2sq[skb_get_queue_mapping(skb)]; + if (unlikely(!sq)) { + dev_kfree_skb_any(skb); + return NETDEV_TX_OK; + } /* May send SKBs and WQEs. */ if (unlikely(!mlx5e_accel_tx_begin(dev, sq, skb, &accel))) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c index a3cfe06d51169..d54da3797c30b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c @@ -115,17 +115,21 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget) napi); struct mlx5e_ch_stats *ch_stats = c->stats; struct mlx5e_xdpsq *xsksq = &c->xsksq; + struct mlx5e_txqsq __rcu **qos_sqs; struct mlx5e_rq *xskrq = &c->xskrq; struct mlx5e_rq *rq = &c->rq; bool aff_change = false; bool busy_xsk = false; bool busy = false; int work_done = 0; + u16 qos_sqs_size; bool xsk_open; int i; rcu_read_lock(); + qos_sqs = rcu_dereference(c->qos_sqs); + xsk_open = test_bit(MLX5E_CHANNEL_STATE_XSK, c->state); ch_stats->poll++; @@ -133,6 +137,18 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget) for (i = 0; i < c->num_tc; i++) busy |= mlx5e_poll_tx_cq(&c->sq[i].cq, budget); + if (unlikely(qos_sqs)) { + smp_rmb(); /* Pairs with mlx5e_qos_alloc_queues. */ + qos_sqs_size = READ_ONCE(c->qos_sqs_size); + + for (i = 0; i < qos_sqs_size; i++) { + struct mlx5e_txqsq *sq = rcu_dereference(qos_sqs[i]); + + if (sq) + busy |= mlx5e_poll_tx_cq(&sq->cq, budget); + } + } + busy |= mlx5e_poll_xdpsq_cq(&c->xdpsq.cq); if (c->xdp) @@ -186,6 +202,16 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget) mlx5e_handle_tx_dim(&c->sq[i]); mlx5e_cq_arm(&c->sq[i].cq); } + if (unlikely(qos_sqs)) { + for (i = 0; i < qos_sqs_size; i++) { + struct mlx5e_txqsq *sq = rcu_dereference(qos_sqs[i]); + + if (sq) { + mlx5e_handle_tx_dim(sq); + mlx5e_cq_arm(&sq->cq); + } + } + } mlx5e_handle_rx_dim(rq); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/qos.c b/drivers/net/ethernet/mellanox/mlx5/core/qos.c new file mode 100644 index 0000000000000..0777be24a3074 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/qos.c @@ -0,0 +1,85 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2020, Mellanox Technologies inc. All rights reserved. */ + +#include "qos.h" + +#define MLX5_QOS_DEFAULT_DWRR_UID 0 + +bool mlx5_qos_is_supported(struct mlx5_core_dev *mdev) +{ + if (!MLX5_CAP_GEN(mdev, qos)) + return false; + if (!MLX5_CAP_QOS(mdev, nic_sq_scheduling)) + return false; + if (!MLX5_CAP_QOS(mdev, nic_bw_share)) + return false; + if (!MLX5_CAP_QOS(mdev, nic_rate_limit)) + return false; + return true; +} + +int mlx5_qos_max_leaf_nodes(struct mlx5_core_dev *mdev) +{ + return 1 << MLX5_CAP_QOS(mdev, log_max_qos_nic_queue_group); +} + +int mlx5_qos_create_leaf_node(struct mlx5_core_dev *mdev, u32 parent_id, + u32 bw_share, u32 max_avg_bw, u32 *id) +{ + u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {0}; + + MLX5_SET(scheduling_context, sched_ctx, parent_element_id, parent_id); + MLX5_SET(scheduling_context, sched_ctx, element_type, + SCHEDULING_CONTEXT_ELEMENT_TYPE_QUEUE_GROUP); + MLX5_SET(scheduling_context, sched_ctx, bw_share, bw_share); + MLX5_SET(scheduling_context, sched_ctx, max_average_bw, max_avg_bw); + + return mlx5_create_scheduling_element_cmd(mdev, SCHEDULING_HIERARCHY_NIC, + sched_ctx, id); +} + +int mlx5_qos_create_inner_node(struct mlx5_core_dev *mdev, u32 parent_id, + u32 bw_share, u32 max_avg_bw, u32 *id) +{ + u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {0}; + void *attr; + + MLX5_SET(scheduling_context, sched_ctx, parent_element_id, parent_id); + MLX5_SET(scheduling_context, sched_ctx, element_type, + SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR); + MLX5_SET(scheduling_context, sched_ctx, bw_share, bw_share); + MLX5_SET(scheduling_context, sched_ctx, max_average_bw, max_avg_bw); + + attr = MLX5_ADDR_OF(scheduling_context, sched_ctx, element_attributes); + MLX5_SET(tsar_element, attr, tsar_type, TSAR_ELEMENT_TSAR_TYPE_DWRR); + + return mlx5_create_scheduling_element_cmd(mdev, SCHEDULING_HIERARCHY_NIC, + sched_ctx, id); +} + +int mlx5_qos_create_root_node(struct mlx5_core_dev *mdev, u32 *id) +{ + return mlx5_qos_create_inner_node(mdev, MLX5_QOS_DEFAULT_DWRR_UID, 0, 0, id); +} + +int mlx5_qos_update_node(struct mlx5_core_dev *mdev, u32 parent_id, + u32 bw_share, u32 max_avg_bw, u32 id) +{ + u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {0}; + u32 bitmask = 0; + + MLX5_SET(scheduling_context, sched_ctx, parent_element_id, parent_id); + MLX5_SET(scheduling_context, sched_ctx, bw_share, bw_share); + MLX5_SET(scheduling_context, sched_ctx, max_average_bw, max_avg_bw); + + bitmask |= MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_BW_SHARE; + bitmask |= MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_MAX_AVERAGE_BW; + + return mlx5_modify_scheduling_element_cmd(mdev, SCHEDULING_HIERARCHY_NIC, + sched_ctx, id, bitmask); +} + +int mlx5_qos_destroy_node(struct mlx5_core_dev *mdev, u32 id) +{ + return mlx5_destroy_scheduling_element_cmd(mdev, SCHEDULING_HIERARCHY_NIC, id); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/qos.h b/drivers/net/ethernet/mellanox/mlx5/core/qos.h new file mode 100644 index 0000000000000..125e4e47e6f71 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/qos.h @@ -0,0 +1,30 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2020, Mellanox Technologies inc. All rights reserved. */ + +#ifndef __MLX5_QOS_H +#define __MLX5_QOS_H + +#include "mlx5_core.h" + +#define MLX5_DEBUG_QOS_MASK BIT(4) + +#define qos_err(mdev, fmt, ...) \ + mlx5_core_err(mdev, "QoS: " fmt, ##__VA_ARGS__) +#define qos_warn(mdev, fmt, ...) \ + mlx5_core_warn(mdev, "QoS: " fmt, ##__VA_ARGS__) +#define qos_dbg(mdev, fmt, ...) \ + mlx5_core_dbg_mask(mdev, MLX5_DEBUG_QOS_MASK, "QoS: " fmt, ##__VA_ARGS__) + +bool mlx5_qos_is_supported(struct mlx5_core_dev *mdev); +int mlx5_qos_max_leaf_nodes(struct mlx5_core_dev *mdev); + +int mlx5_qos_create_leaf_node(struct mlx5_core_dev *mdev, u32 parent_id, + u32 bw_share, u32 max_avg_bw, u32 *id); +int mlx5_qos_create_inner_node(struct mlx5_core_dev *mdev, u32 parent_id, + u32 bw_share, u32 max_avg_bw, u32 *id); +int mlx5_qos_create_root_node(struct mlx5_core_dev *mdev, u32 *id); +int mlx5_qos_update_node(struct mlx5_core_dev *mdev, u32 parent_id, u32 bw_share, + u32 max_avg_bw, u32 id); +int mlx5_qos_destroy_node(struct mlx5_core_dev *mdev, u32 id); + +#endif diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 823411e288c01..71ae6aac34109 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -842,11 +842,16 @@ struct mlx5_ifc_qos_cap_bits { u8 reserved_at_4[0x1]; u8 packet_pacing_burst_bound[0x1]; u8 packet_pacing_typical_size[0x1]; - u8 reserved_at_7[0x4]; + u8 reserved_at_7[0x1]; + u8 nic_sq_scheduling[0x1]; + u8 nic_bw_share[0x1]; + u8 nic_rate_limit[0x1]; u8 packet_pacing_uid[0x1]; u8 reserved_at_c[0x14]; - u8 reserved_at_20[0x20]; + u8 reserved_at_20[0xb]; + u8 log_max_qos_nic_queue_group[0x5]; + u8 reserved_at_30[0x10]; u8 packet_pacing_max_rate[0x20]; @@ -3347,7 +3352,7 @@ struct mlx5_ifc_sqc_bits { u8 reserved_at_e0[0x10]; u8 packet_pacing_rate_limit_index[0x10]; u8 tis_lst_sz[0x10]; - u8 reserved_at_110[0x10]; + u8 qos_queue_group_id[0x10]; u8 reserved_at_120[0x40]; @@ -3362,6 +3367,7 @@ enum { SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT = 0x1, SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT_TC = 0x2, SCHEDULING_CONTEXT_ELEMENT_TYPE_PARA_VPORT_TC = 0x3, + SCHEDULING_CONTEXT_ELEMENT_TYPE_QUEUE_GROUP = 0x4, }; enum { @@ -4805,6 +4811,7 @@ struct mlx5_ifc_query_scheduling_element_out_bits { enum { SCHEDULING_HIERARCHY_E_SWITCH = 0x2, + SCHEDULING_HIERARCHY_NIC = 0x3, }; struct mlx5_ifc_query_scheduling_element_in_bits { -- GitLab From 321f7ab0d45899fe0313139822d69c2d5adbb760 Mon Sep 17 00:00:00 2001 From: Danielle Ratson Date: Thu, 21 Jan 2021 15:10:23 +0200 Subject: [PATCH 1320/2012] mlxsw: Register physical ports as a devlink resource The switch ASIC has a limited capacity of physical ('flavour physical' in devlink terminology) ports that it can support. While each system is brought up with a different number of ports, this number can be increased via splitting up to the ASIC's limit. Expose physical ports as a devlink resource so that user space will have visibility to the maximum number of ports that can be supported and the current occupancy. In addition, add a "Generic Resources" section in devlink-resource documentation so the different drivers will be aligned by the same resource name when exposing to user space. Signed-off-by: Danielle Ratson Reviewed-by: Jiri Pirko Signed-off-by: Ido Schimmel Signed-off-by: Jakub Kicinski --- .../networking/devlink/devlink-resource.rst | 14 ++++ drivers/net/ethernet/mellanox/mlxsw/core.c | 77 ++++++++++++++++--- drivers/net/ethernet/mellanox/mlxsw/core.h | 5 ++ .../net/ethernet/mellanox/mlxsw/spectrum.h | 2 +- include/net/devlink.h | 2 + net/core/devlink.c | 4 + 6 files changed, 93 insertions(+), 11 deletions(-) diff --git a/Documentation/networking/devlink/devlink-resource.rst b/Documentation/networking/devlink/devlink-resource.rst index 93e92d2f07527..3d5ae51e65a2d 100644 --- a/Documentation/networking/devlink/devlink-resource.rst +++ b/Documentation/networking/devlink/devlink-resource.rst @@ -23,6 +23,20 @@ current size and related sub resources. To access a sub resource, you specify the path of the resource. For example ``/IPv4/fib`` is the id for the ``fib`` sub-resource under the ``IPv4`` resource. +Generic Resources +================= + +Generic resources are used to describe resources that can be shared by multiple +device drivers and their description must be added to the following table: + +.. list-table:: List of Generic Resources + :widths: 10 90 + + * - Name + - Description + * - ``physical_ports`` + - A limited capacity of physical ports that the switch ASIC can support + example usage ------------- diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c index 685037e052af2..52fdc34251baf 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core.c @@ -84,6 +84,7 @@ struct mlxsw_core { struct mlxsw_thermal *thermal; struct mlxsw_core_port *ports; unsigned int max_ports; + atomic_t active_ports_count; bool fw_flash_in_progress; struct { struct devlink_health_reporter *fw_fatal; @@ -96,8 +97,36 @@ struct mlxsw_core { #define MLXSW_PORT_MAX_PORTS_DEFAULT 0x40 -static int mlxsw_ports_init(struct mlxsw_core *mlxsw_core) +static u64 mlxsw_ports_occ_get(void *priv) { + struct mlxsw_core *mlxsw_core = priv; + + return atomic_read(&mlxsw_core->active_ports_count); +} + +static int mlxsw_core_resources_ports_register(struct mlxsw_core *mlxsw_core) +{ + struct devlink *devlink = priv_to_devlink(mlxsw_core); + struct devlink_resource_size_params ports_num_params; + u32 max_ports; + + max_ports = mlxsw_core->max_ports - 1; + devlink_resource_size_params_init(&ports_num_params, max_ports, + max_ports, 1, + DEVLINK_RESOURCE_UNIT_ENTRY); + + return devlink_resource_register(devlink, + DEVLINK_RESOURCE_GENERIC_NAME_PORTS, + max_ports, MLXSW_CORE_RESOURCE_PORTS, + DEVLINK_RESOURCE_ID_PARENT_TOP, + &ports_num_params); +} + +static int mlxsw_ports_init(struct mlxsw_core *mlxsw_core, bool reload) +{ + struct devlink *devlink = priv_to_devlink(mlxsw_core); + int err; + /* Switch ports are numbered from 1 to queried value */ if (MLXSW_CORE_RES_VALID(mlxsw_core, MAX_SYSTEM_PORT)) mlxsw_core->max_ports = MLXSW_CORE_RES_GET(mlxsw_core, @@ -110,11 +139,30 @@ static int mlxsw_ports_init(struct mlxsw_core *mlxsw_core) if (!mlxsw_core->ports) return -ENOMEM; + if (!reload) { + err = mlxsw_core_resources_ports_register(mlxsw_core); + if (err) + goto err_resources_ports_register; + } + atomic_set(&mlxsw_core->active_ports_count, 0); + devlink_resource_occ_get_register(devlink, MLXSW_CORE_RESOURCE_PORTS, + mlxsw_ports_occ_get, mlxsw_core); + return 0; + +err_resources_ports_register: + kfree(mlxsw_core->ports); + return err; } -static void mlxsw_ports_fini(struct mlxsw_core *mlxsw_core) +static void mlxsw_ports_fini(struct mlxsw_core *mlxsw_core, bool reload) { + struct devlink *devlink = priv_to_devlink(mlxsw_core); + + devlink_resource_occ_get_unregister(devlink, MLXSW_CORE_RESOURCE_PORTS); + if (!reload) + devlink_resources_unregister(priv_to_devlink(mlxsw_core), NULL); + kfree(mlxsw_core->ports); } @@ -1897,7 +1945,7 @@ __mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info, goto err_register_resources; } - err = mlxsw_ports_init(mlxsw_core); + err = mlxsw_ports_init(mlxsw_core, reload); if (err) goto err_ports_init; @@ -1986,7 +2034,7 @@ err_devlink_register: err_emad_init: kfree(mlxsw_core->lag.mapping); err_alloc_lag_mapping: - mlxsw_ports_fini(mlxsw_core); + mlxsw_ports_fini(mlxsw_core, reload); err_ports_init: if (!reload) devlink_resources_unregister(devlink, NULL); @@ -2056,7 +2104,7 @@ void mlxsw_core_bus_device_unregister(struct mlxsw_core *mlxsw_core, devlink_unregister(devlink); mlxsw_emad_fini(mlxsw_core); kfree(mlxsw_core->lag.mapping); - mlxsw_ports_fini(mlxsw_core); + mlxsw_ports_fini(mlxsw_core, reload); if (!reload) devlink_resources_unregister(devlink, NULL); mlxsw_core->bus->fini(mlxsw_core->bus_priv); @@ -2755,16 +2803,25 @@ int mlxsw_core_port_init(struct mlxsw_core *mlxsw_core, u8 local_port, const unsigned char *switch_id, unsigned char switch_id_len) { - return __mlxsw_core_port_init(mlxsw_core, local_port, - DEVLINK_PORT_FLAVOUR_PHYSICAL, - port_number, split, split_port_subnumber, - splittable, lanes, - switch_id, switch_id_len); + int err; + + err = __mlxsw_core_port_init(mlxsw_core, local_port, + DEVLINK_PORT_FLAVOUR_PHYSICAL, + port_number, split, split_port_subnumber, + splittable, lanes, + switch_id, switch_id_len); + if (err) + return err; + + atomic_inc(&mlxsw_core->active_ports_count); + return 0; } EXPORT_SYMBOL(mlxsw_core_port_init); void mlxsw_core_port_fini(struct mlxsw_core *mlxsw_core, u8 local_port) { + atomic_dec(&mlxsw_core->active_ports_count); + __mlxsw_core_port_fini(mlxsw_core, local_port); } EXPORT_SYMBOL(mlxsw_core_port_fini); diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.h b/drivers/net/ethernet/mellanox/mlxsw/core.h index 6b3ccbf6b238d..8af7d9d03475e 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.h +++ b/drivers/net/ethernet/mellanox/mlxsw/core.h @@ -19,6 +19,11 @@ #include "cmd.h" #include "resources.h" +enum mlxsw_core_resource_id { + MLXSW_CORE_RESOURCE_PORTS = 1, + MLXSW_CORE_RESOURCE_MAX, +}; + struct mlxsw_core; struct mlxsw_core_port; struct mlxsw_driver; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index a6956cfc9cb12..a3769f95a1828 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -52,7 +52,7 @@ #define MLXSW_SP_RESOURCE_NAME_COUNTERS_RIF "rif" enum mlxsw_sp_resource_id { - MLXSW_SP_RESOURCE_KVD = 1, + MLXSW_SP_RESOURCE_KVD = MLXSW_CORE_RESOURCE_MAX, MLXSW_SP_RESOURCE_KVD_LINEAR, MLXSW_SP_RESOURCE_KVD_HASH_SINGLE, MLXSW_SP_RESOURCE_KVD_HASH_DOUBLE, diff --git a/include/net/devlink.h b/include/net/devlink.h index f466819cc4771..d12ed2854c34b 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -380,6 +380,8 @@ struct devlink_resource { #define DEVLINK_RESOURCE_ID_PARENT_TOP 0 +#define DEVLINK_RESOURCE_GENERIC_NAME_PORTS "physical_ports" + #define __DEVLINK_PARAM_MAX_STRING_VALUE 32 enum devlink_param_type { DEVLINK_PARAM_TYPE_U8, diff --git a/net/core/devlink.c b/net/core/devlink.c index 738d4344d6799..72ea798797627 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -8617,6 +8617,10 @@ EXPORT_SYMBOL_GPL(devlink_dpipe_table_unregister); * @resource_id: resource's id * @parent_resource_id: resource's parent id * @size_params: size parameters + * + * Generic resources should reuse the same names across drivers. + * Please see the generic resources list at: + * Documentation/networking/devlink/devlink-resource.rst */ int devlink_resource_register(struct devlink *devlink, const char *resource_name, -- GitLab From 5154b1b826d9b3e4365e0ca8c864ba28cd305041 Mon Sep 17 00:00:00 2001 From: Danielle Ratson Date: Thu, 21 Jan 2021 15:10:24 +0200 Subject: [PATCH 1321/2012] selftests: mlxsw: Add a scale test for physical ports Query the maximum number of supported physical ports using devlink-resource and test that this number can be reached by splitting each of the splittable ports to its width. Test that an error is returned in case the maximum number is exceeded. Signed-off-by: Danielle Ratson Signed-off-by: Ido Schimmel Signed-off-by: Jakub Kicinski --- .../selftests/drivers/net/mlxsw/port_scale.sh | 64 +++++++++++++++++++ .../net/mlxsw/spectrum-2/port_scale.sh | 16 +++++ .../net/mlxsw/spectrum-2/resource_scale.sh | 2 +- .../drivers/net/mlxsw/spectrum/port_scale.sh | 16 +++++ .../net/mlxsw/spectrum/resource_scale.sh | 2 +- 5 files changed, 98 insertions(+), 2 deletions(-) create mode 100644 tools/testing/selftests/drivers/net/mlxsw/port_scale.sh create mode 100644 tools/testing/selftests/drivers/net/mlxsw/spectrum-2/port_scale.sh create mode 100644 tools/testing/selftests/drivers/net/mlxsw/spectrum/port_scale.sh diff --git a/tools/testing/selftests/drivers/net/mlxsw/port_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/port_scale.sh new file mode 100644 index 0000000000000..f813ffefc07ec --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/port_scale.sh @@ -0,0 +1,64 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# Test for physical ports resource. The test splits each splittable port +# to its width and checks that eventually the number of physical ports equals +# the maximum number of physical ports. + +PORT_NUM_NETIFS=0 + +port_setup_prepare() +{ + : +} + +port_cleanup() +{ + pre_cleanup + + for port in "${unsplit[@]}"; do + devlink port unsplit $port + check_err $? "Did not unsplit $netdev" + done +} + +split_all_ports() +{ + local should_fail=$1; shift + local -a unsplit + + # Loop over the splittable netdevs and create tuples of netdev along + # with its width. For example: + # '$netdev1 $count1 $netdev2 $count2...', when: + # $netdev1-2 are splittable netdevs in the device, and + # $count1-2 are the netdevs width respectively. + while read netdev count <<<$( + devlink -j port show | + jq -r '.[][] | select(.splittable==true) | "\(.netdev) \(.lanes)"' + ) + [[ ! -z $netdev ]] + do + devlink port split $netdev count $count + check_err $? "Did not split $netdev into $count" + unsplit+=( "${netdev}s0" ) + done +} + +port_test() +{ + local max_ports=$1; shift + local should_fail=$1; shift + + split_all_ports $should_fail + + occ=$(devlink -j resource show $DEVLINK_DEV \ + | jq '.[][][] | select(.name=="physical_ports") |.["occ"]') + + [[ $occ -eq $max_ports ]] + if [[ $should_fail -eq 0 ]]; then + check_err $? "Mismatch ports number: Expected $max_ports, got $occ." + else + check_err_fail $should_fail $? "Reached more ports than expected" + fi + +} diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/port_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/port_scale.sh new file mode 100644 index 0000000000000..0b71dfbbb4471 --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/port_scale.sh @@ -0,0 +1,16 @@ +# SPDX-License-Identifier: GPL-2.0 +source ../port_scale.sh + +port_get_target() +{ + local should_fail=$1 + local target + + target=$(devlink_resource_size_get physical_ports) + + if ((! should_fail)); then + echo $target + else + echo $((target + 1)) + fi +} diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh index d7cf33a3f18dc..4a1c9328555ff 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh @@ -28,7 +28,7 @@ cleanup() trap cleanup EXIT -ALL_TESTS="router tc_flower mirror_gre tc_police" +ALL_TESTS="router tc_flower mirror_gre tc_police port" for current_test in ${TESTS:-$ALL_TESTS}; do source ${current_test}_scale.sh diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum/port_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum/port_scale.sh new file mode 100644 index 0000000000000..0b71dfbbb4471 --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum/port_scale.sh @@ -0,0 +1,16 @@ +# SPDX-License-Identifier: GPL-2.0 +source ../port_scale.sh + +port_get_target() +{ + local should_fail=$1 + local target + + target=$(devlink_resource_size_get physical_ports) + + if ((! should_fail)); then + echo $target + else + echo $((target + 1)) + fi +} diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh index 43f662401bc30..087a884f66cdf 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh @@ -22,7 +22,7 @@ cleanup() devlink_sp_read_kvd_defaults trap cleanup EXIT -ALL_TESTS="router tc_flower mirror_gre tc_police" +ALL_TESTS="router tc_flower mirror_gre tc_police port" for current_test in ${TESTS:-$ALL_TESTS}; do source ${current_test}_scale.sh -- GitLab From 214a5ea081e77346e4963dd6d20c5539ff8b6ae6 Mon Sep 17 00:00:00 2001 From: Ronnie Sahlberg Date: Thu, 21 Jan 2021 08:22:48 +1000 Subject: [PATCH 1322/2012] cifs: do not fail __smb_send_rqst if non-fatal signals are pending RHBZ 1848178 The original intent of returning an error in this function in the patch: "CIFS: Mask off signals when sending SMB packets" was to avoid interrupting packet send in the middle of sending the data (and thus breaking an SMB connection), but we also don't want to fail the request for non-fatal signals even before we have had a chance to try to send it (the reported problem could be reproduced e.g. by exiting a child process when the parent process was in the midst of calling futimens to update a file's timestamps). In addition, since the signal may remain pending when we enter the sending loop, we may end up not sending the whole packet before TCP buffers become full. In this case the code returns -EINTR but what we need here is to return -ERESTARTSYS instead to allow system calls to be restarted. Fixes: b30c74c73c78 ("CIFS: Mask off signals when sending SMB packets") Cc: stable@vger.kernel.org # v5.1+ Signed-off-by: Ronnie Sahlberg Reviewed-by: Pavel Shilovsky Signed-off-by: Steve French --- fs/cifs/transport.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index e9abb41aa89bc..95ef26b555b91 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c @@ -338,7 +338,7 @@ __smb_send_rqst(struct TCP_Server_Info *server, int num_rqst, if (ssocket == NULL) return -EAGAIN; - if (signal_pending(current)) { + if (fatal_signal_pending(current)) { cifs_dbg(FYI, "signal pending before send request\n"); return -ERESTARTSYS; } @@ -429,7 +429,7 @@ unmask: if (signal_pending(current) && (total_len != send_length)) { cifs_dbg(FYI, "signal is pending after attempt to send\n"); - rc = -EINTR; + rc = -ERESTARTSYS; } /* uncork it */ -- GitLab From 23b53d4417426edc7c3078e1c1530c242e496c1e Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Sat, 23 Jan 2021 16:57:30 +0100 Subject: [PATCH 1323/2012] ALSA: pcm: One more dependency for hw constraints The fix for a long-standing USB-audio bug required one more dependency variable to be added to the hw constraints. Unfortunately I didn't realize at debugging that the new addition may result in the overflow of the dependency array of each snd_pcm_hw_rule (up to three plus a sentinel), because USB-audio driver adds one more dependency only for a certain device and bus, hence it works as is for many devices. But in a bad case, a simple open always results in -EINVAL (with kernel WARNING if CONFIG_SND_DEBUG is set) no matter what is passed. Since the dependencies are real and unavoidable (USB-audio restricts the hw_params per looping over the format/rate/channels combos), the only good solution seems to raise the bar for one more dependency for snd_pcm_hw_rule -- so does this patch: now the hw constraint dependencies can be up to four. Fixes: 506c203cc3de ("ALSA: usb-audio: Fix hw constraints dependencies") Reported-by: Jamie Heilman Link: https://lore.kernel.org/r/20210123155730.22576-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- include/sound/pcm.h | 2 +- sound/core/pcm_native.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/include/sound/pcm.h b/include/sound/pcm.h index 2336bf9243e18..2e1200d17d0cb 100644 --- a/include/sound/pcm.h +++ b/include/sound/pcm.h @@ -229,7 +229,7 @@ typedef int (*snd_pcm_hw_rule_func_t)(struct snd_pcm_hw_params *params, struct snd_pcm_hw_rule { unsigned int cond; int var; - int deps[4]; + int deps[5]; snd_pcm_hw_rule_func_t func; void *private; diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c index 9f3f8e953ff04..c4aac703dc224 100644 --- a/sound/core/pcm_native.c +++ b/sound/core/pcm_native.c @@ -382,8 +382,8 @@ retry: continue; /* - * The 'deps' array includes maximum three dependencies - * to SNDRV_PCM_HW_PARAM_XXXs for this rule. The fourth + * The 'deps' array includes maximum four dependencies + * to SNDRV_PCM_HW_PARAM_XXXs for this rule. The fifth * member of this array is a sentinel and should be * negative value. * -- GitLab From fe773b8711e3be4190994ea54bf7a5a0564245a1 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Sat, 23 Jan 2021 16:58:42 +0100 Subject: [PATCH 1324/2012] ALSA: usb-audio: workaround for iface reset issue The recently introduced sample rate validation code seems causing a problem on some devices; namely, after performing this, the bus gets screwed and it influences even on other USB devices. As a quick workaround, perform it only for the necessary devices; currently MOTU devices are known to need the valid altset checks, so filter out other devices. Fixes: 93db51d06b32 ("ALSA: usb-audio: Check valid altsetting at parsing rates for UAC2/3") Reported-by: Jamie Heilman BugLink: https://bugzilla.suse.com/show_bug.cgi?id=1178203 Link: https://lore.kernel.org/r/20210123155842.22652-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/usb/format.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/sound/usb/format.c b/sound/usb/format.c index 9ebc5d202c873..e6ff317a67852 100644 --- a/sound/usb/format.c +++ b/sound/usb/format.c @@ -466,6 +466,17 @@ static int validate_sample_rate_table_v2v3(struct snd_usb_audio *chip, unsigned int nr_rates; int i, err; + /* performing the rate verification may lead to unexpected USB bus + * behavior afterwards by some unknown reason. Do this only for the + * known devices. + */ + switch (USB_ID_VENDOR(chip->usb_id)) { + case 0x07fd: /* MOTU */ + break; + default: + return 0; /* don't perform the validation as default */ + } + table = kcalloc(fp->nr_rates, sizeof(*table), GFP_KERNEL); if (!table) return -ENOMEM; -- GitLab From dc090de854b9d7fdbc6f4df70bd7fc1b43eeccf8 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Thu, 21 Jan 2021 21:40:36 +0100 Subject: [PATCH 1325/2012] net: mrp: fix definitions of MRP test packets Wireshark says that the MRP test packets cannot be decoded - and the reason for that is that there's a two-byte hole filled with garbage between the "transitions" and "timestamp" members. So Wireshark decodes the two garbage bytes and the top two bytes of the timestamp written by the kernel as the timestamp value (which thus fluctuates wildly), and interprets the lower two bytes of the timestamp as a new (type, length) pair, which is of course broken. Even though this makes the timestamp field in the struct unaligned, it actually makes it end up on a 32 bit boundary in the frame as mandated by the standard, since it is preceded by a two byte TLV header. The struct definitions live under include/uapi/, but they are not really part of any kernel<->userspace API/ABI, so fixing the definitions by adding the packed attribute should not cause any compatibility issues. Signed-off-by: Rasmus Villemoes Reviewed-by: Horatiu Vultur Signed-off-by: Jakub Kicinski --- include/uapi/linux/mrp_bridge.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/uapi/linux/mrp_bridge.h b/include/uapi/linux/mrp_bridge.h index 9744773de5ffa..5ce771327e326 100644 --- a/include/uapi/linux/mrp_bridge.h +++ b/include/uapi/linux/mrp_bridge.h @@ -97,7 +97,7 @@ struct br_mrp_ring_test_hdr { __be16 state; __be16 transitions; __be32 timestamp; -}; +} __attribute__((__packed__)); struct br_mrp_ring_topo_hdr { __be16 prio; @@ -142,7 +142,7 @@ struct br_mrp_in_test_hdr { __be16 state; __be16 transitions; __be32 timestamp; -}; +} __attribute__((__packed__)); struct br_mrp_in_topo_hdr { __u8 sa[ETH_ALEN]; -- GitLab From 6781939054a1a161e06e7a7955a4846be770a711 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Thu, 21 Jan 2021 21:40:37 +0100 Subject: [PATCH 1326/2012] net: mrp: move struct definitions out of uapi None of these are actually used in the kernel/userspace interface - there's a userspace component of implementing MRP, and userspace will need to construct certain frames to put on the wire, but there's no reason the kernel should provide the relevant definitions in a UAPI header. In fact, some of those definitions were broken until previous commit, so only keep the few that are actually referenced in the kernel code, and move them to the br_private_mrp.h header. Signed-off-by: Rasmus Villemoes Signed-off-by: Jakub Kicinski --- include/uapi/linux/mrp_bridge.h | 86 --------------------------------- net/bridge/br_private_mrp.h | 29 +++++++++++ 2 files changed, 29 insertions(+), 86 deletions(-) diff --git a/include/uapi/linux/mrp_bridge.h b/include/uapi/linux/mrp_bridge.h index 5ce771327e326..bd4424de56ff5 100644 --- a/include/uapi/linux/mrp_bridge.h +++ b/include/uapi/linux/mrp_bridge.h @@ -71,90 +71,4 @@ enum br_mrp_sub_tlv_header_type { BR_MRP_SUB_TLV_HEADER_TEST_AUTO_MGR = 0x3, }; -struct br_mrp_tlv_hdr { - __u8 type; - __u8 length; -}; - -struct br_mrp_sub_tlv_hdr { - __u8 type; - __u8 length; -}; - -struct br_mrp_end_hdr { - struct br_mrp_tlv_hdr hdr; -}; - -struct br_mrp_common_hdr { - __be16 seq_id; - __u8 domain[MRP_DOMAIN_UUID_LENGTH]; -}; - -struct br_mrp_ring_test_hdr { - __be16 prio; - __u8 sa[ETH_ALEN]; - __be16 port_role; - __be16 state; - __be16 transitions; - __be32 timestamp; -} __attribute__((__packed__)); - -struct br_mrp_ring_topo_hdr { - __be16 prio; - __u8 sa[ETH_ALEN]; - __be16 interval; -}; - -struct br_mrp_ring_link_hdr { - __u8 sa[ETH_ALEN]; - __be16 port_role; - __be16 interval; - __be16 blocked; -}; - -struct br_mrp_sub_opt_hdr { - __u8 type; - __u8 manufacture_data[MRP_MANUFACTURE_DATA_LENGTH]; -}; - -struct br_mrp_test_mgr_nack_hdr { - __be16 prio; - __u8 sa[ETH_ALEN]; - __be16 other_prio; - __u8 other_sa[ETH_ALEN]; -}; - -struct br_mrp_test_prop_hdr { - __be16 prio; - __u8 sa[ETH_ALEN]; - __be16 other_prio; - __u8 other_sa[ETH_ALEN]; -}; - -struct br_mrp_oui_hdr { - __u8 oui[MRP_OUI_LENGTH]; -}; - -struct br_mrp_in_test_hdr { - __be16 id; - __u8 sa[ETH_ALEN]; - __be16 port_role; - __be16 state; - __be16 transitions; - __be32 timestamp; -} __attribute__((__packed__)); - -struct br_mrp_in_topo_hdr { - __u8 sa[ETH_ALEN]; - __be16 id; - __be16 interval; -}; - -struct br_mrp_in_link_hdr { - __u8 sa[ETH_ALEN]; - __be16 port_role; - __be16 id; - __be16 interval; -}; - #endif diff --git a/net/bridge/br_private_mrp.h b/net/bridge/br_private_mrp.h index 1883118aae55b..32a48e5418dac 100644 --- a/net/bridge/br_private_mrp.h +++ b/net/bridge/br_private_mrp.h @@ -88,4 +88,33 @@ int br_mrp_switchdev_send_in_test(struct net_bridge *br, struct br_mrp *mrp, int br_mrp_ring_port_open(struct net_device *dev, u8 loc); int br_mrp_in_port_open(struct net_device *dev, u8 loc); +/* MRP protocol data units */ +struct br_mrp_tlv_hdr { + __u8 type; + __u8 length; +}; + +struct br_mrp_common_hdr { + __be16 seq_id; + __u8 domain[MRP_DOMAIN_UUID_LENGTH]; +}; + +struct br_mrp_ring_test_hdr { + __be16 prio; + __u8 sa[ETH_ALEN]; + __be16 port_role; + __be16 state; + __be16 transitions; + __be32 timestamp; +} __attribute__((__packed__)); + +struct br_mrp_in_test_hdr { + __be16 id; + __u8 sa[ETH_ALEN]; + __be16 port_role; + __be16 state; + __be16 transitions; + __be32 timestamp; +} __attribute__((__packed__)); + #endif /* _BR_PRIVATE_MRP_H */ -- GitLab From c80c4a1ea47f354584c8055015561c4f1ece8f7a Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Thu, 21 Jan 2021 05:48:17 -0600 Subject: [PATCH 1327/2012] net: ipa: count actual work done in gsi_channel_poll() There is an off-by-one problem in gsi_channel_poll(). The count of transactions completed is incremented each time through the loop *before* determining whether there is any more work to do. As a result, if we exit the loop early the counter its value is one more than the number of transactions actually processed. Instead, increment the count after processing, to ensure it reflects the number of processed transactions. The result is more naturally described as a for loop rather than a while loop, so change that. Signed-off-by: Alex Elder Signed-off-by: Jakub Kicinski --- drivers/net/ipa/gsi.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/net/ipa/gsi.c b/drivers/net/ipa/gsi.c index 5b29f7d9d6ac1..56a5eb61b20c4 100644 --- a/drivers/net/ipa/gsi.c +++ b/drivers/net/ipa/gsi.c @@ -1543,13 +1543,12 @@ static struct gsi_trans *gsi_channel_poll_one(struct gsi_channel *channel) static int gsi_channel_poll(struct napi_struct *napi, int budget) { struct gsi_channel *channel; - int count = 0; + int count; channel = container_of(napi, struct gsi_channel, napi); - while (count < budget) { + for (count = 0; count < budget; count++) { struct gsi_trans *trans; - count++; trans = gsi_channel_poll_one(channel); if (!trans) break; -- GitLab From 148604e7eafb2f6af275d60b9ab27e7a9622e93f Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Thu, 21 Jan 2021 05:48:18 -0600 Subject: [PATCH 1328/2012] net: ipa: heed napi_complete() return value Pay attention to the return value of napi_complete(), completing polling only if it returns true. Just use napi rather than &channel->napi as the argument passed to napi_complete(). Signed-off-by: Alex Elder Signed-off-by: Jakub Kicinski --- drivers/net/ipa/gsi.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/ipa/gsi.c b/drivers/net/ipa/gsi.c index 56a5eb61b20c4..634f514e861e7 100644 --- a/drivers/net/ipa/gsi.c +++ b/drivers/net/ipa/gsi.c @@ -1555,10 +1555,8 @@ static int gsi_channel_poll(struct napi_struct *napi, int budget) gsi_trans_complete(trans); } - if (count < budget) { - napi_complete(&channel->napi); + if (count < budget && napi_complete(napi)) gsi_irq_ieob_enable(channel->gsi, channel->evt_ring_id); - } return count; } -- GitLab From 223f5b34b409828b2f9a15d5e4ec0da0563d17ec Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Thu, 21 Jan 2021 05:48:19 -0600 Subject: [PATCH 1329/2012] net: ipa: have gsi_channel_update() return a value Have gsi_channel_update() return the first transaction in the updated completed transaction list, or NULL if no new transactions have been added. Signed-off-by: Alex Elder Signed-off-by: Jakub Kicinski --- drivers/net/ipa/gsi.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/drivers/net/ipa/gsi.c b/drivers/net/ipa/gsi.c index 634f514e861e7..6e5817e16c0f6 100644 --- a/drivers/net/ipa/gsi.c +++ b/drivers/net/ipa/gsi.c @@ -1452,7 +1452,7 @@ void gsi_channel_doorbell(struct gsi_channel *channel) } /* Consult hardware, move any newly completed transactions to completed list */ -static void gsi_channel_update(struct gsi_channel *channel) +static struct gsi_trans *gsi_channel_update(struct gsi_channel *channel) { u32 evt_ring_id = channel->evt_ring_id; struct gsi *gsi = channel->gsi; @@ -1471,7 +1471,7 @@ static void gsi_channel_update(struct gsi_channel *channel) offset = GSI_EV_CH_E_CNTXT_4_OFFSET(evt_ring_id); index = gsi_ring_index(ring, ioread32(gsi->virt + offset)); if (index == ring->index % ring->count) - return; + return NULL; /* Get the transaction for the latest completed event. Take a * reference to keep it from completing before we give the events @@ -1496,6 +1496,8 @@ static void gsi_channel_update(struct gsi_channel *channel) gsi_evt_ring_doorbell(channel->gsi, channel->evt_ring_id, index); gsi_trans_free(trans); + + return gsi_channel_trans_complete(channel); } /** @@ -1516,11 +1518,8 @@ static struct gsi_trans *gsi_channel_poll_one(struct gsi_channel *channel) /* Get the first transaction from the completed list */ trans = gsi_channel_trans_complete(channel); - if (!trans) { - /* List is empty; see if there's more to do */ - gsi_channel_update(channel); - trans = gsi_channel_trans_complete(channel); - } + if (!trans) /* List is empty; see if there's more to do */ + trans = gsi_channel_update(channel); if (trans) gsi_trans_move_polled(trans); -- GitLab From 5725593e6f182607993364f56ab1c0468d68016f Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Thu, 21 Jan 2021 05:48:20 -0600 Subject: [PATCH 1330/2012] net: ipa: repurpose gsi_irq_ieob_disable() Rename gsi_irq_ieob_disable() to be gsi_irq_ieob_disable_one(). Introduce a new function gsi_irq_ieob_disable() that takes a mask of events to disable rather than a single event id. This will be used in the next patch. Rename gsi_irq_ieob_enable() to be gsi_irq_ieob_enable_one() to be consistent. Signed-off-by: Alex Elder Signed-off-by: Jakub Kicinski --- drivers/net/ipa/gsi.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/drivers/net/ipa/gsi.c b/drivers/net/ipa/gsi.c index 6e5817e16c0f6..0391f5a207c9f 100644 --- a/drivers/net/ipa/gsi.c +++ b/drivers/net/ipa/gsi.c @@ -272,7 +272,7 @@ static void gsi_irq_ch_ctrl_disable(struct gsi *gsi) iowrite32(0, gsi->virt + GSI_CNTXT_SRC_CH_IRQ_MSK_OFFSET); } -static void gsi_irq_ieob_enable(struct gsi *gsi, u32 evt_ring_id) +static void gsi_irq_ieob_enable_one(struct gsi *gsi, u32 evt_ring_id) { bool enable_ieob = !gsi->ieob_enabled_bitmap; u32 val; @@ -286,11 +286,11 @@ static void gsi_irq_ieob_enable(struct gsi *gsi, u32 evt_ring_id) gsi_irq_type_enable(gsi, GSI_IEOB); } -static void gsi_irq_ieob_disable(struct gsi *gsi, u32 evt_ring_id) +static void gsi_irq_ieob_disable(struct gsi *gsi, u32 event_mask) { u32 val; - gsi->ieob_enabled_bitmap &= ~BIT(evt_ring_id); + gsi->ieob_enabled_bitmap &= ~event_mask; /* Disable the interrupt type if this was the last enabled channel */ if (!gsi->ieob_enabled_bitmap) @@ -300,6 +300,11 @@ static void gsi_irq_ieob_disable(struct gsi *gsi, u32 evt_ring_id) iowrite32(val, gsi->virt + GSI_CNTXT_SRC_IEOB_IRQ_MSK_OFFSET); } +static void gsi_irq_ieob_disable_one(struct gsi *gsi, u32 evt_ring_id) +{ + gsi_irq_ieob_disable(gsi, BIT(evt_ring_id)); +} + /* Enable all GSI_interrupt types */ static void gsi_irq_enable(struct gsi *gsi) { @@ -766,13 +771,13 @@ static void gsi_channel_freeze(struct gsi_channel *channel) napi_disable(&channel->napi); - gsi_irq_ieob_disable(channel->gsi, channel->evt_ring_id); + gsi_irq_ieob_disable_one(channel->gsi, channel->evt_ring_id); } /* Allow transactions to be used on the channel again. */ static void gsi_channel_thaw(struct gsi_channel *channel) { - gsi_irq_ieob_enable(channel->gsi, channel->evt_ring_id); + gsi_irq_ieob_enable_one(channel->gsi, channel->evt_ring_id); napi_enable(&channel->napi); } @@ -1207,7 +1212,7 @@ static void gsi_isr_ieob(struct gsi *gsi) event_mask ^= BIT(evt_ring_id); - gsi_irq_ieob_disable(gsi, evt_ring_id); + gsi_irq_ieob_disable_one(gsi, evt_ring_id); napi_schedule(&gsi->evt_ring[evt_ring_id].channel->napi); } } @@ -1555,7 +1560,7 @@ static int gsi_channel_poll(struct napi_struct *napi, int budget) } if (count < budget && napi_complete(napi)) - gsi_irq_ieob_enable(channel->gsi, channel->evt_ring_id); + gsi_irq_ieob_enable_one(channel->gsi, channel->evt_ring_id); return count; } -- GitLab From 7bd9785f683a7dafd8ea59a863a614da685d92f7 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Thu, 21 Jan 2021 05:48:21 -0600 Subject: [PATCH 1331/2012] net: ipa: disable IEOB interrupts before clearing Currently in gsi_isr_ieob(), event ring IEOB interrupts are disabled one at a time. The loop disables the IEOB interrupt for all event rings represented in the event mask. Instead, just disable them all at once. Disable them all *before* clearing the interrupt condition. This guarantees we'll schedule NAPI for each event once, before another IEOB interrupt could be signaled. Signed-off-by: Alex Elder Signed-off-by: Jakub Kicinski --- drivers/net/ipa/gsi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ipa/gsi.c b/drivers/net/ipa/gsi.c index 0391f5a207c9f..f79cf3c327c1c 100644 --- a/drivers/net/ipa/gsi.c +++ b/drivers/net/ipa/gsi.c @@ -1205,6 +1205,7 @@ static void gsi_isr_ieob(struct gsi *gsi) u32 event_mask; event_mask = ioread32(gsi->virt + GSI_CNTXT_SRC_IEOB_IRQ_OFFSET); + gsi_irq_ieob_disable(gsi, event_mask); iowrite32(event_mask, gsi->virt + GSI_CNTXT_SRC_IEOB_IRQ_CLR_OFFSET); while (event_mask) { @@ -1212,7 +1213,6 @@ static void gsi_isr_ieob(struct gsi *gsi) event_mask ^= BIT(evt_ring_id); - gsi_irq_ieob_disable_one(gsi, evt_ring_id); napi_schedule(&gsi->evt_ring[evt_ring_id].channel->napi); } } -- GitLab From cf3c46631e1637582f517a574c77cd6c05793817 Mon Sep 17 00:00:00 2001 From: Pan Bian Date: Thu, 21 Jan 2021 04:33:43 -0800 Subject: [PATCH 1332/2012] net: dsa: bcm_sf2: put device node before return Put the device node dn before return error code on failure path. Fixes: 461cd1b03e32 ("net: dsa: bcm_sf2: Register our slave MDIO bus") Signed-off-by: Pan Bian Link: https://lore.kernel.org/r/20210121123343.26330-1-bianpan2016@163.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/bcm_sf2.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c index 1e9a0adda2d69..445226720ff29 100644 --- a/drivers/net/dsa/bcm_sf2.c +++ b/drivers/net/dsa/bcm_sf2.c @@ -509,15 +509,19 @@ static int bcm_sf2_mdio_register(struct dsa_switch *ds) /* Find our integrated MDIO bus node */ dn = of_find_compatible_node(NULL, NULL, "brcm,unimac-mdio"); priv->master_mii_bus = of_mdio_find_bus(dn); - if (!priv->master_mii_bus) + if (!priv->master_mii_bus) { + of_node_put(dn); return -EPROBE_DEFER; + } get_device(&priv->master_mii_bus->dev); priv->master_mii_dn = dn; priv->slave_mii_bus = devm_mdiobus_alloc(ds->dev); - if (!priv->slave_mii_bus) + if (!priv->slave_mii_bus) { + of_node_put(dn); return -ENOMEM; + } priv->slave_mii_bus->priv = priv; priv->slave_mii_bus->name = "sf2 slave mii"; -- GitLab From b6011966ac6f402847eb5326beee8da3a80405c7 Mon Sep 17 00:00:00 2001 From: Pan Bian Date: Thu, 21 Jan 2021 06:57:38 -0800 Subject: [PATCH 1333/2012] chtls: Fix potential resource leak The dst entry should be released if no neighbour is found. Goto label free_dst to fix the issue. Besides, the check of ndev against NULL is redundant. Signed-off-by: Pan Bian Link: https://lore.kernel.org/r/20210121145738.51091-1-bianpan2016@163.com Signed-off-by: Jakub Kicinski --- .../net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c index e5cfbe196ba66..19dc7dc054a29 100644 --- a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c +++ b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c @@ -1158,11 +1158,9 @@ static struct sock *chtls_recv_sock(struct sock *lsk, #endif } if (!n || !n->dev) - goto free_sk; + goto free_dst; ndev = n->dev; - if (!ndev) - goto free_dst; if (is_vlan_dev(ndev)) ndev = vlan_dev_real_dev(ndev); @@ -1250,7 +1248,8 @@ static struct sock *chtls_recv_sock(struct sock *lsk, free_csk: chtls_sock_release(&csk->kref); free_dst: - neigh_release(n); + if (n) + neigh_release(n); dst_release(dst); free_sk: inet_csk_prepare_forced_close(newsk); -- GitLab From 57ba00774b5bf5dd3a90725b1d8e619344e2d5ed Mon Sep 17 00:00:00 2001 From: Richard Cochran Date: Wed, 20 Jan 2021 20:06:00 -0800 Subject: [PATCH 1334/2012] net: dsa: mv88e6xxx: Remove bogus Kconfig dependency. The mv88e6xxx is a DSA driver, and it implements DSA style time stamping of PTP frames. It has no need of the expensive option to enable PHY time stamping. Remove the bogus dependency. Signed-off-by: Richard Cochran Acked-by: Brandon Streiff Signed-off-by: Jakub Kicinski --- drivers/net/dsa/mv88e6xxx/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/dsa/mv88e6xxx/Kconfig b/drivers/net/dsa/mv88e6xxx/Kconfig index 51185e4d7d15e..b17540926c116 100644 --- a/drivers/net/dsa/mv88e6xxx/Kconfig +++ b/drivers/net/dsa/mv88e6xxx/Kconfig @@ -25,7 +25,6 @@ config NET_DSA_MV88E6XXX_PTP default n depends on NET_DSA_MV88E6XXX_GLOBAL2 depends on PTP_1588_CLOCK - imply NETWORK_PHY_TIMESTAMPING help Say Y to enable PTP hardware timestamping on Marvell 88E6xxx switch chips that support it. -- GitLab From 04cbb740ce1bbaca1c2681cf2de4d10fffded87f Mon Sep 17 00:00:00 2001 From: Richard Cochran Date: Wed, 20 Jan 2021 20:06:01 -0800 Subject: [PATCH 1335/2012] net: mvpp2: Remove unneeded Kconfig dependency. The mvpp2 is an Ethernet driver, and it implements MAC style time stamping of PTP frames. It has no need of the expensive option to enable PHY time stamping. Remove the incorrect dependency. Signed-off-by: Richard Cochran Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/marvell/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/Kconfig b/drivers/net/ethernet/marvell/Kconfig index 41815b6095698..7fe15a3286f4a 100644 --- a/drivers/net/ethernet/marvell/Kconfig +++ b/drivers/net/ethernet/marvell/Kconfig @@ -94,7 +94,6 @@ config MVPP2 config MVPP2_PTP bool "Marvell Armada 8K Enable PTP support" - depends on NETWORK_PHY_TIMESTAMPING depends on (PTP_1588_CLOCK = y && MVPP2 = y) || \ (PTP_1588_CLOCK && MVPP2 = m) -- GitLab From fc024c5c07aa2463d36e8c85943343741ba356b7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pali=20Roh=C3=A1r?= Date: Thu, 21 Jan 2021 16:02:44 +0100 Subject: [PATCH 1336/2012] doc: networking: ip-sysctl: Document conf/all/disable_ipv6 and conf/default/disable_ipv6 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch adds documentation for sysctl conf/all/disable_ipv6 and conf/default/disable_ipv6 settings which is currently missing. Signed-off-by: Pali Rohár Link: https://lore.kernel.org/r/20210121150244.20483-1-pali@kernel.org Signed-off-by: Jakub Kicinski --- Documentation/networking/ip-sysctl.rst | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst index dd2b12a32b739..c7b775da95542 100644 --- a/Documentation/networking/ip-sysctl.rst +++ b/Documentation/networking/ip-sysctl.rst @@ -1807,12 +1807,24 @@ seg6_flowlabel - INTEGER ``conf/default/*``: Change the interface-specific default settings. + These settings would be used during creating new interfaces. + ``conf/all/*``: Change all the interface-specific settings. [XXX: Other special features than forwarding?] +conf/all/disable_ipv6 - BOOLEAN + Changing this value is same as changing ``conf/default/disable_ipv6`` + setting and also all per-interface ``disable_ipv6`` settings to the same + value. + + Reading this value does not have any particular meaning. It does not say + whether IPv6 support is enabled or disabled. Returned value can be 1 + also in the case when some interface has ``disable_ipv6`` set to 0 and + has configured IPv6 addresses. + conf/all/forwarding - BOOLEAN Enable global IPv6 forwarding between all interfaces. -- GitLab From d8f923c3ab96dbbb4e3c22d1afc1dc1d3b195cd8 Mon Sep 17 00:00:00 2001 From: Pan Bian Date: Thu, 21 Jan 2021 07:37:45 -0800 Subject: [PATCH 1337/2012] NFC: fix possible resource leak Put the device to avoid resource leak on path that the polling flag is invalid. Fixes: a831b9132065 ("NFC: Do not return EBUSY when stopping a poll that's already stopped") Signed-off-by: Pan Bian Link: https://lore.kernel.org/r/20210121153745.122184-1-bianpan2016@163.com Signed-off-by: Jakub Kicinski --- net/nfc/netlink.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c index 573b38ad2f8ed..e161ef2d4720d 100644 --- a/net/nfc/netlink.c +++ b/net/nfc/netlink.c @@ -852,6 +852,7 @@ static int nfc_genl_stop_poll(struct sk_buff *skb, struct genl_info *info) if (!dev->polling) { device_unlock(&dev->dev); + nfc_put_device(dev); return -EINVAL; } -- GitLab From 3a30537cee233fb7da302491b28c832247d89bbe Mon Sep 17 00:00:00 2001 From: Pan Bian Date: Thu, 21 Jan 2021 07:27:48 -0800 Subject: [PATCH 1338/2012] NFC: fix resource leak when target index is invalid Goto to the label put_dev instead of the label error to fix potential resource leak on path that the target index is invalid. Fixes: c4fbb6515a4d ("NFC: The core part should generate the target index") Signed-off-by: Pan Bian Link: https://lore.kernel.org/r/20210121152748.98409-1-bianpan2016@163.com Signed-off-by: Jakub Kicinski --- net/nfc/rawsock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/nfc/rawsock.c b/net/nfc/rawsock.c index 955c195ae14bc..9c7eb8455ba8e 100644 --- a/net/nfc/rawsock.c +++ b/net/nfc/rawsock.c @@ -105,7 +105,7 @@ static int rawsock_connect(struct socket *sock, struct sockaddr *_addr, if (addr->target_idx > dev->target_next_idx - 1 || addr->target_idx < dev->target_next_idx - dev->n_targets) { rc = -EINVAL; - goto error; + goto put_dev; } rc = nfc_activate_target(dev, addr->target_idx, addr->nfc_protocol); -- GitLab From 344db93ae3ee69fc137bd6ed89a8ff1bf5b0db08 Mon Sep 17 00:00:00 2001 From: Enke Chen Date: Fri, 22 Jan 2021 11:13:06 -0800 Subject: [PATCH 1339/2012] tcp: make TCP_USER_TIMEOUT accurate for zero window probes The TCP_USER_TIMEOUT is checked by the 0-window probe timer. As the timer has backoff with a max interval of about two minutes, the actual timeout for TCP_USER_TIMEOUT can be off by up to two minutes. In this patch the TCP_USER_TIMEOUT is made more accurate by taking it into account when computing the timer value for the 0-window probes. This patch is similar to and builds on top of the one that made TCP_USER_TIMEOUT accurate for RTOs in commit b701a99e431d ("tcp: Add tcp_clamp_rto_to_user_timeout() helper to improve accuracy"). Fixes: 9721e709fa68 ("tcp: simplify window probe aborting on USER_TIMEOUT") Signed-off-by: Enke Chen Reviewed-by: Neal Cardwell Signed-off-by: Eric Dumazet Link: https://lore.kernel.org/r/20210122191306.GA99540@localhost.localdomain Signed-off-by: Jakub Kicinski --- include/net/tcp.h | 1 + net/ipv4/tcp_input.c | 4 ++-- net/ipv4/tcp_output.c | 2 ++ net/ipv4/tcp_timer.c | 18 ++++++++++++++++++ 4 files changed, 23 insertions(+), 2 deletions(-) diff --git a/include/net/tcp.h b/include/net/tcp.h index 78d13c88720fd..ca7e2c6cc663e 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -630,6 +630,7 @@ static inline void tcp_clear_xmit_timers(struct sock *sk) unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu); unsigned int tcp_current_mss(struct sock *sk); +u32 tcp_clamp_probe0_to_user_timeout(const struct sock *sk, u32 when); /* Bound MSS / TSO packet size with the half of the window */ static inline int tcp_bound_to_half_wnd(struct tcp_sock *tp, int pktsize) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index a7dfca0a38cd7..55c6fd6b5d139 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3392,8 +3392,8 @@ static void tcp_ack_probe(struct sock *sk) } else { unsigned long when = tcp_probe0_when(sk, TCP_RTO_MAX); - tcp_reset_xmit_timer(sk, ICSK_TIME_PROBE0, - when, TCP_RTO_MAX); + when = tcp_clamp_probe0_to_user_timeout(sk, when); + tcp_reset_xmit_timer(sk, ICSK_TIME_PROBE0, when, TCP_RTO_MAX); } } diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index ab458697881ed..8478cf749821a 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -4099,6 +4099,8 @@ void tcp_send_probe0(struct sock *sk) */ timeout = TCP_RESOURCE_PROBE_INTERVAL; } + + timeout = tcp_clamp_probe0_to_user_timeout(sk, timeout); tcp_reset_xmit_timer(sk, ICSK_TIME_PROBE0, timeout, TCP_RTO_MAX); } diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index faa92948441ba..4ef08079ccfa9 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -40,6 +40,24 @@ static u32 tcp_clamp_rto_to_user_timeout(const struct sock *sk) return min_t(u32, icsk->icsk_rto, msecs_to_jiffies(remaining)); } +u32 tcp_clamp_probe0_to_user_timeout(const struct sock *sk, u32 when) +{ + struct inet_connection_sock *icsk = inet_csk(sk); + u32 remaining; + s32 elapsed; + + if (!icsk->icsk_user_timeout || !icsk->icsk_probes_tstamp) + return when; + + elapsed = tcp_jiffies32 - icsk->icsk_probes_tstamp; + if (unlikely(elapsed < 0)) + elapsed = 0; + remaining = msecs_to_jiffies(icsk->icsk_user_timeout) - elapsed; + remaining = max_t(u32, remaining, TCP_TIMEOUT_MIN); + + return min_t(u32, remaining, when); +} + /** * tcp_write_err() - close socket and save error info * @sk: The socket the error has appeared on. -- GitLab From 6f1c0ea133a6e4a193a7b285efe209664caeea43 Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Fri, 22 Jan 2021 18:19:48 +0000 Subject: [PATCH 1340/2012] net: introduce a netdev feature for UDP GRO forwarding Introduce a new netdev feature, NETIF_F_GRO_UDP_FWD, to allow user to turn UDP GRO on and off for forwarding. Defaults to off to not change current datapath. Suggested-by: Paolo Abeni Signed-off-by: Alexander Lobakin Signed-off-by: Jakub Kicinski --- include/linux/netdev_features.h | 4 +++- net/ethtool/common.c | 1 + 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h index 934de56644e7c..c06d6aaba9df2 100644 --- a/include/linux/netdev_features.h +++ b/include/linux/netdev_features.h @@ -84,6 +84,7 @@ enum { NETIF_F_GRO_FRAGLIST_BIT, /* Fraglist GRO */ NETIF_F_HW_MACSEC_BIT, /* Offload MACsec operations */ + NETIF_F_GRO_UDP_FWD_BIT, /* Allow UDP GRO for forwarding */ /* * Add your fresh new feature above and remember to update @@ -157,6 +158,7 @@ enum { #define NETIF_F_GRO_FRAGLIST __NETIF_F(GRO_FRAGLIST) #define NETIF_F_GSO_FRAGLIST __NETIF_F(GSO_FRAGLIST) #define NETIF_F_HW_MACSEC __NETIF_F(HW_MACSEC) +#define NETIF_F_GRO_UDP_FWD __NETIF_F(GRO_UDP_FWD) /* Finds the next feature with the highest number of the range of start till 0. */ @@ -234,7 +236,7 @@ static inline int find_next_netdev_feature(u64 feature, unsigned long start) #define NETIF_F_SOFT_FEATURES (NETIF_F_GSO | NETIF_F_GRO) /* Changeable features with no special hardware requirements that defaults to off. */ -#define NETIF_F_SOFT_FEATURES_OFF NETIF_F_GRO_FRAGLIST +#define NETIF_F_SOFT_FEATURES_OFF (NETIF_F_GRO_FRAGLIST | NETIF_F_GRO_UDP_FWD) #define NETIF_F_VLAN_FEATURES (NETIF_F_HW_VLAN_CTAG_FILTER | \ NETIF_F_HW_VLAN_CTAG_RX | \ diff --git a/net/ethtool/common.c b/net/ethtool/common.c index 24036e3055a13..181220101a6e7 100644 --- a/net/ethtool/common.c +++ b/net/ethtool/common.c @@ -68,6 +68,7 @@ const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN] = { [NETIF_F_HW_TLS_RX_BIT] = "tls-hw-rx-offload", [NETIF_F_GRO_FRAGLIST_BIT] = "rx-gro-list", [NETIF_F_HW_MACSEC_BIT] = "macsec-hw-offload", + [NETIF_F_GRO_UDP_FWD_BIT] = "rx-udp-gro-forwarding", }; const char -- GitLab From 36707061d6bafc254b3dfc23a8bb95451812b233 Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Fri, 22 Jan 2021 18:20:02 +0000 Subject: [PATCH 1341/2012] udp: allow forwarding of plain (non-fraglisted) UDP GRO packets Commit 9fd1ff5d2ac7 ("udp: Support UDP fraglist GRO/GSO.") actually not only added a support for fraglisted UDP GRO, but also tweaked some logics the way that non-fraglisted UDP GRO started to work for forwarding too. Commit 2e4ef10f5850 ("net: add GSO UDP L4 and GSO fraglists to the list of software-backed types") added GSO UDP L4 to the list of software GSO to allow virtual netdevs to forward them as is up to the real drivers. Tests showed that currently forwarding and NATing of plain UDP GRO packets are performed fully correctly, regardless if the target netdevice has a support for hardware/driver GSO UDP L4 or not. Add the last element and allow to form plain UDP GRO packets if we are on forwarding path, and the new NETIF_F_GRO_UDP_FWD is enabled on a receiving netdevice. If both NETIF_F_GRO_FRAGLIST and NETIF_F_GRO_UDP_FWD are set, fraglisted GRO takes precedence. This keeps the current behaviour and is generally more optimal for now, as the number of NICs with hardware USO offload is relatively small. Signed-off-by: Alexander Lobakin Acked-by: Willem de Bruijn Signed-off-by: Jakub Kicinski --- net/ipv4/udp_offload.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index 1168d186cc438..41249705d9e9b 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -460,7 +460,8 @@ struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb, if (skb->dev->features & NETIF_F_GRO_FRAGLIST) NAPI_GRO_CB(skb)->is_flist = sk ? !udp_sk(sk)->gro_enabled: 1; - if ((sk && udp_sk(sk)->gro_enabled) || NAPI_GRO_CB(skb)->is_flist) { + if ((!sk && (skb->dev->features & NETIF_F_GRO_UDP_FWD)) || + (sk && udp_sk(sk)->gro_enabled) || NAPI_GRO_CB(skb)->is_flist) { pp = call_gro_receive(udp_gro_receive_segment, head, skb); return pp; } -- GitLab From b80b5dbf118fbb97d67e9e41e68941efeb0457c6 Mon Sep 17 00:00:00 2001 From: Loic Poulain Date: Fri, 22 Jan 2021 16:15:54 +0100 Subject: [PATCH 1342/2012] net: mhi: Set wwan device type The 'wwan' devtype is meant for devices that require additional configuration to be used, like WWAN specific APN setup over AT/QMI commands, rmnet link creation, etc. This is the case for MHI (Modem host Interface) netdev which targets modem/WWAN endpoints. Signed-off-by: Loic Poulain Link: https://lore.kernel.org/r/1611328554-1414-1-git-send-email-loic.poulain@linaro.org Signed-off-by: Jakub Kicinski --- drivers/net/mhi_net.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/mhi_net.c b/drivers/net/mhi_net.c index 5f3a4cc92a880..a5a214d298490 100644 --- a/drivers/net/mhi_net.c +++ b/drivers/net/mhi_net.c @@ -248,6 +248,10 @@ static void mhi_net_rx_refill_work(struct work_struct *work) schedule_delayed_work(&mhi_netdev->rx_refill, HZ / 2); } +static struct device_type wwan_type = { + .name = "wwan", +}; + static int mhi_net_probe(struct mhi_device *mhi_dev, const struct mhi_device_id *id) { @@ -267,6 +271,7 @@ static int mhi_net_probe(struct mhi_device *mhi_dev, mhi_netdev->ndev = ndev; mhi_netdev->mdev = mhi_dev; SET_NETDEV_DEV(ndev, &mhi_dev->dev); + SET_NETDEV_DEVTYPE(ndev, &wwan_type); /* All MHI net channels have 128 ring elements (at least for now) */ mhi_netdev->rx_queue_sz = 128; -- GitLab From 24dfc6eb39b26fc3a5a17a606e868b74f6202ee4 Mon Sep 17 00:00:00 2001 From: Kurt Kanzenbach Date: Sat, 23 Jan 2021 11:56:33 +0100 Subject: [PATCH 1343/2012] net: dsa: hellcreek: Add TAPRIO offloading support The switch has support for the 802.1Qbv Time Aware Shaper (TAS). Traffic schedules may be configured individually on each front port. Each port has eight egress queues. The traffic is mapped to a traffic class respectively via the PCP field of a VLAN tagged frame. The TAPRIO Qdisc already implements that. Therefore, this interface can simply be reused. Add .port_setup_tc() accordingly. The activation of a schedule on a port is split into two parts: * Programming the necessary gate control list (GCL) * Setup delayed work for starting the schedule The hardware supports starting a schedule up to eight seconds in the future. The TAPRIO interface provides an absolute base time. Therefore, periodic delayed work is leveraged to check whether a schedule may be started or not. Signed-off-by: Kurt Kanzenbach Reviewed-by: Vladimir Oltean Signed-off-by: Jakub Kicinski --- drivers/net/dsa/hirschmann/hellcreek.c | 303 ++++++++++++++++++++++++- drivers/net/dsa/hirschmann/hellcreek.h | 17 +- 2 files changed, 318 insertions(+), 2 deletions(-) diff --git a/drivers/net/dsa/hirschmann/hellcreek.c b/drivers/net/dsa/hirschmann/hellcreek.c index 9a1921e653e86..4cc51fb37e678 100644 --- a/drivers/net/dsa/hirschmann/hellcreek.c +++ b/drivers/net/dsa/hirschmann/hellcreek.c @@ -3,7 +3,7 @@ * DSA driver for: * Hirschmann Hellcreek TSN switch. * - * Copyright (C) 2019,2020 Linutronix GmbH + * Copyright (C) 2019-2021 Linutronix GmbH * Author Kurt Kanzenbach */ @@ -153,6 +153,13 @@ static void hellcreek_select_vlan(struct hellcreek *hellcreek, int vid, hellcreek_write(hellcreek, val, HR_VIDCFG); } +static void hellcreek_select_tgd(struct hellcreek *hellcreek, int port) +{ + u16 val = port << TR_TGDSEL_TDGSEL_SHIFT; + + hellcreek_write(hellcreek, val, TR_TGDSEL); +} + static int hellcreek_wait_until_ready(struct hellcreek *hellcreek) { u16 val; @@ -1125,6 +1132,296 @@ out: return ret; } +static void hellcreek_setup_gcl(struct hellcreek *hellcreek, int port, + const struct tc_taprio_qopt_offload *schedule) +{ + const struct tc_taprio_sched_entry *cur, *initial, *next; + size_t i; + + cur = initial = &schedule->entries[0]; + next = cur + 1; + + for (i = 1; i <= schedule->num_entries; ++i) { + u16 data; + u8 gates; + + cur++; + next++; + + if (i == schedule->num_entries) + gates = initial->gate_mask ^ + cur->gate_mask; + else + gates = next->gate_mask ^ + cur->gate_mask; + + data = gates; + + if (i == schedule->num_entries) + data |= TR_GCLDAT_GCLWRLAST; + + /* Gates states */ + hellcreek_write(hellcreek, data, TR_GCLDAT); + + /* Time interval */ + hellcreek_write(hellcreek, + cur->interval & 0x0000ffff, + TR_GCLTIL); + hellcreek_write(hellcreek, + (cur->interval & 0xffff0000) >> 16, + TR_GCLTIH); + + /* Commit entry */ + data = ((i - 1) << TR_GCLCMD_GCLWRADR_SHIFT) | + (initial->gate_mask << + TR_GCLCMD_INIT_GATE_STATES_SHIFT); + hellcreek_write(hellcreek, data, TR_GCLCMD); + } +} + +static void hellcreek_set_cycle_time(struct hellcreek *hellcreek, + const struct tc_taprio_qopt_offload *schedule) +{ + u32 cycle_time = schedule->cycle_time; + + hellcreek_write(hellcreek, cycle_time & 0x0000ffff, TR_CTWRL); + hellcreek_write(hellcreek, (cycle_time & 0xffff0000) >> 16, TR_CTWRH); +} + +static void hellcreek_switch_schedule(struct hellcreek *hellcreek, + ktime_t start_time) +{ + struct timespec64 ts = ktime_to_timespec64(start_time); + + /* Start schedule at this point of time */ + hellcreek_write(hellcreek, ts.tv_nsec & 0x0000ffff, TR_ESTWRL); + hellcreek_write(hellcreek, (ts.tv_nsec & 0xffff0000) >> 16, TR_ESTWRH); + + /* Arm timer, set seconds and switch schedule */ + hellcreek_write(hellcreek, TR_ESTCMD_ESTARM | TR_ESTCMD_ESTSWCFG | + ((ts.tv_sec & TR_ESTCMD_ESTSEC_MASK) << + TR_ESTCMD_ESTSEC_SHIFT), TR_ESTCMD); +} + +static bool hellcreek_schedule_startable(struct hellcreek *hellcreek, int port) +{ + struct hellcreek_port *hellcreek_port = &hellcreek->ports[port]; + s64 base_time_ns, current_ns; + + /* The switch allows a schedule to be started only eight seconds within + * the future. Therefore, check the current PTP time if the schedule is + * startable or not. + */ + + /* Use the "cached" time. That should be alright, as it's updated quite + * frequently in the PTP code. + */ + mutex_lock(&hellcreek->ptp_lock); + current_ns = hellcreek->seconds * NSEC_PER_SEC + hellcreek->last_ts; + mutex_unlock(&hellcreek->ptp_lock); + + /* Calculate difference to admin base time */ + base_time_ns = ktime_to_ns(hellcreek_port->current_schedule->base_time); + + return base_time_ns - current_ns < (s64)8 * NSEC_PER_SEC; +} + +static void hellcreek_start_schedule(struct hellcreek *hellcreek, int port) +{ + struct hellcreek_port *hellcreek_port = &hellcreek->ports[port]; + ktime_t base_time, current_time; + s64 current_ns; + u32 cycle_time; + + /* First select port */ + hellcreek_select_tgd(hellcreek, port); + + /* Forward base time into the future if needed */ + mutex_lock(&hellcreek->ptp_lock); + current_ns = hellcreek->seconds * NSEC_PER_SEC + hellcreek->last_ts; + mutex_unlock(&hellcreek->ptp_lock); + + current_time = ns_to_ktime(current_ns); + base_time = hellcreek_port->current_schedule->base_time; + cycle_time = hellcreek_port->current_schedule->cycle_time; + + if (ktime_compare(current_time, base_time) > 0) { + s64 n; + + n = div64_s64(ktime_sub_ns(current_time, base_time), + cycle_time); + base_time = ktime_add_ns(base_time, (n + 1) * cycle_time); + } + + /* Set admin base time and switch schedule */ + hellcreek_switch_schedule(hellcreek, base_time); + + taprio_offload_free(hellcreek_port->current_schedule); + hellcreek_port->current_schedule = NULL; + + dev_dbg(hellcreek->dev, "Armed EST timer for port %d\n", + hellcreek_port->port); +} + +static void hellcreek_check_schedule(struct work_struct *work) +{ + struct delayed_work *dw = to_delayed_work(work); + struct hellcreek_port *hellcreek_port; + struct hellcreek *hellcreek; + bool startable; + + hellcreek_port = dw_to_hellcreek_port(dw); + hellcreek = hellcreek_port->hellcreek; + + mutex_lock(&hellcreek->reg_lock); + + /* Check starting time */ + startable = hellcreek_schedule_startable(hellcreek, + hellcreek_port->port); + if (startable) { + hellcreek_start_schedule(hellcreek, hellcreek_port->port); + mutex_unlock(&hellcreek->reg_lock); + return; + } + + mutex_unlock(&hellcreek->reg_lock); + + /* Reschedule */ + schedule_delayed_work(&hellcreek_port->schedule_work, + HELLCREEK_SCHEDULE_PERIOD); +} + +static int hellcreek_port_set_schedule(struct dsa_switch *ds, int port, + struct tc_taprio_qopt_offload *taprio) +{ + struct hellcreek *hellcreek = ds->priv; + struct hellcreek_port *hellcreek_port; + bool startable; + u16 ctrl; + + hellcreek_port = &hellcreek->ports[port]; + + dev_dbg(hellcreek->dev, "Configure traffic schedule on port %d\n", + port); + + /* First cancel delayed work */ + cancel_delayed_work_sync(&hellcreek_port->schedule_work); + + mutex_lock(&hellcreek->reg_lock); + + if (hellcreek_port->current_schedule) { + taprio_offload_free(hellcreek_port->current_schedule); + hellcreek_port->current_schedule = NULL; + } + hellcreek_port->current_schedule = taprio_offload_get(taprio); + + /* Then select port */ + hellcreek_select_tgd(hellcreek, port); + + /* Enable gating and keep defaults */ + ctrl = (0xff << TR_TGDCTRL_ADMINGATESTATES_SHIFT) | TR_TGDCTRL_GATE_EN; + hellcreek_write(hellcreek, ctrl, TR_TGDCTRL); + + /* Cancel pending schedule */ + hellcreek_write(hellcreek, 0x00, TR_ESTCMD); + + /* Setup a new schedule */ + hellcreek_setup_gcl(hellcreek, port, hellcreek_port->current_schedule); + + /* Configure cycle time */ + hellcreek_set_cycle_time(hellcreek, hellcreek_port->current_schedule); + + /* Check starting time */ + startable = hellcreek_schedule_startable(hellcreek, port); + if (startable) { + hellcreek_start_schedule(hellcreek, port); + mutex_unlock(&hellcreek->reg_lock); + return 0; + } + + mutex_unlock(&hellcreek->reg_lock); + + /* Schedule periodic schedule check */ + schedule_delayed_work(&hellcreek_port->schedule_work, + HELLCREEK_SCHEDULE_PERIOD); + + return 0; +} + +static int hellcreek_port_del_schedule(struct dsa_switch *ds, int port) +{ + struct hellcreek *hellcreek = ds->priv; + struct hellcreek_port *hellcreek_port; + + hellcreek_port = &hellcreek->ports[port]; + + dev_dbg(hellcreek->dev, "Remove traffic schedule on port %d\n", port); + + /* First cancel delayed work */ + cancel_delayed_work_sync(&hellcreek_port->schedule_work); + + mutex_lock(&hellcreek->reg_lock); + + if (hellcreek_port->current_schedule) { + taprio_offload_free(hellcreek_port->current_schedule); + hellcreek_port->current_schedule = NULL; + } + + /* Then select port */ + hellcreek_select_tgd(hellcreek, port); + + /* Disable gating and return to regular switching flow */ + hellcreek_write(hellcreek, 0xff << TR_TGDCTRL_ADMINGATESTATES_SHIFT, + TR_TGDCTRL); + + mutex_unlock(&hellcreek->reg_lock); + + return 0; +} + +static bool hellcreek_validate_schedule(struct hellcreek *hellcreek, + struct tc_taprio_qopt_offload *schedule) +{ + size_t i; + + /* Does this hellcreek version support Qbv in hardware? */ + if (!hellcreek->pdata->qbv_support) + return false; + + /* cycle time can only be 32bit */ + if (schedule->cycle_time > (u32)-1) + return false; + + /* cycle time extension is not supported */ + if (schedule->cycle_time_extension) + return false; + + /* Only set command is supported */ + for (i = 0; i < schedule->num_entries; ++i) + if (schedule->entries[i].command != TC_TAPRIO_CMD_SET_GATES) + return false; + + return true; +} + +static int hellcreek_port_setup_tc(struct dsa_switch *ds, int port, + enum tc_setup_type type, void *type_data) +{ + struct tc_taprio_qopt_offload *taprio = type_data; + struct hellcreek *hellcreek = ds->priv; + + if (type != TC_SETUP_QDISC_TAPRIO) + return -EOPNOTSUPP; + + if (!hellcreek_validate_schedule(hellcreek, taprio)) + return -EOPNOTSUPP; + + if (taprio->enable) + return hellcreek_port_set_schedule(ds, port, taprio); + + return hellcreek_port_del_schedule(ds, port); +} + static const struct dsa_switch_ops hellcreek_ds_ops = { .get_ethtool_stats = hellcreek_get_ethtool_stats, .get_sset_count = hellcreek_get_sset_count, @@ -1143,6 +1440,7 @@ static const struct dsa_switch_ops hellcreek_ds_ops = { .port_hwtstamp_get = hellcreek_port_hwtstamp_get, .port_prechangeupper = hellcreek_port_prechangeupper, .port_rxtstamp = hellcreek_port_rxtstamp, + .port_setup_tc = hellcreek_port_setup_tc, .port_stp_state_set = hellcreek_port_stp_state_set, .port_txtstamp = hellcreek_port_txtstamp, .port_vlan_add = hellcreek_vlan_add, @@ -1197,6 +1495,9 @@ static int hellcreek_probe(struct platform_device *pdev) port->hellcreek = hellcreek; port->port = i; + + INIT_DELAYED_WORK(&port->schedule_work, + hellcreek_check_schedule); } mutex_init(&hellcreek->reg_lock); diff --git a/drivers/net/dsa/hirschmann/hellcreek.h b/drivers/net/dsa/hirschmann/hellcreek.h index e81781ebc31cd..854639f872478 100644 --- a/drivers/net/dsa/hirschmann/hellcreek.h +++ b/drivers/net/dsa/hirschmann/hellcreek.h @@ -3,7 +3,7 @@ * DSA driver for: * Hirschmann Hellcreek TSN switch. * - * Copyright (C) 2019,2020 Linutronix GmbH + * Copyright (C) 2019-2021 Linutronix GmbH * Author Kurt Kanzenbach */ @@ -21,6 +21,7 @@ #include #include #include +#include /* Ports: * - 0: CPU @@ -246,6 +247,10 @@ struct hellcreek_port { /* Per-port timestamping resources */ struct hellcreek_port_hwtstamp port_hwtstamp; + + /* Per-port Qbv schedule information */ + struct tc_taprio_qopt_offload *current_schedule; + struct delayed_work schedule_work; }; struct hellcreek_fdb_entry { @@ -283,4 +288,14 @@ struct hellcreek { size_t fdb_entries; }; +/* A Qbv schedule can only started up to 8 seconds in the future. If the delta + * between the base time and the current ptp time is larger than 8 seconds, then + * use periodic work to check for the schedule to be started. The delayed work + * cannot be armed directly to $base_time - 8 + X, because for large deltas the + * PTP frequency matters. + */ +#define HELLCREEK_SCHEDULE_PERIOD (2 * HZ) +#define dw_to_hellcreek_port(dw) \ + container_of(dw, struct hellcreek_port, schedule_work) + #endif /* _HELLCREEK_H_ */ -- GitLab From 62d9f1a6945ba69c125e548e72a36d203b30596e Mon Sep 17 00:00:00 2001 From: Pengcheng Yang Date: Sun, 24 Jan 2021 13:07:14 +0800 Subject: [PATCH 1344/2012] tcp: fix TLP timer not set when CA_STATE changes from DISORDER to OPEN Upon receiving a cumulative ACK that changes the congestion state from Disorder to Open, the TLP timer is not set. If the sender is app-limited, it can only wait for the RTO timer to expire and retransmit. The reason for this is that the TLP timer is set before the congestion state changes in tcp_ack(), so we delay the time point of calling tcp_set_xmit_timer() until after tcp_fastretrans_alert() returns and remove the FLAG_SET_XMIT_TIMER from ack_flag when the RACK reorder timer is set. This commit has two additional benefits: 1) Make sure to reset RTO according to RFC6298 when receiving ACK, to avoid spurious RTO caused by RTO timer early expires. 2) Reduce the xmit timer reschedule once per ACK when the RACK reorder timer is set. Fixes: df92c8394e6e ("tcp: fix xmit timer to only be reset if data ACKed/SACKed") Link: https://lore.kernel.org/netdev/1611311242-6675-1-git-send-email-yangpc@wangsu.com Signed-off-by: Pengcheng Yang Acked-by: Neal Cardwell Acked-by: Yuchung Cheng Cc: Eric Dumazet Link: https://lore.kernel.org/r/1611464834-23030-1-git-send-email-yangpc@wangsu.com Signed-off-by: Jakub Kicinski --- include/net/tcp.h | 2 +- net/ipv4/tcp_input.c | 10 ++++++---- net/ipv4/tcp_recovery.c | 5 +++-- 3 files changed, 10 insertions(+), 7 deletions(-) diff --git a/include/net/tcp.h b/include/net/tcp.h index ca7e2c6cc663e..25bbada379c46 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -2061,7 +2061,7 @@ void tcp_mark_skb_lost(struct sock *sk, struct sk_buff *skb); void tcp_newreno_mark_lost(struct sock *sk, bool snd_una_advanced); extern s32 tcp_rack_skb_timeout(struct tcp_sock *tp, struct sk_buff *skb, u32 reo_wnd); -extern void tcp_rack_mark_lost(struct sock *sk); +extern bool tcp_rack_mark_lost(struct sock *sk); extern void tcp_rack_advance(struct tcp_sock *tp, u8 sacked, u32 end_seq, u64 xmit_time); extern void tcp_rack_reo_timeout(struct sock *sk); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 55c6fd6b5d139..9b44caa4b9562 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2859,7 +2859,8 @@ static void tcp_identify_packet_loss(struct sock *sk, int *ack_flag) } else if (tcp_is_rack(sk)) { u32 prior_retrans = tp->retrans_out; - tcp_rack_mark_lost(sk); + if (tcp_rack_mark_lost(sk)) + *ack_flag &= ~FLAG_SET_XMIT_TIMER; if (prior_retrans > tp->retrans_out) *ack_flag |= FLAG_LOST_RETRANS; } @@ -3816,9 +3817,6 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) if (tp->tlp_high_seq) tcp_process_tlp_ack(sk, ack, flag); - /* If needed, reset TLP/RTO timer; RACK may later override this. */ - if (flag & FLAG_SET_XMIT_TIMER) - tcp_set_xmit_timer(sk); if (tcp_ack_is_dubious(sk, flag)) { if (!(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP))) { @@ -3831,6 +3829,10 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) &rexmit); } + /* If needed, reset TLP/RTO timer when RACK doesn't set. */ + if (flag & FLAG_SET_XMIT_TIMER) + tcp_set_xmit_timer(sk); + if ((flag & FLAG_FORWARD_PROGRESS) || !(flag & FLAG_NOT_DUP)) sk_dst_confirm(sk); diff --git a/net/ipv4/tcp_recovery.c b/net/ipv4/tcp_recovery.c index 177307a3081f9..6f1b4ac7fe99c 100644 --- a/net/ipv4/tcp_recovery.c +++ b/net/ipv4/tcp_recovery.c @@ -96,13 +96,13 @@ static void tcp_rack_detect_loss(struct sock *sk, u32 *reo_timeout) } } -void tcp_rack_mark_lost(struct sock *sk) +bool tcp_rack_mark_lost(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); u32 timeout; if (!tp->rack.advanced) - return; + return false; /* Reset the advanced flag to avoid unnecessary queue scanning */ tp->rack.advanced = 0; @@ -112,6 +112,7 @@ void tcp_rack_mark_lost(struct sock *sk) inet_csk_reset_xmit_timer(sk, ICSK_TIME_REO_TIMEOUT, timeout, inet_csk(sk)->icsk_rto); } + return !!timeout; } /* Record the most recently (re)sent time among the (s)acked packets -- GitLab From 4025c784c573cab7e3f84746cc82b8033923ec62 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Sat, 23 Jan 2021 16:12:44 +1000 Subject: [PATCH 1345/2012] powerpc/64s: prevent recursive replay_soft_interrupts causing superfluous interrupt When an asynchronous interrupt calls irq_exit, it checks for softirqs that may have been created, and runs them. Running softirqs enables local irqs, which can replay pending interrupts causing recursion in replay_soft_interrupts. This abridged trace shows how this can occur: ! NIP replay_soft_interrupts LR interrupt_exit_kernel_prepare Call Trace: interrupt_exit_kernel_prepare (unreliable) interrupt_return --- interrupt: ea0 at __rb_reserve_next NIP __rb_reserve_next LR __rb_reserve_next Call Trace: ring_buffer_lock_reserve trace_function function_trace_call ftrace_call __do_softirq irq_exit timer_interrupt ! replay_soft_interrupts interrupt_exit_kernel_prepare interrupt_return --- interrupt: ea0 at arch_local_irq_restore This can not be prevented easily, because softirqs must not block hard irqs, so it has to be dealt with. The recursion is bounded by design in the softirq code because softirq replay disables softirqs and loops around again to check for new softirqs created while it ran, so that's not a problem. However it does mess up interrupt replay state, causing superfluous interrupts when the second replay_soft_interrupts clears a pending interrupt, leaving it still set in the first call in the 'happened' local variable. Fix this by not caching a copy of irqs_happened across interrupt handler calls. Fixes: 3282a3da25bd ("powerpc/64: Implement soft interrupt replay in C") Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210123061244.2076145-1-npiggin@gmail.com --- arch/powerpc/kernel/irq.c | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 6b1eca53e36cc..cc7a6271b6b4e 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -180,13 +180,18 @@ void notrace restore_interrupts(void) void replay_soft_interrupts(void) { + struct pt_regs regs; + /* - * We use local_paca rather than get_paca() to avoid all - * the debug_smp_processor_id() business in this low level - * function + * Be careful here, calling these interrupt handlers can cause + * softirqs to be raised, which they may run when calling irq_exit, + * which will cause local_irq_enable() to be run, which can then + * recurse into this function. Don't keep any state across + * interrupt handler calls which may change underneath us. + * + * We use local_paca rather than get_paca() to avoid all the + * debug_smp_processor_id() business in this low level function. */ - unsigned char happened = local_paca->irq_happened; - struct pt_regs regs; ppc_save_regs(®s); regs.softe = IRQS_ENABLED; @@ -209,7 +214,7 @@ again: * This is a higher priority interrupt than the others, so * replay it first. */ - if (IS_ENABLED(CONFIG_PPC_BOOK3S) && (happened & PACA_IRQ_HMI)) { + if (IS_ENABLED(CONFIG_PPC_BOOK3S) && (local_paca->irq_happened & PACA_IRQ_HMI)) { local_paca->irq_happened &= ~PACA_IRQ_HMI; regs.trap = 0xe60; handle_hmi_exception(®s); @@ -217,7 +222,7 @@ again: hard_irq_disable(); } - if (happened & PACA_IRQ_DEC) { + if (local_paca->irq_happened & PACA_IRQ_DEC) { local_paca->irq_happened &= ~PACA_IRQ_DEC; regs.trap = 0x900; timer_interrupt(®s); @@ -225,7 +230,7 @@ again: hard_irq_disable(); } - if (happened & PACA_IRQ_EE) { + if (local_paca->irq_happened & PACA_IRQ_EE) { local_paca->irq_happened &= ~PACA_IRQ_EE; regs.trap = 0x500; do_IRQ(®s); @@ -233,7 +238,7 @@ again: hard_irq_disable(); } - if (IS_ENABLED(CONFIG_PPC_DOORBELL) && (happened & PACA_IRQ_DBELL)) { + if (IS_ENABLED(CONFIG_PPC_DOORBELL) && (local_paca->irq_happened & PACA_IRQ_DBELL)) { local_paca->irq_happened &= ~PACA_IRQ_DBELL; if (IS_ENABLED(CONFIG_PPC_BOOK3E)) regs.trap = 0x280; @@ -245,7 +250,7 @@ again: } /* Book3E does not support soft-masking PMI interrupts */ - if (IS_ENABLED(CONFIG_PPC_BOOK3S) && (happened & PACA_IRQ_PMI)) { + if (IS_ENABLED(CONFIG_PPC_BOOK3S) && (local_paca->irq_happened & PACA_IRQ_PMI)) { local_paca->irq_happened &= ~PACA_IRQ_PMI; regs.trap = 0xf00; performance_monitor_exception(®s); @@ -253,8 +258,7 @@ again: hard_irq_disable(); } - happened = local_paca->irq_happened; - if (happened & ~PACA_IRQ_HARD_DIS) { + if (local_paca->irq_happened & ~PACA_IRQ_HARD_DIS) { /* * We are responding to the next interrupt, so interrupt-off * latencies should be reset here. -- GitLab From 9d5c8190683a462dbc787658467a0da17011ea5f Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Sun, 24 Jan 2021 15:08:14 +0000 Subject: [PATCH 1346/2012] io_uring: fix sleeping under spin in __io_clean_op [ 27.629441] BUG: sleeping function called from invalid context at fs/file.c:402 [ 27.631317] in_atomic(): 1, irqs_disabled(): 1, non_block: 0, pid: 1012, name: io_wqe_worker-0 [ 27.633220] 1 lock held by io_wqe_worker-0/1012: [ 27.634286] #0: ffff888105e26c98 (&ctx->completion_lock) {....}-{2:2}, at: __io_req_complete.part.102+0x30/0x70 [ 27.649249] Call Trace: [ 27.649874] dump_stack+0xac/0xe3 [ 27.650666] ___might_sleep+0x284/0x2c0 [ 27.651566] put_files_struct+0xb8/0x120 [ 27.652481] __io_clean_op+0x10c/0x2a0 [ 27.653362] __io_cqring_fill_event+0x2c1/0x350 [ 27.654399] __io_req_complete.part.102+0x41/0x70 [ 27.655464] io_openat2+0x151/0x300 [ 27.656297] io_issue_sqe+0x6c/0x14e0 [ 27.660991] io_wq_submit_work+0x7f/0x240 [ 27.662890] io_worker_handle_work+0x501/0x8a0 [ 27.664836] io_wqe_worker+0x158/0x520 [ 27.667726] kthread+0x134/0x180 [ 27.669641] ret_from_fork+0x1f/0x30 Instead of cleaning files on overflow, return back overflow cancellation into io_uring_cancel_files(). Previously it was racy to clean REQ_F_OVERFLOW flag, but we got rid of it, and can do it through repetitive attempts targeting all matching requests. Reported-by: Abaci Reported-by: Joseph Qi Cc: Xiaoguang Wang Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io_uring.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 862113a9364f8..8a98afed50cd9 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -1025,6 +1025,7 @@ static ssize_t io_import_iovec(int rw, struct io_kiocb *req, static int io_setup_async_rw(struct io_kiocb *req, const struct iovec *iovec, const struct iovec *fast_iov, struct iov_iter *iter, bool force); +static void io_req_drop_files(struct io_kiocb *req); static struct kmem_cache *req_cachep; @@ -1048,8 +1049,7 @@ EXPORT_SYMBOL(io_uring_get_socket); static inline void io_clean_op(struct io_kiocb *req) { - if (req->flags & (REQ_F_NEED_CLEANUP | REQ_F_BUFFER_SELECTED | - REQ_F_INFLIGHT)) + if (req->flags & (REQ_F_NEED_CLEANUP | REQ_F_BUFFER_SELECTED)) __io_clean_op(req); } @@ -1394,6 +1394,8 @@ static void io_req_clean_work(struct io_kiocb *req) free_fs_struct(fs); req->work.flags &= ~IO_WQ_WORK_FS; } + if (req->flags & REQ_F_INFLIGHT) + io_req_drop_files(req); io_put_identity(req->task->io_uring, req); } @@ -6230,9 +6232,6 @@ static void __io_clean_op(struct io_kiocb *req) } req->flags &= ~REQ_F_NEED_CLEANUP; } - - if (req->flags & REQ_F_INFLIGHT) - io_req_drop_files(req); } static int io_issue_sqe(struct io_kiocb *req, bool force_nonblock, @@ -8879,6 +8878,7 @@ static void io_uring_cancel_files(struct io_ring_ctx *ctx, io_wq_cancel_cb(ctx->io_wq, io_cancel_task_cb, &cancel, true); io_poll_remove_all(ctx, task, files); io_kill_timeouts(ctx, task, files); + io_cqring_overflow_flush(ctx, true, task, files); /* cancellations _may_ trigger task work */ io_run_task_work(); schedule(); -- GitLab From 02a13674fa0e8dd326de8b9f4514b41b03d99003 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sat, 23 Jan 2021 15:49:31 -0700 Subject: [PATCH 1347/2012] io_uring: account io_uring internal files as REQ_F_INFLIGHT We need to actively cancel anything that introduces a potential circular loop, where io_uring holds a reference to itself. If the file in question is an io_uring file, then add the request to the inflight list. Cc: stable@vger.kernel.org # 5.9+ Signed-off-by: Jens Axboe --- fs/io_uring.c | 36 ++++++++++++++++++++++++++---------- 1 file changed, 26 insertions(+), 10 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 8a98afed50cd9..c07913ec0ccac 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -1075,8 +1075,11 @@ static bool io_match_task(struct io_kiocb *head, return true; io_for_each_link(req, head) { - if ((req->flags & REQ_F_WORK_INITIALIZED) && - (req->work.flags & IO_WQ_WORK_FILES) && + if (!(req->flags & REQ_F_WORK_INITIALIZED)) + continue; + if (req->file && req->file->f_op == &io_uring_fops) + return true; + if ((req->work.flags & IO_WQ_WORK_FILES) && req->work.identity->files == files) return true; } @@ -1505,11 +1508,14 @@ static bool io_grab_identity(struct io_kiocb *req) return false; atomic_inc(&id->files->count); get_nsproxy(id->nsproxy); - req->flags |= REQ_F_INFLIGHT; - spin_lock_irq(&ctx->inflight_lock); - list_add(&req->inflight_entry, &ctx->inflight_list); - spin_unlock_irq(&ctx->inflight_lock); + if (!(req->flags & REQ_F_INFLIGHT)) { + req->flags |= REQ_F_INFLIGHT; + + spin_lock_irq(&ctx->inflight_lock); + list_add(&req->inflight_entry, &ctx->inflight_list); + spin_unlock_irq(&ctx->inflight_lock); + } req->work.flags |= IO_WQ_WORK_FILES; } if (!(req->work.flags & IO_WQ_WORK_MM) && @@ -6164,8 +6170,10 @@ static void io_req_drop_files(struct io_kiocb *req) struct io_uring_task *tctx = req->task->io_uring; unsigned long flags; - put_files_struct(req->work.identity->files); - put_nsproxy(req->work.identity->nsproxy); + if (req->work.flags & IO_WQ_WORK_FILES) { + put_files_struct(req->work.identity->files); + put_nsproxy(req->work.identity->nsproxy); + } spin_lock_irqsave(&ctx->inflight_lock, flags); list_del(&req->inflight_entry); spin_unlock_irqrestore(&ctx->inflight_lock, flags); @@ -6450,6 +6458,15 @@ static struct file *io_file_get(struct io_submit_state *state, file = __io_file_get(state, fd); } + if (file && file->f_op == &io_uring_fops) { + io_req_init_async(req); + req->flags |= REQ_F_INFLIGHT; + + spin_lock_irq(&ctx->inflight_lock); + list_add(&req->inflight_entry, &ctx->inflight_list); + spin_unlock_irq(&ctx->inflight_lock); + } + return file; } @@ -8860,8 +8877,7 @@ static void io_uring_cancel_files(struct io_ring_ctx *ctx, spin_lock_irq(&ctx->inflight_lock); list_for_each_entry(req, &ctx->inflight_list, inflight_entry) { - if (req->task != task || - req->work.identity->files != files) + if (!io_match_task(req, task, files)) continue; found = true; break; -- GitLab From bde9cfa3afe4324ec251e4af80ebf9b7afaf7afe Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Sat, 23 Jan 2021 21:00:57 -0800 Subject: [PATCH 1348/2012] x86/setup: don't remove E820_TYPE_RAM for pfn 0 Patch series "mm: fix initialization of struct page for holes in memory layout", v3. Commit 73a6e474cb37 ("mm: memmap_init: iterate over memblock regions rather that check each PFN") exposed several issues with the memory map initialization and these patches fix those issues. Initially there were crashes during compaction that Qian Cai reported back in April [1]. It seemed back then that the problem was fixed, but a few weeks ago Andrea Arcangeli hit the same bug [2] and there was an additional discussion at [3]. [1] https://lore.kernel.org/lkml/8C537EB7-85EE-4DCF-943E-3CC0ED0DF56D@lca.pw [2] https://lore.kernel.org/lkml/20201121194506.13464-1-aarcange@redhat.com [3] https://lore.kernel.org/mm-commits/20201206005401.qKuAVgOXr%akpm@linux-foundation.org This patch (of 2): The first 4Kb of memory is a BIOS owned area and to avoid its allocation for the kernel it was not listed in e820 tables as memory. As the result, pfn 0 was never recognised by the generic memory management and it is not a part of neither node 0 nor ZONE_DMA. If set_pfnblock_flags_mask() would be ever called for the pageblock corresponding to the first 2Mbytes of memory, having pfn 0 outside of ZONE_DMA would trigger VM_BUG_ON_PAGE(!zone_spans_pfn(page_zone(page), pfn), page); Along with reserving the first 4Kb in e820 tables, several first pages are reserved with memblock in several places during setup_arch(). These reservations are enough to ensure the kernel does not touch the BIOS area and it is not necessary to remove E820_TYPE_RAM for pfn 0. Remove the update of e820 table that changes the type of pfn 0 and move the comment describing why it was done to trim_low_memory_range() that reserves the beginning of the memory. Link: https://lkml.kernel.org/r/20210111194017.22696-2-rppt@kernel.org Signed-off-by: Mike Rapoport Cc: Baoquan He Cc: Borislav Petkov Cc: David Hildenbrand Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Mel Gorman Cc: Michal Hocko Cc: Qian Cai Cc: Thomas Gleixner Cc: Vlastimil Babka Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/kernel/setup.c | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 740f3bdb3f619..3412c4595efd8 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -660,17 +660,6 @@ static void __init trim_platform_memory_ranges(void) static void __init trim_bios_range(void) { - /* - * A special case is the first 4Kb of memory; - * This is a BIOS owned area, not kernel ram, but generally - * not listed as such in the E820 table. - * - * This typically reserves additional memory (64KiB by default) - * since some BIOSes are known to corrupt low memory. See the - * Kconfig help text for X86_RESERVE_LOW. - */ - e820__range_update(0, PAGE_SIZE, E820_TYPE_RAM, E820_TYPE_RESERVED); - /* * special case: Some BIOSes report the PC BIOS * area (640Kb -> 1Mb) as RAM even though it is not. @@ -728,6 +717,15 @@ early_param("reservelow", parse_reservelow); static void __init trim_low_memory_range(void) { + /* + * A special case is the first 4Kb of memory; + * This is a BIOS owned area, not kernel ram, but generally + * not listed as such in the E820 table. + * + * This typically reserves additional memory (64KiB by default) + * since some BIOSes are known to corrupt low memory. See the + * Kconfig help text for X86_RESERVE_LOW. + */ memblock_reserve(0, ALIGN(reserve_low, PAGE_SIZE)); } -- GitLab From d3921cb8be29ce5668c64e23ffdaeec5f8c69399 Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Sat, 23 Jan 2021 21:01:02 -0800 Subject: [PATCH 1349/2012] mm: fix initialization of struct page for holes in memory layout There could be struct pages that are not backed by actual physical memory. This can happen when the actual memory bank is not a multiple of SECTION_SIZE or when an architecture does not register memory holes reserved by the firmware as memblock.memory. Such pages are currently initialized using init_unavailable_mem() function that iterates through PFNs in holes in memblock.memory and if there is a struct page corresponding to a PFN, the fields if this page are set to default values and the page is marked as Reserved. init_unavailable_mem() does not take into account zone and node the page belongs to and sets both zone and node links in struct page to zero. On a system that has firmware reserved holes in a zone above ZONE_DMA, for instance in a configuration below: # grep -A1 E820 /proc/iomem 7a17b000-7a216fff : Unknown E820 type 7a217000-7bffffff : System RAM unset zone link in struct page will trigger VM_BUG_ON_PAGE(!zone_spans_pfn(page_zone(page), pfn), page); because there are pages in both ZONE_DMA32 and ZONE_DMA (unset zone link in struct page) in the same pageblock. Update init_unavailable_mem() to use zone constraints defined by an architecture to properly setup the zone link and use node ID of the adjacent range in memblock.memory to set the node link. Link: https://lkml.kernel.org/r/20210111194017.22696-3-rppt@kernel.org Fixes: 73a6e474cb37 ("mm: memmap_init: iterate over memblock regions rather that check each PFN") Signed-off-by: Mike Rapoport Reported-by: Andrea Arcangeli Cc: Andrea Arcangeli Cc: Baoquan He Cc: Borislav Petkov Cc: David Hildenbrand Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Mel Gorman Cc: Michal Hocko Cc: Qian Cai Cc: Thomas Gleixner Cc: Vlastimil Babka Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/page_alloc.c | 84 +++++++++++++++++++++++++++++-------------------- 1 file changed, 50 insertions(+), 34 deletions(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 027f6481ba59b..85ecaa6d0d064 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -7078,23 +7078,26 @@ void __init free_area_init_memoryless_node(int nid) * Initialize all valid struct pages in the range [spfn, epfn) and mark them * PageReserved(). Return the number of struct pages that were initialized. */ -static u64 __init init_unavailable_range(unsigned long spfn, unsigned long epfn) +static u64 __init init_unavailable_range(unsigned long spfn, unsigned long epfn, + int zone, int nid) { - unsigned long pfn; + unsigned long pfn, zone_spfn, zone_epfn; u64 pgcnt = 0; + zone_spfn = arch_zone_lowest_possible_pfn[zone]; + zone_epfn = arch_zone_highest_possible_pfn[zone]; + + spfn = clamp(spfn, zone_spfn, zone_epfn); + epfn = clamp(epfn, zone_spfn, zone_epfn); + for (pfn = spfn; pfn < epfn; pfn++) { if (!pfn_valid(ALIGN_DOWN(pfn, pageblock_nr_pages))) { pfn = ALIGN_DOWN(pfn, pageblock_nr_pages) + pageblock_nr_pages - 1; continue; } - /* - * Use a fake node/zone (0) for now. Some of these pages - * (in memblock.reserved but not in memblock.memory) will - * get re-initialized via reserve_bootmem_region() later. - */ - __init_single_page(pfn_to_page(pfn), pfn, 0, 0); + + __init_single_page(pfn_to_page(pfn), pfn, zone, nid); __SetPageReserved(pfn_to_page(pfn)); pgcnt++; } @@ -7103,51 +7106,64 @@ static u64 __init init_unavailable_range(unsigned long spfn, unsigned long epfn) } /* - * Only struct pages that are backed by physical memory are zeroed and - * initialized by going through __init_single_page(). But, there are some - * struct pages which are reserved in memblock allocator and their fields - * may be accessed (for example page_to_pfn() on some configuration accesses - * flags). We must explicitly initialize those struct pages. + * Only struct pages that correspond to ranges defined by memblock.memory + * are zeroed and initialized by going through __init_single_page() during + * memmap_init(). + * + * But, there could be struct pages that correspond to holes in + * memblock.memory. This can happen because of the following reasons: + * - phyiscal memory bank size is not necessarily the exact multiple of the + * arbitrary section size + * - early reserved memory may not be listed in memblock.memory + * - memory layouts defined with memmap= kernel parameter may not align + * nicely with memmap sections * - * This function also addresses a similar issue where struct pages are left - * uninitialized because the physical address range is not covered by - * memblock.memory or memblock.reserved. That could happen when memblock - * layout is manually configured via memmap=, or when the highest physical - * address (max_pfn) does not end on a section boundary. + * Explicitly initialize those struct pages so that: + * - PG_Reserved is set + * - zone link is set accorging to the architecture constrains + * - node is set to node id of the next populated region except for the + * trailing hole where last node id is used */ -static void __init init_unavailable_mem(void) +static void __init init_zone_unavailable_mem(int zone) { - phys_addr_t start, end; - u64 i, pgcnt; - phys_addr_t next = 0; + unsigned long start, end; + int i, nid; + u64 pgcnt; + unsigned long next = 0; /* - * Loop through unavailable ranges not covered by memblock.memory. + * Loop through holes in memblock.memory and initialize struct + * pages corresponding to these holes */ pgcnt = 0; - for_each_mem_range(i, &start, &end) { + for_each_mem_pfn_range(i, MAX_NUMNODES, &start, &end, &nid) { if (next < start) - pgcnt += init_unavailable_range(PFN_DOWN(next), - PFN_UP(start)); + pgcnt += init_unavailable_range(next, start, zone, nid); next = end; } /* - * Early sections always have a fully populated memmap for the whole - * section - see pfn_valid(). If the last section has holes at the - * end and that section is marked "online", the memmap will be - * considered initialized. Make sure that memmap has a well defined - * state. + * Last section may surpass the actual end of memory (e.g. we can + * have 1Gb section and 512Mb of RAM pouplated). + * Make sure that memmap has a well defined state in this case. */ - pgcnt += init_unavailable_range(PFN_DOWN(next), - round_up(max_pfn, PAGES_PER_SECTION)); + end = round_up(max_pfn, PAGES_PER_SECTION); + pgcnt += init_unavailable_range(next, end, zone, nid); /* * Struct pages that do not have backing memory. This could be because * firmware is using some of this memory, or for some other reasons. */ if (pgcnt) - pr_info("Zeroed struct page in unavailable ranges: %lld pages", pgcnt); + pr_info("Zone %s: zeroed struct page in unavailable ranges: %lld pages", zone_names[zone], pgcnt); +} + +static void __init init_unavailable_mem(void) +{ + int zone; + + for (zone = 0; zone < ZONE_MOVABLE; zone++) + init_zone_unavailable_mem(zone); } #else static inline void __init init_unavailable_mem(void) -- GitLab From 3de7d4f25a7438f09fef4e71ef111f1805cd8e7c Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Sat, 23 Jan 2021 21:01:07 -0800 Subject: [PATCH 1350/2012] mm: memcg/slab: optimize objcg stock draining MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Imran Khan reported a 16% regression in hackbench results caused by the commit f2fe7b09a52b ("mm: memcg/slab: charge individual slab objects instead of pages"). The regression is noticeable in the case of a consequent allocation of several relatively large slab objects, e.g. skb's. As soon as the amount of stocked bytes exceeds PAGE_SIZE, drain_obj_stock() and __memcg_kmem_uncharge() are called, and it leads to a number of atomic operations in page_counter_uncharge(). The corresponding call graph is below (provided by Imran Khan): |__alloc_skb | | | |__kmalloc_reserve.isra.61 | | | | | |__kmalloc_node_track_caller | | | | | | | |slab_pre_alloc_hook.constprop.88 | | | obj_cgroup_charge | | | | | | | | | |__memcg_kmem_charge | | | | | | | | | | | |page_counter_try_charge | | | | | | | | | |refill_obj_stock | | | | | | | | | | | |drain_obj_stock.isra.68 | | | | | | | | | | | | | |__memcg_kmem_uncharge | | | | | | | | | | | | | | | |page_counter_uncharge | | | | | | | | | | | | | | | | | |page_counter_cancel | | | | | | | | | | | |__slab_alloc | | | | | | | | | |___slab_alloc | | | | | | | | |slab_post_alloc_hook Instead of directly uncharging the accounted kernel memory, it's possible to refill the generic page-sized per-cpu stock instead. It's a much faster operation, especially on a default hierarchy. As a bonus, __memcg_kmem_uncharge_page() will also get faster, so the freeing of page-sized kernel allocations (e.g. large kmallocs) will become faster. A similar change has been done earlier for the socket memory by the commit 475d0487a2ad ("mm: memcontrol: use per-cpu stocks for socket memory uncharging"). Link: https://lkml.kernel.org/r/20210106042239.2860107-1-guro@fb.com Fixes: f2fe7b09a52b ("mm: memcg/slab: charge individual slab objects instead of pages") Signed-off-by: Roman Gushchin Reported-by: Imran Khan Tested-by: Imran Khan Reviewed-by: Shakeel Butt Reviewed-by: Michal Koutn Cc: Michal Koutný Cc: Johannes Weiner Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memcontrol.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 605f671203efb..e2de77b5bcc2f 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -3115,9 +3115,7 @@ void __memcg_kmem_uncharge(struct mem_cgroup *memcg, unsigned int nr_pages) if (!cgroup_subsys_on_dfl(memory_cgrp_subsys)) page_counter_uncharge(&memcg->kmem, nr_pages); - page_counter_uncharge(&memcg->memory, nr_pages); - if (do_memsw_account()) - page_counter_uncharge(&memcg->memsw, nr_pages); + refill_stock(memcg, nr_pages); } /** -- GitLab From 8a8792f600abacd7e1b9bb667759dca1c153f64c Mon Sep 17 00:00:00 2001 From: Shakeel Butt Date: Sat, 23 Jan 2021 21:01:11 -0800 Subject: [PATCH 1351/2012] mm: memcg: fix memcg file_dirty numa stat The kernel updates the per-node NR_FILE_DIRTY stats on page migration but not the memcg numa stats. That was not an issue until recently the commit 5f9a4f4a7096 ("mm: memcontrol: add the missing numa_stat interface for cgroup v2") exposed numa stats for the memcg. So fix the file_dirty per-memcg numa stat. Link: https://lkml.kernel.org/r/20210108155813.2914586-1-shakeelb@google.com Fixes: 5f9a4f4a7096 ("mm: memcontrol: add the missing numa_stat interface for cgroup v2") Signed-off-by: Shakeel Butt Reviewed-by: Muchun Song Acked-by: Yang Shi Reviewed-by: Roman Gushchin Cc: Johannes Weiner Cc: Michal Hocko Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/migrate.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mm/migrate.c b/mm/migrate.c index ee5e612b4cd87..613794f6a4335 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -500,9 +500,9 @@ int migrate_page_move_mapping(struct address_space *mapping, __inc_lruvec_state(new_lruvec, NR_SHMEM); } if (dirty && mapping_can_writeback(mapping)) { - __dec_node_state(oldzone->zone_pgdat, NR_FILE_DIRTY); + __dec_lruvec_state(old_lruvec, NR_FILE_DIRTY); __dec_zone_state(oldzone, NR_ZONE_WRITE_PENDING); - __inc_node_state(newzone->zone_pgdat, NR_FILE_DIRTY); + __inc_lruvec_state(new_lruvec, NR_FILE_DIRTY); __inc_zone_state(newzone, NR_ZONE_WRITE_PENDING); } } -- GitLab From 5c447d274f3746fbed6e695e7b9a2d7bd8b31b71 Mon Sep 17 00:00:00 2001 From: Shakeel Butt Date: Sat, 23 Jan 2021 21:01:15 -0800 Subject: [PATCH 1352/2012] mm: fix numa stats for thp migration Currently the kernel is not correctly updating the numa stats for NR_FILE_PAGES and NR_SHMEM on THP migration. Fix that. For NR_FILE_DIRTY and NR_ZONE_WRITE_PENDING, although at the moment there is no need to handle THP migration as kernel still does not have write support for file THP but to be more future proof, this patch adds the THP support for those stats as well. Link: https://lkml.kernel.org/r/20210108155813.2914586-2-shakeelb@google.com Fixes: e71769ae52609 ("mm: enable thp migration for shmem thp") Signed-off-by: Shakeel Butt Acked-by: Yang Shi Reviewed-by: Roman Gushchin Cc: Johannes Weiner Cc: Michal Hocko Cc: Muchun Song Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/migrate.c | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/mm/migrate.c b/mm/migrate.c index 613794f6a4335..c0efe921bca5c 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -402,6 +402,7 @@ int migrate_page_move_mapping(struct address_space *mapping, struct zone *oldzone, *newzone; int dirty; int expected_count = expected_page_refs(mapping, page) + extra_count; + int nr = thp_nr_pages(page); if (!mapping) { /* Anonymous page without mapping */ @@ -437,7 +438,7 @@ int migrate_page_move_mapping(struct address_space *mapping, */ newpage->index = page->index; newpage->mapping = page->mapping; - page_ref_add(newpage, thp_nr_pages(page)); /* add cache reference */ + page_ref_add(newpage, nr); /* add cache reference */ if (PageSwapBacked(page)) { __SetPageSwapBacked(newpage); if (PageSwapCache(page)) { @@ -459,7 +460,7 @@ int migrate_page_move_mapping(struct address_space *mapping, if (PageTransHuge(page)) { int i; - for (i = 1; i < HPAGE_PMD_NR; i++) { + for (i = 1; i < nr; i++) { xas_next(&xas); xas_store(&xas, newpage); } @@ -470,7 +471,7 @@ int migrate_page_move_mapping(struct address_space *mapping, * to one less reference. * We know this isn't the last reference. */ - page_ref_unfreeze(page, expected_count - thp_nr_pages(page)); + page_ref_unfreeze(page, expected_count - nr); xas_unlock(&xas); /* Leave irq disabled to prevent preemption while updating stats */ @@ -493,17 +494,17 @@ int migrate_page_move_mapping(struct address_space *mapping, old_lruvec = mem_cgroup_lruvec(memcg, oldzone->zone_pgdat); new_lruvec = mem_cgroup_lruvec(memcg, newzone->zone_pgdat); - __dec_lruvec_state(old_lruvec, NR_FILE_PAGES); - __inc_lruvec_state(new_lruvec, NR_FILE_PAGES); + __mod_lruvec_state(old_lruvec, NR_FILE_PAGES, -nr); + __mod_lruvec_state(new_lruvec, NR_FILE_PAGES, nr); if (PageSwapBacked(page) && !PageSwapCache(page)) { - __dec_lruvec_state(old_lruvec, NR_SHMEM); - __inc_lruvec_state(new_lruvec, NR_SHMEM); + __mod_lruvec_state(old_lruvec, NR_SHMEM, -nr); + __mod_lruvec_state(new_lruvec, NR_SHMEM, nr); } if (dirty && mapping_can_writeback(mapping)) { - __dec_lruvec_state(old_lruvec, NR_FILE_DIRTY); - __dec_zone_state(oldzone, NR_ZONE_WRITE_PENDING); - __inc_lruvec_state(new_lruvec, NR_FILE_DIRTY); - __inc_zone_state(newzone, NR_ZONE_WRITE_PENDING); + __mod_lruvec_state(old_lruvec, NR_FILE_DIRTY, -nr); + __mod_zone_page_state(oldzone, NR_ZONE_WRITE_PENDING, -nr); + __mod_lruvec_state(new_lruvec, NR_FILE_DIRTY, nr); + __mod_zone_page_state(newzone, NR_ZONE_WRITE_PENDING, nr); } } local_irq_enable(); -- GitLab From a11a496ee6e2ab6ed850233c96b94caf042af0b9 Mon Sep 17 00:00:00 2001 From: Lecopzer Chen Date: Sat, 23 Jan 2021 21:01:25 -0800 Subject: [PATCH 1353/2012] kasan: fix unaligned address is unhandled in kasan_remove_zero_shadow During testing kasan_populate_early_shadow and kasan_remove_zero_shadow, if the shadow start and end address in kasan_remove_zero_shadow() is not aligned to PMD_SIZE, the remain unaligned PTE won't be removed. In the test case for kasan_remove_zero_shadow(): shadow_start: 0xffffffb802000000, shadow end: 0xffffffbfbe000000 3-level page table: PUD_SIZE: 0x40000000 PMD_SIZE: 0x200000 PAGE_SIZE: 4K 0xffffffbf80000000 ~ 0xffffffbfbdf80000 will not be removed because in kasan_remove_pud_table(), kasan_pmd_table(*pud) is true but the next address is 0xffffffbfbdf80000 which is not aligned to PUD_SIZE. In the correct condition, this should fallback to the next level kasan_remove_pmd_table() but the condition flow always continue to skip the unaligned part. Fix by correcting the condition when next and addr are neither aligned. Link: https://lkml.kernel.org/r/20210103135621.83129-1-lecopzer@gmail.com Fixes: 0207df4fa1a86 ("kernel/memremap, kasan: make ZONE_DEVICE with work with KASAN") Signed-off-by: Lecopzer Chen Cc: Andrey Ryabinin Cc: Dan Williams Cc: Dmitry Vyukov Cc: Alexander Potapenko Cc: YJ Chiang Cc: Andrey Konovalov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/kasan/init.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/mm/kasan/init.c b/mm/kasan/init.c index 7ca0b92d5886d..91049c1b8994c 100644 --- a/mm/kasan/init.c +++ b/mm/kasan/init.c @@ -373,9 +373,10 @@ static void kasan_remove_pmd_table(pmd_t *pmd, unsigned long addr, if (kasan_pte_table(*pmd)) { if (IS_ALIGNED(addr, PMD_SIZE) && - IS_ALIGNED(next, PMD_SIZE)) + IS_ALIGNED(next, PMD_SIZE)) { pmd_clear(pmd); - continue; + continue; + } } pte = pte_offset_kernel(pmd, addr); kasan_remove_pte_table(pte, addr, next); @@ -398,9 +399,10 @@ static void kasan_remove_pud_table(pud_t *pud, unsigned long addr, if (kasan_pmd_table(*pud)) { if (IS_ALIGNED(addr, PUD_SIZE) && - IS_ALIGNED(next, PUD_SIZE)) + IS_ALIGNED(next, PUD_SIZE)) { pud_clear(pud); - continue; + continue; + } } pmd = pmd_offset(pud, addr); pmd_base = pmd_offset(pud, 0); @@ -424,9 +426,10 @@ static void kasan_remove_p4d_table(p4d_t *p4d, unsigned long addr, if (kasan_pud_table(*p4d)) { if (IS_ALIGNED(addr, P4D_SIZE) && - IS_ALIGNED(next, P4D_SIZE)) + IS_ALIGNED(next, P4D_SIZE)) { p4d_clear(p4d); - continue; + continue; + } } pud = pud_offset(p4d, addr); kasan_remove_pud_table(pud, addr, next); @@ -457,9 +460,10 @@ void kasan_remove_zero_shadow(void *start, unsigned long size) if (kasan_p4d_table(*pgd)) { if (IS_ALIGNED(addr, PGDIR_SIZE) && - IS_ALIGNED(next, PGDIR_SIZE)) + IS_ALIGNED(next, PGDIR_SIZE)) { pgd_clear(pgd); - continue; + continue; + } } p4d = p4d_offset(pgd, addr); -- GitLab From 5dabd1712cd056814f9ab15f1d68157ceb04e741 Mon Sep 17 00:00:00 2001 From: Lecopzer Chen Date: Sat, 23 Jan 2021 21:01:29 -0800 Subject: [PATCH 1354/2012] kasan: fix incorrect arguments passing in kasan_add_zero_shadow kasan_remove_zero_shadow() shall use original virtual address, start and size, instead of shadow address. Link: https://lkml.kernel.org/r/20210103063847.5963-1-lecopzer@gmail.com Fixes: 0207df4fa1a86 ("kernel/memremap, kasan: make ZONE_DEVICE with work with KASAN") Signed-off-by: Lecopzer Chen Reviewed-by: Andrey Konovalov Cc: Andrey Ryabinin Cc: Dan Williams Cc: Dmitry Vyukov Cc: Alexander Potapenko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/kasan/init.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/mm/kasan/init.c b/mm/kasan/init.c index 91049c1b8994c..c4605ac9837b0 100644 --- a/mm/kasan/init.c +++ b/mm/kasan/init.c @@ -486,7 +486,6 @@ int kasan_add_zero_shadow(void *start, unsigned long size) ret = kasan_populate_early_shadow(shadow_start, shadow_end); if (ret) - kasan_remove_zero_shadow(shadow_start, - size >> KASAN_SHADOW_SCALE_SHIFT); + kasan_remove_zero_shadow(start, size); return ret; } -- GitLab From 76bc99e81a7cb78a78e058107e4b5b1d8ed3c874 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Sat, 23 Jan 2021 21:01:34 -0800 Subject: [PATCH 1355/2012] kasan: fix HW_TAGS boot parameters The initially proposed KASAN command line parameters are redundant. This change drops the complex "kasan.mode=off/prod/full" parameter and adds a simpler kill switch "kasan=off/on" instead. The new parameter together with the already existing ones provides a cleaner way to express the same set of features. The full set of parameters with this change: kasan=off/on - whether KASAN is enabled kasan.fault=report/panic - whether to only print a report or also panic kasan.stacktrace=off/on - whether to collect alloc/free stack traces Default values: kasan=on kasan.fault=report kasan.stacktrace=on (if CONFIG_DEBUG_KERNEL=y) kasan.stacktrace=off (otherwise) Link: https://linux-review.googlesource.com/id/Ib3694ed90b1e8ccac6cf77dfd301847af4aba7b8 Link: https://lkml.kernel.org/r/4e9c4a4bdcadc168317deb2419144582a9be6e61.1610736745.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Reviewed-by: Vincenzo Frascino Reviewed-by: Marco Elver Cc: Dmitry Vyukov Cc: Alexander Potapenko Cc: Catalin Marinas Cc: Will Deacon Cc: Andrey Ryabinin Cc: Peter Collingbourne Cc: Evgenii Stepanov Cc: Branislav Rankov Cc: Kevin Brodsky Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/dev-tools/kasan.rst | 27 +++-------- mm/kasan/hw_tags.c | 77 +++++++++++++------------------ 2 files changed, 38 insertions(+), 66 deletions(-) diff --git a/Documentation/dev-tools/kasan.rst b/Documentation/dev-tools/kasan.rst index 0fc3fb1860c4a..1651d961f06a6 100644 --- a/Documentation/dev-tools/kasan.rst +++ b/Documentation/dev-tools/kasan.rst @@ -160,29 +160,14 @@ intended for use in production as a security mitigation. Therefore it supports boot parameters that allow to disable KASAN competely or otherwise control particular KASAN features. -The things that can be controlled are: +- ``kasan=off`` or ``=on`` controls whether KASAN is enabled (default: ``on``). -1. Whether KASAN is enabled at all. -2. Whether KASAN collects and saves alloc/free stacks. -3. Whether KASAN panics on a detected bug or not. +- ``kasan.stacktrace=off`` or ``=on`` disables or enables alloc and free stack + traces collection (default: ``on`` for ``CONFIG_DEBUG_KERNEL=y``, otherwise + ``off``). -The ``kasan.mode`` boot parameter allows to choose one of three main modes: - -- ``kasan.mode=off`` - KASAN is disabled, no tag checks are performed -- ``kasan.mode=prod`` - only essential production features are enabled -- ``kasan.mode=full`` - all KASAN features are enabled - -The chosen mode provides default control values for the features mentioned -above. However it's also possible to override the default values by providing: - -- ``kasan.stacktrace=off`` or ``=on`` - enable alloc/free stack collection - (default: ``on`` for ``mode=full``, - otherwise ``off``) -- ``kasan.fault=report`` or ``=panic`` - only print KASAN report or also panic - (default: ``report``) - -If ``kasan.mode`` parameter is not provided, it defaults to ``full`` when -``CONFIG_DEBUG_KERNEL`` is enabled, and to ``prod`` otherwise. +- ``kasan.fault=report`` or ``=panic`` controls whether to only print a KASAN + report or also panic the kernel (default: ``report``). For developers ~~~~~~~~~~~~~~ diff --git a/mm/kasan/hw_tags.c b/mm/kasan/hw_tags.c index 55bd6f09c70ff..e529428e7a111 100644 --- a/mm/kasan/hw_tags.c +++ b/mm/kasan/hw_tags.c @@ -19,11 +19,10 @@ #include "kasan.h" -enum kasan_arg_mode { - KASAN_ARG_MODE_DEFAULT, - KASAN_ARG_MODE_OFF, - KASAN_ARG_MODE_PROD, - KASAN_ARG_MODE_FULL, +enum kasan_arg { + KASAN_ARG_DEFAULT, + KASAN_ARG_OFF, + KASAN_ARG_ON, }; enum kasan_arg_stacktrace { @@ -38,7 +37,7 @@ enum kasan_arg_fault { KASAN_ARG_FAULT_PANIC, }; -static enum kasan_arg_mode kasan_arg_mode __ro_after_init; +static enum kasan_arg kasan_arg __ro_after_init; static enum kasan_arg_stacktrace kasan_arg_stacktrace __ro_after_init; static enum kasan_arg_fault kasan_arg_fault __ro_after_init; @@ -52,26 +51,24 @@ DEFINE_STATIC_KEY_FALSE(kasan_flag_stacktrace); /* Whether panic or disable tag checking on fault. */ bool kasan_flag_panic __ro_after_init; -/* kasan.mode=off/prod/full */ -static int __init early_kasan_mode(char *arg) +/* kasan=off/on */ +static int __init early_kasan_flag(char *arg) { if (!arg) return -EINVAL; if (!strcmp(arg, "off")) - kasan_arg_mode = KASAN_ARG_MODE_OFF; - else if (!strcmp(arg, "prod")) - kasan_arg_mode = KASAN_ARG_MODE_PROD; - else if (!strcmp(arg, "full")) - kasan_arg_mode = KASAN_ARG_MODE_FULL; + kasan_arg = KASAN_ARG_OFF; + else if (!strcmp(arg, "on")) + kasan_arg = KASAN_ARG_ON; else return -EINVAL; return 0; } -early_param("kasan.mode", early_kasan_mode); +early_param("kasan", early_kasan_flag); -/* kasan.stack=off/on */ +/* kasan.stacktrace=off/on */ static int __init early_kasan_flag_stacktrace(char *arg) { if (!arg) @@ -113,8 +110,8 @@ void kasan_init_hw_tags_cpu(void) * as this function is only called for MTE-capable hardware. */ - /* If KASAN is disabled, do nothing. */ - if (kasan_arg_mode == KASAN_ARG_MODE_OFF) + /* If KASAN is disabled via command line, don't initialize it. */ + if (kasan_arg == KASAN_ARG_OFF) return; hw_init_tags(KASAN_TAG_MAX); @@ -124,43 +121,28 @@ void kasan_init_hw_tags_cpu(void) /* kasan_init_hw_tags() is called once on boot CPU. */ void __init kasan_init_hw_tags(void) { - /* If hardware doesn't support MTE, do nothing. */ + /* If hardware doesn't support MTE, don't initialize KASAN. */ if (!system_supports_mte()) return; - /* Choose KASAN mode if kasan boot parameter is not provided. */ - if (kasan_arg_mode == KASAN_ARG_MODE_DEFAULT) { - if (IS_ENABLED(CONFIG_DEBUG_KERNEL)) - kasan_arg_mode = KASAN_ARG_MODE_FULL; - else - kasan_arg_mode = KASAN_ARG_MODE_PROD; - } - - /* Preset parameter values based on the mode. */ - switch (kasan_arg_mode) { - case KASAN_ARG_MODE_DEFAULT: - /* Shouldn't happen as per the check above. */ - WARN_ON(1); - return; - case KASAN_ARG_MODE_OFF: - /* If KASAN is disabled, do nothing. */ + /* If KASAN is disabled via command line, don't initialize it. */ + if (kasan_arg == KASAN_ARG_OFF) return; - case KASAN_ARG_MODE_PROD: - static_branch_enable(&kasan_flag_enabled); - break; - case KASAN_ARG_MODE_FULL: - static_branch_enable(&kasan_flag_enabled); - static_branch_enable(&kasan_flag_stacktrace); - break; - } - /* Now, optionally override the presets. */ + /* Enable KASAN. */ + static_branch_enable(&kasan_flag_enabled); switch (kasan_arg_stacktrace) { case KASAN_ARG_STACKTRACE_DEFAULT: + /* + * Default to enabling stack trace collection for + * debug kernels. + */ + if (IS_ENABLED(CONFIG_DEBUG_KERNEL)) + static_branch_enable(&kasan_flag_stacktrace); break; case KASAN_ARG_STACKTRACE_OFF: - static_branch_disable(&kasan_flag_stacktrace); + /* Do nothing, kasan_flag_stacktrace keeps its default value. */ break; case KASAN_ARG_STACKTRACE_ON: static_branch_enable(&kasan_flag_stacktrace); @@ -169,11 +151,16 @@ void __init kasan_init_hw_tags(void) switch (kasan_arg_fault) { case KASAN_ARG_FAULT_DEFAULT: + /* + * Default to no panic on report. + * Do nothing, kasan_flag_panic keeps its default value. + */ break; case KASAN_ARG_FAULT_REPORT: - kasan_flag_panic = false; + /* Do nothing, kasan_flag_panic keeps its default value. */ break; case KASAN_ARG_FAULT_PANIC: + /* Enable panic on report. */ kasan_flag_panic = true; break; } -- GitLab From ce5716c618524241a3cea821e18ee1e0d16f6c70 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Sat, 23 Jan 2021 21:01:38 -0800 Subject: [PATCH 1356/2012] kasan, mm: fix conflicts with init_on_alloc/free A few places where SLUB accesses object's data or metadata were missed in a previous patch. This leads to false positives with hardware tag-based KASAN when bulk allocations are used with init_on_alloc/free. Fix the false-positives by resetting pointer tags during these accesses. (The kasan_reset_tag call is removed from slab_alloc_node, as it's added into maybe_wipe_obj_freeptr.) Link: https://linux-review.googlesource.com/id/I50dd32838a666e173fe06c3c5c766f2c36aae901 Link: https://lkml.kernel.org/r/093428b5d2ca8b507f4a79f92f9929b35f7fada7.1610731872.git.andreyknvl@google.com Fixes: aa1ef4d7b3f67 ("kasan, mm: reset tags when accessing metadata") Signed-off-by: Andrey Konovalov Reported-by: Dmitry Vyukov Acked-by: Vlastimil Babka Cc: Catalin Marinas Cc: Vincenzo Frascino Cc: Alexander Potapenko Cc: Marco Elver Cc: Will Deacon Cc: Andrey Ryabinin Cc: Peter Collingbourne Cc: Evgenii Stepanov Cc: Branislav Rankov Cc: Kevin Brodsky Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/slub.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/mm/slub.c b/mm/slub.c index d9e4e10683cc1..69742ab9a21d3 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -2791,7 +2791,8 @@ static __always_inline void maybe_wipe_obj_freeptr(struct kmem_cache *s, void *obj) { if (unlikely(slab_want_init_on_free(s)) && obj) - memset((void *)((char *)obj + s->offset), 0, sizeof(void *)); + memset((void *)((char *)kasan_reset_tag(obj) + s->offset), + 0, sizeof(void *)); } /* @@ -2883,7 +2884,7 @@ redo: stat(s, ALLOC_FASTPATH); } - maybe_wipe_obj_freeptr(s, kasan_reset_tag(object)); + maybe_wipe_obj_freeptr(s, object); if (unlikely(slab_want_init_on_alloc(gfpflags, s)) && object) memset(kasan_reset_tag(object), 0, s->object_size); @@ -3329,7 +3330,7 @@ int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size, int j; for (j = 0; j < i; j++) - memset(p[j], 0, s->object_size); + memset(kasan_reset_tag(p[j]), 0, s->object_size); } /* memcg and kmem_cache debug support */ -- GitLab From acb35b177c71d3d39b9a3b9ea213d926235066e3 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Sat, 23 Jan 2021 21:01:43 -0800 Subject: [PATCH 1357/2012] kasan, mm: fix resetting page_alloc tags for HW_TAGS A previous commit added resetting KASAN page tags to kernel_init_free_pages() to avoid false-positives due to accesses to metadata with the hardware tag-based mode. That commit did reset page tags before the metadata access, but didn't restore them after. As the result, KASAN fails to detect bad accesses to page_alloc allocations on some configurations. Fix this by recovering the tag after the metadata access. Link: https://lkml.kernel.org/r/02b5bcd692e912c27d484030f666b350ad7e4ae4.1611074450.git.andreyknvl@google.com Fixes: aa1ef4d7b3f6 ("kasan, mm: reset tags when accessing metadata") Signed-off-by: Andrey Konovalov Cc: Dmitry Vyukov Cc: Alexander Potapenko Cc: Marco Elver Cc: Catalin Marinas Cc: Will Deacon Cc: Vincenzo Frascino Cc: Andrey Ryabinin Cc: Peter Collingbourne Cc: Evgenii Stepanov Cc: Branislav Rankov Cc: Kevin Brodsky Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/page_alloc.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 85ecaa6d0d064..783913e41f65d 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1207,8 +1207,10 @@ static void kernel_init_free_pages(struct page *page, int numpages) /* s390's use of memset() could override KASAN redzones. */ kasan_disable_current(); for (i = 0; i < numpages; i++) { + u8 tag = page_kasan_tag(page + i); page_kasan_tag_reset(page + i); clear_highpage(page + i); + page_kasan_tag_set(page + i, tag); } kasan_enable_current(); } -- GitLab From 251b5497c5c95e4548e3d33cbda3f638fea2c11e Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Sat, 23 Jan 2021 21:01:48 -0800 Subject: [PATCH 1358/2012] ubsan: disable unsigned-overflow check for i386 Building ubsan kernels even for compile-testing introduced these warnings in my randconfig environment: crypto/blake2b_generic.c:98:13: error: stack frame size of 9636 bytes in function 'blake2b_compress' [-Werror,-Wframe-larger-than=] static void blake2b_compress(struct blake2b_state *S, crypto/sha512_generic.c:151:13: error: stack frame size of 1292 bytes in function 'sha512_generic_block_fn' [-Werror,-Wframe-larger-than=] static void sha512_generic_block_fn(struct sha512_state *sst, u8 const *src, lib/crypto/curve25519-fiat32.c:312:22: error: stack frame size of 2180 bytes in function 'fe_mul_impl' [-Werror,-Wframe-larger-than=] static noinline void fe_mul_impl(u32 out[10], const u32 in1[10], const u32 in2[10]) lib/crypto/curve25519-fiat32.c:444:22: error: stack frame size of 1588 bytes in function 'fe_sqr_impl' [-Werror,-Wframe-larger-than=] static noinline void fe_sqr_impl(u32 out[10], const u32 in1[10]) Further testing showed that this is caused by -fsanitize=unsigned-integer-overflow, but is isolated to the 32-bit x86 architecture. The one in blake2b immediately overflows the 8KB stack area architectures, so better ensure this never happens by disabling the option for 32-bit x86. Link: https://lkml.kernel.org/r/20210112202922.2454435-1-arnd@kernel.org Link: https://lore.kernel.org/lkml/20201230154749.746641-1-arnd@kernel.org/ Fixes: d0a3ac549f38 ("ubsan: enable for all*config builds") Signed-off-by: Arnd Bergmann Acked-by: Kees Cook Reviewed-by: Nathan Chancellor Cc: Nick Desaulniers Cc: Stephen Rothwell Cc: Marco Elver Cc: George Popescu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/Kconfig.ubsan | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/Kconfig.ubsan b/lib/Kconfig.ubsan index 8b635fd75fe41..3a0b1c930733a 100644 --- a/lib/Kconfig.ubsan +++ b/lib/Kconfig.ubsan @@ -123,6 +123,7 @@ config UBSAN_SIGNED_OVERFLOW config UBSAN_UNSIGNED_OVERFLOW bool "Perform checking for unsigned arithmetic overflow" depends on $(cc-option,-fsanitize=unsigned-integer-overflow) + depends on !X86_32 # avoid excessive stack usage on x86-32/clang help This option enables -fsanitize=unsigned-integer-overflow which checks for overflow of any arithmetic operations with unsigned integers. This -- GitLab From dad4e5b390866ca902653df0daa864ae4b8d4147 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Sat, 23 Jan 2021 21:01:52 -0800 Subject: [PATCH 1359/2012] mm: fix page reference leak in soft_offline_page() The conversion to move pfn_to_online_page() internal to soft_offline_page() missed that the get_user_pages() reference taken by the madvise() path needs to be dropped when pfn_to_online_page() fails. Note the direct sysfs-path to soft_offline_page() does not perform a get_user_pages() lookup. When soft_offline_page() is handed a pfn_valid() && !pfn_to_online_page() pfn the kernel hangs at dax-device shutdown due to a leaked reference. Link: https://lkml.kernel.org/r/161058501210.1840162.8108917599181157327.stgit@dwillia2-desk3.amr.corp.intel.com Fixes: feec24a6139d ("mm, soft-offline: convert parameter to pfn") Signed-off-by: Dan Williams Reviewed-by: David Hildenbrand Reviewed-by: Oscar Salvador Reviewed-by: Naoya Horiguchi Cc: Michal Hocko Cc: Qian Cai Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memory-failure.c | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 04d9f154a130d..e9481632fcd1b 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -1885,6 +1885,12 @@ static int soft_offline_free_page(struct page *page) return rc; } +static void put_ref_page(struct page *page) +{ + if (page) + put_page(page); +} + /** * soft_offline_page - Soft offline a page. * @pfn: pfn to soft-offline @@ -1910,20 +1916,26 @@ static int soft_offline_free_page(struct page *page) int soft_offline_page(unsigned long pfn, int flags) { int ret; - struct page *page; bool try_again = true; + struct page *page, *ref_page = NULL; + + WARN_ON_ONCE(!pfn_valid(pfn) && (flags & MF_COUNT_INCREASED)); if (!pfn_valid(pfn)) return -ENXIO; + if (flags & MF_COUNT_INCREASED) + ref_page = pfn_to_page(pfn); + /* Only online pages can be soft-offlined (esp., not ZONE_DEVICE). */ page = pfn_to_online_page(pfn); - if (!page) + if (!page) { + put_ref_page(ref_page); return -EIO; + } if (PageHWPoison(page)) { pr_info("%s: %#lx page already poisoned\n", __func__, pfn); - if (flags & MF_COUNT_INCREASED) - put_page(page); + put_ref_page(ref_page); return 0; } -- GitLab From f99e02372af2e7ee72a6da497712ec9152964347 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 23 Jan 2021 21:01:57 -0800 Subject: [PATCH 1360/2012] sparc/mm/highmem: flush cache and TLB Patch series "mm/highmem: Fix fallout from generic kmap_local conversions". The kmap_local conversion wreckaged sparc, mips and powerpc as it missed some of the details in the original implementation. This patch (of 4): The recent conversion to the generic kmap_local infrastructure failed to assign the proper pre/post map/unmap flush operations for sparc. Sparc requires cache flush before map/unmap and tlb flush afterwards. Link: https://lkml.kernel.org/r/20210112170136.078559026@linutronix.de Link: https://lkml.kernel.org/r/20210112170410.905976187@linutronix.de Fixes: 3293efa97807 ("sparc/mm/highmem: Switch to generic kmap atomic") Signed-off-by: Thomas Gleixner Reported-by: Andreas Larsson Cc: "David S. Miller" Cc: Peter Zijlstra Cc: Paul Cercueil Cc: Thomas Bogendoerfer Cc: Michael Ellerman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/sparc/include/asm/highmem.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/arch/sparc/include/asm/highmem.h b/arch/sparc/include/asm/highmem.h index 875116209ec14..c7b2e208328b5 100644 --- a/arch/sparc/include/asm/highmem.h +++ b/arch/sparc/include/asm/highmem.h @@ -50,10 +50,11 @@ extern pte_t *pkmap_page_table; #define flush_cache_kmaps() flush_cache_all() -/* FIXME: Use __flush_tlb_one(vaddr) instead of flush_cache_all() -- Anton */ -#define arch_kmap_local_post_map(vaddr, pteval) flush_cache_all() -#define arch_kmap_local_post_unmap(vaddr) flush_cache_all() - +/* FIXME: Use __flush_*_one(vaddr) instead of flush_*_all() -- Anton */ +#define arch_kmap_local_pre_map(vaddr, pteval) flush_cache_all() +#define arch_kmap_local_pre_unmap(vaddr) flush_cache_all() +#define arch_kmap_local_post_map(vaddr, pteval) flush_tlb_all() +#define arch_kmap_local_post_unmap(vaddr) flush_tlb_all() #endif /* __KERNEL__ */ -- GitLab From a1dce7fd2ade8e71e5f95e58b99aa512607f52b0 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 23 Jan 2021 21:02:02 -0800 Subject: [PATCH 1361/2012] mm/highmem: prepare for overriding set_pte_at() The generic kmap_local() map function uses set_pte_at(), but MIPS requires set_pte() and PowerPC wants __set_pte_at(). Provide arch_kmap_local_set_pte() and default it to set_pte_at(). Link: https://lkml.kernel.org/r/20210112170411.056306194@linutronix.de Signed-off-by: Thomas Gleixner Cc: Andreas Larsson Cc: "David S. Miller" Cc: Michael Ellerman Cc: Paul Cercueil Cc: Peter Zijlstra Cc: Thomas Bogendoerfer Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/highmem.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/mm/highmem.c b/mm/highmem.c index c3a9ea7875ef8..874b732b120ce 100644 --- a/mm/highmem.c +++ b/mm/highmem.c @@ -473,6 +473,11 @@ static inline void *arch_kmap_local_high_get(struct page *page) } #endif +#ifndef arch_kmap_local_set_pte +#define arch_kmap_local_set_pte(mm, vaddr, ptep, ptev) \ + set_pte_at(mm, vaddr, ptep, ptev) +#endif + /* Unmap a local mapping which was obtained by kmap_high_get() */ static inline bool kmap_high_unmap_local(unsigned long vaddr) { @@ -515,7 +520,7 @@ void *__kmap_local_pfn_prot(unsigned long pfn, pgprot_t prot) vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); BUG_ON(!pte_none(*(kmap_pte - idx))); pteval = pfn_pte(pfn, prot); - set_pte_at(&init_mm, vaddr, kmap_pte - idx, pteval); + arch_kmap_local_set_pte(&init_mm, vaddr, kmap_pte - idx, pteval); arch_kmap_local_post_map(vaddr, pteval); current->kmap_ctrl.pteval[kmap_local_idx()] = pteval; preempt_enable(); -- GitLab From 8c0d5d78f3596e203e9cd27563a8380649c03ad0 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 23 Jan 2021 21:02:07 -0800 Subject: [PATCH 1362/2012] mips/mm/highmem: use set_pte() for kmap_local() set_pte_at() on MIPS invokes update_cache() which might recurse into kmap_local(). Use set_pte() like the original MIPS highmem implementation did. Link: https://lkml.kernel.org/r/20210112170411.187513575@linutronix.de Fixes: a4c33e83bca1 ("mips/mm/highmem: Switch to generic kmap atomic") Signed-off-by: Thomas Gleixner Reported-by: Paul Cercueil Reported-by: Thomas Bogendoerfer Acked-by: Thomas Bogendoerfer Cc: Andreas Larsson Cc: "David S. Miller" Cc: Michael Ellerman Cc: Peter Zijlstra Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/mips/include/asm/highmem.h | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/mips/include/asm/highmem.h b/arch/mips/include/asm/highmem.h index 19edf8e699712..292d0425717f3 100644 --- a/arch/mips/include/asm/highmem.h +++ b/arch/mips/include/asm/highmem.h @@ -51,6 +51,7 @@ extern void kmap_flush_tlb(unsigned long addr); #define flush_cache_kmaps() BUG_ON(cpu_has_dc_aliases) +#define arch_kmap_local_set_pte(mm, vaddr, ptep, ptev) set_pte(ptep, ptev) #define arch_kmap_local_post_map(vaddr, pteval) local_flush_tlb_one(vaddr) #define arch_kmap_local_post_unmap(vaddr) local_flush_tlb_one(vaddr) -- GitLab From 785025820a6a565185ce9d47fdd8d23dbf91dee8 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 23 Jan 2021 21:02:11 -0800 Subject: [PATCH 1363/2012] powerpc/mm/highmem: use __set_pte_at() for kmap_local() The original PowerPC highmem mapping function used __set_pte_at() to denote that the mapping is per CPU. This got lost with the conversion to the generic implementation. Override the default map function. Link: https://lkml.kernel.org/r/20210112170411.281464308@linutronix.de Fixes: 47da42b27a56 ("powerpc/mm/highmem: Switch to generic kmap atomic") Signed-off-by: Thomas Gleixner Cc: Michael Ellerman Cc: Andreas Larsson Cc: "David S. Miller" Cc: Paul Cercueil Cc: Peter Zijlstra Cc: Thomas Bogendoerfer Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/powerpc/include/asm/highmem.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/powerpc/include/asm/highmem.h b/arch/powerpc/include/asm/highmem.h index 80a5ae771c653..c0fcd1bbdba98 100644 --- a/arch/powerpc/include/asm/highmem.h +++ b/arch/powerpc/include/asm/highmem.h @@ -58,6 +58,8 @@ extern pte_t *pkmap_page_table; #define flush_cache_kmaps() flush_cache_all() +#define arch_kmap_local_set_pte(mm, vaddr, ptep, ptev) \ + __set_pte_at(mm, vaddr, ptep, ptev, 1) #define arch_kmap_local_post_map(vaddr, pteval) \ local_flush_tlb_page(NULL, vaddr) #define arch_kmap_local_post_unmap(vaddr) \ -- GitLab From 697edcb0e4eadc41645fe88c991fe6a206b1a08d Mon Sep 17 00:00:00 2001 From: Xiaoming Ni Date: Sat, 23 Jan 2021 21:02:16 -0800 Subject: [PATCH 1364/2012] proc_sysctl: fix oops caused by incorrect command parameters The process_sysctl_arg() does not check whether val is empty before invoking strlen(val). If the command line parameter () is incorrectly configured and val is empty, oops is triggered. For example: "hung_task_panic=1" is incorrectly written as "hung_task_panic", oops is triggered. The call stack is as follows: Kernel command line: .... hung_task_panic ...... Call trace: __pi_strlen+0x10/0x98 parse_args+0x278/0x344 do_sysctl_args+0x8c/0xfc kernel_init+0x5c/0xf4 ret_from_fork+0x10/0x30 To fix it, check whether "val" is empty when "phram" is a sysctl field. Error codes are returned in the failure branch, and error logs are generated by parse_args(). Link: https://lkml.kernel.org/r/20210118133029.28580-1-nixiaoming@huawei.com Fixes: 3db978d480e2843 ("kernel/sysctl: support setting sysctl parameters from kernel command line") Signed-off-by: Xiaoming Ni Acked-by: Vlastimil Babka Cc: Luis Chamberlain Cc: Kees Cook Cc: Iurii Zaikin Cc: Alexey Dobriyan Cc: Michal Hocko Cc: Masami Hiramatsu Cc: Heiner Kallweit Cc: Randy Dunlap Cc: [5.8+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/proc_sysctl.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index 317899222d7fd..d2018f70d1fae 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -1770,6 +1770,12 @@ static int process_sysctl_arg(char *param, char *val, return 0; } + if (!val) + return -EINVAL; + len = strlen(val); + if (len == 0) + return -EINVAL; + /* * To set sysctl options, we use a temporary mount of proc, look up the * respective sys/ file and write to it. To avoid mounting it when no @@ -1811,7 +1817,6 @@ static int process_sysctl_arg(char *param, char *val, file, param, val); goto out; } - len = strlen(val); wret = kernel_write(file, val, len, &pos); if (wret < 0) { err = wret; -- GitLab From e82d891a63afebefde5d26971768f5cb91627f73 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Sat, 23 Jan 2021 21:02:21 -0800 Subject: [PATCH 1365/2012] MAINTAINERS: add a couple more files to the Clang/LLVM section The K: entry should ensure that Nick and I always get CC'd on patches that touch these files but it is better to be explicit rather than implicit. Link: https://lkml.kernel.org/r/20210114004059.2129921-1-natechancellor@gmail.com Signed-off-by: Nathan Chancellor Reviewed-by: Nick Desaulniers Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- MAINTAINERS | 2 ++ 1 file changed, 2 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index f79ec98bbb291..91c6dee7850e9 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4311,7 +4311,9 @@ W: https://clangbuiltlinux.github.io/ B: https://github.com/ClangBuiltLinux/linux/issues C: irc://chat.freenode.net/clangbuiltlinux F: Documentation/kbuild/llvm.rst +F: include/linux/compiler-clang.h F: scripts/clang-tools/ +F: scripts/clang-version.sh F: scripts/lld-version.sh K: \b(?i:clang|llvm)\b -- GitLab From 6ee1d745b7c9fd573fba142a2efdad76a9f1cb04 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 24 Jan 2021 16:47:14 -0800 Subject: [PATCH 1366/2012] Linux 5.11-rc5 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index b0e4767735dca..e0af7a4a55985 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 5 PATCHLEVEL = 11 SUBLEVEL = 0 -EXTRAVERSION = -rc4 +EXTRAVERSION = -rc5 NAME = Kleptomaniac Octopus # *DOCUMENTATION* -- GitLab From 84965ff8a84f0368b154c9b367b62e59c1193f30 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sat, 23 Jan 2021 15:51:11 -0700 Subject: [PATCH 1367/2012] io_uring: if we see flush on exit, cancel related tasks Ensure we match tasks that belong to a dead or dying task as well, as we need to reap those in addition to those belonging to the exiting task. Cc: stable@vger.kernel.org # 5.9+ Reported-by: Josef Grieb Signed-off-by: Jens Axboe --- fs/io_uring.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index c07913ec0ccac..695fe00bafdcd 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -1069,8 +1069,12 @@ static bool io_match_task(struct io_kiocb *head, { struct io_kiocb *req; - if (task && head->task != task) + if (task && head->task != task) { + /* in terms of cancelation, always match if req task is dead */ + if (head->task->flags & PF_EXITING) + return true; return false; + } if (!files) return true; @@ -9136,6 +9140,9 @@ static int io_uring_flush(struct file *file, void *data) struct io_uring_task *tctx = current->io_uring; struct io_ring_ctx *ctx = file->private_data; + if (fatal_signal_pending(current) || (current->flags & PF_EXITING)) + io_uring_cancel_task_requests(ctx, NULL); + if (!tctx) return 0; -- GitLab From b18032bb0a883cd7edd22a7fe6c57e1059b81ed0 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sun, 24 Jan 2021 16:58:56 -0700 Subject: [PATCH 1368/2012] io_uring: only call io_cqring_ev_posted() if events were posted This normally doesn't cause any extra harm, but it does mean that we'll increment the eventfd notification count, if one has been registered with the ring. This can confuse applications, when they see more notifications on the eventfd side than are available in the ring. Do the nice thing and only increment this count, if we actually posted (or even overflowed) events. Reported-and-tested-by: Dan Melnic Signed-off-by: Jens Axboe --- fs/io_uring.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 695fe00bafdcd..2166c469789d5 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -1779,12 +1779,13 @@ static bool __io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force, struct io_kiocb *req, *tmp; struct io_uring_cqe *cqe; unsigned long flags; - bool all_flushed; + bool all_flushed, posted; LIST_HEAD(list); if (!force && __io_cqring_events(ctx) == rings->cq_ring_entries) return false; + posted = false; spin_lock_irqsave(&ctx->completion_lock, flags); list_for_each_entry_safe(req, tmp, &ctx->cq_overflow_list, compl.list) { if (!io_match_task(req, tsk, files)) @@ -1804,6 +1805,7 @@ static bool __io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force, WRITE_ONCE(ctx->rings->cq_overflow, ctx->cached_cq_overflow); } + posted = true; } all_flushed = list_empty(&ctx->cq_overflow_list); @@ -1813,9 +1815,11 @@ static bool __io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force, ctx->rings->sq_flags &= ~IORING_SQ_CQ_OVERFLOW; } - io_commit_cqring(ctx); + if (posted) + io_commit_cqring(ctx); spin_unlock_irqrestore(&ctx->completion_lock, flags); - io_cqring_ev_posted(ctx); + if (posted) + io_cqring_ev_posted(ctx); while (!list_empty(&list)) { req = list_first_entry(&list, struct io_kiocb, compl.list); -- GitLab From 814b84971388cd5fb182f2e914265b3827758455 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 21 Jan 2021 16:34:37 -0500 Subject: [PATCH 1369/2012] pNFS/NFSv4: Fix a layout segment leak in pnfs_layout_process() If the server returns a new stateid that does not match the one in our cache, then pnfs_layout_process() will leak the layout segments returned by pnfs_mark_layout_stateid_invalid(). Fixes: 9888d837f3cf ("pNFS: Force a retry of LAYOUTGET if the stateid doesn't match our cache") Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 4f274f21c4abc..e68e6f8cb4079 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -2417,6 +2417,7 @@ out_forget: spin_unlock(&ino->i_lock); lseg->pls_layout = lo; NFS_SERVER(ino)->pnfs_curr_ld->free_lseg(lseg); + pnfs_free_lseg_list(&free_me); return ERR_PTR(-EAGAIN); } -- GitLab From 08bd8dbe88825760e953759d7ec212903a026c75 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 21 Jan 2021 17:11:42 -0500 Subject: [PATCH 1370/2012] pNFS/NFSv4: Try to return invalid layout in pnfs_layout_process() If the server returns a new stateid that does not match the one in our cache, then try to return the one we hold instead of just invalidating it on the client side. This ensures that both client and server will agree that the stateid is invalid. Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index e68e6f8cb4079..d6262289cf4aa 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -2398,7 +2398,13 @@ pnfs_layout_process(struct nfs4_layoutget *lgp) * We got an entirely new state ID. Mark all segments for the * inode invalid, and retry the layoutget */ - pnfs_mark_layout_stateid_invalid(lo, &free_me); + struct pnfs_layout_range range = { + .iomode = IOMODE_ANY, + .length = NFS4_MAX_UINT64, + }; + pnfs_set_plh_return_info(lo, IOMODE_ANY, 0); + pnfs_mark_matching_lsegs_return(lo, &lo->plh_return_segs, + &range, 0); goto out_forget; } @@ -2417,7 +2423,6 @@ out_forget: spin_unlock(&ino->i_lock); lseg->pls_layout = lo; NFS_SERVER(ino)->pnfs_curr_ld->free_lseg(lseg); - pnfs_free_lseg_list(&free_me); return ERR_PTR(-EAGAIN); } -- GitLab From 1bcf34fdac5f8c2fcd16796495db75744612ca27 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 21 Jan 2021 13:51:50 -0500 Subject: [PATCH 1371/2012] pNFS/NFSv4: Update the layout barrier when we schedule a layoutreturn When we're scheduling a layoutreturn, we need to ignore any further incoming layouts with sequence ids that are going to be affected by the layout return. Fixes: 44ea8dfce021 ("NFS/pnfs: Reference the layout cred in pnfs_prepare_layoutreturn()") Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.c | 39 +++++++++++++++++++++------------------ 1 file changed, 21 insertions(+), 18 deletions(-) diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index d6262289cf4aa..acb63ec000539 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -324,6 +324,21 @@ pnfs_grab_inode_layout_hdr(struct pnfs_layout_hdr *lo) return NULL; } +/* + * Compare 2 layout stateid sequence ids, to see which is newer, + * taking into account wraparound issues. + */ +static bool pnfs_seqid_is_newer(u32 s1, u32 s2) +{ + return (s32)(s1 - s2) > 0; +} + +static void pnfs_barrier_update(struct pnfs_layout_hdr *lo, u32 newseq) +{ + if (pnfs_seqid_is_newer(newseq, lo->plh_barrier)) + lo->plh_barrier = newseq; +} + static void pnfs_set_plh_return_info(struct pnfs_layout_hdr *lo, enum pnfs_iomode iomode, u32 seq) @@ -335,6 +350,7 @@ pnfs_set_plh_return_info(struct pnfs_layout_hdr *lo, enum pnfs_iomode iomode, if (seq != 0) { WARN_ON_ONCE(lo->plh_return_seq != 0 && lo->plh_return_seq != seq); lo->plh_return_seq = seq; + pnfs_barrier_update(lo, seq); } } @@ -639,15 +655,6 @@ static int mark_lseg_invalid(struct pnfs_layout_segment *lseg, return rv; } -/* - * Compare 2 layout stateid sequence ids, to see which is newer, - * taking into account wraparound issues. - */ -static bool pnfs_seqid_is_newer(u32 s1, u32 s2) -{ - return (s32)(s1 - s2) > 0; -} - static bool pnfs_should_free_range(const struct pnfs_layout_range *lseg_range, const struct pnfs_layout_range *recall_range) @@ -984,8 +991,7 @@ pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo, const nfs4_stateid *new, new_barrier = be32_to_cpu(new->seqid); else if (new_barrier == 0) return; - if (pnfs_seqid_is_newer(new_barrier, lo->plh_barrier)) - lo->plh_barrier = new_barrier; + pnfs_barrier_update(lo, new_barrier); } static bool @@ -1183,20 +1189,17 @@ pnfs_prepare_layoutreturn(struct pnfs_layout_hdr *lo, return false; set_bit(NFS_LAYOUT_RETURN, &lo->plh_flags); pnfs_get_layout_hdr(lo); + nfs4_stateid_copy(stateid, &lo->plh_stateid); + *cred = get_cred(lo->plh_lc_cred); if (test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags)) { - nfs4_stateid_copy(stateid, &lo->plh_stateid); - *cred = get_cred(lo->plh_lc_cred); if (lo->plh_return_seq != 0) stateid->seqid = cpu_to_be32(lo->plh_return_seq); if (iomode != NULL) *iomode = lo->plh_return_iomode; pnfs_clear_layoutreturn_info(lo); - return true; - } - nfs4_stateid_copy(stateid, &lo->plh_stateid); - *cred = get_cred(lo->plh_lc_cred); - if (iomode != NULL) + } else if (iomode != NULL) *iomode = IOMODE_ANY; + pnfs_barrier_update(lo, be32_to_cpu(stateid->seqid)); return true; } -- GitLab From d29b468da4f940bd2bff2628ba8d2d652671d244 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 22 Jan 2021 10:05:51 -0500 Subject: [PATCH 1372/2012] pNFS/NFSv4: Improve rejection of out-of-order layouts If a layoutget ends up being reordered w.r.t. a layoutreturn, e.g. due to a layoutget-on-open not knowing a priori which file to lock, then we must assume the layout is no longer being considered valid state by the server. Incrementally improve our ability to reject such states by using the cached old stateid in conjunction with the plh_barrier to try to identify them. Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.c | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index acb63ec000539..af64b4e6fd1ff 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1000,7 +1000,7 @@ pnfs_layout_stateid_blocked(const struct pnfs_layout_hdr *lo, { u32 seqid = be32_to_cpu(stateid->seqid); - return !pnfs_seqid_is_newer(seqid, lo->plh_barrier); + return !pnfs_seqid_is_newer(seqid, lo->plh_barrier) && lo->plh_barrier; } /* lget is set to 1 if called from inside send_layoutget call chain */ @@ -1912,6 +1912,11 @@ static void nfs_layoutget_end(struct pnfs_layout_hdr *lo) wake_up_var(&lo->plh_outstanding); } +static bool pnfs_is_first_layoutget(struct pnfs_layout_hdr *lo) +{ + return test_bit(NFS_LAYOUT_FIRST_LAYOUTGET, &lo->plh_flags); +} + static void pnfs_clear_first_layoutget(struct pnfs_layout_hdr *lo) { unsigned long *bitlock = &lo->plh_flags; @@ -2386,17 +2391,17 @@ pnfs_layout_process(struct nfs4_layoutget *lgp) goto out_forget; } - if (!pnfs_layout_is_valid(lo)) { - /* We have a completely new layout */ - pnfs_set_layout_stateid(lo, &res->stateid, lgp->cred, true); - } else if (nfs4_stateid_match_other(&lo->plh_stateid, &res->stateid)) { + if (nfs4_stateid_match_other(&lo->plh_stateid, &res->stateid)) { /* existing state ID, make sure the sequence number matches. */ if (pnfs_layout_stateid_blocked(lo, &res->stateid)) { + if (!pnfs_layout_is_valid(lo) && + pnfs_is_first_layoutget(lo)) + lo->plh_barrier = 0; dprintk("%s forget reply due to sequence\n", __func__); goto out_forget; } pnfs_set_layout_stateid(lo, &res->stateid, lgp->cred, false); - } else { + } else if (pnfs_layout_is_valid(lo)) { /* * We got an entirely new state ID. Mark all segments for the * inode invalid, and retry the layoutget @@ -2409,6 +2414,11 @@ pnfs_layout_process(struct nfs4_layoutget *lgp) pnfs_mark_matching_lsegs_return(lo, &lo->plh_return_segs, &range, 0); goto out_forget; + } else { + /* We have a completely new layout */ + if (!pnfs_is_first_layoutget(lo)) + goto out_forget; + pnfs_set_layout_stateid(lo, &res->stateid, lgp->cred, true); } pnfs_get_lseg(lseg); -- GitLab From 2569063c7140c65a0d0ad075e95ddfbcda9ba3c0 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Sun, 27 Dec 2020 19:34:58 +0800 Subject: [PATCH 1373/2012] blk-mq: test QUEUE_FLAG_HCTX_ACTIVE for sbitmap_shared in hctx_may_queue In case of blk_mq_is_sbitmap_shared(), we should test QUEUE_FLAG_HCTX_ACTIVE against q->queue_flags instead of BLK_MQ_S_TAG_ACTIVE. So fix it. Cc: John Garry Cc: Kashyap Desai Fixes: f1b49fdc1c64 ("blk-mq: Record active_queues_shared_sbitmap per tag_set for when using shared sbitmap") Signed-off-by: Ming Lei Reviewed-by: John Garry Signed-off-by: Jens Axboe --- block/blk-mq.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block/blk-mq.h b/block/blk-mq.h index c1458d9502f1c..3616453ca28c8 100644 --- a/block/blk-mq.h +++ b/block/blk-mq.h @@ -304,7 +304,7 @@ static inline bool hctx_may_queue(struct blk_mq_hw_ctx *hctx, struct request_queue *q = hctx->queue; struct blk_mq_tag_set *set = q->tag_set; - if (!test_bit(BLK_MQ_S_TAG_ACTIVE, &q->queue_flags)) + if (!test_bit(QUEUE_FLAG_HCTX_ACTIVE, &q->queue_flags)) return true; users = atomic_read(&set->active_queues_shared_sbitmap); } else { -- GitLab From ef49d40b61a3e18a11edd5eb1c30b0183af9e850 Mon Sep 17 00:00:00 2001 From: Dinghao Liu Date: Sun, 17 Jan 2021 16:50:17 +0800 Subject: [PATCH 1374/2012] block: Fix an error handling in add_partition Once we have called device_initialize(), we should use put_device() to give up the reference on error, just like what we have done on failure of device_add(). Signed-off-by: Dinghao Liu Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/partitions/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block/partitions/core.c b/block/partitions/core.c index e7d776db803b1..23460cee9de50 100644 --- a/block/partitions/core.c +++ b/block/partitions/core.c @@ -384,7 +384,7 @@ static struct block_device *add_partition(struct gendisk *disk, int partno, err = blk_alloc_devt(bdev, &devt); if (err) - goto out_bdput; + goto out_put; pdev->devt = devt; /* delay uevent until 'holders' subdir is created */ -- GitLab From 794c613383433ffc4fceec8eaa081b9f1962e287 Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Mon, 18 Jan 2021 21:45:23 +0800 Subject: [PATCH 1375/2012] HID: multitouch: Apply MT_QUIRK_CONFIDENCE quirk for multi-input devices Palm ejection stops working on some Elan and Synaptics touchpad after commit 40d5bb87377a ("HID: multitouch: enable multi-input as a quirk for some devices"). The commit changes the mt_class from MT_CLS_WIN_8 to MT_CLS_WIN_8_FORCE_MULTI_INPUT, so MT_QUIRK_CONFIDENCE isn't applied anymore. So also apply the quirk since MT_CLS_WIN_8_FORCE_MULTI_INPUT is essentially MT_CLS_WIN_8. Fixes: 40d5bb87377a ("HID: multitouch: enable multi-input as a quirk for some devices") Cc: stable@vger.kernel.org Signed-off-by: Kai-Heng Feng Signed-off-by: Benjamin Tissoires --- drivers/hid/hid-multitouch.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/hid/hid-multitouch.c b/drivers/hid/hid-multitouch.c index 0743ef51d3b24..8429ebe7097e4 100644 --- a/drivers/hid/hid-multitouch.c +++ b/drivers/hid/hid-multitouch.c @@ -758,7 +758,8 @@ static int mt_touch_input_mapping(struct hid_device *hdev, struct hid_input *hi, MT_STORE_FIELD(inrange_state); return 1; case HID_DG_CONFIDENCE: - if (cls->name == MT_CLS_WIN_8 && + if ((cls->name == MT_CLS_WIN_8 || + cls->name == MT_CLS_WIN_8_FORCE_MULTI_INPUT) && (field->application == HID_DG_TOUCHPAD || field->application == HID_DG_TOUCHSCREEN)) app->quirks |= MT_QUIRK_CONFIDENCE; -- GitLab From 08d60e5999540110576e7c1346d486220751b7f9 Mon Sep 17 00:00:00 2001 From: John Ogness Date: Sun, 24 Jan 2021 21:33:28 +0106 Subject: [PATCH 1376/2012] printk: fix string termination for record_print_text() Commit f0e386ee0c0b ("printk: fix buffer overflow potential for print_text()") added string termination in record_print_text(). However it used the wrong base pointer for adding the terminator. This led to a 0-byte being written somewhere beyond the buffer. Use the correct base pointer when adding the terminator. Fixes: f0e386ee0c0b ("printk: fix buffer overflow potential for print_text()") Reported-by: Sven Schnelle Signed-off-by: John Ogness Signed-off-by: Petr Mladek Link: https://lore.kernel.org/r/20210124202728.4718-1-john.ogness@linutronix.de --- kernel/printk/printk.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 17fa6dc77053b..c55cd18206895 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -1399,7 +1399,7 @@ static size_t record_print_text(struct printk_record *r, bool syslog, * not counted in the return value. */ if (buf_size > 0) - text[len] = 0; + r->text_buf[len] = 0; return len; } -- GitLab From c5b5ff607d6fe5f4284acabd07066f96ecf96ac4 Mon Sep 17 00:00:00 2001 From: Bard Liao Date: Mon, 25 Jan 2021 10:30:51 +0200 Subject: [PATCH 1377/2012] ALSA: hda: intel-dsp-config: add PCI id for TGL-H Adding PCI id for TGL-H. Like for other TGL platforms, SOF is used if Soundwire codecs or PCH-DMIC is detected. Signed-off-by: Bard Liao Reviewed-by: Xiuli Pan Reviewed-by: Libin Yang Signed-off-by: Kai Vehmanen Link: https://lore.kernel.org/r/20210125083051.828205-1-kai.vehmanen@linux.intel.com Signed-off-by: Takashi Iwai --- sound/hda/intel-dsp-config.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sound/hda/intel-dsp-config.c b/sound/hda/intel-dsp-config.c index 6a0d070c60c9a..c456861725172 100644 --- a/sound/hda/intel-dsp-config.c +++ b/sound/hda/intel-dsp-config.c @@ -307,6 +307,10 @@ static const struct config_entry config_table[] = { .flags = FLAG_SOF | FLAG_SOF_ONLY_IF_DMIC_OR_SOUNDWIRE, .device = 0xa0c8, }, + { + .flags = FLAG_SOF | FLAG_SOF_ONLY_IF_DMIC_OR_SOUNDWIRE, + .device = 0x43c8, + }, #endif /* Elkhart Lake */ -- GitLab From 78e5330329ee206d6aa4593a90320fd837f7966e Mon Sep 17 00:00:00 2001 From: Dom Cobley Date: Thu, 21 Jan 2021 11:57:58 +0100 Subject: [PATCH 1378/2012] drm/vc4: Correct lbm size and calculation LBM base address is measured in units of pixels per cycle. That is 4 for 2711 (hvs5) and 2 for 2708. We are wasting 75% of lbm by indexing without the scaling. But we were also using too high a size for the lbm resulting in partial corruption (right hand side) of vertically scaled images, usually at 4K or lower resolutions with more layers. The physical RAM of LBM on 2711 is 8 * 1920 * 16 * 12-bit (pixels are stored 12-bits per component regardless of format). The LBM address indexes work in units of pixels per clock, so for 4 pixels per clock that means we have 32 * 1920 = 60K Fixes: c54619b0bfb3 ("drm/vc4: Add support for the BCM2711 HVS5") Signed-off-by: Dom Cobley Signed-off-by: Maxime Ripard Reviewed-by: Dave Stevenson Tested-By: Lucas Nussbaum Tested-By: Ryutaroh Matsumoto Link: https://patchwork.freedesktop.org/patch/msgid/20210121105759.1262699-1-maxime@cerno.tech --- drivers/gpu/drm/vc4/vc4_hvs.c | 8 ++++---- drivers/gpu/drm/vc4/vc4_plane.c | 7 ++++++- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/vc4/vc4_hvs.c b/drivers/gpu/drm/vc4/vc4_hvs.c index cccd341e5d670..3b722252d1fbe 100644 --- a/drivers/gpu/drm/vc4/vc4_hvs.c +++ b/drivers/gpu/drm/vc4/vc4_hvs.c @@ -620,11 +620,11 @@ static int vc4_hvs_bind(struct device *dev, struct device *master, void *data) * for now we just allocate globally. */ if (!hvs->hvs5) - /* 96kB */ - drm_mm_init(&hvs->lbm_mm, 0, 96 * 1024); + /* 48k words of 2x12-bit pixels */ + drm_mm_init(&hvs->lbm_mm, 0, 48 * 1024); else - /* 70k words */ - drm_mm_init(&hvs->lbm_mm, 0, 70 * 2 * 1024); + /* 60k words of 4x12-bit pixels */ + drm_mm_init(&hvs->lbm_mm, 0, 60 * 1024); /* Upload filter kernels. We only have the one for now, so we * keep it around for the lifetime of the driver. diff --git a/drivers/gpu/drm/vc4/vc4_plane.c b/drivers/gpu/drm/vc4/vc4_plane.c index 6b39cc2ca18d0..1a4369c1fb164 100644 --- a/drivers/gpu/drm/vc4/vc4_plane.c +++ b/drivers/gpu/drm/vc4/vc4_plane.c @@ -437,6 +437,7 @@ static void vc4_write_ppf(struct vc4_plane_state *vc4_state, u32 src, u32 dst) static u32 vc4_lbm_size(struct drm_plane_state *state) { struct vc4_plane_state *vc4_state = to_vc4_plane_state(state); + struct vc4_dev *vc4 = to_vc4_dev(state->plane->dev); u32 pix_per_line; u32 lbm; @@ -472,7 +473,11 @@ static u32 vc4_lbm_size(struct drm_plane_state *state) lbm = pix_per_line * 16; } - lbm = roundup(lbm, 32); + /* Align it to 64 or 128 (hvs5) bytes */ + lbm = roundup(lbm, vc4->hvs->hvs5 ? 128 : 64); + + /* Each "word" of the LBM memory contains 2 or 4 (hvs5) pixels */ + lbm /= vc4->hvs->hvs5 ? 4 : 2; return lbm; } -- GitLab From f6b57101a6b31277a4bde1d8028c46e898bd2ff2 Mon Sep 17 00:00:00 2001 From: Dom Cobley Date: Thu, 21 Jan 2021 11:57:59 +0100 Subject: [PATCH 1379/2012] drm/vc4: Correct POS1_SCL for hvs5 Fixes failure with 4096x1080 resolutions [ 284.315379] WARNING: CPU: 1 PID: 901 at drivers/gpu/drm/vc4/vc4_plane.c:981 vc4_plane_mode_set+0x1374/0x13c4 [ 284.315385] Modules linked in: ir_rc5_decoder rpivid_hevc(C) bcm2835_codec(C) bcm2835_isp(C) bcm2835_mmal_vchiq(C) bcm2835_gpiomem v4l2_mem2mem videobuf2_dma_contig videobuf2_memops videobuf2_v4l2 videobuf2_common videodev mc cdc_acm xpad ir_rc6_decoder rc_rc6_mce gpio_ir_recv fuse [ 284.315509] CPU: 1 PID: 901 Comm: kodi.bin Tainted: G C 5.10.7 #1 [ 284.315514] Hardware name: BCM2711 [ 284.315518] Backtrace: [ 284.315533] [] (dump_backtrace) from [] (show_stack+0x20/0x24) [ 284.315540] r7:ffffffff r6:00000000 r5:68000013 r4:c18ecf1c [ 284.315549] [] (show_stack) from [] (dump_stack+0xc4/0xf0) [ 284.315558] [] (dump_stack) from [] (__warn+0xfc/0x158) [ 284.315564] r9:00000000 r8:00000009 r7:000003d5 r6:00000009 r5:c08cc7dc r4:c0fd09b8 [ 284.315572] [] (__warn) from [] (warn_slowpath_fmt+0x74/0xe4) [ 284.315577] r7:c08cc7dc r6:000003d5 r5:c0fd09b8 r4:00000000 [ 284.315584] [] (warn_slowpath_fmt) from [] (vc4_plane_mode_set+0x1374/0x13c4) [ 284.315589] r8:00000000 r7:00000000 r6:00001000 r5:c404c600 r4:c2e34600 [ 284.315596] [] (vc4_plane_mode_set) from [] (vc4_plane_atomic_check+0x40/0x1c0) [ 284.315601] r10:00000001 r9:c2e34600 r8:c0e67068 r7:c0fc44e0 r6:c2ce3640 r5:c3d636c0 [ 284.315605] r4:c2e34600 [ 284.315614] [] (vc4_plane_atomic_check) from [] (drm_atomic_helper_check_planes+0xec/0x1ec) [ 284.315620] r9:c2e34600 r8:c0e67068 r7:c0fc44e0 r6:c2ce3640 r5:c3d636c0 r4:00000006 [ 284.315627] [] (drm_atomic_helper_check_planes) from [] (drm_atomic_helper_check+0x54/0x9c) [ 284.315633] r9:c2e35400 r8:00000006 r7:00000000 r6:c2ba7800 r5:c3d636c0 r4:00000000 [ 284.315641] [] (drm_atomic_helper_check) from [] (vc4_atomic_check+0x25c/0x454) [ 284.315645] r7:00000000 r6:c2ba7800 r5:00000001 r4:c3d636c0 [ 284.315652] [] (vc4_atomic_check) from [] (drm_atomic_check_only+0x5cc/0x7e0) [ 284.315658] r10:c404c6c8 r9:ffffffff r8:c472c480 r7:00000003 r6:c3d636c0 r5:00000000 [ 284.315662] r4:0000003c r3:c08b7a4c [ 284.315670] [] (drm_atomic_check_only) from [] (drm_mode_atomic_ioctl+0x758/0xa7c) [ 284.315675] r10:c3d46000 r9:c3d636c0 r8:c2ce8a70 r7:027e3a54 r6:00000043 r5:c1fbb800 [ 284.315679] r4:0281a858 [ 284.315688] [] (drm_mode_atomic_ioctl) from [] (drm_ioctl_kernel+0xc4/0x108) [ 284.315693] r10:c03864bc r9:c1fbb800 r8:c3d47e64 r7:c089b308 r6:00000002 r5:c2ba7800 [ 284.315697] r4:00000000 [ 284.315705] [] (drm_ioctl_kernel) from [] (drm_ioctl+0x1e8/0x3a0) [ 284.315711] r9:c1fbb800 r8:000000bc r7:c3d47e64 r6:00000038 r5:c0e59570 r4:00000038 [ 284.315719] [] (drm_ioctl) from [] (sys_ioctl+0x35c/0x914) [ 284.315724] r10:c2d08200 r9:00000000 r8:c36fa300 r7:befdd870 r6:c03864bc r5:c36fa301 [ 284.315728] r4:c03864bc [ 284.315735] [] (sys_ioctl) from [] (ret_fast_syscall+0x0/0x28) [ 284.315739] Exception stack(0xc3d47fa8 to 0xc3d47ff0) [ 284.315745] 7fa0: 027eb750 befdd870 00000000 c03864bc befdd870 00000000 [ 284.315750] 7fc0: 027eb750 befdd870 c03864bc 00000036 027e3948 0281a640 0281a850 027e3a50 [ 284.315756] 7fe0: b4b64100 befdd844 b4b5ba2c b49c994c [ 284.315762] r10:00000036 r9:c3d46000 r8:c0200204 r7:00000036 r6:c03864bc r5:befdd870 [ 284.315765] r4:027eb750 Fixes: c54619b0bfb3 ("drm/vc4: Add support for the BCM2711 HVS5") Signed-off-by: Dom Cobley Signed-off-by: Maxime Ripard Reviewed-by: Dave Stevenson Tested-By: Lucas Nussbaum Tested-By: Ryutaroh Matsumoto Link: https://patchwork.freedesktop.org/patch/msgid/20210121105759.1262699-2-maxime@cerno.tech --- drivers/gpu/drm/vc4/vc4_plane.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/vc4/vc4_plane.c b/drivers/gpu/drm/vc4/vc4_plane.c index 1a4369c1fb164..5612cab552270 100644 --- a/drivers/gpu/drm/vc4/vc4_plane.c +++ b/drivers/gpu/drm/vc4/vc4_plane.c @@ -917,9 +917,9 @@ static int vc4_plane_mode_set(struct drm_plane *plane, if (!vc4_state->is_unity) { vc4_dlist_write(vc4_state, VC4_SET_FIELD(vc4_state->crtc_w, - SCALER_POS1_SCL_WIDTH) | + SCALER5_POS1_SCL_WIDTH) | VC4_SET_FIELD(vc4_state->crtc_h, - SCALER_POS1_SCL_HEIGHT)); + SCALER5_POS1_SCL_HEIGHT)); } /* Position Word 2: Source Image Size */ -- GitLab From bd9038faa9d7f162b47e1577e35ec5eac39f9d90 Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Thu, 21 Jan 2021 18:57:24 -0600 Subject: [PATCH 1380/2012] ASoC: SOF: Intel: soundwire: fix select/depend unmet dependencies The LKP bot reports the following issue: WARNING: unmet direct dependencies detected for SOUNDWIRE_INTEL Depends on [m]: SOUNDWIRE [=m] && ACPI [=y] && SND_SOC [=y] Selected by [y]: - SND_SOC_SOF_INTEL_SOUNDWIRE [=y] && SOUND [=y] && !UML && SND [=y] && SND_SOC [=y] && SND_SOC_SOF_TOPLEVEL [=y] && SND_SOC_SOF_INTEL_TOPLEVEL [=y] && SND_SOC_SOF_INTEL_PCI [=y] This comes from having tristates being configured independently, when in practice the CONFIG_SOUNDWIRE needs to be aligned with the SOF choices: when the SOF code is compiled as built-in, the CONFIG_SOUNDWIRE also needs to be 'y'. The easiest fix is to replace the 'depends' with a 'select' and have a single user selection to activate SoundWire on Intel platforms. This still allows regmap to be compiled independently as a module. This is just a temporary fix, the select/depend usage will be revisited and the SOF Kconfig re-organized, as suggested by Arnd Bergman. Reported-by: kernel test robot Fixes: a115ab9b8b93e ('ASoC: SOF: Intel: add build support for SoundWire') Signed-off-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/20210122005725.94163-2-pierre-louis.bossart@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/sof/intel/Kconfig | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sound/soc/sof/intel/Kconfig b/sound/soc/sof/intel/Kconfig index d306c370e5d16..4797a1cf8c805 100644 --- a/sound/soc/sof/intel/Kconfig +++ b/sound/soc/sof/intel/Kconfig @@ -355,7 +355,7 @@ config SND_SOC_SOF_HDA config SND_SOC_SOF_INTEL_SOUNDWIRE_LINK bool "SOF support for SoundWire" - depends on SOUNDWIRE && ACPI + depends on ACPI help This adds support for SoundWire with Sound Open Firmware for Intel(R) platforms. @@ -371,6 +371,7 @@ config SND_SOC_SOF_INTEL_SOUNDWIRE_LINK_BASELINE config SND_SOC_SOF_INTEL_SOUNDWIRE tristate + select SOUNDWIRE select SOUNDWIRE_INTEL help This option is not user-selectable but automagically handled by -- GitLab From 8a3fea95fab14dd19487d1e499eee3b3d1050d70 Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Thu, 21 Jan 2021 18:57:25 -0600 Subject: [PATCH 1381/2012] ASoC: SOF: SND_INTEL_DSP_CONFIG dependency The sof-pci-dev driver fails to link when built into the kernel and CONFIG_SND_INTEL_DSP_CONFIG is set to =m: arm-linux-gnueabi-ld: sound/soc/sof/sof-pci-dev.o: in function `sof_pci_probe': sof-pci-dev.c:(.text+0x1c): undefined reference to `snd_intel_dsp_driver_probe' As a temporary fix, use IS_REACHABLE to prevent the problem from happening. A more complete solution is to move this code to Intel-specific parts, restructure the drivers and Kconfig as discussed with Arnd Bergmann and Takashi Iwai. Fixes: 82d9d54a6c0e ("ALSA: hda: add Intel DSP configuration / probe code") Reported-by: Arnd Bergmann Signed-off-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/20210122005725.94163-3-pierre-louis.bossart@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/sof/sof-acpi-dev.c | 11 ++++++----- sound/soc/sof/sof-pci-dev.c | 10 ++++++---- 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/sound/soc/sof/sof-acpi-dev.c b/sound/soc/sof/sof-acpi-dev.c index 2a369c2c65514..cc2e257087e4c 100644 --- a/sound/soc/sof/sof-acpi-dev.c +++ b/sound/soc/sof/sof-acpi-dev.c @@ -131,12 +131,13 @@ static int sof_acpi_probe(struct platform_device *pdev) if (!id) return -ENODEV; - ret = snd_intel_acpi_dsp_driver_probe(dev, id->id); - if (ret != SND_INTEL_DSP_DRIVER_ANY && ret != SND_INTEL_DSP_DRIVER_SOF) { - dev_dbg(dev, "SOF ACPI driver not selected, aborting probe\n"); - return -ENODEV; + if (IS_REACHABLE(CONFIG_SND_INTEL_DSP_CONFIG)) { + ret = snd_intel_acpi_dsp_driver_probe(dev, id->id); + if (ret != SND_INTEL_DSP_DRIVER_ANY && ret != SND_INTEL_DSP_DRIVER_SOF) { + dev_dbg(dev, "SOF ACPI driver not selected, aborting probe\n"); + return -ENODEV; + } } - dev_dbg(dev, "ACPI DSP detected"); sof_pdata = devm_kzalloc(dev, sizeof(*sof_pdata), GFP_KERNEL); diff --git a/sound/soc/sof/sof-pci-dev.c b/sound/soc/sof/sof-pci-dev.c index 63b989e3ec409..215711ac74509 100644 --- a/sound/soc/sof/sof-pci-dev.c +++ b/sound/soc/sof/sof-pci-dev.c @@ -344,10 +344,12 @@ static int sof_pci_probe(struct pci_dev *pci, const struct snd_sof_dsp_ops *ops; int ret; - ret = snd_intel_dsp_driver_probe(pci); - if (ret != SND_INTEL_DSP_DRIVER_ANY && ret != SND_INTEL_DSP_DRIVER_SOF) { - dev_dbg(&pci->dev, "SOF PCI driver not selected, aborting probe\n"); - return -ENODEV; + if (IS_REACHABLE(CONFIG_SND_INTEL_DSP_CONFIG)) { + ret = snd_intel_dsp_driver_probe(pci); + if (ret != SND_INTEL_DSP_DRIVER_ANY && ret != SND_INTEL_DSP_DRIVER_SOF) { + dev_dbg(&pci->dev, "SOF PCI driver not selected, aborting probe\n"); + return -ENODEV; + } } dev_dbg(&pci->dev, "PCI DSP detected"); -- GitLab From e953daeb68b1abd8a7d44902786349fdeef5c297 Mon Sep 17 00:00:00 2001 From: Eliot Blennerhassett Date: Fri, 22 Jan 2021 21:27:08 +1300 Subject: [PATCH 1382/2012] ASoC: ak4458: correct reset polarity Reset (aka power off) happens when the reset gpio is made active. Change function name to ak4458_reset to match devicetree property "reset-gpios" Signed-off-by: Eliot Blennerhassett Reviewed-by: Linus Walleij Link: https://lore.kernel.org/r/ce650f47-4ff6-e486-7846-cc3d033f3601@blennerhassett.gen.nz Signed-off-by: Mark Brown --- sound/soc/codecs/ak4458.c | 22 +++++++--------------- 1 file changed, 7 insertions(+), 15 deletions(-) diff --git a/sound/soc/codecs/ak4458.c b/sound/soc/codecs/ak4458.c index 1010c9ee2e836..472caad17012e 100644 --- a/sound/soc/codecs/ak4458.c +++ b/sound/soc/codecs/ak4458.c @@ -595,18 +595,10 @@ static struct snd_soc_dai_driver ak4497_dai = { .ops = &ak4458_dai_ops, }; -static void ak4458_power_off(struct ak4458_priv *ak4458) +static void ak4458_reset(struct ak4458_priv *ak4458, bool active) { if (ak4458->reset_gpiod) { - gpiod_set_value_cansleep(ak4458->reset_gpiod, 0); - usleep_range(1000, 2000); - } -} - -static void ak4458_power_on(struct ak4458_priv *ak4458) -{ - if (ak4458->reset_gpiod) { - gpiod_set_value_cansleep(ak4458->reset_gpiod, 1); + gpiod_set_value_cansleep(ak4458->reset_gpiod, active); usleep_range(1000, 2000); } } @@ -620,7 +612,7 @@ static int ak4458_init(struct snd_soc_component *component) if (ak4458->mute_gpiod) gpiod_set_value_cansleep(ak4458->mute_gpiod, 1); - ak4458_power_on(ak4458); + ak4458_reset(ak4458, false); ret = snd_soc_component_update_bits(component, AK4458_00_CONTROL1, 0x80, 0x80); /* ACKS bit = 1; 10000000 */ @@ -650,7 +642,7 @@ static void ak4458_remove(struct snd_soc_component *component) { struct ak4458_priv *ak4458 = snd_soc_component_get_drvdata(component); - ak4458_power_off(ak4458); + ak4458_reset(ak4458, true); } #ifdef CONFIG_PM @@ -660,7 +652,7 @@ static int __maybe_unused ak4458_runtime_suspend(struct device *dev) regcache_cache_only(ak4458->regmap, true); - ak4458_power_off(ak4458); + ak4458_reset(ak4458, true); if (ak4458->mute_gpiod) gpiod_set_value_cansleep(ak4458->mute_gpiod, 0); @@ -685,8 +677,8 @@ static int __maybe_unused ak4458_runtime_resume(struct device *dev) if (ak4458->mute_gpiod) gpiod_set_value_cansleep(ak4458->mute_gpiod, 1); - ak4458_power_off(ak4458); - ak4458_power_on(ak4458); + ak4458_reset(ak4458, true); + ak4458_reset(ak4458, false); regcache_cache_only(ak4458->regmap, false); regcache_mark_dirty(ak4458->regmap); -- GitLab From 339f6c73d5abe85550a0c962edc8a5df1f2b4273 Mon Sep 17 00:00:00 2001 From: Tzung-Bi Shih Date: Mon, 25 Jan 2021 14:14:53 +0800 Subject: [PATCH 1383/2012] ASoC: mediatek: mt8192-mt6359: add format constraints for RT5682 MT8192 determines the I2S clock rates according to the sampling rates. There is only 1 set of I2S in between MT8192 and RT5682. If playing and capturing via RT5682 in different sampling rates, the I2S data will be corrupted. Adds format constraints to the corresponding DAI links to make sure the sampling rates are symmetric. Fixes: 18b13ff23fab ("ASoC: mediatek: mt8192: add machine driver with mt6359, rt1015 and rt5682") Signed-off-by: Tzung-Bi Shih Link: https://lore.kernel.org/r/20210125061453.1056535-1-tzungbi@google.com Signed-off-by: Mark Brown --- .../mt8192/mt8192-mt6359-rt1015-rt5682.c | 49 +++++++++++++++++++ 1 file changed, 49 insertions(+) diff --git a/sound/soc/mediatek/mt8192/mt8192-mt6359-rt1015-rt5682.c b/sound/soc/mediatek/mt8192/mt8192-mt6359-rt1015-rt5682.c index 716fbb4126b5f..ae2c748eb19c4 100644 --- a/sound/soc/mediatek/mt8192/mt8192-mt6359-rt1015-rt5682.c +++ b/sound/soc/mediatek/mt8192/mt8192-mt6359-rt1015-rt5682.c @@ -401,6 +401,53 @@ static const struct snd_soc_ops mt8192_mt6359_rt1015_rt5682_capture1_ops = { .startup = mt8192_mt6359_rt1015_rt5682_cap1_startup, }; +static int +mt8192_mt6359_rt5682_startup(struct snd_pcm_substream *substream) +{ + static const unsigned int channels[] = { + 1, 2 + }; + static const struct snd_pcm_hw_constraint_list constraints_channels = { + .count = ARRAY_SIZE(channels), + .list = channels, + .mask = 0, + }; + static const unsigned int rates[] = { + 48000 + }; + static const struct snd_pcm_hw_constraint_list constraints_rates = { + .count = ARRAY_SIZE(rates), + .list = rates, + .mask = 0, + }; + + struct snd_soc_pcm_runtime *rtd = asoc_substream_to_rtd(substream); + struct snd_pcm_runtime *runtime = substream->runtime; + int ret; + + ret = snd_pcm_hw_constraint_list(runtime, 0, + SNDRV_PCM_HW_PARAM_CHANNELS, + &constraints_channels); + if (ret < 0) { + dev_err(rtd->dev, "hw_constraint_list channels failed\n"); + return ret; + } + + ret = snd_pcm_hw_constraint_list(runtime, 0, + SNDRV_PCM_HW_PARAM_RATE, + &constraints_rates); + if (ret < 0) { + dev_err(rtd->dev, "hw_constraint_list rate failed\n"); + return ret; + } + + return 0; +} + +static const struct snd_soc_ops mt8192_mt6359_rt5682_ops = { + .startup = mt8192_mt6359_rt5682_startup, +}; + /* FE */ SND_SOC_DAILINK_DEFS(playback1, DAILINK_COMP_ARRAY(COMP_CPU("DL1")), @@ -648,6 +695,7 @@ static struct snd_soc_dai_link mt8192_mt6359_dai_links[] = { SND_SOC_DPCM_TRIGGER_PRE}, .dynamic = 1, .dpcm_playback = 1, + .ops = &mt8192_mt6359_rt5682_ops, SND_SOC_DAILINK_REG(playback3), }, { @@ -721,6 +769,7 @@ static struct snd_soc_dai_link mt8192_mt6359_dai_links[] = { SND_SOC_DPCM_TRIGGER_PRE}, .dynamic = 1, .dpcm_capture = 1, + .ops = &mt8192_mt6359_rt5682_ops, SND_SOC_DAILINK_REG(capture2), }, { -- GitLab From 70041000450d0a071bf9931d634c8e2820340236 Mon Sep 17 00:00:00 2001 From: Stephan Gerhold Date: Mon, 25 Jan 2021 11:44:42 +0100 Subject: [PATCH 1384/2012] ASoC: qcom: lpass: Fix out-of-bounds DAI ID lookup The "dai_id" given into LPAIF_INTFDMA_REG(...) is already the real DAI ID, not an index into v->dai_driver. Looking it up again seems entirely redundant. For IPQ806x (and SC7180 since commit 09a4f6f5d21c ("ASoC: dt-bindings: lpass: Fix and common up lpass dai ids") this is now often an out-of-bounds read because the indexes in the "dai_driver" array no longer match the actual DAI ID. Cc: Srinivasa Rao Mandadapu Cc: Srinivas Kandagatla Fixes: 7cb37b7bd0d3 ("ASoC: qcom: Add support for lpass hdmi driver") Signed-off-by: Stephan Gerhold Reviewed-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20210125104442.135899-1-stephan@gerhold.net Signed-off-by: Mark Brown --- sound/soc/qcom/lpass-lpaif-reg.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/qcom/lpass-lpaif-reg.h b/sound/soc/qcom/lpass-lpaif-reg.h index 405542832e994..baf72f124ea9b 100644 --- a/sound/soc/qcom/lpass-lpaif-reg.h +++ b/sound/soc/qcom/lpass-lpaif-reg.h @@ -133,7 +133,7 @@ #define LPAIF_WRDMAPERCNT_REG(v, chan) LPAIF_WRDMA_REG_ADDR(v, 0x14, (chan)) #define LPAIF_INTFDMA_REG(v, chan, reg, dai_id) \ - ((v->dai_driver[dai_id].id == LPASS_DP_RX) ? \ + ((dai_id == LPASS_DP_RX) ? \ LPAIF_HDMI_RDMA##reg##_REG(v, chan) : \ LPAIF_RDMA##reg##_REG(v, chan)) -- GitLab From 9ad9bc59dde106e56dd59ce2bec7c1b08e1f0eb4 Mon Sep 17 00:00:00 2001 From: Libin Yang Date: Mon, 25 Jan 2021 10:11:17 +0200 Subject: [PATCH 1385/2012] ASoC: Intel: sof_sdw: set proper flags for Dell TGL-H SKU 0A5E Add flag "SOF_RT711_JD_SRC_JD2", flag "SOF_RT715_DAI_ID_FIX" and "SOF_SDW_FOUR_SPK" to the Dell TGL-H based SKU "0A5E". Signed-off-by: Libin Yang Co-developed-by: Hui Wang Signed-off-by: Hui Wang Reviewed-by: Bard Liao Reviewed-by: Pierre-Louis Bossart Signed-off-by: Kai Vehmanen Link: https://lore.kernel.org/r/20210125081117.814488-1-kai.vehmanen@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/intel/boards/sof_sdw.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/sound/soc/intel/boards/sof_sdw.c b/sound/soc/intel/boards/sof_sdw.c index ca968901ac96f..6d0d6ef711e0f 100644 --- a/sound/soc/intel/boards/sof_sdw.c +++ b/sound/soc/intel/boards/sof_sdw.c @@ -67,6 +67,16 @@ static const struct dmi_system_id sof_sdw_quirk_table[] = { .driver_data = (void *)(SOF_RT711_JD_SRC_JD2 | SOF_RT715_DAI_ID_FIX), }, + { + .callback = sof_sdw_quirk_cb, + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc"), + DMI_EXACT_MATCH(DMI_PRODUCT_SKU, "0A5E") + }, + .driver_data = (void *)(SOF_RT711_JD_SRC_JD2 | + SOF_RT715_DAI_ID_FIX | + SOF_SDW_FOUR_SPK), + }, { .callback = sof_sdw_quirk_cb, .matches = { -- GitLab From 396cf2a46adddbf51373e16225c1d25254310046 Mon Sep 17 00:00:00 2001 From: Daniel Walker Date: Thu, 21 Jan 2021 15:12:36 -0800 Subject: [PATCH 1386/2012] spidev: Add cisco device compatible Add compatible string for Cisco device present on the Cisco Petra platform. Signed-off-by: Daniel Walker Cc: xe-linux-external@cisco.com Link: https://lore.kernel.org/r/20210121231237.30664-2-danielwa@cisco.com Signed-off-by: Mark Brown --- drivers/spi/spidev.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/spi/spidev.c b/drivers/spi/spidev.c index 859910ec8d9f6..8cb4d923aeaab 100644 --- a/drivers/spi/spidev.c +++ b/drivers/spi/spidev.c @@ -682,6 +682,7 @@ static const struct of_device_id spidev_dt_ids[] = { { .compatible = "lwn,bk4" }, { .compatible = "dh,dhcom-board" }, { .compatible = "menlo,m53cpld" }, + { .compatible = "cisco,spi-petra" }, {}, }; MODULE_DEVICE_TABLE(of, spidev_dt_ids); -- GitLab From bf544e9aa570034e094a8a40d5f9e1e2c4916d18 Mon Sep 17 00:00:00 2001 From: Sara Sharon Date: Fri, 15 Jan 2021 13:05:47 +0200 Subject: [PATCH 1387/2012] iwlwifi: mvm: skip power command when unbinding vif during CSA In the new CSA flow, we remain associated during CSA, but still do a unbind-bind to the vif. However, sending the power command right after when vif is unbound but still associated causes FW to assert (0x3400) since it cannot tell the LMAC id. Just skip this command, we will send it again in a bit, when assigning the new context. Signed-off-by: Sara Sharon Signed-off-by: Luca Coelho Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/iwlwifi.20210115130252.64a2254ac5c3.Iaa3a9050bf3d7c9cd5beaf561e932e6defc12ec3@changeid --- drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c index da32937ba9a78..43ff0407916af 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c @@ -4194,6 +4194,9 @@ static void __iwl_mvm_unassign_vif_chanctx(struct iwl_mvm *mvm, iwl_mvm_binding_remove_vif(mvm, vif); out: + if (fw_has_capa(&mvm->fw->ucode_capa, IWL_UCODE_TLV_CAPA_CHANNEL_SWITCH_CMD) && + switching_chanctx) + return; mvmvif->phy_ctxt = NULL; iwl_mvm_power_update_mac(mvm); } -- GitLab From 5c56d862c749669d45c256f581eac4244be00d4d Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 15 Jan 2021 13:05:48 +0200 Subject: [PATCH 1388/2012] iwlwifi: mvm: take mutex for calling iwl_mvm_get_sync_time() We need to take the mutex to call iwl_mvm_get_sync_time(), do it. Signed-off-by: Johannes Berg Signed-off-by: Luca Coelho Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/iwlwifi.20210115130252.4bb5ccf881a6.I62973cbb081e80aa5b0447a5c3b9c3251a65cf6b@changeid --- drivers/net/wireless/intel/iwlwifi/mvm/debugfs-vif.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/debugfs-vif.c b/drivers/net/wireless/intel/iwlwifi/mvm/debugfs-vif.c index 573e46956c14d..38d0bfb649ccc 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/debugfs-vif.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/debugfs-vif.c @@ -459,7 +459,10 @@ static ssize_t iwl_dbgfs_os_device_timediff_read(struct file *file, const size_t bufsz = sizeof(buf); int pos = 0; + mutex_lock(&mvm->mutex); iwl_mvm_get_sync_time(mvm, &curr_gp2, &curr_os); + mutex_unlock(&mvm->mutex); + do_div(curr_os, NSEC_PER_USEC); diff = curr_os - curr_gp2; pos += scnprintf(buf + pos, bufsz - pos, "diff=%lld\n", diff); -- GitLab From 34b9434cd0d425330a0467e767f8d047ef62964d Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 15 Jan 2021 13:05:49 +0200 Subject: [PATCH 1389/2012] iwlwifi: pcie: avoid potential PNVM leaks If we erroneously try to set the PNVM data again after it has already been set, we could leak the old DMA memory. Avoid that and warn, we shouldn't be doing this. Signed-off-by: Johannes Berg Fixes: 6972592850c0 ("iwlwifi: read and parse PNVM file") Signed-off-by: Luca Coelho Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/iwlwifi.20210115130252.929c2d680429.I086b9490e6c005f3bcaa881b617e9f61908160f3@changeid --- drivers/net/wireless/intel/iwlwifi/pcie/ctxt-info-gen3.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/ctxt-info-gen3.c b/drivers/net/wireless/intel/iwlwifi/pcie/ctxt-info-gen3.c index 36bf414a388af..afe5852284356 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/ctxt-info-gen3.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/ctxt-info-gen3.c @@ -294,6 +294,9 @@ int iwl_trans_pcie_ctx_info_gen3_set_pnvm(struct iwl_trans *trans, return ret; } + if (WARN_ON(prph_sc_ctrl->pnvm_cfg.pnvm_size)) + return -EBUSY; + prph_sc_ctrl->pnvm_cfg.pnvm_base_addr = cpu_to_le64(trans_pcie->pnvm_dram.physical); prph_sc_ctrl->pnvm_cfg.pnvm_size = -- GitLab From 1c58bed4b7f7551239b9005ad0a9a6566a3d9fbe Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 15 Jan 2021 13:05:50 +0200 Subject: [PATCH 1390/2012] iwlwifi: pnvm: don't skip everything when not reloading Even if we don't reload the file from disk, we still need to trigger the PNVM load flow with the device; fix that. Signed-off-by: Johannes Berg Fixes: 6972592850c0 ("iwlwifi: read and parse PNVM file") Signed-off-by: Luca Coelho Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/iwlwifi.20210115130252.85ef56c4ef8c.I3b853ce041a0755d45e448035bef1837995d191b@changeid --- drivers/net/wireless/intel/iwlwifi/fw/pnvm.c | 50 ++++++++++---------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/fw/pnvm.c b/drivers/net/wireless/intel/iwlwifi/fw/pnvm.c index 6d8f7bff12432..ebd1a09a2fb8a 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/pnvm.c +++ b/drivers/net/wireless/intel/iwlwifi/fw/pnvm.c @@ -224,40 +224,40 @@ static int iwl_pnvm_parse(struct iwl_trans *trans, const u8 *data, int iwl_pnvm_load(struct iwl_trans *trans, struct iwl_notif_wait_data *notif_wait) { - const struct firmware *pnvm; struct iwl_notification_wait pnvm_wait; static const u16 ntf_cmds[] = { WIDE_ID(REGULATORY_AND_NVM_GROUP, PNVM_INIT_COMPLETE_NTFY) }; - char pnvm_name[64]; - int ret; /* if the SKU_ID is empty, there's nothing to do */ if (!trans->sku_id[0] && !trans->sku_id[1] && !trans->sku_id[2]) return 0; - /* if we already have it, nothing to do either */ - if (trans->pnvm_loaded) - return 0; + /* load from disk only if we haven't done it before */ + if (!trans->pnvm_loaded) { + const struct firmware *pnvm; + char pnvm_name[64]; + int ret; + + /* + * The prefix unfortunately includes a hyphen at the end, so + * don't add the dot here... + */ + snprintf(pnvm_name, sizeof(pnvm_name), "%spnvm", + trans->cfg->fw_name_pre); + + /* ...but replace the hyphen with the dot here. */ + if (strlen(trans->cfg->fw_name_pre) < sizeof(pnvm_name)) + pnvm_name[strlen(trans->cfg->fw_name_pre) - 1] = '.'; + + ret = firmware_request_nowarn(&pnvm, pnvm_name, trans->dev); + if (ret) { + IWL_DEBUG_FW(trans, "PNVM file %s not found %d\n", + pnvm_name, ret); + } else { + iwl_pnvm_parse(trans, pnvm->data, pnvm->size); - /* - * The prefix unfortunately includes a hyphen at the end, so - * don't add the dot here... - */ - snprintf(pnvm_name, sizeof(pnvm_name), "%spnvm", - trans->cfg->fw_name_pre); - - /* ...but replace the hyphen with the dot here. */ - if (strlen(trans->cfg->fw_name_pre) < sizeof(pnvm_name)) - pnvm_name[strlen(trans->cfg->fw_name_pre) - 1] = '.'; - - ret = firmware_request_nowarn(&pnvm, pnvm_name, trans->dev); - if (ret) { - IWL_DEBUG_FW(trans, "PNVM file %s not found %d\n", - pnvm_name, ret); - } else { - iwl_pnvm_parse(trans, pnvm->data, pnvm->size); - - release_firmware(pnvm); + release_firmware(pnvm); + } } iwl_init_notification_wait(notif_wait, &pnvm_wait, -- GitLab From 82a08d0cd7b503be426fb856a0fb73c9c976aae1 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 15 Jan 2021 13:05:51 +0200 Subject: [PATCH 1391/2012] iwlwifi: pnvm: don't try to load after failures If loading the PNVM file failed on the first try during the interface up, the file is unlikely to show up later, and we already don't try to reload it if it changes, so just don't try loading it again and again. This also fixes some issues where we may try to load it at resume time, which may not be possible yet. Signed-off-by: Johannes Berg Fixes: 6972592850c0 ("iwlwifi: read and parse PNVM file") Signed-off-by: Luca Coelho Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/iwlwifi.20210115130252.5ac6828a0bbe.I7d308358b21d3c0c84b1086999dbc7267f86e219@changeid --- drivers/net/wireless/intel/iwlwifi/fw/pnvm.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/fw/pnvm.c b/drivers/net/wireless/intel/iwlwifi/fw/pnvm.c index ebd1a09a2fb8a..895a907acdf0f 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/pnvm.c +++ b/drivers/net/wireless/intel/iwlwifi/fw/pnvm.c @@ -232,7 +232,7 @@ int iwl_pnvm_load(struct iwl_trans *trans, if (!trans->sku_id[0] && !trans->sku_id[1] && !trans->sku_id[2]) return 0; - /* load from disk only if we haven't done it before */ + /* load from disk only if we haven't done it (or tried) before */ if (!trans->pnvm_loaded) { const struct firmware *pnvm; char pnvm_name[64]; @@ -253,6 +253,12 @@ int iwl_pnvm_load(struct iwl_trans *trans, if (ret) { IWL_DEBUG_FW(trans, "PNVM file %s not found %d\n", pnvm_name, ret); + /* + * Pretend we've loaded it - at least we've tried and + * couldn't load it at all, so there's no point in + * trying again over and over. + */ + trans->pnvm_loaded = true; } else { iwl_pnvm_parse(trans, pnvm->data, pnvm->size); -- GitLab From a800f95858d02a9174c48b4286c0799d3905890f Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Fri, 15 Jan 2021 13:05:52 +0200 Subject: [PATCH 1392/2012] iwlwifi: fix the NMI flow for old devices I noticed that the flow that triggers an NMI on the firmware for old devices (tested on 7265) doesn't work. Apparently, the firmware / device is still in low power when we write the register that triggers the NMI. We call the "grab_nic_access" function to make sure the device is awake but that wasn't enough. I played with this and noticed that if we wait 1 ms after the device reports it is awake before we write to the NMI register, the device always sees our write and the firmware gets properly asserted. Triggering an NMI to the firmware can be done with the debugfs hook: echo 1 > /sys/kernel/debug/iwlwifi/0000\:00\:03.0/iwlmvm/fw_nmi What happened before is that the firmware would just stall without running its NMI routine. Because of that the driver wouldn't get the "firmware crashed" interrupt. After a while the driver would notice that the firmware is not responding to some command and it would read the error data from the firmware, but this data is populated in the NMI service routine in the firmware which was not called. So in the logs it looked like: iwlwifi 0000:00:03.0: Error sending REPLY_ERROR: time out after 2000ms. iwlwifi 0000:00:03.0: Current CMD queue read_ptr 33 write_ptr 34 iwlwifi 0000:00:03.0: Loaded firmware version: 29.09bd31e1.0 7265D-29.ucode iwlwifi 0000:00:03.0: 0x00000000 | ADVANCED_SYSASSERT iwlwifi 0000:00:03.0: 0x00000000 | trm_hw_status0 iwlwifi 0000:00:03.0: 0x00000000 | trm_hw_status1 iwlwifi 0000:00:03.0: 0x00000000 | branchlink2 iwlwifi 0000:00:03.0: 0x00000000 | interruptlink1 iwlwifi 0000:00:03.0: 0x00000000 | interruptlink2 iwlwifi 0000:00:03.0: 0x00000000 | data1 iwlwifi 0000:00:03.0: 0x00000000 | data2 iwlwifi 0000:00:03.0: 0x00000000 | data3 iwlwifi 0000:00:03.0: 0x00000000 | beacon time iwlwifi 0000:00:03.0: 0x00000000 | tsf low ... With this fix, immediately after we trigger the NMI to the firmware, we get the expected: iwlwifi 0000:00:03.0: Microcode SW error detected. Restarting 0x2000000. iwlwifi 0000:00:03.0: Start IWL Error Log Dump: iwlwifi 0000:00:03.0: Status: 0x00000040, count: 6 iwlwifi 0000:00:03.0: Loaded firmware version: 29.09bd31e1.0 7265D-29.ucode iwlwifi 0000:00:03.0: 0x00000084 | NMI_INTERRUPT_UNKNOWN iwlwifi 0000:00:03.0: 0x000002F1 | trm_hw_status0 iwlwifi 0000:00:03.0: 0x00000000 | trm_hw_status1 iwlwifi 0000:00:03.0: 0x00043D6C | branchlink2 iwlwifi 0000:00:03.0: 0x0004AFD6 | interruptlink1 iwlwifi 0000:00:03.0: 0x000008C4 | interruptlink2 iwlwifi 0000:00:03.0: 0x00000000 | data1 iwlwifi 0000:00:03.0: 0x00000080 | data2 iwlwifi 0000:00:03.0: 0x07030000 | data3 iwlwifi 0000:00:03.0: 0x003FD4C3 | beacon time iwlwifi 0000:00:03.0: 0x00C22AC3 | tsf low iwlwifi 0000:00:03.0: 0x00000000 | tsf hi iwlwifi 0000:00:03.0: 0x00000000 | time gp1 iwlwifi 0000:00:03.0: 0x00C22AC3 | time gp2 iwlwifi 0000:00:03.0: 0x00000001 | uCode revision type iwlwifi 0000:00:03.0: 0x0000001D | uCode version major Notice the first line: "Microcode SW error detected:" which is printed in the driver's ISR, which means that the driver actually got an interrupt from the firmware saying that it crashed. And then we have the properly populated error data. Signed-off-by: Emmanuel Grumbach Signed-off-by: Luca Coelho Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/iwlwifi.20210115130252.70e67cc75d88.I6615cad4361862e7f3c9f2d3cafb6a8c61e16781@changeid --- drivers/net/wireless/intel/iwlwifi/iwl-io.c | 9 +++++---- drivers/net/wireless/intel/iwlwifi/iwl-io.h | 10 ++++++++-- 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-io.c b/drivers/net/wireless/intel/iwlwifi/iwl-io.c index 2ac20d0a30eb6..2b7ef1583e7fd 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-io.c +++ b/drivers/net/wireless/intel/iwlwifi/iwl-io.c @@ -150,16 +150,17 @@ u32 iwl_read_prph(struct iwl_trans *trans, u32 ofs) } IWL_EXPORT_SYMBOL(iwl_read_prph); -void iwl_write_prph(struct iwl_trans *trans, u32 ofs, u32 val) +void iwl_write_prph_delay(struct iwl_trans *trans, u32 ofs, u32 val, u32 delay_ms) { unsigned long flags; if (iwl_trans_grab_nic_access(trans, &flags)) { + mdelay(delay_ms); iwl_write_prph_no_grab(trans, ofs, val); iwl_trans_release_nic_access(trans, &flags); } } -IWL_EXPORT_SYMBOL(iwl_write_prph); +IWL_EXPORT_SYMBOL(iwl_write_prph_delay); int iwl_poll_prph_bit(struct iwl_trans *trans, u32 addr, u32 bits, u32 mask, int timeout) @@ -219,8 +220,8 @@ IWL_EXPORT_SYMBOL(iwl_clear_bits_prph); void iwl_force_nmi(struct iwl_trans *trans) { if (trans->trans_cfg->device_family < IWL_DEVICE_FAMILY_9000) - iwl_write_prph(trans, DEVICE_SET_NMI_REG, - DEVICE_SET_NMI_VAL_DRV); + iwl_write_prph_delay(trans, DEVICE_SET_NMI_REG, + DEVICE_SET_NMI_VAL_DRV, 1); else if (trans->trans_cfg->device_family < IWL_DEVICE_FAMILY_AX210) iwl_write_umac_prph(trans, UREG_NIC_SET_NMI_DRIVER, UREG_NIC_SET_NMI_DRIVER_NMI_FROM_DRIVER); diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-io.h b/drivers/net/wireless/intel/iwlwifi/iwl-io.h index 39bceee4e9e76..3c21c0e081f8e 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-io.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-io.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */ /* - * Copyright (C) 2018-2019 Intel Corporation + * Copyright (C) 2018-2020 Intel Corporation */ #ifndef __iwl_io_h__ #define __iwl_io_h__ @@ -37,7 +37,13 @@ u32 iwl_read_prph_no_grab(struct iwl_trans *trans, u32 ofs); u32 iwl_read_prph(struct iwl_trans *trans, u32 ofs); void iwl_write_prph_no_grab(struct iwl_trans *trans, u32 ofs, u32 val); void iwl_write_prph64_no_grab(struct iwl_trans *trans, u64 ofs, u64 val); -void iwl_write_prph(struct iwl_trans *trans, u32 ofs, u32 val); +void iwl_write_prph_delay(struct iwl_trans *trans, u32 ofs, + u32 val, u32 delay_ms); +static inline void iwl_write_prph(struct iwl_trans *trans, u32 ofs, u32 val) +{ + iwl_write_prph_delay(trans, ofs, val, 0); +} + int iwl_poll_prph_bit(struct iwl_trans *trans, u32 addr, u32 bits, u32 mask, int timeout); void iwl_set_bits_prph(struct iwl_trans *trans, u32 ofs, u32 mask); -- GitLab From 0f8d5656b3fd100c132b02aa72038f032fc6e30e Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Fri, 15 Jan 2021 13:05:53 +0200 Subject: [PATCH 1393/2012] iwlwifi: queue: don't crash if txq->entries is NULL The code was really awkward, we would first dereference txq->entries when calling iwl_txq_genX_tfd_unmap and then we would check that txq->entries is non-NULL. Fix that by exiting if txq->entries is NULL. Signed-off-by: Emmanuel Grumbach Signed-off-by: Luca Coelho Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/iwlwifi.20210115130252.173359fc236d.I75c7c2397d20df8d7fbc24cb16a5232d5c551889@changeid --- drivers/net/wireless/intel/iwlwifi/queue/tx.c | 49 +++++++++---------- 1 file changed, 24 insertions(+), 25 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/queue/tx.c b/drivers/net/wireless/intel/iwlwifi/queue/tx.c index 27eea909e32da..62c0c4cbe481a 100644 --- a/drivers/net/wireless/intel/iwlwifi/queue/tx.c +++ b/drivers/net/wireless/intel/iwlwifi/queue/tx.c @@ -142,26 +142,25 @@ void iwl_txq_gen2_free_tfd(struct iwl_trans *trans, struct iwl_txq *txq) * idx is bounded by n_window */ int idx = iwl_txq_get_cmd_index(txq, txq->read_ptr); + struct sk_buff *skb; lockdep_assert_held(&txq->lock); + if (!txq->entries) + return; + iwl_txq_gen2_tfd_unmap(trans, &txq->entries[idx].meta, iwl_txq_get_tfd(trans, txq, idx)); - /* free SKB */ - if (txq->entries) { - struct sk_buff *skb; - - skb = txq->entries[idx].skb; + skb = txq->entries[idx].skb; - /* Can be called from irqs-disabled context - * If skb is not NULL, it means that the whole queue is being - * freed and that the queue is not empty - free the skb - */ - if (skb) { - iwl_op_mode_free_skb(trans->op_mode, skb); - txq->entries[idx].skb = NULL; - } + /* Can be called from irqs-disabled context + * If skb is not NULL, it means that the whole queue is being + * freed and that the queue is not empty - free the skb + */ + if (skb) { + iwl_op_mode_free_skb(trans->op_mode, skb); + txq->entries[idx].skb = NULL; } } @@ -1494,28 +1493,28 @@ void iwl_txq_free_tfd(struct iwl_trans *trans, struct iwl_txq *txq) */ int rd_ptr = txq->read_ptr; int idx = iwl_txq_get_cmd_index(txq, rd_ptr); + struct sk_buff *skb; lockdep_assert_held(&txq->lock); + if (!txq->entries) + return; + /* We have only q->n_window txq->entries, but we use * TFD_QUEUE_SIZE_MAX tfds */ iwl_txq_gen1_tfd_unmap(trans, &txq->entries[idx].meta, txq, rd_ptr); /* free SKB */ - if (txq->entries) { - struct sk_buff *skb; - - skb = txq->entries[idx].skb; + skb = txq->entries[idx].skb; - /* Can be called from irqs-disabled context - * If skb is not NULL, it means that the whole queue is being - * freed and that the queue is not empty - free the skb - */ - if (skb) { - iwl_op_mode_free_skb(trans->op_mode, skb); - txq->entries[idx].skb = NULL; - } + /* Can be called from irqs-disabled context + * If skb is not NULL, it means that the whole queue is being + * freed and that the queue is not empty - free the skb + */ + if (skb) { + iwl_op_mode_free_skb(trans->op_mode, skb); + txq->entries[idx].skb = NULL; } } -- GitLab From ed0022da8bd9a3ba1c0e1497457be28d52afa7e1 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 15 Jan 2021 13:05:54 +0200 Subject: [PATCH 1394/2012] iwlwifi: pcie: set LTR on more devices To avoid completion timeouts during device boot, set up the LTR timeouts on more devices - similar to what we had before for AX210. This also corrects the AX210 workaround to be done only on discrete (non-integrated) devices, otherwise the registers have no effect. Signed-off-by: Johannes Berg Fixes: edb625208d84 ("iwlwifi: pcie: set LTR to avoid completion timeout") Signed-off-by: Luca Coelho Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/iwlwifi.20210115130252.fb819e19530b.I0396f82922db66426f52fbb70d32a29c8fd66951@changeid --- drivers/net/wireless/intel/iwlwifi/iwl-prph.h | 6 +++ .../intel/iwlwifi/pcie/ctxt-info-gen3.c | 39 +++++++++++-------- 2 files changed, 28 insertions(+), 17 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-prph.h b/drivers/net/wireless/intel/iwlwifi/iwl-prph.h index 0b03fdedc1f70..1158e256f6012 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-prph.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-prph.h @@ -301,6 +301,12 @@ #define RADIO_RSP_ADDR_POS (6) #define RADIO_RSP_RD_CMD (3) +/* LTR control (Qu only) */ +#define HPM_MAC_LTR_CSR 0xa0348c +#define HPM_MAC_LRT_ENABLE_ALL 0xf +/* also uses CSR_LTR_* for values */ +#define HPM_UMAC_LTR 0xa03480 + /* FW monitor */ #define MON_BUFF_SAMPLE_CTL (0xa03c00) #define MON_BUFF_BASE_ADDR (0xa03c1c) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/ctxt-info-gen3.c b/drivers/net/wireless/intel/iwlwifi/pcie/ctxt-info-gen3.c index afe5852284356..342a53e4488c7 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/ctxt-info-gen3.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/ctxt-info-gen3.c @@ -75,6 +75,15 @@ int iwl_pcie_ctxt_info_gen3_init(struct iwl_trans *trans, const struct fw_img *fw) { struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); + u32 ltr_val = CSR_LTR_LONG_VAL_AD_NO_SNOOP_REQ | + u32_encode_bits(CSR_LTR_LONG_VAL_AD_SCALE_USEC, + CSR_LTR_LONG_VAL_AD_NO_SNOOP_SCALE) | + u32_encode_bits(250, + CSR_LTR_LONG_VAL_AD_NO_SNOOP_VAL) | + CSR_LTR_LONG_VAL_AD_SNOOP_REQ | + u32_encode_bits(CSR_LTR_LONG_VAL_AD_SCALE_USEC, + CSR_LTR_LONG_VAL_AD_SNOOP_SCALE) | + u32_encode_bits(250, CSR_LTR_LONG_VAL_AD_SNOOP_VAL); struct iwl_context_info_gen3 *ctxt_info_gen3; struct iwl_prph_scratch *prph_scratch; struct iwl_prph_scratch_ctrl_cfg *prph_sc_ctrl; @@ -206,23 +215,19 @@ int iwl_pcie_ctxt_info_gen3_init(struct iwl_trans *trans, iwl_set_bit(trans, CSR_CTXT_INFO_BOOT_CTRL, CSR_AUTO_FUNC_BOOT_ENA); - if (trans->trans_cfg->device_family == IWL_DEVICE_FAMILY_AX210) { - /* - * The firmware initializes this again later (to a smaller - * value), but for the boot process initialize the LTR to - * ~250 usec. - */ - u32 val = CSR_LTR_LONG_VAL_AD_NO_SNOOP_REQ | - u32_encode_bits(CSR_LTR_LONG_VAL_AD_SCALE_USEC, - CSR_LTR_LONG_VAL_AD_NO_SNOOP_SCALE) | - u32_encode_bits(250, - CSR_LTR_LONG_VAL_AD_NO_SNOOP_VAL) | - CSR_LTR_LONG_VAL_AD_SNOOP_REQ | - u32_encode_bits(CSR_LTR_LONG_VAL_AD_SCALE_USEC, - CSR_LTR_LONG_VAL_AD_SNOOP_SCALE) | - u32_encode_bits(250, CSR_LTR_LONG_VAL_AD_SNOOP_VAL); - - iwl_write32(trans, CSR_LTR_LONG_VAL_AD, val); + /* + * To workaround hardware latency issues during the boot process, + * initialize the LTR to ~250 usec (see ltr_val above). + * The firmware initializes this again later (to a smaller value). + */ + if ((trans->trans_cfg->device_family == IWL_DEVICE_FAMILY_AX210 || + trans->trans_cfg->device_family == IWL_DEVICE_FAMILY_22000) && + !trans->trans_cfg->integrated) { + iwl_write32(trans, CSR_LTR_LONG_VAL_AD, ltr_val); + } else if (trans->trans_cfg->integrated && + trans->trans_cfg->device_family == IWL_DEVICE_FAMILY_22000) { + iwl_write_prph(trans, HPM_MAC_LTR_CSR, HPM_MAC_LRT_ENABLE_ALL); + iwl_write_prph(trans, HPM_UMAC_LTR, ltr_val); } if (trans->trans_cfg->device_family >= IWL_DEVICE_FAMILY_AX210) -- GitLab From 98c7d21f957b10d9c07a3a60a3a5a8f326a197e5 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Fri, 15 Jan 2021 13:05:55 +0200 Subject: [PATCH 1395/2012] iwlwifi: pcie: add a NULL check in iwl_pcie_txq_unmap I hit a NULL pointer exception in this function when the init flow went really bad. Signed-off-by: Emmanuel Grumbach Signed-off-by: Luca Coelho Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/iwlwifi.20210115130252.2e8da9f2c132.I0234d4b8ddaf70aaa5028a20c863255e05bc1f84@changeid --- drivers/net/wireless/intel/iwlwifi/pcie/tx.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c index 5dda0015522dd..83f4964f3cb29 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c @@ -201,6 +201,11 @@ static void iwl_pcie_txq_unmap(struct iwl_trans *trans, int txq_id) struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); struct iwl_txq *txq = trans->txqs.txq[txq_id]; + if (!txq) { + IWL_ERR(trans, "Trying to free a queue that wasn't allocated?\n"); + return; + } + spin_lock_bh(&txq->lock); while (txq->write_ptr != txq->read_ptr) { IWL_DEBUG_TX_REPLY(trans, "Q %d Free %d\n", -- GitLab From 2d6bc752cc2806366d9a4fd577b3f6c1f7a7e04e Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 15 Jan 2021 13:05:56 +0200 Subject: [PATCH 1396/2012] iwlwifi: pcie: fix context info memory leak If the image loader allocation fails, we leak all the previously allocated memory. Fix this. Signed-off-by: Johannes Berg Signed-off-by: Luca Coelho Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/iwlwifi.20210115130252.97172cbaa67c.I3473233d0ad01a71aa9400832fb2b9f494d88a11@changeid --- .../net/wireless/intel/iwlwifi/pcie/ctxt-info-gen3.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/ctxt-info-gen3.c b/drivers/net/wireless/intel/iwlwifi/pcie/ctxt-info-gen3.c index 342a53e4488c7..5b5134dd49af8 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/ctxt-info-gen3.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/ctxt-info-gen3.c @@ -198,8 +198,10 @@ int iwl_pcie_ctxt_info_gen3_init(struct iwl_trans *trans, /* Allocate IML */ iml_img = dma_alloc_coherent(trans->dev, trans->iml_len, &trans_pcie->iml_dma_addr, GFP_KERNEL); - if (!iml_img) - return -ENOMEM; + if (!iml_img) { + ret = -ENOMEM; + goto err_free_ctxt_info; + } memcpy(iml_img, trans->iml, trans->iml_len); @@ -237,6 +239,11 @@ int iwl_pcie_ctxt_info_gen3_init(struct iwl_trans *trans, return 0; +err_free_ctxt_info: + dma_free_coherent(trans->dev, sizeof(*trans_pcie->ctxt_info_gen3), + trans_pcie->ctxt_info_gen3, + trans_pcie->ctxt_info_dma_addr); + trans_pcie->ctxt_info_gen3 = NULL; err_free_prph_info: dma_free_coherent(trans->dev, sizeof(*prph_info), -- GitLab From 6701317476bbfb1f341aa935ddf75eb73af784f9 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 15 Jan 2021 13:05:57 +0200 Subject: [PATCH 1397/2012] iwlwifi: pcie: use jiffies for memory read spin time limit There's no reason to use ktime_get() since we don't need any better precision than jiffies, and since we no longer disable interrupts around this code (when grabbing NIC access), jiffies will work fine. Use jiffies instead of ktime_get(). This cleanup is preparation for the following patch "iwlwifi: pcie: reschedule in long-running memory reads". The code gets simpler with the weird clock use etc. removed before we add cond_resched(). Signed-off-by: Johannes Berg Signed-off-by: Luca Coelho Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/iwlwifi.20210115130253.621c948b1fad.I3ee9f4bc4e74a0c9125d42fb7c35cd80df4698a1@changeid --- drivers/net/wireless/intel/iwlwifi/pcie/trans.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c index 285e0d5860210..e3760c41b31e5 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c @@ -2107,7 +2107,7 @@ static int iwl_trans_pcie_read_mem(struct iwl_trans *trans, u32 addr, while (offs < dwords) { /* limit the time we spin here under lock to 1/2s */ - ktime_t timeout = ktime_add_us(ktime_get(), 500 * USEC_PER_MSEC); + unsigned long end = jiffies + HZ / 2; if (iwl_trans_grab_nic_access(trans, &flags)) { iwl_write32(trans, HBUS_TARG_MEM_RADDR, @@ -2118,11 +2118,7 @@ static int iwl_trans_pcie_read_mem(struct iwl_trans *trans, u32 addr, HBUS_TARG_MEM_RDAT); offs++; - /* calling ktime_get is expensive so - * do it once in 128 reads - */ - if (offs % 128 == 0 && ktime_after(ktime_get(), - timeout)) + if (time_after(jiffies, end)) break; } iwl_trans_release_nic_access(trans, &flags); -- GitLab From 3d372c4edfd4dffb7dea71c6b096fb414782b776 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 15 Jan 2021 13:05:58 +0200 Subject: [PATCH 1398/2012] iwlwifi: pcie: reschedule in long-running memory reads If we spin for a long time in memory reads that (for some reason in hardware) take a long time, then we'll eventually get messages such as watchdog: BUG: soft lockup - CPU#2 stuck for 24s! [kworker/2:2:272] This is because the reading really does take a very long time, and we don't schedule, so we're hogging the CPU with this task, at least if CONFIG_PREEMPT is not set, e.g. with CONFIG_PREEMPT_VOLUNTARY=y. Previously I misinterpreted the situation and thought that this was only going to happen if we had interrupts disabled, and then fixed this (which is good anyway, however), but that didn't always help; looking at it again now I realized that the spin unlock will only reschedule if CONFIG_PREEMPT is used. In order to avoid this issue, change the code to cond_resched() if we've been spinning for too long here. Signed-off-by: Johannes Berg Fixes: 04516706bb99 ("iwlwifi: pcie: limit memory read spin time") Signed-off-by: Luca Coelho Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/iwlwifi.20210115130253.217a9d6a6a12.If964cb582ab0aaa94e81c4ff3b279eaafda0fd3f@changeid --- drivers/net/wireless/intel/iwlwifi/pcie/trans.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c index e3760c41b31e5..ab93a848a4667 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c @@ -2108,6 +2108,7 @@ static int iwl_trans_pcie_read_mem(struct iwl_trans *trans, u32 addr, while (offs < dwords) { /* limit the time we spin here under lock to 1/2s */ unsigned long end = jiffies + HZ / 2; + bool resched = false; if (iwl_trans_grab_nic_access(trans, &flags)) { iwl_write32(trans, HBUS_TARG_MEM_RADDR, @@ -2118,10 +2119,15 @@ static int iwl_trans_pcie_read_mem(struct iwl_trans *trans, u32 addr, HBUS_TARG_MEM_RDAT); offs++; - if (time_after(jiffies, end)) + if (time_after(jiffies, end)) { + resched = true; break; + } } iwl_trans_release_nic_access(trans, &flags); + + if (resched) + cond_resched(); } else { return -EBUSY; } -- GitLab From aefbe5c445c7e2f0e082b086ba1e45502dac4b0e Mon Sep 17 00:00:00 2001 From: Matt Chen Date: Fri, 22 Jan 2021 14:52:36 +0200 Subject: [PATCH 1399/2012] iwlwifi: mvm: fix the return type for DSM functions 1 and 2 The return type value of functions 1 and 2 were considered to be an integer inside a buffer, but they can also be only an integer, without the buffer. Fix the code in iwl_acpi_get_dsm_u8() to handle it as a single integer value, as well as packed inside a buffer. Signed-off-by: Matt Chen Fixes: 9db93491f29e ("iwlwifi: acpi: support device specific method (DSM)") Signed-off-by: Luca Coelho Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/iwlwifi.20210122144849.5757092adcd6.Ic24524627b899c9a01af38107a62a626bdf5ae3a@changeid --- drivers/net/wireless/intel/iwlwifi/fw/acpi.c | 65 +++++++++++++++----- drivers/net/wireless/intel/iwlwifi/fw/acpi.h | 7 ++- drivers/net/wireless/intel/iwlwifi/mvm/fw.c | 25 ++++---- 3 files changed, 68 insertions(+), 29 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/fw/acpi.c b/drivers/net/wireless/intel/iwlwifi/fw/acpi.c index 15248b0643803..d8b7776a8dde1 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/acpi.c +++ b/drivers/net/wireless/intel/iwlwifi/fw/acpi.c @@ -80,19 +80,45 @@ static void *iwl_acpi_get_dsm_object(struct device *dev, int rev, int func, } /* - * Evaluate a DSM with no arguments and a single u8 return value (inside a - * buffer object), verify and return that value. + * Generic function to evaluate a DSM with no arguments + * and an integer return value, + * (as an integer object or inside a buffer object), + * verify and assign the value in the "value" parameter. + * return 0 in success and the appropriate errno otherwise. */ -int iwl_acpi_get_dsm_u8(struct device *dev, int rev, int func) +static int iwl_acpi_get_dsm_integer(struct device *dev, int rev, int func, + u64 *value, size_t expected_size) { union acpi_object *obj; - int ret; + int ret = 0; obj = iwl_acpi_get_dsm_object(dev, rev, func, NULL); - if (IS_ERR(obj)) + if (IS_ERR(obj)) { + IWL_DEBUG_DEV_RADIO(dev, + "Failed to get DSM object. func= %d\n", + func); return -ENOENT; + } + + if (obj->type == ACPI_TYPE_INTEGER) { + *value = obj->integer.value; + } else if (obj->type == ACPI_TYPE_BUFFER) { + __le64 le_value = 0; - if (obj->type != ACPI_TYPE_BUFFER) { + if (WARN_ON_ONCE(expected_size > sizeof(le_value))) + return -EINVAL; + + /* if the buffer size doesn't match the expected size */ + if (obj->buffer.length != expected_size) + IWL_DEBUG_DEV_RADIO(dev, + "ACPI: DSM invalid buffer size, padding or truncating (%d)\n", + obj->buffer.length); + + /* assuming LE from Intel BIOS spec */ + memcpy(&le_value, obj->buffer.pointer, + min_t(size_t, expected_size, (size_t)obj->buffer.length)); + *value = le64_to_cpu(le_value); + } else { IWL_DEBUG_DEV_RADIO(dev, "ACPI: DSM method did not return a valid object, type=%d\n", obj->type); @@ -100,15 +126,6 @@ int iwl_acpi_get_dsm_u8(struct device *dev, int rev, int func) goto out; } - if (obj->buffer.length != sizeof(u8)) { - IWL_DEBUG_DEV_RADIO(dev, - "ACPI: DSM method returned invalid buffer, length=%d\n", - obj->buffer.length); - ret = -EINVAL; - goto out; - } - - ret = obj->buffer.pointer[0]; IWL_DEBUG_DEV_RADIO(dev, "ACPI: DSM method evaluated: func=%d, ret=%d\n", func, ret); @@ -116,6 +133,24 @@ out: ACPI_FREE(obj); return ret; } + +/* + * Evaluate a DSM with no arguments and a u8 return value, + */ +int iwl_acpi_get_dsm_u8(struct device *dev, int rev, int func, u8 *value) +{ + int ret; + u64 val; + + ret = iwl_acpi_get_dsm_integer(dev, rev, func, &val, sizeof(u8)); + + if (ret < 0) + return ret; + + /* cast val (u64) to be u8 */ + *value = (u8)val; + return 0; +} IWL_EXPORT_SYMBOL(iwl_acpi_get_dsm_u8); union acpi_object *iwl_acpi_get_wifi_pkg(struct device *dev, diff --git a/drivers/net/wireless/intel/iwlwifi/fw/acpi.h b/drivers/net/wireless/intel/iwlwifi/fw/acpi.h index 042dd247d387d..1cce30d1ef559 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/acpi.h +++ b/drivers/net/wireless/intel/iwlwifi/fw/acpi.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */ /* * Copyright (C) 2017 Intel Deutschland GmbH - * Copyright (C) 2018-2020 Intel Corporation + * Copyright (C) 2018-2021 Intel Corporation */ #ifndef __iwl_fw_acpi__ #define __iwl_fw_acpi__ @@ -99,7 +99,7 @@ struct iwl_fw_runtime; void *iwl_acpi_get_object(struct device *dev, acpi_string method); -int iwl_acpi_get_dsm_u8(struct device *dev, int rev, int func); +int iwl_acpi_get_dsm_u8(struct device *dev, int rev, int func, u8 *value); union acpi_object *iwl_acpi_get_wifi_pkg(struct device *dev, union acpi_object *data, @@ -159,7 +159,8 @@ static inline void *iwl_acpi_get_dsm_object(struct device *dev, int rev, return ERR_PTR(-ENOENT); } -static inline int iwl_acpi_get_dsm_u8(struct device *dev, int rev, int func) +static inline +int iwl_acpi_get_dsm_u8(struct device *dev, int rev, int func, u8 *value) { return -ENOENT; } diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c index 0637eb1cff4e5..313e9f106f465 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c @@ -1090,20 +1090,22 @@ static void iwl_mvm_tas_init(struct iwl_mvm *mvm) static u8 iwl_mvm_eval_dsm_indonesia_5g2(struct iwl_mvm *mvm) { + u8 value; + int ret = iwl_acpi_get_dsm_u8((&mvm->fwrt)->dev, 0, - DSM_FUNC_ENABLE_INDONESIA_5G2); + DSM_FUNC_ENABLE_INDONESIA_5G2, &value); if (ret < 0) IWL_DEBUG_RADIO(mvm, "Failed to evaluate DSM function ENABLE_INDONESIA_5G2, ret=%d\n", ret); - else if (ret >= DSM_VALUE_INDONESIA_MAX) + else if (value >= DSM_VALUE_INDONESIA_MAX) IWL_DEBUG_RADIO(mvm, - "DSM function ENABLE_INDONESIA_5G2 return invalid value, ret=%d\n", - ret); + "DSM function ENABLE_INDONESIA_5G2 return invalid value, value=%d\n", + value); - else if (ret == DSM_VALUE_INDONESIA_ENABLE) { + else if (value == DSM_VALUE_INDONESIA_ENABLE) { IWL_DEBUG_RADIO(mvm, "Evaluated DSM function ENABLE_INDONESIA_5G2: Enabling 5g2\n"); return DSM_VALUE_INDONESIA_ENABLE; @@ -1114,25 +1116,26 @@ static u8 iwl_mvm_eval_dsm_indonesia_5g2(struct iwl_mvm *mvm) static u8 iwl_mvm_eval_dsm_disable_srd(struct iwl_mvm *mvm) { + u8 value; int ret = iwl_acpi_get_dsm_u8((&mvm->fwrt)->dev, 0, - DSM_FUNC_DISABLE_SRD); + DSM_FUNC_DISABLE_SRD, &value); if (ret < 0) IWL_DEBUG_RADIO(mvm, "Failed to evaluate DSM function DISABLE_SRD, ret=%d\n", ret); - else if (ret >= DSM_VALUE_SRD_MAX) + else if (value >= DSM_VALUE_SRD_MAX) IWL_DEBUG_RADIO(mvm, - "DSM function DISABLE_SRD return invalid value, ret=%d\n", - ret); + "DSM function DISABLE_SRD return invalid value, value=%d\n", + value); - else if (ret == DSM_VALUE_SRD_PASSIVE) { + else if (value == DSM_VALUE_SRD_PASSIVE) { IWL_DEBUG_RADIO(mvm, "Evaluated DSM function DISABLE_SRD: setting SRD to passive\n"); return DSM_VALUE_SRD_PASSIVE; - } else if (ret == DSM_VALUE_SRD_DISABLE) { + } else if (value == DSM_VALUE_SRD_DISABLE) { IWL_DEBUG_RADIO(mvm, "Evaluated DSM function DISABLE_SRD: disabling SRD\n"); return DSM_VALUE_SRD_DISABLE; -- GitLab From e223e42aac30bf81f9302c676cdf58cf2bf36950 Mon Sep 17 00:00:00 2001 From: Gregory Greenman Date: Fri, 22 Jan 2021 14:52:37 +0200 Subject: [PATCH 1400/2012] iwlwifi: mvm: invalidate IDs of internal stations at mvm start Having sta_id not set for aux_sta and snif_sta can potentially lead to a hard to debug issue in case remove station is called without an add. In this case sta_id 0, an unrelated regular station, will be removed. In fact, we do have a FW assert that occures rarely and from the debug data analysis it looks like sta_id 0 is removed by mistake, though it's hard to pinpoint the exact flow. The WARN_ON in this patch should help to find it. Signed-off-by: Gregory Greenman Signed-off-by: Luca Coelho Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/iwlwifi.20210122144849.5dc6dd9b22d5.I2add1b5ad24d0d0a221de79d439c09f88fcaf15d@changeid --- drivers/net/wireless/intel/iwlwifi/mvm/ops.c | 4 ++++ drivers/net/wireless/intel/iwlwifi/mvm/sta.c | 6 ++++++ 2 files changed, 10 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c index 98f62d78cf9ca..03b41d911338d 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c @@ -791,6 +791,10 @@ iwl_op_mode_mvm_start(struct iwl_trans *trans, const struct iwl_cfg *cfg, if (!mvm->scan_cmd) goto out_free; + /* invalidate ids to prevent accidental removal of sta_id 0 */ + mvm->aux_sta.sta_id = IWL_MVM_INVALID_STA; + mvm->snif_sta.sta_id = IWL_MVM_INVALID_STA; + /* Set EBS as successful as long as not stated otherwise by the FW. */ mvm->last_ebs_successful = true; diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c index dc174410bf9c2..578c353ae02c9 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c @@ -2057,6 +2057,9 @@ int iwl_mvm_rm_snif_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif) lockdep_assert_held(&mvm->mutex); + if (WARN_ON_ONCE(mvm->snif_sta.sta_id == IWL_MVM_INVALID_STA)) + return -EINVAL; + iwl_mvm_disable_txq(mvm, NULL, mvm->snif_queue, IWL_MAX_TID_COUNT, 0); ret = iwl_mvm_rm_sta_common(mvm, mvm->snif_sta.sta_id); if (ret) @@ -2071,6 +2074,9 @@ int iwl_mvm_rm_aux_sta(struct iwl_mvm *mvm) lockdep_assert_held(&mvm->mutex); + if (WARN_ON_ONCE(mvm->aux_sta.sta_id == IWL_MVM_INVALID_STA)) + return -EINVAL; + iwl_mvm_disable_txq(mvm, NULL, mvm->aux_queue, IWL_MAX_TID_COUNT, 0); ret = iwl_mvm_rm_sta_common(mvm, mvm->aux_sta.sta_id); if (ret) -- GitLab From 16062c12edb8ed2dfb15e6a914ff4edf858ab9e0 Mon Sep 17 00:00:00 2001 From: Luca Coelho Date: Fri, 22 Jan 2021 14:52:38 +0200 Subject: [PATCH 1401/2012] iwlwifi: pcie: add rules to match Qu with Hr2 Until now we have been relying on matching the PCI ID and subsystem device ID in order to recognize Qu devices with Hr2. Add rules to match these devices, so that we don't have to add a new rule for every new ID we get. Signed-off-by: Luca Coelho Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/iwlwifi.20210122144849.591ce253ddd8.Ia4b9cc2c535625890c6d6b560db97ee9f2d5ca3b@changeid --- .../net/wireless/intel/iwlwifi/cfg/22000.c | 25 +++++++++++++++++++ .../net/wireless/intel/iwlwifi/iwl-config.h | 3 +++ drivers/net/wireless/intel/iwlwifi/pcie/drv.c | 10 ++++++++ 3 files changed, 38 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/cfg/22000.c b/drivers/net/wireless/intel/iwlwifi/cfg/22000.c index 7220fc8fd9b0d..8280092066e77 100644 --- a/drivers/net/wireless/intel/iwlwifi/cfg/22000.c +++ b/drivers/net/wireless/intel/iwlwifi/cfg/22000.c @@ -314,6 +314,7 @@ const struct iwl_cfg_trans_params iwl_ma_trans_cfg = { const char iwl_ax101_name[] = "Intel(R) Wi-Fi 6 AX101"; const char iwl_ax200_name[] = "Intel(R) Wi-Fi 6 AX200 160MHz"; const char iwl_ax201_name[] = "Intel(R) Wi-Fi 6 AX201 160MHz"; +const char iwl_ax203_name[] = "Intel(R) Wi-Fi 6 AX203"; const char iwl_ax211_name[] = "Intel(R) Wi-Fi 6 AX211 160MHz"; const char iwl_ax411_name[] = "Intel(R) Wi-Fi 6 AX411 160MHz"; const char iwl_ma_name[] = "Intel(R) Wi-Fi 6"; @@ -340,6 +341,18 @@ const struct iwl_cfg iwl_qu_b0_hr1_b0 = { .num_rbds = IWL_NUM_RBDS_22000_HE, }; +const struct iwl_cfg iwl_qu_b0_hr_b0 = { + .fw_name_pre = IWL_QU_B_HR_B_FW_PRE, + IWL_DEVICE_22500, + /* + * This device doesn't support receiving BlockAck with a large bitmap + * so we need to restrict the size of transmitted aggregation to the + * HT size; mac80211 would otherwise pick the HE max (256) by default. + */ + .max_tx_agg_size = IEEE80211_MAX_AMPDU_BUF_HT, + .num_rbds = IWL_NUM_RBDS_22000_HE, +}; + const struct iwl_cfg iwl_ax201_cfg_qu_hr = { .name = "Intel(R) Wi-Fi 6 AX201 160MHz", .fw_name_pre = IWL_QU_B_HR_B_FW_PRE, @@ -366,6 +379,18 @@ const struct iwl_cfg iwl_qu_c0_hr1_b0 = { .num_rbds = IWL_NUM_RBDS_22000_HE, }; +const struct iwl_cfg iwl_qu_c0_hr_b0 = { + .fw_name_pre = IWL_QU_C_HR_B_FW_PRE, + IWL_DEVICE_22500, + /* + * This device doesn't support receiving BlockAck with a large bitmap + * so we need to restrict the size of transmitted aggregation to the + * HT size; mac80211 would otherwise pick the HE max (256) by default. + */ + .max_tx_agg_size = IEEE80211_MAX_AMPDU_BUF_HT, + .num_rbds = IWL_NUM_RBDS_22000_HE, +}; + const struct iwl_cfg iwl_ax201_cfg_qu_c0_hr_b0 = { .name = "Intel(R) Wi-Fi 6 AX201 160MHz", .fw_name_pre = IWL_QU_C_HR_B_FW_PRE, diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-config.h b/drivers/net/wireless/intel/iwlwifi/iwl-config.h index 27cb0406ba9a2..4826f5575dae6 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-config.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-config.h @@ -491,6 +491,7 @@ extern const char iwl9260_killer_1550_name[]; extern const char iwl9560_killer_1550i_name[]; extern const char iwl9560_killer_1550s_name[]; extern const char iwl_ax200_name[]; +extern const char iwl_ax203_name[]; extern const char iwl_ax201_name[]; extern const char iwl_ax101_name[]; extern const char iwl_ax200_killer_1650w_name[]; @@ -574,6 +575,8 @@ extern const struct iwl_cfg iwl9560_2ac_cfg_soc; extern const struct iwl_cfg iwl_qu_b0_hr1_b0; extern const struct iwl_cfg iwl_qu_c0_hr1_b0; extern const struct iwl_cfg iwl_quz_a0_hr1_b0; +extern const struct iwl_cfg iwl_qu_b0_hr_b0; +extern const struct iwl_cfg iwl_qu_c0_hr_b0; extern const struct iwl_cfg iwl_ax200_cfg_cc; extern const struct iwl_cfg iwl_ax201_cfg_qu_hr; extern const struct iwl_cfg iwl_ax201_cfg_qu_hr; diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c index 965982612e740..ed3f5b7aa71e9 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c @@ -910,6 +910,11 @@ static const struct iwl_dev_info iwl_dev_info_table[] = { IWL_CFG_RF_TYPE_HR1, IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_ANY, iwl_qu_b0_hr1_b0, iwl_ax101_name), + _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, + IWL_CFG_MAC_TYPE_QU, SILICON_C_STEP, + IWL_CFG_RF_TYPE_HR2, IWL_CFG_ANY, + IWL_CFG_ANY, IWL_CFG_ANY, + iwl_qu_b0_hr_b0, iwl_ax203_name), /* Qu C step */ _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, @@ -917,6 +922,11 @@ static const struct iwl_dev_info iwl_dev_info_table[] = { IWL_CFG_RF_TYPE_HR1, IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_ANY, iwl_qu_c0_hr1_b0, iwl_ax101_name), + _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, + IWL_CFG_MAC_TYPE_QU, SILICON_C_STEP, + IWL_CFG_RF_TYPE_HR2, IWL_CFG_ANY, + IWL_CFG_ANY, IWL_CFG_ANY, + iwl_qu_c0_hr_b0, iwl_ax203_name), /* QuZ */ _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, -- GitLab From 96d2bfb7948a96709ba57084d64ac56c1730557c Mon Sep 17 00:00:00 2001 From: Shaul Triebitz Date: Fri, 22 Jan 2021 14:52:39 +0200 Subject: [PATCH 1402/2012] iwlwifi: mvm: clear IN_D3 after wowlan status cmd In D3 resume flow, avoid the following race where sending packets before updating the sequence number (sequence number received from the wowlan status command response): Thread 1: __iwl_mvm_resume clears IWL_MVM_STATUS_IN_D3 and is cut by thread 2 before reaching iwl_mvm_query_wakeup_reasons. Thread 2: iwl_mvm_mac_itxq_xmit calls iwl_mvm_tx_skb since IWL_MVM_STATUS_IN_D3 is not set using a wrong sequence number. Thread 1: __iwl_mvm_resume continues and calls iwl_mvm_query_wakeup_reasons updating the sequence number received from the firmware. The next packet that will be sent now will cause sysassert 0x1096. Fix the bug by moving 'clear IWL_MVM_STATUS_IN_D3' to after sending the wowlan status command and updating the sequence number. Signed-off-by: Shaul Triebitz Signed-off-by: Luca Coelho Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/iwlwifi.20210122144849.fe927ec939c6.I103d3321fb55da7e6c6c51582cfadf94eb8b6c58@changeid --- drivers/net/wireless/intel/iwlwifi/mvm/d3.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/d3.c b/drivers/net/wireless/intel/iwlwifi/mvm/d3.c index c025188fa9bc5..df018972a46bf 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/d3.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/d3.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause /* - * Copyright (C) 2012-2014, 2018-2020 Intel Corporation + * Copyright (C) 2012-2014, 2018-2021 Intel Corporation * Copyright (C) 2013-2015 Intel Mobile Communications GmbH * Copyright (C) 2016-2017 Intel Deutschland GmbH */ @@ -2032,8 +2032,6 @@ static int __iwl_mvm_resume(struct iwl_mvm *mvm, bool test) mutex_lock(&mvm->mutex); - clear_bit(IWL_MVM_STATUS_IN_D3, &mvm->status); - /* get the BSS vif pointer again */ vif = iwl_mvm_get_bss_vif(mvm); if (IS_ERR_OR_NULL(vif)) @@ -2148,6 +2146,8 @@ out_iterate: iwl_mvm_d3_disconnect_iter, keep ? vif : NULL); out: + clear_bit(IWL_MVM_STATUS_IN_D3, &mvm->status); + /* no need to reset the device in unified images, if successful */ if (unified_image && !ret) { /* nothing else to do if we already sent D0I3_END_CMD */ -- GitLab From 4886460c4d1576e85b12601b8b328278a483df86 Mon Sep 17 00:00:00 2001 From: Matti Gottlieb Date: Fri, 22 Jan 2021 14:52:40 +0200 Subject: [PATCH 1403/2012] iwlwifi: Fix IWL_SUBDEVICE_NO_160 macro to use the correct bit. The bit that indicates if the device supports 160MHZ is bit #9. The macro checks bit #8. Fix IWL_SUBDEVICE_NO_160 macro to use the correct bit. Signed-off-by: Matti Gottlieb Fixes: d6f2134a3831 ("iwlwifi: add mac/rf types and 160MHz to the device tables") Signed-off-by: Luca Coelho Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/iwlwifi.20210122144849.bddbf9b57a75.I16e09e2b1404b16bfff70852a5a654aa468579e2@changeid --- drivers/net/wireless/intel/iwlwifi/iwl-config.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-config.h b/drivers/net/wireless/intel/iwlwifi/iwl-config.h index 4826f5575dae6..86e1d57df65ed 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-config.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-config.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */ /* - * Copyright (C) 2005-2014, 2018-2020 Intel Corporation + * Copyright (C) 2005-2014, 2018-2021 Intel Corporation * Copyright (C) 2016-2017 Intel Deutschland GmbH */ #ifndef __IWL_CONFIG_H__ @@ -445,7 +445,7 @@ struct iwl_cfg { #define IWL_CFG_CORES_BT_GNSS 0x5 #define IWL_SUBDEVICE_RF_ID(subdevice) ((u16)((subdevice) & 0x00F0) >> 4) -#define IWL_SUBDEVICE_NO_160(subdevice) ((u16)((subdevice) & 0x0100) >> 9) +#define IWL_SUBDEVICE_NO_160(subdevice) ((u16)((subdevice) & 0x0200) >> 9) #define IWL_SUBDEVICE_CORES(subdevice) ((u16)((subdevice) & 0x1C00) >> 10) struct iwl_dev_info { -- GitLab From 7a21b1d4a728a483f07c638ccd8610d4b4f12684 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 22 Jan 2021 14:52:41 +0200 Subject: [PATCH 1404/2012] iwlwifi: mvm: guard against device removal in reprobe If we get into a problem severe enough to attempt a reprobe, we schedule a worker to do that. However, if the problem gets more severe and the device is actually destroyed before this worker has a chance to run, we use a free device. Bump up the reference count of the device until the worker runs to avoid this situation. Signed-off-by: Johannes Berg Signed-off-by: Luca Coelho Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/iwlwifi.20210122144849.871f0892e4b2.I94819e11afd68d875f3e242b98bef724b8236f1e@changeid --- drivers/net/wireless/intel/iwlwifi/mvm/ops.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c index 03b41d911338d..61618f607927d 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c @@ -1209,6 +1209,7 @@ static void iwl_mvm_reprobe_wk(struct work_struct *wk) reprobe = container_of(wk, struct iwl_mvm_reprobe, work); if (device_reprobe(reprobe->dev)) dev_err(reprobe->dev, "reprobe failed!\n"); + put_device(reprobe->dev); kfree(reprobe); module_put(THIS_MODULE); } @@ -1259,7 +1260,7 @@ void iwl_mvm_nic_restart(struct iwl_mvm *mvm, bool fw_error) module_put(THIS_MODULE); return; } - reprobe->dev = mvm->trans->dev; + reprobe->dev = get_device(mvm->trans->dev); INIT_WORK(&reprobe->work, iwl_mvm_reprobe_wk); schedule_work(&reprobe->work); } else if (test_bit(IWL_MVM_STATUS_HW_RESTART_REQUESTED, -- GitLab From 0bed6a2a14afaae240cc431e49c260568488b51c Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 22 Jan 2021 14:52:42 +0200 Subject: [PATCH 1405/2012] iwlwifi: queue: bail out on invalid freeing If we find an entry without an SKB, we currently continue, but that will just result in an infinite loop since we won't increment the read pointer, and will try the same thing over and over again. Fix this. Signed-off-by: Johannes Berg Signed-off-by: Luca Coelho Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/iwlwifi.20210122144849.abe2dedcc3ac.Ia6b03f9eeb617fd819e56dd5376f4bb8edc7b98a@changeid --- drivers/net/wireless/intel/iwlwifi/queue/tx.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/queue/tx.c b/drivers/net/wireless/intel/iwlwifi/queue/tx.c index 62c0c4cbe481a..7ff1bb0ccc9cd 100644 --- a/drivers/net/wireless/intel/iwlwifi/queue/tx.c +++ b/drivers/net/wireless/intel/iwlwifi/queue/tx.c @@ -840,10 +840,8 @@ void iwl_txq_gen2_unmap(struct iwl_trans *trans, int txq_id) int idx = iwl_txq_get_cmd_index(txq, txq->read_ptr); struct sk_buff *skb = txq->entries[idx].skb; - if (WARN_ON_ONCE(!skb)) - continue; - - iwl_txq_free_tso_page(trans, skb); + if (!WARN_ON_ONCE(!skb)) + iwl_txq_free_tso_page(trans, skb); } iwl_txq_gen2_free_tfd(trans, txq); txq->read_ptr = iwl_txq_inc_wrap(trans, txq->read_ptr); -- GitLab From 0acb20a5438c36e0cf2b8bf255f314b59fcca6ef Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Sun, 17 Jan 2021 22:46:01 +0100 Subject: [PATCH 1406/2012] mt7601u: fix kernel crash unplugging the device The following crash log can occur unplugging the usb dongle since, after the urb poison in mt7601u_free_tx_queue(), usb_submit_urb() will always fail resulting in a skb kfree while the skb has been already queued. Fix the issue enqueuing the skb only if usb_submit_urb() succeed. Hardware name: Hewlett-Packard 500-539ng/2B2C, BIOS 80.06 04/01/2015 Workqueue: usb_hub_wq hub_event RIP: 0010:skb_trim+0x2c/0x30 RSP: 0000:ffffb4c88005bba8 EFLAGS: 00010206 RAX: 000000004ad483ee RBX: ffff9a236625dee0 RCX: 000000000000662f RDX: 000000000000000c RSI: 0000000000000000 RDI: ffff9a2343179300 RBP: ffff9a2343179300 R08: 0000000000000001 R09: 0000000000000000 R10: ffff9a23748f7840 R11: 0000000000000001 R12: ffff9a236625e4d4 R13: ffff9a236625dee0 R14: 0000000000001080 R15: 0000000000000008 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007fd410a34ef8 CR3: 00000001416ee001 CR4: 00000000001706f0 Call Trace: mt7601u_tx_status+0x3e/0xa0 [mt7601u] mt7601u_dma_cleanup+0xca/0x110 [mt7601u] mt7601u_cleanup+0x22/0x30 [mt7601u] mt7601u_disconnect+0x22/0x60 [mt7601u] usb_unbind_interface+0x8a/0x270 ? kernfs_find_ns+0x35/0xd0 __device_release_driver+0x17a/0x230 device_release_driver+0x24/0x30 bus_remove_device+0xdb/0x140 device_del+0x18b/0x430 ? kobject_put+0x98/0x1d0 usb_disable_device+0xc6/0x1f0 usb_disconnect.cold+0x7e/0x20a hub_event+0xbf3/0x1870 process_one_work+0x1b6/0x350 worker_thread+0x53/0x3e0 ? process_one_work+0x350/0x350 kthread+0x11b/0x140 ? __kthread_bind_mask+0x60/0x60 ret_from_fork+0x22/0x30 Fixes: 23377c200b2eb ("mt7601u: fix possible memory leak when the device is disconnected") Signed-off-by: Lorenzo Bianconi Acked-by: Jakub Kicinski Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/3b85219f669a63a8ced1f43686de05915a580489.1610919247.git.lorenzo@kernel.org --- drivers/net/wireless/mediatek/mt7601u/dma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/mediatek/mt7601u/dma.c b/drivers/net/wireless/mediatek/mt7601u/dma.c index 47710da5b2a58..af7d1ecb777c0 100644 --- a/drivers/net/wireless/mediatek/mt7601u/dma.c +++ b/drivers/net/wireless/mediatek/mt7601u/dma.c @@ -309,7 +309,6 @@ static int mt7601u_dma_submit_tx(struct mt7601u_dev *dev, } e = &q->e[q->end]; - e->skb = skb; usb_fill_bulk_urb(e->urb, usb_dev, snd_pipe, skb->data, skb->len, mt7601u_complete_tx, q); ret = usb_submit_urb(e->urb, GFP_ATOMIC); @@ -327,6 +326,7 @@ static int mt7601u_dma_submit_tx(struct mt7601u_dev *dev, q->end = (q->end + 1) % q->entries; q->used++; + e->skb = skb; if (q->used >= q->entries) ieee80211_stop_queue(dev->hw, skb_get_queue_mapping(skb)); -- GitLab From d8cbaa3de403af6a9cf56598171d2c130ef56f0d Mon Sep 17 00:00:00 2001 From: Aditya Srivastava Date: Sun, 10 Jan 2021 17:45:21 +0530 Subject: [PATCH 1407/2012] rtlwifi: rtl_pci: fix bool comparison in expressions There are certain conditional expressions in rtl_pci, where a boolean variable is compared with true/false, in forms such as (foo == true) or (false != bar), which does not comply with checkpatch.pl (CHECK: BOOL_COMPARISON), according to which boolean variables should be themselves used in the condition, rather than comparing with true/false E.g., in drivers/net/wireless/realtek/rtlwifi/ps.c, "if (find_p2p_ie == true)" can be replaced with "if (find_p2p_ie)" Replace all such expressions with the bool variables appropriately Signed-off-by: Aditya Srivastava Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20210110121525.2407-2-yashsri421@gmail.com --- drivers/net/wireless/realtek/rtlwifi/ps.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/realtek/rtlwifi/ps.c b/drivers/net/wireless/realtek/rtlwifi/ps.c index f99882255d480..629c03271bde7 100644 --- a/drivers/net/wireless/realtek/rtlwifi/ps.c +++ b/drivers/net/wireless/realtek/rtlwifi/ps.c @@ -798,9 +798,9 @@ static void rtl_p2p_noa_ie(struct ieee80211_hw *hw, void *data, ie += 3 + noa_len; } - if (find_p2p_ie == true) { + if (find_p2p_ie) { if ((p2pinfo->p2p_ps_mode > P2P_PS_NONE) && - (find_p2p_ps_ie == false)) + (!find_p2p_ps_ie)) rtl_p2p_ps_cmd(hw, P2P_PS_DISABLE); } } -- GitLab From f7c76283fc5f532027404a346c69ebd111c92fdc Mon Sep 17 00:00:00 2001 From: Aditya Srivastava Date: Sun, 10 Jan 2021 17:45:22 +0530 Subject: [PATCH 1408/2012] rtlwifi: rtl8192c-common: fix bool comparison in expressions There are certain conditional expressions in rtl8192c-common, where a boolean variable is compared with true/false, in forms such as (foo == true) or (false != bar), which does not comply with checkpatch.pl (CHECK: BOOL_COMPARISON), according to which boolean variables should be themselves used in the condition, rather than comparing with true/false E.g., in drivers/net/wireless/realtek/rtlwifi/rtl8192c/dm_common.c, "else if (initialized == false) {" can be replaced with "else if (!initialized) {" Replace all such expressions with the bool variables appropriately Signed-off-by: Aditya Srivastava Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20210110121525.2407-3-yashsri421@gmail.com --- drivers/net/wireless/realtek/rtlwifi/rtl8192c/dm_common.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192c/dm_common.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192c/dm_common.c index 265a1a336304e..0b6a15c2e5ccd 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8192c/dm_common.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192c/dm_common.c @@ -380,7 +380,7 @@ static void rtl92c_dm_initial_gain_multi_sta(struct ieee80211_hw *hw) initialized = false; dm_digtable->dig_ext_port_stage = DIG_EXT_PORT_STAGE_MAX; return; - } else if (initialized == false) { + } else if (!initialized) { initialized = true; dm_digtable->dig_ext_port_stage = DIG_EXT_PORT_STAGE_0; dm_digtable->cur_igvalue = 0x20; @@ -509,7 +509,7 @@ static void rtl92c_dm_dig(struct ieee80211_hw *hw) { struct rtl_priv *rtlpriv = rtl_priv(hw); - if (rtlpriv->dm.dm_initialgain_enable == false) + if (!rtlpriv->dm.dm_initialgain_enable) return; if (!(rtlpriv->dm.dm_flag & DYNAMIC_FUNC_DIG)) return; -- GitLab From 64338f0dfd6a3be20453789c6a4b2c7246e2aef8 Mon Sep 17 00:00:00 2001 From: Aditya Srivastava Date: Sun, 10 Jan 2021 17:45:23 +0530 Subject: [PATCH 1409/2012] rtlwifi: rtl8188ee: fix bool comparison in expressions There are certain conditional expressions in rtl8188ee, where a boolean variable is compared with true/false, in forms such as (foo == true) or (false != bar), which does not comply with checkpatch.pl (CHECK: BOOL_COMPARISON), according to which boolean variables should be themselves used in the condition, rather than comparing with true/false E.g., in drivers/net/wireless/realtek/rtlwifi/rtl8188ee/dm.c, "if (mac->act_scanning == true)" can be replaced with "if (mac->act_scanning)" Replace all such expressions with the bool variables appropriately Signed-off-by: Aditya Srivastava Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20210110121525.2407-4-yashsri421@gmail.com --- drivers/net/wireless/realtek/rtlwifi/rtl8188ee/dm.c | 8 ++++---- drivers/net/wireless/realtek/rtlwifi/rtl8188ee/hw.c | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/dm.c b/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/dm.c index d10c14c694da8..6f61d6a106272 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/dm.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/dm.c @@ -474,11 +474,11 @@ static void rtl88e_dm_dig(struct ieee80211_hw *hw) u8 dm_dig_max, dm_dig_min; u8 current_igi = dm_dig->cur_igvalue; - if (rtlpriv->dm.dm_initialgain_enable == false) + if (!rtlpriv->dm.dm_initialgain_enable) return; - if (dm_dig->dig_enable_flag == false) + if (!dm_dig->dig_enable_flag) return; - if (mac->act_scanning == true) + if (mac->act_scanning) return; if (mac->link_state >= MAC80211_LINKED) @@ -1637,7 +1637,7 @@ static void rtl88e_dm_fast_ant_training(struct ieee80211_hw *hw) } } - if (bpkt_filter_match == false) { + if (!bpkt_filter_match) { rtl_set_bbreg(hw, DM_REG_TXAGC_A_1_MCS32_11N, BIT(16), 0); rtl_set_bbreg(hw, DM_REG_IGI_A_11N, BIT(7), 0); diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/hw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/hw.c index bd9160b166c56..861cc663ca930 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/hw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/hw.c @@ -1269,12 +1269,12 @@ void rtl88ee_set_check_bssid(struct ieee80211_hw *hw, bool check_bssid) if (rtlpriv->psc.rfpwr_state != ERFON) return; - if (check_bssid == true) { + if (check_bssid) { reg_rcr |= (RCR_CBSSID_DATA | RCR_CBSSID_BCN); rtlpriv->cfg->ops->set_hw_reg(hw, HW_VAR_RCR, (u8 *)(®_rcr)); _rtl88ee_set_bcn_ctrl_reg(hw, 0, BIT(4)); - } else if (check_bssid == false) { + } else if (!check_bssid) { reg_rcr &= (~(RCR_CBSSID_DATA | RCR_CBSSID_BCN)); _rtl88ee_set_bcn_ctrl_reg(hw, BIT(4), 0); rtlpriv->cfg->ops->set_hw_reg(hw, -- GitLab From 33ae4623d544f1b76e0f656fa49e431b4503f23b Mon Sep 17 00:00:00 2001 From: Aditya Srivastava Date: Sun, 10 Jan 2021 17:45:24 +0530 Subject: [PATCH 1410/2012] rtlwifi: rtl8192se: fix bool comparison in expressions There are certain conditional expressions in rtl8192se, where a boolean variable is compared with true/false, in forms such as (foo == true) or (false != bar), which does not comply with checkpatch.pl (CHECK: BOOL_COMPARISON), according to which boolean variables should be themselves used in the condition, rather than comparing with true/false Replace all such expressions with the bool variables appropriately Signed-off-by: Aditya Srivastava Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20210110121525.2407-5-yashsri421@gmail.com --- drivers/net/wireless/realtek/rtlwifi/rtl8192se/hw.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192se/hw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192se/hw.c index 47fabce5c2359..73a5d8a068fc3 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8192se/hw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192se/hw.c @@ -458,7 +458,7 @@ static u8 _rtl92se_halset_sysclk(struct ieee80211_hw *hw, u8 data) tmpvalue = rtl_read_byte(rtlpriv, SYS_CLKR + 1); bresult = ((tmpvalue & BIT(7)) == (data & BIT(7))); - if ((data & (BIT(6) | BIT(7))) == false) { + if (!(data & (BIT(6) | BIT(7)))) { waitcount = 100; tmpvalue = 0; @@ -1268,7 +1268,7 @@ static u8 _rtl92s_set_sysclk(struct ieee80211_hw *hw, u8 data) tmp = rtl_read_byte(rtlpriv, SYS_CLKR + 1); result = ((tmp & BIT(7)) == (data & BIT(7))); - if ((data & (BIT(6) | BIT(7))) == false) { + if (!(data & (BIT(6) | BIT(7)))) { waitcnt = 100; tmp = 0; -- GitLab From 9264cabc12040dacc50f517e872d26d6e3a8a531 Mon Sep 17 00:00:00 2001 From: Aditya Srivastava Date: Sun, 10 Jan 2021 17:45:25 +0530 Subject: [PATCH 1411/2012] rtlwifi: rtl8821ae: fix bool comparison in expressions There are certain conditional expressions in rtl8821ae, where a boolean variable is compared with true/false, in forms such as (foo == true) or (false != bar), which does not comply with checkpatch.pl (CHECK: BOOL_COMPARISON), according to which boolean variables should be themselves used in the condition, rather than comparing with true/false E.g., in drivers/net/wireless/realtek/rtlwifi/rtl8821ae/phy.c, "if (rtlefuse->autoload_failflag == false)" can be replaced with "if (!rtlefuse->autoload_failflag)" Replace all such expressions with the bool variables appropriately Signed-off-by: Aditya Srivastava Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20210110121525.2407-6-yashsri421@gmail.com --- drivers/net/wireless/realtek/rtlwifi/rtl8821ae/phy.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/phy.c b/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/phy.c index 372d6f8caf06e..e214b9062cc10 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/phy.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/phy.c @@ -1812,7 +1812,7 @@ static bool _rtl8821ae_phy_bb8821a_config_parafile(struct ieee80211_hw *hw) return false; } _rtl8821ae_phy_init_tx_power_by_rate(hw); - if (rtlefuse->autoload_failflag == false) { + if (!rtlefuse->autoload_failflag) { rtstatus = _rtl8821ae_phy_config_bb_with_pgheaderfile(hw, BASEBAND_CONFIG_PHY_REG); } @@ -3980,7 +3980,7 @@ static void _rtl8821ae_iqk_tx(struct ieee80211_hw *hw, enum radio_path path) } } - if (tx0iqkok == false) + if (!tx0iqkok) break; /* TXK fail, Don't do RXK */ if (vdf_enable == 1) { @@ -4090,7 +4090,7 @@ static void _rtl8821ae_iqk_tx(struct ieee80211_hw *hw, enum radio_path path) } } - if (tx0iqkok == false) { /* If RX mode TXK fail, then take TXK Result */ + if (!tx0iqkok) { /* If RX mode TXK fail, then take TXK Result */ tx_x0_rxk[cal] = tx_x0[cal]; tx_y0_rxk[cal] = tx_y0[cal]; tx0iqkok = true; @@ -4249,7 +4249,7 @@ static void _rtl8821ae_iqk_tx(struct ieee80211_hw *hw, enum radio_path path) } } - if (tx0iqkok == false) { /* If RX mode TXK fail, then take TXK Result */ + if (!tx0iqkok) { /* If RX mode TXK fail, then take TXK Result */ tx_x0_rxk[cal] = tx_x0[cal]; tx_y0_rxk[cal] = tx_y0[cal]; tx0iqkok = true; -- GitLab From 6598f32d9dfe2c5324c9dfd7046929104d390c74 Mon Sep 17 00:00:00 2001 From: Chin-Yen Lee Date: Wed, 13 Jan 2021 09:43:42 +0800 Subject: [PATCH 1412/2012] rtw88: 8723de: adjust the LTR setting The LTR mechanism enables PCIE Endpoints to report the service latency requirements and CPU will enter appropriate sleep state to save power based on the LTR value. 8723de provides two registers to config the LTR, and the original setting is too short for CPU to ente sleep state. The patch adjust the LTR setting. Signed-off-by: Chin-Yen Lee Signed-off-by: Ping-Ke Shih Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20210113014342.3615-1-pkshih@realtek.com --- drivers/net/wireless/realtek/rtw88/rtw8723d.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/realtek/rtw88/rtw8723d.c b/drivers/net/wireless/realtek/rtw88/rtw8723d.c index 9268ea8b6dda1..3fdbaf7302c5e 100644 --- a/drivers/net/wireless/realtek/rtw88/rtw8723d.c +++ b/drivers/net/wireless/realtek/rtw88/rtw8723d.c @@ -60,8 +60,8 @@ static const struct rtw_hw_reg rtw8723d_txagc[] = { #define WLAN_MAX_AGG_NR 0x0A #define WLAN_AMPDU_MAX_TIME 0x1C #define WLAN_ANT_SEL 0x82 -#define WLAN_LTR_IDLE_LAT 0x883C883C -#define WLAN_LTR_ACT_LAT 0x880B880B +#define WLAN_LTR_IDLE_LAT 0x90039003 +#define WLAN_LTR_ACT_LAT 0x883c883c #define WLAN_LTR_CTRL1 0xCB004010 #define WLAN_LTR_CTRL2 0x01233425 -- GitLab From 2a9269b1cdc35e7c341a7e0cf310c4c65c7faeec Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Thu, 14 Jan 2021 00:26:26 +0100 Subject: [PATCH 1413/2012] mt7601u: use ieee80211_rx_list to pass frames to the network stack as a batch Similar to mt76 driver, rely on ieee80211_rx_list in order to improve icache footprint Signed-off-by: Lorenzo Bianconi Acked-by: Jakub Kicinski Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/5c72fa2dda45c1ae3f285af80c02f3db23341d85.1610580222.git.lorenzo@kernel.org --- drivers/net/wireless/mediatek/mt7601u/dma.c | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt7601u/dma.c b/drivers/net/wireless/mediatek/mt7601u/dma.c index 98733c23d408b..46d05f8392874 100644 --- a/drivers/net/wireless/mediatek/mt7601u/dma.c +++ b/drivers/net/wireless/mediatek/mt7601u/dma.c @@ -74,7 +74,8 @@ bad_frame: } static void mt7601u_rx_process_seg(struct mt7601u_dev *dev, u8 *data, - u32 seg_len, struct page *p) + u32 seg_len, struct page *p, + struct list_head *list) { struct sk_buff *skb; struct mt7601u_rxwi *rxwi; @@ -104,9 +105,13 @@ static void mt7601u_rx_process_seg(struct mt7601u_dev *dev, u8 *data, if (!skb) return; - spin_lock(&dev->mac_lock); - ieee80211_rx(dev->hw, skb); - spin_unlock(&dev->mac_lock); + local_bh_disable(); + rcu_read_lock(); + + ieee80211_rx_list(dev->hw, NULL, skb, list); + + rcu_read_unlock(); + local_bh_enable(); } static u16 mt7601u_rx_next_seg_len(u8 *data, u32 data_len) @@ -130,6 +135,7 @@ mt7601u_rx_process_entry(struct mt7601u_dev *dev, struct mt7601u_dma_buf_rx *e) u32 seg_len, data_len = e->urb->actual_length; u8 *data = page_address(e->p); struct page *new_p = NULL; + LIST_HEAD(list); int cnt = 0; if (!test_bit(MT7601U_STATE_INITIALIZED, &dev->state)) @@ -140,7 +146,8 @@ mt7601u_rx_process_entry(struct mt7601u_dev *dev, struct mt7601u_dma_buf_rx *e) new_p = dev_alloc_pages(MT_RX_ORDER); while ((seg_len = mt7601u_rx_next_seg_len(data, data_len))) { - mt7601u_rx_process_seg(dev, data, seg_len, new_p ? e->p : NULL); + mt7601u_rx_process_seg(dev, data, seg_len, + new_p ? e->p : NULL, &list); data_len -= seg_len; data += seg_len; @@ -150,6 +157,8 @@ mt7601u_rx_process_entry(struct mt7601u_dev *dev, struct mt7601u_dma_buf_rx *e) if (cnt > 1) trace_mt_rx_dma_aggr(dev, cnt, !!new_p); + netif_receive_skb_list(&list); + if (new_p) { /* we have one extra ref from the allocator */ __free_pages(e->p, MT_RX_ORDER); -- GitLab From cb88d01b67383a095e3f7caeb4cdade5a6cf0417 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Fri, 15 Jan 2021 08:56:13 +0200 Subject: [PATCH 1414/2012] wlcore: Fix command execute failure 19 for wl12xx We can currently get a "command execute failure 19" error on beacon loss if the signal is weak: wlcore: Beacon loss detected. roles:0xff wlcore: Connection loss work (role_id: 0). ... wlcore: ERROR command execute failure 19 ... WARNING: CPU: 0 PID: 1552 at drivers/net/wireless/ti/wlcore/main.c:803 ... (wl12xx_queue_recovery_work.part.0 [wlcore]) (wl12xx_cmd_role_start_sta [wlcore]) (wl1271_op_bss_info_changed [wlcore]) (ieee80211_prep_connection [mac80211]) Error 19 is defined as CMD_STATUS_WRONG_NESTING from the wlcore firmware, and seems to mean that the firmware no longer wants to see the quirk handling for WLCORE_QUIRK_START_STA_FAILS done. This quirk got added with commit 18eab430700d ("wlcore: workaround start_sta problem in wl12xx fw"), and it seems that this already got fixed in the firmware long time ago back in 2012 as wl18xx never had this quirk in place to start with. As we no longer even support firmware that early, to me it seems that it's safe to just drop WLCORE_QUIRK_START_STA_FAILS to fix the error. Looks like earlier firmware got disabled back in 2013 with commit 0e284c074ef9 ("wl12xx: increase minimum singlerole firmware version required"). If it turns out we still need WLCORE_QUIRK_START_STA_FAILS with any firmware that the driver works with, we can simply revert this patch and add extra checks for firmware version used. With this fix wlcore reconnects properly after a beacon loss. Cc: Raz Bouganim Signed-off-by: Tony Lindgren Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20210115065613.7731-1-tony@atomide.com --- drivers/net/wireless/ti/wl12xx/main.c | 3 --- drivers/net/wireless/ti/wlcore/main.c | 15 +-------------- drivers/net/wireless/ti/wlcore/wlcore.h | 3 --- 3 files changed, 1 insertion(+), 20 deletions(-) diff --git a/drivers/net/wireless/ti/wl12xx/main.c b/drivers/net/wireless/ti/wl12xx/main.c index 3c9c623bb4283..9d7dbfe7fe0c3 100644 --- a/drivers/net/wireless/ti/wl12xx/main.c +++ b/drivers/net/wireless/ti/wl12xx/main.c @@ -635,7 +635,6 @@ static int wl12xx_identify_chip(struct wl1271 *wl) wl->quirks |= WLCORE_QUIRK_LEGACY_NVS | WLCORE_QUIRK_DUAL_PROBE_TMPL | WLCORE_QUIRK_TKIP_HEADER_SPACE | - WLCORE_QUIRK_START_STA_FAILS | WLCORE_QUIRK_AP_ZERO_SESSION_ID; wl->sr_fw_name = WL127X_FW_NAME_SINGLE; wl->mr_fw_name = WL127X_FW_NAME_MULTI; @@ -659,7 +658,6 @@ static int wl12xx_identify_chip(struct wl1271 *wl) wl->quirks |= WLCORE_QUIRK_LEGACY_NVS | WLCORE_QUIRK_DUAL_PROBE_TMPL | WLCORE_QUIRK_TKIP_HEADER_SPACE | - WLCORE_QUIRK_START_STA_FAILS | WLCORE_QUIRK_AP_ZERO_SESSION_ID; wl->plt_fw_name = WL127X_PLT_FW_NAME; wl->sr_fw_name = WL127X_FW_NAME_SINGLE; @@ -688,7 +686,6 @@ static int wl12xx_identify_chip(struct wl1271 *wl) wl->quirks |= WLCORE_QUIRK_TX_BLOCKSIZE_ALIGN | WLCORE_QUIRK_DUAL_PROBE_TMPL | WLCORE_QUIRK_TKIP_HEADER_SPACE | - WLCORE_QUIRK_START_STA_FAILS | WLCORE_QUIRK_AP_ZERO_SESSION_ID; wlcore_set_min_fw_ver(wl, WL128X_CHIP_VER, diff --git a/drivers/net/wireless/ti/wlcore/main.c b/drivers/net/wireless/ti/wlcore/main.c index fb0305d075dd3..8509b989940c2 100644 --- a/drivers/net/wireless/ti/wlcore/main.c +++ b/drivers/net/wireless/ti/wlcore/main.c @@ -2872,21 +2872,8 @@ static int wlcore_join(struct wl1271 *wl, struct wl12xx_vif *wlvif) if (is_ibss) ret = wl12xx_cmd_role_start_ibss(wl, wlvif); - else { - if (wl->quirks & WLCORE_QUIRK_START_STA_FAILS) { - /* - * TODO: this is an ugly workaround for wl12xx fw - * bug - we are not able to tx/rx after the first - * start_sta, so make dummy start+stop calls, - * and then call start_sta again. - * this should be fixed in the fw. - */ - wl12xx_cmd_role_start_sta(wl, wlvif); - wl12xx_cmd_role_stop_sta(wl, wlvif); - } - + else ret = wl12xx_cmd_role_start_sta(wl, wlvif); - } return ret; } diff --git a/drivers/net/wireless/ti/wlcore/wlcore.h b/drivers/net/wireless/ti/wlcore/wlcore.h index b7821311ac75b..81c94d390623b 100644 --- a/drivers/net/wireless/ti/wlcore/wlcore.h +++ b/drivers/net/wireless/ti/wlcore/wlcore.h @@ -547,9 +547,6 @@ wlcore_set_min_fw_ver(struct wl1271 *wl, unsigned int chip, /* Each RX/TX transaction requires an end-of-transaction transfer */ #define WLCORE_QUIRK_END_OF_TRANSACTION BIT(0) -/* the first start_role(sta) sometimes doesn't work on wl12xx */ -#define WLCORE_QUIRK_START_STA_FAILS BIT(1) - /* wl127x and SPI don't support SDIO block size alignment */ #define WLCORE_QUIRK_TX_BLOCKSIZE_ALIGN BIT(2) -- GitLab From f43fcaef87a3ec6e234a20c8606342f33a8bd61d Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Sun, 17 Jan 2021 22:46:56 +0100 Subject: [PATCH 1415/2012] mt7601u: process tx URBs with status EPROTO properly Similar to commit 0e40dbd56d67 ("mt7601u: process URBs in status EPROTO properly"), do not process tx URBs if marked with status set to EPROTO. Signed-off-by: Lorenzo Bianconi Acked-by: Jakub Kicinski Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/72392e8341aa8591c0b9962661a6ca26b1198f32.1610919534.git.lorenzo@kernel.org --- drivers/net/wireless/mediatek/mt7601u/dma.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/mediatek/mt7601u/dma.c b/drivers/net/wireless/mediatek/mt7601u/dma.c index 46d05f8392874..1342cf77ef14c 100644 --- a/drivers/net/wireless/mediatek/mt7601u/dma.c +++ b/drivers/net/wireless/mediatek/mt7601u/dma.c @@ -247,6 +247,7 @@ static void mt7601u_complete_tx(struct urb *urb) case -ECONNRESET: case -ESHUTDOWN: case -ENOENT: + case -EPROTO: return; default: dev_err_ratelimited(dev->dev, "tx urb failed: %d\n", -- GitLab From 4832bb371c4175ffb506a96accbb08ef2b2466e7 Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Tue, 19 Jan 2021 11:06:21 +0100 Subject: [PATCH 1416/2012] iwl4965: do not process non-QOS frames on txq->sched_retry path We have already WARN_ON(!qc) for non-QOS frame on txq->sched_retry path, but we continue to process, what makes no sense since tid is not initialized. Non QOS frame should never happen when aggregation is enabled on queue, so do not process that. Patch should fix smatch warning: drivers/net/wireless/intel/iwlegacy/4965-mac.c:2822 il4965_hdl_tx() error: uninitialized symbol 'tid'. Reported-by: kernel test robot Reported-by: Dan Carpenter Signed-off-by: Stanislaw Gruszka Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20210119100621.439134-1-stf_xl@wp.pl --- drivers/net/wireless/intel/iwlegacy/4965-mac.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/net/wireless/intel/iwlegacy/4965-mac.c b/drivers/net/wireless/intel/iwlegacy/4965-mac.c index 28675a4ad8612..98cd06287b436 100644 --- a/drivers/net/wireless/intel/iwlegacy/4965-mac.c +++ b/drivers/net/wireless/intel/iwlegacy/4965-mac.c @@ -2813,8 +2813,10 @@ il4965_hdl_tx(struct il_priv *il, struct il_rx_buf *rxb) spin_lock_irqsave(&il->sta_lock, flags); if (txq->sched_retry) { const u32 scd_ssn = il4965_get_scd_ssn(tx_resp); - struct il_ht_agg *agg = NULL; - WARN_ON(!qc); + struct il_ht_agg *agg; + + if (WARN_ON(!qc)) + goto out; agg = &il->stations[sta_id].tid[tid].agg; @@ -2830,9 +2832,7 @@ il4965_hdl_tx(struct il_priv *il, struct il_rx_buf *rxb) D_TX_REPLY("Retry scheduler reclaim scd_ssn " "%d idx %d\n", scd_ssn, idx); freed = il4965_tx_queue_reclaim(il, txq_id, idx); - if (qc) - il4965_free_tfds_in_queue(il, sta_id, tid, - freed); + il4965_free_tfds_in_queue(il, sta_id, tid, freed); if (il->mac80211_registered && il_queue_space(&txq->q) > txq->q.low_mark && @@ -2862,6 +2862,7 @@ il4965_hdl_tx(struct il_priv *il, struct il_rx_buf *rxb) il_queue_space(&txq->q) > txq->q.low_mark) il_wake_queue(il, txq); } +out: if (qc && likely(sta_id != IL_INVALID_STATION)) il4965_txq_check_empty(il, sta_id, tid, txq_id); -- GitLab From 36af2d5c4433fb40ee2af912c4ac0a30991aecfc Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Fri, 22 Jan 2021 20:53:02 +0800 Subject: [PATCH 1417/2012] ACPI: sysfs: Prefer "compatible" modalias Commit 8765c5ba1949 ("ACPI / scan: Rework modalias creation when "compatible" is present") may create two "MODALIAS=" in one uevent file if specific conditions are met. This breaks systemd-udevd, which assumes each "key" in one uevent file to be unique. The internal implementation of systemd-udevd overwrites the first MODALIAS with the second one, so its kmod rule doesn't load the driver for the first MODALIAS. So if both the ACPI modalias and the OF modalias are present, use the latter to ensure that there will be only one MODALIAS. Link: https://github.com/systemd/systemd/pull/18163 Suggested-by: Mika Westerberg Fixes: 8765c5ba1949 ("ACPI / scan: Rework modalias creation when "compatible" is present") Signed-off-by: Kai-Heng Feng Reviewed-by: Mika Westerberg Reviewed-by: Greg Kroah-Hartman Cc: 4.1+ # 4.1+ [ rjw: Subject and changelog edits ] Signed-off-by: Rafael J. Wysocki --- drivers/acpi/device_sysfs.c | 20 ++++++-------------- 1 file changed, 6 insertions(+), 14 deletions(-) diff --git a/drivers/acpi/device_sysfs.c b/drivers/acpi/device_sysfs.c index 96869f1538b93..bfca116482b8b 100644 --- a/drivers/acpi/device_sysfs.c +++ b/drivers/acpi/device_sysfs.c @@ -251,20 +251,12 @@ int __acpi_device_uevent_modalias(struct acpi_device *adev, if (add_uevent_var(env, "MODALIAS=")) return -ENOMEM; - len = create_pnp_modalias(adev, &env->buf[env->buflen - 1], - sizeof(env->buf) - env->buflen); - if (len < 0) - return len; - - env->buflen += len; - if (!adev->data.of_compatible) - return 0; - - if (len > 0 && add_uevent_var(env, "MODALIAS=")) - return -ENOMEM; - - len = create_of_modalias(adev, &env->buf[env->buflen - 1], - sizeof(env->buf) - env->buflen); + if (adev->data.of_compatible) + len = create_of_modalias(adev, &env->buf[env->buflen - 1], + sizeof(env->buf) - env->buflen); + else + len = create_pnp_modalias(adev, &env->buf[env->buflen - 1], + sizeof(env->buf) - env->buflen); if (len < 0) return len; -- GitLab From 81b704d3e4674e09781d331df73d76675d5ad8cb Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 14 Jan 2021 19:34:22 +0100 Subject: [PATCH 1418/2012] ACPI: thermal: Do not call acpi_thermal_check() directly Calling acpi_thermal_check() from acpi_thermal_notify() directly is problematic if _TMP triggers Notify () on the thermal zone for which it has been evaluated (which happens on some systems), because it causes a new acpi_thermal_notify() invocation to be queued up every time and if that takes place too often, an indefinite number of pending work items may accumulate in kacpi_notify_wq over time. Besides, it is not really useful to queue up a new invocation of acpi_thermal_check() if one of them is pending already. For these reasons, rework acpi_thermal_notify() to queue up a thermal check instead of calling acpi_thermal_check() directly and only allow one thermal check to be pending at a time. Moreover, only allow one acpi_thermal_check_fn() instance at a time to run thermal_zone_device_update() for one thermal zone and make it return early if it sees other instances running for the same thermal zone. While at it, fold acpi_thermal_check() into acpi_thermal_check_fn(), as it is only called from there after the other changes made here. [This issue appears to have been exposed by commit 6d25be5782e4 ("sched/core, workqueues: Distangle worker accounting from rq lock"), but it is unclear why it was not visible earlier.] BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=208877 Reported-by: Stephen Berman Diagnosed-by: Sebastian Andrzej Siewior Signed-off-by: Rafael J. Wysocki Reviewed-by: Sebastian Andrzej Siewior Tested-by: Stephen Berman Cc: All applicable --- drivers/acpi/thermal.c | 46 ++++++++++++++++++++++++++++++------------ 1 file changed, 33 insertions(+), 13 deletions(-) diff --git a/drivers/acpi/thermal.c b/drivers/acpi/thermal.c index 12c0ece746f04..859b1de31ddc0 100644 --- a/drivers/acpi/thermal.c +++ b/drivers/acpi/thermal.c @@ -174,6 +174,8 @@ struct acpi_thermal { struct thermal_zone_device *thermal_zone; int kelvin_offset; /* in millidegrees */ struct work_struct thermal_check_work; + struct mutex thermal_check_lock; + refcount_t thermal_check_count; }; /* -------------------------------------------------------------------------- @@ -495,14 +497,6 @@ static int acpi_thermal_get_trip_points(struct acpi_thermal *tz) return 0; } -static void acpi_thermal_check(void *data) -{ - struct acpi_thermal *tz = data; - - thermal_zone_device_update(tz->thermal_zone, - THERMAL_EVENT_UNSPECIFIED); -} - /* sys I/F for generic thermal sysfs support */ static int thermal_get_temp(struct thermal_zone_device *thermal, int *temp) @@ -900,6 +894,12 @@ static void acpi_thermal_unregister_thermal_zone(struct acpi_thermal *tz) Driver Interface -------------------------------------------------------------------------- */ +static void acpi_queue_thermal_check(struct acpi_thermal *tz) +{ + if (!work_pending(&tz->thermal_check_work)) + queue_work(acpi_thermal_pm_queue, &tz->thermal_check_work); +} + static void acpi_thermal_notify(struct acpi_device *device, u32 event) { struct acpi_thermal *tz = acpi_driver_data(device); @@ -910,17 +910,17 @@ static void acpi_thermal_notify(struct acpi_device *device, u32 event) switch (event) { case ACPI_THERMAL_NOTIFY_TEMPERATURE: - acpi_thermal_check(tz); + acpi_queue_thermal_check(tz); break; case ACPI_THERMAL_NOTIFY_THRESHOLDS: acpi_thermal_trips_update(tz, ACPI_TRIPS_REFRESH_THRESHOLDS); - acpi_thermal_check(tz); + acpi_queue_thermal_check(tz); acpi_bus_generate_netlink_event(device->pnp.device_class, dev_name(&device->dev), event, 0); break; case ACPI_THERMAL_NOTIFY_DEVICES: acpi_thermal_trips_update(tz, ACPI_TRIPS_REFRESH_DEVICES); - acpi_thermal_check(tz); + acpi_queue_thermal_check(tz); acpi_bus_generate_netlink_event(device->pnp.device_class, dev_name(&device->dev), event, 0); break; @@ -1020,7 +1020,25 @@ static void acpi_thermal_check_fn(struct work_struct *work) { struct acpi_thermal *tz = container_of(work, struct acpi_thermal, thermal_check_work); - acpi_thermal_check(tz); + + /* + * In general, it is not sufficient to check the pending bit, because + * subsequent instances of this function may be queued after one of them + * has started running (e.g. if _TMP sleeps). Avoid bailing out if just + * one of them is running, though, because it may have done the actual + * check some time ago, so allow at least one of them to block on the + * mutex while another one is running the update. + */ + if (!refcount_dec_not_one(&tz->thermal_check_count)) + return; + + mutex_lock(&tz->thermal_check_lock); + + thermal_zone_device_update(tz->thermal_zone, THERMAL_EVENT_UNSPECIFIED); + + refcount_inc(&tz->thermal_check_count); + + mutex_unlock(&tz->thermal_check_lock); } static int acpi_thermal_add(struct acpi_device *device) @@ -1052,6 +1070,8 @@ static int acpi_thermal_add(struct acpi_device *device) if (result) goto free_memory; + refcount_set(&tz->thermal_check_count, 3); + mutex_init(&tz->thermal_check_lock); INIT_WORK(&tz->thermal_check_work, acpi_thermal_check_fn); pr_info(PREFIX "%s [%s] (%ld C)\n", acpi_device_name(device), @@ -1117,7 +1137,7 @@ static int acpi_thermal_resume(struct device *dev) tz->state.active |= tz->trips.active[i].flags.enabled; } - queue_work(acpi_thermal_pm_queue, &tz->thermal_check_work); + acpi_queue_thermal_check(tz); return AE_OK; } -- GitLab From ac55ad2b5fadb6af8826963d7d3331c9950a2608 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20H=C3=B6ppner?= Date: Mon, 18 Jan 2021 17:55:18 +0100 Subject: [PATCH 1419/2012] s390/dasd: Fix inconsistent kobject removal MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Our intention was to only remove path kobjects whenever a device is being set offline. However, one corner case was missing. If a device is disabled and enabled (using the IOCTLs BIODASDDISABLE and BIODASDENABLE respectively), the enabling process will call dasd_eckd_reload_device() which itself calls dasd_eckd_read_conf() in order to update path information. During that update, dasd_eckd_clear_conf_data() clears all old data and also removes all kobjects. This will leave us with an inconsistent state of path kobjects and a subsequent path verification leads to a failing kobject creation. Fix this by removing kobjects only in the context of offlining a device as initially intended. Fixes: 19508b204740 ("s390/dasd: Display FC Endpoint Security information via sysfs") Reported-by: Stefan Haberland Signed-off-by: Jan Höppner Reviewed-by: Stefan Haberland Reviewed-by: Cornelia Huck Signed-off-by: Jens Axboe --- drivers/s390/block/dasd_devmap.c | 20 ++++++++++++++------ drivers/s390/block/dasd_eckd.c | 3 ++- drivers/s390/block/dasd_int.h | 2 +- 3 files changed, 17 insertions(+), 8 deletions(-) diff --git a/drivers/s390/block/dasd_devmap.c b/drivers/s390/block/dasd_devmap.c index 16bb135c20aa5..03d27ee9cac65 100644 --- a/drivers/s390/block/dasd_devmap.c +++ b/drivers/s390/block/dasd_devmap.c @@ -1874,18 +1874,26 @@ void dasd_path_create_kobjects(struct dasd_device *device) } EXPORT_SYMBOL(dasd_path_create_kobjects); -/* - * As we keep kobjects for the lifetime of a device, this function must not be - * called anywhere but in the context of offlining a device. - */ -void dasd_path_remove_kobj(struct dasd_device *device, int chp) +static void dasd_path_remove_kobj(struct dasd_device *device, int chp) { if (device->path[chp].in_sysfs) { kobject_put(&device->path[chp].kobj); device->path[chp].in_sysfs = false; } } -EXPORT_SYMBOL(dasd_path_remove_kobj); + +/* + * As we keep kobjects for the lifetime of a device, this function must not be + * called anywhere but in the context of offlining a device. + */ +void dasd_path_remove_kobjects(struct dasd_device *device) +{ + int i; + + for (i = 0; i < 8; i++) + dasd_path_remove_kobj(device, i); +} +EXPORT_SYMBOL(dasd_path_remove_kobjects); int dasd_add_sysfs_files(struct ccw_device *cdev) { diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c index 3caa1ee5f4b0a..65eb87cbbb9b2 100644 --- a/drivers/s390/block/dasd_eckd.c +++ b/drivers/s390/block/dasd_eckd.c @@ -1036,7 +1036,6 @@ static void dasd_eckd_clear_conf_data(struct dasd_device *device) device->path[i].ssid = 0; device->path[i].chpid = 0; dasd_path_notoper(device, i); - dasd_path_remove_kobj(device, i); } } @@ -2173,6 +2172,7 @@ out_err2: device->block = NULL; out_err1: dasd_eckd_clear_conf_data(device); + dasd_path_remove_kobjects(device); kfree(device->private); device->private = NULL; return rc; @@ -2191,6 +2191,7 @@ static void dasd_eckd_uncheck_device(struct dasd_device *device) private->vdsneq = NULL; private->gneq = NULL; dasd_eckd_clear_conf_data(device); + dasd_path_remove_kobjects(device); } static struct dasd_ccw_req * diff --git a/drivers/s390/block/dasd_int.h b/drivers/s390/block/dasd_int.h index 3bc008f9136cc..b8a04c42d1d2e 100644 --- a/drivers/s390/block/dasd_int.h +++ b/drivers/s390/block/dasd_int.h @@ -858,7 +858,7 @@ int dasd_add_sysfs_files(struct ccw_device *); void dasd_remove_sysfs_files(struct ccw_device *); void dasd_path_create_kobj(struct dasd_device *, int); void dasd_path_create_kobjects(struct dasd_device *); -void dasd_path_remove_kobj(struct dasd_device *, int); +void dasd_path_remove_kobjects(struct dasd_device *); struct dasd_device *dasd_device_from_cdev(struct ccw_device *); struct dasd_device *dasd_device_from_cdev_locked(struct ccw_device *); -- GitLab From 56c91a18432b631ca18438841fd1831ef756cabf Mon Sep 17 00:00:00 2001 From: Baoquan He Date: Fri, 22 Jan 2021 15:42:14 +0800 Subject: [PATCH 1420/2012] kernel: kexec: remove the lock operation of system_transition_mutex Function kernel_kexec() is called with lock system_transition_mutex held in reboot system call. While inside kernel_kexec(), it will acquire system_transition_mutex agin. This will lead to dead lock. The dead lock should be easily triggered, it hasn't caused any failure report just because the feature 'kexec jump' is almost not used by anyone as far as I know. An inquiry can be made about who is using 'kexec jump' and where it's used. Before that, let's simply remove the lock operation inside CONFIG_KEXEC_JUMP ifdeffery scope. Fixes: 55f2503c3b69 ("PM / reboot: Eliminate race between reboot and suspend") Signed-off-by: Baoquan He Reported-by: Dan Carpenter Reviewed-by: Pingfan Liu Cc: 4.19+ # 4.19+ Signed-off-by: Rafael J. Wysocki --- kernel/kexec_core.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c index 4f8efc278aa75..aa919585c24b4 100644 --- a/kernel/kexec_core.c +++ b/kernel/kexec_core.c @@ -1134,7 +1134,6 @@ int kernel_kexec(void) #ifdef CONFIG_KEXEC_JUMP if (kexec_image->preserve_context) { - lock_system_sleep(); pm_prepare_console(); error = freeze_processes(); if (error) { @@ -1197,7 +1196,6 @@ int kernel_kexec(void) thaw_processes(); Restore_console: pm_restore_console(); - unlock_system_sleep(); } #endif -- GitLab From 2f96e40212d435b328459ba6b3956395eed8fa9f Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 15 Jan 2021 16:26:17 -0500 Subject: [PATCH 1421/2012] btrfs: fix possible free space tree corruption with online conversion While running btrfs/011 in a loop I would often ASSERT() while trying to add a new free space entry that already existed, or get an EEXIST while adding a new block to the extent tree, which is another indication of double allocation. This occurs because when we do the free space tree population, we create the new root and then populate the tree and commit the transaction. The problem is when you create a new root, the root node and commit root node are the same. During this initial transaction commit we will run all of the delayed refs that were paused during the free space tree generation, and thus begin to cache block groups. While caching block groups the caching thread will be reading from the main root for the free space tree, so as we make allocations we'll be changing the free space tree, which can cause us to add the same range twice which results in either the ASSERT(ret != -EEXIST); in __btrfs_add_free_space, or in a variety of different errors when running delayed refs because of a double allocation. Fix this by marking the fs_info as unsafe to load the free space tree, and fall back on the old slow method. We could be smarter than this, for example caching the block group while we're populating the free space tree, but since this is a serious problem I've opted for the simplest solution. CC: stable@vger.kernel.org # 4.9+ Fixes: a5ed91828518 ("Btrfs: implement the free space B-tree") Reviewed-by: Filipe Manana Signed-off-by: Josef Bacik Signed-off-by: David Sterba --- fs/btrfs/block-group.c | 10 +++++++++- fs/btrfs/ctree.h | 3 +++ fs/btrfs/free-space-tree.c | 10 +++++++++- 3 files changed, 21 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c index 0886e81e55402..48ebc106a606c 100644 --- a/fs/btrfs/block-group.c +++ b/fs/btrfs/block-group.c @@ -673,7 +673,15 @@ static noinline void caching_thread(struct btrfs_work *work) wake_up(&caching_ctl->wait); } - if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) + /* + * If we are in the transaction that populated the free space tree we + * can't actually cache from the free space tree as our commit root and + * real root are the same, so we could change the contents of the blocks + * while caching. Instead do the slow caching in this case, and after + * the transaction has committed we will be safe. + */ + if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE) && + !(test_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags))) ret = load_free_space_tree(caching_ctl); else ret = load_extent_tree_free(caching_ctl); diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 0225c5208f44c..47ca8edafb5e6 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -564,6 +564,9 @@ enum { /* Indicate that we need to cleanup space cache v1 */ BTRFS_FS_CLEANUP_SPACE_CACHE_V1, + + /* Indicate that we can't trust the free space tree for caching yet */ + BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, }; /* diff --git a/fs/btrfs/free-space-tree.c b/fs/btrfs/free-space-tree.c index e33a65bd9a0c2..a33bca94d133e 100644 --- a/fs/btrfs/free-space-tree.c +++ b/fs/btrfs/free-space-tree.c @@ -1150,6 +1150,7 @@ int btrfs_create_free_space_tree(struct btrfs_fs_info *fs_info) return PTR_ERR(trans); set_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags); + set_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags); free_space_root = btrfs_create_tree(trans, BTRFS_FREE_SPACE_TREE_OBJECTID); if (IS_ERR(free_space_root)) { @@ -1171,11 +1172,18 @@ int btrfs_create_free_space_tree(struct btrfs_fs_info *fs_info) btrfs_set_fs_compat_ro(fs_info, FREE_SPACE_TREE); btrfs_set_fs_compat_ro(fs_info, FREE_SPACE_TREE_VALID); clear_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags); + ret = btrfs_commit_transaction(trans); - return btrfs_commit_transaction(trans); + /* + * Now that we've committed the transaction any reading of our commit + * root will be safe, so we can cache from the free space tree now. + */ + clear_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags); + return ret; abort: clear_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags); + clear_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags); btrfs_abort_transaction(trans, ret); btrfs_end_transaction(trans); return ret; -- GitLab From c41ec4529d3448df8998950d7bada757a1b321cf Mon Sep 17 00:00:00 2001 From: Su Yue Date: Thu, 21 Jan 2021 19:39:10 +0800 Subject: [PATCH 1422/2012] btrfs: fix lockdep warning due to seqcount_mutex on 32bit arch This effectively reverts commit d5c8238849e7 ("btrfs: convert data_seqcount to seqcount_mutex_t"). While running fstests on 32 bits test box, many tests failed because of warnings in dmesg. One of those warnings (btrfs/003): [66.441317] WARNING: CPU: 6 PID: 9251 at include/linux/seqlock.h:279 btrfs_remove_chunk+0x58b/0x7b0 [btrfs] [66.441446] CPU: 6 PID: 9251 Comm: btrfs Tainted: G O 5.11.0-rc4-custom+ #5 [66.441449] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS ArchLinux 1.14.0-1 04/01/2014 [66.441451] EIP: btrfs_remove_chunk+0x58b/0x7b0 [btrfs] [66.441472] EAX: 00000000 EBX: 00000001 ECX: c576070c EDX: c6b15803 [66.441475] ESI: 10000000 EDI: 00000000 EBP: c56fbcfc ESP: c56fbc70 [66.441477] DS: 007b ES: 007b FS: 00d8 GS: 00e0 SS: 0068 EFLAGS: 00010246 [66.441481] CR0: 80050033 CR2: 05c8da20 CR3: 04b20000 CR4: 00350ed0 [66.441485] Call Trace: [66.441510] btrfs_relocate_chunk+0xb1/0x100 [btrfs] [66.441529] ? btrfs_lookup_block_group+0x17/0x20 [btrfs] [66.441562] btrfs_balance+0x8ed/0x13b0 [btrfs] [66.441586] ? btrfs_ioctl_balance+0x333/0x3c0 [btrfs] [66.441619] ? __this_cpu_preempt_check+0xf/0x11 [66.441643] btrfs_ioctl_balance+0x333/0x3c0 [btrfs] [66.441664] ? btrfs_ioctl_get_supported_features+0x30/0x30 [btrfs] [66.441683] btrfs_ioctl+0x414/0x2ae0 [btrfs] [66.441700] ? __lock_acquire+0x35f/0x2650 [66.441717] ? lockdep_hardirqs_on+0x87/0x120 [66.441720] ? lockdep_hardirqs_on_prepare+0xd0/0x1e0 [66.441724] ? call_rcu+0x2d3/0x530 [66.441731] ? __might_fault+0x41/0x90 [66.441736] ? kvm_sched_clock_read+0x15/0x50 [66.441740] ? sched_clock+0x8/0x10 [66.441745] ? sched_clock_cpu+0x13/0x180 [66.441750] ? btrfs_ioctl_get_supported_features+0x30/0x30 [btrfs] [66.441750] ? btrfs_ioctl_get_supported_features+0x30/0x30 [btrfs] [66.441768] __ia32_sys_ioctl+0x165/0x8a0 [66.441773] ? __this_cpu_preempt_check+0xf/0x11 [66.441785] ? __might_fault+0x89/0x90 [66.441791] __do_fast_syscall_32+0x54/0x80 [66.441796] do_fast_syscall_32+0x32/0x70 [66.441801] do_SYSENTER_32+0x15/0x20 [66.441805] entry_SYSENTER_32+0x9f/0xf2 [66.441808] EIP: 0xab7b5549 [66.441814] EAX: ffffffda EBX: 00000003 ECX: c4009420 EDX: bfa91f5c [66.441816] ESI: 00000003 EDI: 00000001 EBP: 00000000 ESP: bfa91e98 [66.441818] DS: 007b ES: 007b FS: 0000 GS: 0033 SS: 007b EFLAGS: 00000292 [66.441833] irq event stamp: 42579 [66.441835] hardirqs last enabled at (42585): [] console_unlock+0x495/0x590 [66.441838] hardirqs last disabled at (42590): [] console_unlock+0x405/0x590 [66.441840] softirqs last enabled at (41698): [] call_on_stack+0x1c/0x60 [66.441843] softirqs last disabled at (41681): [] call_on_stack+0x1c/0x60 ======================================================================== btrfs_remove_chunk+0x58b/0x7b0: __seqprop_mutex_assert at linux/./include/linux/seqlock.h:279 (inlined by) btrfs_device_set_bytes_used at linux/fs/btrfs/volumes.h:212 (inlined by) btrfs_remove_chunk at linux/fs/btrfs/volumes.c:2994 ======================================================================== The warning is produced by lockdep_assert_held() in __seqprop_mutex_assert() if CONFIG_LOCKDEP is enabled. And "olumes.c:2994 is btrfs_device_set_bytes_used() with mutex lock fs_info->chunk_mutex held already. After adding some debug prints, the cause was found that many __alloc_device() are called with NULL @fs_info (during scanning ioctl). Inside the function, btrfs_device_data_ordered_init() is expanded to seqcount_mutex_init(). In this scenario, its second parameter info->chunk_mutex is &NULL->chunk_mutex which equals to offsetof(struct btrfs_fs_info, chunk_mutex) unexpectedly. Thus, seqcount_mutex_init() is called in wrong way. And later btrfs_device_get/set helpers trigger lockdep warnings. The device and filesystem object lifetimes are different and we'd have to synchronize initialization of the btrfs_device::data_seqcount with the fs_info, possibly using some additional synchronization. It would still not prevent concurrent access to the seqcount lock when it's used for read and initialization. Commit d5c8238849e7 ("btrfs: convert data_seqcount to seqcount_mutex_t") does not mention a particular problem being fixed so revert should not cause any harm and we'll get the lockdep warning fixed. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=210139 Reported-by: Erhard F Fixes: d5c8238849e7 ("btrfs: convert data_seqcount to seqcount_mutex_t") CC: stable@vger.kernel.org # 5.10 CC: Davidlohr Bueso Signed-off-by: Su Yue Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/volumes.c | 2 +- fs/btrfs/volumes.h | 11 ++++++----- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 0c7f4f6237e8c..b900cc7849b6a 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -433,7 +433,7 @@ static struct btrfs_device *__alloc_device(struct btrfs_fs_info *fs_info) atomic_set(&dev->reada_in_flight, 0); atomic_set(&dev->dev_stats_ccnt, 0); - btrfs_device_data_ordered_init(dev, fs_info); + btrfs_device_data_ordered_init(dev); INIT_RADIX_TREE(&dev->reada_zones, GFP_NOFS & ~__GFP_DIRECT_RECLAIM); INIT_RADIX_TREE(&dev->reada_extents, GFP_NOFS & ~__GFP_DIRECT_RECLAIM); extent_io_tree_init(fs_info, &dev->alloc_state, diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 1997a4649a66c..c43663d9c22e0 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -39,10 +39,10 @@ struct btrfs_io_geometry { #if BITS_PER_LONG==32 && defined(CONFIG_SMP) #include #define __BTRFS_NEED_DEVICE_DATA_ORDERED -#define btrfs_device_data_ordered_init(device, info) \ - seqcount_mutex_init(&device->data_seqcount, &info->chunk_mutex) +#define btrfs_device_data_ordered_init(device) \ + seqcount_init(&device->data_seqcount) #else -#define btrfs_device_data_ordered_init(device, info) do { } while (0) +#define btrfs_device_data_ordered_init(device) do { } while (0) #endif #define BTRFS_DEV_STATE_WRITEABLE (0) @@ -76,8 +76,7 @@ struct btrfs_device { blk_status_t last_flush_error; #ifdef __BTRFS_NEED_DEVICE_DATA_ORDERED - /* A seqcount_t with associated chunk_mutex (for lockdep) */ - seqcount_mutex_t data_seqcount; + seqcount_t data_seqcount; #endif /* the internal btrfs device id */ @@ -168,9 +167,11 @@ btrfs_device_get_##name(const struct btrfs_device *dev) \ static inline void \ btrfs_device_set_##name(struct btrfs_device *dev, u64 size) \ { \ + preempt_disable(); \ write_seqcount_begin(&dev->data_seqcount); \ dev->name = size; \ write_seqcount_end(&dev->data_seqcount); \ + preempt_enable(); \ } #elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPTION) #define BTRFS_DEVICE_GETSET_FUNCS(name) \ -- GitLab From 9ad6d91f056b99dbe59a262810cb342519ea8d39 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Fri, 22 Jan 2021 19:07:45 +0000 Subject: [PATCH 1423/2012] btrfs: fix log replay failure due to race with space cache rebuild After a sudden power failure we may end up with a space cache on disk that is not valid and needs to be rebuilt from scratch. If that happens, during log replay when we attempt to pin an extent buffer from a log tree, at btrfs_pin_extent_for_log_replay(), we do not wait for the space cache to be rebuilt through the call to: btrfs_cache_block_group(cache, 1); That is because that only waits for the task (work queue job) that loads the space cache to change the cache state from BTRFS_CACHE_FAST to any other value. That is ok when the space cache on disk exists and is valid, but when the cache is not valid and needs to be rebuilt, it ends up returning as soon as the cache state changes to BTRFS_CACHE_STARTED (done at caching_thread()). So this means that we can end up trying to unpin a range which is not yet marked as free in the block group. This results in the call to btrfs_remove_free_space() to return -EINVAL to btrfs_pin_extent_for_log_replay(), which in turn makes the log replay fail as well as mounting the filesystem. More specifically the -EINVAL comes from free_space_cache.c:remove_from_bitmap(), because the requested range is not marked as free space (ones in the bitmap), we have the following condition triggered: static noinline int remove_from_bitmap(struct btrfs_free_space_ctl *ctl, (...) if (ret < 0 || search_start != *offset) return -EINVAL; (...) It's the "search_start != *offset" that results in the condition being evaluated to true. When this happens we got the following in dmesg/syslog: [72383.415114] BTRFS: device fsid 32b95b69-0ea9-496a-9f02-3f5a56dc9322 devid 1 transid 1432 /dev/sdb scanned by mount (3816007) [72383.417837] BTRFS info (device sdb): disk space caching is enabled [72383.418536] BTRFS info (device sdb): has skinny extents [72383.423846] BTRFS info (device sdb): start tree-log replay [72383.426416] BTRFS warning (device sdb): block group 30408704 has wrong amount of free space [72383.427686] BTRFS warning (device sdb): failed to load free space cache for block group 30408704, rebuilding it now [72383.454291] BTRFS: error (device sdb) in btrfs_recover_log_trees:6203: errno=-22 unknown (Failed to pin buffers while recovering log root tree.) [72383.456725] BTRFS: error (device sdb) in btrfs_replay_log:2253: errno=-22 unknown (Failed to recover log tree) [72383.460241] BTRFS error (device sdb): open_ctree failed We also mark the range for the extent buffer in the excluded extents io tree. That is fine when the space cache is valid on disk and we can load it, in which case it causes no problems. However, for the case where we need to rebuild the space cache, because it is either invalid or it is missing, having the extent buffer range marked in the excluded extents io tree leads to a -EINVAL failure from the call to btrfs_remove_free_space(), resulting in the log replay and mount to fail. This is because by having the range marked in the excluded extents io tree, the caching thread ends up never adding the range of the extent buffer as free space in the block group since the calls to add_new_free_space(), called from load_extent_tree_free(), filter out any ranges that are marked as excluded extents. So fix this by making sure that during log replay we wait for the caching task to finish completely when we need to rebuild a space cache, and also drop the need to mark the extent buffer range in the excluded extents io tree, as well as clearing ranges from that tree at btrfs_finish_extent_commit(). This started to happen with some frequency on large filesystems having block groups with a lot of fragmentation since the recent commit e747853cae3ae3 ("btrfs: load free space cache asynchronously"), but in fact the issue has been there for years, it was just much less likely to happen. Reviewed-by: Josef Bacik Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/extent-tree.c | 61 +++++++++++++----------------------------- 1 file changed, 18 insertions(+), 43 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 30b1a630dc2f8..0c335dae5af7a 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2602,8 +2602,6 @@ int btrfs_pin_extent_for_log_replay(struct btrfs_trans_handle *trans, struct btrfs_block_group *cache; int ret; - btrfs_add_excluded_extent(trans->fs_info, bytenr, num_bytes); - cache = btrfs_lookup_block_group(trans->fs_info, bytenr); if (!cache) return -EINVAL; @@ -2615,11 +2613,19 @@ int btrfs_pin_extent_for_log_replay(struct btrfs_trans_handle *trans, * the pinned extents. */ btrfs_cache_block_group(cache, 1); + /* + * Make sure we wait until the cache is completely built in case it is + * missing or is invalid and therefore needs to be rebuilt. + */ + ret = btrfs_wait_block_group_cache_done(cache); + if (ret) + goto out; pin_down_extent(trans, cache, bytenr, num_bytes, 0); /* remove us from the free space cache (if we're there at all) */ ret = btrfs_remove_free_space(cache, bytenr, num_bytes); +out: btrfs_put_block_group(cache); return ret; } @@ -2629,50 +2635,22 @@ static int __exclude_logged_extent(struct btrfs_fs_info *fs_info, { int ret; struct btrfs_block_group *block_group; - struct btrfs_caching_control *caching_ctl; block_group = btrfs_lookup_block_group(fs_info, start); if (!block_group) return -EINVAL; - btrfs_cache_block_group(block_group, 0); - caching_ctl = btrfs_get_caching_control(block_group); - - if (!caching_ctl) { - /* Logic error */ - BUG_ON(!btrfs_block_group_done(block_group)); - ret = btrfs_remove_free_space(block_group, start, num_bytes); - } else { - /* - * We must wait for v1 caching to finish, otherwise we may not - * remove our space. - */ - btrfs_wait_space_cache_v1_finished(block_group, caching_ctl); - mutex_lock(&caching_ctl->mutex); - - if (start >= caching_ctl->progress) { - ret = btrfs_add_excluded_extent(fs_info, start, - num_bytes); - } else if (start + num_bytes <= caching_ctl->progress) { - ret = btrfs_remove_free_space(block_group, - start, num_bytes); - } else { - num_bytes = caching_ctl->progress - start; - ret = btrfs_remove_free_space(block_group, - start, num_bytes); - if (ret) - goto out_lock; + btrfs_cache_block_group(block_group, 1); + /* + * Make sure we wait until the cache is completely built in case it is + * missing or is invalid and therefore needs to be rebuilt. + */ + ret = btrfs_wait_block_group_cache_done(block_group); + if (ret) + goto out; - num_bytes = (start + num_bytes) - - caching_ctl->progress; - start = caching_ctl->progress; - ret = btrfs_add_excluded_extent(fs_info, start, - num_bytes); - } -out_lock: - mutex_unlock(&caching_ctl->mutex); - btrfs_put_caching_control(caching_ctl); - } + ret = btrfs_remove_free_space(block_group, start, num_bytes); +out: btrfs_put_block_group(block_group); return ret; } @@ -2863,9 +2841,6 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans) mutex_unlock(&fs_info->unused_bg_unpin_mutex); break; } - if (test_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags)) - clear_extent_bits(&fs_info->excluded_extents, start, - end, EXTENT_UPTODATE); if (btrfs_test_opt(fs_info, DISCARD_SYNC)) ret = btrfs_discard_extent(fs_info, start, -- GitLab From fef9c8d28e28a808274a18fbd8cc2685817fd62a Mon Sep 17 00:00:00 2001 From: Laurent Badel Date: Fri, 22 Jan 2021 17:19:41 +0100 Subject: [PATCH 1424/2012] PM: hibernate: flush swap writer after marking MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Flush the swap writer after, not before, marking the files, to ensure the signature is properly written. Fixes: 6f612af57821 ("PM / Hibernate: Group swap ops") Signed-off-by: Laurent Badel Cc: All applicable Signed-off-by: Rafael J. Wysocki --- kernel/power/swap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/power/swap.c b/kernel/power/swap.c index c73f2e295167d..72e33054a2e1b 100644 --- a/kernel/power/swap.c +++ b/kernel/power/swap.c @@ -497,10 +497,10 @@ static int swap_writer_finish(struct swap_map_handle *handle, unsigned int flags, int error) { if (!error) { - flush_swap_writer(handle); pr_info("S"); error = mark_swapfiles(handle, flags); pr_cont("|\n"); + flush_swap_writer(handle); } if (error) -- GitLab From b98e762e3d71e893b221f871825dc64694cfb258 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Mon, 25 Jan 2021 12:21:02 -0500 Subject: [PATCH 1425/2012] nbd: freeze the queue while we're adding connections When setting up a device, we can krealloc the config->socks array to add new sockets to the configuration. However if we happen to get a IO request in at this point even though we aren't setup we could hit a UAF, as we deref config->socks without any locking, assuming that the configuration was setup already and that ->socks is safe to access it as we have a reference on the configuration. But there's nothing really preventing IO from occurring at this point of the device setup, we don't want to incur the overhead of a lock to access ->socks when it will never change while the device is running. To fix this UAF scenario simply freeze the queue if we are adding sockets. This will protect us from this particular case without adding any additional overhead for the normal running case. Cc: stable@vger.kernel.org Signed-off-by: Josef Bacik Signed-off-by: Jens Axboe --- drivers/block/nbd.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 6727358e147dd..e6ea5d344f87b 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -1022,6 +1022,12 @@ static int nbd_add_socket(struct nbd_device *nbd, unsigned long arg, if (!sock) return err; + /* + * We need to make sure we don't get any errant requests while we're + * reallocating the ->socks array. + */ + blk_mq_freeze_queue(nbd->disk->queue); + if (!netlink && !nbd->task_setup && !test_bit(NBD_RT_BOUND, &config->runtime_flags)) nbd->task_setup = current; @@ -1060,10 +1066,12 @@ static int nbd_add_socket(struct nbd_device *nbd, unsigned long arg, nsock->cookie = 0; socks[config->num_connections++] = nsock; atomic_inc(&config->live_connections); + blk_mq_unfreeze_queue(nbd->disk->queue); return 0; put_socket: + blk_mq_unfreeze_queue(nbd->disk->queue); sockfd_put(sock); return err; } -- GitLab From 0bc92e7f0d9ab06afacff7e5b0e08b5ce8f3f32f Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Wed, 13 Jan 2021 11:59:21 +0100 Subject: [PATCH 1426/2012] ASoC: audio-graph-card: update audio-graph-card.yaml reference Changeset 97198614f6c3 ("ASoC: audio-graph-card: switch to yaml base Documentation") renamed: Documentation/devicetree/bindings/sound/audio-graph-card.txt to: Documentation/devicetree/bindings/sound/audio-graph-card.yaml. Update its cross-reference accordingly. Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/8a779e6b9644d19c5d77b382059f6ccf9781434d.1610535350.git.mchehab+huawei@kernel.org Signed-off-by: Rob Herring --- Documentation/devicetree/bindings/display/bridge/sii902x.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/display/bridge/sii902x.txt b/Documentation/devicetree/bindings/display/bridge/sii902x.txt index 02c21b5847418..3bc760cc31cbb 100644 --- a/Documentation/devicetree/bindings/display/bridge/sii902x.txt +++ b/Documentation/devicetree/bindings/display/bridge/sii902x.txt @@ -40,7 +40,7 @@ Optional properties: documents on how to describe the way the sii902x device is connected to the rest of the audio system: Documentation/devicetree/bindings/sound/simple-card.yaml - Documentation/devicetree/bindings/sound/audio-graph-card.txt + Documentation/devicetree/bindings/sound/audio-graph-card.yaml Note: In case of the audio-graph-card binding the used port index should be 3. -- GitLab From 601bd38ccd25e831865dd8442e3491fc8ce9604d Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Wed, 13 Jan 2021 11:59:22 +0100 Subject: [PATCH 1427/2012] dt-bindings: display: mediatek: update mediatek,dpi.yaml reference Changeset 9273cf7d3942 ("dt-bindings: display: mediatek: convert the dpi bindings to yaml") renamed: Documentation/devicetree/bindings/display/mediatek/mediatek,dpi.txt to: Documentation/devicetree/bindings/display/mediatek/mediatek,dpi.yaml. Update its cross-reference accordingly. Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/3bf906f39b797d18800abd387187cce71296e5eb.1610535350.git.mchehab+huawei@kernel.org Signed-off-by: Rob Herring --- .../devicetree/bindings/display/mediatek/mediatek,disp.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/display/mediatek/mediatek,disp.txt b/Documentation/devicetree/bindings/display/mediatek/mediatek,disp.txt index 33977e15bebdf..865e1e1b88ac5 100644 --- a/Documentation/devicetree/bindings/display/mediatek/mediatek,disp.txt +++ b/Documentation/devicetree/bindings/display/mediatek/mediatek,disp.txt @@ -23,7 +23,7 @@ connected to. For a description of the display interface sink function blocks, see Documentation/devicetree/bindings/display/mediatek/mediatek,dsi.txt and -Documentation/devicetree/bindings/display/mediatek/mediatek,dpi.txt. +Documentation/devicetree/bindings/display/mediatek/mediatek,dpi.yaml. Required properties (all function blocks): - compatible: "mediatek,-disp-", one of -- GitLab From c5dde04b9059c91515d609a41e9c1a148ee4d850 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Wed, 13 Jan 2021 11:59:23 +0100 Subject: [PATCH 1428/2012] dt-bindings: memory: mediatek: update mediatek,smi-larb.yaml references Changeset 27bb0e42855a ("dt-bindings: memory: mediatek: Convert SMI to DT schema") renamed: Documentation/devicetree/bindings/memory-controllers/mediatek,smi-larb.txt to: Documentation/devicetree/bindings/memory-controllers/mediatek,smi-larb.yaml. Update its cross-references accordingly. Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/c70bd79b311a65babe7374eaf81974563400a943.1610535350.git.mchehab+huawei@kernel.org Signed-off-by: Rob Herring --- .../devicetree/bindings/display/mediatek/mediatek,disp.txt | 2 +- .../devicetree/bindings/media/mediatek-jpeg-decoder.txt | 2 +- .../devicetree/bindings/media/mediatek-jpeg-encoder.txt | 2 +- Documentation/devicetree/bindings/media/mediatek-mdp.txt | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Documentation/devicetree/bindings/display/mediatek/mediatek,disp.txt b/Documentation/devicetree/bindings/display/mediatek/mediatek,disp.txt index 865e1e1b88ac5..ed76332ec01e8 100644 --- a/Documentation/devicetree/bindings/display/mediatek/mediatek,disp.txt +++ b/Documentation/devicetree/bindings/display/mediatek/mediatek,disp.txt @@ -61,7 +61,7 @@ Required properties (DMA function blocks): "mediatek,-disp-wdma" the supported chips are mt2701, mt8167 and mt8173. - larb: Should contain a phandle pointing to the local arbiter device as defined - in Documentation/devicetree/bindings/memory-controllers/mediatek,smi-larb.txt + in Documentation/devicetree/bindings/memory-controllers/mediatek,smi-larb.yaml - iommus: Should point to the respective IOMMU block with master port as argument, see Documentation/devicetree/bindings/iommu/mediatek,iommu.txt for details. diff --git a/Documentation/devicetree/bindings/media/mediatek-jpeg-decoder.txt b/Documentation/devicetree/bindings/media/mediatek-jpeg-decoder.txt index 044b11913c49b..cf60c5acc0e4e 100644 --- a/Documentation/devicetree/bindings/media/mediatek-jpeg-decoder.txt +++ b/Documentation/devicetree/bindings/media/mediatek-jpeg-decoder.txt @@ -16,7 +16,7 @@ Required properties: - power-domains: a phandle to the power domain, see Documentation/devicetree/bindings/power/power_domain.txt for details. - mediatek,larb: must contain the local arbiters in the current Socs, see - Documentation/devicetree/bindings/memory-controllers/mediatek,smi-larb.txt + Documentation/devicetree/bindings/memory-controllers/mediatek,smi-larb.yaml for details. - iommus: should point to the respective IOMMU block with master port as argument, see Documentation/devicetree/bindings/iommu/mediatek,iommu.txt diff --git a/Documentation/devicetree/bindings/media/mediatek-jpeg-encoder.txt b/Documentation/devicetree/bindings/media/mediatek-jpeg-encoder.txt index 736be7cad3857..acfb50375b8ac 100644 --- a/Documentation/devicetree/bindings/media/mediatek-jpeg-encoder.txt +++ b/Documentation/devicetree/bindings/media/mediatek-jpeg-encoder.txt @@ -14,7 +14,7 @@ Required properties: - power-domains: a phandle to the power domain, see Documentation/devicetree/bindings/power/power_domain.txt for details. - mediatek,larb: must contain the local arbiters in the current SoCs, see - Documentation/devicetree/bindings/memory-controllers/mediatek,smi-larb.txt + Documentation/devicetree/bindings/memory-controllers/mediatek,smi-larb.yaml for details. - iommus: should point to the respective IOMMU block with master port as argument, see Documentation/devicetree/bindings/iommu/mediatek,iommu.txt diff --git a/Documentation/devicetree/bindings/media/mediatek-mdp.txt b/Documentation/devicetree/bindings/media/mediatek-mdp.txt index 0d03e3ae2be2f..f4798d04e9252 100644 --- a/Documentation/devicetree/bindings/media/mediatek-mdp.txt +++ b/Documentation/devicetree/bindings/media/mediatek-mdp.txt @@ -28,7 +28,7 @@ Required properties (DMA function blocks, child node): argument, see Documentation/devicetree/bindings/iommu/mediatek,iommu.txt for details. - mediatek,larb: must contain the local arbiters in the current Socs, see - Documentation/devicetree/bindings/memory-controllers/mediatek,smi-larb.txt + Documentation/devicetree/bindings/memory-controllers/mediatek,smi-larb.yaml for details. Example: -- GitLab From 3490e333bda0709a5a2c9b7ab9b0209bb16619d8 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Wed, 13 Jan 2021 11:59:24 +0100 Subject: [PATCH 1429/2012] dt-bindings:iio:adc: update adc.yaml reference Changeset b70d154d6558 ("dt-bindings:iio:adc: convert adc.txt to yaml") renamed: Documentation/devicetree/bindings/iio/adc/adc.txt to: Documentation/devicetree/bindings/iio/adc/adc.yaml. Update its cross-reference accordingly. Signed-off-by: Mauro Carvalho Chehab Acked-by: Jonathan Cameron Link: https://lore.kernel.org/r/8e37dba8ae9099acd649bab8a1cf718caa4f3e6a.1610535350.git.mchehab+huawei@kernel.org Signed-off-by: Rob Herring --- Documentation/devicetree/bindings/iio/adc/adi,ad7192.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/iio/adc/adi,ad7192.yaml b/Documentation/devicetree/bindings/iio/adc/adi,ad7192.yaml index e0cc3b2e89574..22b7ed3723f67 100644 --- a/Documentation/devicetree/bindings/iio/adc/adi,ad7192.yaml +++ b/Documentation/devicetree/bindings/iio/adc/adi,ad7192.yaml @@ -80,7 +80,7 @@ properties: type: boolean bipolar: - description: see Documentation/devicetree/bindings/iio/adc/adc.txt + description: see Documentation/devicetree/bindings/iio/adc/adc.yaml type: boolean required: -- GitLab From 9f12e37cae44a96132fc3031535a0b165486941a Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Mon, 25 Jan 2021 11:09:25 -0800 Subject: [PATCH 1430/2012] Commit 9bb48c82aced ("tty: implement write_iter") converted the tty layer to use write_iter. Fix the redirected_tty_write declaration also in n_tty and change the comparisons to use write_iter instead of write. [ Also moved the declaration of redirected_tty_write() to the proper location in a header file. The reason for the bug was the bogus extern declaration in n_tty.c silently not matching the changed definition in tty_io.c, and because it wasn't in a shared header file, there was no cross-checking of the declaration. Sami noticed because Clang's Control Flow Integrity checking ended up incidentally noticing the inconsistent declaration. - Linus ] Fixes: 9bb48c82aced ("tty: implement write_iter") Signed-off-by: Sami Tolvanen Signed-off-by: Linus Torvalds --- drivers/tty/n_tty.c | 7 ++----- drivers/tty/tty_io.c | 2 -- include/linux/tty.h | 1 + 3 files changed, 3 insertions(+), 7 deletions(-) diff --git a/drivers/tty/n_tty.c b/drivers/tty/n_tty.c index 319d68c8a5df3..219e85756171b 100644 --- a/drivers/tty/n_tty.c +++ b/drivers/tty/n_tty.c @@ -2081,9 +2081,6 @@ static int canon_copy_from_read_buf(struct tty_struct *tty, return 0; } -extern ssize_t redirected_tty_write(struct file *, const char __user *, - size_t, loff_t *); - /** * job_control - check job control * @tty: tty @@ -2105,7 +2102,7 @@ static int job_control(struct tty_struct *tty, struct file *file) /* NOTE: not yet done after every sleep pending a thorough check of the logic of this change. -- jlc */ /* don't stop on /dev/console */ - if (file->f_op->write == redirected_tty_write) + if (file->f_op->write_iter == redirected_tty_write) return 0; return __tty_check_change(tty, SIGTTIN); @@ -2309,7 +2306,7 @@ static ssize_t n_tty_write(struct tty_struct *tty, struct file *file, ssize_t retval = 0; /* Job control check -- must be done at start (POSIX.1 7.1.1.4). */ - if (L_TOSTOP(tty) && file->f_op->write != redirected_tty_write) { + if (L_TOSTOP(tty) && file->f_op->write_iter != redirected_tty_write) { retval = tty_check_change(tty); if (retval) return retval; diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c index 4a208a95e9219..48de20916ca78 100644 --- a/drivers/tty/tty_io.c +++ b/drivers/tty/tty_io.c @@ -144,10 +144,8 @@ DEFINE_MUTEX(tty_mutex); static ssize_t tty_read(struct file *, char __user *, size_t, loff_t *); static ssize_t tty_write(struct kiocb *, struct iov_iter *); -ssize_t redirected_tty_write(struct kiocb *, struct iov_iter *); static __poll_t tty_poll(struct file *, poll_table *); static int tty_open(struct inode *, struct file *); -long tty_ioctl(struct file *file, unsigned int cmd, unsigned long arg); #ifdef CONFIG_COMPAT static long tty_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg); diff --git a/include/linux/tty.h b/include/linux/tty.h index c873f475f0a76..37803f3e6d49e 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -421,6 +421,7 @@ extern void tty_kclose(struct tty_struct *tty); extern int tty_dev_name_to_number(const char *name, dev_t *number); extern int tty_ldisc_lock(struct tty_struct *tty, unsigned long timeout); extern void tty_ldisc_unlock(struct tty_struct *tty); +extern ssize_t redirected_tty_write(struct kiocb *, struct iov_iter *); #else static inline void tty_kref_put(struct tty_struct *tty) { } -- GitLab From f8ad8187c3b536ee2b10502a8340c014204a1af0 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 25 Jan 2021 10:16:15 +0100 Subject: [PATCH 1431/2012] fs/pipe: allow sendfile() to pipe again After commit 36e2c7421f02 ("fs: don't allow splice read/write without explicit ops") sendfile() could no longer send data from a real file to a pipe, breaking for example certain cgit setups (e.g. when running behind fcgiwrap), because in this case cgit will try to do exactly this: sendfile() to a pipe. Fix this by using iter_file_splice_write for the splice_write method of pipes, as suggested by Christoph. Cc: stable@vger.kernel.org Fixes: 36e2c7421f02 ("fs: don't allow splice read/write without explicit ops") Suggested-by: Christoph Hellwig Reviewed-by: Christoph Hellwig Tested-by: Johannes Berg Signed-off-by: Johannes Berg Signed-off-by: Linus Torvalds --- fs/pipe.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/pipe.c b/fs/pipe.c index c5989cfd564d4..39c96845a72fb 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -1206,6 +1206,7 @@ const struct file_operations pipefifo_fops = { .unlocked_ioctl = pipe_ioctl, .release = pipe_release, .fasync = pipe_fasync, + .splice_write = iter_file_splice_write, }; /* -- GitLab From ba6dfce47c4d002d96cd02a304132fca76981172 Mon Sep 17 00:00:00 2001 From: Dave Wysochanski Date: Thu, 21 Jan 2021 16:17:23 -0500 Subject: [PATCH 1432/2012] SUNRPC: Move simple_get_bytes and simple_get_netobj into private header Remove duplicated helper functions to parse opaque XDR objects and place inside new file net/sunrpc/auth_gss/auth_gss_internal.h. In the new file carry the license and copyright from the source file net/sunrpc/auth_gss/auth_gss.c. Finally, update the comment inside include/linux/sunrpc/xdr.h since lockd is not the only user of struct xdr_netobj. Signed-off-by: Dave Wysochanski Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xdr.h | 3 +- net/sunrpc/auth_gss/auth_gss.c | 30 +----------------- net/sunrpc/auth_gss/auth_gss_internal.h | 42 +++++++++++++++++++++++++ net/sunrpc/auth_gss/gss_krb5_mech.c | 31 ++---------------- 4 files changed, 46 insertions(+), 60 deletions(-) create mode 100644 net/sunrpc/auth_gss/auth_gss_internal.h diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index 19b6dea27367b..b26213ae8c1ab 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -25,8 +25,7 @@ struct rpc_rqst; #define XDR_QUADLEN(l) (((l) + 3) >> 2) /* - * Generic opaque `network object.' At the kernel level, this type - * is used only by lockd. + * Generic opaque `network object.' */ #define XDR_MAX_NETOBJ 1024 struct xdr_netobj { diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index 4ecc2a9595674..5f42aa5fc6128 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -29,6 +29,7 @@ #include #include +#include "auth_gss_internal.h" #include "../netns.h" #include @@ -125,35 +126,6 @@ gss_cred_set_ctx(struct rpc_cred *cred, struct gss_cl_ctx *ctx) clear_bit(RPCAUTH_CRED_NEW, &cred->cr_flags); } -static const void * -simple_get_bytes(const void *p, const void *end, void *res, size_t len) -{ - const void *q = (const void *)((const char *)p + len); - if (unlikely(q > end || q < p)) - return ERR_PTR(-EFAULT); - memcpy(res, p, len); - return q; -} - -static inline const void * -simple_get_netobj(const void *p, const void *end, struct xdr_netobj *dest) -{ - const void *q; - unsigned int len; - - p = simple_get_bytes(p, end, &len, sizeof(len)); - if (IS_ERR(p)) - return p; - q = (const void *)((const char *)p + len); - if (unlikely(q > end || q < p)) - return ERR_PTR(-EFAULT); - dest->data = kmemdup(p, len, GFP_NOFS); - if (unlikely(dest->data == NULL)) - return ERR_PTR(-ENOMEM); - dest->len = len; - return q; -} - static struct gss_cl_ctx * gss_cred_get_ctx(struct rpc_cred *cred) { diff --git a/net/sunrpc/auth_gss/auth_gss_internal.h b/net/sunrpc/auth_gss/auth_gss_internal.h new file mode 100644 index 0000000000000..c5603242b54bf --- /dev/null +++ b/net/sunrpc/auth_gss/auth_gss_internal.h @@ -0,0 +1,42 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* + * linux/net/sunrpc/auth_gss/auth_gss_internal.h + * + * Internal definitions for RPCSEC_GSS client authentication + * + * Copyright (c) 2000 The Regents of the University of Michigan. + * All rights reserved. + * + */ +#include +#include +#include + +static inline const void * +simple_get_bytes(const void *p, const void *end, void *res, size_t len) +{ + const void *q = (const void *)((const char *)p + len); + if (unlikely(q > end || q < p)) + return ERR_PTR(-EFAULT); + memcpy(res, p, len); + return q; +} + +static inline const void * +simple_get_netobj(const void *p, const void *end, struct xdr_netobj *dest) +{ + const void *q; + unsigned int len; + + p = simple_get_bytes(p, end, &len, sizeof(len)); + if (IS_ERR(p)) + return p; + q = (const void *)((const char *)p + len); + if (unlikely(q > end || q < p)) + return ERR_PTR(-EFAULT); + dest->data = kmemdup(p, len, GFP_NOFS); + if (unlikely(dest->data == NULL)) + return ERR_PTR(-ENOMEM); + dest->len = len; + return q; +} diff --git a/net/sunrpc/auth_gss/gss_krb5_mech.c b/net/sunrpc/auth_gss/gss_krb5_mech.c index ae9acf3a73898..1c092b05c2bba 100644 --- a/net/sunrpc/auth_gss/gss_krb5_mech.c +++ b/net/sunrpc/auth_gss/gss_krb5_mech.c @@ -21,6 +21,8 @@ #include #include +#include "auth_gss_internal.h" + #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) # define RPCDBG_FACILITY RPCDBG_AUTH #endif @@ -143,35 +145,6 @@ get_gss_krb5_enctype(int etype) return NULL; } -static const void * -simple_get_bytes(const void *p, const void *end, void *res, int len) -{ - const void *q = (const void *)((const char *)p + len); - if (unlikely(q > end || q < p)) - return ERR_PTR(-EFAULT); - memcpy(res, p, len); - return q; -} - -static const void * -simple_get_netobj(const void *p, const void *end, struct xdr_netobj *res) -{ - const void *q; - unsigned int len; - - p = simple_get_bytes(p, end, &len, sizeof(len)); - if (IS_ERR(p)) - return p; - q = (const void *)((const char *)p + len); - if (unlikely(q > end || q < p)) - return ERR_PTR(-EFAULT); - res->data = kmemdup(p, len, GFP_NOFS); - if (unlikely(res->data == NULL)) - return ERR_PTR(-ENOMEM); - res->len = len; - return q; -} - static inline const void * get_key(const void *p, const void *end, struct krb5_ctx *ctx, struct crypto_sync_skcipher **res) -- GitLab From e4a7d1f7707eb44fd953a31dd59eff82009d879c Mon Sep 17 00:00:00 2001 From: Dave Wysochanski Date: Thu, 21 Jan 2021 16:17:24 -0500 Subject: [PATCH 1433/2012] SUNRPC: Handle 0 length opaque XDR object data properly When handling an auth_gss downcall, it's possible to get 0-length opaque object for the acceptor. In the case of a 0-length XDR object, make sure simple_get_netobj() fills in dest->data = NULL, and does not continue to kmemdup() which will set dest->data = ZERO_SIZE_PTR for the acceptor. The trace event code can handle NULL but not ZERO_SIZE_PTR for a string, and so without this patch the rpcgss_context trace event will crash the kernel as follows: [ 162.887992] BUG: kernel NULL pointer dereference, address: 0000000000000010 [ 162.898693] #PF: supervisor read access in kernel mode [ 162.900830] #PF: error_code(0x0000) - not-present page [ 162.902940] PGD 0 P4D 0 [ 162.904027] Oops: 0000 [#1] SMP PTI [ 162.905493] CPU: 4 PID: 4321 Comm: rpc.gssd Kdump: loaded Not tainted 5.10.0 #133 [ 162.908548] Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2011 [ 162.910978] RIP: 0010:strlen+0x0/0x20 [ 162.912505] Code: 48 89 f9 74 09 48 83 c1 01 80 39 00 75 f7 31 d2 44 0f b6 04 16 44 88 04 11 48 83 c2 01 45 84 c0 75 ee c3 0f 1f 80 00 00 00 00 <80> 3f 00 74 10 48 89 f8 48 83 c0 01 80 38 00 75 f7 48 29 f8 c3 31 [ 162.920101] RSP: 0018:ffffaec900c77d90 EFLAGS: 00010202 [ 162.922263] RAX: 0000000000000000 RBX: 0000000000000000 RCX: 00000000fffde697 [ 162.925158] RDX: 000000000000002f RSI: 0000000000000080 RDI: 0000000000000010 [ 162.928073] RBP: 0000000000000010 R08: 0000000000000e10 R09: 0000000000000000 [ 162.930976] R10: ffff8e698a590cb8 R11: 0000000000000001 R12: 0000000000000e10 [ 162.933883] R13: 00000000fffde697 R14: 000000010034d517 R15: 0000000000070028 [ 162.936777] FS: 00007f1e1eb93700(0000) GS:ffff8e6ab7d00000(0000) knlGS:0000000000000000 [ 162.940067] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 162.942417] CR2: 0000000000000010 CR3: 0000000104eba000 CR4: 00000000000406e0 [ 162.945300] Call Trace: [ 162.946428] trace_event_raw_event_rpcgss_context+0x84/0x140 [auth_rpcgss] [ 162.949308] ? __kmalloc_track_caller+0x35/0x5a0 [ 162.951224] ? gss_pipe_downcall+0x3a3/0x6a0 [auth_rpcgss] [ 162.953484] gss_pipe_downcall+0x585/0x6a0 [auth_rpcgss] [ 162.955953] rpc_pipe_write+0x58/0x70 [sunrpc] [ 162.957849] vfs_write+0xcb/0x2c0 [ 162.959264] ksys_write+0x68/0xe0 [ 162.960706] do_syscall_64+0x33/0x40 [ 162.962238] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 162.964346] RIP: 0033:0x7f1e1f1e57df Signed-off-by: Dave Wysochanski Signed-off-by: Trond Myklebust --- net/sunrpc/auth_gss/auth_gss_internal.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/net/sunrpc/auth_gss/auth_gss_internal.h b/net/sunrpc/auth_gss/auth_gss_internal.h index c5603242b54bf..f6d9631bd9d00 100644 --- a/net/sunrpc/auth_gss/auth_gss_internal.h +++ b/net/sunrpc/auth_gss/auth_gss_internal.h @@ -34,9 +34,12 @@ simple_get_netobj(const void *p, const void *end, struct xdr_netobj *dest) q = (const void *)((const char *)p + len); if (unlikely(q > end || q < p)) return ERR_PTR(-EFAULT); - dest->data = kmemdup(p, len, GFP_NOFS); - if (unlikely(dest->data == NULL)) - return ERR_PTR(-ENOMEM); + if (len) { + dest->data = kmemdup(p, len, GFP_NOFS); + if (unlikely(dest->data == NULL)) + return ERR_PTR(-ENOMEM); + } else + dest->data = NULL; dest->len = len; return q; } -- GitLab From 83ace77f51175023c3757e2d08a92565f9b1c7f3 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 20 Jan 2021 16:30:03 +0100 Subject: [PATCH 1434/2012] netfilter: ctnetlink: remove get_ct indirection Use nf_ct_get() directly, its a small inline helper without dependencies. Add CONFIG_NF_CONNTRACK guards to elide the relevant part when conntrack isn't available at all. v2: add ifdef guard around nf_ct_get call (kernel test robot) Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter.h | 2 -- net/netfilter/nf_conntrack_netlink.c | 7 ------- net/netfilter/nfnetlink_log.c | 8 +++++++- net/netfilter/nfnetlink_queue.c | 10 ++++++++-- 4 files changed, 15 insertions(+), 12 deletions(-) diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 0101747de5493..f0f3a8354c3ce 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -463,8 +463,6 @@ extern struct nf_ct_hook __rcu *nf_ct_hook; struct nlattr; struct nfnl_ct_hook { - struct nf_conn *(*get_ct)(const struct sk_buff *skb, - enum ip_conntrack_info *ctinfo); size_t (*build_size)(const struct nf_conn *ct); int (*build)(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 84caf3316946d..1469365bac7e4 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -2686,12 +2686,6 @@ ctnetlink_glue_build_size(const struct nf_conn *ct) ; } -static struct nf_conn *ctnetlink_glue_get_ct(const struct sk_buff *skb, - enum ip_conntrack_info *ctinfo) -{ - return nf_ct_get(skb, ctinfo); -} - static int __ctnetlink_glue_build(struct sk_buff *skb, struct nf_conn *ct) { const struct nf_conntrack_zone *zone; @@ -2925,7 +2919,6 @@ static void ctnetlink_glue_seqadj(struct sk_buff *skb, struct nf_conn *ct, } static struct nfnl_ct_hook ctnetlink_glue_hook = { - .get_ct = ctnetlink_glue_get_ct, .build_size = ctnetlink_glue_build_size, .build = ctnetlink_glue_build, .parse = ctnetlink_glue_parse, diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index b35e8d9a5b37e..26776b88a539f 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -43,6 +43,10 @@ #include "../bridge/br_private.h" #endif +#if IS_ENABLED(CONFIG_NF_CONNTRACK) +#include +#endif + #define NFULNL_COPY_DISABLED 0xff #define NFULNL_NLBUFSIZ_DEFAULT NLMSG_GOODSIZE #define NFULNL_TIMEOUT_DEFAULT 100 /* every second */ @@ -733,14 +737,16 @@ nfulnl_log_packet(struct net *net, size += nla_total_size(sizeof(u_int32_t)); if (inst->flags & NFULNL_CFG_F_SEQ_GLOBAL) size += nla_total_size(sizeof(u_int32_t)); +#if IS_ENABLED(CONFIG_NF_CONNTRACK) if (inst->flags & NFULNL_CFG_F_CONNTRACK) { nfnl_ct = rcu_dereference(nfnl_ct_hook); if (nfnl_ct != NULL) { - ct = nfnl_ct->get_ct(skb, &ctinfo); + ct = nf_ct_get(skb, &ctinfo); if (ct != NULL) size += nfnl_ct->build_size(ct); } } +#endif if (pf == NFPROTO_NETDEV || pf == NFPROTO_BRIDGE) size += nfulnl_get_bridge_size(skb); diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index d1d8bca03b4f0..48a07914fd942 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -444,13 +444,15 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, nfnl_ct = rcu_dereference(nfnl_ct_hook); +#if IS_ENABLED(CONFIG_NF_CONNTRACK) if (queue->flags & NFQA_CFG_F_CONNTRACK) { if (nfnl_ct != NULL) { - ct = nfnl_ct->get_ct(entskb, &ctinfo); + ct = nf_ct_get(entskb, &ctinfo); if (ct != NULL) size += nfnl_ct->build_size(ct); } } +#endif if (queue->flags & NFQA_CFG_F_UID_GID) { size += (nla_total_size(sizeof(u_int32_t)) /* uid */ @@ -1104,9 +1106,10 @@ static struct nf_conn *nfqnl_ct_parse(struct nfnl_ct_hook *nfnl_ct, struct nf_queue_entry *entry, enum ip_conntrack_info *ctinfo) { +#if IS_ENABLED(CONFIG_NF_CONNTRACK) struct nf_conn *ct; - ct = nfnl_ct->get_ct(entry->skb, ctinfo); + ct = nf_ct_get(entry->skb, ctinfo); if (ct == NULL) return NULL; @@ -1118,6 +1121,9 @@ static struct nf_conn *nfqnl_ct_parse(struct nfnl_ct_hook *nfnl_ct, NETLINK_CB(entry->skb).portid, nlmsg_report(nlh)); return ct; +#else + return NULL; +#endif } static int nfqa_parse_bridge(struct nf_queue_entry *entry, -- GitLab From 453b674178327950e8517172c82107c43af222e4 Mon Sep 17 00:00:00 2001 From: Grygorii Strashko Date: Fri, 15 Jan 2021 21:31:24 +0200 Subject: [PATCH 1435/2012] dt-bindings: usb: j721e: add ranges and dma-coherent props Add missed 'ranges' and 'dma-coherent' properties as cdns-usb DT nodes has child node and DMA IO is coherent on TI K3 J721E/J7200 SoCs. This also fixes dtbs_check warning: cdns-usb@4104000: 'dma-coherent', 'ranges' do not match any of the regexes: '^usb@', 'pinctrl-[0-9]+' Signed-off-by: Grygorii Strashko Acked-by: Aswath Govindraju Reviewed-by: Aswath Govindraju Link: https://lore.kernel.org/r/20210115193124.5706-1-grygorii.strashko@ti.com Signed-off-by: Rob Herring --- Documentation/devicetree/bindings/usb/ti,j721e-usb.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Documentation/devicetree/bindings/usb/ti,j721e-usb.yaml b/Documentation/devicetree/bindings/usb/ti,j721e-usb.yaml index 388245b91a55b..64b0b92aa0502 100644 --- a/Documentation/devicetree/bindings/usb/ti,j721e-usb.yaml +++ b/Documentation/devicetree/bindings/usb/ti,j721e-usb.yaml @@ -17,6 +17,8 @@ properties: reg: description: module registers + ranges: true + power-domains: description: PM domain provider node and an args specifier containing @@ -58,6 +60,8 @@ properties: '#size-cells': const: 2 + dma-coherent: true + patternProperties: "^usb@": type: object -- GitLab From 07d46d93c9acdfe0614071d73c415dd5f745cc6e Mon Sep 17 00:00:00 2001 From: Justin Iurman Date: Thu, 21 Jan 2021 23:00:44 +0100 Subject: [PATCH 1436/2012] uapi: fix big endian definition of ipv6_rpl_sr_hdr Following RFC 6554 [1], the current order of fields is wrong for big endian definition. Indeed, here is how the header looks like: +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | Next Header | Hdr Ext Len | Routing Type | Segments Left | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | CmprI | CmprE | Pad | Reserved | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ This patch reorders fields so that big endian definition is now correct. [1] https://tools.ietf.org/html/rfc6554#section-3 Fixes: cfa933d938d8 ("include: uapi: linux: add rpl sr header definition") Signed-off-by: Justin Iurman Signed-off-by: Jakub Kicinski --- include/uapi/linux/rpl.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/uapi/linux/rpl.h b/include/uapi/linux/rpl.h index 1dccb55cf8c64..708adddf9f138 100644 --- a/include/uapi/linux/rpl.h +++ b/include/uapi/linux/rpl.h @@ -28,10 +28,10 @@ struct ipv6_rpl_sr_hdr { pad:4, reserved1:16; #elif defined(__BIG_ENDIAN_BITFIELD) - __u32 reserved:20, + __u32 cmpri:4, + cmpre:4, pad:4, - cmpri:4, - cmpre:4; + reserved:20; #else #error "Please fix " #endif -- GitLab From 24f97b6af9a000bfda9ee693110189d7d4d629fe Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Fri, 22 Jan 2021 13:08:22 +0100 Subject: [PATCH 1437/2012] tg3: improve PCI VPD access When working on the PCI VPD code I also tested with a Broadcom BCM95719 card. tg3 uses internal NVRAM access with this card, so I forced it to PCI VPD mode for testing. PCI VPD access fails (i + PCI_VPD_LRDT_TAG_SIZE + j > len) because only TG3_NVM_VPD_LEN (256) bytes are read, but PCI VPD has 400 bytes on this card. So add a constant TG3_NVM_PCI_VPD_MAX_LEN that defines the maximum PCI VPD size. The actual VPD size is returned by pci_read_vpd(). In addition it's not worth looping over pci_read_vpd(). If we miss the 125ms timeout per VPD dword read then definitely something is wrong, and if the tg3 module loading is killed then there's also not much benefit in retrying the VPD read. Signed-off-by: Heiner Kallweit Reviewed-by: Michael Chan Link: https://lore.kernel.org/r/cb9e9113-0861-3904-87e0-d4c4ab3c8860@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/tg3.c | 30 +++++++++++------------------ drivers/net/ethernet/broadcom/tg3.h | 1 + 2 files changed, 12 insertions(+), 19 deletions(-) diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index 5143cdd0eecad..8936c2bc62867 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -12826,11 +12826,13 @@ static __be32 *tg3_vpd_readblock(struct tg3 *tp, u32 *vpdlen) offset = tg3_nvram_logical_addr(tp, offset); } - } - if (!offset || !len) { - offset = TG3_NVM_VPD_OFF; - len = TG3_NVM_VPD_LEN; + if (!offset || !len) { + offset = TG3_NVM_VPD_OFF; + len = TG3_NVM_VPD_LEN; + } + } else { + len = TG3_NVM_PCI_VPD_MAX_LEN; } buf = kmalloc(len, GFP_KERNEL); @@ -12846,26 +12848,16 @@ static __be32 *tg3_vpd_readblock(struct tg3 *tp, u32 *vpdlen) if (tg3_nvram_read_be32(tp, offset + i, &buf[i/4])) goto error; } + *vpdlen = len; } else { - u8 *ptr; ssize_t cnt; - unsigned int pos = 0; - - ptr = (u8 *)&buf[0]; - for (i = 0; pos < len && i < 3; i++, pos += cnt, ptr += cnt) { - cnt = pci_read_vpd(tp->pdev, pos, - len - pos, ptr); - if (cnt == -ETIMEDOUT || cnt == -EINTR) - cnt = 0; - else if (cnt < 0) - goto error; - } - if (pos != len) + + cnt = pci_read_vpd(tp->pdev, 0, len, (u8 *)buf); + if (cnt < 0) goto error; + *vpdlen = cnt; } - *vpdlen = len; - return buf; error: diff --git a/drivers/net/ethernet/broadcom/tg3.h b/drivers/net/ethernet/broadcom/tg3.h index 1000c894064f0..46ec4fdfd16aa 100644 --- a/drivers/net/ethernet/broadcom/tg3.h +++ b/drivers/net/ethernet/broadcom/tg3.h @@ -2101,6 +2101,7 @@ /* Hardware Legacy NVRAM layout */ #define TG3_NVM_VPD_OFF 0x100 #define TG3_NVM_VPD_LEN 256 +#define TG3_NVM_PCI_VPD_MAX_LEN 512 /* Hardware Selfboot NVRAM layout */ #define TG3_NVM_HWSB_CFG1 0x00000004 -- GitLab From a10f373ad3c760dd40b41e2f69a800ee7b8da15e Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Fri, 8 Jan 2021 16:53:49 +0000 Subject: [PATCH 1438/2012] KVM: Documentation: Fix spec for KVM_CAP_ENABLE_CAP_VM The documentation classifies KVM_ENABLE_CAP with KVM_CAP_ENABLE_CAP_VM as a vcpu ioctl, which is incorrect. Fix it by specifying it as a VM ioctl. Fixes: e5d83c74a580 ("kvm: make KVM_CAP_ENABLE_CAP_VM architecture agnostic") Signed-off-by: Quentin Perret Message-Id: <20210108165349.747359-1-qperret@google.com> Signed-off-by: Paolo Bonzini --- Documentation/virt/kvm/api.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index a9bf7f2ab76f5..40b943a9bd1da 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -1336,7 +1336,7 @@ documentation when it pops into existence). :Capability: KVM_CAP_ENABLE_CAP_VM :Architectures: all -:Type: vcpu ioctl +:Type: vm ioctl :Parameters: struct kvm_enable_cap (in) :Returns: 0 on success; -1 on error -- GitLab From eb79cd00ce25974c21f34f1eeb92a580ff572971 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 13 Jan 2021 12:45:15 -0800 Subject: [PATCH 1439/2012] KVM: x86: Add more protection against undefined behavior in rsvd_bits() Add compile-time asserts in rsvd_bits() to guard against KVM passing in garbage hardcoded values, and cap the upper bound at '63' for dynamic values to prevent generating a mask that would overflow a u64. Suggested-by: Paolo Bonzini Signed-off-by: Sean Christopherson Message-Id: <20210113204515.3473079-1-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h index 581925e476d6c..261be1d2032ba 100644 --- a/arch/x86/kvm/mmu.h +++ b/arch/x86/kvm/mmu.h @@ -44,8 +44,15 @@ #define PT32_ROOT_LEVEL 2 #define PT32E_ROOT_LEVEL 3 -static inline u64 rsvd_bits(int s, int e) +static __always_inline u64 rsvd_bits(int s, int e) { + BUILD_BUG_ON(__builtin_constant_p(e) && __builtin_constant_p(s) && e < s); + + if (__builtin_constant_p(e)) + BUILD_BUG_ON(e > 63); + else + e &= 63; + if (e < s) return 0; -- GitLab From e61ab2a320c3dfd6209efe18a575979e07470597 Mon Sep 17 00:00:00 2001 From: Like Xu Date: Mon, 18 Jan 2021 10:58:00 +0800 Subject: [PATCH 1440/2012] KVM: x86/pmu: Fix UBSAN shift-out-of-bounds warning in intel_pmu_refresh() Since we know vPMU will not work properly when (1) the guest bit_width(s) of the [gp|fixed] counters are greater than the host ones, or (2) guest requested architectural events exceeds the range supported by the host, so we can setup a smaller left shift value and refresh the guest cpuid entry, thus fixing the following UBSAN shift-out-of-bounds warning: shift exponent 197 is too large for 64-bit type 'long long unsigned int' Call Trace: __dump_stack lib/dump_stack.c:79 [inline] dump_stack+0x107/0x163 lib/dump_stack.c:120 ubsan_epilogue+0xb/0x5a lib/ubsan.c:148 __ubsan_handle_shift_out_of_bounds.cold+0xb1/0x181 lib/ubsan.c:395 intel_pmu_refresh.cold+0x75/0x99 arch/x86/kvm/vmx/pmu_intel.c:348 kvm_vcpu_after_set_cpuid+0x65a/0xf80 arch/x86/kvm/cpuid.c:177 kvm_vcpu_ioctl_set_cpuid2+0x160/0x440 arch/x86/kvm/cpuid.c:308 kvm_arch_vcpu_ioctl+0x11b6/0x2d70 arch/x86/kvm/x86.c:4709 kvm_vcpu_ioctl+0x7b9/0xdb0 arch/x86/kvm/../../../virt/kvm/kvm_main.c:3386 vfs_ioctl fs/ioctl.c:48 [inline] __do_sys_ioctl fs/ioctl.c:753 [inline] __se_sys_ioctl fs/ioctl.c:739 [inline] __x64_sys_ioctl+0x193/0x200 fs/ioctl.c:739 do_syscall_64+0x2d/0x70 arch/x86/entry/common.c:46 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Reported-by: syzbot+ae488dc136a4cc6ba32b@syzkaller.appspotmail.com Signed-off-by: Like Xu Message-Id: <20210118025800.34620-1-like.xu@linux.intel.com> Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/pmu_intel.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c index a886a47daebda..d1584ae6625a1 100644 --- a/arch/x86/kvm/vmx/pmu_intel.c +++ b/arch/x86/kvm/vmx/pmu_intel.c @@ -345,7 +345,9 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu) pmu->nr_arch_gp_counters = min_t(int, eax.split.num_counters, x86_pmu.num_counters_gp); + eax.split.bit_width = min_t(int, eax.split.bit_width, x86_pmu.bit_width_gp); pmu->counter_bitmask[KVM_PMC_GP] = ((u64)1 << eax.split.bit_width) - 1; + eax.split.mask_length = min_t(int, eax.split.mask_length, x86_pmu.events_mask_len); pmu->available_event_types = ~entry->ebx & ((1ull << eax.split.mask_length) - 1); @@ -355,6 +357,8 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu) pmu->nr_arch_fixed_counters = min_t(int, edx.split.num_counters_fixed, x86_pmu.num_counters_fixed); + edx.split.bit_width_fixed = min_t(int, + edx.split.bit_width_fixed, x86_pmu.bit_width_fixed); pmu->counter_bitmask[KVM_PMC_FIXED] = ((u64)1 << edx.split.bit_width_fixed) - 1; } -- GitLab From 98dd2f108e448988d91e296173e773b06fb978b8 Mon Sep 17 00:00:00 2001 From: Like Xu Date: Wed, 30 Dec 2020 16:19:16 +0800 Subject: [PATCH 1441/2012] KVM: x86/pmu: Fix HW_REF_CPU_CYCLES event pseudo-encoding in intel_arch_events[] The HW_REF_CPU_CYCLES event on the fixed counter 2 is pseudo-encoded as 0x0300 in the intel_perfmon_event_map[]. Correct its usage. Fixes: 62079d8a4312 ("KVM: PMU: add proper support for fixed counter 2") Signed-off-by: Like Xu Message-Id: <20201230081916.63417-1-like.xu@linux.intel.com> Reviewed-by: Sean Christopherson Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/pmu_intel.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c index d1584ae6625a1..cdf5f34518f43 100644 --- a/arch/x86/kvm/vmx/pmu_intel.c +++ b/arch/x86/kvm/vmx/pmu_intel.c @@ -29,7 +29,7 @@ static struct kvm_event_hw_type_mapping intel_arch_events[] = { [4] = { 0x2e, 0x41, PERF_COUNT_HW_CACHE_MISSES }, [5] = { 0xc4, 0x00, PERF_COUNT_HW_BRANCH_INSTRUCTIONS }, [6] = { 0xc5, 0x00, PERF_COUNT_HW_BRANCH_MISSES }, - [7] = { 0x00, 0x30, PERF_COUNT_HW_REF_CPU_CYCLES }, + [7] = { 0x00, 0x03, PERF_COUNT_HW_REF_CPU_CYCLES }, }; /* mapping between fixed pmc index and intel_arch_events array */ -- GitLab From 1f7becf1b7e21794fc9d460765fe09679bc9b9e0 Mon Sep 17 00:00:00 2001 From: Jay Zhou Date: Mon, 18 Jan 2021 16:47:20 +0800 Subject: [PATCH 1442/2012] KVM: x86: get smi pending status correctly The injection process of smi has two steps: Qemu KVM Step1: cpu->interrupt_request &= \ ~CPU_INTERRUPT_SMI; kvm_vcpu_ioctl(cpu, KVM_SMI) call kvm_vcpu_ioctl_smi() and kvm_make_request(KVM_REQ_SMI, vcpu); Step2: kvm_vcpu_ioctl(cpu, KVM_RUN, 0) call process_smi() if kvm_check_request(KVM_REQ_SMI, vcpu) is true, mark vcpu->arch.smi_pending = true; The vcpu->arch.smi_pending will be set true in step2, unfortunately if vcpu paused between step1 and step2, the kvm_run->immediate_exit will be set and vcpu has to exit to Qemu immediately during step2 before mark vcpu->arch.smi_pending true. During VM migration, Qemu will get the smi pending status from KVM using KVM_GET_VCPU_EVENTS ioctl at the downtime, then the smi pending status will be lost. Signed-off-by: Jay Zhou Signed-off-by: Shengen Zhuang Message-Id: <20210118084720.1585-1-jianjay.zhou@huawei.com> Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 9a8969a6dd063..9025c7673af61 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -105,6 +105,7 @@ static u64 __read_mostly cr4_reserved_bits = CR4_RESERVED_BITS; static void update_cr8_intercept(struct kvm_vcpu *vcpu); static void process_nmi(struct kvm_vcpu *vcpu); +static void process_smi(struct kvm_vcpu *vcpu); static void enter_smm(struct kvm_vcpu *vcpu); static void __kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags); static void store_regs(struct kvm_vcpu *vcpu); @@ -4230,6 +4231,9 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu, { process_nmi(vcpu); + if (kvm_check_request(KVM_REQ_SMI, vcpu)) + process_smi(vcpu); + /* * In guest mode, payload delivery should be deferred, * so that the L1 hypervisor can intercept #PF before -- GitLab From 01ead84ccd23afadebe66aea0eda002ac29ca9be Mon Sep 17 00:00:00 2001 From: Zenghui Yu Date: Tue, 8 Dec 2020 12:34:39 +0800 Subject: [PATCH 1443/2012] KVM: Documentation: Update description of KVM_{GET,CLEAR}_DIRTY_LOG Update various words, including the wrong parameter name and the vague description of the usage of "slot" field. Signed-off-by: Zenghui Yu Message-Id: <20201208043439.895-1-yuzenghui@huawei.com> Signed-off-by: Paolo Bonzini --- Documentation/virt/kvm/api.rst | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index 40b943a9bd1da..99ceb978c8b08 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -360,10 +360,9 @@ since the last call to this ioctl. Bit 0 is the first page in the memory slot. Ensure the entire structure is cleared to avoid padding issues. -If KVM_CAP_MULTI_ADDRESS_SPACE is available, bits 16-31 specifies -the address space for which you want to return the dirty bitmap. -They must be less than the value that KVM_CHECK_EXTENSION returns for -the KVM_CAP_MULTI_ADDRESS_SPACE capability. +If KVM_CAP_MULTI_ADDRESS_SPACE is available, bits 16-31 of slot field specifies +the address space for which you want to return the dirty bitmap. See +KVM_SET_USER_MEMORY_REGION for details on the usage of slot field. The bits in the dirty bitmap are cleared before the ioctl returns, unless KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2 is enabled. For more information, @@ -4435,7 +4434,7 @@ to I/O ports. :Capability: KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2 :Architectures: x86, arm, arm64, mips :Type: vm ioctl -:Parameters: struct kvm_dirty_log (in) +:Parameters: struct kvm_clear_dirty_log (in) :Returns: 0 on success, -1 on error :: @@ -4462,10 +4461,9 @@ in KVM's dirty bitmap, and dirty tracking is re-enabled for that page (for example via write-protection, or by clearing the dirty bit in a page table entry). -If KVM_CAP_MULTI_ADDRESS_SPACE is available, bits 16-31 specifies -the address space for which you want to return the dirty bitmap. -They must be less than the value that KVM_CHECK_EXTENSION returns for -the KVM_CAP_MULTI_ADDRESS_SPACE capability. +If KVM_CAP_MULTI_ADDRESS_SPACE is available, bits 16-31 of slot field specifies +the address space for which you want to clear the dirty status. See +KVM_SET_USER_MEMORY_REGION for details on the usage of slot field. This ioctl is mostly useful when KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2 is enabled; for more information, see the description of the capability. -- GitLab From d95df9510679757bdfc22376d351cdf367b3a604 Mon Sep 17 00:00:00 2001 From: Lorenzo Brescia Date: Wed, 23 Dec 2020 14:45:07 +0000 Subject: [PATCH 1444/2012] kvm: tracing: Fix unmatched kvm_entry and kvm_exit events On VMX, if we exit and then re-enter immediately without leaving the vmx_vcpu_run() function, the kvm_entry event is not logged. That means we will see one (or more) kvm_exit, without its (their) corresponding kvm_entry, as shown here: CPU-1979 [002] 89.871187: kvm_entry: vcpu 1 CPU-1979 [002] 89.871218: kvm_exit: reason MSR_WRITE CPU-1979 [002] 89.871259: kvm_exit: reason MSR_WRITE It also seems possible for a kvm_entry event to be logged, but then we leave vmx_vcpu_run() right away (if vmx->emulation_required is true). In this case, we will have a spurious kvm_entry event in the trace. Fix these situations by moving trace_kvm_entry() inside vmx_vcpu_run() (where trace_kvm_exit() already is). A trace obtained with this patch applied looks like this: CPU-14295 [000] 8388.395387: kvm_entry: vcpu 0 CPU-14295 [000] 8388.395392: kvm_exit: reason MSR_WRITE CPU-14295 [000] 8388.395393: kvm_entry: vcpu 0 CPU-14295 [000] 8388.395503: kvm_exit: reason EXTERNAL_INTERRUPT Of course, not calling trace_kvm_entry() in common x86 code any longer means that we need to adjust the SVM side of things too. Signed-off-by: Lorenzo Brescia Signed-off-by: Dario Faggioli Message-Id: <160873470698.11652.13483635328769030605.stgit@Wayrath> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/svm.c | 2 ++ arch/x86/kvm/vmx/vmx.c | 2 ++ arch/x86/kvm/x86.c | 3 +-- 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 7ef171790d02b..f923e14e87df2 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -3739,6 +3739,8 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); + trace_kvm_entry(vcpu); + svm->vmcb->save.rax = vcpu->arch.regs[VCPU_REGS_RAX]; svm->vmcb->save.rsp = vcpu->arch.regs[VCPU_REGS_RSP]; svm->vmcb->save.rip = vcpu->arch.regs[VCPU_REGS_RIP]; diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 2af05d3b05909..cc60b1fc3ee71 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -6653,6 +6653,8 @@ reenter_guest: if (vmx->emulation_required) return EXIT_FASTPATH_NONE; + trace_kvm_entry(vcpu); + if (vmx->ple_window_dirty) { vmx->ple_window_dirty = false; vmcs_write32(PLE_WINDOW, vmx->ple_window); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 9025c7673af61..1f64e8b97605d 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -8992,8 +8992,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) kvm_x86_ops.request_immediate_exit(vcpu); } - trace_kvm_entry(vcpu); - fpregs_assert_state_consistent(); if (test_thread_flag(TIF_NEED_FPU_LOAD)) switch_fpu_return(); @@ -11560,6 +11558,7 @@ int kvm_sev_es_string_io(struct kvm_vcpu *vcpu, unsigned int size, } EXPORT_SYMBOL_GPL(kvm_sev_es_string_io); +EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_entry); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_fast_mmio); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_inj_virq); -- GitLab From d51e1d3f6b4236e0352407d8a63f5c5f71ce193d Mon Sep 17 00:00:00 2001 From: Maxim Levitsky Date: Thu, 14 Jan 2021 22:54:47 +0200 Subject: [PATCH 1445/2012] KVM: nVMX: Sync unsync'd vmcs02 state to vmcs12 on migration Even when we are outside the nested guest, some vmcs02 fields may not be in sync vs vmcs12. This is intentional, even across nested VM-exit, because the sync can be delayed until the nested hypervisor performs a VMCLEAR or a VMREAD/VMWRITE that affects those rarely accessed fields. However, during KVM_GET_NESTED_STATE, the vmcs12 has to be up to date to be able to restore it. To fix that, call copy_vmcs02_to_vmcs12_rare() before the vmcs12 contents are copied to userspace. Fixes: 7952d769c29ca ("KVM: nVMX: Sync rarely accessed guest fields only when needed") Reviewed-by: Sean Christopherson Signed-off-by: Maxim Levitsky Message-Id: <20210114205449.8715-2-mlevitsk@redhat.com> Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/nested.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 0fbb46990dfce..776688f9d1017 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -6077,11 +6077,14 @@ static int vmx_get_nested_state(struct kvm_vcpu *vcpu, if (is_guest_mode(vcpu)) { sync_vmcs02_to_vmcs12(vcpu, vmcs12); sync_vmcs02_to_vmcs12_rare(vcpu, vmcs12); - } else if (!vmx->nested.need_vmcs12_to_shadow_sync) { - if (vmx->nested.hv_evmcs) - copy_enlightened_to_vmcs12(vmx); - else if (enable_shadow_vmcs) - copy_shadow_to_vmcs12(vmx); + } else { + copy_vmcs02_to_vmcs12_rare(vcpu, get_vmcs12(vcpu)); + if (!vmx->nested.need_vmcs12_to_shadow_sync) { + if (vmx->nested.hv_evmcs) + copy_enlightened_to_vmcs12(vmx); + else if (enable_shadow_vmcs) + copy_shadow_to_vmcs12(vmx); + } } BUILD_BUG_ON(sizeof(user_vmx_nested_state->vmcs12) < VMCS12_SIZE); -- GitLab From 250091409a4ac567581c1f929eb39139b57b56ec Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 22 Jan 2021 15:50:47 -0800 Subject: [PATCH 1446/2012] KVM: SVM: Unconditionally sync GPRs to GHCB on VMRUN of SEV-ES guest Drop the per-GPR dirty checks when synchronizing GPRs to the GHCB, the GRPs' dirty bits are set from time zero and never cleared, i.e. will always be seen as dirty. The obvious alternative would be to clear the dirty bits when appropriate, but removing the dirty checks is desirable as it allows reverting GPR dirty+available tracking, which adds overhead to all flavors of x86 VMs. Note, unconditionally writing the GPRs in the GHCB is tacitly allowed by the GHCB spec, which allows the hypervisor (or guest) to provide unnecessary info; it's the guest's responsibility to consume only what it needs (the hypervisor is untrusted after all). The guest and hypervisor can supply additional state if desired but must not rely on that additional state being provided. Cc: Brijesh Singh Cc: Tom Lendacky Fixes: 291bd20d5d88 ("KVM: SVM: Add initial support for a VMGEXIT VMEXIT") Signed-off-by: Sean Christopherson Message-Id: <20210122235049.3107620-2-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/sev.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index c8ffdbc81709e..ac652bc476ae7 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -1415,16 +1415,13 @@ static void sev_es_sync_to_ghcb(struct vcpu_svm *svm) * to be returned: * GPRs RAX, RBX, RCX, RDX * - * Copy their values to the GHCB if they are dirty. + * Copy their values, even if they may not have been written during the + * VM-Exit. It's the guest's responsibility to not consume random data. */ - if (kvm_register_is_dirty(vcpu, VCPU_REGS_RAX)) - ghcb_set_rax(ghcb, vcpu->arch.regs[VCPU_REGS_RAX]); - if (kvm_register_is_dirty(vcpu, VCPU_REGS_RBX)) - ghcb_set_rbx(ghcb, vcpu->arch.regs[VCPU_REGS_RBX]); - if (kvm_register_is_dirty(vcpu, VCPU_REGS_RCX)) - ghcb_set_rcx(ghcb, vcpu->arch.regs[VCPU_REGS_RCX]); - if (kvm_register_is_dirty(vcpu, VCPU_REGS_RDX)) - ghcb_set_rdx(ghcb, vcpu->arch.regs[VCPU_REGS_RDX]); + ghcb_set_rax(ghcb, vcpu->arch.regs[VCPU_REGS_RAX]); + ghcb_set_rbx(ghcb, vcpu->arch.regs[VCPU_REGS_RBX]); + ghcb_set_rcx(ghcb, vcpu->arch.regs[VCPU_REGS_RCX]); + ghcb_set_rdx(ghcb, vcpu->arch.regs[VCPU_REGS_RDX]); } static void sev_es_sync_from_ghcb(struct vcpu_svm *svm) -- GitLab From aed89418de9a881419516fa0a5643577f521efc9 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 22 Jan 2021 15:50:48 -0800 Subject: [PATCH 1447/2012] KVM: x86: Revert "KVM: x86: Mark GPRs dirty when written" Revert the dirty/available tracking of GPRs now that KVM copies the GPRs to the GHCB on any post-VMGEXIT VMRUN, even if a GPR is not dirty. Per commit de3cd117ed2f ("KVM: x86: Omit caching logic for always-available GPRs"), tracking for GPRs noticeably impacts KVM's code footprint. This reverts commit 1c04d8c986567c27c56c05205dceadc92efb14ff. Signed-off-by: Sean Christopherson Message-Id: <20210122235049.3107620-3-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/kvm_cache_regs.h | 51 +++++++++++++++++------------------ 1 file changed, 25 insertions(+), 26 deletions(-) diff --git a/arch/x86/kvm/kvm_cache_regs.h b/arch/x86/kvm/kvm_cache_regs.h index f15bc16de07c3..a889563ad02d5 100644 --- a/arch/x86/kvm/kvm_cache_regs.h +++ b/arch/x86/kvm/kvm_cache_regs.h @@ -9,31 +9,6 @@ (X86_CR4_PVI | X86_CR4_DE | X86_CR4_PCE | X86_CR4_OSFXSR \ | X86_CR4_OSXMMEXCPT | X86_CR4_PGE | X86_CR4_TSD | X86_CR4_FSGSBASE) -static inline bool kvm_register_is_available(struct kvm_vcpu *vcpu, - enum kvm_reg reg) -{ - return test_bit(reg, (unsigned long *)&vcpu->arch.regs_avail); -} - -static inline bool kvm_register_is_dirty(struct kvm_vcpu *vcpu, - enum kvm_reg reg) -{ - return test_bit(reg, (unsigned long *)&vcpu->arch.regs_dirty); -} - -static inline void kvm_register_mark_available(struct kvm_vcpu *vcpu, - enum kvm_reg reg) -{ - __set_bit(reg, (unsigned long *)&vcpu->arch.regs_avail); -} - -static inline void kvm_register_mark_dirty(struct kvm_vcpu *vcpu, - enum kvm_reg reg) -{ - __set_bit(reg, (unsigned long *)&vcpu->arch.regs_avail); - __set_bit(reg, (unsigned long *)&vcpu->arch.regs_dirty); -} - #define BUILD_KVM_GPR_ACCESSORS(lname, uname) \ static __always_inline unsigned long kvm_##lname##_read(struct kvm_vcpu *vcpu)\ { \ @@ -43,7 +18,6 @@ static __always_inline void kvm_##lname##_write(struct kvm_vcpu *vcpu, \ unsigned long val) \ { \ vcpu->arch.regs[VCPU_REGS_##uname] = val; \ - kvm_register_mark_dirty(vcpu, VCPU_REGS_##uname); \ } BUILD_KVM_GPR_ACCESSORS(rax, RAX) BUILD_KVM_GPR_ACCESSORS(rbx, RBX) @@ -63,6 +37,31 @@ BUILD_KVM_GPR_ACCESSORS(r14, R14) BUILD_KVM_GPR_ACCESSORS(r15, R15) #endif +static inline bool kvm_register_is_available(struct kvm_vcpu *vcpu, + enum kvm_reg reg) +{ + return test_bit(reg, (unsigned long *)&vcpu->arch.regs_avail); +} + +static inline bool kvm_register_is_dirty(struct kvm_vcpu *vcpu, + enum kvm_reg reg) +{ + return test_bit(reg, (unsigned long *)&vcpu->arch.regs_dirty); +} + +static inline void kvm_register_mark_available(struct kvm_vcpu *vcpu, + enum kvm_reg reg) +{ + __set_bit(reg, (unsigned long *)&vcpu->arch.regs_avail); +} + +static inline void kvm_register_mark_dirty(struct kvm_vcpu *vcpu, + enum kvm_reg reg) +{ + __set_bit(reg, (unsigned long *)&vcpu->arch.regs_avail); + __set_bit(reg, (unsigned long *)&vcpu->arch.regs_dirty); +} + static inline unsigned long kvm_register_read(struct kvm_vcpu *vcpu, int reg) { if (WARN_ON_ONCE((unsigned int)reg >= NR_VCPU_REGS)) -- GitLab From 9a78e15802a87de2b08dfd1bd88e855201d2c8fa Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 8 Jan 2021 11:43:08 -0500 Subject: [PATCH 1448/2012] KVM: x86: allow KVM_REQ_GET_NESTED_STATE_PAGES outside guest mode for VMX VMX also uses KVM_REQ_GET_NESTED_STATE_PAGES for the Hyper-V eVMCS, which may need to be loaded outside guest mode. Therefore we cannot WARN in that case. However, that part of nested_get_vmcs12_pages is _not_ needed at vmentry time. Split it out of KVM_REQ_GET_NESTED_STATE_PAGES handling, so that both vmentry and migration (and in the latter case, independent of is_guest_mode) do the parts that are needed. Cc: # 5.10.x: f2c7ef3ba: KVM: nSVM: cancel KVM_REQ_GET_NESTED_STATE_PAGES Cc: # 5.10.x Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/nested.c | 3 +++ arch/x86/kvm/vmx/nested.c | 31 +++++++++++++++++++++++++------ arch/x86/kvm/x86.c | 4 +--- 3 files changed, 29 insertions(+), 9 deletions(-) diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index cb4c6ee10029c..7a605ad8254db 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -200,6 +200,9 @@ static bool svm_get_nested_state_pages(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); + if (WARN_ON(!is_guest_mode(vcpu))) + return true; + if (!nested_svm_vmrun_msrpm(svm)) { vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; vcpu->run->internal.suberror = diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 776688f9d1017..f2b9bfb582067 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -3124,13 +3124,9 @@ static int nested_vmx_check_vmentry_hw(struct kvm_vcpu *vcpu) return 0; } -static bool nested_get_vmcs12_pages(struct kvm_vcpu *vcpu) +static bool nested_get_evmcs_page(struct kvm_vcpu *vcpu) { - struct vmcs12 *vmcs12 = get_vmcs12(vcpu); struct vcpu_vmx *vmx = to_vmx(vcpu); - struct kvm_host_map *map; - struct page *page; - u64 hpa; /* * hv_evmcs may end up being not mapped after migration (when @@ -3153,6 +3149,17 @@ static bool nested_get_vmcs12_pages(struct kvm_vcpu *vcpu) } } + return true; +} + +static bool nested_get_vmcs12_pages(struct kvm_vcpu *vcpu) +{ + struct vmcs12 *vmcs12 = get_vmcs12(vcpu); + struct vcpu_vmx *vmx = to_vmx(vcpu); + struct kvm_host_map *map; + struct page *page; + u64 hpa; + if (nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) { /* * Translate L1 physical address to host physical @@ -3221,6 +3228,18 @@ static bool nested_get_vmcs12_pages(struct kvm_vcpu *vcpu) exec_controls_setbit(vmx, CPU_BASED_USE_MSR_BITMAPS); else exec_controls_clearbit(vmx, CPU_BASED_USE_MSR_BITMAPS); + + return true; +} + +static bool vmx_get_nested_state_pages(struct kvm_vcpu *vcpu) +{ + if (!nested_get_evmcs_page(vcpu)) + return false; + + if (is_guest_mode(vcpu) && !nested_get_vmcs12_pages(vcpu)) + return false; + return true; } @@ -6605,7 +6624,7 @@ struct kvm_x86_nested_ops vmx_nested_ops = { .hv_timer_pending = nested_vmx_preemption_timer_pending, .get_state = vmx_get_nested_state, .set_state = vmx_set_nested_state, - .get_nested_state_pages = nested_get_vmcs12_pages, + .get_nested_state_pages = vmx_get_nested_state_pages, .write_log_dirty = nested_vmx_write_pml_buffer, .enable_evmcs = nested_enable_evmcs, .get_evmcs_version = nested_get_evmcs_version, diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 1f64e8b97605d..76bce832cade2 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -8806,9 +8806,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) if (kvm_request_pending(vcpu)) { if (kvm_check_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu)) { - if (WARN_ON_ONCE(!is_guest_mode(vcpu))) - ; - else if (unlikely(!kvm_x86_ops.nested_ops->get_nested_state_pages(vcpu))) { + if (unlikely(!kvm_x86_ops.nested_ops->get_nested_state_pages(vcpu))) { r = 0; goto out; } -- GitLab From 012da53d1afb619556f1a63c9da76b15888b190f Mon Sep 17 00:00:00 2001 From: Darby Payne Date: Wed, 6 Jan 2021 11:02:42 -0800 Subject: [PATCH 1449/2012] ipvs: add weighted random twos choice algorithm Adds the random twos choice load-balancing algorithm. The algorithm will pick two random servers based on weights. Then select the server with the least amount of connections normalized by weight. The algorithm avoids the "herd behavior" problem. The algorithm comes from a paper by Michael Mitzenmacher available here http://www.eecs.harvard.edu/~michaelm/NEWWORK/postscripts/twosurvey.pdf Signed-off-by: Darby Payne Acked-by: Julian Anastasov Acked-by: Simon Horman Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipvs/Kconfig | 11 +++ net/netfilter/ipvs/Makefile | 1 + net/netfilter/ipvs/ip_vs_twos.c | 139 ++++++++++++++++++++++++++++++++ 3 files changed, 151 insertions(+) create mode 100644 net/netfilter/ipvs/ip_vs_twos.c diff --git a/net/netfilter/ipvs/Kconfig b/net/netfilter/ipvs/Kconfig index eb0e329f9b8d6..8ca542a759d41 100644 --- a/net/netfilter/ipvs/Kconfig +++ b/net/netfilter/ipvs/Kconfig @@ -271,6 +271,17 @@ config IP_VS_NQ If you want to compile it in kernel, say Y. To compile it as a module, choose M here. If unsure, say N. +config IP_VS_TWOS + tristate "weighted random twos choice least-connection scheduling" + help + The weighted random twos choice least-connection scheduling + algorithm picks two random real servers and directs network + connections to the server with the least active connections + normalized by the server weight. + + If you want to compile it in kernel, say Y. To compile it as a + module, choose M here. If unsure, say N. + comment 'IPVS SH scheduler' config IP_VS_SH_TAB_BITS diff --git a/net/netfilter/ipvs/Makefile b/net/netfilter/ipvs/Makefile index bfce2677fda26..bb5d8125c82ab 100644 --- a/net/netfilter/ipvs/Makefile +++ b/net/netfilter/ipvs/Makefile @@ -36,6 +36,7 @@ obj-$(CONFIG_IP_VS_SH) += ip_vs_sh.o obj-$(CONFIG_IP_VS_MH) += ip_vs_mh.o obj-$(CONFIG_IP_VS_SED) += ip_vs_sed.o obj-$(CONFIG_IP_VS_NQ) += ip_vs_nq.o +obj-$(CONFIG_IP_VS_TWOS) += ip_vs_twos.o # IPVS application helpers obj-$(CONFIG_IP_VS_FTP) += ip_vs_ftp.o diff --git a/net/netfilter/ipvs/ip_vs_twos.c b/net/netfilter/ipvs/ip_vs_twos.c new file mode 100644 index 0000000000000..acb55d8393ef6 --- /dev/null +++ b/net/netfilter/ipvs/ip_vs_twos.c @@ -0,0 +1,139 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* IPVS: Power of Twos Choice Scheduling module + * + * Authors: Darby Payne + */ + +#define KMSG_COMPONENT "IPVS" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + +#include +#include +#include + +#include + +/* Power of Twos Choice scheduling, algorithm originally described by + * Michael Mitzenmacher. + * + * Randomly picks two destinations and picks the one with the least + * amount of connections + * + * The algorithm calculates a few variables + * - total_weight = sum of all weights + * - rweight1 = random number between [0,total_weight] + * - rweight2 = random number between [0,total_weight] + * + * For each destination + * decrement rweight1 and rweight2 by the destination weight + * pick choice1 when rweight1 is <= 0 + * pick choice2 when rweight2 is <= 0 + * + * Return choice2 if choice2 has less connections than choice 1 normalized + * by weight + * + * References + * ---------- + * + * [Mitzenmacher 2016] + * The Power of Two Random Choices: A Survey of Techniques and Results + * Michael Mitzenmacher, Andrea W. Richa y, Ramesh Sitaraman + * http://www.eecs.harvard.edu/~michaelm/NEWWORK/postscripts/twosurvey.pdf + * + */ +static struct ip_vs_dest *ip_vs_twos_schedule(struct ip_vs_service *svc, + const struct sk_buff *skb, + struct ip_vs_iphdr *iph) +{ + struct ip_vs_dest *dest, *choice1 = NULL, *choice2 = NULL; + int rweight1, rweight2, weight1 = -1, weight2 = -1, overhead1 = 0; + int overhead2, total_weight = 0, weight; + + IP_VS_DBG(6, "%s(): Scheduling...\n", __func__); + + /* Generate a random weight between [0,sum of all weights) */ + list_for_each_entry_rcu(dest, &svc->destinations, n_list) { + if (!(dest->flags & IP_VS_DEST_F_OVERLOAD)) { + weight = atomic_read(&dest->weight); + if (weight > 0) { + total_weight += weight; + choice1 = dest; + } + } + } + + if (!choice1) { + ip_vs_scheduler_err(svc, "no destination available"); + return NULL; + } + + /* Add 1 to total_weight so that the random weights are inclusive + * from 0 to total_weight + */ + total_weight += 1; + rweight1 = prandom_u32() % total_weight; + rweight2 = prandom_u32() % total_weight; + + /* Pick two weighted servers */ + list_for_each_entry_rcu(dest, &svc->destinations, n_list) { + if (dest->flags & IP_VS_DEST_F_OVERLOAD) + continue; + + weight = atomic_read(&dest->weight); + if (weight <= 0) + continue; + + rweight1 -= weight; + rweight2 -= weight; + + if (rweight1 <= 0 && weight1 == -1) { + choice1 = dest; + weight1 = weight; + overhead1 = ip_vs_dest_conn_overhead(dest); + } + + if (rweight2 <= 0 && weight2 == -1) { + choice2 = dest; + weight2 = weight; + overhead2 = ip_vs_dest_conn_overhead(dest); + } + + if (weight1 != -1 && weight2 != -1) + goto nextstage; + } + +nextstage: + if (choice2 && (weight2 * overhead1) > (weight1 * overhead2)) + choice1 = choice2; + + IP_VS_DBG_BUF(6, "twos: server %s:%u conns %d refcnt %d weight %d\n", + IP_VS_DBG_ADDR(choice1->af, &choice1->addr), + ntohs(choice1->port), atomic_read(&choice1->activeconns), + refcount_read(&choice1->refcnt), + atomic_read(&choice1->weight)); + + return choice1; +} + +static struct ip_vs_scheduler ip_vs_twos_scheduler = { + .name = "twos", + .refcnt = ATOMIC_INIT(0), + .module = THIS_MODULE, + .n_list = LIST_HEAD_INIT(ip_vs_twos_scheduler.n_list), + .schedule = ip_vs_twos_schedule, +}; + +static int __init ip_vs_twos_init(void) +{ + return register_ip_vs_scheduler(&ip_vs_twos_scheduler); +} + +static void __exit ip_vs_twos_cleanup(void) +{ + unregister_ip_vs_scheduler(&ip_vs_twos_scheduler); + synchronize_rcu(); +} + +module_init(ip_vs_twos_init); +module_exit(ip_vs_twos_cleanup); +MODULE_LICENSE("GPL"); -- GitLab From dbc859d96f1a90bafe9c3ba2e437aae5d5677318 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 25 Jan 2021 18:56:01 +0100 Subject: [PATCH 1450/2012] netfilter: flowtable: add hash offset field to tuple Add a placeholder field to calculate hash tuple offset. Similar to 2c407aca6497 ("netfilter: conntrack: avoid gcc-10 zero-length-bounds warning"). Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_flow_table.h | 4 ++++ net/netfilter/nf_flow_table_core.c | 6 +++--- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h index 16e8b2f8d006a..54c4d5c908a52 100644 --- a/include/net/netfilter/nf_flow_table.h +++ b/include/net/netfilter/nf_flow_table.h @@ -107,6 +107,10 @@ struct flow_offload_tuple { u8 l3proto; u8 l4proto; + + /* All members above are keys for lookups, see flow_offload_hash(). */ + struct { } __hash; + u8 dir; u16 mtu; diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c index 513f78db3cb2f..55fca71ace262 100644 --- a/net/netfilter/nf_flow_table_core.c +++ b/net/netfilter/nf_flow_table_core.c @@ -191,14 +191,14 @@ static u32 flow_offload_hash(const void *data, u32 len, u32 seed) { const struct flow_offload_tuple *tuple = data; - return jhash(tuple, offsetof(struct flow_offload_tuple, dir), seed); + return jhash(tuple, offsetof(struct flow_offload_tuple, __hash), seed); } static u32 flow_offload_hash_obj(const void *data, u32 len, u32 seed) { const struct flow_offload_tuple_rhash *tuplehash = data; - return jhash(&tuplehash->tuple, offsetof(struct flow_offload_tuple, dir), seed); + return jhash(&tuplehash->tuple, offsetof(struct flow_offload_tuple, __hash), seed); } static int flow_offload_hash_cmp(struct rhashtable_compare_arg *arg, @@ -207,7 +207,7 @@ static int flow_offload_hash_cmp(struct rhashtable_compare_arg *arg, const struct flow_offload_tuple *tuple = arg->key; const struct flow_offload_tuple_rhash *x = ptr; - if (memcmp(&x->tuple, tuple, offsetof(struct flow_offload_tuple, dir))) + if (memcmp(&x->tuple, tuple, offsetof(struct flow_offload_tuple, __hash))) return 1; return 0; -- GitLab From 0b964446c63f9d7d7cd1809ee39277b4f73916b5 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 19 Jan 2021 17:22:03 +0100 Subject: [PATCH 1451/2012] ecryptfs: fix uid translation for setxattr on security.capability Prior to commit 7c03e2cda4a5 ("vfs: move cap_convert_nscap() call into vfs_setxattr()") the translation of nscap->rootid did not take stacked filesystems (overlayfs and ecryptfs) into account. That patch fixed the overlay case, but made the ecryptfs case worse. Restore old the behavior for ecryptfs that existed before the overlayfs fix. This does not fix ecryptfs's handling of complex user namespace setups, but it does make sure existing setups don't regress. Reported-by: Eric W. Biederman Cc: Tyler Hicks Fixes: 7c03e2cda4a5 ("vfs: move cap_convert_nscap() call into vfs_setxattr()") Signed-off-by: Miklos Szeredi Signed-off-by: Tyler Hicks --- fs/ecryptfs/inode.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index e23752d9a79f3..58d0f71879979 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c @@ -1016,15 +1016,19 @@ ecryptfs_setxattr(struct dentry *dentry, struct inode *inode, { int rc; struct dentry *lower_dentry; + struct inode *lower_inode; lower_dentry = ecryptfs_dentry_to_lower(dentry); - if (!(d_inode(lower_dentry)->i_opflags & IOP_XATTR)) { + lower_inode = d_inode(lower_dentry); + if (!(lower_inode->i_opflags & IOP_XATTR)) { rc = -EOPNOTSUPP; goto out; } - rc = vfs_setxattr(lower_dentry, name, value, size, flags); + inode_lock(lower_inode); + rc = __vfs_setxattr_locked(lower_dentry, name, value, size, flags, NULL); + inode_unlock(lower_inode); if (!rc && inode) - fsstack_copy_attr_all(inode, d_inode(lower_dentry)); + fsstack_copy_attr_all(inode, lower_inode); out: return rc; } -- GitLab From b9b7421a01d82c474227fce04f0468f1c70be306 Mon Sep 17 00:00:00 2001 From: Subbaraya Sundeep Date: Sat, 23 Jan 2021 10:39:12 +0530 Subject: [PATCH 1452/2012] octeontx2-af: Support ESP/AH RSS hashing Support SPI and sequence number fields of ESP/AH header to be hashed for RSS. By default ESP/AH fields are not considered for RSS and needs to be set explicitly as below: ethtool -U eth0 rx-flow-hash esp4 sdfn or ethtool -U eth0 rx-flow-hash ah4 sdfn or ethtool -U eth0 rx-flow-hash esp6 sdfn or ethtool -U eth0 rx-flow-hash ah6 sdfn To disable hashing of ESP fields: ethtool -U eth0 rx-flow-hash esp4 sd or ethtool -U eth0 rx-flow-hash ah4 sd or ethtool -U eth0 rx-flow-hash esp6 sd or ethtool -U eth0 rx-flow-hash ah6 sd Signed-off-by: Subbaraya Sundeep Signed-off-by: Sunil Kovvuri Goutham Link: https://lore.kernel.org/r/1611378552-13288-1-git-send-email-sundeep.lkml@gmail.com Signed-off-by: Jakub Kicinski --- .../net/ethernet/marvell/octeontx2/af/mbox.h | 2 + .../ethernet/marvell/octeontx2/af/rvu_nix.c | 27 ++++++++++++++ .../marvell/octeontx2/nic/otx2_ethtool.c | 37 ++++++++++++++++++- 3 files changed, 65 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h index f919283ddc34d..89e93eb46ab73 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h @@ -717,6 +717,8 @@ struct nix_rss_flowkey_cfg { #define NIX_FLOW_KEY_TYPE_INNR_ETH_DMAC BIT(17) #define NIX_FLOW_KEY_TYPE_VLAN BIT(20) #define NIX_FLOW_KEY_TYPE_IPV4_PROTO BIT(21) +#define NIX_FLOW_KEY_TYPE_AH BIT(22) +#define NIX_FLOW_KEY_TYPE_ESP BIT(23) u32 flowkey_cfg; /* Flowkey types selected */ u8 group; /* RSS context or group */ }; diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c index a8dfbb6d17746..b54753ef7d940 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c @@ -2580,6 +2580,7 @@ static int set_flowkey_fields(struct nix_rx_flowkey_alg *alg, u32 flow_cfg) struct nix_rx_flowkey_alg *field; struct nix_rx_flowkey_alg tmp; u32 key_type, valid_key; + int l4_key_offset; if (!alg) return -EINVAL; @@ -2712,6 +2713,12 @@ static int set_flowkey_fields(struct nix_rx_flowkey_alg *alg, u32 flow_cfg) field_marker = false; keyoff_marker = false; } + + /* TCP/UDP/SCTP and ESP/AH falls at same offset so + * remember the TCP key offset of 40 byte hash key. + */ + if (key_type == NIX_FLOW_KEY_TYPE_TCP) + l4_key_offset = key_off; break; case NIX_FLOW_KEY_TYPE_NVGRE: field->lid = NPC_LID_LD; @@ -2783,11 +2790,31 @@ static int set_flowkey_fields(struct nix_rx_flowkey_alg *alg, u32 flow_cfg) field->ltype_mask = 0xF; field->fn_mask = 1; /* Mask out the first nibble */ break; + case NIX_FLOW_KEY_TYPE_AH: + case NIX_FLOW_KEY_TYPE_ESP: + field->hdr_offset = 0; + field->bytesm1 = 7; /* SPI + sequence number */ + field->ltype_mask = 0xF; + field->lid = NPC_LID_LE; + field->ltype_match = NPC_LT_LE_ESP; + if (key_type == NIX_FLOW_KEY_TYPE_AH) { + field->lid = NPC_LID_LD; + field->ltype_match = NPC_LT_LD_AH; + field->hdr_offset = 4; + keyoff_marker = false; + } + break; } field->ena = 1; /* Found a valid flow key type */ if (valid_key) { + /* Use the key offset of TCP/UDP/SCTP fields + * for ESP/AH fields. + */ + if (key_type == NIX_FLOW_KEY_TYPE_ESP || + key_type == NIX_FLOW_KEY_TYPE_AH) + key_off = l4_key_offset; field->key_offset = key_off; memcpy(&alg[nr_field], field, sizeof(*field)); max_key_off = max(max_key_off, field->bytesm1 + 1); diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c index aaba0454d188a..e0199f0e4a6c3 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c @@ -448,10 +448,14 @@ static int otx2_get_rss_hash_opts(struct otx2_nic *pfvf, nfc->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3; break; case AH_ESP_V4_FLOW: + case AH_ESP_V6_FLOW: + if (rss->flowkey_cfg & NIX_FLOW_KEY_TYPE_ESP) + nfc->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3; + break; case AH_V4_FLOW: case ESP_V4_FLOW: case IPV4_FLOW: - case AH_ESP_V6_FLOW: + break; case AH_V6_FLOW: case ESP_V6_FLOW: case IPV6_FLOW: @@ -459,6 +463,7 @@ static int otx2_get_rss_hash_opts(struct otx2_nic *pfvf, default: return -EINVAL; } + return 0; } @@ -527,6 +532,36 @@ static int otx2_set_rss_hash_opts(struct otx2_nic *pfvf, return -EINVAL; } break; + case AH_ESP_V4_FLOW: + case AH_ESP_V6_FLOW: + switch (nfc->data & rxh_l4) { + case 0: + rss_cfg &= ~(NIX_FLOW_KEY_TYPE_ESP | + NIX_FLOW_KEY_TYPE_AH); + rss_cfg |= NIX_FLOW_KEY_TYPE_VLAN | + NIX_FLOW_KEY_TYPE_IPV4_PROTO; + break; + case (RXH_L4_B_0_1 | RXH_L4_B_2_3): + /* If VLAN hashing is also requested for ESP then do not + * allow because of hardware 40 bytes flow key limit. + */ + if (rss_cfg & NIX_FLOW_KEY_TYPE_VLAN) { + netdev_err(pfvf->netdev, + "RSS hash of ESP or AH with VLAN is not supported\n"); + return -EOPNOTSUPP; + } + + rss_cfg |= NIX_FLOW_KEY_TYPE_ESP | NIX_FLOW_KEY_TYPE_AH; + /* Disable IPv4 proto hashing since IPv6 SA+DA(32 bytes) + * and ESP SPI+sequence(8 bytes) uses hardware maximum + * limit of 40 byte flow key. + */ + rss_cfg &= ~NIX_FLOW_KEY_TYPE_IPV4_PROTO; + break; + default: + return -EINVAL; + } + break; case IPV4_FLOW: case IPV6_FLOW: rss_cfg = NIX_FLOW_KEY_TYPE_IPV4 | NIX_FLOW_KEY_TYPE_IPV6; -- GitLab From 8d21c882aba8004bb996270589c17ba71e100a16 Mon Sep 17 00:00:00 2001 From: Jiapeng Zhong Date: Mon, 25 Jan 2021 10:39:41 +0800 Subject: [PATCH 1453/2012] bridge: Use PTR_ERR_OR_ZERO instead if(IS_ERR(...)) + PTR_ERR coccicheck suggested using PTR_ERR_OR_ZERO() and looking at the code. Fix the following coccicheck warnings: ./net/bridge/br_multicast.c:1295:7-13: WARNING: PTR_ERR_OR_ZERO can be used. Reported-by: Abaci Signed-off-by: Jiapeng Zhong Acked-by: Nikolay Aleksandrov Link: https://lore.kernel.org/r/1611542381-91178-1-git-send-email-abaci-bugfix@linux.alibaba.com Signed-off-by: Jakub Kicinski --- net/bridge/br_multicast.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index df5db6a58e952..3aa2833f60c7c 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -1300,7 +1300,7 @@ static int br_multicast_add_group(struct net_bridge *br, pg = __br_multicast_add_group(br, port, group, src, filter_mode, igmpv2_mldv1, false); /* NULL is considered valid for host joined groups */ - err = IS_ERR(pg) ? PTR_ERR(pg) : 0; + err = PTR_ERR_OR_ZERO(pg); spin_unlock(&br->multicast_lock); return err; -- GitLab From 4fd59792097a6b2fb949d41264386a7ecade469e Mon Sep 17 00:00:00 2001 From: DENG Qingfang Date: Mon, 25 Jan 2021 12:20:46 +0800 Subject: [PATCH 1454/2012] net: ethernet: mediatek: support setting MTU MT762x HW, except for MT7628, supports frame length up to 2048 (maximum length on GDM), so allow setting MTU up to 2030. Also set the default frame length to the hardware default 1518. Signed-off-by: DENG Qingfang Reviewed-by: Andrew Lunn Link: https://lore.kernel.org/r/20210125042046.5599-1-dqfext@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 43 ++++++++++++++++++--- drivers/net/ethernet/mediatek/mtk_eth_soc.h | 12 ++++-- 2 files changed, 47 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 6d2d60675ffd7..01d3ee4b58292 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -353,7 +353,7 @@ static void mtk_mac_config(struct phylink_config *config, unsigned int mode, /* Setup gmac */ mcr_cur = mtk_r32(mac->hw, MTK_MAC_MCR(mac->id)); mcr_new = mcr_cur; - mcr_new |= MAC_MCR_MAX_RX_1536 | MAC_MCR_IPG_CFG | MAC_MCR_FORCE_MODE | + mcr_new |= MAC_MCR_IPG_CFG | MAC_MCR_FORCE_MODE | MAC_MCR_BACKOFF_EN | MAC_MCR_BACKPR_EN | MAC_MCR_FORCE_LINK; /* Only update control register when needed! */ @@ -759,8 +759,8 @@ static void mtk_get_stats64(struct net_device *dev, static inline int mtk_max_frag_size(int mtu) { /* make sure buf_size will be at least MTK_MAX_RX_LENGTH */ - if (mtu + MTK_RX_ETH_HLEN < MTK_MAX_RX_LENGTH) - mtu = MTK_MAX_RX_LENGTH - MTK_RX_ETH_HLEN; + if (mtu + MTK_RX_ETH_HLEN < MTK_MAX_RX_LENGTH_2K) + mtu = MTK_MAX_RX_LENGTH_2K - MTK_RX_ETH_HLEN; return SKB_DATA_ALIGN(MTK_RX_HLEN + mtu) + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); @@ -771,7 +771,7 @@ static inline int mtk_max_buf_size(int frag_size) int buf_size = frag_size - NET_SKB_PAD - NET_IP_ALIGN - SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); - WARN_ON(buf_size < MTK_MAX_RX_LENGTH); + WARN_ON(buf_size < MTK_MAX_RX_LENGTH_2K); return buf_size; } @@ -2499,6 +2499,35 @@ static void mtk_uninit(struct net_device *dev) mtk_rx_irq_disable(eth, ~0); } +static int mtk_change_mtu(struct net_device *dev, int new_mtu) +{ + int length = new_mtu + MTK_RX_ETH_HLEN; + struct mtk_mac *mac = netdev_priv(dev); + struct mtk_eth *eth = mac->hw; + u32 mcr_cur, mcr_new; + + if (!MTK_HAS_CAPS(eth->soc->caps, MTK_SOC_MT7628)) { + mcr_cur = mtk_r32(mac->hw, MTK_MAC_MCR(mac->id)); + mcr_new = mcr_cur & ~MAC_MCR_MAX_RX_MASK; + + if (length <= 1518) + mcr_new |= MAC_MCR_MAX_RX(MAC_MCR_MAX_RX_1518); + else if (length <= 1536) + mcr_new |= MAC_MCR_MAX_RX(MAC_MCR_MAX_RX_1536); + else if (length <= 1552) + mcr_new |= MAC_MCR_MAX_RX(MAC_MCR_MAX_RX_1552); + else + mcr_new |= MAC_MCR_MAX_RX(MAC_MCR_MAX_RX_2048); + + if (mcr_new != mcr_cur) + mtk_w32(mac->hw, mcr_new, MTK_MAC_MCR(mac->id)); + } + + dev->mtu = new_mtu; + + return 0; +} + static int mtk_do_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) { struct mtk_mac *mac = netdev_priv(dev); @@ -2795,6 +2824,7 @@ static const struct net_device_ops mtk_netdev_ops = { .ndo_set_mac_address = mtk_set_mac_address, .ndo_validate_addr = eth_validate_addr, .ndo_do_ioctl = mtk_do_ioctl, + .ndo_change_mtu = mtk_change_mtu, .ndo_tx_timeout = mtk_tx_timeout, .ndo_get_stats64 = mtk_get_stats64, .ndo_fix_features = mtk_fix_features, @@ -2896,7 +2926,10 @@ static int mtk_add_mac(struct mtk_eth *eth, struct device_node *np) eth->netdev[id]->irq = eth->irq[0]; eth->netdev[id]->dev.of_node = np; - eth->netdev[id]->max_mtu = MTK_MAX_RX_LENGTH - MTK_RX_ETH_HLEN; + if (MTK_HAS_CAPS(eth->soc->caps, MTK_SOC_MT7628)) + eth->netdev[id]->max_mtu = MTK_MAX_RX_LENGTH - MTK_RX_ETH_HLEN; + else + eth->netdev[id]->max_mtu = MTK_MAX_RX_LENGTH_2K - MTK_RX_ETH_HLEN; return 0; diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.h b/drivers/net/ethernet/mediatek/mtk_eth_soc.h index 454cfcd465fda..fd3cec8f06bae 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h @@ -17,12 +17,13 @@ #include #define MTK_QDMA_PAGE_SIZE 2048 -#define MTK_MAX_RX_LENGTH 1536 +#define MTK_MAX_RX_LENGTH 1536 +#define MTK_MAX_RX_LENGTH_2K 2048 #define MTK_TX_DMA_BUF_LEN 0x3fff #define MTK_DMA_SIZE 256 #define MTK_NAPI_WEIGHT 64 #define MTK_MAC_COUNT 2 -#define MTK_RX_ETH_HLEN (VLAN_ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN) +#define MTK_RX_ETH_HLEN (ETH_HLEN + ETH_FCS_LEN) #define MTK_RX_HLEN (NET_SKB_PAD + MTK_RX_ETH_HLEN + NET_IP_ALIGN) #define MTK_DMA_DUMMY_DESC 0xffffffff #define MTK_DEFAULT_MSG_ENABLE (NETIF_MSG_DRV | \ @@ -320,7 +321,12 @@ /* Mac control registers */ #define MTK_MAC_MCR(x) (0x10100 + (x * 0x100)) -#define MAC_MCR_MAX_RX_1536 BIT(24) +#define MAC_MCR_MAX_RX_MASK GENMASK(25, 24) +#define MAC_MCR_MAX_RX(_x) (MAC_MCR_MAX_RX_MASK & ((_x) << 24)) +#define MAC_MCR_MAX_RX_1518 0x0 +#define MAC_MCR_MAX_RX_1536 0x1 +#define MAC_MCR_MAX_RX_1552 0x2 +#define MAC_MCR_MAX_RX_2048 0x3 #define MAC_MCR_IPG_CFG (BIT(18) | BIT(16)) #define MAC_MCR_FORCE_MODE BIT(15) #define MAC_MCR_TX_EN BIT(14) -- GitLab From 974d5ba60df74483c69a2ccf580308de68769ec7 Mon Sep 17 00:00:00 2001 From: DENG Qingfang Date: Mon, 25 Jan 2021 12:43:21 +0800 Subject: [PATCH 1455/2012] dt-bindings: net: dsa: add MT7530 GPIO controller binding Add device tree binding to support MT7530 GPIO controller. Signed-off-by: DENG Qingfang Acked-by: Rob Herring Reviewed-by: Andrew Lunn Reviewed-by: Linus Walleij Signed-off-by: Jakub Kicinski --- Documentation/devicetree/bindings/net/dsa/mt7530.txt | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/Documentation/devicetree/bindings/net/dsa/mt7530.txt b/Documentation/devicetree/bindings/net/dsa/mt7530.txt index 560369efad6ca..de04626a8e9db 100644 --- a/Documentation/devicetree/bindings/net/dsa/mt7530.txt +++ b/Documentation/devicetree/bindings/net/dsa/mt7530.txt @@ -76,6 +76,12 @@ phy-mode must be set, see also example 2 below! * mt7621: phy-mode = "rgmii-txid"; * mt7623: phy-mode = "rgmii"; +Optional properties: + +- gpio-controller: Boolean; if defined, MT7530's LED controller will run on + GPIO mode. +- #gpio-cells: Must be 2 if gpio-controller is defined. + See Documentation/devicetree/bindings/net/dsa/dsa.txt for a list of additional required, optional properties and how the integrated switch subnodes must be specified. -- GitLab From 429a0edeefd88cbfca5c417dfb8561047bb50769 Mon Sep 17 00:00:00 2001 From: DENG Qingfang Date: Mon, 25 Jan 2021 12:43:22 +0800 Subject: [PATCH 1456/2012] net: dsa: mt7530: MT7530 optional GPIO support MT7530's LED controller can drive up to 15 LED/GPIOs. Add support for GPIO control and allow users to use its GPIOs by setting gpio-controller property in device tree. Signed-off-by: DENG Qingfang Reviewed-by: Linus Walleij Reviewed-by: Andrew Lunn Signed-off-by: Jakub Kicinski --- drivers/net/dsa/mt7530.c | 110 +++++++++++++++++++++++++++++++++++++++ drivers/net/dsa/mt7530.h | 20 +++++++ 2 files changed, 130 insertions(+) diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c index d2196197d920a..eb13ba79dd018 100644 --- a/drivers/net/dsa/mt7530.c +++ b/drivers/net/dsa/mt7530.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include "mt7530.h" @@ -1622,6 +1623,109 @@ mtk_get_tag_protocol(struct dsa_switch *ds, int port, } } +static inline u32 +mt7530_gpio_to_bit(unsigned int offset) +{ + /* Map GPIO offset to register bit + * [ 2: 0] port 0 LED 0..2 as GPIO 0..2 + * [ 6: 4] port 1 LED 0..2 as GPIO 3..5 + * [10: 8] port 2 LED 0..2 as GPIO 6..8 + * [14:12] port 3 LED 0..2 as GPIO 9..11 + * [18:16] port 4 LED 0..2 as GPIO 12..14 + */ + return BIT(offset + offset / 3); +} + +static int +mt7530_gpio_get(struct gpio_chip *gc, unsigned int offset) +{ + struct mt7530_priv *priv = gpiochip_get_data(gc); + u32 bit = mt7530_gpio_to_bit(offset); + + return !!(mt7530_read(priv, MT7530_LED_GPIO_DATA) & bit); +} + +static void +mt7530_gpio_set(struct gpio_chip *gc, unsigned int offset, int value) +{ + struct mt7530_priv *priv = gpiochip_get_data(gc); + u32 bit = mt7530_gpio_to_bit(offset); + + if (value) + mt7530_set(priv, MT7530_LED_GPIO_DATA, bit); + else + mt7530_clear(priv, MT7530_LED_GPIO_DATA, bit); +} + +static int +mt7530_gpio_get_direction(struct gpio_chip *gc, unsigned int offset) +{ + struct mt7530_priv *priv = gpiochip_get_data(gc); + u32 bit = mt7530_gpio_to_bit(offset); + + return (mt7530_read(priv, MT7530_LED_GPIO_DIR) & bit) ? + GPIO_LINE_DIRECTION_OUT : GPIO_LINE_DIRECTION_IN; +} + +static int +mt7530_gpio_direction_input(struct gpio_chip *gc, unsigned int offset) +{ + struct mt7530_priv *priv = gpiochip_get_data(gc); + u32 bit = mt7530_gpio_to_bit(offset); + + mt7530_clear(priv, MT7530_LED_GPIO_OE, bit); + mt7530_clear(priv, MT7530_LED_GPIO_DIR, bit); + + return 0; +} + +static int +mt7530_gpio_direction_output(struct gpio_chip *gc, unsigned int offset, int value) +{ + struct mt7530_priv *priv = gpiochip_get_data(gc); + u32 bit = mt7530_gpio_to_bit(offset); + + mt7530_set(priv, MT7530_LED_GPIO_DIR, bit); + + if (value) + mt7530_set(priv, MT7530_LED_GPIO_DATA, bit); + else + mt7530_clear(priv, MT7530_LED_GPIO_DATA, bit); + + mt7530_set(priv, MT7530_LED_GPIO_OE, bit); + + return 0; +} + +static int +mt7530_setup_gpio(struct mt7530_priv *priv) +{ + struct device *dev = priv->dev; + struct gpio_chip *gc; + + gc = devm_kzalloc(dev, sizeof(*gc), GFP_KERNEL); + if (!gc) + return -ENOMEM; + + mt7530_write(priv, MT7530_LED_GPIO_OE, 0); + mt7530_write(priv, MT7530_LED_GPIO_DIR, 0); + mt7530_write(priv, MT7530_LED_IO_MODE, 0); + + gc->label = "mt7530"; + gc->parent = dev; + gc->owner = THIS_MODULE; + gc->get_direction = mt7530_gpio_get_direction; + gc->direction_input = mt7530_gpio_direction_input; + gc->direction_output = mt7530_gpio_direction_output; + gc->get = mt7530_gpio_get; + gc->set = mt7530_gpio_set; + gc->base = -1; + gc->ngpio = 15; + gc->can_sleep = true; + + return devm_gpiochip_add_data(dev, gc, priv); +} + static int mt7530_setup(struct dsa_switch *ds) { @@ -1763,6 +1867,12 @@ mt7530_setup(struct dsa_switch *ds) } } + if (of_property_read_bool(priv->dev->of_node, "gpio-controller")) { + ret = mt7530_setup_gpio(priv); + if (ret) + return ret; + } + mt7530_setup_port5(ds, interface); /* Flush the FDB table */ diff --git a/drivers/net/dsa/mt7530.h b/drivers/net/dsa/mt7530.h index 32d8969b3ace7..64a9bb377e158 100644 --- a/drivers/net/dsa/mt7530.h +++ b/drivers/net/dsa/mt7530.h @@ -554,6 +554,26 @@ enum mt7531_clk_skew { #define MT7531_GPIO12_RG_RXD3_MASK GENMASK(19, 16) #define MT7531_EXT_P_MDIO_12 (2 << 16) +/* Registers for LED GPIO control (MT7530 only) + * All registers follow this pattern: + * [ 2: 0] port 0 + * [ 6: 4] port 1 + * [10: 8] port 2 + * [14:12] port 3 + * [18:16] port 4 + */ + +/* LED enable, 0: Disable, 1: Enable (Default) */ +#define MT7530_LED_EN 0x7d00 +/* LED mode, 0: GPIO mode, 1: PHY mode (Default) */ +#define MT7530_LED_IO_MODE 0x7d04 +/* GPIO direction, 0: Input, 1: Output */ +#define MT7530_LED_GPIO_DIR 0x7d10 +/* GPIO output enable, 0: Disable, 1: Enable */ +#define MT7530_LED_GPIO_OE 0x7d14 +/* GPIO value, 0: Low, 1: High */ +#define MT7530_LED_GPIO_DATA 0x7d18 + #define MT7530_CREV 0x7ffc #define CHIP_NAME_SHIFT 16 #define MT7530_ID 0x7530 -- GitLab From 16db6323042f39b6f49148969e9d03d11265bc1b Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Mon, 25 Jan 2021 02:08:07 -0500 Subject: [PATCH 1457/2012] bnxt_en: Update firmware interface to 1.10.2.11. Updates to backing store APIs, QoS profiles, and push buffer initial index support. Since the new HWRM_FUNC_BACKING_STORE_CFG message size has increased, we need to add some compat. logic to fall back to the smaller legacy size if firmware cannot accept the larger message size. The new fields added to the structure are not used yet. Signed-off-by: Michael Chan Acked-by: Willem de Bruijn Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 5 +- drivers/net/ethernet/broadcom/bnxt/bnxt.h | 2 + drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h | 249 ++++++++++++++---- 3 files changed, 203 insertions(+), 53 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index d31a5ad7522a2..5daef68015121 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -6845,6 +6845,7 @@ static int bnxt_hwrm_func_backing_store_cfg(struct bnxt *bp, u32 enables) struct hwrm_func_backing_store_cfg_input req = {0}; struct bnxt_ctx_mem_info *ctx = bp->ctx; struct bnxt_ctx_pg_info *ctx_pg; + u32 req_len = sizeof(req); __le32 *num_entries; __le64 *pg_dir; u32 flags = 0; @@ -6855,6 +6856,8 @@ static int bnxt_hwrm_func_backing_store_cfg(struct bnxt *bp, u32 enables) if (!ctx) return 0; + if (req_len > bp->hwrm_max_ext_req_len) + req_len = BNXT_BACKING_STORE_CFG_LEGACY_LEN; bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FUNC_BACKING_STORE_CFG, -1, -1); req.enables = cpu_to_le32(enables); @@ -6938,7 +6941,7 @@ static int bnxt_hwrm_func_backing_store_cfg(struct bnxt *bp, u32 enables) bnxt_hwrm_set_pg_attr(&ctx_pg->ring_mem, pg_attr, pg_dir); } req.flags = cpu_to_le32(flags); - return hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT); + return hwrm_send_message(bp, &req, req_len, HWRM_CMD_TIMEOUT); } static int bnxt_alloc_ctx_mem_blk(struct bnxt *bp, diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index 51996c85547ee..d68065367cf26 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -1441,6 +1441,8 @@ struct bnxt_ctx_pg_info { #define BNXT_MAX_TQM_RINGS \ (BNXT_MAX_TQM_SP_RINGS + BNXT_MAX_TQM_FP_RINGS) +#define BNXT_BACKING_STORE_CFG_LEGACY_LEN 256 + struct bnxt_ctx_mem_info { u32 qp_max_entries; u16 qp_min_qp1_entries; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h index 2d3e962bdac34..d5c6e6a3d22d9 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h @@ -2,7 +2,7 @@ * * Copyright (c) 2014-2016 Broadcom Corporation * Copyright (c) 2014-2018 Broadcom Limited - * Copyright (c) 2018-2020 Broadcom Inc. + * Copyright (c) 2018-2021 Broadcom Inc. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -164,6 +164,7 @@ struct cmd_nums { #define HWRM_VNIC_PLCMODES_CFG 0x48UL #define HWRM_VNIC_PLCMODES_QCFG 0x49UL #define HWRM_VNIC_QCAPS 0x4aUL + #define HWRM_VNIC_UPDATE 0x4bUL #define HWRM_RING_ALLOC 0x50UL #define HWRM_RING_FREE 0x51UL #define HWRM_RING_CMPL_RING_QAGGINT_PARAMS 0x52UL @@ -184,6 +185,9 @@ struct cmd_nums { #define HWRM_QUEUE_MPLS_QCAPS 0x80UL #define HWRM_QUEUE_MPLSTC2PRI_QCFG 0x81UL #define HWRM_QUEUE_MPLSTC2PRI_CFG 0x82UL + #define HWRM_QUEUE_VLANPRI_QCAPS 0x83UL + #define HWRM_QUEUE_VLANPRI2PRI_QCFG 0x84UL + #define HWRM_QUEUE_VLANPRI2PRI_CFG 0x85UL #define HWRM_CFA_L2_FILTER_ALLOC 0x90UL #define HWRM_CFA_L2_FILTER_FREE 0x91UL #define HWRM_CFA_L2_FILTER_CFG 0x92UL @@ -217,6 +221,8 @@ struct cmd_nums { #define HWRM_PORT_TX_FIR_CFG 0xbbUL #define HWRM_PORT_TX_FIR_QCFG 0xbcUL #define HWRM_PORT_ECN_QSTATS 0xbdUL + #define HWRM_FW_LIVEPATCH_QUERY 0xbeUL + #define HWRM_FW_LIVEPATCH 0xbfUL #define HWRM_FW_RESET 0xc0UL #define HWRM_FW_QSTATUS 0xc1UL #define HWRM_FW_HEALTH_CHECK 0xc2UL @@ -347,6 +353,8 @@ struct cmd_nums { #define HWRM_FUNC_HOST_PF_IDS_QUERY 0x197UL #define HWRM_FUNC_QSTATS_EXT 0x198UL #define HWRM_STAT_EXT_CTX_QUERY 0x199UL + #define HWRM_FUNC_SPD_CFG 0x19aUL + #define HWRM_FUNC_SPD_QCFG 0x19bUL #define HWRM_SELFTEST_QLIST 0x200UL #define HWRM_SELFTEST_EXEC 0x201UL #define HWRM_SELFTEST_IRQ 0x202UL @@ -359,6 +367,11 @@ struct cmd_nums { #define HWRM_MFG_HDMA_TEST 0x209UL #define HWRM_MFG_FRU_EEPROM_WRITE 0x20aUL #define HWRM_MFG_FRU_EEPROM_READ 0x20bUL + #define HWRM_MFG_SOC_IMAGE 0x20cUL + #define HWRM_MFG_SOC_QSTATUS 0x20dUL + #define HWRM_MFG_PARAM_SEEPROM_SYNC 0x20eUL + #define HWRM_MFG_PARAM_SEEPROM_READ 0x20fUL + #define HWRM_MFG_PARAM_SEEPROM_HEALTH 0x210UL #define HWRM_TF 0x2bcUL #define HWRM_TF_VERSION_GET 0x2bdUL #define HWRM_TF_SESSION_OPEN 0x2c6UL @@ -384,6 +397,7 @@ struct cmd_nums { #define HWRM_TF_EXT_EM_QCFG 0x2e9UL #define HWRM_TF_EM_INSERT 0x2eaUL #define HWRM_TF_EM_DELETE 0x2ebUL + #define HWRM_TF_EM_HASH_INSERT 0x2ecUL #define HWRM_TF_TCAM_SET 0x2f8UL #define HWRM_TF_TCAM_GET 0x2f9UL #define HWRM_TF_TCAM_MOVE 0x2faUL @@ -486,9 +500,9 @@ struct hwrm_err_output { #define HWRM_TARGET_ID_TOOLS 0xFFFD #define HWRM_VERSION_MAJOR 1 #define HWRM_VERSION_MINOR 10 -#define HWRM_VERSION_UPDATE 1 -#define HWRM_VERSION_RSVD 68 -#define HWRM_VERSION_STR "1.10.1.68" +#define HWRM_VERSION_UPDATE 2 +#define HWRM_VERSION_RSVD 11 +#define HWRM_VERSION_STR "1.10.2.11" /* hwrm_ver_get_input (size:192b/24B) */ struct hwrm_ver_get_input { @@ -563,8 +577,9 @@ struct hwrm_ver_get_output { __le16 max_resp_len; __le16 def_req_timeout; u8 flags; - #define VER_GET_RESP_FLAGS_DEV_NOT_RDY 0x1UL - #define VER_GET_RESP_FLAGS_EXT_VER_AVAIL 0x2UL + #define VER_GET_RESP_FLAGS_DEV_NOT_RDY 0x1UL + #define VER_GET_RESP_FLAGS_EXT_VER_AVAIL 0x2UL + #define VER_GET_RESP_FLAGS_DEV_NOT_RDY_BACKING_STORE 0x4UL u8 unused_0[2]; u8 always_1; __le16 hwrm_intf_major; @@ -708,6 +723,7 @@ struct hwrm_async_event_cmpl { #define ASYNC_EVENT_CMPL_EVENT_ID_QUIESCE_DONE 0x3fUL #define ASYNC_EVENT_CMPL_EVENT_ID_DEFERRED_RESPONSE 0x40UL #define ASYNC_EVENT_CMPL_EVENT_ID_PFC_WATCHDOG_CFG_CHANGE 0x41UL + #define ASYNC_EVENT_CMPL_EVENT_ID_MAX_RGTR_EVENT_ID 0x42UL #define ASYNC_EVENT_CMPL_EVENT_ID_FW_TRACE_MSG 0xfeUL #define ASYNC_EVENT_CMPL_EVENT_ID_HWRM_ERROR 0xffUL #define ASYNC_EVENT_CMPL_EVENT_ID_LAST ASYNC_EVENT_CMPL_EVENT_ID_HWRM_ERROR @@ -815,6 +831,8 @@ struct hwrm_async_event_cmpl_reset_notify { #define ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_ID_RESET_NOTIFY 0x8UL #define ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_ID_LAST ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_ID_RESET_NOTIFY __le32 event_data2; + #define ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_DATA2_FW_STATUS_CODE_MASK 0xffffUL + #define ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_DATA2_FW_STATUS_CODE_SFT 0 u8 opaque_v; #define ASYNC_EVENT_CMPL_RESET_NOTIFY_V 0x1UL #define ASYNC_EVENT_CMPL_RESET_NOTIFY_OPAQUE_MASK 0xfeUL @@ -832,7 +850,8 @@ struct hwrm_async_event_cmpl_reset_notify { #define ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_DATA1_REASON_CODE_MANAGEMENT_RESET_REQUEST (0x1UL << 8) #define ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_DATA1_REASON_CODE_FW_EXCEPTION_FATAL (0x2UL << 8) #define ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_DATA1_REASON_CODE_FW_EXCEPTION_NON_FATAL (0x3UL << 8) - #define ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_DATA1_REASON_CODE_LAST ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_DATA1_REASON_CODE_FW_EXCEPTION_NON_FATAL + #define ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_DATA1_REASON_CODE_FAST_RESET (0x4UL << 8) + #define ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_DATA1_REASON_CODE_LAST ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_DATA1_REASON_CODE_FAST_RESET #define ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_DATA1_DELAY_IN_100MS_TICKS_MASK 0xffff0000UL #define ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_DATA1_DELAY_IN_100MS_TICKS_SFT 16 }; @@ -1271,6 +1290,10 @@ struct hwrm_func_qcaps_output { #define FUNC_QCAPS_RESP_FLAGS_EXT_TX_PROXY_SRC_INTF_OVERRIDE_SUPPORT 0x20UL #define FUNC_QCAPS_RESP_FLAGS_EXT_SCHQ_SUPPORTED 0x40UL #define FUNC_QCAPS_RESP_FLAGS_EXT_PPP_PUSH_MODE_SUPPORTED 0x80UL + #define FUNC_QCAPS_RESP_FLAGS_EXT_EVB_MODE_CFG_NOT_SUPPORTED 0x100UL + #define FUNC_QCAPS_RESP_FLAGS_EXT_SOC_SPD_SUPPORTED 0x200UL + #define FUNC_QCAPS_RESP_FLAGS_EXT_FW_LIVEPATCH_SUPPORTED 0x400UL + #define FUNC_QCAPS_RESP_FLAGS_EXT_FAST_RESET_CAPABLE 0x800UL u8 max_schqs; u8 mpc_chnls_cap; #define FUNC_QCAPS_RESP_MPC_CHNLS_CAP_TCE 0x1UL @@ -1315,6 +1338,7 @@ struct hwrm_func_qcfg_output { #define FUNC_QCFG_RESP_FLAGS_HOT_RESET_ALLOWED 0x200UL #define FUNC_QCFG_RESP_FLAGS_PPP_PUSH_MODE_ENABLED 0x400UL #define FUNC_QCFG_RESP_FLAGS_RING_MONITOR_ENABLED 0x800UL + #define FUNC_QCFG_RESP_FLAGS_FAST_RESET_ALLOWED 0x1000UL u8 mac_address[6]; __le16 pci_id; __le16 alloc_rsscos_ctx; @@ -1731,6 +1755,7 @@ struct hwrm_func_drv_rgtr_input { #define FUNC_DRV_RGTR_REQ_FLAGS_HOT_RESET_SUPPORT 0x10UL #define FUNC_DRV_RGTR_REQ_FLAGS_ERROR_RECOVERY_SUPPORT 0x20UL #define FUNC_DRV_RGTR_REQ_FLAGS_MASTER_SUPPORT 0x40UL + #define FUNC_DRV_RGTR_REQ_FLAGS_FAST_RESET_SUPPORT 0x80UL __le32 enables; #define FUNC_DRV_RGTR_REQ_ENABLES_OS_TYPE 0x1UL #define FUNC_DRV_RGTR_REQ_ENABLES_VER 0x2UL @@ -1993,7 +2018,7 @@ struct hwrm_func_backing_store_qcaps_input { __le64 resp_addr; }; -/* hwrm_func_backing_store_qcaps_output (size:640b/80B) */ +/* hwrm_func_backing_store_qcaps_output (size:704b/88B) */ struct hwrm_func_backing_store_qcaps_output { __le16 error_code; __le16 req_type; @@ -2024,13 +2049,25 @@ struct hwrm_func_backing_store_qcaps_output { __le16 mrav_num_entries_units; u8 tqm_entries_multiple; u8 ctx_kind_initializer; - __le32 rsvd; - __le16 rsvd1; + __le16 ctx_init_mask; + #define FUNC_BACKING_STORE_QCAPS_RESP_CTX_INIT_MASK_QP 0x1UL + #define FUNC_BACKING_STORE_QCAPS_RESP_CTX_INIT_MASK_SRQ 0x2UL + #define FUNC_BACKING_STORE_QCAPS_RESP_CTX_INIT_MASK_CQ 0x4UL + #define FUNC_BACKING_STORE_QCAPS_RESP_CTX_INIT_MASK_VNIC 0x8UL + #define FUNC_BACKING_STORE_QCAPS_RESP_CTX_INIT_MASK_STAT 0x10UL + #define FUNC_BACKING_STORE_QCAPS_RESP_CTX_INIT_MASK_MRAV 0x20UL + u8 qp_init_offset; + u8 srq_init_offset; + u8 cq_init_offset; + u8 vnic_init_offset; u8 tqm_fp_rings_count; + u8 stat_init_offset; + u8 mrav_init_offset; + u8 rsvd[6]; u8 valid; }; -/* hwrm_func_backing_store_cfg_input (size:2048b/256B) */ +/* hwrm_func_backing_store_cfg_input (size:2432b/304B) */ struct hwrm_func_backing_store_cfg_input { __le16 req_type; __le16 cmpl_ring; @@ -2041,22 +2078,25 @@ struct hwrm_func_backing_store_cfg_input { #define FUNC_BACKING_STORE_CFG_REQ_FLAGS_PREBOOT_MODE 0x1UL #define FUNC_BACKING_STORE_CFG_REQ_FLAGS_MRAV_RESERVATION_SPLIT 0x2UL __le32 enables; - #define FUNC_BACKING_STORE_CFG_REQ_ENABLES_QP 0x1UL - #define FUNC_BACKING_STORE_CFG_REQ_ENABLES_SRQ 0x2UL - #define FUNC_BACKING_STORE_CFG_REQ_ENABLES_CQ 0x4UL - #define FUNC_BACKING_STORE_CFG_REQ_ENABLES_VNIC 0x8UL - #define FUNC_BACKING_STORE_CFG_REQ_ENABLES_STAT 0x10UL - #define FUNC_BACKING_STORE_CFG_REQ_ENABLES_TQM_SP 0x20UL - #define FUNC_BACKING_STORE_CFG_REQ_ENABLES_TQM_RING0 0x40UL - #define FUNC_BACKING_STORE_CFG_REQ_ENABLES_TQM_RING1 0x80UL - #define FUNC_BACKING_STORE_CFG_REQ_ENABLES_TQM_RING2 0x100UL - #define FUNC_BACKING_STORE_CFG_REQ_ENABLES_TQM_RING3 0x200UL - #define FUNC_BACKING_STORE_CFG_REQ_ENABLES_TQM_RING4 0x400UL - #define FUNC_BACKING_STORE_CFG_REQ_ENABLES_TQM_RING5 0x800UL - #define FUNC_BACKING_STORE_CFG_REQ_ENABLES_TQM_RING6 0x1000UL - #define FUNC_BACKING_STORE_CFG_REQ_ENABLES_TQM_RING7 0x2000UL - #define FUNC_BACKING_STORE_CFG_REQ_ENABLES_MRAV 0x4000UL - #define FUNC_BACKING_STORE_CFG_REQ_ENABLES_TIM 0x8000UL + #define FUNC_BACKING_STORE_CFG_REQ_ENABLES_QP 0x1UL + #define FUNC_BACKING_STORE_CFG_REQ_ENABLES_SRQ 0x2UL + #define FUNC_BACKING_STORE_CFG_REQ_ENABLES_CQ 0x4UL + #define FUNC_BACKING_STORE_CFG_REQ_ENABLES_VNIC 0x8UL + #define FUNC_BACKING_STORE_CFG_REQ_ENABLES_STAT 0x10UL + #define FUNC_BACKING_STORE_CFG_REQ_ENABLES_TQM_SP 0x20UL + #define FUNC_BACKING_STORE_CFG_REQ_ENABLES_TQM_RING0 0x40UL + #define FUNC_BACKING_STORE_CFG_REQ_ENABLES_TQM_RING1 0x80UL + #define FUNC_BACKING_STORE_CFG_REQ_ENABLES_TQM_RING2 0x100UL + #define FUNC_BACKING_STORE_CFG_REQ_ENABLES_TQM_RING3 0x200UL + #define FUNC_BACKING_STORE_CFG_REQ_ENABLES_TQM_RING4 0x400UL + #define FUNC_BACKING_STORE_CFG_REQ_ENABLES_TQM_RING5 0x800UL + #define FUNC_BACKING_STORE_CFG_REQ_ENABLES_TQM_RING6 0x1000UL + #define FUNC_BACKING_STORE_CFG_REQ_ENABLES_TQM_RING7 0x2000UL + #define FUNC_BACKING_STORE_CFG_REQ_ENABLES_MRAV 0x4000UL + #define FUNC_BACKING_STORE_CFG_REQ_ENABLES_TIM 0x8000UL + #define FUNC_BACKING_STORE_CFG_REQ_ENABLES_TQM_RING8 0x10000UL + #define FUNC_BACKING_STORE_CFG_REQ_ENABLES_TQM_RING9 0x20000UL + #define FUNC_BACKING_STORE_CFG_REQ_ENABLES_TQM_RING10 0x40000UL u8 qpc_pg_size_qpc_lvl; #define FUNC_BACKING_STORE_CFG_REQ_QPC_LVL_MASK 0xfUL #define FUNC_BACKING_STORE_CFG_REQ_QPC_LVL_SFT 0 @@ -2358,6 +2398,63 @@ struct hwrm_func_backing_store_cfg_input { __le16 tqm_entry_size; __le16 mrav_entry_size; __le16 tim_entry_size; + u8 tqm_ring8_pg_size_tqm_ring_lvl; + #define FUNC_BACKING_STORE_CFG_REQ_RING8_TQM_RING_LVL_MASK 0xfUL + #define FUNC_BACKING_STORE_CFG_REQ_RING8_TQM_RING_LVL_SFT 0 + #define FUNC_BACKING_STORE_CFG_REQ_RING8_TQM_RING_LVL_LVL_0 0x0UL + #define FUNC_BACKING_STORE_CFG_REQ_RING8_TQM_RING_LVL_LVL_1 0x1UL + #define FUNC_BACKING_STORE_CFG_REQ_RING8_TQM_RING_LVL_LVL_2 0x2UL + #define FUNC_BACKING_STORE_CFG_REQ_RING8_TQM_RING_LVL_LAST FUNC_BACKING_STORE_CFG_REQ_RING8_TQM_RING_LVL_LVL_2 + #define FUNC_BACKING_STORE_CFG_REQ_RING8_TQM_RING_PG_SIZE_MASK 0xf0UL + #define FUNC_BACKING_STORE_CFG_REQ_RING8_TQM_RING_PG_SIZE_SFT 4 + #define FUNC_BACKING_STORE_CFG_REQ_RING8_TQM_RING_PG_SIZE_PG_4K (0x0UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_RING8_TQM_RING_PG_SIZE_PG_8K (0x1UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_RING8_TQM_RING_PG_SIZE_PG_64K (0x2UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_RING8_TQM_RING_PG_SIZE_PG_2M (0x3UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_RING8_TQM_RING_PG_SIZE_PG_8M (0x4UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_RING8_TQM_RING_PG_SIZE_PG_1G (0x5UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_RING8_TQM_RING_PG_SIZE_LAST FUNC_BACKING_STORE_CFG_REQ_RING8_TQM_RING_PG_SIZE_PG_1G + u8 ring8_unused[3]; + __le32 tqm_ring8_num_entries; + __le64 tqm_ring8_page_dir; + u8 tqm_ring9_pg_size_tqm_ring_lvl; + #define FUNC_BACKING_STORE_CFG_REQ_RING9_TQM_RING_LVL_MASK 0xfUL + #define FUNC_BACKING_STORE_CFG_REQ_RING9_TQM_RING_LVL_SFT 0 + #define FUNC_BACKING_STORE_CFG_REQ_RING9_TQM_RING_LVL_LVL_0 0x0UL + #define FUNC_BACKING_STORE_CFG_REQ_RING9_TQM_RING_LVL_LVL_1 0x1UL + #define FUNC_BACKING_STORE_CFG_REQ_RING9_TQM_RING_LVL_LVL_2 0x2UL + #define FUNC_BACKING_STORE_CFG_REQ_RING9_TQM_RING_LVL_LAST FUNC_BACKING_STORE_CFG_REQ_RING9_TQM_RING_LVL_LVL_2 + #define FUNC_BACKING_STORE_CFG_REQ_RING9_TQM_RING_PG_SIZE_MASK 0xf0UL + #define FUNC_BACKING_STORE_CFG_REQ_RING9_TQM_RING_PG_SIZE_SFT 4 + #define FUNC_BACKING_STORE_CFG_REQ_RING9_TQM_RING_PG_SIZE_PG_4K (0x0UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_RING9_TQM_RING_PG_SIZE_PG_8K (0x1UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_RING9_TQM_RING_PG_SIZE_PG_64K (0x2UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_RING9_TQM_RING_PG_SIZE_PG_2M (0x3UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_RING9_TQM_RING_PG_SIZE_PG_8M (0x4UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_RING9_TQM_RING_PG_SIZE_PG_1G (0x5UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_RING9_TQM_RING_PG_SIZE_LAST FUNC_BACKING_STORE_CFG_REQ_RING9_TQM_RING_PG_SIZE_PG_1G + u8 ring9_unused[3]; + __le32 tqm_ring9_num_entries; + __le64 tqm_ring9_page_dir; + u8 tqm_ring10_pg_size_tqm_ring_lvl; + #define FUNC_BACKING_STORE_CFG_REQ_RING10_TQM_RING_LVL_MASK 0xfUL + #define FUNC_BACKING_STORE_CFG_REQ_RING10_TQM_RING_LVL_SFT 0 + #define FUNC_BACKING_STORE_CFG_REQ_RING10_TQM_RING_LVL_LVL_0 0x0UL + #define FUNC_BACKING_STORE_CFG_REQ_RING10_TQM_RING_LVL_LVL_1 0x1UL + #define FUNC_BACKING_STORE_CFG_REQ_RING10_TQM_RING_LVL_LVL_2 0x2UL + #define FUNC_BACKING_STORE_CFG_REQ_RING10_TQM_RING_LVL_LAST FUNC_BACKING_STORE_CFG_REQ_RING10_TQM_RING_LVL_LVL_2 + #define FUNC_BACKING_STORE_CFG_REQ_RING10_TQM_RING_PG_SIZE_MASK 0xf0UL + #define FUNC_BACKING_STORE_CFG_REQ_RING10_TQM_RING_PG_SIZE_SFT 4 + #define FUNC_BACKING_STORE_CFG_REQ_RING10_TQM_RING_PG_SIZE_PG_4K (0x0UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_RING10_TQM_RING_PG_SIZE_PG_8K (0x1UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_RING10_TQM_RING_PG_SIZE_PG_64K (0x2UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_RING10_TQM_RING_PG_SIZE_PG_2M (0x3UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_RING10_TQM_RING_PG_SIZE_PG_8M (0x4UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_RING10_TQM_RING_PG_SIZE_PG_1G (0x5UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_RING10_TQM_RING_PG_SIZE_LAST FUNC_BACKING_STORE_CFG_REQ_RING10_TQM_RING_PG_SIZE_PG_1G + u8 ring10_unused[3]; + __le32 tqm_ring10_num_entries; + __le64 tqm_ring10_page_dir; }; /* hwrm_func_backing_store_cfg_output (size:128b/16B) */ @@ -2930,6 +3027,7 @@ struct hwrm_port_phy_qcfg_output { #define PORT_PHY_QCFG_RESP_DUPLEX_STATE_LAST PORT_PHY_QCFG_RESP_DUPLEX_STATE_FULL u8 option_flags; #define PORT_PHY_QCFG_RESP_OPTION_FLAGS_MEDIA_AUTO_DETECT 0x1UL + #define PORT_PHY_QCFG_RESP_OPTION_FLAGS_SIGNAL_MODE_KNOWN 0x2UL char phy_vendor_name[16]; char phy_vendor_partnumber[16]; __le16 support_pam4_speeds; @@ -3528,8 +3626,8 @@ struct hwrm_port_phy_qcaps_output { #define PORT_PHY_QCAPS_RESP_FLAGS_SHARED_PHY_CFG_SUPPORTED 0x8UL #define PORT_PHY_QCAPS_RESP_FLAGS_CUMULATIVE_COUNTERS_ON_RESET 0x10UL #define PORT_PHY_QCAPS_RESP_FLAGS_LOCAL_LPBK_NOT_SUPPORTED 0x20UL - #define PORT_PHY_QCAPS_RESP_FLAGS_RSVD1_MASK 0xc0UL - #define PORT_PHY_QCAPS_RESP_FLAGS_RSVD1_SFT 6 + #define PORT_PHY_QCAPS_RESP_FLAGS_FW_MANAGED_LINK_DOWN 0x40UL + #define PORT_PHY_QCAPS_RESP_FLAGS_RSVD1 0x80UL u8 port_cnt; #define PORT_PHY_QCAPS_RESP_PORT_CNT_UNKNOWN 0x0UL #define PORT_PHY_QCAPS_RESP_PORT_CNT_1 0x1UL @@ -4119,7 +4217,10 @@ struct hwrm_queue_qportcfg_output { #define QUEUE_QPORTCFG_RESP_QUEUE_ID7_SERVICE_PROFILE_LOSSLESS_NIC 0x3UL #define QUEUE_QPORTCFG_RESP_QUEUE_ID7_SERVICE_PROFILE_UNKNOWN 0xffUL #define QUEUE_QPORTCFG_RESP_QUEUE_ID7_SERVICE_PROFILE_LAST QUEUE_QPORTCFG_RESP_QUEUE_ID7_SERVICE_PROFILE_UNKNOWN - u8 unused_0; + u8 queue_id0_service_profile_type; + #define QUEUE_QPORTCFG_RESP_QUEUE_ID0_SERVICE_PROFILE_TYPE_ROCE 0x1UL + #define QUEUE_QPORTCFG_RESP_QUEUE_ID0_SERVICE_PROFILE_TYPE_NIC 0x2UL + #define QUEUE_QPORTCFG_RESP_QUEUE_ID0_SERVICE_PROFILE_TYPE_CNP 0x4UL char qid0_name[16]; char qid1_name[16]; char qid2_name[16]; @@ -4128,7 +4229,34 @@ struct hwrm_queue_qportcfg_output { char qid5_name[16]; char qid6_name[16]; char qid7_name[16]; - u8 unused_1[7]; + u8 queue_id1_service_profile_type; + #define QUEUE_QPORTCFG_RESP_QUEUE_ID1_SERVICE_PROFILE_TYPE_ROCE 0x1UL + #define QUEUE_QPORTCFG_RESP_QUEUE_ID1_SERVICE_PROFILE_TYPE_NIC 0x2UL + #define QUEUE_QPORTCFG_RESP_QUEUE_ID1_SERVICE_PROFILE_TYPE_CNP 0x4UL + u8 queue_id2_service_profile_type; + #define QUEUE_QPORTCFG_RESP_QUEUE_ID2_SERVICE_PROFILE_TYPE_ROCE 0x1UL + #define QUEUE_QPORTCFG_RESP_QUEUE_ID2_SERVICE_PROFILE_TYPE_NIC 0x2UL + #define QUEUE_QPORTCFG_RESP_QUEUE_ID2_SERVICE_PROFILE_TYPE_CNP 0x4UL + u8 queue_id3_service_profile_type; + #define QUEUE_QPORTCFG_RESP_QUEUE_ID3_SERVICE_PROFILE_TYPE_ROCE 0x1UL + #define QUEUE_QPORTCFG_RESP_QUEUE_ID3_SERVICE_PROFILE_TYPE_NIC 0x2UL + #define QUEUE_QPORTCFG_RESP_QUEUE_ID3_SERVICE_PROFILE_TYPE_CNP 0x4UL + u8 queue_id4_service_profile_type; + #define QUEUE_QPORTCFG_RESP_QUEUE_ID4_SERVICE_PROFILE_TYPE_ROCE 0x1UL + #define QUEUE_QPORTCFG_RESP_QUEUE_ID4_SERVICE_PROFILE_TYPE_NIC 0x2UL + #define QUEUE_QPORTCFG_RESP_QUEUE_ID4_SERVICE_PROFILE_TYPE_CNP 0x4UL + u8 queue_id5_service_profile_type; + #define QUEUE_QPORTCFG_RESP_QUEUE_ID5_SERVICE_PROFILE_TYPE_ROCE 0x1UL + #define QUEUE_QPORTCFG_RESP_QUEUE_ID5_SERVICE_PROFILE_TYPE_NIC 0x2UL + #define QUEUE_QPORTCFG_RESP_QUEUE_ID5_SERVICE_PROFILE_TYPE_CNP 0x4UL + u8 queue_id6_service_profile_type; + #define QUEUE_QPORTCFG_RESP_QUEUE_ID6_SERVICE_PROFILE_TYPE_ROCE 0x1UL + #define QUEUE_QPORTCFG_RESP_QUEUE_ID6_SERVICE_PROFILE_TYPE_NIC 0x2UL + #define QUEUE_QPORTCFG_RESP_QUEUE_ID6_SERVICE_PROFILE_TYPE_CNP 0x4UL + u8 queue_id7_service_profile_type; + #define QUEUE_QPORTCFG_RESP_QUEUE_ID7_SERVICE_PROFILE_TYPE_ROCE 0x1UL + #define QUEUE_QPORTCFG_RESP_QUEUE_ID7_SERVICE_PROFILE_TYPE_NIC 0x2UL + #define QUEUE_QPORTCFG_RESP_QUEUE_ID7_SERVICE_PROFILE_TYPE_CNP 0x4UL u8 valid; }; @@ -5142,8 +5270,10 @@ struct hwrm_vnic_alloc_input { __le16 target_id; __le64 resp_addr; __le32 flags; - #define VNIC_ALLOC_REQ_FLAGS_DEFAULT 0x1UL - u8 unused_0[4]; + #define VNIC_ALLOC_REQ_FLAGS_DEFAULT 0x1UL + #define VNIC_ALLOC_REQ_FLAGS_VIRTIO_NET_FID_VALID 0x2UL + __le16 virtio_net_fid; + u8 unused_0[2]; }; /* hwrm_vnic_alloc_output (size:128b/16B) */ @@ -5260,6 +5390,8 @@ struct hwrm_vnic_qcaps_output { #define VNIC_QCAPS_RESP_FLAGS_OUTERMOST_RSS_CAP 0x80UL #define VNIC_QCAPS_RESP_FLAGS_COS_ASSIGNMENT_CAP 0x100UL #define VNIC_QCAPS_RESP_FLAGS_RX_CMPL_V2_CAP 0x200UL + #define VNIC_QCAPS_RESP_FLAGS_VNIC_STATE_CAP 0x400UL + #define VNIC_QCAPS_RESP_FLAGS_VIRTIO_NET_VNIC_ALLOC_CAP 0x800UL __le16 max_aggs_supported; u8 unused_1[5]; u8 valid; @@ -5585,7 +5717,11 @@ struct hwrm_ring_alloc_output { __le16 resp_len; __le16 ring_id; __le16 logical_ring_id; - u8 unused_0[3]; + u8 push_buffer_index; + #define RING_ALLOC_RESP_PUSH_BUFFER_INDEX_PING_BUFFER 0x0UL + #define RING_ALLOC_RESP_PUSH_BUFFER_INDEX_PONG_BUFFER 0x1UL + #define RING_ALLOC_RESP_PUSH_BUFFER_INDEX_LAST RING_ALLOC_RESP_PUSH_BUFFER_INDEX_PONG_BUFFER + u8 unused_0[2]; u8 valid; }; @@ -5644,7 +5780,11 @@ struct hwrm_ring_reset_output { __le16 req_type; __le16 seq_id; __le16 resp_len; - u8 unused_0[4]; + u8 push_buffer_index; + #define RING_RESET_RESP_PUSH_BUFFER_INDEX_PING_BUFFER 0x0UL + #define RING_RESET_RESP_PUSH_BUFFER_INDEX_PONG_BUFFER 0x1UL + #define RING_RESET_RESP_PUSH_BUFFER_INDEX_LAST RING_RESET_RESP_PUSH_BUFFER_INDEX_PONG_BUFFER + u8 unused_0[3]; u8 consumer_idx[3]; u8 valid; }; @@ -6988,21 +7128,23 @@ struct hwrm_cfa_adv_flow_mgnt_qcaps_output { __le16 seq_id; __le16 resp_len; __le32 flags; - #define CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_FLOW_HND_16BIT_SUPPORTED 0x1UL - #define CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_FLOW_HND_64BIT_SUPPORTED 0x2UL - #define CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_FLOW_BATCH_DELETE_SUPPORTED 0x4UL - #define CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_FLOW_RESET_ALL_SUPPORTED 0x8UL - #define CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_NTUPLE_FLOW_DEST_FUNC_SUPPORTED 0x10UL - #define CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_TX_EEM_FLOW_SUPPORTED 0x20UL - #define CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_RX_EEM_FLOW_SUPPORTED 0x40UL - #define CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_FLOW_COUNTER_ALLOC_SUPPORTED 0x80UL - #define CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_RFS_RING_TBL_IDX_SUPPORTED 0x100UL - #define CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_UNTAGGED_VLAN_SUPPORTED 0x200UL - #define CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_XDP_SUPPORTED 0x400UL - #define CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_L2_HEADER_SOURCE_FIELDS_SUPPORTED 0x800UL - #define CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_NTUPLE_FLOW_RX_ARP_SUPPORTED 0x1000UL - #define CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_RFS_RING_TBL_IDX_V2_SUPPORTED 0x2000UL - #define CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_NTUPLE_FLOW_RX_ETHERTYPE_IP_SUPPORTED 0x4000UL + #define CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_FLOW_HND_16BIT_SUPPORTED 0x1UL + #define CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_FLOW_HND_64BIT_SUPPORTED 0x2UL + #define CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_FLOW_BATCH_DELETE_SUPPORTED 0x4UL + #define CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_FLOW_RESET_ALL_SUPPORTED 0x8UL + #define CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_NTUPLE_FLOW_DEST_FUNC_SUPPORTED 0x10UL + #define CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_TX_EEM_FLOW_SUPPORTED 0x20UL + #define CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_RX_EEM_FLOW_SUPPORTED 0x40UL + #define CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_FLOW_COUNTER_ALLOC_SUPPORTED 0x80UL + #define CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_RFS_RING_TBL_IDX_SUPPORTED 0x100UL + #define CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_UNTAGGED_VLAN_SUPPORTED 0x200UL + #define CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_XDP_SUPPORTED 0x400UL + #define CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_L2_HEADER_SOURCE_FIELDS_SUPPORTED 0x800UL + #define CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_NTUPLE_FLOW_RX_ARP_SUPPORTED 0x1000UL + #define CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_RFS_RING_TBL_IDX_V2_SUPPORTED 0x2000UL + #define CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_NTUPLE_FLOW_RX_ETHERTYPE_IP_SUPPORTED 0x4000UL + #define CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_TRUFLOW_CAPABLE 0x8000UL + #define CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_L2_FILTER_TRAFFIC_TYPE_L2_ROCE_SUPPORTED 0x10000UL u8 unused_0[3]; u8 valid; }; @@ -7472,7 +7614,8 @@ struct hwrm_struct_hdr { #define STRUCT_HDR_STRUCT_ID_AFM_OPAQUE 0x1UL #define STRUCT_HDR_STRUCT_ID_PORT_DESCRIPTION 0xaUL #define STRUCT_HDR_STRUCT_ID_RSS_V2 0x64UL - #define STRUCT_HDR_STRUCT_ID_LAST STRUCT_HDR_STRUCT_ID_RSS_V2 + #define STRUCT_HDR_STRUCT_ID_MSIX_PER_VF 0xc8UL + #define STRUCT_HDR_STRUCT_ID_LAST STRUCT_HDR_STRUCT_ID_MSIX_PER_VF __le16 len; u8 version; u8 count; @@ -8000,6 +8143,9 @@ struct hwrm_dbg_coredump_initiate_output { struct coredump_data_hdr { __le32 address; __le32 flags_length; + #define COREDUMP_DATA_HDR_FLAGS_LENGTH_ACTUAL_LEN_MASK 0xffffffUL + #define COREDUMP_DATA_HDR_FLAGS_LENGTH_ACTUAL_LEN_SFT 0 + #define COREDUMP_DATA_HDR_FLAGS_LENGTH_INDIRECT_ACCESS 0x1000000UL __le32 instance; __le32 next_offset; }; @@ -8669,7 +8815,6 @@ struct hcomm_status { #define HCOMM_STATUS_TRUE_OFFSET_MASK 0xfffffffcUL #define HCOMM_STATUS_TRUE_OFFSET_SFT 2 }; - #define HCOMM_STATUS_STRUCT_LOC 0x31001F0UL #endif /* _BNXT_HSI_H_ */ -- GitLab From fe1b853572f17dcfdda93651c1ca3f41bbaf76f0 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Mon, 25 Jan 2021 02:08:08 -0500 Subject: [PATCH 1458/2012] bnxt_en: Define macros for the various health register states. Define macros to check for the various states in the lower 16 bits of the health register. Replace the C code that checks for these values with the newly defined macros. Reviewed-by: Edwin Peer Signed-off-by: Michael Chan Acked-by: Willem de Bruijn Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.h | 10 ++++++++++ drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c | 7 +++---- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index d68065367cf26..a1dd80a0fcf63 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -1534,9 +1534,19 @@ struct bnxt_fw_reporter_ctx { #define BNXT_FW_HEALTH_WIN_OFF(reg) (BNXT_FW_HEALTH_WIN_BASE + \ ((reg) & BNXT_GRC_OFFSET_MASK)) +#define BNXT_FW_STATUS_HEALTH_MSK 0xffff #define BNXT_FW_STATUS_HEALTHY 0x8000 #define BNXT_FW_STATUS_SHUTDOWN 0x100000 +#define BNXT_FW_IS_HEALTHY(sts) (((sts) & BNXT_FW_STATUS_HEALTH_MSK) ==\ + BNXT_FW_STATUS_HEALTHY) + +#define BNXT_FW_IS_BOOTING(sts) (((sts) & BNXT_FW_STATUS_HEALTH_MSK) < \ + BNXT_FW_STATUS_HEALTHY) + +#define BNXT_FW_IS_ERR(sts) (((sts) & BNXT_FW_STATUS_HEALTH_MSK) > \ + BNXT_FW_STATUS_HEALTHY) + struct bnxt { void __iomem *bar0; void __iomem *bar1; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c index 6b7b69ed62db0..90a31b4a30205 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c @@ -44,21 +44,20 @@ static int bnxt_fw_reporter_diagnose(struct devlink_health_reporter *reporter, struct netlink_ext_ack *extack) { struct bnxt *bp = devlink_health_reporter_priv(reporter); - u32 val, health_status; + u32 val; int rc; if (test_bit(BNXT_STATE_IN_FW_RESET, &bp->state)) return 0; val = bnxt_fw_health_readl(bp, BNXT_FW_HEALTH_REG); - health_status = val & 0xffff; - if (health_status < BNXT_FW_STATUS_HEALTHY) { + if (BNXT_FW_IS_BOOTING(val)) { rc = devlink_fmsg_string_pair_put(fmsg, "Description", "Not yet completed initialization"); if (rc) return rc; - } else if (health_status > BNXT_FW_STATUS_HEALTHY) { + } else if (BNXT_FW_IS_ERR(val)) { rc = devlink_fmsg_string_pair_put(fmsg, "Description", "Encountered fatal error and cannot recover"); if (rc) -- GitLab From b187e4bae0aaa49958cc589af46f7059672980db Mon Sep 17 00:00:00 2001 From: Edwin Peer Date: Mon, 25 Jan 2021 02:08:09 -0500 Subject: [PATCH 1459/2012] bnxt_en: handle CRASH_NO_MASTER during bnxt_open() Add missing support for handling NO_MASTER crashes while ports are administratively down (ifdown). On some SoC platforms, the driver needs to assist the firmware to recover from a crash via OP-TEE. This is performed in a similar fashion to what is done during driver probe. Signed-off-by: Edwin Peer Signed-off-by: Michael Chan Acked-by: Willem de Bruijn Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 67 +++++++++++++---------- 1 file changed, 39 insertions(+), 28 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 5daef68015121..c091a10231883 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -9337,6 +9337,37 @@ static int bnxt_hwrm_shutdown_link(struct bnxt *bp) static int bnxt_fw_init_one(struct bnxt *bp); +static int bnxt_fw_reset_via_optee(struct bnxt *bp) +{ +#ifdef CONFIG_TEE_BNXT_FW + int rc = tee_bnxt_fw_load(); + + if (rc) + netdev_err(bp->dev, "Failed FW reset via OP-TEE, rc=%d\n", rc); + + return rc; +#else + netdev_err(bp->dev, "OP-TEE not supported\n"); + return -ENODEV; +#endif +} + +static int bnxt_try_recover_fw(struct bnxt *bp) +{ + if (bp->fw_health && bp->fw_health->status_reliable) { + u32 sts = bnxt_fw_health_readl(bp, BNXT_FW_HEALTH_REG); + + netdev_err(bp->dev, "Firmware not responding, status: 0x%x\n", + sts); + if (sts & FW_STATUS_REG_CRASHED_NO_MASTER) { + netdev_warn(bp->dev, "Firmware recover via OP-TEE requested\n"); + return bnxt_fw_reset_via_optee(bp); + } + } + + return -ENODEV; +} + static int bnxt_hwrm_if_change(struct bnxt *bp, bool up) { struct hwrm_func_drv_if_change_output *resp = bp->hwrm_cmd_resp_addr; @@ -9356,6 +9387,10 @@ static int bnxt_hwrm_if_change(struct bnxt *bp, bool up) if (!rc) flags = le32_to_cpu(resp->flags); mutex_unlock(&bp->hwrm_cmd_lock); + if (rc && up) { + rc = bnxt_try_recover_fw(bp); + fw_reset = true; + } if (rc) return rc; @@ -11183,21 +11218,6 @@ static void bnxt_init_dflt_coal(struct bnxt *bp) bp->stats_coal_ticks = BNXT_DEF_STATS_COAL_TICKS; } -static int bnxt_fw_reset_via_optee(struct bnxt *bp) -{ -#ifdef CONFIG_TEE_BNXT_FW - int rc = tee_bnxt_fw_load(); - - if (rc) - netdev_err(bp->dev, "Failed FW reset via OP-TEE, rc=%d\n", rc); - - return rc; -#else - netdev_err(bp->dev, "OP-TEE not supported\n"); - return -ENODEV; -#endif -} - static int bnxt_fw_init_one_p1(struct bnxt *bp) { int rc; @@ -11206,19 +11226,10 @@ static int bnxt_fw_init_one_p1(struct bnxt *bp) rc = bnxt_hwrm_ver_get(bp); bnxt_try_map_fw_health_reg(bp); if (rc) { - if (bp->fw_health && bp->fw_health->status_reliable) { - u32 sts = bnxt_fw_health_readl(bp, BNXT_FW_HEALTH_REG); - - netdev_err(bp->dev, - "Firmware not responding, status: 0x%x\n", - sts); - if (sts & FW_STATUS_REG_CRASHED_NO_MASTER) { - netdev_warn(bp->dev, "Firmware recover via OP-TEE requested\n"); - rc = bnxt_fw_reset_via_optee(bp); - if (!rc) - rc = bnxt_hwrm_ver_get(bp); - } - } + rc = bnxt_try_recover_fw(bp); + if (rc) + return rc; + rc = bnxt_hwrm_ver_get(bp); if (rc) return rc; } -- GitLab From d1cbd1659cac9b192f4677715becf937978b091a Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Mon, 25 Jan 2021 02:08:10 -0500 Subject: [PATCH 1460/2012] bnxt_en: Retry sending the first message to firmware if it is under reset. The first HWRM_VER_GET message to firmware during probe may timeout if firmware is under reset. This can happen during hot-plug for example. On P5 and newer chips, we can check if firmware is in the boot stage by reading a status register. Retry 5 times if the status register shows that firmware is not ready and not in error state. Reviewed-by: Edwin Peer Signed-off-by: Michael Chan Acked-by: Willem de Bruijn Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 42 +++++++++++++++++++---- drivers/net/ethernet/broadcom/bnxt/bnxt.h | 7 ++++ 2 files changed, 42 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index c091a10231883..c460dd796c1c4 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -7441,9 +7441,22 @@ static void bnxt_try_map_fw_health_reg(struct bnxt *bp) sig = readl(hs + offsetof(struct hcomm_status, sig_ver)); if ((sig & HCOMM_STATUS_SIGNATURE_MASK) != HCOMM_STATUS_SIGNATURE_VAL) { - if (bp->fw_health) - bp->fw_health->status_reliable = false; - return; + if (!bp->chip_num) { + __bnxt_map_fw_health_reg(bp, BNXT_GRC_REG_BASE); + bp->chip_num = readl(bp->bar0 + + BNXT_FW_HEALTH_WIN_BASE + + BNXT_GRC_REG_CHIP_NUM); + } + if (!BNXT_CHIP_P5(bp)) { + if (bp->fw_health) + bp->fw_health->status_reliable = false; + return; + } + status_loc = BNXT_GRC_REG_STATUS_P5 | + BNXT_FW_HEALTH_REG_TYPE_BAR0; + } else { + status_loc = readl(hs + offsetof(struct hcomm_status, + fw_status_loc)); } if (__bnxt_alloc_fw_health(bp)) { @@ -7451,7 +7464,6 @@ static void bnxt_try_map_fw_health_reg(struct bnxt *bp) return; } - status_loc = readl(hs + offsetof(struct hcomm_status, fw_status_loc)); bp->fw_health->regs[BNXT_FW_HEALTH_REG] = status_loc; reg_type = BNXT_FW_HEALTH_REG_TYPE(status_loc); if (reg_type == BNXT_FW_HEALTH_REG_TYPE_GRC) { @@ -9355,14 +9367,30 @@ static int bnxt_fw_reset_via_optee(struct bnxt *bp) static int bnxt_try_recover_fw(struct bnxt *bp) { if (bp->fw_health && bp->fw_health->status_reliable) { - u32 sts = bnxt_fw_health_readl(bp, BNXT_FW_HEALTH_REG); + int retry = 0, rc; + u32 sts; + + mutex_lock(&bp->hwrm_cmd_lock); + do { + rc = __bnxt_hwrm_ver_get(bp, true); + sts = bnxt_fw_health_readl(bp, BNXT_FW_HEALTH_REG); + if (!sts || !BNXT_FW_IS_BOOTING(sts)) + break; + retry++; + } while (rc == -EBUSY && retry < BNXT_FW_RETRY); + mutex_unlock(&bp->hwrm_cmd_lock); - netdev_err(bp->dev, "Firmware not responding, status: 0x%x\n", - sts); + if (!BNXT_FW_IS_HEALTHY(sts)) { + netdev_err(bp->dev, + "Firmware not responding, status: 0x%x\n", + sts); + rc = -ENODEV; + } if (sts & FW_STATUS_REG_CRASHED_NO_MASTER) { netdev_warn(bp->dev, "Firmware recover via OP-TEE requested\n"); return bnxt_fw_reset_via_optee(bp); } + return rc; } return -ENODEV; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index a1dd80a0fcf63..867b1d3a134e3 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -1345,9 +1345,14 @@ struct bnxt_test_info { #define BNXT_CAG_REG_LEGACY_INT_STATUS 0x4014 #define BNXT_CAG_REG_BASE 0x300000 +#define BNXT_GRC_REG_STATUS_P5 0x520 + #define BNXT_GRCPF_REG_KONG_COMM 0xA00 #define BNXT_GRCPF_REG_KONG_COMM_TRIGGER 0xB00 +#define BNXT_GRC_REG_CHIP_NUM 0x48 +#define BNXT_GRC_REG_BASE 0x260000 + #define BNXT_GRC_BASE_MASK 0xfffff000 #define BNXT_GRC_OFFSET_MASK 0x00000ffc @@ -1547,6 +1552,8 @@ struct bnxt_fw_reporter_ctx { #define BNXT_FW_IS_ERR(sts) (((sts) & BNXT_FW_STATUS_HEALTH_MSK) > \ BNXT_FW_STATUS_HEALTHY) +#define BNXT_FW_RETRY 5 + struct bnxt { void __iomem *bar0; void __iomem *bar1; -- GitLab From 3e3c09b0e999f51d35875c3103c6ccb49290788f Mon Sep 17 00:00:00 2001 From: Vasundhara Volam Date: Mon, 25 Jan 2021 02:08:11 -0500 Subject: [PATCH 1461/2012] bnxt_en: Move reading VPD info after successful handshake with fw. If firmware is in reset or in bad state, it won't be able to return VPD data. Move bnxt_vpd_read_info() until after bnxt_fw_init_one_p1() successfully returns. By then we would have established proper communications with the firmware. Reviewed-by: Edwin Peer Signed-off-by: Vasundhara Volam Signed-off-by: Michael Chan Acked-by: Willem de Bruijn Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index c460dd796c1c4..2fb9873e01626 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -12584,9 +12584,6 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) dev->ethtool_ops = &bnxt_ethtool_ops; pci_set_drvdata(pdev, dev); - if (BNXT_PF(bp)) - bnxt_vpd_read_info(bp); - rc = bnxt_alloc_hwrm_resources(bp); if (rc) goto init_err_pci_clean; @@ -12598,6 +12595,9 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) if (rc) goto init_err_pci_clean; + if (BNXT_PF(bp)) + bnxt_vpd_read_info(bp); + if (BNXT_CHIP_P5(bp)) { bp->flags |= BNXT_FLAG_CHIP_P5; if (BNXT_CHIP_SR2(bp)) -- GitLab From 881d8353b05e80d93db14b860581ceba14116422 Mon Sep 17 00:00:00 2001 From: Vasundhara Volam Date: Mon, 25 Jan 2021 02:08:12 -0500 Subject: [PATCH 1462/2012] bnxt_en: Add an upper bound for all firmware command timeouts. The timeout period for firmware messages is passed to the driver from the firmware in the response of the first command. This timeout period is multiplied by a factor for certain long running commands such as NVRAM commands. In some cases, the timeout period can become really long and it can cause hung task warnings if firmware has crashed or is not responding. To avoid such long delays, cap all firmware commands to a max timeout value of 40 seconds. Reviewed-by: Edwin Peer Signed-off-by: Vasundhara Volam Signed-off-by: Michael Chan Acked-by: Willem de Bruijn Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 2 ++ drivers/net/ethernet/broadcom/bnxt/bnxt.h | 1 + 2 files changed, 3 insertions(+) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 2fb9873e01626..c06c5f81f0875 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -4425,6 +4425,8 @@ static int bnxt_hwrm_do_send_msg(struct bnxt *bp, void *msg, u32 msg_len, if (!timeout) timeout = DFLT_HWRM_CMD_TIMEOUT; + /* Limit timeout to an upper limit */ + timeout = min(timeout, HWRM_CMD_MAX_TIMEOUT); /* convert timeout to usec */ timeout *= 1000; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index 867b1d3a134e3..cbb338baab07e 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -656,6 +656,7 @@ struct nqe_cn { #define BNXT_HWRM_MAX_REQ_LEN (bp->hwrm_max_req_len) #define BNXT_HWRM_SHORT_REQ_LEN sizeof(struct hwrm_short_input) #define DFLT_HWRM_CMD_TIMEOUT 500 +#define HWRM_CMD_MAX_TIMEOUT 40000 #define SHORT_HWRM_CMD_TIMEOUT 20 #define HWRM_CMD_TIMEOUT (bp->hwrm_cmd_timeout) #define HWRM_RESET_TIMEOUT ((HWRM_CMD_TIMEOUT) * 4) -- GitLab From a44daa8fcbcf572545c4c1a7908b3fbb38388048 Mon Sep 17 00:00:00 2001 From: Edwin Peer Date: Mon, 25 Jan 2021 02:08:13 -0500 Subject: [PATCH 1463/2012] bnxt_en: log firmware debug notifications Firmware is capable of generating asynchronous debug notifications. The event data is opaque to the driver and is simply logged. Debug notifications can be enabled by turning on hardware status messages using the ethtool msglvl interface. Reviewed-by: Pavan Chebbi Signed-off-by: Edwin Peer Signed-off-by: Michael Chan Acked-by: Willem de Bruijn Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index c06c5f81f0875..c8c25f7644aef 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -255,6 +255,7 @@ static const u16 bnxt_async_events_arr[] = { ASYNC_EVENT_CMPL_EVENT_ID_PORT_PHY_CFG_CHANGE, ASYNC_EVENT_CMPL_EVENT_ID_RESET_NOTIFY, ASYNC_EVENT_CMPL_EVENT_ID_ERROR_RECOVERY, + ASYNC_EVENT_CMPL_EVENT_ID_DEBUG_NOTIFICATION, ASYNC_EVENT_CMPL_EVENT_ID_RING_MONITOR_MSG, }; @@ -2072,6 +2073,13 @@ static int bnxt_async_event_process(struct bnxt *bp, bnxt_fw_health_readl(bp, BNXT_FW_RESET_CNT_REG); goto async_event_process_exit; } + case ASYNC_EVENT_CMPL_EVENT_ID_DEBUG_NOTIFICATION: + if (netif_msg_hw(bp)) { + netdev_notice(bp->dev, + "Received firmware debug notification, data1: 0x%x, data2: 0x%x\n", + data1, data2); + } + goto async_event_process_exit; case ASYNC_EVENT_CMPL_EVENT_ID_RING_MONITOR_MSG: { struct bnxt_rx_ring_info *rxr; u16 grp_idx; -- GitLab From 6882c36cf82ebb210f3977be7a3a0be0c64a44cb Mon Sep 17 00:00:00 2001 From: Edwin Peer Date: Mon, 25 Jan 2021 02:08:14 -0500 Subject: [PATCH 1464/2012] bnxt_en: attempt to reinitialize after aborted reset Drawing a hard line on aborted resets prevents a NIC open in some scenarios that may otherwise be recoverable. For example, if a firmware recovery happened while a PF was down and an attempt was made to bring up an associated VF in this state, then it was impossible to ever bring up this VF without a rebind or reload of its driver. Attempt to reinitialize the firmware when an aborted reset (or failed init after a reset) is discovered during open - it may succeed. Also take care to allow the user to retry opening the NIC even after an aborted reset. Signed-off-by: Edwin Peer Signed-off-by: Michael Chan Acked-by: Willem de Bruijn Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 29 +++++++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index c8c25f7644aef..7f30a9fee0c88 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -9768,6 +9768,25 @@ static void bnxt_preset_reg_win(struct bnxt *bp) static int bnxt_init_dflt_ring_mode(struct bnxt *bp); +static int bnxt_reinit_after_abort(struct bnxt *bp) +{ + int rc; + + if (test_bit(BNXT_STATE_IN_FW_RESET, &bp->state)) + return -EBUSY; + + rc = bnxt_fw_init_one(bp); + if (!rc) { + bnxt_clear_int_mode(bp); + rc = bnxt_init_int_mode(bp); + if (!rc) { + clear_bit(BNXT_STATE_ABORT_ERR, &bp->state); + set_bit(BNXT_STATE_FW_RESET_DET, &bp->state); + } + } + return rc; +} + static int __bnxt_open_nic(struct bnxt *bp, bool irq_re_init, bool link_re_init) { int rc = 0; @@ -9926,8 +9945,14 @@ static int bnxt_open(struct net_device *dev) int rc; if (test_bit(BNXT_STATE_ABORT_ERR, &bp->state)) { - netdev_err(bp->dev, "A previous firmware reset did not complete, aborting\n"); - return -ENODEV; + rc = bnxt_reinit_after_abort(bp); + if (rc) { + if (rc == -EBUSY) + netdev_err(bp->dev, "A previous firmware reset has not completed, aborting\n"); + else + netdev_err(bp->dev, "Failed to reinitialize after aborted firmware reset\n"); + return -ENODEV; + } } rc = bnxt_hwrm_if_change(bp, true); -- GitLab From 5d06eb5cb1f9da393eb47b8948d4367e69e48a62 Mon Sep 17 00:00:00 2001 From: Vasundhara Volam Date: Mon, 25 Jan 2021 02:08:15 -0500 Subject: [PATCH 1465/2012] bnxt_en: Retry open if firmware is in reset. Firmware may be in the middle of reset when the driver tries to do ifup. In that case, firmware will return a special error code and the driver will retry 10 times with 50 msecs delay after each retry. Signed-off-by: Vasundhara Volam Signed-off-by: Michael Chan Acked-by: Willem de Bruijn Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 15 +++++++++++++-- drivers/net/ethernet/broadcom/bnxt/bnxt.h | 1 + 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 7f30a9fee0c88..c35a5d497c1e8 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -9411,8 +9411,8 @@ static int bnxt_hwrm_if_change(struct bnxt *bp, bool up) struct hwrm_func_drv_if_change_output *resp = bp->hwrm_cmd_resp_addr; struct hwrm_func_drv_if_change_input req = {0}; bool resc_reinit = false, fw_reset = false; + int rc, retry = 0; u32 flags = 0; - int rc; if (!(bp->fw_cap & BNXT_FW_CAP_IF_CHANGE)) return 0; @@ -9421,10 +9421,21 @@ static int bnxt_hwrm_if_change(struct bnxt *bp, bool up) if (up) req.flags = cpu_to_le32(FUNC_DRV_IF_CHANGE_REQ_FLAGS_UP); mutex_lock(&bp->hwrm_cmd_lock); - rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT); + while (retry < BNXT_FW_IF_RETRY) { + rc = _hwrm_send_message(bp, &req, sizeof(req), + HWRM_CMD_TIMEOUT); + if (rc != -EAGAIN) + break; + + msleep(50); + retry++; + } if (!rc) flags = le32_to_cpu(resp->flags); mutex_unlock(&bp->hwrm_cmd_lock); + + if (rc == -EAGAIN) + return rc; if (rc && up) { rc = bnxt_try_recover_fw(bp); fw_reset = true; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index cbb338baab07e..bd36f00ef28ca 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -1554,6 +1554,7 @@ struct bnxt_fw_reporter_ctx { BNXT_FW_STATUS_HEALTHY) #define BNXT_FW_RETRY 5 +#define BNXT_FW_IF_RETRY 10 struct bnxt { void __iomem *bar0; -- GitLab From 339eeb4bd9e477141280e46ea9433f3a10b54699 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Mon, 25 Jan 2021 02:08:16 -0500 Subject: [PATCH 1466/2012] bnxt_en: Add bnxt_fw_reset_timeout() helper. This code to check if we have reached the maximum wait time after firmware reset is used multiple times. Add a helper function to do this. Reviewed-by: Edwin Peer Signed-off-by: Michael Chan Acked-by: Willem de Bruijn Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index c35a5d497c1e8..98caac9fbdee7 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -11503,6 +11503,12 @@ static void bnxt_reset_all(struct bnxt *bp) bp->fw_reset_timestamp = jiffies; } +static bool bnxt_fw_reset_timeout(struct bnxt *bp) +{ + return time_after(jiffies, bp->fw_reset_timestamp + + (bp->fw_reset_max_dsecs * HZ / 10)); +} + static void bnxt_fw_reset_task(struct work_struct *work) { struct bnxt *bp = container_of(work, struct bnxt, fw_reset_task.work); @@ -11524,8 +11530,7 @@ static void bnxt_fw_reset_task(struct work_struct *work) bp->fw_reset_timestamp)); goto fw_reset_abort; } else if (n > 0) { - if (time_after(jiffies, bp->fw_reset_timestamp + - (bp->fw_reset_max_dsecs * HZ / 10))) { + if (bnxt_fw_reset_timeout(bp)) { clear_bit(BNXT_STATE_IN_FW_RESET, &bp->state); bp->fw_reset_state = 0; netdev_err(bp->dev, "Firmware reset aborted, bnxt_get_registered_vfs() returns %d\n", @@ -11554,8 +11559,7 @@ static void bnxt_fw_reset_task(struct work_struct *work) val = bnxt_fw_health_readl(bp, BNXT_FW_HEALTH_REG); if (!(val & BNXT_FW_STATUS_SHUTDOWN) && - !time_after(jiffies, bp->fw_reset_timestamp + - (bp->fw_reset_max_dsecs * HZ / 10))) { + !bnxt_fw_reset_timeout(bp)) { bnxt_queue_fw_reset_work(bp, HZ / 5); return; } @@ -11597,8 +11601,7 @@ static void bnxt_fw_reset_task(struct work_struct *work) bp->hwrm_cmd_timeout = SHORT_HWRM_CMD_TIMEOUT; rc = __bnxt_hwrm_ver_get(bp, true); if (rc) { - if (time_after(jiffies, bp->fw_reset_timestamp + - (bp->fw_reset_max_dsecs * HZ / 10))) { + if (bnxt_fw_reset_timeout(bp)) { netdev_err(bp->dev, "Firmware reset aborted\n"); goto fw_reset_abort_status; } -- GitLab From e340a5c4fbdde20fec8c16b83bce386aaad6b6eb Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Mon, 25 Jan 2021 02:08:17 -0500 Subject: [PATCH 1467/2012] bnxt_en: Add a new BNXT_STATE_NAPI_DISABLED flag to keep track of NAPI state. Up until now, we don't need to keep track of this state because NAPI is always enabled once and disabled once during bring up and shutdown. For better error recovery in subsequent patches, we want to quiesce the device earlier during fatal error conditions. The normal shutdown sequence will disable NAPI again and the flag will prevent disabling NAPI twice. Reviewed-by: Pavan Chebbi Reviewed-by: Andy Gospodarek Reviewed-by: Edwin Peer Reviewed-by: Vasundhara Volam Signed-off-by: Michael Chan Acked-by: Willem de Bruijn Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 4 +++- drivers/net/ethernet/broadcom/bnxt/bnxt.h | 1 + 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 98caac9fbdee7..83846b50042a9 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -8836,7 +8836,8 @@ static void bnxt_disable_napi(struct bnxt *bp) { int i; - if (!bp->bnapi) + if (!bp->bnapi || + test_and_set_bit(BNXT_STATE_NAPI_DISABLED, &bp->state)) return; for (i = 0; i < bp->cp_nr_rings; i++) { @@ -8853,6 +8854,7 @@ static void bnxt_enable_napi(struct bnxt *bp) { int i; + clear_bit(BNXT_STATE_NAPI_DISABLED, &bp->state); for (i = 0; i < bp->cp_nr_rings; i++) { struct bnxt_napi *bnapi = bp->bnapi[i]; struct bnxt_cp_ring_info *cpr; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index bd36f00ef28ca..4ef6888acdc6d 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -1809,6 +1809,7 @@ struct bnxt { #define BNXT_STATE_FW_FATAL_COND 6 #define BNXT_STATE_DRV_REGISTERED 7 #define BNXT_STATE_PCI_CHANNEL_IO_FROZEN 8 +#define BNXT_STATE_NAPI_DISABLED 9 #define BNXT_NO_FW_ACCESS(bp) \ (test_bit(BNXT_STATE_FW_FATAL_COND, &(bp)->state) || \ -- GitLab From 38290e37297087f7ea3ef7904b8f185d77c42976 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Mon, 25 Jan 2021 02:08:18 -0500 Subject: [PATCH 1468/2012] bnxt_en: Modify bnxt_disable_int_sync() to be called more than once. In the event of a fatal firmware error, we want to disable IRQ early in the recovery sequence. This change will allow it to be called safely again as part of the normal shutdown sequence. Reviewed-by: Edwin Peer Reviewed-by: Vasundhara Volam Signed-off-by: Michael Chan Acked-by: Willem de Bruijn Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 83846b50042a9..80dab4e622abb 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -4280,6 +4280,9 @@ static void bnxt_disable_int_sync(struct bnxt *bp) { int i; + if (!bp->irq_tbl) + return; + atomic_inc(&bp->intr_sem); bnxt_disable_int(bp); -- GitLab From 4f036b2e75986946117237a6baddc489dd2b3c34 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Mon, 25 Jan 2021 02:08:19 -0500 Subject: [PATCH 1469/2012] bnxt_en: Improve firmware fatal error shutdown sequence. In the event of a fatal firmware error, firmware will notify the host and then it will proceed to do core reset when it sees that all functions have disabled Bus Master. To prevent Master Aborts and other hard errors, we need to quiesce all activities in addition to disabling Bus Master before the chip goes into core reset. Reviewed-by: Edwin Peer Reviewed-by: Vasundhara Volam Signed-off-by: Michael Chan Acked-by: Willem de Bruijn Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 80dab4e622abb..e7abb3b7ed68c 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -10905,11 +10905,18 @@ static void bnxt_rx_ring_reset(struct bnxt *bp) static void bnxt_fw_reset_close(struct bnxt *bp) { bnxt_ulp_stop(bp); - /* When firmware is fatal state, disable PCI device to prevent - * any potential bad DMAs before freeing kernel memory. + /* When firmware is in fatal state, quiesce device and disable + * bus master to prevent any potential bad DMAs before freeing + * kernel memory. */ - if (test_bit(BNXT_STATE_FW_FATAL_COND, &bp->state)) + if (test_bit(BNXT_STATE_FW_FATAL_COND, &bp->state)) { + bnxt_tx_disable(bp); + bnxt_disable_napi(bp); + bnxt_disable_int_sync(bp); + bnxt_free_irq(bp); + bnxt_clear_int_mode(bp); pci_disable_device(bp->pdev); + } __bnxt_close_nic(bp, true, false); bnxt_clear_int_mode(bp); bnxt_hwrm_func_drv_unrgtr(bp); -- GitLab From 5863b10aa86a5f5f69a25b55a5c15806c834471a Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Mon, 25 Jan 2021 02:08:20 -0500 Subject: [PATCH 1470/2012] bnxt_en: Consolidate firmware reset event logging. Combine the three netdev_warn() calls into a single call, printed at the NETIF_MSG_HW log level. Reviewed-by: Vasundhara Volam Signed-off-by: Michael Chan Acked-by: Willem de Bruijn Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index e7abb3b7ed68c..221f5437884b0 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -2022,10 +2022,9 @@ static int bnxt_async_event_process(struct bnxt *bp, goto async_event_process_exit; set_bit(BNXT_RESET_TASK_SILENT_SP_EVENT, &bp->sp_event); break; - case ASYNC_EVENT_CMPL_EVENT_ID_RESET_NOTIFY: - if (netif_msg_hw(bp)) - netdev_warn(bp->dev, "Received RESET_NOTIFY event, data1: 0x%x, data2: 0x%x\n", - data1, data2); + case ASYNC_EVENT_CMPL_EVENT_ID_RESET_NOTIFY: { + char *fatal_str = "non-fatal"; + if (!bp->fw_health) goto async_event_process_exit; @@ -2037,14 +2036,18 @@ static int bnxt_async_event_process(struct bnxt *bp, if (!bp->fw_reset_max_dsecs) bp->fw_reset_max_dsecs = BNXT_DFLT_FW_RST_MAX_DSECS; if (EVENT_DATA1_RESET_NOTIFY_FATAL(data1)) { - netdev_warn(bp->dev, "Firmware fatal reset event received\n"); + fatal_str = "fatal"; set_bit(BNXT_STATE_FW_FATAL_COND, &bp->state); - } else { - netdev_warn(bp->dev, "Firmware non-fatal reset event received, max wait time %d msec\n", + } + if (netif_msg_hw(bp)) { + netdev_warn(bp->dev, "Firmware %s reset event, data1: 0x%x, data2: 0x%x, min wait %u ms, max wait %u ms\n", + fatal_str, data1, data2, + bp->fw_reset_min_dsecs * 100, bp->fw_reset_max_dsecs * 100); } set_bit(BNXT_FW_RESET_NOTIFY_SP_EVENT, &bp->sp_event); break; + } case ASYNC_EVENT_CMPL_EVENT_ID_ERROR_RECOVERY: { struct bnxt_fw_health *fw_health = bp->fw_health; -- GitLab From 0da65f4932cee9f9698a2e1493d22b27c91841c9 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Mon, 25 Jan 2021 02:08:21 -0500 Subject: [PATCH 1471/2012] bnxt_en: Do not process completion entries after fatal condition detected. Once the firmware fatal condition is detected, we should cease comminication with the firmware and hardware quickly even if there are many completion entries in the completion rings. This will speed up the recovery process and prevent further I/Os that may cause further exceptions. Do not proceed in the NAPI poll function if fatal condition is detected. Call napi_complete() and return without arming interrupts. Cleanup of all rings and reset are imminent. Reviewed-by: Pavan Chebbi Reviewed-by: Vasundhara Volam Reviewed-by: Edwin Peer Signed-off-by: Michael Chan Acked-by: Willem de Bruijn Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 221f5437884b0..dd7d2caa57a21 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -2405,6 +2405,10 @@ static int bnxt_poll(struct napi_struct *napi, int budget) struct bnxt_cp_ring_info *cpr = &bnapi->cp_ring; int work_done = 0; + if (unlikely(test_bit(BNXT_STATE_FW_FATAL_COND, &bp->state))) { + napi_complete(napi); + return 0; + } while (1) { work_done += bnxt_poll_work(bp, cpr, budget - work_done); @@ -2479,6 +2483,10 @@ static int bnxt_poll_p5(struct napi_struct *napi, int budget) int work_done = 0; u32 cons; + if (unlikely(test_bit(BNXT_STATE_FW_FATAL_COND, &bp->state))) { + napi_complete(napi); + return 0; + } if (cpr->has_more_work) { cpr->has_more_work = 0; work_done = __bnxt_poll_cqs(bp, bnapi, budget); -- GitLab From afe197f44e646f13544299f516139b13327a849f Mon Sep 17 00:00:00 2001 From: wengjianfeng Date: Sat, 23 Jan 2021 15:48:35 +0800 Subject: [PATCH 1472/2012] nfc: fdp: fix typo issue change 'paquet' to 'packet' Signed-off-by: wengjianfeng Link: https://lore.kernel.org/r/20210123074835.9448-1-samirweng1979@163.com Signed-off-by: Jakub Kicinski --- drivers/nfc/fdp/i2c.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nfc/fdp/i2c.c b/drivers/nfc/fdp/i2c.c index ad0abb1f0bae9..adaa1a7147f95 100644 --- a/drivers/nfc/fdp/i2c.c +++ b/drivers/nfc/fdp/i2c.c @@ -155,7 +155,7 @@ static int fdp_nci_i2c_read(struct fdp_i2c_phy *phy, struct sk_buff **skb) /* * LRC check failed. This may due to transmission error or - * desynchronization between driver and FDP. Drop the paquet + * desynchronization between driver and FDP. Drop the packet * and force resynchronization */ if (lrc) { -- GitLab From 02c26940908fd31bb112e9742adedfb06eca19e1 Mon Sep 17 00:00:00 2001 From: wengjianfeng Date: Sat, 23 Jan 2021 16:25:50 +0800 Subject: [PATCH 1473/2012] nfc: fix typo change 'regster' to 'register' Signed-off-by: wengjianfeng Acked-by: Mark Greer Link: https://lore.kernel.org/r/20210123082550.3748-1-samirweng1979@163.com Signed-off-by: Jakub Kicinski --- drivers/nfc/trf7970a.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nfc/trf7970a.c b/drivers/nfc/trf7970a.c index c70f62fe321eb..33978022ae475 100644 --- a/drivers/nfc/trf7970a.c +++ b/drivers/nfc/trf7970a.c @@ -169,7 +169,7 @@ /* Bits determining whether its a direct command or register R/W, * whether to use a continuous SPI transaction or not, and the actual - * direct cmd opcode or regster address. + * direct cmd opcode or register address. */ #define TRF7970A_CMD_BIT_CTRL BIT(7) #define TRF7970A_CMD_BIT_RW BIT(6) -- GitLab From 0549cd67b01016b579047bce045b386202a8bcfc Mon Sep 17 00:00:00 2001 From: Roger Pau Monne Date: Tue, 19 Jan 2021 11:57:27 +0100 Subject: [PATCH 1474/2012] xen-blkfront: allow discard-* nodes to be optional MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is inline with the specification described in blkif.h: * discard-granularity: should be set to the physical block size if node is not present. * discard-alignment, discard-secure: should be set to 0 if node not present. This was detected as QEMU would only create the discard-granularity node but not discard-alignment, and thus the setup done in blkfront_setup_discard would fail. Fix blkfront_setup_discard to not fail on missing nodes, and also fix blkif_set_queue_limits to set the discard granularity to the physical block size if none is specified in xenbus. Fixes: ed30bf317c5ce ('xen-blkfront: Handle discard requests.') Reported-by: Arthur Borsboom Signed-off-by: Roger Pau Monné Reviewed-by: Juergen Gross Tested-By: Arthur Borsboom Link: https://lore.kernel.org/r/20210119105727.95173-1-roger.pau@citrix.com Signed-off-by: Juergen Gross --- drivers/block/xen-blkfront.c | 20 +++++++------------- 1 file changed, 7 insertions(+), 13 deletions(-) diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 34b028be78ab1..1ee9545ea2f11 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -945,7 +945,8 @@ static void blkif_set_queue_limits(struct blkfront_info *info) if (info->feature_discard) { blk_queue_flag_set(QUEUE_FLAG_DISCARD, rq); blk_queue_max_discard_sectors(rq, get_capacity(gd)); - rq->limits.discard_granularity = info->discard_granularity; + rq->limits.discard_granularity = info->discard_granularity ?: + info->physical_sector_size; rq->limits.discard_alignment = info->discard_alignment; if (info->feature_secdiscard) blk_queue_flag_set(QUEUE_FLAG_SECERASE, rq); @@ -2179,19 +2180,12 @@ static void blkfront_closing(struct blkfront_info *info) static void blkfront_setup_discard(struct blkfront_info *info) { - int err; - unsigned int discard_granularity; - unsigned int discard_alignment; - info->feature_discard = 1; - err = xenbus_gather(XBT_NIL, info->xbdev->otherend, - "discard-granularity", "%u", &discard_granularity, - "discard-alignment", "%u", &discard_alignment, - NULL); - if (!err) { - info->discard_granularity = discard_granularity; - info->discard_alignment = discard_alignment; - } + info->discard_granularity = xenbus_read_unsigned(info->xbdev->otherend, + "discard-granularity", + 0); + info->discard_alignment = xenbus_read_unsigned(info->xbdev->otherend, + "discard-alignment", 0); info->feature_secdiscard = !!xenbus_read_unsigned(info->xbdev->otherend, "discard-secure", 0); -- GitLab From 179e8e47c02a1950f1c556f2b854bdb2259078fb Mon Sep 17 00:00:00 2001 From: Jason Gerecke Date: Thu, 21 Jan 2021 10:46:49 -0800 Subject: [PATCH 1475/2012] HID: wacom: Correct NULL dereference on AES pen proximity The recent commit to fix a memory leak introduced an inadvertant NULL pointer dereference. The `wacom_wac->pen_fifo` variable was never intialized, resuling in a crash whenever functions tried to use it. Since the FIFO is only used by AES pens (to buffer events from pen proximity until the hardware reports the pen serial number) this would have been easily overlooked without testing an AES device. This patch converts `wacom_wac->pen_fifo` over to a pointer (since the call to `devres_alloc` allocates memory for us) and ensures that we assign it to point to the allocated and initalized `pen_fifo` before the function returns. Link: https://github.com/linuxwacom/input-wacom/issues/230 Fixes: 37309f47e2f5 ("HID: wacom: Fix memory leakage caused by kfifo_alloc") CC: stable@vger.kernel.org # v4.19+ Signed-off-by: Jason Gerecke Tested-by: Ping Cheng Signed-off-by: Jiri Kosina --- drivers/hid/wacom_sys.c | 7 ++++--- drivers/hid/wacom_wac.h | 2 +- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/hid/wacom_sys.c b/drivers/hid/wacom_sys.c index e8acd235db2a9..aa9e48876ceda 100644 --- a/drivers/hid/wacom_sys.c +++ b/drivers/hid/wacom_sys.c @@ -147,9 +147,9 @@ static int wacom_wac_pen_serial_enforce(struct hid_device *hdev, } if (flush) - wacom_wac_queue_flush(hdev, &wacom_wac->pen_fifo); + wacom_wac_queue_flush(hdev, wacom_wac->pen_fifo); else if (insert) - wacom_wac_queue_insert(hdev, &wacom_wac->pen_fifo, + wacom_wac_queue_insert(hdev, wacom_wac->pen_fifo, raw_data, report_size); return insert && !flush; @@ -1280,7 +1280,7 @@ static void wacom_devm_kfifo_release(struct device *dev, void *res) static int wacom_devm_kfifo_alloc(struct wacom *wacom) { struct wacom_wac *wacom_wac = &wacom->wacom_wac; - struct kfifo_rec_ptr_2 *pen_fifo = &wacom_wac->pen_fifo; + struct kfifo_rec_ptr_2 *pen_fifo; int error; pen_fifo = devres_alloc(wacom_devm_kfifo_release, @@ -1297,6 +1297,7 @@ static int wacom_devm_kfifo_alloc(struct wacom *wacom) } devres_add(&wacom->hdev->dev, pen_fifo); + wacom_wac->pen_fifo = pen_fifo; return 0; } diff --git a/drivers/hid/wacom_wac.h b/drivers/hid/wacom_wac.h index da612b6e9c779..195910dd2154e 100644 --- a/drivers/hid/wacom_wac.h +++ b/drivers/hid/wacom_wac.h @@ -342,7 +342,7 @@ struct wacom_wac { struct input_dev *pen_input; struct input_dev *touch_input; struct input_dev *pad_input; - struct kfifo_rec_ptr_2 pen_fifo; + struct kfifo_rec_ptr_2 *pen_fifo; int pid; int num_contacts_left; u8 bt_features; -- GitLab From a05829a7222e9d10c416dd2dbbf3929fe6646b89 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 22 Jan 2021 16:19:43 +0100 Subject: [PATCH 1476/2012] cfg80211: avoid holding the RTNL when calling the driver Currently, _everything_ in cfg80211 holds the RTNL, and if you have a slow USB device (or a few) you can get some bad lock contention on that. Fix that by re-adding a mutex to each wiphy/rdev as we had at some point, so we have locking for the wireless_dev lists and all the other things in there, and also so that drivers still don't have to worry too much about it (they still won't get parallel calls for a single device). Then, we can restrict the RTNL to a few cases where we add or remove interfaces and really need the added protection. Some of the global list management still also uses the RTNL, since we need to have it anyway for netdev management, but we only hold the RTNL for very short periods of time here. Link: https://lore.kernel.org/r/20210122161942.81df9f5e047a.I4a8e1a60b18863ea8c5e6d3a0faeafb2d45b2f40@changeid Tested-by: Marek Szyprowski [marvell driver issues] Signed-off-by: Johannes Berg --- drivers/net/wireless/ath/ath11k/reg.c | 4 +- drivers/net/wireless/ath/ath6kl/core.c | 2 + drivers/net/wireless/ath/ath6kl/init.c | 2 + drivers/net/wireless/ath/wil6210/cfg80211.c | 2 + drivers/net/wireless/ath/wil6210/netdev.c | 7 +- drivers/net/wireless/ath/wil6210/pcie_bus.c | 2 + .../broadcom/brcm80211/brcmfmac/core.c | 18 +- .../broadcom/brcm80211/brcmfmac/core.h | 6 +- .../broadcom/brcm80211/brcmfmac/p2p.c | 12 +- drivers/net/wireless/intel/iwlwifi/mvm/d3.c | 2 +- .../net/wireless/intel/iwlwifi/mvm/mac80211.c | 4 +- drivers/net/wireless/intel/iwlwifi/mvm/nvm.c | 2 +- .../net/wireless/marvell/mwifiex/cfg80211.c | 6 +- drivers/net/wireless/marvell/mwifiex/main.c | 7 + drivers/net/wireless/quantenna/qtnfmac/core.c | 3 +- drivers/net/wireless/virt_wifi.c | 8 + include/net/cfg80211.h | 104 ++- include/net/mac80211.h | 10 +- net/mac80211/iface.c | 14 +- net/mac80211/key.c | 4 +- net/mac80211/main.c | 5 + net/mac80211/pm.c | 6 +- net/mac80211/tdls.c | 6 +- net/mac80211/util.c | 14 +- net/wireless/chan.c | 5 +- net/wireless/core.c | 46 +- net/wireless/core.h | 2 +- net/wireless/debugfs.c | 4 - net/wireless/ibss.c | 3 +- net/wireless/mlme.c | 6 +- net/wireless/nl80211.c | 657 ++++++++++-------- net/wireless/reg.c | 91 ++- net/wireless/reg.h | 1 - net/wireless/scan.c | 35 +- net/wireless/sme.c | 5 +- net/wireless/sysfs.c | 5 + net/wireless/util.c | 4 +- net/wireless/wext-compat.c | 271 ++++++-- net/wireless/wext-sme.c | 4 +- 39 files changed, 880 insertions(+), 509 deletions(-) diff --git a/drivers/net/wireless/ath/ath11k/reg.c b/drivers/net/wireless/ath/ath11k/reg.c index b876fec7fa1b1..e1a1df169034b 100644 --- a/drivers/net/wireless/ath/ath11k/reg.c +++ b/drivers/net/wireless/ath/ath11k/reg.c @@ -247,7 +247,9 @@ int ath11k_regd_update(struct ath11k *ar, bool init) } rtnl_lock(); - ret = regulatory_set_wiphy_regd_sync_rtnl(ar->hw->wiphy, regd_copy); + wiphy_lock(ar->hw->wiphy); + ret = regulatory_set_wiphy_regd_sync(ar->hw->wiphy, regd_copy); + wiphy_unlock(ar->hw->wiphy); rtnl_unlock(); kfree(regd_copy); diff --git a/drivers/net/wireless/ath/ath6kl/core.c b/drivers/net/wireless/ath/ath6kl/core.c index ebb9f163710fa..4f0a7a185fc91 100644 --- a/drivers/net/wireless/ath/ath6kl/core.c +++ b/drivers/net/wireless/ath/ath6kl/core.c @@ -212,11 +212,13 @@ int ath6kl_core_init(struct ath6kl *ar, enum ath6kl_htc_type htc_type) ar->avail_idx_map |= BIT(i); rtnl_lock(); + wiphy_lock(ar->wiphy); /* Add an initial station interface */ wdev = ath6kl_interface_add(ar, "wlan%d", NET_NAME_ENUM, NL80211_IFTYPE_STATION, 0, INFRA_NETWORK); + wiphy_unlock(ar->wiphy); rtnl_unlock(); if (!wdev) { diff --git a/drivers/net/wireless/ath/ath6kl/init.c b/drivers/net/wireless/ath/ath6kl/init.c index 39bf196861751..9b5c7d8f2b95e 100644 --- a/drivers/net/wireless/ath/ath6kl/init.c +++ b/drivers/net/wireless/ath/ath6kl/init.c @@ -1904,7 +1904,9 @@ void ath6kl_stop_txrx(struct ath6kl *ar) spin_unlock_bh(&ar->list_lock); ath6kl_cfg80211_vif_stop(vif, test_bit(WMI_READY, &ar->flag)); rtnl_lock(); + wiphy_lock(ar->wiphy); ath6kl_cfg80211_vif_cleanup(vif); + wiphy_unlock(ar->wiphy); rtnl_unlock(); spin_lock_bh(&ar->list_lock); } diff --git a/drivers/net/wireless/ath/wil6210/cfg80211.c b/drivers/net/wireless/ath/wil6210/cfg80211.c index 1c42410d68e1a..60bba5b491e0f 100644 --- a/drivers/net/wireless/ath/wil6210/cfg80211.c +++ b/drivers/net/wireless/ath/wil6210/cfg80211.c @@ -2820,7 +2820,9 @@ void wil_p2p_wdev_free(struct wil6210_priv *wil) wil->radio_wdev = wil->main_ndev->ieee80211_ptr; mutex_unlock(&wil->vif_mutex); if (p2p_wdev) { + wiphy_lock(wil->wiphy); cfg80211_unregister_wdev(p2p_wdev); + wiphy_unlock(wil->wiphy); kfree(p2p_wdev); } } diff --git a/drivers/net/wireless/ath/wil6210/netdev.c b/drivers/net/wireless/ath/wil6210/netdev.c index 472fe804203d2..0913f0bf60e79 100644 --- a/drivers/net/wireless/ath/wil6210/netdev.c +++ b/drivers/net/wireless/ath/wil6210/netdev.c @@ -473,7 +473,9 @@ int wil_if_add(struct wil6210_priv *wil) wil_update_net_queues_bh(wil, vif, NULL, true); rtnl_lock(); + wiphy_lock(wiphy); rc = wil_vif_add(wil, vif); + wiphy_unlock(wiphy); rtnl_unlock(); if (rc < 0) goto out_wiphy; @@ -543,15 +545,18 @@ void wil_if_remove(struct wil6210_priv *wil) { struct net_device *ndev = wil->main_ndev; struct wireless_dev *wdev = ndev->ieee80211_ptr; + struct wiphy *wiphy = wdev->wiphy; wil_dbg_misc(wil, "if_remove\n"); rtnl_lock(); + wiphy_lock(wiphy); wil_vif_remove(wil, 0); + wiphy_unlock(wiphy); rtnl_unlock(); netif_napi_del(&wil->napi_tx); netif_napi_del(&wil->napi_rx); - wiphy_unregister(wdev->wiphy); + wiphy_unregister(wiphy); } diff --git a/drivers/net/wireless/ath/wil6210/pcie_bus.c b/drivers/net/wireless/ath/wil6210/pcie_bus.c index c174323c5c0b4..ce40d94909ada 100644 --- a/drivers/net/wireless/ath/wil6210/pcie_bus.c +++ b/drivers/net/wireless/ath/wil6210/pcie_bus.c @@ -473,8 +473,10 @@ static void wil_pcie_remove(struct pci_dev *pdev) wil6210_debugfs_remove(wil); rtnl_lock(); + wiphy_lock(wil->wiphy); wil_p2p_wdev_free(wil); wil_remove_all_additional_vifs(wil); + wiphy_unlock(wil->wiphy); rtnl_unlock(); wil_if_remove(wil); wil_if_pcie_disable(wil); diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c index 6cf308d5934c9..ea78fe527c5dc 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c @@ -633,7 +633,7 @@ static const struct net_device_ops brcmf_netdev_ops_pri = { .ndo_set_rx_mode = brcmf_netdev_set_multicast_list }; -int brcmf_net_attach(struct brcmf_if *ifp, bool rtnl_locked) +int brcmf_net_attach(struct brcmf_if *ifp, bool locked) { struct brcmf_pub *drvr = ifp->drvr; struct net_device *ndev; @@ -656,7 +656,7 @@ int brcmf_net_attach(struct brcmf_if *ifp, bool rtnl_locked) INIT_WORK(&ifp->multicast_work, _brcmf_set_multicast_list); INIT_WORK(&ifp->ndoffload_work, _brcmf_update_ndtable); - if (rtnl_locked) + if (locked) err = cfg80211_register_netdevice(ndev); else err = register_netdev(ndev); @@ -677,10 +677,10 @@ fail: return -EBADE; } -void brcmf_net_detach(struct net_device *ndev, bool rtnl_locked) +void brcmf_net_detach(struct net_device *ndev, bool locked) { if (ndev->reg_state == NETREG_REGISTERED) { - if (rtnl_locked) + if (locked) cfg80211_unregister_netdevice(ndev); else unregister_netdev(ndev); @@ -909,7 +909,7 @@ struct brcmf_if *brcmf_add_if(struct brcmf_pub *drvr, s32 bsscfgidx, s32 ifidx, } static void brcmf_del_if(struct brcmf_pub *drvr, s32 bsscfgidx, - bool rtnl_locked) + bool locked) { struct brcmf_if *ifp; int ifidx; @@ -938,7 +938,7 @@ static void brcmf_del_if(struct brcmf_pub *drvr, s32 bsscfgidx, cancel_work_sync(&ifp->multicast_work); cancel_work_sync(&ifp->ndoffload_work); } - brcmf_net_detach(ifp->ndev, rtnl_locked); + brcmf_net_detach(ifp->ndev, locked); } else { /* Only p2p device interfaces which get dynamically created * end up here. In this case the p2p module should be informed @@ -947,7 +947,7 @@ static void brcmf_del_if(struct brcmf_pub *drvr, s32 bsscfgidx, * serious troublesome side effects. The p2p module will clean * up the ifp if needed. */ - brcmf_p2p_ifp_removed(ifp, rtnl_locked); + brcmf_p2p_ifp_removed(ifp, locked); kfree(ifp); } @@ -956,14 +956,14 @@ static void brcmf_del_if(struct brcmf_pub *drvr, s32 bsscfgidx, drvr->if2bss[ifidx] = BRCMF_BSSIDX_INVALID; } -void brcmf_remove_interface(struct brcmf_if *ifp, bool rtnl_locked) +void brcmf_remove_interface(struct brcmf_if *ifp, bool locked) { if (!ifp || WARN_ON(ifp->drvr->iflist[ifp->bsscfgidx] != ifp)) return; brcmf_dbg(TRACE, "Enter, bsscfgidx=%d, ifidx=%d\n", ifp->bsscfgidx, ifp->ifidx); brcmf_proto_del_if(ifp->drvr, ifp); - brcmf_del_if(ifp->drvr, ifp->bsscfgidx, rtnl_locked); + brcmf_del_if(ifp->drvr, ifp->bsscfgidx, locked); } static int brcmf_psm_watchdog_notify(struct brcmf_if *ifp, diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.h b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.h index 5767d665cee50..8212c9de14f1f 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.h +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.h @@ -201,16 +201,16 @@ int brcmf_netdev_wait_pend8021x(struct brcmf_if *ifp); char *brcmf_ifname(struct brcmf_if *ifp); struct brcmf_if *brcmf_get_ifp(struct brcmf_pub *drvr, int ifidx); void brcmf_configure_arp_nd_offload(struct brcmf_if *ifp, bool enable); -int brcmf_net_attach(struct brcmf_if *ifp, bool rtnl_locked); +int brcmf_net_attach(struct brcmf_if *ifp, bool locked); struct brcmf_if *brcmf_add_if(struct brcmf_pub *drvr, s32 bsscfgidx, s32 ifidx, bool is_p2pdev, const char *name, u8 *mac_addr); -void brcmf_remove_interface(struct brcmf_if *ifp, bool rtnl_locked); +void brcmf_remove_interface(struct brcmf_if *ifp, bool locked); void brcmf_txflowblock_if(struct brcmf_if *ifp, enum brcmf_netif_stop_reason reason, bool state); void brcmf_txfinalize(struct brcmf_if *ifp, struct sk_buff *txp, bool success); void brcmf_netif_rx(struct brcmf_if *ifp, struct sk_buff *skb, bool inirq); void brcmf_netif_mon_rx(struct brcmf_if *ifp, struct sk_buff *skb); -void brcmf_net_detach(struct net_device *ndev, bool rtnl_locked); +void brcmf_net_detach(struct net_device *ndev, bool locked); int brcmf_net_mon_attach(struct brcmf_if *ifp); void brcmf_net_setcarrier(struct brcmf_if *ifp, bool on); int __init brcmf_core_init(void); diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c index ec6fc7a150a6a..6d30a0fceceae 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c @@ -2430,7 +2430,7 @@ int brcmf_p2p_del_vif(struct wiphy *wiphy, struct wireless_dev *wdev) return err; } -void brcmf_p2p_ifp_removed(struct brcmf_if *ifp, bool rtnl_locked) +void brcmf_p2p_ifp_removed(struct brcmf_if *ifp, bool locked) { struct brcmf_cfg80211_info *cfg; struct brcmf_cfg80211_vif *vif; @@ -2439,11 +2439,15 @@ void brcmf_p2p_ifp_removed(struct brcmf_if *ifp, bool rtnl_locked) vif = ifp->vif; cfg = wdev_to_cfg(&vif->wdev); cfg->p2p.bss_idx[P2PAPI_BSSCFG_DEVICE].vif = NULL; - if (!rtnl_locked) + if (locked) { rtnl_lock(); - cfg80211_unregister_wdev(&vif->wdev); - if (!rtnl_locked) + wiphy_lock(cfg->wiphy); + cfg80211_unregister_wdev(&vif->wdev); + wiphy_unlock(cfg->wiphy); rtnl_unlock(); + } else { + cfg80211_unregister_wdev(&vif->wdev); + } brcmf_free_vif(vif); } diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/d3.c b/drivers/net/wireless/intel/iwlwifi/mvm/d3.c index c025188fa9bc5..a0b7331cab317 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/d3.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/d3.c @@ -2143,7 +2143,7 @@ err: out_iterate: if (!test) - ieee80211_iterate_active_interfaces_rtnl(mvm->hw, + ieee80211_iterate_active_interfaces_mtx(mvm->hw, IEEE80211_IFACE_ITER_NORMAL, iwl_mvm_d3_disconnect_iter, keep ? vif : NULL); diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c index da32937ba9a78..6cce72b0685b6 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c @@ -260,7 +260,7 @@ int iwl_mvm_init_fw_regd(struct iwl_mvm *mvm) int ret; bool changed; const struct ieee80211_regdomain *r = - rtnl_dereference(mvm->hw->wiphy->regd); + wiphy_dereference(mvm->hw->wiphy, mvm->hw->wiphy->regd); if (!r) return -ENOENT; @@ -282,7 +282,7 @@ int iwl_mvm_init_fw_regd(struct iwl_mvm *mvm) /* update cfg80211 if the regdomain was changed */ if (changed) - ret = regulatory_set_wiphy_regd_sync_rtnl(mvm->hw->wiphy, regd); + ret = regulatory_set_wiphy_regd_sync(mvm->hw->wiphy, regd); else ret = 0; diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/nvm.c b/drivers/net/wireless/intel/iwlwifi/mvm/nvm.c index abb8c1088c2fe..7fb4e618f76ef 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/nvm.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/nvm.c @@ -545,7 +545,7 @@ int iwl_mvm_init_mcc(struct iwl_mvm *mvm) return -EIO; } - retval = regulatory_set_wiphy_regd_sync_rtnl(mvm->hw->wiphy, regd); + retval = regulatory_set_wiphy_regd_sync(mvm->hw->wiphy, regd); kfree(regd); return retval; } diff --git a/drivers/net/wireless/marvell/mwifiex/cfg80211.c b/drivers/net/wireless/marvell/mwifiex/cfg80211.c index 15e1cee7f465d..5553df9132901 100644 --- a/drivers/net/wireless/marvell/mwifiex/cfg80211.c +++ b/drivers/net/wireless/marvell/mwifiex/cfg80211.c @@ -2097,7 +2097,7 @@ mwifiex_cfg80211_disconnect(struct wiphy *wiphy, struct net_device *dev, struct mwifiex_private *priv = mwifiex_netdev_get_priv(dev); if (!mwifiex_stop_bg_scan(priv)) - cfg80211_sched_scan_stopped_rtnl(priv->wdev.wiphy, 0); + cfg80211_sched_scan_stopped_locked(priv->wdev.wiphy, 0); if (mwifiex_deauthenticate(priv, NULL)) return -EFAULT; @@ -2366,7 +2366,7 @@ mwifiex_cfg80211_connect(struct wiphy *wiphy, struct net_device *dev, (int)sme->ssid_len, (char *)sme->ssid, sme->bssid); if (!mwifiex_stop_bg_scan(priv)) - cfg80211_sched_scan_stopped_rtnl(priv->wdev.wiphy, 0); + cfg80211_sched_scan_stopped_locked(priv->wdev.wiphy, 0); ret = mwifiex_cfg80211_assoc(priv, sme->ssid_len, sme->ssid, sme->bssid, priv->bss_mode, sme->channel, sme, 0); @@ -2576,7 +2576,7 @@ mwifiex_cfg80211_scan(struct wiphy *wiphy, priv->scan_block = false; if (!mwifiex_stop_bg_scan(priv)) - cfg80211_sched_scan_stopped_rtnl(priv->wdev.wiphy, 0); + cfg80211_sched_scan_stopped_locked(priv->wdev.wiphy, 0); user_scan_cfg = kzalloc(sizeof(*user_scan_cfg), GFP_KERNEL); if (!user_scan_cfg) diff --git a/drivers/net/wireless/marvell/mwifiex/main.c b/drivers/net/wireless/marvell/mwifiex/main.c index ee52fb839ef77..529dfd8b7ae85 100644 --- a/drivers/net/wireless/marvell/mwifiex/main.c +++ b/drivers/net/wireless/marvell/mwifiex/main.c @@ -598,12 +598,14 @@ static int _mwifiex_fw_dpc(const struct firmware *firmware, void *context) } rtnl_lock(); + wiphy_lock(adapter->wiphy); /* Create station interface by default */ wdev = mwifiex_add_virtual_intf(adapter->wiphy, "mlan%d", NET_NAME_ENUM, NL80211_IFTYPE_STATION, NULL); if (IS_ERR(wdev)) { mwifiex_dbg(adapter, ERROR, "cannot create default STA interface\n"); + wiphy_unlock(adapter->wiphy); rtnl_unlock(); goto err_add_intf; } @@ -614,6 +616,7 @@ static int _mwifiex_fw_dpc(const struct firmware *firmware, void *context) if (IS_ERR(wdev)) { mwifiex_dbg(adapter, ERROR, "cannot create AP interface\n"); + wiphy_unlock(adapter->wiphy); rtnl_unlock(); goto err_add_intf; } @@ -625,10 +628,12 @@ static int _mwifiex_fw_dpc(const struct firmware *firmware, void *context) if (IS_ERR(wdev)) { mwifiex_dbg(adapter, ERROR, "cannot create p2p client interface\n"); + wiphy_unlock(adapter->wiphy); rtnl_unlock(); goto err_add_intf; } } + wiphy_unlock(adapter->wiphy); rtnl_unlock(); mwifiex_drv_get_driver_version(adapter, fmt, sizeof(fmt) - 1); @@ -1440,9 +1445,11 @@ static void mwifiex_uninit_sw(struct mwifiex_adapter *adapter) if (!priv) continue; rtnl_lock(); + wiphy_lock(adapter->wiphy); if (priv->netdev && priv->wdev.iftype != NL80211_IFTYPE_UNSPECIFIED) mwifiex_del_virtual_intf(adapter->wiphy, &priv->wdev); + wiphy_unlock(adapter->wiphy); rtnl_unlock(); } diff --git a/drivers/net/wireless/quantenna/qtnfmac/core.c b/drivers/net/wireless/quantenna/qtnfmac/core.c index 18964e2a9f281..b4dd60b2ebc90 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/core.c +++ b/drivers/net/wireless/quantenna/qtnfmac/core.c @@ -611,8 +611,9 @@ static int qtnf_core_mac_attach(struct qtnf_bus *bus, unsigned int macid) mac->wiphy_registered = 1; rtnl_lock(); - + wiphy_lock(priv_to_wiphy(mac)); ret = qtnf_core_net_attach(mac, vif, "wlan%d", NET_NAME_ENUM); + wiphy_unlock(priv_to_wiphy(mac)); rtnl_unlock(); if (ret) { diff --git a/drivers/net/wireless/virt_wifi.c b/drivers/net/wireless/virt_wifi.c index c878097f0ddaf..4b455a4ae15b8 100644 --- a/drivers/net/wireless/virt_wifi.c +++ b/drivers/net/wireless/virt_wifi.c @@ -537,7 +537,9 @@ static int virt_wifi_newlink(struct net *src_net, struct net_device *dev, dev->ieee80211_ptr->iftype = NL80211_IFTYPE_STATION; dev->ieee80211_ptr->wiphy = common_wiphy; + wiphy_lock(common_wiphy); err = register_netdevice(dev); + wiphy_unlock(common_wiphy); if (err) { dev_err(&priv->lowerdev->dev, "can't register_netdevice: %d\n", err); @@ -560,7 +562,9 @@ static int virt_wifi_newlink(struct net *src_net, struct net_device *dev, return 0; unregister_netdev: + wiphy_lock(common_wiphy); unregister_netdevice(dev); + wiphy_unlock(common_wiphy); free_wireless_dev: kfree(dev->ieee80211_ptr); dev->ieee80211_ptr = NULL; @@ -586,7 +590,9 @@ static void virt_wifi_dellink(struct net_device *dev, netdev_rx_handler_unregister(priv->lowerdev); netdev_upper_dev_unlink(priv->lowerdev, dev); + wiphy_lock(common_wiphy); unregister_netdevice_queue(dev, head); + wiphy_unlock(common_wiphy); module_put(THIS_MODULE); /* Deleting the wiphy is handled in the module destructor. */ @@ -625,7 +631,9 @@ static int virt_wifi_event(struct notifier_block *this, unsigned long event, upper_dev = priv->upperdev; upper_dev->rtnl_link_ops->dellink(upper_dev, &list_kill); + wiphy_lock(common_wiphy); unregister_netdevice_many(&list_kill); + wiphy_unlock(common_wiphy); break; } diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index e7703fdbac8db..4741d71ead215 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -3632,9 +3632,10 @@ struct mgmt_frame_regs { * All callbacks except where otherwise noted should return 0 * on success or a negative error code. * - * All operations are currently invoked under rtnl for consistency with the - * wireless extensions but this is subject to reevaluation as soon as this - * code is used more widely and we have a first user without wext. + * All operations are invoked with the wiphy mutex held. The RTNL may be + * held in addition (due to wireless extensions) but this cannot be relied + * upon except in cases where documented below. Note that due to ordering, + * the RTNL also cannot be acquired in any handlers. * * @suspend: wiphy device needs to be suspended. The variable @wow will * be %NULL or contain the enabled Wake-on-Wireless triggers that are @@ -3649,11 +3650,14 @@ struct mgmt_frame_regs { * the new netdev in the wiphy's network namespace! Returns the struct * wireless_dev, or an ERR_PTR. For P2P device wdevs, the driver must * also set the address member in the wdev. + * This additionally holds the RTNL to be able to do netdev changes. * * @del_virtual_intf: remove the virtual interface + * This additionally holds the RTNL to be able to do netdev changes. * * @change_virtual_intf: change type/configuration of virtual interface, * keep the struct wireless_dev's iftype updated. + * This additionally holds the RTNL to be able to do netdev changes. * * @add_key: add a key with the given parameters. @mac_addr will be %NULL * when adding a group key. @@ -4743,6 +4747,7 @@ struct wiphy_iftype_akm_suites { /** * struct wiphy - wireless hardware description + * @mtx: mutex for the data (structures) of this device * @reg_notifier: the driver's regulatory notification callback, * note that if your driver uses wiphy_apply_custom_regulatory() * the reg_notifier's request can be passed as NULL @@ -4936,6 +4941,8 @@ struct wiphy_iftype_akm_suites { * @sar_capa: SAR control capabilities */ struct wiphy { + struct mutex mtx; + /* assign these fields before you register the wiphy */ u8 perm_addr[ETH_ALEN]; @@ -5188,6 +5195,37 @@ static inline struct wiphy *wiphy_new(const struct cfg80211_ops *ops, */ int wiphy_register(struct wiphy *wiphy); +/* this is a define for better error reporting (file/line) */ +#define lockdep_assert_wiphy(wiphy) lockdep_assert_held(&(wiphy)->mtx) + +/** + * rcu_dereference_wiphy - rcu_dereference with debug checking + * @wiphy: the wiphy to check the locking on + * @p: The pointer to read, prior to dereferencing + * + * Do an rcu_dereference(p), but check caller either holds rcu_read_lock() + * or RTNL. Note: Please prefer wiphy_dereference() or rcu_dereference(). + */ +#define rcu_dereference_wiphy(wiphy, p) \ + rcu_dereference_check(p, lockdep_is_held(&wiphy->mtx)) + +/** + * wiphy_dereference - fetch RCU pointer when updates are prevented by wiphy mtx + * @wiphy: the wiphy to check the locking on + * @p: The pointer to read, prior to dereferencing + * + * Return the value of the specified RCU-protected pointer, but omit the + * READ_ONCE(), because caller holds the wiphy mutex used for updates. + */ +#define wiphy_dereference(wiphy, p) \ + rcu_dereference_protected(p, lockdep_is_held(&wiphy->mtx)) + +/** + * get_wiphy_regdom - get custom regdomain for the given wiphy + * @wiphy: the wiphy to get the regdomain from + */ +const struct ieee80211_regdomain *get_wiphy_regdom(struct wiphy *wiphy); + /** * wiphy_unregister - deregister a wiphy from cfg80211 * @@ -5212,6 +5250,35 @@ struct cfg80211_internal_bss; struct cfg80211_cached_keys; struct cfg80211_cqm_config; +/** + * wiphy_lock - lock the wiphy + * @wiphy: the wiphy to lock + * + * This is mostly exposed so it can be done around registering and + * unregistering netdevs that aren't created through cfg80211 calls, + * since that requires locking in cfg80211 when the notifiers is + * called, but that cannot differentiate which way it's called. + * + * When cfg80211 ops are called, the wiphy is already locked. + */ +static inline void wiphy_lock(struct wiphy *wiphy) + __acquires(&wiphy->mtx) +{ + mutex_lock(&wiphy->mtx); + __acquire(&wiphy->mtx); +} + +/** + * wiphy_unlock - unlock the wiphy again + * @wiphy: the wiphy to unlock + */ +static inline void wiphy_unlock(struct wiphy *wiphy) + __releases(&wiphy->mtx) +{ + __release(&wiphy->mtx); + mutex_unlock(&wiphy->mtx); +} + /** * struct wireless_dev - wireless device state * @@ -5219,7 +5286,10 @@ struct cfg80211_cqm_config; * that uses the ieee80211_ptr field in struct net_device (this * is intentional so it can be allocated along with the netdev.) * It need not be registered then as netdev registration will - * be intercepted by cfg80211 to see the new wireless device. + * be intercepted by cfg80211 to see the new wireless device, + * however, drivers must lock the wiphy before registering or + * unregistering netdevs if they pre-create any netdevs (in ops + * called from cfg80211, the wiphy is already locked.) * * For non-netdev uses, it must also be allocated by the driver * in response to the cfg80211 callbacks that require it, as @@ -5981,18 +6051,18 @@ int regulatory_set_wiphy_regd(struct wiphy *wiphy, struct ieee80211_regdomain *rd); /** - * regulatory_set_wiphy_regd_sync_rtnl - set regdom for self-managed drivers + * regulatory_set_wiphy_regd_sync - set regdom for self-managed drivers * @wiphy: the wireless device we want to process the regulatory domain on * @rd: the regulatory domain information to use for this wiphy * - * This functions requires the RTNL to be held and applies the new regdomain - * synchronously to this wiphy. For more details see - * regulatory_set_wiphy_regd(). + * This functions requires the RTNL and the wiphy mutex to be held and + * applies the new regdomain synchronously to this wiphy. For more details + * see regulatory_set_wiphy_regd(). * * Return: 0 on success. -EINVAL, -EPERM */ -int regulatory_set_wiphy_regd_sync_rtnl(struct wiphy *wiphy, - struct ieee80211_regdomain *rd); +int regulatory_set_wiphy_regd_sync(struct wiphy *wiphy, + struct ieee80211_regdomain *rd); /** * wiphy_apply_custom_regulatory - apply a custom driver regulatory domain @@ -6110,7 +6180,7 @@ void cfg80211_sched_scan_results(struct wiphy *wiphy, u64 reqid); void cfg80211_sched_scan_stopped(struct wiphy *wiphy, u64 reqid); /** - * cfg80211_sched_scan_stopped_rtnl - notify that the scheduled scan has stopped + * cfg80211_sched_scan_stopped_locked - notify that the scheduled scan has stopped * * @wiphy: the wiphy on which the scheduled scan stopped * @reqid: identifier for the related scheduled scan request @@ -6118,9 +6188,9 @@ void cfg80211_sched_scan_stopped(struct wiphy *wiphy, u64 reqid); * The driver can call this function to inform cfg80211 that the * scheduled scan had to be stopped, for whatever reason. The driver * is then called back via the sched_scan_stop operation when done. - * This function should be called with rtnl locked. + * This function should be called with the wiphy mutex held. */ -void cfg80211_sched_scan_stopped_rtnl(struct wiphy *wiphy, u64 reqid); +void cfg80211_sched_scan_stopped_locked(struct wiphy *wiphy, u64 reqid); /** * cfg80211_inform_bss_frame_data - inform cfg80211 of a received BSS frame @@ -7557,7 +7627,7 @@ bool cfg80211_reg_can_beacon(struct wiphy *wiphy, * also checks if IR-relaxation conditions apply, to allow beaconing under * more permissive conditions. * - * Requires the RTNL to be held. + * Requires the wiphy mutex to be held. */ bool cfg80211_reg_can_beacon_relax(struct wiphy *wiphy, struct cfg80211_chan_def *chandef, @@ -7661,7 +7731,7 @@ u32 cfg80211_calculate_bitrate(struct rate_info *rate); * when the driver wishes to unregister the wdev, e.g. when the hardware device * is unbound from the driver. * - * Requires the RTNL to be held. + * Requires the RTNL and wiphy mutex to be held. */ void cfg80211_unregister_wdev(struct wireless_dev *wdev); @@ -7673,6 +7743,8 @@ void cfg80211_unregister_wdev(struct wireless_dev *wdev); * than register_netdevice(), unregister_netdev() is impossible as the RTNL is * held. Otherwise, both register_netdevice() and register_netdev() are usable * instead as well. + * + * Requires the RTNL and wiphy mutex to be held. */ int cfg80211_register_netdevice(struct net_device *dev); @@ -7684,6 +7756,8 @@ int cfg80211_register_netdevice(struct net_device *dev); * than unregister_netdevice(), unregister_netdev() is impossible as the RTNL * is held. Otherwise, both unregister_netdevice() and unregister_netdev() are * usable instead as well. + * + * Requires the RTNL and wiphy mutex to be held. */ static inline void cfg80211_unregister_netdevice(struct net_device *dev) { diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 176fe0d9f67f8..2d1d629e5d14b 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -5529,7 +5529,7 @@ void ieee80211_iterate_active_interfaces_atomic(struct ieee80211_hw *hw, void *data); /** - * ieee80211_iterate_active_interfaces_rtnl - iterate active interfaces + * ieee80211_iterate_active_interfaces_mtx - iterate active interfaces * * This function iterates over the interfaces associated with a given * hardware that are currently active and calls the callback for them. @@ -5540,12 +5540,12 @@ void ieee80211_iterate_active_interfaces_atomic(struct ieee80211_hw *hw, * @iterator: the iterator function to call, cannot sleep * @data: first argument of the iterator function */ -void ieee80211_iterate_active_interfaces_rtnl(struct ieee80211_hw *hw, - u32 iter_flags, - void (*iterator)(void *data, +void ieee80211_iterate_active_interfaces_mtx(struct ieee80211_hw *hw, + u32 iter_flags, + void (*iterator)(void *data, u8 *mac, struct ieee80211_vif *vif), - void *data); + void *data); /** * ieee80211_iterate_stations_atomic - iterate stations diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index fcaf4d20cabf5..4bc350cf4eee2 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -357,11 +357,14 @@ static int ieee80211_open(struct net_device *dev) if (err) return err; - return ieee80211_do_open(&sdata->wdev, true); + wiphy_lock(sdata->local->hw.wiphy); + err = ieee80211_do_open(&sdata->wdev, true); + wiphy_unlock(sdata->local->hw.wiphy); + + return err; } -static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, - bool going_down) +static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, bool going_down) { struct ieee80211_local *local = sdata->local; unsigned long flags; @@ -637,7 +640,9 @@ static int ieee80211_stop(struct net_device *dev) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + wiphy_lock(sdata->local->hw.wiphy); ieee80211_do_stop(sdata, true); + wiphy_unlock(sdata->local->hw.wiphy); return 0; } @@ -2057,13 +2062,16 @@ void ieee80211_remove_interfaces(struct ieee80211_local *local) list_add(&sdata->list, &wdev_list); } mutex_unlock(&local->iflist_mtx); + unregister_netdevice_many(&unreg_list); + wiphy_lock(local->hw.wiphy); list_for_each_entry_safe(sdata, tmp, &wdev_list, list) { list_del(&sdata->list); cfg80211_unregister_wdev(&sdata->wdev); kfree(sdata); } + wiphy_unlock(local->hw.wiphy); } static int netdev_notify(struct notifier_block *nb, diff --git a/net/mac80211/key.c b/net/mac80211/key.c index a4817aa4b1713..56c068cb49c4d 100644 --- a/net/mac80211/key.c +++ b/net/mac80211/key.c @@ -887,7 +887,7 @@ void ieee80211_reenable_keys(struct ieee80211_sub_if_data *sdata) struct ieee80211_key *key; struct ieee80211_sub_if_data *vlan; - ASSERT_RTNL(); + lockdep_assert_wiphy(sdata->local->hw.wiphy); mutex_lock(&sdata->local->key_mtx); @@ -924,7 +924,7 @@ void ieee80211_iter_keys(struct ieee80211_hw *hw, struct ieee80211_key *key, *tmp; struct ieee80211_sub_if_data *sdata; - ASSERT_RTNL(); + lockdep_assert_wiphy(hw->wiphy); mutex_lock(&local->key_mtx); if (vif) { diff --git a/net/mac80211/main.c b/net/mac80211/main.c index dee88ec566ad1..4f3f8bb58e76c 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -261,7 +261,9 @@ static void ieee80211_restart_work(struct work_struct *work) "%s called with hardware scan in progress\n", __func__); flush_work(&local->radar_detected_work); + /* we might do interface manipulations, so need both */ rtnl_lock(); + wiphy_lock(local->hw.wiphy); list_for_each_entry(sdata, &local->interfaces, list) { /* * XXX: there may be more work for other vif types and even @@ -293,6 +295,7 @@ static void ieee80211_restart_work(struct work_struct *work) synchronize_net(); ieee80211_reconfig(local); + wiphy_unlock(local->hw.wiphy); rtnl_unlock(); } @@ -1272,6 +1275,7 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) rate_control_add_debugfs(local); rtnl_lock(); + wiphy_lock(hw->wiphy); /* add one default STA interface if supported */ if (local->hw.wiphy->interface_modes & BIT(NL80211_IFTYPE_STATION) && @@ -1285,6 +1289,7 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) "Failed to add default virtual iface\n"); } + wiphy_unlock(hw->wiphy); rtnl_unlock(); #ifdef CONFIG_INET diff --git a/net/mac80211/pm.c b/net/mac80211/pm.c index ae378a41c9270..7809a906d7fe9 100644 --- a/net/mac80211/pm.c +++ b/net/mac80211/pm.c @@ -1,4 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 +/* + * Portions + * Copyright (C) 2020-2021 Intel Corporation + */ #include #include @@ -11,7 +15,7 @@ static void ieee80211_sched_scan_cancel(struct ieee80211_local *local) { if (ieee80211_request_sched_scan_stop(local)) return; - cfg80211_sched_scan_stopped_rtnl(local->hw.wiphy, 0); + cfg80211_sched_scan_stopped_locked(local->hw.wiphy, 0); } int __ieee80211_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan) diff --git a/net/mac80211/tdls.c b/net/mac80211/tdls.c index e01e4daeb8cd3..f91d02b81b923 100644 --- a/net/mac80211/tdls.c +++ b/net/mac80211/tdls.c @@ -1927,7 +1927,7 @@ ieee80211_process_tdls_channel_switch(struct ieee80211_sub_if_data *sdata, struct ieee80211_tdls_data *tf = (void *)skb->data; struct wiphy *wiphy = sdata->local->hw.wiphy; - ASSERT_RTNL(); + lockdep_assert_wiphy(wiphy); /* make sure the driver supports it */ if (!(wiphy->features & NL80211_FEATURE_TDLS_CHANNEL_SWITCH)) @@ -1979,7 +1979,7 @@ void ieee80211_tdls_chsw_work(struct work_struct *wk) struct sk_buff *skb; struct ieee80211_tdls_data *tf; - rtnl_lock(); + wiphy_lock(local->hw.wiphy); while ((skb = skb_dequeue(&local->skb_queue_tdls_chsw))) { tf = (struct ieee80211_tdls_data *)skb->data; list_for_each_entry(sdata, &local->interfaces, list) { @@ -1994,7 +1994,7 @@ void ieee80211_tdls_chsw_work(struct work_struct *wk) kfree_skb(skb); } - rtnl_unlock(); + wiphy_unlock(local->hw.wiphy); } void ieee80211_tdls_handle_disconnect(struct ieee80211_sub_if_data *sdata, diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 8d3ae6b2f95ff..f080fcf60e453 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -832,7 +832,7 @@ void ieee80211_iterate_active_interfaces_atomic( } EXPORT_SYMBOL_GPL(ieee80211_iterate_active_interfaces_atomic); -void ieee80211_iterate_active_interfaces_rtnl( +void ieee80211_iterate_active_interfaces_mtx( struct ieee80211_hw *hw, u32 iter_flags, void (*iterator)(void *data, u8 *mac, struct ieee80211_vif *vif), @@ -840,12 +840,12 @@ void ieee80211_iterate_active_interfaces_rtnl( { struct ieee80211_local *local = hw_to_local(hw); - ASSERT_RTNL(); + lockdep_assert_wiphy(hw->wiphy); __iterate_interfaces(local, iter_flags | IEEE80211_IFACE_ITER_ACTIVE, iterator, data); } -EXPORT_SYMBOL_GPL(ieee80211_iterate_active_interfaces_rtnl); +EXPORT_SYMBOL_GPL(ieee80211_iterate_active_interfaces_mtx); static void __iterate_stations(struct ieee80211_local *local, void (*iterator)(void *data, @@ -2595,7 +2595,7 @@ int ieee80211_reconfig(struct ieee80211_local *local) mutex_unlock(&local->mtx); if (sched_scan_stopped) - cfg80211_sched_scan_stopped_rtnl(local->hw.wiphy, 0); + cfg80211_sched_scan_stopped_locked(local->hw.wiphy, 0); wake_up: @@ -3811,7 +3811,7 @@ void ieee80211_dfs_cac_cancel(struct ieee80211_local *local) struct cfg80211_chan_def chandef; /* for interface list, to avoid linking iflist_mtx and chanctx_mtx */ - ASSERT_RTNL(); + lockdep_assert_wiphy(local->hw.wiphy); mutex_lock(&local->mtx); list_for_each_entry(sdata, &local->interfaces, list) { @@ -3851,9 +3851,9 @@ void ieee80211_dfs_radar_detected_work(struct work_struct *work) } mutex_unlock(&local->chanctx_mtx); - rtnl_lock(); + wiphy_lock(local->hw.wiphy); ieee80211_dfs_cac_cancel(local); - rtnl_unlock(); + wiphy_unlock(local->hw.wiphy); if (num_chanctx > 1) /* XXX: multi-channel is not supported yet */ diff --git a/net/wireless/chan.c b/net/wireless/chan.c index e4030f1fbc60e..285b8076054b5 100644 --- a/net/wireless/chan.c +++ b/net/wireless/chan.c @@ -1093,7 +1093,7 @@ static bool cfg80211_ir_permissive_chan(struct wiphy *wiphy, struct wireless_dev *wdev; struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); - ASSERT_RTNL(); + lockdep_assert_held(&rdev->wiphy.mtx); if (!IS_ENABLED(CONFIG_CFG80211_REG_RELAX_NO_IR) || !(wiphy->regulatory_flags & REGULATORY_ENABLE_RELAX_NO_IR)) @@ -1216,9 +1216,10 @@ bool cfg80211_reg_can_beacon_relax(struct wiphy *wiphy, struct cfg80211_chan_def *chandef, enum nl80211_iftype iftype) { + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); bool check_no_ir; - ASSERT_RTNL(); + lockdep_assert_held(&rdev->wiphy.mtx); /* * Under certain conditions suggested by some regulatory bodies a diff --git a/net/wireless/core.c b/net/wireless/core.c index 9e7d1f9620bd3..200cd9f5fd5f0 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -222,7 +222,7 @@ static void cfg80211_rfkill_poll(struct rfkill *rfkill, void *data) void cfg80211_stop_p2p_device(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev) { - ASSERT_RTNL(); + lockdep_assert_held(&rdev->wiphy.mtx); if (WARN_ON(wdev->iftype != NL80211_IFTYPE_P2P_DEVICE)) return; @@ -247,7 +247,7 @@ void cfg80211_stop_p2p_device(struct cfg80211_registered_device *rdev, void cfg80211_stop_nan(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev) { - ASSERT_RTNL(); + lockdep_assert_held(&rdev->wiphy.mtx); if (WARN_ON(wdev->iftype != NL80211_IFTYPE_NAN)) return; @@ -273,7 +273,11 @@ void cfg80211_shutdown_all_interfaces(struct wiphy *wiphy) dev_close(wdev->netdev); continue; } + /* otherwise, check iftype */ + + wiphy_lock(wiphy); + switch (wdev->iftype) { case NL80211_IFTYPE_P2P_DEVICE: cfg80211_stop_p2p_device(rdev, wdev); @@ -284,6 +288,8 @@ void cfg80211_shutdown_all_interfaces(struct wiphy *wiphy) default: break; } + + wiphy_unlock(wiphy); } } EXPORT_SYMBOL_GPL(cfg80211_shutdown_all_interfaces); @@ -318,9 +324,9 @@ static void cfg80211_event_work(struct work_struct *work) rdev = container_of(work, struct cfg80211_registered_device, event_work); - rtnl_lock(); + wiphy_lock(&rdev->wiphy); cfg80211_process_rdev_events(rdev); - rtnl_unlock(); + wiphy_unlock(&rdev->wiphy); } void cfg80211_destroy_ifaces(struct cfg80211_registered_device *rdev) @@ -475,6 +481,7 @@ use_default_name: } } + mutex_init(&rdev->wiphy.mtx); INIT_LIST_HEAD(&rdev->wiphy.wdev_list); INIT_LIST_HEAD(&rdev->beacon_registrations); spin_lock_init(&rdev->beacon_registrations_lock); @@ -1007,15 +1014,16 @@ void wiphy_unregister(struct wiphy *wiphy) wait_event(rdev->dev_wait, ({ int __count; - rtnl_lock(); + wiphy_lock(&rdev->wiphy); __count = rdev->opencount; - rtnl_unlock(); + wiphy_unlock(&rdev->wiphy); __count == 0; })); if (rdev->rfkill) rfkill_unregister(rdev->rfkill); rtnl_lock(); + wiphy_lock(&rdev->wiphy); nl80211_notify_wiphy(rdev, NL80211_CMD_DEL_WIPHY); rdev->wiphy.registered = false; @@ -1038,6 +1046,7 @@ void wiphy_unregister(struct wiphy *wiphy) cfg80211_rdev_list_generation++; device_del(&rdev->wiphy.dev); + wiphy_unlock(&rdev->wiphy); rtnl_unlock(); flush_work(&rdev->scan_done_wk); @@ -1070,6 +1079,7 @@ void cfg80211_dev_free(struct cfg80211_registered_device *rdev) } list_for_each_entry_safe(scan, tmp, &rdev->bss_list, list) cfg80211_put_bss(&rdev->wiphy, &scan->pub); + mutex_destroy(&rdev->wiphy.mtx); kfree(rdev); } @@ -1100,6 +1110,7 @@ static void _cfg80211_unregister_wdev(struct wireless_dev *wdev, struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); ASSERT_RTNL(); + lockdep_assert_held(&rdev->wiphy.mtx); flush_work(&wdev->pmsr_free_wk); @@ -1166,7 +1177,7 @@ static const struct device_type wiphy_type = { void cfg80211_update_iface_num(struct cfg80211_registered_device *rdev, enum nl80211_iftype iftype, int num) { - ASSERT_RTNL(); + lockdep_assert_held(&rdev->wiphy.mtx); rdev->num_running_ifaces += num; if (iftype == NL80211_IFTYPE_MONITOR) @@ -1179,7 +1190,7 @@ void __cfg80211_leave(struct cfg80211_registered_device *rdev, struct net_device *dev = wdev->netdev; struct cfg80211_sched_scan_request *pos, *tmp; - ASSERT_RTNL(); + lockdep_assert_held(&rdev->wiphy.mtx); ASSERT_WDEV_LOCK(wdev); cfg80211_pmsr_wdev_down(wdev); @@ -1296,6 +1307,9 @@ void cfg80211_init_wdev(struct wireless_dev *wdev) void cfg80211_register_wdev(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev) { + ASSERT_RTNL(); + lockdep_assert_held(&rdev->wiphy.mtx); + /* * We get here also when the interface changes network namespaces, * as it's registered into the new one, but we don't want it to @@ -1375,21 +1389,30 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb, cfg80211_init_wdev(wdev); break; case NETDEV_REGISTER: - if (!wdev->registered) + if (!wdev->registered) { + wiphy_lock(&rdev->wiphy); cfg80211_register_wdev(rdev, wdev); + wiphy_unlock(&rdev->wiphy); + } break; case NETDEV_UNREGISTER: /* * It is possible to get NETDEV_UNREGISTER multiple times, * so check wdev->registered. */ - if (wdev->registered) + if (wdev->registered) { + wiphy_lock(&rdev->wiphy); _cfg80211_unregister_wdev(wdev, false); + wiphy_unlock(&rdev->wiphy); + } break; case NETDEV_GOING_DOWN: + wiphy_lock(&rdev->wiphy); cfg80211_leave(rdev, wdev); + wiphy_unlock(&rdev->wiphy); break; case NETDEV_DOWN: + wiphy_lock(&rdev->wiphy); cfg80211_update_iface_num(rdev, wdev->iftype, -1); if (rdev->scan_req && rdev->scan_req->wdev == wdev) { if (WARN_ON(!rdev->scan_req->notified && @@ -1406,9 +1429,11 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb, } rdev->opencount--; + wiphy_unlock(&rdev->wiphy); wake_up(&rdev->dev_wait); break; case NETDEV_UP: + wiphy_lock(&rdev->wiphy); cfg80211_update_iface_num(rdev, wdev->iftype, 1); wdev_lock(wdev); switch (wdev->iftype) { @@ -1455,6 +1480,7 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb, /* assume this means it's off */ wdev->ps = false; } + wiphy_unlock(&rdev->wiphy); break; case NETDEV_PRE_UP: if (!cfg80211_iftype_allowed(wdev->wiphy, wdev->iftype, diff --git a/net/wireless/core.h b/net/wireless/core.h index 7df91f9402124..a7d19b4b40ac6 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -231,7 +231,7 @@ static inline void wdev_unlock(struct wireless_dev *wdev) static inline bool cfg80211_has_monitors_only(struct cfg80211_registered_device *rdev) { - ASSERT_RTNL(); + lockdep_assert_held(&rdev->wiphy.mtx); return rdev->num_running_ifaces == rdev->num_running_monitor_ifaces && rdev->num_running_ifaces > 0; diff --git a/net/wireless/debugfs.c b/net/wireless/debugfs.c index 76b845f68ac89..aab43469a2f04 100644 --- a/net/wireless/debugfs.c +++ b/net/wireless/debugfs.c @@ -73,8 +73,6 @@ static ssize_t ht40allow_map_read(struct file *file, if (!buf) return -ENOMEM; - rtnl_lock(); - for (band = 0; band < NUM_NL80211_BANDS; band++) { sband = wiphy->bands[band]; if (!sband) @@ -84,8 +82,6 @@ static ssize_t ht40allow_map_read(struct file *file, buf, buf_size, offset); } - rtnl_unlock(); - r = simple_read_from_buffer(user_buf, count, ppos, buf, offset); kfree(buf); diff --git a/net/wireless/ibss.c b/net/wireless/ibss.c index a0621bb76d8e9..8f98e546becf2 100644 --- a/net/wireless/ibss.c +++ b/net/wireless/ibss.c @@ -3,6 +3,7 @@ * Some IBSS support code for cfg80211. * * Copyright 2009 Johannes Berg + * Copyright (C) 2020-2021 Intel Corporation */ #include @@ -92,7 +93,7 @@ int __cfg80211_join_ibss(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev = dev->ieee80211_ptr; int err; - ASSERT_RTNL(); + lockdep_assert_held(&rdev->wiphy.mtx); ASSERT_WDEV_LOCK(wdev); if (wdev->ssid_len) diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c index e1e90761dc007..3aa69b375a107 100644 --- a/net/wireless/mlme.c +++ b/net/wireless/mlme.c @@ -450,7 +450,7 @@ static void cfg80211_mgmt_registrations_update(struct wireless_dev *wdev) struct cfg80211_mgmt_registration *reg; struct mgmt_frame_regs upd = {}; - ASSERT_RTNL(); + lockdep_assert_held(&rdev->wiphy.mtx); spin_lock_bh(&wdev->mgmt_registrations_lock); if (!wdev->mgmt_registrations_need_update) { @@ -492,10 +492,10 @@ void cfg80211_mgmt_registrations_update_wk(struct work_struct *wk) rdev = container_of(wk, struct cfg80211_registered_device, mgmt_registrations_update_wk); - rtnl_lock(); + wiphy_lock(&rdev->wiphy); list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list) cfg80211_mgmt_registrations_update(wdev); - rtnl_unlock(); + wiphy_unlock(&rdev->wiphy); } int cfg80211_mlme_register_mgmt(struct wireless_dev *wdev, u32 snd_portid, diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 775d0c4d86c36..e5e9d889f00f9 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -64,9 +64,9 @@ static const struct genl_multicast_group nl80211_mcgrps[] = { /* returns ERR_PTR values */ static struct wireless_dev * -__cfg80211_wdev_from_attrs(struct net *netns, struct nlattr **attrs) +__cfg80211_wdev_from_attrs(struct cfg80211_registered_device *rdev, + struct net *netns, struct nlattr **attrs) { - struct cfg80211_registered_device *rdev; struct wireless_dev *result = NULL; bool have_ifidx = attrs[NL80211_ATTR_IFINDEX]; bool have_wdev_id = attrs[NL80211_ATTR_WDEV]; @@ -74,8 +74,6 @@ __cfg80211_wdev_from_attrs(struct net *netns, struct nlattr **attrs) int wiphy_idx = -1; int ifidx = -1; - ASSERT_RTNL(); - if (!have_ifidx && !have_wdev_id) return ERR_PTR(-EINVAL); @@ -86,6 +84,28 @@ __cfg80211_wdev_from_attrs(struct net *netns, struct nlattr **attrs) wiphy_idx = wdev_id >> 32; } + if (rdev) { + struct wireless_dev *wdev; + + lockdep_assert_held(&rdev->wiphy.mtx); + + list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list) { + if (have_ifidx && wdev->netdev && + wdev->netdev->ifindex == ifidx) { + result = wdev; + break; + } + if (have_wdev_id && wdev->identifier == (u32)wdev_id) { + result = wdev; + break; + } + } + + return result ?: ERR_PTR(-ENODEV); + } + + ASSERT_RTNL(); + list_for_each_entry(rdev, &cfg80211_rdev_list, list) { struct wireless_dev *wdev; @@ -914,22 +934,31 @@ int nl80211_prepare_wdev_dump(struct netlink_callback *cb, return err; } - *wdev = __cfg80211_wdev_from_attrs(sock_net(cb->skb->sk), + rtnl_lock(); + *wdev = __cfg80211_wdev_from_attrs(NULL, sock_net(cb->skb->sk), attrbuf); kfree(attrbuf); - if (IS_ERR(*wdev)) + if (IS_ERR(*wdev)) { + rtnl_unlock(); return PTR_ERR(*wdev); + } *rdev = wiphy_to_rdev((*wdev)->wiphy); + mutex_lock(&(*rdev)->wiphy.mtx); + rtnl_unlock(); /* 0 is the first index - add 1 to parse only once */ cb->args[0] = (*rdev)->wiphy_idx + 1; cb->args[1] = (*wdev)->identifier; } else { /* subtract the 1 again here */ - struct wiphy *wiphy = wiphy_idx_to_wiphy(cb->args[0] - 1); + struct wiphy *wiphy; struct wireless_dev *tmp; - if (!wiphy) + rtnl_lock(); + wiphy = wiphy_idx_to_wiphy(cb->args[0] - 1); + if (!wiphy) { + rtnl_unlock(); return -ENODEV; + } *rdev = wiphy_to_rdev(wiphy); *wdev = NULL; @@ -940,8 +969,12 @@ int nl80211_prepare_wdev_dump(struct netlink_callback *cb, } } - if (!*wdev) + if (!*wdev) { + rtnl_unlock(); return -ENODEV; + } + mutex_lock(&(*rdev)->wiphy.mtx); + rtnl_unlock(); } return 0; @@ -3141,7 +3174,7 @@ static int nl80211_set_channel(struct sk_buff *skb, struct genl_info *info) static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) { - struct cfg80211_registered_device *rdev; + struct cfg80211_registered_device *rdev = NULL; struct net_device *netdev = NULL; struct wireless_dev *wdev; int result = 0, rem_txq_params = 0; @@ -3152,8 +3185,7 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) u8 coverage_class = 0; u32 txq_limit = 0, txq_memory_limit = 0, txq_quantum = 0; - ASSERT_RTNL(); - + rtnl_lock(); /* * Try to find the wiphy and netdev. Normally this * function shouldn't need the netdev, but this is @@ -3177,14 +3209,19 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) if (!netdev) { rdev = __cfg80211_rdev_from_attrs(genl_info_net(info), info->attrs); - if (IS_ERR(rdev)) + if (IS_ERR(rdev)) { + rtnl_unlock(); return PTR_ERR(rdev); + } wdev = NULL; netdev = NULL; result = 0; } else wdev = netdev->ieee80211_ptr; + wiphy_lock(&rdev->wiphy); + rtnl_unlock(); + /* * end workaround code, by now the rdev is available * and locked, and wdev may or may not be NULL. @@ -3195,24 +3232,32 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) rdev, nla_data(info->attrs[NL80211_ATTR_WIPHY_NAME])); if (result) - return result; + goto out; if (info->attrs[NL80211_ATTR_WIPHY_TXQ_PARAMS]) { struct ieee80211_txq_params txq_params; struct nlattr *tb[NL80211_TXQ_ATTR_MAX + 1]; - if (!rdev->ops->set_txq_params) - return -EOPNOTSUPP; + if (!rdev->ops->set_txq_params) { + result = -EOPNOTSUPP; + goto out; + } - if (!netdev) - return -EINVAL; + if (!netdev) { + result = -EINVAL; + goto out; + } if (netdev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP && - netdev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_GO) - return -EINVAL; + netdev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_GO) { + result = -EINVAL; + goto out; + } - if (!netif_running(netdev)) - return -ENETDOWN; + if (!netif_running(netdev)) { + result = -ENETDOWN; + goto out; + } nla_for_each_nested(nl_txq_params, info->attrs[NL80211_ATTR_WIPHY_TXQ_PARAMS], @@ -3223,15 +3268,15 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) txq_params_policy, info->extack); if (result) - return result; + goto out; result = parse_txq_params(tb, &txq_params); if (result) - return result; + goto out; result = rdev_set_txq_params(rdev, netdev, &txq_params); if (result) - return result; + goto out; } } @@ -3241,7 +3286,7 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) nl80211_can_set_dev_channel(wdev) ? netdev : NULL, info); if (result) - return result; + goto out; } if (info->attrs[NL80211_ATTR_WIPHY_TX_POWER_SETTING]) { @@ -3252,15 +3297,19 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) if (!(rdev->wiphy.features & NL80211_FEATURE_VIF_TXPOWER)) txp_wdev = NULL; - if (!rdev->ops->set_tx_power) - return -EOPNOTSUPP; + if (!rdev->ops->set_tx_power) { + result = -EOPNOTSUPP; + goto out; + } idx = NL80211_ATTR_WIPHY_TX_POWER_SETTING; type = nla_get_u32(info->attrs[idx]); if (!info->attrs[NL80211_ATTR_WIPHY_TX_POWER_LEVEL] && - (type != NL80211_TX_POWER_AUTOMATIC)) - return -EINVAL; + (type != NL80211_TX_POWER_AUTOMATIC)) { + result = -EINVAL; + goto out; + } if (type != NL80211_TX_POWER_AUTOMATIC) { idx = NL80211_ATTR_WIPHY_TX_POWER_LEVEL; @@ -3269,7 +3318,7 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) result = rdev_set_tx_power(rdev, txp_wdev, type, mbm); if (result) - return result; + goto out; } if (info->attrs[NL80211_ATTR_WIPHY_ANTENNA_TX] && @@ -3278,8 +3327,10 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) if ((!rdev->wiphy.available_antennas_tx && !rdev->wiphy.available_antennas_rx) || - !rdev->ops->set_antenna) - return -EOPNOTSUPP; + !rdev->ops->set_antenna) { + result = -EOPNOTSUPP; + goto out; + } tx_ant = nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_ANTENNA_TX]); rx_ant = nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_ANTENNA_RX]); @@ -3287,15 +3338,17 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) /* reject antenna configurations which don't match the * available antenna masks, except for the "all" mask */ if ((~tx_ant && (tx_ant & ~rdev->wiphy.available_antennas_tx)) || - (~rx_ant && (rx_ant & ~rdev->wiphy.available_antennas_rx))) - return -EINVAL; + (~rx_ant && (rx_ant & ~rdev->wiphy.available_antennas_rx))) { + result = -EINVAL; + goto out; + } tx_ant = tx_ant & rdev->wiphy.available_antennas_tx; rx_ant = rx_ant & rdev->wiphy.available_antennas_rx; result = rdev_set_antenna(rdev, tx_ant, rx_ant); if (result) - return result; + goto out; } changed = 0; @@ -3317,8 +3370,10 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) if (info->attrs[NL80211_ATTR_WIPHY_FRAG_THRESHOLD]) { frag_threshold = nla_get_u32( info->attrs[NL80211_ATTR_WIPHY_FRAG_THRESHOLD]); - if (frag_threshold < 256) - return -EINVAL; + if (frag_threshold < 256) { + result = -EINVAL; + goto out; + } if (frag_threshold != (u32) -1) { /* @@ -3339,8 +3394,10 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) } if (info->attrs[NL80211_ATTR_WIPHY_COVERAGE_CLASS]) { - if (info->attrs[NL80211_ATTR_WIPHY_DYN_ACK]) - return -EINVAL; + if (info->attrs[NL80211_ATTR_WIPHY_DYN_ACK]) { + result = -EINVAL; + goto out; + } coverage_class = nla_get_u8( info->attrs[NL80211_ATTR_WIPHY_COVERAGE_CLASS]); @@ -3348,16 +3405,20 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) } if (info->attrs[NL80211_ATTR_WIPHY_DYN_ACK]) { - if (!(rdev->wiphy.features & NL80211_FEATURE_ACKTO_ESTIMATION)) - return -EOPNOTSUPP; + if (!(rdev->wiphy.features & NL80211_FEATURE_ACKTO_ESTIMATION)) { + result = -EOPNOTSUPP; + goto out; + } changed |= WIPHY_PARAM_DYN_ACK; } if (info->attrs[NL80211_ATTR_TXQ_LIMIT]) { if (!wiphy_ext_feature_isset(&rdev->wiphy, - NL80211_EXT_FEATURE_TXQS)) - return -EOPNOTSUPP; + NL80211_EXT_FEATURE_TXQS)) { + result = -EOPNOTSUPP; + goto out; + } txq_limit = nla_get_u32( info->attrs[NL80211_ATTR_TXQ_LIMIT]); changed |= WIPHY_PARAM_TXQ_LIMIT; @@ -3365,8 +3426,10 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) if (info->attrs[NL80211_ATTR_TXQ_MEMORY_LIMIT]) { if (!wiphy_ext_feature_isset(&rdev->wiphy, - NL80211_EXT_FEATURE_TXQS)) - return -EOPNOTSUPP; + NL80211_EXT_FEATURE_TXQS)) { + result = -EOPNOTSUPP; + goto out; + } txq_memory_limit = nla_get_u32( info->attrs[NL80211_ATTR_TXQ_MEMORY_LIMIT]); changed |= WIPHY_PARAM_TXQ_MEMORY_LIMIT; @@ -3374,8 +3437,10 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) if (info->attrs[NL80211_ATTR_TXQ_QUANTUM]) { if (!wiphy_ext_feature_isset(&rdev->wiphy, - NL80211_EXT_FEATURE_TXQS)) - return -EOPNOTSUPP; + NL80211_EXT_FEATURE_TXQS)) { + result = -EOPNOTSUPP; + goto out; + } txq_quantum = nla_get_u32( info->attrs[NL80211_ATTR_TXQ_QUANTUM]); changed |= WIPHY_PARAM_TXQ_QUANTUM; @@ -3387,8 +3452,10 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) u8 old_coverage_class; u32 old_txq_limit, old_txq_memory_limit, old_txq_quantum; - if (!rdev->ops->set_wiphy_params) - return -EOPNOTSUPP; + if (!rdev->ops->set_wiphy_params) { + result = -EOPNOTSUPP; + goto out; + } old_retry_short = rdev->wiphy.retry_short; old_retry_long = rdev->wiphy.retry_long; @@ -3426,10 +3493,15 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) rdev->wiphy.txq_limit = old_txq_limit; rdev->wiphy.txq_memory_limit = old_txq_memory_limit; rdev->wiphy.txq_quantum = old_txq_quantum; - return result; + goto out; } } - return 0; + + result = 0; + +out: + wiphy_unlock(&rdev->wiphy); + return result; } static int nl80211_send_chandef(struct sk_buff *msg, @@ -3959,6 +4031,17 @@ static int nl80211_del_interface(struct sk_buff *skb, struct genl_info *info) if (!rdev->ops->del_virtual_intf) return -EOPNOTSUPP; + /* + * We hold RTNL, so this is safe, without RTNL opencount cannot + * reach 0, and thus the rdev cannot be deleted. + * + * We need to do it for the dev_close(), since that will call + * the netdev notifiers, and we need to acquire the mutex there + * but don't know if we get there from here or from some other + * place (e.g. "ip link set ... down"). + */ + mutex_unlock(&rdev->wiphy.mtx); + /* * If we remove a wireless device without a netdev then clear * user_ptr[1] so that nl80211_post_doit won't dereference it @@ -3968,6 +4051,10 @@ static int nl80211_del_interface(struct sk_buff *skb, struct genl_info *info) */ if (!wdev->netdev) info->user_ptr[1] = NULL; + else + dev_close(wdev->netdev); + + mutex_lock(&rdev->wiphy.mtx); return rdev_del_virtual_intf(rdev, wdev); } @@ -5884,10 +5971,11 @@ static int nl80211_dump_station(struct sk_buff *skb, int sta_idx = cb->args[2]; int err; - rtnl_lock(); err = nl80211_prepare_wdev_dump(cb, &rdev, &wdev); if (err) - goto out_err; + return err; + /* nl80211_prepare_wdev_dump acquired it in the successful case */ + __acquire(&rdev->wiphy.mtx); if (!wdev->netdev) { err = -EINVAL; @@ -5922,7 +6010,7 @@ static int nl80211_dump_station(struct sk_buff *skb, cb->args[2] = sta_idx; err = skb->len; out_err: - rtnl_unlock(); + wiphy_unlock(&rdev->wiphy); return err; } @@ -6780,10 +6868,11 @@ static int nl80211_dump_mpath(struct sk_buff *skb, int path_idx = cb->args[2]; int err; - rtnl_lock(); err = nl80211_prepare_wdev_dump(cb, &rdev, &wdev); if (err) - goto out_err; + return err; + /* nl80211_prepare_wdev_dump acquired it in the successful case */ + __acquire(&rdev->wiphy.mtx); if (!rdev->ops->dump_mpath) { err = -EOPNOTSUPP; @@ -6816,7 +6905,7 @@ static int nl80211_dump_mpath(struct sk_buff *skb, cb->args[2] = path_idx; err = skb->len; out_err: - rtnl_unlock(); + wiphy_unlock(&rdev->wiphy); return err; } @@ -6979,10 +7068,11 @@ static int nl80211_dump_mpp(struct sk_buff *skb, int path_idx = cb->args[2]; int err; - rtnl_lock(); err = nl80211_prepare_wdev_dump(cb, &rdev, &wdev); if (err) - goto out_err; + return err; + /* nl80211_prepare_wdev_dump acquired it in the successful case */ + __acquire(&rdev->wiphy.mtx); if (!rdev->ops->dump_mpp) { err = -EOPNOTSUPP; @@ -7015,7 +7105,7 @@ static int nl80211_dump_mpp(struct sk_buff *skb, cb->args[2] = path_idx; err = skb->len; out_err: - rtnl_unlock(); + wiphy_unlock(&rdev->wiphy); return err; } @@ -7634,12 +7724,15 @@ static int nl80211_get_reg_do(struct sk_buff *skb, struct genl_info *info) if (!hdr) goto put_failure; + rtnl_lock(); + if (info->attrs[NL80211_ATTR_WIPHY]) { bool self_managed; rdev = cfg80211_get_dev_from_info(genl_info_net(info), info); if (IS_ERR(rdev)) { nlmsg_free(msg); + rtnl_unlock(); return PTR_ERR(rdev); } @@ -7651,6 +7744,7 @@ static int nl80211_get_reg_do(struct sk_buff *skb, struct genl_info *info) /* a self-managed-reg device must have a private regdom */ if (WARN_ON(!regdom && self_managed)) { nlmsg_free(msg); + rtnl_unlock(); return -EINVAL; } @@ -7675,11 +7769,13 @@ static int nl80211_get_reg_do(struct sk_buff *skb, struct genl_info *info) rcu_read_unlock(); genlmsg_end(msg, hdr); + rtnl_unlock(); return genlmsg_reply(msg, info); nla_put_failure_rcu: rcu_read_unlock(); nla_put_failure: + rtnl_unlock(); put_failure: nlmsg_free(msg); return -EMSGSIZE; @@ -7842,12 +7938,17 @@ static int nl80211_set_reg(struct sk_buff *skb, struct genl_info *info) return -EINVAL; } - if (!reg_is_valid_request(alpha2)) - return -EINVAL; + rtnl_lock(); + if (!reg_is_valid_request(alpha2)) { + r = -EINVAL; + goto out; + } rd = kzalloc(struct_size(rd, reg_rules, num_rules), GFP_KERNEL); - if (!rd) - return -ENOMEM; + if (!rd) { + r = -ENOMEM; + goto out; + } rd->n_reg_rules = num_rules; rd->alpha2[0] = alpha2[0]; @@ -7879,10 +7980,13 @@ static int nl80211_set_reg(struct sk_buff *skb, struct genl_info *info) } } + r = set_regdom(rd, REGD_SOURCE_CRDA); /* set_regdom takes ownership of rd */ - return set_regdom(rd, REGD_SOURCE_CRDA); + rd = NULL; bad_reg: kfree(rd); + out: + rtnl_unlock(); return r; } #endif /* CONFIG_CFG80211_CRDA_SUPPORT */ @@ -9050,10 +9154,7 @@ static int nl80211_channel_switch(struct sk_buff *skb, struct genl_info *info) struct net_device *dev = info->user_ptr[1]; struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_csa_settings params; - /* csa_attrs is defined static to avoid waste of stack size - this - * function is called under RTNL lock, so this should not be a problem. - */ - static struct nlattr *csa_attrs[NL80211_ATTR_MAX+1]; + struct nlattr **csa_attrs = NULL; int err; bool need_new_beacon = false; bool need_handle_dfs_flag = true; @@ -9118,28 +9219,39 @@ static int nl80211_channel_switch(struct sk_buff *skb, struct genl_info *info) if (err) return err; + csa_attrs = kcalloc(NL80211_ATTR_MAX + 1, sizeof(*csa_attrs), + GFP_KERNEL); + if (!csa_attrs) + return -ENOMEM; + err = nla_parse_nested_deprecated(csa_attrs, NL80211_ATTR_MAX, info->attrs[NL80211_ATTR_CSA_IES], nl80211_policy, info->extack); if (err) - return err; + goto free; err = nl80211_parse_beacon(rdev, csa_attrs, ¶ms.beacon_csa); if (err) - return err; + goto free; - if (!csa_attrs[NL80211_ATTR_CNTDWN_OFFS_BEACON]) - return -EINVAL; + if (!csa_attrs[NL80211_ATTR_CNTDWN_OFFS_BEACON]) { + err = -EINVAL; + goto free; + } len = nla_len(csa_attrs[NL80211_ATTR_CNTDWN_OFFS_BEACON]); - if (!len || (len % sizeof(u16))) - return -EINVAL; + if (!len || (len % sizeof(u16))) { + err = -EINVAL; + goto free; + } params.n_counter_offsets_beacon = len / sizeof(u16); if (rdev->wiphy.max_num_csa_counters && (params.n_counter_offsets_beacon > - rdev->wiphy.max_num_csa_counters)) - return -EINVAL; + rdev->wiphy.max_num_csa_counters)) { + err = -EINVAL; + goto free; + } params.counter_offsets_beacon = nla_data(csa_attrs[NL80211_ATTR_CNTDWN_OFFS_BEACON]); @@ -9148,23 +9260,31 @@ static int nl80211_channel_switch(struct sk_buff *skb, struct genl_info *info) for (i = 0; i < params.n_counter_offsets_beacon; i++) { u16 offset = params.counter_offsets_beacon[i]; - if (offset >= params.beacon_csa.tail_len) - return -EINVAL; + if (offset >= params.beacon_csa.tail_len) { + err = -EINVAL; + goto free; + } - if (params.beacon_csa.tail[offset] != params.count) - return -EINVAL; + if (params.beacon_csa.tail[offset] != params.count) { + err = -EINVAL; + goto free; + } } if (csa_attrs[NL80211_ATTR_CNTDWN_OFFS_PRESP]) { len = nla_len(csa_attrs[NL80211_ATTR_CNTDWN_OFFS_PRESP]); - if (!len || (len % sizeof(u16))) - return -EINVAL; + if (!len || (len % sizeof(u16))) { + err = -EINVAL; + goto free; + } params.n_counter_offsets_presp = len / sizeof(u16); if (rdev->wiphy.max_num_csa_counters && (params.n_counter_offsets_presp > - rdev->wiphy.max_num_csa_counters)) - return -EINVAL; + rdev->wiphy.max_num_csa_counters)) { + err = -EINVAL; + goto free; + } params.counter_offsets_presp = nla_data(csa_attrs[NL80211_ATTR_CNTDWN_OFFS_PRESP]); @@ -9173,35 +9293,42 @@ static int nl80211_channel_switch(struct sk_buff *skb, struct genl_info *info) for (i = 0; i < params.n_counter_offsets_presp; i++) { u16 offset = params.counter_offsets_presp[i]; - if (offset >= params.beacon_csa.probe_resp_len) - return -EINVAL; + if (offset >= params.beacon_csa.probe_resp_len) { + err = -EINVAL; + goto free; + } if (params.beacon_csa.probe_resp[offset] != - params.count) - return -EINVAL; + params.count) { + err = -EINVAL; + goto free; + } } } skip_beacons: err = nl80211_parse_chandef(rdev, info, ¶ms.chandef); if (err) - return err; + goto free; if (!cfg80211_reg_can_beacon_relax(&rdev->wiphy, ¶ms.chandef, - wdev->iftype)) - return -EINVAL; + wdev->iftype)) { + err = -EINVAL; + goto free; + } err = cfg80211_chandef_dfs_required(wdev->wiphy, ¶ms.chandef, wdev->iftype); if (err < 0) - return err; + goto free; if (err > 0) { params.radar_required = true; if (need_handle_dfs_flag && !nla_get_flag(info->attrs[NL80211_ATTR_HANDLE_DFS])) { - return -EINVAL; + err = -EINVAL; + goto free; } } @@ -9212,6 +9339,8 @@ skip_beacons: err = rdev_channel_switch(rdev, dev, ¶ms); wdev_unlock(wdev); +free: + kfree(csa_attrs); return err; } @@ -9362,12 +9491,11 @@ static int nl80211_dump_scan(struct sk_buff *skb, struct netlink_callback *cb) int start = cb->args[2], idx = 0; int err; - rtnl_lock(); err = nl80211_prepare_wdev_dump(cb, &rdev, &wdev); - if (err) { - rtnl_unlock(); + if (err) return err; - } + /* nl80211_prepare_wdev_dump acquired it in the successful case */ + __acquire(&rdev->wiphy.mtx); wdev_lock(wdev); spin_lock_bh(&rdev->bss_lock); @@ -9398,7 +9526,7 @@ static int nl80211_dump_scan(struct sk_buff *skb, struct netlink_callback *cb) wdev_unlock(wdev); cb->args[2] = idx; - rtnl_unlock(); + wiphy_unlock(&rdev->wiphy); return skb->len; } @@ -9496,10 +9624,13 @@ static int nl80211_dump_survey(struct sk_buff *skb, struct netlink_callback *cb) if (!attrbuf) return -ENOMEM; - rtnl_lock(); res = nl80211_prepare_wdev_dump(cb, &rdev, &wdev); - if (res) - goto out_err; + if (res) { + kfree(attrbuf); + return res; + } + /* nl80211_prepare_wdev_dump acquired it in the successful case */ + __acquire(&rdev->wiphy.mtx); /* prepare_wdev_dump parsed the attributes */ radio_stats = attrbuf[NL80211_ATTR_SURVEY_RADIO_STATS]; @@ -9541,7 +9672,7 @@ static int nl80211_dump_survey(struct sk_buff *skb, struct netlink_callback *cb) res = skb->len; out_err: kfree(attrbuf); - rtnl_unlock(); + wiphy_unlock(&rdev->wiphy); return res; } @@ -10403,10 +10534,14 @@ EXPORT_SYMBOL(__cfg80211_send_event_skb); static int nl80211_testmode_do(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; - struct wireless_dev *wdev = - __cfg80211_wdev_from_attrs(genl_info_net(info), info->attrs); + struct wireless_dev *wdev; int err; + lockdep_assert_held(&rdev->wiphy.mtx); + + wdev = __cfg80211_wdev_from_attrs(rdev, genl_info_net(info), + info->attrs); + if (!rdev->ops->testmode_cmd) return -EOPNOTSUPP; @@ -13591,7 +13726,8 @@ static int nl80211_vendor_cmd(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct wireless_dev *wdev = - __cfg80211_wdev_from_attrs(genl_info_net(info), info->attrs); + __cfg80211_wdev_from_attrs(rdev, genl_info_net(info), + info->attrs); int i, err; u32 vid, subcmd; @@ -13715,7 +13851,7 @@ static int nl80211_prepare_vendor_dump(struct sk_buff *skb, goto out; } - *wdev = __cfg80211_wdev_from_attrs(sock_net(skb->sk), attrbuf); + *wdev = __cfg80211_wdev_from_attrs(NULL, sock_net(skb->sk), attrbuf); if (IS_ERR(*wdev)) *wdev = NULL; @@ -14650,31 +14786,24 @@ bad_tid_conf: static int nl80211_pre_doit(const struct genl_ops *ops, struct sk_buff *skb, struct genl_info *info) { - struct cfg80211_registered_device *rdev; + struct cfg80211_registered_device *rdev = NULL; struct wireless_dev *wdev; struct net_device *dev; - bool rtnl = ops->internal_flags & NL80211_FLAG_NEED_RTNL; - - if (rtnl) - rtnl_lock(); + rtnl_lock(); if (ops->internal_flags & NL80211_FLAG_NEED_WIPHY) { rdev = cfg80211_get_dev_from_info(genl_info_net(info), info); if (IS_ERR(rdev)) { - if (rtnl) - rtnl_unlock(); + rtnl_unlock(); return PTR_ERR(rdev); } info->user_ptr[0] = rdev; } else if (ops->internal_flags & NL80211_FLAG_NEED_NETDEV || ops->internal_flags & NL80211_FLAG_NEED_WDEV) { - ASSERT_RTNL(); - - wdev = __cfg80211_wdev_from_attrs(genl_info_net(info), + wdev = __cfg80211_wdev_from_attrs(NULL, genl_info_net(info), info->attrs); if (IS_ERR(wdev)) { - if (rtnl) - rtnl_unlock(); + rtnl_unlock(); return PTR_ERR(wdev); } @@ -14683,8 +14812,7 @@ static int nl80211_pre_doit(const struct genl_ops *ops, struct sk_buff *skb, if (ops->internal_flags & NL80211_FLAG_NEED_NETDEV) { if (!dev) { - if (rtnl) - rtnl_unlock(); + rtnl_unlock(); return -EINVAL; } @@ -14695,8 +14823,7 @@ static int nl80211_pre_doit(const struct genl_ops *ops, struct sk_buff *skb, if (ops->internal_flags & NL80211_FLAG_CHECK_NETDEV_UP && !wdev_running(wdev)) { - if (rtnl) - rtnl_unlock(); + rtnl_unlock(); return -ENETDOWN; } @@ -14706,6 +14833,14 @@ static int nl80211_pre_doit(const struct genl_ops *ops, struct sk_buff *skb, info->user_ptr[0] = rdev; } + if (rdev) { + wiphy_lock(&rdev->wiphy); + /* we keep the mutex locked until post_doit */ + __release(&rdev->wiphy.mtx); + } + if (!(ops->internal_flags & NL80211_FLAG_NEED_RTNL)) + rtnl_unlock(); + return 0; } @@ -14723,6 +14858,14 @@ static void nl80211_post_doit(const struct genl_ops *ops, struct sk_buff *skb, } } + if (info->user_ptr[0]) { + struct cfg80211_registered_device *rdev = info->user_ptr[0]; + + /* we kept the mutex locked since pre_doit */ + __acquire(&rdev->wiphy.mtx); + wiphy_unlock(&rdev->wiphy); + } + if (ops->internal_flags & NL80211_FLAG_NEED_RTNL) rtnl_unlock(); @@ -14851,8 +14994,7 @@ static const struct genl_ops nl80211_ops[] = { .dumpit = nl80211_dump_wiphy, .done = nl80211_dump_wiphy_done, /* can be retrieved by unprivileged users */ - .internal_flags = NL80211_FLAG_NEED_WIPHY | - NL80211_FLAG_NEED_RTNL, + .internal_flags = NL80211_FLAG_NEED_WIPHY, }, }; @@ -14862,7 +15004,6 @@ static const struct genl_small_ops nl80211_small_ops[] = { .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_set_wiphy, .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_RTNL, }, { .cmd = NL80211_CMD_GET_INTERFACE, @@ -14870,8 +15011,7 @@ static const struct genl_small_ops nl80211_small_ops[] = { .doit = nl80211_get_interface, .dumpit = nl80211_dump_interface, /* can be retrieved by unprivileged users */ - .internal_flags = NL80211_FLAG_NEED_WDEV | - NL80211_FLAG_NEED_RTNL, + .internal_flags = NL80211_FLAG_NEED_WDEV, }, { .cmd = NL80211_CMD_SET_INTERFACE, @@ -14902,8 +15042,7 @@ static const struct genl_small_ops nl80211_small_ops[] = { .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_get_key, .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | - NL80211_FLAG_NEED_RTNL, + .internal_flags = NL80211_FLAG_NEED_NETDEV_UP, }, { .cmd = NL80211_CMD_SET_KEY, @@ -14911,7 +15050,6 @@ static const struct genl_small_ops nl80211_small_ops[] = { .doit = nl80211_set_key, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | - NL80211_FLAG_NEED_RTNL | NL80211_FLAG_CLEAR_SKB, }, { @@ -14920,7 +15058,6 @@ static const struct genl_small_ops nl80211_small_ops[] = { .doit = nl80211_new_key, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | - NL80211_FLAG_NEED_RTNL | NL80211_FLAG_CLEAR_SKB, }, { @@ -14928,64 +15065,56 @@ static const struct genl_small_ops nl80211_small_ops[] = { .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_del_key, .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | - NL80211_FLAG_NEED_RTNL, + .internal_flags = NL80211_FLAG_NEED_NETDEV_UP, }, { .cmd = NL80211_CMD_SET_BEACON, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_UNS_ADMIN_PERM, .doit = nl80211_set_beacon, - .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | - NL80211_FLAG_NEED_RTNL, + .internal_flags = NL80211_FLAG_NEED_NETDEV_UP, }, { .cmd = NL80211_CMD_START_AP, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_UNS_ADMIN_PERM, .doit = nl80211_start_ap, - .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | - NL80211_FLAG_NEED_RTNL, + .internal_flags = NL80211_FLAG_NEED_NETDEV_UP, }, { .cmd = NL80211_CMD_STOP_AP, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_UNS_ADMIN_PERM, .doit = nl80211_stop_ap, - .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | - NL80211_FLAG_NEED_RTNL, + .internal_flags = NL80211_FLAG_NEED_NETDEV_UP, }, { .cmd = NL80211_CMD_GET_STATION, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_get_station, .dumpit = nl80211_dump_station, - .internal_flags = NL80211_FLAG_NEED_NETDEV | - NL80211_FLAG_NEED_RTNL, + .internal_flags = NL80211_FLAG_NEED_NETDEV, }, { .cmd = NL80211_CMD_SET_STATION, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_set_station, .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | - NL80211_FLAG_NEED_RTNL, + .internal_flags = NL80211_FLAG_NEED_NETDEV_UP, }, { .cmd = NL80211_CMD_NEW_STATION, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_new_station, .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | - NL80211_FLAG_NEED_RTNL, + .internal_flags = NL80211_FLAG_NEED_NETDEV_UP, }, { .cmd = NL80211_CMD_DEL_STATION, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_del_station, .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | - NL80211_FLAG_NEED_RTNL, + .internal_flags = NL80211_FLAG_NEED_NETDEV_UP, }, { .cmd = NL80211_CMD_GET_MPATH, @@ -14993,8 +15122,7 @@ static const struct genl_small_ops nl80211_small_ops[] = { .doit = nl80211_get_mpath, .dumpit = nl80211_dump_mpath, .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | - NL80211_FLAG_NEED_RTNL, + .internal_flags = NL80211_FLAG_NEED_NETDEV_UP, }, { .cmd = NL80211_CMD_GET_MPP, @@ -15002,47 +15130,42 @@ static const struct genl_small_ops nl80211_small_ops[] = { .doit = nl80211_get_mpp, .dumpit = nl80211_dump_mpp, .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | - NL80211_FLAG_NEED_RTNL, + .internal_flags = NL80211_FLAG_NEED_NETDEV_UP, }, { .cmd = NL80211_CMD_SET_MPATH, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_set_mpath, .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | - NL80211_FLAG_NEED_RTNL, + .internal_flags = NL80211_FLAG_NEED_NETDEV_UP, }, { .cmd = NL80211_CMD_NEW_MPATH, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_new_mpath, .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | - NL80211_FLAG_NEED_RTNL, + .internal_flags = NL80211_FLAG_NEED_NETDEV_UP, }, { .cmd = NL80211_CMD_DEL_MPATH, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_del_mpath, .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | - NL80211_FLAG_NEED_RTNL, + .internal_flags = NL80211_FLAG_NEED_NETDEV_UP, }, { .cmd = NL80211_CMD_SET_BSS, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_set_bss, .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | - NL80211_FLAG_NEED_RTNL, + .internal_flags = NL80211_FLAG_NEED_NETDEV_UP, }, { .cmd = NL80211_CMD_GET_REG, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_get_reg_do, .dumpit = nl80211_get_reg_dump, - .internal_flags = NL80211_FLAG_NEED_RTNL, + .internal_flags = 0, /* can be retrieved by unprivileged users */ }, #ifdef CONFIG_CFG80211_CRDA_SUPPORT @@ -15051,7 +15174,7 @@ static const struct genl_small_ops nl80211_small_ops[] = { .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_set_reg, .flags = GENL_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_RTNL, + .internal_flags = 0, }, #endif { @@ -15071,32 +15194,28 @@ static const struct genl_small_ops nl80211_small_ops[] = { .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_get_mesh_config, /* can be retrieved by unprivileged users */ - .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | - NL80211_FLAG_NEED_RTNL, + .internal_flags = NL80211_FLAG_NEED_NETDEV_UP, }, { .cmd = NL80211_CMD_SET_MESH_CONFIG, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_update_mesh_config, .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | - NL80211_FLAG_NEED_RTNL, + .internal_flags = NL80211_FLAG_NEED_NETDEV_UP, }, { .cmd = NL80211_CMD_TRIGGER_SCAN, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_trigger_scan, .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_WDEV_UP | - NL80211_FLAG_NEED_RTNL, + .internal_flags = NL80211_FLAG_NEED_WDEV_UP, }, { .cmd = NL80211_CMD_ABORT_SCAN, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_abort_scan, .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_WDEV_UP | - NL80211_FLAG_NEED_RTNL, + .internal_flags = NL80211_FLAG_NEED_WDEV_UP, }, { .cmd = NL80211_CMD_GET_SCAN, @@ -15108,16 +15227,14 @@ static const struct genl_small_ops nl80211_small_ops[] = { .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_start_sched_scan, .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | - NL80211_FLAG_NEED_RTNL, + .internal_flags = NL80211_FLAG_NEED_NETDEV_UP, }, { .cmd = NL80211_CMD_STOP_SCHED_SCAN, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_stop_sched_scan, .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | - NL80211_FLAG_NEED_RTNL, + .internal_flags = NL80211_FLAG_NEED_NETDEV_UP, }, { .cmd = NL80211_CMD_AUTHENTICATE, @@ -15125,7 +15242,7 @@ static const struct genl_small_ops nl80211_small_ops[] = { .doit = nl80211_authenticate, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | - NL80211_FLAG_NEED_RTNL | + 0 | NL80211_FLAG_CLEAR_SKB, }, { @@ -15134,7 +15251,7 @@ static const struct genl_small_ops nl80211_small_ops[] = { .doit = nl80211_associate, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | - NL80211_FLAG_NEED_RTNL | + 0 | NL80211_FLAG_CLEAR_SKB, }, { @@ -15142,32 +15259,28 @@ static const struct genl_small_ops nl80211_small_ops[] = { .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_deauthenticate, .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | - NL80211_FLAG_NEED_RTNL, + .internal_flags = NL80211_FLAG_NEED_NETDEV_UP, }, { .cmd = NL80211_CMD_DISASSOCIATE, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_disassociate, .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | - NL80211_FLAG_NEED_RTNL, + .internal_flags = NL80211_FLAG_NEED_NETDEV_UP, }, { .cmd = NL80211_CMD_JOIN_IBSS, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_join_ibss, .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | - NL80211_FLAG_NEED_RTNL, + .internal_flags = NL80211_FLAG_NEED_NETDEV_UP, }, { .cmd = NL80211_CMD_LEAVE_IBSS, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_leave_ibss, .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | - NL80211_FLAG_NEED_RTNL, + .internal_flags = NL80211_FLAG_NEED_NETDEV_UP, }, #ifdef CONFIG_NL80211_TESTMODE { @@ -15176,8 +15289,7 @@ static const struct genl_small_ops nl80211_small_ops[] = { .doit = nl80211_testmode_do, .dumpit = nl80211_testmode_dump, .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_WIPHY | - NL80211_FLAG_NEED_RTNL, + .internal_flags = NL80211_FLAG_NEED_WIPHY, }, #endif { @@ -15186,7 +15298,7 @@ static const struct genl_small_ops nl80211_small_ops[] = { .doit = nl80211_connect, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | - NL80211_FLAG_NEED_RTNL | + 0 | NL80211_FLAG_CLEAR_SKB, }, { @@ -15195,7 +15307,7 @@ static const struct genl_small_ops nl80211_small_ops[] = { .doit = nl80211_update_connect_params, .flags = GENL_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | - NL80211_FLAG_NEED_RTNL | + 0 | NL80211_FLAG_CLEAR_SKB, }, { @@ -15203,16 +15315,14 @@ static const struct genl_small_ops nl80211_small_ops[] = { .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_disconnect, .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | - NL80211_FLAG_NEED_RTNL, + .internal_flags = NL80211_FLAG_NEED_NETDEV_UP, }, { .cmd = NL80211_CMD_SET_WIPHY_NETNS, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_wiphy_netns, .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_WIPHY | - NL80211_FLAG_NEED_RTNL, + .internal_flags = NL80211_FLAG_NEED_WIPHY, }, { .cmd = NL80211_CMD_GET_SURVEY, @@ -15225,7 +15335,7 @@ static const struct genl_small_ops nl80211_small_ops[] = { .doit = nl80211_setdel_pmksa, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | - NL80211_FLAG_NEED_RTNL | + 0 | NL80211_FLAG_CLEAR_SKB, }, { @@ -15233,128 +15343,112 @@ static const struct genl_small_ops nl80211_small_ops[] = { .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_setdel_pmksa, .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | - NL80211_FLAG_NEED_RTNL, + .internal_flags = NL80211_FLAG_NEED_NETDEV_UP, }, { .cmd = NL80211_CMD_FLUSH_PMKSA, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_flush_pmksa, .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | - NL80211_FLAG_NEED_RTNL, + .internal_flags = NL80211_FLAG_NEED_NETDEV_UP, }, { .cmd = NL80211_CMD_REMAIN_ON_CHANNEL, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_remain_on_channel, .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_WDEV_UP | - NL80211_FLAG_NEED_RTNL, + .internal_flags = NL80211_FLAG_NEED_WDEV_UP, }, { .cmd = NL80211_CMD_CANCEL_REMAIN_ON_CHANNEL, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_cancel_remain_on_channel, .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_WDEV_UP | - NL80211_FLAG_NEED_RTNL, + .internal_flags = NL80211_FLAG_NEED_WDEV_UP, }, { .cmd = NL80211_CMD_SET_TX_BITRATE_MASK, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_set_tx_bitrate_mask, .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_NETDEV | - NL80211_FLAG_NEED_RTNL, + .internal_flags = NL80211_FLAG_NEED_NETDEV, }, { .cmd = NL80211_CMD_REGISTER_FRAME, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_register_mgmt, .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_WDEV | - NL80211_FLAG_NEED_RTNL, + .internal_flags = NL80211_FLAG_NEED_WDEV, }, { .cmd = NL80211_CMD_FRAME, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_tx_mgmt, .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_WDEV_UP | - NL80211_FLAG_NEED_RTNL, + .internal_flags = NL80211_FLAG_NEED_WDEV_UP, }, { .cmd = NL80211_CMD_FRAME_WAIT_CANCEL, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_tx_mgmt_cancel_wait, .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_WDEV_UP | - NL80211_FLAG_NEED_RTNL, + .internal_flags = NL80211_FLAG_NEED_WDEV_UP, }, { .cmd = NL80211_CMD_SET_POWER_SAVE, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_set_power_save, .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_NETDEV | - NL80211_FLAG_NEED_RTNL, + .internal_flags = NL80211_FLAG_NEED_NETDEV, }, { .cmd = NL80211_CMD_GET_POWER_SAVE, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_get_power_save, /* can be retrieved by unprivileged users */ - .internal_flags = NL80211_FLAG_NEED_NETDEV | - NL80211_FLAG_NEED_RTNL, + .internal_flags = NL80211_FLAG_NEED_NETDEV, }, { .cmd = NL80211_CMD_SET_CQM, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_set_cqm, .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_NETDEV | - NL80211_FLAG_NEED_RTNL, + .internal_flags = NL80211_FLAG_NEED_NETDEV, }, { .cmd = NL80211_CMD_SET_CHANNEL, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_set_channel, .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_NETDEV | - NL80211_FLAG_NEED_RTNL, + .internal_flags = NL80211_FLAG_NEED_NETDEV, }, { .cmd = NL80211_CMD_JOIN_MESH, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_join_mesh, .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | - NL80211_FLAG_NEED_RTNL, + .internal_flags = NL80211_FLAG_NEED_NETDEV_UP, }, { .cmd = NL80211_CMD_LEAVE_MESH, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_leave_mesh, .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | - NL80211_FLAG_NEED_RTNL, + .internal_flags = NL80211_FLAG_NEED_NETDEV_UP, }, { .cmd = NL80211_CMD_JOIN_OCB, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_join_ocb, .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | - NL80211_FLAG_NEED_RTNL, + .internal_flags = NL80211_FLAG_NEED_NETDEV_UP, }, { .cmd = NL80211_CMD_LEAVE_OCB, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_leave_ocb, .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | - NL80211_FLAG_NEED_RTNL, + .internal_flags = NL80211_FLAG_NEED_NETDEV_UP, }, #ifdef CONFIG_PM { @@ -15362,16 +15456,14 @@ static const struct genl_small_ops nl80211_small_ops[] = { .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_get_wowlan, /* can be retrieved by unprivileged users */ - .internal_flags = NL80211_FLAG_NEED_WIPHY | - NL80211_FLAG_NEED_RTNL, + .internal_flags = NL80211_FLAG_NEED_WIPHY, }, { .cmd = NL80211_CMD_SET_WOWLAN, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_set_wowlan, .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_WIPHY | - NL80211_FLAG_NEED_RTNL, + .internal_flags = NL80211_FLAG_NEED_WIPHY, }, #endif { @@ -15380,7 +15472,7 @@ static const struct genl_small_ops nl80211_small_ops[] = { .doit = nl80211_set_rekey_data, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | - NL80211_FLAG_NEED_RTNL | + 0 | NL80211_FLAG_CLEAR_SKB, }, { @@ -15388,48 +15480,42 @@ static const struct genl_small_ops nl80211_small_ops[] = { .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_tdls_mgmt, .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | - NL80211_FLAG_NEED_RTNL, + .internal_flags = NL80211_FLAG_NEED_NETDEV_UP, }, { .cmd = NL80211_CMD_TDLS_OPER, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_tdls_oper, .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | - NL80211_FLAG_NEED_RTNL, + .internal_flags = NL80211_FLAG_NEED_NETDEV_UP, }, { .cmd = NL80211_CMD_UNEXPECTED_FRAME, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_register_unexpected_frame, .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_NETDEV | - NL80211_FLAG_NEED_RTNL, + .internal_flags = NL80211_FLAG_NEED_NETDEV, }, { .cmd = NL80211_CMD_PROBE_CLIENT, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_probe_client, .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | - NL80211_FLAG_NEED_RTNL, + .internal_flags = NL80211_FLAG_NEED_NETDEV_UP, }, { .cmd = NL80211_CMD_REGISTER_BEACONS, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_register_beacons, .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_WIPHY | - NL80211_FLAG_NEED_RTNL, + .internal_flags = NL80211_FLAG_NEED_WIPHY, }, { .cmd = NL80211_CMD_SET_NOACK_MAP, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_set_noack_map, .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_NETDEV | - NL80211_FLAG_NEED_RTNL, + .internal_flags = NL80211_FLAG_NEED_NETDEV, }, { .cmd = NL80211_CMD_START_P2P_DEVICE, @@ -15468,48 +15554,42 @@ static const struct genl_small_ops nl80211_small_ops[] = { .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_nan_add_func, .flags = GENL_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_WDEV_UP | - NL80211_FLAG_NEED_RTNL, + .internal_flags = NL80211_FLAG_NEED_WDEV_UP, }, { .cmd = NL80211_CMD_DEL_NAN_FUNCTION, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_nan_del_func, .flags = GENL_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_WDEV_UP | - NL80211_FLAG_NEED_RTNL, + .internal_flags = NL80211_FLAG_NEED_WDEV_UP, }, { .cmd = NL80211_CMD_CHANGE_NAN_CONFIG, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_nan_change_config, .flags = GENL_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_WDEV_UP | - NL80211_FLAG_NEED_RTNL, + .internal_flags = NL80211_FLAG_NEED_WDEV_UP, }, { .cmd = NL80211_CMD_SET_MCAST_RATE, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_set_mcast_rate, .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_NETDEV | - NL80211_FLAG_NEED_RTNL, + .internal_flags = NL80211_FLAG_NEED_NETDEV, }, { .cmd = NL80211_CMD_SET_MAC_ACL, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_set_mac_acl, .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_NETDEV | - NL80211_FLAG_NEED_RTNL, + .internal_flags = NL80211_FLAG_NEED_NETDEV, }, { .cmd = NL80211_CMD_RADAR_DETECT, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_start_radar_detection, .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | - NL80211_FLAG_NEED_RTNL, + .internal_flags = NL80211_FLAG_NEED_NETDEV_UP, }, { .cmd = NL80211_CMD_GET_PROTOCOL_FEATURES, @@ -15521,47 +15601,41 @@ static const struct genl_small_ops nl80211_small_ops[] = { .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_update_ft_ies, .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | - NL80211_FLAG_NEED_RTNL, + .internal_flags = NL80211_FLAG_NEED_NETDEV_UP, }, { .cmd = NL80211_CMD_CRIT_PROTOCOL_START, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_crit_protocol_start, .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_WDEV_UP | - NL80211_FLAG_NEED_RTNL, + .internal_flags = NL80211_FLAG_NEED_WDEV_UP, }, { .cmd = NL80211_CMD_CRIT_PROTOCOL_STOP, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_crit_protocol_stop, .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_WDEV_UP | - NL80211_FLAG_NEED_RTNL, + .internal_flags = NL80211_FLAG_NEED_WDEV_UP, }, { .cmd = NL80211_CMD_GET_COALESCE, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_get_coalesce, - .internal_flags = NL80211_FLAG_NEED_WIPHY | - NL80211_FLAG_NEED_RTNL, + .internal_flags = NL80211_FLAG_NEED_WIPHY, }, { .cmd = NL80211_CMD_SET_COALESCE, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_set_coalesce, .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_WIPHY | - NL80211_FLAG_NEED_RTNL, + .internal_flags = NL80211_FLAG_NEED_WIPHY, }, { .cmd = NL80211_CMD_CHANNEL_SWITCH, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_channel_switch, .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | - NL80211_FLAG_NEED_RTNL, + .internal_flags = NL80211_FLAG_NEED_NETDEV_UP, }, { .cmd = NL80211_CMD_VENDOR, @@ -15570,7 +15644,7 @@ static const struct genl_small_ops nl80211_small_ops[] = { .dumpit = nl80211_vendor_cmd_dump, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_WIPHY | - NL80211_FLAG_NEED_RTNL | + 0 | NL80211_FLAG_CLEAR_SKB, }, { @@ -15578,123 +15652,108 @@ static const struct genl_small_ops nl80211_small_ops[] = { .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_set_qos_map, .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | - NL80211_FLAG_NEED_RTNL, + .internal_flags = NL80211_FLAG_NEED_NETDEV_UP, }, { .cmd = NL80211_CMD_ADD_TX_TS, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_add_tx_ts, .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | - NL80211_FLAG_NEED_RTNL, + .internal_flags = NL80211_FLAG_NEED_NETDEV_UP, }, { .cmd = NL80211_CMD_DEL_TX_TS, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_del_tx_ts, .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | - NL80211_FLAG_NEED_RTNL, + .internal_flags = NL80211_FLAG_NEED_NETDEV_UP, }, { .cmd = NL80211_CMD_TDLS_CHANNEL_SWITCH, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_tdls_channel_switch, .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | - NL80211_FLAG_NEED_RTNL, + .internal_flags = NL80211_FLAG_NEED_NETDEV_UP, }, { .cmd = NL80211_CMD_TDLS_CANCEL_CHANNEL_SWITCH, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_tdls_cancel_channel_switch, .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | - NL80211_FLAG_NEED_RTNL, + .internal_flags = NL80211_FLAG_NEED_NETDEV_UP, }, { .cmd = NL80211_CMD_SET_MULTICAST_TO_UNICAST, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_set_multicast_to_unicast, .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_NETDEV | - NL80211_FLAG_NEED_RTNL, + .internal_flags = NL80211_FLAG_NEED_NETDEV, }, { .cmd = NL80211_CMD_SET_PMK, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_set_pmk, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | - NL80211_FLAG_NEED_RTNL | + 0 | NL80211_FLAG_CLEAR_SKB, }, { .cmd = NL80211_CMD_DEL_PMK, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_del_pmk, - .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | - NL80211_FLAG_NEED_RTNL, + .internal_flags = NL80211_FLAG_NEED_NETDEV_UP, }, { .cmd = NL80211_CMD_EXTERNAL_AUTH, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_external_auth, .flags = GENL_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | - NL80211_FLAG_NEED_RTNL, + .internal_flags = NL80211_FLAG_NEED_NETDEV_UP, }, { .cmd = NL80211_CMD_CONTROL_PORT_FRAME, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_tx_control_port, .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | - NL80211_FLAG_NEED_RTNL, + .internal_flags = NL80211_FLAG_NEED_NETDEV_UP, }, { .cmd = NL80211_CMD_GET_FTM_RESPONDER_STATS, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_get_ftm_responder_stats, - .internal_flags = NL80211_FLAG_NEED_NETDEV | - NL80211_FLAG_NEED_RTNL, + .internal_flags = NL80211_FLAG_NEED_NETDEV, }, { .cmd = NL80211_CMD_PEER_MEASUREMENT_START, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_pmsr_start, .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_WDEV_UP | - NL80211_FLAG_NEED_RTNL, + .internal_flags = NL80211_FLAG_NEED_WDEV_UP, }, { .cmd = NL80211_CMD_NOTIFY_RADAR, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_notify_radar_detection, .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | - NL80211_FLAG_NEED_RTNL, + .internal_flags = NL80211_FLAG_NEED_NETDEV_UP, }, { .cmd = NL80211_CMD_UPDATE_OWE_INFO, .doit = nl80211_update_owe_info, .flags = GENL_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | - NL80211_FLAG_NEED_RTNL, + .internal_flags = NL80211_FLAG_NEED_NETDEV_UP, }, { .cmd = NL80211_CMD_PROBE_MESH_LINK, .doit = nl80211_probe_mesh_link, .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | - NL80211_FLAG_NEED_RTNL, + .internal_flags = NL80211_FLAG_NEED_NETDEV_UP, }, { .cmd = NL80211_CMD_SET_TID_CONFIG, .doit = nl80211_set_tid_config, .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_NETDEV | - NL80211_FLAG_NEED_RTNL, + .internal_flags = NL80211_FLAG_NEED_NETDEV, }, { .cmd = NL80211_CMD_SET_SAR_SPECS, diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 8114bba8556c7..452b698f42bef 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -142,12 +142,15 @@ static const struct ieee80211_regdomain *get_cfg80211_regdom(void) /* * Returns the regulatory domain associated with the wiphy. * - * Requires either RTNL or RCU protection + * Requires any of RTNL, wiphy mutex or RCU protection. */ const struct ieee80211_regdomain *get_wiphy_regdom(struct wiphy *wiphy) { - return rcu_dereference_rtnl(wiphy->regd); + return rcu_dereference_check(wiphy->regd, + lockdep_is_held(&wiphy->mtx) || + lockdep_rtnl_is_held()); } +EXPORT_SYMBOL(get_wiphy_regdom); static const char *reg_dfs_region_str(enum nl80211_dfs_regions dfs_region) { @@ -169,7 +172,9 @@ enum nl80211_dfs_regions reg_get_dfs_region(struct wiphy *wiphy) const struct ieee80211_regdomain *regd = NULL; const struct ieee80211_regdomain *wiphy_regd = NULL; + rcu_read_lock(); regd = get_cfg80211_regdom(); + if (!wiphy) goto out; @@ -186,6 +191,8 @@ enum nl80211_dfs_regions reg_get_dfs_region(struct wiphy *wiphy) reg_dfs_region_str(regd->dfs_region)); out: + rcu_read_unlock(); + return regd->dfs_region; } @@ -2577,11 +2584,13 @@ void wiphy_apply_custom_regulatory(struct wiphy *wiphy, return; rtnl_lock(); + wiphy_lock(wiphy); tmp = get_wiphy_regdom(wiphy); rcu_assign_pointer(wiphy->regd, new_regd); rcu_free_regdom(tmp); + wiphy_unlock(wiphy); rtnl_unlock(); } EXPORT_SYMBOL(wiphy_apply_custom_regulatory); @@ -2744,7 +2753,10 @@ reg_process_hint_driver(struct wiphy *wiphy, return REG_REQ_IGNORE; tmp = get_wiphy_regdom(wiphy); + ASSERT_RTNL(); + wiphy_lock(wiphy); rcu_assign_pointer(wiphy->regd, regd); + wiphy_unlock(wiphy); rcu_free_regdom(tmp); } @@ -3076,41 +3088,52 @@ static void reg_process_pending_beacon_hints(void) spin_unlock_bh(®_pending_beacons_lock); } -static void reg_process_self_managed_hints(void) +static void reg_process_self_managed_hint(struct wiphy *wiphy) { - struct cfg80211_registered_device *rdev; - struct wiphy *wiphy; + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); const struct ieee80211_regdomain *tmp; const struct ieee80211_regdomain *regd; enum nl80211_band band; struct regulatory_request request = {}; - list_for_each_entry(rdev, &cfg80211_rdev_list, list) { - wiphy = &rdev->wiphy; + ASSERT_RTNL(); + lockdep_assert_wiphy(wiphy); - spin_lock(®_requests_lock); - regd = rdev->requested_regd; - rdev->requested_regd = NULL; - spin_unlock(®_requests_lock); + spin_lock(®_requests_lock); + regd = rdev->requested_regd; + rdev->requested_regd = NULL; + spin_unlock(®_requests_lock); - if (regd == NULL) - continue; + if (!regd) + return; - tmp = get_wiphy_regdom(wiphy); - rcu_assign_pointer(wiphy->regd, regd); - rcu_free_regdom(tmp); + tmp = get_wiphy_regdom(wiphy); + rcu_assign_pointer(wiphy->regd, regd); + rcu_free_regdom(tmp); + + for (band = 0; band < NUM_NL80211_BANDS; band++) + handle_band_custom(wiphy, wiphy->bands[band], regd); - for (band = 0; band < NUM_NL80211_BANDS; band++) - handle_band_custom(wiphy, wiphy->bands[band], regd); + reg_process_ht_flags(wiphy); - reg_process_ht_flags(wiphy); + request.wiphy_idx = get_wiphy_idx(wiphy); + request.alpha2[0] = regd->alpha2[0]; + request.alpha2[1] = regd->alpha2[1]; + request.initiator = NL80211_REGDOM_SET_BY_DRIVER; - request.wiphy_idx = get_wiphy_idx(wiphy); - request.alpha2[0] = regd->alpha2[0]; - request.alpha2[1] = regd->alpha2[1]; - request.initiator = NL80211_REGDOM_SET_BY_DRIVER; + nl80211_send_wiphy_reg_change_event(&request); +} - nl80211_send_wiphy_reg_change_event(&request); +static void reg_process_self_managed_hints(void) +{ + struct cfg80211_registered_device *rdev; + + ASSERT_RTNL(); + + list_for_each_entry(rdev, &cfg80211_rdev_list, list) { + wiphy_lock(&rdev->wiphy); + reg_process_self_managed_hint(&rdev->wiphy); + wiphy_unlock(&rdev->wiphy); } reg_check_channels(); @@ -3789,14 +3812,21 @@ static int reg_set_rd_driver(const struct ieee80211_regdomain *rd, return -ENODEV; if (!driver_request->intersect) { - if (request_wiphy->regd) + ASSERT_RTNL(); + wiphy_lock(request_wiphy); + if (request_wiphy->regd) { + wiphy_unlock(request_wiphy); return -EALREADY; + } regd = reg_copy_regd(rd); - if (IS_ERR(regd)) + if (IS_ERR(regd)) { + wiphy_unlock(request_wiphy); return PTR_ERR(regd); + } rcu_assign_pointer(request_wiphy->regd, regd); + wiphy_unlock(request_wiphy); reset_regdomains(false, rd); return 0; } @@ -3978,8 +4008,8 @@ int regulatory_set_wiphy_regd(struct wiphy *wiphy, } EXPORT_SYMBOL(regulatory_set_wiphy_regd); -int regulatory_set_wiphy_regd_sync_rtnl(struct wiphy *wiphy, - struct ieee80211_regdomain *rd) +int regulatory_set_wiphy_regd_sync(struct wiphy *wiphy, + struct ieee80211_regdomain *rd) { int ret; @@ -3990,10 +4020,11 @@ int regulatory_set_wiphy_regd_sync_rtnl(struct wiphy *wiphy, return ret; /* process the request immediately */ - reg_process_self_managed_hints(); + reg_process_self_managed_hint(wiphy); + reg_check_channels(); return 0; } -EXPORT_SYMBOL(regulatory_set_wiphy_regd_sync_rtnl); +EXPORT_SYMBOL(regulatory_set_wiphy_regd_sync); void wiphy_regulatory_register(struct wiphy *wiphy) { diff --git a/net/wireless/reg.h b/net/wireless/reg.h index f9e83031a40a5..f3707f7290245 100644 --- a/net/wireless/reg.h +++ b/net/wireless/reg.h @@ -63,7 +63,6 @@ unsigned int reg_get_max_bandwidth(const struct ieee80211_regdomain *rd, const struct ieee80211_reg_rule *rule); bool reg_last_request_cell_base(void); -const struct ieee80211_regdomain *get_wiphy_regdom(struct wiphy *wiphy); /** * regulatory_hint_found_beacon - hints a beacon was found on a channel diff --git a/net/wireless/scan.c b/net/wireless/scan.c index 1b7fec3b53cdd..019952d4fc7db 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -918,7 +918,7 @@ void ___cfg80211_scan_done(struct cfg80211_registered_device *rdev, union iwreq_data wrqu; #endif - ASSERT_RTNL(); + lockdep_assert_held(&rdev->wiphy.mtx); if (rdev->scan_msg) { nl80211_send_scan_msg(rdev, rdev->scan_msg); @@ -987,9 +987,9 @@ void __cfg80211_scan_done(struct work_struct *wk) rdev = container_of(wk, struct cfg80211_registered_device, scan_done_wk); - rtnl_lock(); + wiphy_lock(&rdev->wiphy); ___cfg80211_scan_done(rdev, true); - rtnl_unlock(); + wiphy_unlock(&rdev->wiphy); } void cfg80211_scan_done(struct cfg80211_scan_request *request, @@ -1022,7 +1022,7 @@ EXPORT_SYMBOL(cfg80211_scan_done); void cfg80211_add_sched_scan_req(struct cfg80211_registered_device *rdev, struct cfg80211_sched_scan_request *req) { - ASSERT_RTNL(); + lockdep_assert_held(&rdev->wiphy.mtx); list_add_rcu(&req->list, &rdev->sched_scan_req_list); } @@ -1030,7 +1030,7 @@ void cfg80211_add_sched_scan_req(struct cfg80211_registered_device *rdev, static void cfg80211_del_sched_scan_req(struct cfg80211_registered_device *rdev, struct cfg80211_sched_scan_request *req) { - ASSERT_RTNL(); + lockdep_assert_held(&rdev->wiphy.mtx); list_del_rcu(&req->list); kfree_rcu(req, rcu_head); @@ -1042,7 +1042,7 @@ cfg80211_find_sched_scan_req(struct cfg80211_registered_device *rdev, u64 reqid) struct cfg80211_sched_scan_request *pos; list_for_each_entry_rcu(pos, &rdev->sched_scan_req_list, list, - lockdep_rtnl_is_held()) { + lockdep_is_held(&rdev->wiphy.mtx)) { if (pos->reqid == reqid) return pos; } @@ -1090,7 +1090,7 @@ void cfg80211_sched_scan_results_wk(struct work_struct *work) rdev = container_of(work, struct cfg80211_registered_device, sched_scan_res_wk); - rtnl_lock(); + wiphy_lock(&rdev->wiphy); list_for_each_entry_safe(req, tmp, &rdev->sched_scan_req_list, list) { if (req->report_results) { req->report_results = false; @@ -1105,7 +1105,7 @@ void cfg80211_sched_scan_results_wk(struct work_struct *work) NL80211_CMD_SCHED_SCAN_RESULTS); } } - rtnl_unlock(); + wiphy_unlock(&rdev->wiphy); } void cfg80211_sched_scan_results(struct wiphy *wiphy, u64 reqid) @@ -1126,23 +1126,23 @@ void cfg80211_sched_scan_results(struct wiphy *wiphy, u64 reqid) } EXPORT_SYMBOL(cfg80211_sched_scan_results); -void cfg80211_sched_scan_stopped_rtnl(struct wiphy *wiphy, u64 reqid) +void cfg80211_sched_scan_stopped_locked(struct wiphy *wiphy, u64 reqid) { struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); - ASSERT_RTNL(); + lockdep_assert_held(&wiphy->mtx); trace_cfg80211_sched_scan_stopped(wiphy, reqid); __cfg80211_stop_sched_scan(rdev, reqid, true); } -EXPORT_SYMBOL(cfg80211_sched_scan_stopped_rtnl); +EXPORT_SYMBOL(cfg80211_sched_scan_stopped_locked); void cfg80211_sched_scan_stopped(struct wiphy *wiphy, u64 reqid) { - rtnl_lock(); - cfg80211_sched_scan_stopped_rtnl(wiphy, reqid); - rtnl_unlock(); + wiphy_lock(wiphy); + cfg80211_sched_scan_stopped_locked(wiphy, reqid); + wiphy_unlock(wiphy); } EXPORT_SYMBOL(cfg80211_sched_scan_stopped); @@ -1150,7 +1150,7 @@ int cfg80211_stop_sched_scan_req(struct cfg80211_registered_device *rdev, struct cfg80211_sched_scan_request *req, bool driver_initiated) { - ASSERT_RTNL(); + lockdep_assert_held(&rdev->wiphy.mtx); if (!driver_initiated) { int err = rdev_sched_scan_stop(rdev, req->dev, req->reqid); @@ -1170,7 +1170,7 @@ int __cfg80211_stop_sched_scan(struct cfg80211_registered_device *rdev, { struct cfg80211_sched_scan_request *sched_scan_req; - ASSERT_RTNL(); + lockdep_assert_held(&rdev->wiphy.mtx); sched_scan_req = cfg80211_find_sched_scan_req(rdev, reqid); if (!sched_scan_req) @@ -2774,6 +2774,8 @@ int cfg80211_wext_siwscan(struct net_device *dev, eth_broadcast_addr(creq->bssid); + wiphy_lock(&rdev->wiphy); + rdev->scan_req = creq; err = rdev_scan(rdev, creq); if (err) { @@ -2785,6 +2787,7 @@ int cfg80211_wext_siwscan(struct net_device *dev, creq = NULL; dev_hold(dev); } + wiphy_unlock(&rdev->wiphy); out: kfree(creq); return err; diff --git a/net/wireless/sme.c b/net/wireless/sme.c index 38df713f2e2ed..07756ca5e3b59 100644 --- a/net/wireless/sme.c +++ b/net/wireless/sme.c @@ -67,7 +67,6 @@ static int cfg80211_conn_scan(struct wireless_dev *wdev) struct cfg80211_scan_request *request; int n_channels, err; - ASSERT_RTNL(); ASSERT_WDEV_LOCK(wdev); if (rdev->scan_req || rdev->scan_msg) @@ -233,7 +232,7 @@ void cfg80211_conn_work(struct work_struct *work) u8 bssid_buf[ETH_ALEN], *bssid = NULL; enum nl80211_timeout_reason treason; - rtnl_lock(); + wiphy_lock(&rdev->wiphy); list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list) { if (!wdev->netdev) @@ -266,7 +265,7 @@ void cfg80211_conn_work(struct work_struct *work) wdev_unlock(wdev); } - rtnl_unlock(); + wiphy_unlock(&rdev->wiphy); } /* Returned bss is reference counted and must be cleaned up appropriately. */ diff --git a/net/wireless/sysfs.c b/net/wireless/sysfs.c index 3ac1f48195d28..043762354a669 100644 --- a/net/wireless/sysfs.c +++ b/net/wireless/sysfs.c @@ -5,6 +5,7 @@ * * Copyright 2005-2006 Jiri Benc * Copyright 2006 Johannes Berg + * Copyright (C) 2020-2021 Intel Corporation */ #include @@ -104,6 +105,7 @@ static int wiphy_suspend(struct device *dev) rdev->suspend_at = ktime_get_boottime_seconds(); rtnl_lock(); + wiphy_lock(&rdev->wiphy); if (rdev->wiphy.registered) { if (!rdev->wiphy.wowlan_config) { cfg80211_leave_all(rdev); @@ -118,6 +120,7 @@ static int wiphy_suspend(struct device *dev) ret = rdev_suspend(rdev, NULL); } } + wiphy_unlock(&rdev->wiphy); rtnl_unlock(); return ret; @@ -132,8 +135,10 @@ static int wiphy_resume(struct device *dev) cfg80211_bss_age(rdev, ktime_get_boottime_seconds() - rdev->suspend_at); rtnl_lock(); + wiphy_lock(&rdev->wiphy); if (rdev->wiphy.registered && rdev->ops->resume) ret = rdev_resume(rdev); + wiphy_unlock(&rdev->wiphy); rtnl_unlock(); return ret; diff --git a/net/wireless/util.c b/net/wireless/util.c index eab928002cd80..1bf0200f562ab 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -997,7 +997,7 @@ void cfg80211_process_rdev_events(struct cfg80211_registered_device *rdev) { struct wireless_dev *wdev; - ASSERT_RTNL(); + lockdep_assert_held(&rdev->wiphy.mtx); list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list) cfg80211_process_wdev_events(wdev); @@ -1010,7 +1010,7 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev, int err; enum nl80211_iftype otype = dev->ieee80211_ptr->iftype; - ASSERT_RTNL(); + lockdep_assert_held(&rdev->wiphy.mtx); /* don't support changing VLANs, you just re-create them */ if (otype == NL80211_IFTYPE_AP_VLAN) diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c index fd9ad74972fb0..2e35cb78221ee 100644 --- a/net/wireless/wext-compat.c +++ b/net/wireless/wext-compat.c @@ -7,7 +7,7 @@ * we directly assign the wireless handlers of wireless interfaces. * * Copyright 2008-2009 Johannes Berg - * Copyright (C) 2019 Intel Corporation + * Copyright (C) 2019-2021 Intel Corporation */ #include @@ -253,17 +253,23 @@ int cfg80211_wext_siwrts(struct net_device *dev, u32 orts = wdev->wiphy->rts_threshold; int err; - if (rts->disabled || !rts->fixed) + wiphy_lock(&rdev->wiphy); + if (rts->disabled || !rts->fixed) { wdev->wiphy->rts_threshold = (u32) -1; - else if (rts->value < 0) - return -EINVAL; - else + } else if (rts->value < 0) { + err = -EINVAL; + goto out; + } else { wdev->wiphy->rts_threshold = rts->value; + } err = rdev_set_wiphy_params(rdev, WIPHY_PARAM_RTS_THRESHOLD); + if (err) wdev->wiphy->rts_threshold = orts; +out: + wiphy_unlock(&rdev->wiphy); return err; } EXPORT_WEXT_HANDLER(cfg80211_wext_siwrts); @@ -291,11 +297,13 @@ int cfg80211_wext_siwfrag(struct net_device *dev, u32 ofrag = wdev->wiphy->frag_threshold; int err; - if (frag->disabled || !frag->fixed) + wiphy_lock(&rdev->wiphy); + if (frag->disabled || !frag->fixed) { wdev->wiphy->frag_threshold = (u32) -1; - else if (frag->value < 256) - return -EINVAL; - else { + } else if (frag->value < 256) { + err = -EINVAL; + goto out; + } else { /* Fragment length must be even, so strip LSB. */ wdev->wiphy->frag_threshold = frag->value & ~0x1; } @@ -303,6 +311,8 @@ int cfg80211_wext_siwfrag(struct net_device *dev, err = rdev_set_wiphy_params(rdev, WIPHY_PARAM_FRAG_THRESHOLD); if (err) wdev->wiphy->frag_threshold = ofrag; +out: + wiphy_unlock(&rdev->wiphy); return err; } @@ -337,6 +347,7 @@ static int cfg80211_wext_siwretry(struct net_device *dev, (retry->flags & IW_RETRY_TYPE) != IW_RETRY_LIMIT) return -EINVAL; + wiphy_lock(&rdev->wiphy); if (retry->flags & IW_RETRY_LONG) { wdev->wiphy->retry_long = retry->value; changed |= WIPHY_PARAM_RETRY_LONG; @@ -355,6 +366,7 @@ static int cfg80211_wext_siwretry(struct net_device *dev, wdev->wiphy->retry_short = oshort; wdev->wiphy->retry_long = olong; } + wiphy_unlock(&rdev->wiphy); return err; } @@ -577,15 +589,18 @@ static int cfg80211_wext_siwencode(struct net_device *dev, !rdev->ops->set_default_key) return -EOPNOTSUPP; + wiphy_lock(&rdev->wiphy); idx = erq->flags & IW_ENCODE_INDEX; if (idx == 0) { idx = wdev->wext.default_key; if (idx < 0) idx = 0; - } else if (idx < 1 || idx > 4) - return -EINVAL; - else + } else if (idx < 1 || idx > 4) { + err = -EINVAL; + goto out; + } else { idx--; + } if (erq->flags & IW_ENCODE_DISABLED) remove = true; @@ -599,22 +614,28 @@ static int cfg80211_wext_siwencode(struct net_device *dev, if (!err) wdev->wext.default_key = idx; wdev_unlock(wdev); - return err; + goto out; } memset(¶ms, 0, sizeof(params)); params.key = keybuf; params.key_len = erq->length; - if (erq->length == 5) + if (erq->length == 5) { params.cipher = WLAN_CIPHER_SUITE_WEP40; - else if (erq->length == 13) + } else if (erq->length == 13) { params.cipher = WLAN_CIPHER_SUITE_WEP104; - else if (!remove) - return -EINVAL; + } else if (!remove) { + err = -EINVAL; + goto out; + } + + err = cfg80211_set_encryption(rdev, dev, false, NULL, remove, + wdev->wext.default_key == -1, + idx, ¶ms); +out: + wiphy_unlock(&rdev->wiphy); - return cfg80211_set_encryption(rdev, dev, false, NULL, remove, - wdev->wext.default_key == -1, - idx, ¶ms); + return err; } static int cfg80211_wext_siwencodeext(struct net_device *dev, @@ -754,38 +775,61 @@ static int cfg80211_wext_siwfreq(struct net_device *dev, struct cfg80211_chan_def chandef = { .width = NL80211_CHAN_WIDTH_20_NOHT, }; - int freq; + int freq, ret; + + wiphy_lock(&rdev->wiphy); switch (wdev->iftype) { case NL80211_IFTYPE_STATION: - return cfg80211_mgd_wext_siwfreq(dev, info, wextfreq, extra); + ret = cfg80211_mgd_wext_siwfreq(dev, info, wextfreq, extra); + break; case NL80211_IFTYPE_ADHOC: - return cfg80211_ibss_wext_siwfreq(dev, info, wextfreq, extra); + ret = cfg80211_ibss_wext_siwfreq(dev, info, wextfreq, extra); + break; case NL80211_IFTYPE_MONITOR: freq = cfg80211_wext_freq(wextfreq); - if (freq < 0) - return freq; - if (freq == 0) - return -EINVAL; + if (freq < 0) { + ret = freq; + break; + } + if (freq == 0) { + ret = -EINVAL; + break; + } chandef.center_freq1 = freq; chandef.chan = ieee80211_get_channel(&rdev->wiphy, freq); - if (!chandef.chan) - return -EINVAL; - return cfg80211_set_monitor_channel(rdev, &chandef); + if (!chandef.chan) { + ret = -EINVAL; + break; + } + ret = cfg80211_set_monitor_channel(rdev, &chandef); + break; case NL80211_IFTYPE_MESH_POINT: freq = cfg80211_wext_freq(wextfreq); - if (freq < 0) - return freq; - if (freq == 0) - return -EINVAL; + if (freq < 0) { + ret = freq; + break; + } + if (freq == 0) { + ret = -EINVAL; + break; + } chandef.center_freq1 = freq; chandef.chan = ieee80211_get_channel(&rdev->wiphy, freq); - if (!chandef.chan) - return -EINVAL; - return cfg80211_set_mesh_channel(rdev, wdev, &chandef); + if (!chandef.chan) { + ret = -EINVAL; + break; + } + ret = cfg80211_set_mesh_channel(rdev, wdev, &chandef); + break; default: - return -EOPNOTSUPP; + ret = -EOPNOTSUPP; + break; } + + wiphy_unlock(&rdev->wiphy); + + return ret; } static int cfg80211_wext_giwfreq(struct net_device *dev, @@ -797,24 +841,35 @@ static int cfg80211_wext_giwfreq(struct net_device *dev, struct cfg80211_chan_def chandef = {}; int ret; + wiphy_lock(&rdev->wiphy); switch (wdev->iftype) { case NL80211_IFTYPE_STATION: - return cfg80211_mgd_wext_giwfreq(dev, info, freq, extra); + ret = cfg80211_mgd_wext_giwfreq(dev, info, freq, extra); + break; case NL80211_IFTYPE_ADHOC: - return cfg80211_ibss_wext_giwfreq(dev, info, freq, extra); + ret = cfg80211_ibss_wext_giwfreq(dev, info, freq, extra); + break; case NL80211_IFTYPE_MONITOR: - if (!rdev->ops->get_channel) - return -EINVAL; + if (!rdev->ops->get_channel) { + ret = -EINVAL; + break; + } ret = rdev_get_channel(rdev, wdev, &chandef); if (ret) - return ret; + break; freq->m = chandef.chan->center_freq; freq->e = 6; - return 0; + ret = 0; + break; default: - return -EINVAL; + ret = -EINVAL; + break; } + + wiphy_unlock(&rdev->wiphy); + + return ret; } static int cfg80211_wext_siwtxpower(struct net_device *dev, @@ -825,6 +880,7 @@ static int cfg80211_wext_siwtxpower(struct net_device *dev, struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); enum nl80211_tx_power_setting type; int dbm = 0; + int ret; if ((data->txpower.flags & IW_TXPOW_TYPE) != IW_TXPOW_DBM) return -EINVAL; @@ -866,7 +922,11 @@ static int cfg80211_wext_siwtxpower(struct net_device *dev, return 0; } - return rdev_set_tx_power(rdev, wdev, type, DBM_TO_MBM(dbm)); + wiphy_lock(&rdev->wiphy); + ret = rdev_set_tx_power(rdev, wdev, type, DBM_TO_MBM(dbm)); + wiphy_unlock(&rdev->wiphy); + + return ret; } static int cfg80211_wext_giwtxpower(struct net_device *dev, @@ -885,7 +945,9 @@ static int cfg80211_wext_giwtxpower(struct net_device *dev, if (!rdev->ops->get_tx_power) return -EOPNOTSUPP; + wiphy_lock(&rdev->wiphy); err = rdev_get_tx_power(rdev, wdev, &val); + wiphy_unlock(&rdev->wiphy); if (err) return err; @@ -1125,7 +1187,9 @@ static int cfg80211_wext_siwpower(struct net_device *dev, timeout = wrq->value / 1000; } + wiphy_lock(&rdev->wiphy); err = rdev_set_power_mgmt(rdev, dev, ps, timeout); + wiphy_unlock(&rdev->wiphy); if (err) return err; @@ -1156,7 +1220,7 @@ static int cfg80211_wext_siwrate(struct net_device *dev, struct cfg80211_bitrate_mask mask; u32 fixed, maxrate; struct ieee80211_supported_band *sband; - int band, ridx; + int band, ridx, ret; bool match = false; if (!rdev->ops->set_bitrate_mask) @@ -1195,7 +1259,11 @@ static int cfg80211_wext_siwrate(struct net_device *dev, if (!match) return -EINVAL; - return rdev_set_bitrate_mask(rdev, dev, NULL, &mask); + wiphy_lock(&rdev->wiphy); + ret = rdev_set_bitrate_mask(rdev, dev, NULL, &mask); + wiphy_unlock(&rdev->wiphy); + + return ret; } static int cfg80211_wext_giwrate(struct net_device *dev, @@ -1224,7 +1292,9 @@ static int cfg80211_wext_giwrate(struct net_device *dev, if (err) return err; + wiphy_lock(&rdev->wiphy); err = rdev_get_station(rdev, dev, addr, &sinfo); + wiphy_unlock(&rdev->wiphy); if (err) return err; @@ -1249,6 +1319,7 @@ static struct iw_statistics *cfg80211_wireless_stats(struct net_device *dev) static struct iw_statistics wstats; static struct station_info sinfo = {}; u8 bssid[ETH_ALEN]; + int ret; if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION) return NULL; @@ -1267,7 +1338,11 @@ static struct iw_statistics *cfg80211_wireless_stats(struct net_device *dev) memset(&sinfo, 0, sizeof(sinfo)); - if (rdev_get_station(rdev, dev, bssid, &sinfo)) + wiphy_lock(&rdev->wiphy); + ret = rdev_get_station(rdev, dev, bssid, &sinfo); + wiphy_unlock(&rdev->wiphy); + + if (ret) return NULL; memset(&wstats, 0, sizeof(wstats)); @@ -1318,15 +1393,24 @@ static int cfg80211_wext_siwap(struct net_device *dev, struct sockaddr *ap_addr, char *extra) { struct wireless_dev *wdev = dev->ieee80211_ptr; + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); + int ret; + wiphy_lock(&rdev->wiphy); switch (wdev->iftype) { case NL80211_IFTYPE_ADHOC: - return cfg80211_ibss_wext_siwap(dev, info, ap_addr, extra); + ret = cfg80211_ibss_wext_siwap(dev, info, ap_addr, extra); + break; case NL80211_IFTYPE_STATION: - return cfg80211_mgd_wext_siwap(dev, info, ap_addr, extra); + ret = cfg80211_mgd_wext_siwap(dev, info, ap_addr, extra); + break; default: - return -EOPNOTSUPP; + ret = -EOPNOTSUPP; + break; } + wiphy_unlock(&rdev->wiphy); + + return ret; } static int cfg80211_wext_giwap(struct net_device *dev, @@ -1334,15 +1418,24 @@ static int cfg80211_wext_giwap(struct net_device *dev, struct sockaddr *ap_addr, char *extra) { struct wireless_dev *wdev = dev->ieee80211_ptr; + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); + int ret; + wiphy_lock(&rdev->wiphy); switch (wdev->iftype) { case NL80211_IFTYPE_ADHOC: - return cfg80211_ibss_wext_giwap(dev, info, ap_addr, extra); + ret = cfg80211_ibss_wext_giwap(dev, info, ap_addr, extra); + break; case NL80211_IFTYPE_STATION: - return cfg80211_mgd_wext_giwap(dev, info, ap_addr, extra); + ret = cfg80211_mgd_wext_giwap(dev, info, ap_addr, extra); + break; default: - return -EOPNOTSUPP; + ret = -EOPNOTSUPP; + break; } + wiphy_unlock(&rdev->wiphy); + + return ret; } static int cfg80211_wext_siwessid(struct net_device *dev, @@ -1350,15 +1443,24 @@ static int cfg80211_wext_siwessid(struct net_device *dev, struct iw_point *data, char *ssid) { struct wireless_dev *wdev = dev->ieee80211_ptr; + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); + int ret; + wiphy_lock(&rdev->wiphy); switch (wdev->iftype) { case NL80211_IFTYPE_ADHOC: - return cfg80211_ibss_wext_siwessid(dev, info, data, ssid); + ret = cfg80211_ibss_wext_siwessid(dev, info, data, ssid); + break; case NL80211_IFTYPE_STATION: - return cfg80211_mgd_wext_siwessid(dev, info, data, ssid); + ret = cfg80211_mgd_wext_siwessid(dev, info, data, ssid); + break; default: - return -EOPNOTSUPP; + ret = -EOPNOTSUPP; + break; } + wiphy_unlock(&rdev->wiphy); + + return ret; } static int cfg80211_wext_giwessid(struct net_device *dev, @@ -1366,18 +1468,27 @@ static int cfg80211_wext_giwessid(struct net_device *dev, struct iw_point *data, char *ssid) { struct wireless_dev *wdev = dev->ieee80211_ptr; + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); + int ret; data->flags = 0; data->length = 0; + wiphy_lock(&rdev->wiphy); switch (wdev->iftype) { case NL80211_IFTYPE_ADHOC: - return cfg80211_ibss_wext_giwessid(dev, info, data, ssid); + ret = cfg80211_ibss_wext_giwessid(dev, info, data, ssid); + break; case NL80211_IFTYPE_STATION: - return cfg80211_mgd_wext_giwessid(dev, info, data, ssid); + ret = cfg80211_mgd_wext_giwessid(dev, info, data, ssid); + break; default: - return -EOPNOTSUPP; + ret = -EOPNOTSUPP; + break; } + wiphy_unlock(&rdev->wiphy); + + return ret; } static int cfg80211_wext_siwpmksa(struct net_device *dev, @@ -1388,6 +1499,7 @@ static int cfg80211_wext_siwpmksa(struct net_device *dev, struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); struct cfg80211_pmksa cfg_pmksa; struct iw_pmksa *pmksa = (struct iw_pmksa *)extra; + int ret; memset(&cfg_pmksa, 0, sizeof(struct cfg80211_pmksa)); @@ -1397,28 +1509,39 @@ static int cfg80211_wext_siwpmksa(struct net_device *dev, cfg_pmksa.bssid = pmksa->bssid.sa_data; cfg_pmksa.pmkid = pmksa->pmkid; + wiphy_lock(&rdev->wiphy); switch (pmksa->cmd) { case IW_PMKSA_ADD: - if (!rdev->ops->set_pmksa) - return -EOPNOTSUPP; - - return rdev_set_pmksa(rdev, dev, &cfg_pmksa); + if (!rdev->ops->set_pmksa) { + ret = -EOPNOTSUPP; + break; + } + ret = rdev_set_pmksa(rdev, dev, &cfg_pmksa); + break; case IW_PMKSA_REMOVE: - if (!rdev->ops->del_pmksa) - return -EOPNOTSUPP; - - return rdev_del_pmksa(rdev, dev, &cfg_pmksa); + if (!rdev->ops->del_pmksa) { + ret = -EOPNOTSUPP; + break; + } + ret = rdev_del_pmksa(rdev, dev, &cfg_pmksa); + break; case IW_PMKSA_FLUSH: - if (!rdev->ops->flush_pmksa) - return -EOPNOTSUPP; - - return rdev_flush_pmksa(rdev, dev); + if (!rdev->ops->flush_pmksa) { + ret = -EOPNOTSUPP; + break; + } + ret = rdev_flush_pmksa(rdev, dev); + break; default: - return -EOPNOTSUPP; + ret = -EOPNOTSUPP; + break; } + wiphy_unlock(&rdev->wiphy); + + return ret; } #define DEFINE_WEXT_COMPAT_STUB(func, type) \ diff --git a/net/wireless/wext-sme.c b/net/wireless/wext-sme.c index 73df23570d43a..193a18a531423 100644 --- a/net/wireless/wext-sme.c +++ b/net/wireless/wext-sme.c @@ -3,7 +3,7 @@ * cfg80211 wext compat for managed mode. * * Copyright 2009 Johannes Berg - * Copyright (C) 2009 Intel Corporation. All rights reserved. + * Copyright (C) 2009, 2020-2021 Intel Corporation. */ #include @@ -379,6 +379,7 @@ int cfg80211_wext_siwmlme(struct net_device *dev, if (mlme->addr.sa_family != ARPHRD_ETHER) return -EINVAL; + wiphy_lock(&rdev->wiphy); wdev_lock(wdev); switch (mlme->cmd) { case IW_MLME_DEAUTH: @@ -390,6 +391,7 @@ int cfg80211_wext_siwmlme(struct net_device *dev, break; } wdev_unlock(wdev); + wiphy_unlock(&rdev->wiphy); return err; } -- GitLab From 5122565188bae59d507d90a9a9fd2fd6107f4439 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 21 Jan 2021 17:16:22 +0100 Subject: [PATCH 1477/2012] wext: fix NULL-ptr-dereference with cfg80211's lack of commit() Since cfg80211 doesn't implement commit, we never really cared about that code there (and it's configured out w/o CONFIG_WIRELESS_EXT). After all, since it has no commit, it shouldn't return -EIWCOMMIT to indicate commit is needed. However, EIWCOMMIT is actually an alias for EINPROGRESS, which _can_ happen if e.g. we try to change the frequency but we're already in the process of connecting to some network, and drivers could return that value (or even cfg80211 itself might). This then causes us to crash because dev->wireless_handlers is NULL but we try to check dev->wireless_handlers->standard[0]. Fix this by also checking dev->wireless_handlers. Also simplify the code a little bit. Cc: stable@vger.kernel.org Reported-by: syzbot+444248c79e117bc99f46@syzkaller.appspotmail.com Reported-by: syzbot+8b2a88a09653d4084179@syzkaller.appspotmail.com Link: https://lore.kernel.org/r/20210121171621.2076e4a37d5a.I5d9c72220fe7bb133fb718751da0180a57ecba4e@changeid Signed-off-by: Johannes Berg --- net/wireless/wext-core.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/wireless/wext-core.c b/net/wireless/wext-core.c index 69102fda9ebd4..76a80a41615be 100644 --- a/net/wireless/wext-core.c +++ b/net/wireless/wext-core.c @@ -896,8 +896,9 @@ out: int call_commit_handler(struct net_device *dev) { #ifdef CONFIG_WIRELESS_EXT - if ((netif_running(dev)) && - (dev->wireless_handlers->standard[0] != NULL)) + if (netif_running(dev) && + dev->wireless_handlers && + dev->wireless_handlers->standard[0]) /* Call the commit handler on the driver */ return dev->wireless_handlers->standard[0](dev, NULL, NULL, NULL); -- GitLab From 054c9939b4800a91475d8d89905827bf9e1ad97a Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 22 Jan 2021 17:11:16 +0100 Subject: [PATCH 1478/2012] mac80211: pause TX while changing interface type syzbot reported a crash that happened when changing the interface type around a lot, and while it might have been easy to fix just the symptom there, a little deeper investigation found that really the reason is that we allowed packets to be transmitted while in the middle of changing the interface type. Disallow TX by stopping the queues while changing the type. Fixes: 34d4bc4d41d2 ("mac80211: support runtime interface type changes") Reported-by: syzbot+d7a3b15976bf7de2238a@syzkaller.appspotmail.com Link: https://lore.kernel.org/r/20210122171115.b321f98f4d4f.I6997841933c17b093535c31d29355be3c0c39628@changeid Signed-off-by: Johannes Berg --- net/mac80211/ieee80211_i.h | 1 + net/mac80211/iface.c | 6 ++++++ 2 files changed, 7 insertions(+) diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 8bf9c0e974d62..8e281c2e644d5 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1078,6 +1078,7 @@ enum queue_stop_reason { IEEE80211_QUEUE_STOP_REASON_FLUSH, IEEE80211_QUEUE_STOP_REASON_TDLS_TEARDOWN, IEEE80211_QUEUE_STOP_REASON_RESERVE_TID, + IEEE80211_QUEUE_STOP_REASON_IFTYPE_CHANGE, IEEE80211_QUEUE_STOP_REASONS, }; diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 3b9ec4ef81c3b..b31417f40bd56 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -1617,6 +1617,10 @@ static int ieee80211_runtime_change_iftype(struct ieee80211_sub_if_data *sdata, if (ret) return ret; + ieee80211_stop_vif_queues(local, sdata, + IEEE80211_QUEUE_STOP_REASON_IFTYPE_CHANGE); + synchronize_net(); + ieee80211_do_stop(sdata, false); ieee80211_teardown_sdata(sdata); @@ -1639,6 +1643,8 @@ static int ieee80211_runtime_change_iftype(struct ieee80211_sub_if_data *sdata, err = ieee80211_do_open(&sdata->wdev, false); WARN(err, "type change: do_open returned %d", err); + ieee80211_wake_vif_queues(local, sdata, + IEEE80211_QUEUE_STOP_REASON_IFTYPE_CHANGE); return ret; } -- GitLab From 81f153faacd04c049e5482d6ff33daddc30ed44e Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 26 Jan 2021 11:54:09 +0100 Subject: [PATCH 1479/2012] staging: rtl8723bs: fix wireless regulatory API misuse This code ends up calling wiphy_apply_custom_regulatory(), for which we document that it should be called before wiphy_register(). This driver doesn't do that, but calls it from ndo_open() with the RTNL held, which caused deadlocks. Since the driver just registers static regdomain data and then the notifier applies the channel changes if any, there's no reason for it to call this in ndo_open(), move it earlier to fix the deadlock. Reported-and-tested-by: Hans de Goede Fixes: 51d62f2f2c50 ("cfg80211: Save the regulatory domain with a lock") Link: https://lore.kernel.org/r/20210126115409.d5fd6f8fe042.Ib5823a6feb2e2aa01ca1a565d2505367f38ad246@changeid Acked-by: Greg Kroah-Hartman Signed-off-by: Johannes Berg --- drivers/staging/rtl8723bs/include/rtw_wifi_regd.h | 6 +++--- drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c | 6 +++--- drivers/staging/rtl8723bs/os_dep/wifi_regd.c | 10 +++------- 3 files changed, 9 insertions(+), 13 deletions(-) diff --git a/drivers/staging/rtl8723bs/include/rtw_wifi_regd.h b/drivers/staging/rtl8723bs/include/rtw_wifi_regd.h index ab5a8627d3712..f798b0c744a4f 100644 --- a/drivers/staging/rtl8723bs/include/rtw_wifi_regd.h +++ b/drivers/staging/rtl8723bs/include/rtw_wifi_regd.h @@ -20,9 +20,9 @@ enum country_code_type_t { COUNTRY_CODE_MAX }; -int rtw_regd_init(struct adapter *padapter, - void (*reg_notifier)(struct wiphy *wiphy, - struct regulatory_request *request)); +void rtw_regd_init(struct wiphy *wiphy, + void (*reg_notifier)(struct wiphy *wiphy, + struct regulatory_request *request)); void rtw_reg_notifier(struct wiphy *wiphy, struct regulatory_request *request); diff --git a/drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c b/drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c index bf14172361618..11032316c53dc 100644 --- a/drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c +++ b/drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c @@ -3211,9 +3211,6 @@ void rtw_cfg80211_init_wiphy(struct adapter *padapter) rtw_cfg80211_init_ht_capab(&bands->ht_cap, NL80211_BAND_2GHZ, rf_type); } - /* init regulary domain */ - rtw_regd_init(padapter, rtw_reg_notifier); - /* copy mac_addr to wiphy */ memcpy(wiphy->perm_addr, padapter->eeprompriv.mac_addr, ETH_ALEN); @@ -3328,6 +3325,9 @@ int rtw_wdev_alloc(struct adapter *padapter, struct device *dev) *((struct adapter **)wiphy_priv(wiphy)) = padapter; rtw_cfg80211_preinit_wiphy(padapter, wiphy); + /* init regulary domain */ + rtw_regd_init(wiphy, rtw_reg_notifier); + ret = wiphy_register(wiphy); if (ret < 0) { DBG_8192C("Couldn't register wiphy device\n"); diff --git a/drivers/staging/rtl8723bs/os_dep/wifi_regd.c b/drivers/staging/rtl8723bs/os_dep/wifi_regd.c index 578b9f734231e..2833fc6901e6e 100644 --- a/drivers/staging/rtl8723bs/os_dep/wifi_regd.c +++ b/drivers/staging/rtl8723bs/os_dep/wifi_regd.c @@ -139,15 +139,11 @@ static void _rtw_regd_init_wiphy(struct rtw_regulatory *reg, _rtw_reg_apply_flags(wiphy); } -int rtw_regd_init(struct adapter *padapter, - void (*reg_notifier)(struct wiphy *wiphy, - struct regulatory_request *request)) +void rtw_regd_init(struct wiphy *wiphy, + void (*reg_notifier)(struct wiphy *wiphy, + struct regulatory_request *request)) { - struct wiphy *wiphy = padapter->rtw_wdev->wiphy; - _rtw_regd_init_wiphy(NULL, wiphy, reg_notifier); - - return 0; } void rtw_reg_notifier(struct wiphy *wiphy, struct regulatory_request *request) -- GitLab From ef99a60ffd9b918354e038bc5e61f007ff7e901d Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sun, 17 Jan 2021 09:30:15 +0000 Subject: [PATCH 1480/2012] drm/i915/gt: Clear CACHE_MODE prior to clearing residuals Since we do a bare context switch with no restore, the clear residual kernel runs on dirty state, and we must be careful to avoid executing with bad state from context registers inherited from a malicious client. Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/2955 Fixes: 09aa9e45863e ("drm/i915/gt: Restore clear-residual mitigations for Ivybridge, Baytrail") Testcase: igt/gem_ctx_isolation # ivb,vlv Signed-off-by: Chris Wilson Cc: Mika Kuoppala Cc: Akeem G Abodunrin Reviewed-by: Akeem G Abodunrin Link: https://patchwork.freedesktop.org/patch/msgid/20210117093015.29143-1-chris@chris-wilson.co.uk (cherry picked from commit ace44e13e577c2ae59980e9a6ff5ca253b1cf831) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/gt/gen7_renderclear.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/gpu/drm/i915/gt/gen7_renderclear.c b/drivers/gpu/drm/i915/gt/gen7_renderclear.c index 94465374ca2fe..e961ad6a31294 100644 --- a/drivers/gpu/drm/i915/gt/gen7_renderclear.c +++ b/drivers/gpu/drm/i915/gt/gen7_renderclear.c @@ -390,6 +390,16 @@ static void emit_batch(struct i915_vma * const vma, &cb_kernel_ivb, desc_count); + /* Reset inherited context registers */ + gen7_emit_pipeline_invalidate(&cmds); + batch_add(&cmds, MI_LOAD_REGISTER_IMM(2)); + batch_add(&cmds, i915_mmio_reg_offset(CACHE_MODE_0_GEN7)); + batch_add(&cmds, 0xffff0000); + batch_add(&cmds, i915_mmio_reg_offset(CACHE_MODE_1)); + batch_add(&cmds, 0xffff0000 | PIXEL_SUBSPAN_COLLECT_OPT_DISABLE); + gen7_emit_pipeline_flush(&cmds); + + /* Switch to the media pipeline and our base address */ gen7_emit_pipeline_invalidate(&cmds); batch_add(&cmds, PIPELINE_SELECT | PIPELINE_SELECT_MEDIA); batch_add(&cmds, MI_NOOP); @@ -399,9 +409,11 @@ static void emit_batch(struct i915_vma * const vma, gen7_emit_state_base_address(&cmds, descriptors); gen7_emit_pipeline_invalidate(&cmds); + /* Set the clear-residual kernel state */ gen7_emit_vfe_state(&cmds, bv, urb_size - 1, 0, 0); gen7_emit_interface_descriptor_load(&cmds, descriptors, desc_count); + /* Execute the kernel on all HW threads */ for (i = 0; i < num_primitives(bv); i++) gen7_emit_media_object(&cmds, i); -- GitLab From a2a5f5628e5494ca9353f761f7fe783dfa82fb9a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Mon, 7 Dec 2020 22:35:11 +0200 Subject: [PATCH 1481/2012] drm/i915: Fix ICL MG PHY vswing handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The MH PHY vswing table does have all the entries these days. Get rid of the old hacks in the code which claim otherwise. This hack was totally bogus anyway. The correct way to handle the lack of those two entries would have been to declare our max vswing and pre-emph to both be level 2. Cc: José Roberto de Souza Cc: Clinton Taylor Fixes: 9f7ffa297978 ("drm/i915/tc/icl: Update TC vswing tables") Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20201207203512.1718-1-ville.syrjala@linux.intel.com Reviewed-by: Imre Deak Reviewed-by: José Roberto de Souza (cherry picked from commit 5ec346476e795089b7dac8ab9dcee30c8d80ad84) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_ddi.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c index d5ace48b1acee..bf17365857caf 100644 --- a/drivers/gpu/drm/i915/display/intel_ddi.c +++ b/drivers/gpu/drm/i915/display/intel_ddi.c @@ -2755,12 +2755,11 @@ static void icl_mg_phy_ddi_vswing_sequence(struct intel_encoder *encoder, u32 val; ddi_translations = icl_get_mg_buf_trans(encoder, crtc_state, &n_entries); - /* The table does not have values for level 3 and level 9. */ - if (level >= n_entries || level == 3 || level == 9) { + if (level >= n_entries) { drm_dbg_kms(&dev_priv->drm, "DDI translation not found for level %d. Using %d instead.", - level, n_entries - 2); - level = n_entries - 2; + level, n_entries - 1); + level = n_entries - 1; } /* Set MG_TX_LINK_PARAMS cri_use_fs32 to 0. */ -- GitLab From 8f6d08c9af284d74276da6681348e4673f13caea Mon Sep 17 00:00:00 2001 From: Umesh Nerlige Ramappa Date: Thu, 21 Jan 2021 16:19:35 +0000 Subject: [PATCH 1482/2012] drm/i915: Check for all subplatform bits Current code is checking only 2 bits in the subplatform, but actually 3 bits are allocated for the field. Check all 3 bits. Fixes: 805446c8347c ("drm/i915: Introduce concept of a sub-platform") Cc: Tvrtko Ursulin Signed-off-by: Umesh Nerlige Ramappa Reviewed-by: Tvrtko Ursulin Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20210121161936.746591-1-tvrtko.ursulin@linux.intel.com (cherry picked from commit 27b695ee1af9bb36605e67055874ec081306ac28) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_drv.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 632c713227dc7..c6964f82a1bb6 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1346,7 +1346,7 @@ intel_subplatform(const struct intel_runtime_info *info, enum intel_platform p) { const unsigned int pi = __platform_mask_index(info, p); - return info->platform_mask[pi] & INTEL_SUBPLATFORM_BITS; + return info->platform_mask[pi] & ((1 << INTEL_SUBPLATFORM_BITS) - 1); } static __always_inline bool -- GitLab From 3d480fe1befa0ef434f5c25199e7d45c26870555 Mon Sep 17 00:00:00 2001 From: Pan Bian Date: Thu, 21 Jan 2021 17:56:40 -0800 Subject: [PATCH 1483/2012] drm/i915/selftest: Fix potential memory leak Object out is not released on path that no VMA instance found. The root cause is jumping to an unexpected label on the error path. Fixes: a47e788c2310 ("drm/i915/selftests: Exercise CS TLB invalidation") Signed-off-by: Pan Bian Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20210122015640.16002-1-bianpan2016@163.com (cherry picked from commit 2b015017d5cb01477a79ca184ac25c247d664568) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c index c53a222e3dece..713770fb2b92d 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c @@ -1880,7 +1880,7 @@ static int igt_cs_tlb(void *arg) vma = i915_vma_instance(out, vm, NULL); if (IS_ERR(vma)) { err = PTR_ERR(vma); - goto out_put_batch; + goto out_put_out; } err = i915_vma_pin(vma, 0, 0, -- GitLab From f6e98a1809faa02f40e0d089d6cfc1aa372a34c0 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 21 Jan 2021 23:28:07 +0000 Subject: [PATCH 1484/2012] drm/i915: Always flush the active worker before returning from the wait The first thing the active retirement worker does is decrement the i915_active count. The first thing we do during i915_active_wait is try to increment the i915_active count, but only if already active [non-zero]. The wait may see that the retirement is already started and so marked the i915_active as idle, and skip waiting for the retirement handler. However, the caller of i915_active_wait may immediately free the i915_active upon returning (e.g. i915_vma_destroy) so we must not return before the concurrent access from the worker is completed. We must always flush the worker. Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/2473 Fixes: 274cbf20fd10 ("drm/i915: Push the i915_active.retire into a worker") Signed-off-by: Chris Wilson Cc: Matthew Auld Cc: Tvrtko Ursulin Cc: # v5.5+ Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20210121232807.16618-1-chris@chris-wilson.co.uk (cherry picked from commit 977a372e972cb42799746c284035a33c64ebace9) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_active.c | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c index 10a865f3dc09a..9ed19b8bca600 100644 --- a/drivers/gpu/drm/i915/i915_active.c +++ b/drivers/gpu/drm/i915/i915_active.c @@ -631,24 +631,26 @@ static int flush_lazy_signals(struct i915_active *ref) int __i915_active_wait(struct i915_active *ref, int state) { - int err; - might_sleep(); - if (!i915_active_acquire_if_busy(ref)) - return 0; - /* Any fence added after the wait begins will not be auto-signaled */ - err = flush_lazy_signals(ref); - i915_active_release(ref); - if (err) - return err; + if (i915_active_acquire_if_busy(ref)) { + int err; - if (!i915_active_is_idle(ref) && - ___wait_var_event(ref, i915_active_is_idle(ref), - state, 0, 0, schedule())) - return -EINTR; + err = flush_lazy_signals(ref); + i915_active_release(ref); + if (err) + return err; + if (___wait_var_event(ref, i915_active_is_idle(ref), + state, 0, 0, schedule())) + return -EINTR; + } + + /* + * After the wait is complete, the caller may free the active. + * We have to flush any concurrent retirement before returning. + */ flush_work(&ref->work); return 0; } -- GitLab From 489140b5ba2e7cc4b853c29e0591895ddb462a82 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 25 Jan 2021 12:50:33 +0000 Subject: [PATCH 1485/2012] drm/i915/gt: Always try to reserve GGTT address 0x0 Since writing to address 0 is a very common mistake, let's try to avoid putting anything sensitive there. References: https://gitlab.freedesktop.org/drm/intel/-/issues/2989 Signed-off-by: Chris Wilson Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20210125125033.23656-1-chris@chris-wilson.co.uk Cc: stable@vger.kernel.org (cherry picked from commit 56b429cc584c6ed8b895d8d8540959655db1ff73) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/gt/intel_ggtt.c | 47 +++++++++++++++++++++------- 1 file changed, 35 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c index cf94525be2c18..db8c66dde6558 100644 --- a/drivers/gpu/drm/i915/gt/intel_ggtt.c +++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c @@ -526,16 +526,39 @@ static int init_ggtt(struct i915_ggtt *ggtt) mutex_init(&ggtt->error_mutex); if (ggtt->mappable_end) { - /* Reserve a mappable slot for our lockless error capture */ - ret = drm_mm_insert_node_in_range(&ggtt->vm.mm, - &ggtt->error_capture, - PAGE_SIZE, 0, - I915_COLOR_UNEVICTABLE, - 0, ggtt->mappable_end, - DRM_MM_INSERT_LOW); - if (ret) - return ret; + /* + * Reserve a mappable slot for our lockless error capture. + * + * We strongly prefer taking address 0x0 in order to protect + * other critical buffers against accidental overwrites, + * as writing to address 0 is a very common mistake. + * + * Since 0 may already be in use by the system (e.g. the BIOS + * framebuffer), we let the reservation fail quietly and hope + * 0 remains reserved always. + * + * If we fail to reserve 0, and then fail to find any space + * for an error-capture, remain silent. We can afford not + * to reserve an error_capture node as we have fallback + * paths, and we trust that 0 will remain reserved. However, + * the only likely reason for failure to insert is a driver + * bug, which we expect to cause other failures... + */ + ggtt->error_capture.size = I915_GTT_PAGE_SIZE; + ggtt->error_capture.color = I915_COLOR_UNEVICTABLE; + if (drm_mm_reserve_node(&ggtt->vm.mm, &ggtt->error_capture)) + drm_mm_insert_node_in_range(&ggtt->vm.mm, + &ggtt->error_capture, + ggtt->error_capture.size, 0, + ggtt->error_capture.color, + 0, ggtt->mappable_end, + DRM_MM_INSERT_LOW); } + if (drm_mm_node_allocated(&ggtt->error_capture)) + drm_dbg(&ggtt->vm.i915->drm, + "Reserved GGTT:[%llx, %llx] for use by error capture\n", + ggtt->error_capture.start, + ggtt->error_capture.start + ggtt->error_capture.size); /* * The upper portion of the GuC address space has a sizeable hole @@ -548,9 +571,9 @@ static int init_ggtt(struct i915_ggtt *ggtt) /* Clear any non-preallocated blocks */ drm_mm_for_each_hole(entry, &ggtt->vm.mm, hole_start, hole_end) { - drm_dbg_kms(&ggtt->vm.i915->drm, - "clearing unused GTT space: [%lx, %lx]\n", - hole_start, hole_end); + drm_dbg(&ggtt->vm.i915->drm, + "clearing unused GTT space: [%lx, %lx]\n", + hole_start, hole_end); ggtt->vm.clear_range(&ggtt->vm, hole_start, hole_end - hole_start); } -- GitLab From 12bb3f7f1b03d5913b3f9d4236a488aa7774dfe9 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 20 Jan 2021 16:00:24 +0100 Subject: [PATCH 1486/2012] futex: Ensure the correct return value from futex_lock_pi() In case that futex_lock_pi() was aborted by a signal or a timeout and the task returned without acquiring the rtmutex, but is the designated owner of the futex due to a concurrent futex_unlock_pi() fixup_owner() is invoked to establish consistent state. In that case it invokes fixup_pi_state_owner() which in turn tries to acquire the rtmutex again. If that succeeds then it does not propagate this success to fixup_owner() and futex_lock_pi() returns -EINTR or -ETIMEOUT despite having the futex locked. Return success from fixup_pi_state_owner() in all cases where the current task owns the rtmutex and therefore the futex and propagate it correctly through fixup_owner(). Fixup the other callsite which does not expect a positive return value. Fixes: c1e2f0eaf015 ("futex: Avoid violating the 10th rule of futex") Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra (Intel) Cc: stable@vger.kernel.org --- kernel/futex.c | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/kernel/futex.c b/kernel/futex.c index c47d1015d7591..d5e61c2e865e4 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -2373,8 +2373,8 @@ retry: } if (__rt_mutex_futex_trylock(&pi_state->pi_mutex)) { - /* We got the lock after all, nothing to fix. */ - ret = 0; + /* We got the lock. pi_state is correct. Tell caller. */ + ret = 1; goto out_unlock; } @@ -2402,7 +2402,7 @@ retry: * We raced against a concurrent self; things are * already fixed up. Nothing to do. */ - ret = 0; + ret = 1; goto out_unlock; } newowner = argowner; @@ -2448,7 +2448,7 @@ retry: raw_spin_unlock(&newowner->pi_lock); raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock); - return 0; + return argowner == current; /* * In order to reschedule or handle a page fault, we need to drop the @@ -2490,7 +2490,7 @@ handle_err: * Check if someone else fixed it for us: */ if (pi_state->owner != oldowner) { - ret = 0; + ret = argowner == current; goto out_unlock; } @@ -2523,8 +2523,6 @@ static long futex_wait_restart(struct restart_block *restart); */ static int fixup_owner(u32 __user *uaddr, struct futex_q *q, int locked) { - int ret = 0; - if (locked) { /* * Got the lock. We might not be the anticipated owner if we @@ -2535,8 +2533,8 @@ static int fixup_owner(u32 __user *uaddr, struct futex_q *q, int locked) * stable state, anything else needs more attention. */ if (q->pi_state->owner != current) - ret = fixup_pi_state_owner(uaddr, q, current); - return ret ? ret : locked; + return fixup_pi_state_owner(uaddr, q, current); + return 1; } /* @@ -2547,10 +2545,8 @@ static int fixup_owner(u32 __user *uaddr, struct futex_q *q, int locked) * Another speculative read; pi_state->owner == current is unstable * but needs our attention. */ - if (q->pi_state->owner == current) { - ret = fixup_pi_state_owner(uaddr, q, NULL); - return ret; - } + if (q->pi_state->owner == current) + return fixup_pi_state_owner(uaddr, q, NULL); /* * Paranoia check. If we did not take the lock, then we should not be @@ -2563,7 +2559,7 @@ static int fixup_owner(u32 __user *uaddr, struct futex_q *q, int locked) q->pi_state->owner); } - return ret; + return 0; } /** @@ -3261,7 +3257,7 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, if (q.pi_state && (q.pi_state->owner != current)) { spin_lock(q.lock_ptr); ret = fixup_pi_state_owner(uaddr2, &q, current); - if (ret && rt_mutex_owner(&q.pi_state->pi_mutex) == current) { + if (ret < 0 && rt_mutex_owner(&q.pi_state->pi_mutex) == current) { pi_state = q.pi_state; get_pi_state(pi_state); } @@ -3271,6 +3267,11 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, */ put_pi_state(q.pi_state); spin_unlock(q.lock_ptr); + /* + * Adjust the return value. It's either -EFAULT or + * success (1) but the caller expects 0 for success. + */ + ret = ret < 0 ? ret : 0; } } else { struct rt_mutex *pi_mutex; -- GitLab From 04b79c55201f02ffd675e1231d731365e335c307 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 19 Jan 2021 16:06:10 +0100 Subject: [PATCH 1487/2012] futex: Replace pointless printk in fixup_owner() If that unexpected case of inconsistent arguments ever happens then the futex state is left completely inconsistent and the printk is not really helpful. Replace it with a warning and make the state consistent. Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra (Intel) Cc: stable@vger.kernel.org --- kernel/futex.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/kernel/futex.c b/kernel/futex.c index d5e61c2e865e4..5dc8f893d5235 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -2550,14 +2550,10 @@ static int fixup_owner(u32 __user *uaddr, struct futex_q *q, int locked) /* * Paranoia check. If we did not take the lock, then we should not be - * the owner of the rt_mutex. + * the owner of the rt_mutex. Warn and establish consistent state. */ - if (rt_mutex_owner(&q->pi_state->pi_mutex) == current) { - printk(KERN_ERR "fixup_owner: ret = %d pi-mutex: %p " - "pi-state %p\n", ret, - q->pi_state->pi_mutex.owner, - q->pi_state->owner); - } + if (WARN_ON_ONCE(rt_mutex_owner(&q->pi_state->pi_mutex) == current)) + return fixup_pi_state_owner(uaddr, q, current); return 0; } -- GitLab From c5cade200ab9a2a3be9e7f32a752c8d86b502ec7 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 19 Jan 2021 15:21:35 +0100 Subject: [PATCH 1488/2012] futex: Provide and use pi_state_update_owner() Updating pi_state::owner is done at several places with the same code. Provide a function for it and use that at the obvious places. This is also a preparation for a bug fix to avoid yet another copy of the same code or alternatively introducing a completely unpenetratable mess of gotos. Originally-by: Peter Zijlstra Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra (Intel) Cc: stable@vger.kernel.org --- kernel/futex.c | 66 +++++++++++++++++++++++++------------------------- 1 file changed, 33 insertions(+), 33 deletions(-) diff --git a/kernel/futex.c b/kernel/futex.c index 5dc8f893d5235..7837f9e561fae 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -763,6 +763,29 @@ static struct futex_pi_state *alloc_pi_state(void) return pi_state; } +static void pi_state_update_owner(struct futex_pi_state *pi_state, + struct task_struct *new_owner) +{ + struct task_struct *old_owner = pi_state->owner; + + lockdep_assert_held(&pi_state->pi_mutex.wait_lock); + + if (old_owner) { + raw_spin_lock(&old_owner->pi_lock); + WARN_ON(list_empty(&pi_state->list)); + list_del_init(&pi_state->list); + raw_spin_unlock(&old_owner->pi_lock); + } + + if (new_owner) { + raw_spin_lock(&new_owner->pi_lock); + WARN_ON(!list_empty(&pi_state->list)); + list_add(&pi_state->list, &new_owner->pi_state_list); + pi_state->owner = new_owner; + raw_spin_unlock(&new_owner->pi_lock); + } +} + static void get_pi_state(struct futex_pi_state *pi_state) { WARN_ON_ONCE(!refcount_inc_not_zero(&pi_state->refcount)); @@ -1521,26 +1544,15 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_pi_state *pi_ ret = -EINVAL; } - if (ret) - goto out_unlock; - - /* - * This is a point of no return; once we modify the uval there is no - * going back and subsequent operations must not fail. - */ - - raw_spin_lock(&pi_state->owner->pi_lock); - WARN_ON(list_empty(&pi_state->list)); - list_del_init(&pi_state->list); - raw_spin_unlock(&pi_state->owner->pi_lock); - - raw_spin_lock(&new_owner->pi_lock); - WARN_ON(!list_empty(&pi_state->list)); - list_add(&pi_state->list, &new_owner->pi_state_list); - pi_state->owner = new_owner; - raw_spin_unlock(&new_owner->pi_lock); - - postunlock = __rt_mutex_futex_unlock(&pi_state->pi_mutex, &wake_q); + if (!ret) { + /* + * This is a point of no return; once we modified the uval + * there is no going back and subsequent operations must + * not fail. + */ + pi_state_update_owner(pi_state, new_owner); + postunlock = __rt_mutex_futex_unlock(&pi_state->pi_mutex, &wake_q); + } out_unlock: raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock); @@ -2433,19 +2445,7 @@ retry: * We fixed up user space. Now we need to fix the pi_state * itself. */ - if (pi_state->owner != NULL) { - raw_spin_lock(&pi_state->owner->pi_lock); - WARN_ON(list_empty(&pi_state->list)); - list_del_init(&pi_state->list); - raw_spin_unlock(&pi_state->owner->pi_lock); - } - - pi_state->owner = newowner; - - raw_spin_lock(&newowner->pi_lock); - WARN_ON(!list_empty(&pi_state->list)); - list_add(&pi_state->list, &newowner->pi_state_list); - raw_spin_unlock(&newowner->pi_lock); + pi_state_update_owner(pi_state, newowner); raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock); return argowner == current; -- GitLab From 2156ac1934166d6deb6cd0f6ffc4c1076ec63697 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 20 Jan 2021 11:32:07 +0100 Subject: [PATCH 1489/2012] rtmutex: Remove unused argument from rt_mutex_proxy_unlock() Nothing uses the argument. Remove it as preparation to use pi_state_update_owner(). Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra (Intel) Cc: stable@vger.kernel.org --- kernel/futex.c | 2 +- kernel/locking/rtmutex.c | 3 +-- kernel/locking/rtmutex_common.h | 3 +-- 3 files changed, 3 insertions(+), 5 deletions(-) diff --git a/kernel/futex.c b/kernel/futex.c index 7837f9e561fae..cfca221c1bc59 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -818,7 +818,7 @@ static void put_pi_state(struct futex_pi_state *pi_state) list_del_init(&pi_state->list); raw_spin_unlock(&owner->pi_lock); } - rt_mutex_proxy_unlock(&pi_state->pi_mutex, owner); + rt_mutex_proxy_unlock(&pi_state->pi_mutex); raw_spin_unlock_irqrestore(&pi_state->pi_mutex.wait_lock, flags); } diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c index cfdd5b93264d7..2f8cd616d3b29 100644 --- a/kernel/locking/rtmutex.c +++ b/kernel/locking/rtmutex.c @@ -1716,8 +1716,7 @@ void rt_mutex_init_proxy_locked(struct rt_mutex *lock, * possible because it belongs to the pi_state which is about to be freed * and it is not longer visible to other tasks. */ -void rt_mutex_proxy_unlock(struct rt_mutex *lock, - struct task_struct *proxy_owner) +void rt_mutex_proxy_unlock(struct rt_mutex *lock) { debug_rt_mutex_proxy_unlock(lock); rt_mutex_set_owner(lock, NULL); diff --git a/kernel/locking/rtmutex_common.h b/kernel/locking/rtmutex_common.h index d1d62f942be22..ca6fb489007b6 100644 --- a/kernel/locking/rtmutex_common.h +++ b/kernel/locking/rtmutex_common.h @@ -133,8 +133,7 @@ enum rtmutex_chainwalk { extern struct task_struct *rt_mutex_next_owner(struct rt_mutex *lock); extern void rt_mutex_init_proxy_locked(struct rt_mutex *lock, struct task_struct *proxy_owner); -extern void rt_mutex_proxy_unlock(struct rt_mutex *lock, - struct task_struct *proxy_owner); +extern void rt_mutex_proxy_unlock(struct rt_mutex *lock); extern void rt_mutex_init_waiter(struct rt_mutex_waiter *waiter); extern int __rt_mutex_start_proxy_lock(struct rt_mutex *lock, struct rt_mutex_waiter *waiter, -- GitLab From 6ccc84f917d33312eb2846bd7b567639f585ad6d Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 20 Jan 2021 11:35:19 +0100 Subject: [PATCH 1490/2012] futex: Use pi_state_update_owner() in put_pi_state() No point in open coding it. This way it gains the extra sanity checks. Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra (Intel) Cc: stable@vger.kernel.org --- kernel/futex.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/kernel/futex.c b/kernel/futex.c index cfca221c1bc59..a0fe63c342487 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -808,16 +808,10 @@ static void put_pi_state(struct futex_pi_state *pi_state) * and has cleaned up the pi_state already */ if (pi_state->owner) { - struct task_struct *owner; unsigned long flags; raw_spin_lock_irqsave(&pi_state->pi_mutex.wait_lock, flags); - owner = pi_state->owner; - if (owner) { - raw_spin_lock(&owner->pi_lock); - list_del_init(&pi_state->list); - raw_spin_unlock(&owner->pi_lock); - } + pi_state_update_owner(pi_state, NULL); rt_mutex_proxy_unlock(&pi_state->pi_mutex); raw_spin_unlock_irqrestore(&pi_state->pi_mutex.wait_lock, flags); } -- GitLab From f2dac39d93987f7de1e20b3988c8685523247ae2 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 19 Jan 2021 16:26:38 +0100 Subject: [PATCH 1491/2012] futex: Simplify fixup_pi_state_owner() Too many gotos already and an upcoming fix would make it even more unreadable. Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra (Intel) Cc: stable@vger.kernel.org --- kernel/futex.c | 53 +++++++++++++++++++++++++------------------------- 1 file changed, 26 insertions(+), 27 deletions(-) diff --git a/kernel/futex.c b/kernel/futex.c index a0fe63c342487..7a38ead96cae7 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -2329,18 +2329,13 @@ static void unqueue_me_pi(struct futex_q *q) spin_unlock(q->lock_ptr); } -static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q, - struct task_struct *argowner) +static int __fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q, + struct task_struct *argowner) { struct futex_pi_state *pi_state = q->pi_state; - u32 uval, curval, newval; struct task_struct *oldowner, *newowner; - u32 newtid; - int ret, err = 0; - - lockdep_assert_held(q->lock_ptr); - - raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock); + u32 uval, curval, newval, newtid; + int err = 0; oldowner = pi_state->owner; @@ -2374,14 +2369,12 @@ retry: * We raced against a concurrent self; things are * already fixed up. Nothing to do. */ - ret = 0; - goto out_unlock; + return 0; } if (__rt_mutex_futex_trylock(&pi_state->pi_mutex)) { /* We got the lock. pi_state is correct. Tell caller. */ - ret = 1; - goto out_unlock; + return 1; } /* @@ -2408,8 +2401,7 @@ retry: * We raced against a concurrent self; things are * already fixed up. Nothing to do. */ - ret = 1; - goto out_unlock; + return 1; } newowner = argowner; } @@ -2440,7 +2432,6 @@ retry: * itself. */ pi_state_update_owner(pi_state, newowner); - raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock); return argowner == current; @@ -2463,17 +2454,16 @@ handle_err: switch (err) { case -EFAULT: - ret = fault_in_user_writeable(uaddr); + err = fault_in_user_writeable(uaddr); break; case -EAGAIN: cond_resched(); - ret = 0; + err = 0; break; default: WARN_ON_ONCE(1); - ret = err; break; } @@ -2483,17 +2473,26 @@ handle_err: /* * Check if someone else fixed it for us: */ - if (pi_state->owner != oldowner) { - ret = argowner == current; - goto out_unlock; - } + if (pi_state->owner != oldowner) + return argowner == current; - if (ret) - goto out_unlock; + /* Retry if err was -EAGAIN or the fault in succeeded */ + if (!err) + goto retry; - goto retry; + return err; +} -out_unlock: +static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q, + struct task_struct *argowner) +{ + struct futex_pi_state *pi_state = q->pi_state; + int ret; + + lockdep_assert_held(q->lock_ptr); + + raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock); + ret = __fixup_pi_state_owner(uaddr, q, argowner); raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock); return ret; } -- GitLab From 34b1a1ce1458f50ef27c54e28eb9b1947012907a Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 18 Jan 2021 19:01:21 +0100 Subject: [PATCH 1492/2012] futex: Handle faults correctly for PI futexes fixup_pi_state_owner() tries to ensure that the state of the rtmutex, pi_state and the user space value related to the PI futex are consistent before returning to user space. In case that the user space value update faults and the fault cannot be resolved by faulting the page in via fault_in_user_writeable() the function returns with -EFAULT and leaves the rtmutex and pi_state owner state inconsistent. A subsequent futex_unlock_pi() operates on the inconsistent pi_state and releases the rtmutex despite not owning it which can corrupt the RB tree of the rtmutex and cause a subsequent kernel stack use after free. It was suggested to loop forever in fixup_pi_state_owner() if the fault cannot be resolved, but that results in runaway tasks which is especially undesired when the problem happens due to a programming error and not due to malice. As the user space value cannot be fixed up, the proper solution is to make the rtmutex and the pi_state consistent so both have the same owner. This leaves the user space value out of sync. Any subsequent operation on the futex will fail because the 10th rule of PI futexes (pi_state owner and user space value are consistent) has been violated. As a consequence this removes the inept attempts of 'fixing' the situation in case that the current task owns the rtmutex when returning with an unresolvable fault by unlocking the rtmutex which left pi_state::owner and rtmutex::owner out of sync in a different and only slightly less dangerous way. Fixes: 1b7558e457ed ("futexes: fix fault handling in futex_lock_pi") Reported-by: gzobqq@gmail.com Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra (Intel) Cc: stable@vger.kernel.org --- kernel/futex.c | 57 ++++++++++++++++++-------------------------------- 1 file changed, 20 insertions(+), 37 deletions(-) diff --git a/kernel/futex.c b/kernel/futex.c index 7a38ead96cae7..45a13eb8894e5 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -958,7 +958,8 @@ static inline void exit_pi_state_list(struct task_struct *curr) { } * FUTEX_OWNER_DIED bit. See [4] * * [10] There is no transient state which leaves owner and user space - * TID out of sync. + * TID out of sync. Except one error case where the kernel is denied + * write access to the user address, see fixup_pi_state_owner(). * * * Serialization and lifetime rules: @@ -2480,6 +2481,24 @@ handle_err: if (!err) goto retry; + /* + * fault_in_user_writeable() failed so user state is immutable. At + * best we can make the kernel state consistent but user state will + * be most likely hosed and any subsequent unlock operation will be + * rejected due to PI futex rule [10]. + * + * Ensure that the rtmutex owner is also the pi_state owner despite + * the user space value claiming something different. There is no + * point in unlocking the rtmutex if current is the owner as it + * would need to wait until the next waiter has taken the rtmutex + * to guarantee consistent state. Keep it simple. Userspace asked + * for this wreckaged state. + * + * The rtmutex has an owner - either current or some other + * task. See the EAGAIN loop above. + */ + pi_state_update_owner(pi_state, rt_mutex_owner(&pi_state->pi_mutex)); + return err; } @@ -2756,7 +2775,6 @@ static int futex_lock_pi(u32 __user *uaddr, unsigned int flags, ktime_t *time, int trylock) { struct hrtimer_sleeper timeout, *to; - struct futex_pi_state *pi_state = NULL; struct task_struct *exiting = NULL; struct rt_mutex_waiter rt_waiter; struct futex_hash_bucket *hb; @@ -2892,23 +2910,8 @@ no_block: if (res) ret = (res < 0) ? res : 0; - /* - * If fixup_owner() faulted and was unable to handle the fault, unlock - * it and return the fault to userspace. - */ - if (ret && (rt_mutex_owner(&q.pi_state->pi_mutex) == current)) { - pi_state = q.pi_state; - get_pi_state(pi_state); - } - /* Unqueue and drop the lock */ unqueue_me_pi(&q); - - if (pi_state) { - rt_mutex_futex_unlock(&pi_state->pi_mutex); - put_pi_state(pi_state); - } - goto out; out_unlock_put_key: @@ -3168,7 +3171,6 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, u32 __user *uaddr2) { struct hrtimer_sleeper timeout, *to; - struct futex_pi_state *pi_state = NULL; struct rt_mutex_waiter rt_waiter; struct futex_hash_bucket *hb; union futex_key key2 = FUTEX_KEY_INIT; @@ -3246,10 +3248,6 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, if (q.pi_state && (q.pi_state->owner != current)) { spin_lock(q.lock_ptr); ret = fixup_pi_state_owner(uaddr2, &q, current); - if (ret < 0 && rt_mutex_owner(&q.pi_state->pi_mutex) == current) { - pi_state = q.pi_state; - get_pi_state(pi_state); - } /* * Drop the reference to the pi state which * the requeue_pi() code acquired for us. @@ -3291,25 +3289,10 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, if (res) ret = (res < 0) ? res : 0; - /* - * If fixup_pi_state_owner() faulted and was unable to handle - * the fault, unlock the rt_mutex and return the fault to - * userspace. - */ - if (ret && rt_mutex_owner(&q.pi_state->pi_mutex) == current) { - pi_state = q.pi_state; - get_pi_state(pi_state); - } - /* Unqueue and drop the lock. */ unqueue_me_pi(&q); } - if (pi_state) { - rt_mutex_futex_unlock(&pi_state->pi_mutex); - put_pi_state(pi_state); - } - if (ret == -EINTR) { /* * We've already been requeued, but cannot restart by calling -- GitLab From a1bb3cd58913338e1b627ea6b8c03c2ae82d293f Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Tue, 26 Jan 2021 15:28:26 +0000 Subject: [PATCH 1493/2012] io_uring: fix __io_uring_files_cancel() with TASK_UNINTERRUPTIBLE If the tctx inflight number haven't changed because of cancellation, __io_uring_task_cancel() will continue leaving the task in TASK_UNINTERRUPTIBLE state, that's not expected by __io_uring_files_cancel(). Ensure we always call finish_wait() before retrying. Cc: stable@vger.kernel.org # 5.9+ Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io_uring.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 2166c469789d5..09aada153a71b 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -9124,16 +9124,15 @@ void __io_uring_task_cancel(void) prepare_to_wait(&tctx->wait, &wait, TASK_UNINTERRUPTIBLE); /* - * If we've seen completions, retry. This avoids a race where - * a completion comes in before we did prepare_to_wait(). + * If we've seen completions, retry without waiting. This + * avoids a race where a completion comes in before we did + * prepare_to_wait(). */ - if (inflight != tctx_inflight(tctx)) - continue; - schedule(); + if (inflight == tctx_inflight(tctx)) + schedule(); finish_wait(&tctx->wait, &wait); } while (1); - finish_wait(&tctx->wait, &wait); atomic_dec(&tctx->in_idle); io_uring_remove_task_files(tctx); -- GitLab From ca70f00bed6cb255b7a9b91aa18a2717c9217f70 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Tue, 26 Jan 2021 15:28:27 +0000 Subject: [PATCH 1494/2012] io_uring: fix cancellation taking mutex while TASK_UNINTERRUPTIBLE do not call blocking ops when !TASK_RUNNING; state=2 set at [<00000000ced9dbfc>] prepare_to_wait+0x1f4/0x3b0 kernel/sched/wait.c:262 WARNING: CPU: 1 PID: 19888 at kernel/sched/core.c:7853 __might_sleep+0xed/0x100 kernel/sched/core.c:7848 RIP: 0010:__might_sleep+0xed/0x100 kernel/sched/core.c:7848 Call Trace: __mutex_lock_common+0xc4/0x2ef0 kernel/locking/mutex.c:935 __mutex_lock kernel/locking/mutex.c:1103 [inline] mutex_lock_nested+0x1a/0x20 kernel/locking/mutex.c:1118 io_wq_submit_work+0x39a/0x720 fs/io_uring.c:6411 io_run_cancel fs/io-wq.c:856 [inline] io_wqe_cancel_pending_work fs/io-wq.c:990 [inline] io_wq_cancel_cb+0x614/0xcb0 fs/io-wq.c:1027 io_uring_cancel_files fs/io_uring.c:8874 [inline] io_uring_cancel_task_requests fs/io_uring.c:8952 [inline] __io_uring_files_cancel+0x115d/0x19e0 fs/io_uring.c:9038 io_uring_files_cancel include/linux/io_uring.h:51 [inline] do_exit+0x2e6/0x2490 kernel/exit.c:780 do_group_exit+0x168/0x2d0 kernel/exit.c:922 get_signal+0x16b5/0x2030 kernel/signal.c:2770 arch_do_signal_or_restart+0x8e/0x6a0 arch/x86/kernel/signal.c:811 handle_signal_work kernel/entry/common.c:147 [inline] exit_to_user_mode_loop kernel/entry/common.c:171 [inline] exit_to_user_mode_prepare+0xac/0x1e0 kernel/entry/common.c:201 __syscall_exit_to_user_mode_work kernel/entry/common.c:291 [inline] syscall_exit_to_user_mode+0x48/0x190 kernel/entry/common.c:302 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Rewrite io_uring_cancel_files() to mimic __io_uring_task_cancel()'s counting scheme, so it does all the heavy work before setting TASK_UNINTERRUPTIBLE. Cc: stable@vger.kernel.org # 5.9+ Reported-by: syzbot+f655445043a26a7cfab8@syzkaller.appspotmail.com Signed-off-by: Pavel Begunkov [axboe: fix inverted task check] Signed-off-by: Jens Axboe --- fs/io_uring.c | 39 ++++++++++++++++++++++----------------- 1 file changed, 22 insertions(+), 17 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 09aada153a71b..bb0270eeb8cb6 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -8873,30 +8873,31 @@ static void io_cancel_defer_files(struct io_ring_ctx *ctx, } } +static int io_uring_count_inflight(struct io_ring_ctx *ctx, + struct task_struct *task, + struct files_struct *files) +{ + struct io_kiocb *req; + int cnt = 0; + + spin_lock_irq(&ctx->inflight_lock); + list_for_each_entry(req, &ctx->inflight_list, inflight_entry) + cnt += io_match_task(req, task, files); + spin_unlock_irq(&ctx->inflight_lock); + return cnt; +} + static void io_uring_cancel_files(struct io_ring_ctx *ctx, struct task_struct *task, struct files_struct *files) { while (!list_empty_careful(&ctx->inflight_list)) { struct io_task_cancel cancel = { .task = task, .files = files }; - struct io_kiocb *req; DEFINE_WAIT(wait); - bool found = false; - - spin_lock_irq(&ctx->inflight_lock); - list_for_each_entry(req, &ctx->inflight_list, inflight_entry) { - if (!io_match_task(req, task, files)) - continue; - found = true; - break; - } - if (found) - prepare_to_wait(&task->io_uring->wait, &wait, - TASK_UNINTERRUPTIBLE); - spin_unlock_irq(&ctx->inflight_lock); + int inflight; - /* We need to keep going until we don't find a matching req */ - if (!found) + inflight = io_uring_count_inflight(ctx, task, files); + if (!inflight) break; io_wq_cancel_cb(ctx->io_wq, io_cancel_task_cb, &cancel, true); @@ -8905,7 +8906,11 @@ static void io_uring_cancel_files(struct io_ring_ctx *ctx, io_cqring_overflow_flush(ctx, true, task, files); /* cancellations _may_ trigger task work */ io_run_task_work(); - schedule(); + + prepare_to_wait(&task->io_uring->wait, &wait, + TASK_UNINTERRUPTIBLE); + if (inflight == io_uring_count_inflight(ctx, task, files)) + schedule(); finish_wait(&task->io_uring->wait, &wait); } } -- GitLab From 78031381ae9c88f4f914d66154f4745122149c58 Mon Sep 17 00:00:00 2001 From: Mikko Ylinen Date: Mon, 25 Jan 2021 08:39:36 +0200 Subject: [PATCH 1495/2012] bpf: Drop disabled LSM hooks from the sleepable set Some networking and keys LSM hooks are conditionally enabled and when building the new sleepable BPF LSM hooks with those LSM hooks disabled, the following build error occurs: BTFIDS vmlinux FAILED unresolved symbol bpf_lsm_socket_socketpair To fix the error, conditionally add the relevant networking/keys LSM hooks to the sleepable set. Fixes: 423f16108c9d8 ("bpf: Augment the set of sleepable LSM hooks") Signed-off-by: Mikko Ylinen Signed-off-by: Daniel Borkmann Acked-by: KP Singh Link: https://lore.kernel.org/bpf/20210125063936.89365-1-mikko.ylinen@linux.intel.com --- kernel/bpf/bpf_lsm.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/kernel/bpf/bpf_lsm.c b/kernel/bpf/bpf_lsm.c index 70e5e0b6d69d0..1622a44d1617e 100644 --- a/kernel/bpf/bpf_lsm.c +++ b/kernel/bpf/bpf_lsm.c @@ -149,7 +149,11 @@ BTF_ID(func, bpf_lsm_file_ioctl) BTF_ID(func, bpf_lsm_file_lock) BTF_ID(func, bpf_lsm_file_open) BTF_ID(func, bpf_lsm_file_receive) + +#ifdef CONFIG_SECURITY_NETWORK BTF_ID(func, bpf_lsm_inet_conn_established) +#endif /* CONFIG_SECURITY_NETWORK */ + BTF_ID(func, bpf_lsm_inode_create) BTF_ID(func, bpf_lsm_inode_free_security) BTF_ID(func, bpf_lsm_inode_getattr) @@ -166,7 +170,11 @@ BTF_ID(func, bpf_lsm_inode_symlink) BTF_ID(func, bpf_lsm_inode_unlink) BTF_ID(func, bpf_lsm_kernel_module_request) BTF_ID(func, bpf_lsm_kernfs_init_security) + +#ifdef CONFIG_KEYS BTF_ID(func, bpf_lsm_key_free) +#endif /* CONFIG_KEYS */ + BTF_ID(func, bpf_lsm_mmap_file) BTF_ID(func, bpf_lsm_netlink_send) BTF_ID(func, bpf_lsm_path_notify) @@ -181,6 +189,8 @@ BTF_ID(func, bpf_lsm_sb_show_options) BTF_ID(func, bpf_lsm_sb_statfs) BTF_ID(func, bpf_lsm_sb_umount) BTF_ID(func, bpf_lsm_settime) + +#ifdef CONFIG_SECURITY_NETWORK BTF_ID(func, bpf_lsm_socket_accept) BTF_ID(func, bpf_lsm_socket_bind) BTF_ID(func, bpf_lsm_socket_connect) @@ -195,6 +205,8 @@ BTF_ID(func, bpf_lsm_socket_recvmsg) BTF_ID(func, bpf_lsm_socket_sendmsg) BTF_ID(func, bpf_lsm_socket_shutdown) BTF_ID(func, bpf_lsm_socket_socketpair) +#endif /* CONFIG_SECURITY_NETWORK */ + BTF_ID(func, bpf_lsm_syslog) BTF_ID(func, bpf_lsm_task_alloc) BTF_ID(func, bpf_lsm_task_getsecid) -- GitLab From 4961167bf7482944ca09a6f71263b9e47f949851 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 26 Jan 2021 17:56:03 +0100 Subject: [PATCH 1496/2012] ALSA: hda/via: Apply the workaround generically for Clevo machines We've got another report indicating a similar problem wrt the power-saving behavior with VIA codec on Clevo machines. Let's apply the existing workaround generically to all Clevo devices with VIA codecs to cover all in once. BugLink: https://bugzilla.opensuse.org/show_bug.cgi?id=1181330 Cc: Link: https://lore.kernel.org/r/20210126165603.11683-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_via.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/pci/hda/patch_via.c b/sound/pci/hda/patch_via.c index 834367dd54e1b..a5c1a2c4eae4e 100644 --- a/sound/pci/hda/patch_via.c +++ b/sound/pci/hda/patch_via.c @@ -1043,7 +1043,7 @@ static const struct hda_fixup via_fixups[] = { static const struct snd_pci_quirk vt2002p_fixups[] = { SND_PCI_QUIRK(0x1043, 0x1487, "Asus G75", VIA_FIXUP_ASUS_G75), SND_PCI_QUIRK(0x1043, 0x8532, "Asus X202E", VIA_FIXUP_INTMIC_BOOST), - SND_PCI_QUIRK(0x1558, 0x3501, "Clevo W35xSS_370SS", VIA_FIXUP_POWER_SAVE), + SND_PCI_QUIRK_VENDOR(0x1558, "Clevo", VIA_FIXUP_POWER_SAVE), {} }; -- GitLab From 519ea6f1c82fcdc9842908155ae379de47818778 Mon Sep 17 00:00:00 2001 From: Vincenzo Frascino Date: Tue, 26 Jan 2021 13:40:56 +0000 Subject: [PATCH 1497/2012] arm64: Fix kernel address detection of __is_lm_address() Currently, the __is_lm_address() check just masks out the top 12 bits of the address, but if they are 0, it still yields a true result. This has as a side effect that virt_addr_valid() returns true even for invalid virtual addresses (e.g. 0x0). Fix the detection checking that it's actually a kernel address starting at PAGE_OFFSET. Fixes: 68dd8ef32162 ("arm64: memory: Fix virt_addr_valid() using __is_lm_address()") Cc: # 5.4.x Cc: Will Deacon Suggested-by: Catalin Marinas Reviewed-by: Catalin Marinas Acked-by: Mark Rutland Signed-off-by: Vincenzo Frascino Link: https://lore.kernel.org/r/20210126134056.45747-1-vincenzo.frascino@arm.com Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/memory.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index 18fce223b67b2..99d7e1494aaa6 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -247,9 +247,11 @@ static inline const void *__tag_set(const void *addr, u8 tag) /* - * The linear kernel range starts at the bottom of the virtual address space. + * Check whether an arbitrary address is within the linear map, which + * lives in the [PAGE_OFFSET, PAGE_END) interval at the bottom of the + * kernel's TTBR1 address range. */ -#define __is_lm_address(addr) (((u64)(addr) & ~PAGE_OFFSET) < (PAGE_END - PAGE_OFFSET)) +#define __is_lm_address(addr) (((u64)(addr) ^ PAGE_OFFSET) < (PAGE_END - PAGE_OFFSET)) #define __lm_to_phys(addr) (((addr) & ~PAGE_OFFSET) + PHYS_OFFSET) #define __kimg_to_phys(addr) ((addr) - kimage_voffset) -- GitLab From a53e3c189cc6460b60e152af3fc24edf8e0ea9d2 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Mon, 18 Jan 2021 16:37:00 +0100 Subject: [PATCH 1498/2012] media: v4l2-subdev.h: BIT() is not available in userspace The BIT macro is not available in userspace, so replace BIT(0) by 0x00000001. Signed-off-by: Hans Verkuil Fixes: 6446ec6cbf46 ("media: v4l2-subdev: add VIDIOC_SUBDEV_QUERYCAP ioctl") Cc: Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/v4l2-subdev.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/uapi/linux/v4l2-subdev.h b/include/uapi/linux/v4l2-subdev.h index 00850b98078a2..a38454d9e0f54 100644 --- a/include/uapi/linux/v4l2-subdev.h +++ b/include/uapi/linux/v4l2-subdev.h @@ -176,7 +176,7 @@ struct v4l2_subdev_capability { }; /* The v4l2 sub-device video device node is registered in read-only mode. */ -#define V4L2_SUBDEV_CAP_RO_SUBDEV BIT(0) +#define V4L2_SUBDEV_CAP_RO_SUBDEV 0x00000001 /* Backwards compatibility define --- to be removed */ #define v4l2_subdev_edid v4l2_edid -- GitLab From 73bc0b0c2a96b31199da0ce6c3d04be81ef73bb9 Mon Sep 17 00:00:00 2001 From: Jernej Skrabec Date: Wed, 23 Dec 2020 12:06:59 +0100 Subject: [PATCH 1499/2012] media: cedrus: Fix H264 decoding During H264 API overhaul subtle bug was introduced Cedrus driver. Progressive references have both, top and bottom reference flags set. Cedrus reference list expects only bottom reference flag and only when interlaced frames are decoded. However, due to a bug in Cedrus check, exclusivity is not tested and that flag is set also for progressive references. That causes "jumpy" background with many videos. Fix that by checking that only bottom reference flag is set in control and nothing else. Tested-by: Andre Heider Fixes: cfc8c3ed533e ("media: cedrus: h264: Properly configure reference field") Signed-off-by: Jernej Skrabec Signed-off-by: Hans Verkuil Cc: Signed-off-by: Mauro Carvalho Chehab --- drivers/staging/media/sunxi/cedrus/cedrus_h264.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/media/sunxi/cedrus/cedrus_h264.c b/drivers/staging/media/sunxi/cedrus/cedrus_h264.c index 781c84a9b1b79..de7442d4834dc 100644 --- a/drivers/staging/media/sunxi/cedrus/cedrus_h264.c +++ b/drivers/staging/media/sunxi/cedrus/cedrus_h264.c @@ -203,7 +203,7 @@ static void _cedrus_write_ref_list(struct cedrus_ctx *ctx, position = cedrus_buf->codec.h264.position; sram_array[i] |= position << 1; - if (ref_list[i].fields & V4L2_H264_BOTTOM_FIELD_REF) + if (ref_list[i].fields == V4L2_H264_BOTTOM_FIELD_REF) sram_array[i] |= BIT(0); } -- GitLab From eaf18a4165141f04dd26f0c48a7e53438e5a3ea2 Mon Sep 17 00:00:00 2001 From: Yannick Fertre Date: Fri, 15 Jan 2021 15:31:44 +0100 Subject: [PATCH 1500/2012] media: cec: add stm32 driver Missing stm32 directory to Makefile. Signed-off-by: Yannick Fertre Signed-off-by: Hans Verkuil Fixes: 4be5e8648b0c ("media: move CEC platform drivers to a separate directory") Cc: Signed-off-by: Mauro Carvalho Chehab --- drivers/media/cec/platform/Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/media/cec/platform/Makefile b/drivers/media/cec/platform/Makefile index 3a947159b25ac..ea6f8ee8161c9 100644 --- a/drivers/media/cec/platform/Makefile +++ b/drivers/media/cec/platform/Makefile @@ -10,5 +10,6 @@ obj-$(CONFIG_CEC_MESON_AO) += meson/ obj-$(CONFIG_CEC_SAMSUNG_S5P) += s5p/ obj-$(CONFIG_CEC_SECO) += seco/ obj-$(CONFIG_CEC_STI) += sti/ +obj-$(CONFIG_CEC_STM32) += stm32/ obj-$(CONFIG_CEC_TEGRA) += tegra/ -- GitLab From e081863ab48d9b2eee9e899cbd05752a2a30308d Mon Sep 17 00:00:00 2001 From: Ricardo Ribalda Date: Thu, 14 Jan 2021 14:03:16 +0100 Subject: [PATCH 1501/2012] media: hantro: Fix reset_raw_fmt initialization raw_fmt->height in never initialized. But width in initialized twice. Fixes: 88d06362d1d05 ("media: hantro: Refactor for V4L2 API spec compliancy") Signed-off-by: Ricardo Ribalda Signed-off-by: Hans Verkuil Cc: Signed-off-by: Mauro Carvalho Chehab --- drivers/staging/media/hantro/hantro_v4l2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/media/hantro/hantro_v4l2.c b/drivers/staging/media/hantro/hantro_v4l2.c index b668a82d40ad4..f5fbdbc4ffdb1 100644 --- a/drivers/staging/media/hantro/hantro_v4l2.c +++ b/drivers/staging/media/hantro/hantro_v4l2.c @@ -367,7 +367,7 @@ hantro_reset_raw_fmt(struct hantro_ctx *ctx) hantro_reset_fmt(raw_fmt, raw_vpu_fmt); raw_fmt->width = encoded_fmt->width; - raw_fmt->width = encoded_fmt->width; + raw_fmt->height = encoded_fmt->height; if (ctx->is_encoder) hantro_set_fmt_out(ctx, raw_fmt); else -- GitLab From 377bf660d07a47269510435d11f3b65d53edca20 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Tue, 26 Jan 2021 10:39:46 -0800 Subject: [PATCH 1502/2012] Revert "mm: fix initialization of struct page for holes in memory layout" This reverts commit d3921cb8be29ce5668c64e23ffdaeec5f8c69399. Chris Wilson reports that it causes boot problems: "We have half a dozen or so different machines in CI that are silently failing to boot, that we believe is bisected to this patch" and the CI team confirmed that a revert fixed the issues. The cause is unknown for now, so let's revert it. Link: https://lore.kernel.org/lkml/161160687463.28991.354987542182281928@build.alporthouse.com/ Reported-and-tested-by: Chris Wilson Acked-by: Mike Rapoport Cc: Andrea Arcangeli Cc: Andrew Morton Signed-off-by: Linus Torvalds --- mm/page_alloc.c | 84 ++++++++++++++++++++----------------------------- 1 file changed, 34 insertions(+), 50 deletions(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 783913e41f65d..519a60d5b6f7d 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -7080,26 +7080,23 @@ void __init free_area_init_memoryless_node(int nid) * Initialize all valid struct pages in the range [spfn, epfn) and mark them * PageReserved(). Return the number of struct pages that were initialized. */ -static u64 __init init_unavailable_range(unsigned long spfn, unsigned long epfn, - int zone, int nid) +static u64 __init init_unavailable_range(unsigned long spfn, unsigned long epfn) { - unsigned long pfn, zone_spfn, zone_epfn; + unsigned long pfn; u64 pgcnt = 0; - zone_spfn = arch_zone_lowest_possible_pfn[zone]; - zone_epfn = arch_zone_highest_possible_pfn[zone]; - - spfn = clamp(spfn, zone_spfn, zone_epfn); - epfn = clamp(epfn, zone_spfn, zone_epfn); - for (pfn = spfn; pfn < epfn; pfn++) { if (!pfn_valid(ALIGN_DOWN(pfn, pageblock_nr_pages))) { pfn = ALIGN_DOWN(pfn, pageblock_nr_pages) + pageblock_nr_pages - 1; continue; } - - __init_single_page(pfn_to_page(pfn), pfn, zone, nid); + /* + * Use a fake node/zone (0) for now. Some of these pages + * (in memblock.reserved but not in memblock.memory) will + * get re-initialized via reserve_bootmem_region() later. + */ + __init_single_page(pfn_to_page(pfn), pfn, 0, 0); __SetPageReserved(pfn_to_page(pfn)); pgcnt++; } @@ -7108,64 +7105,51 @@ static u64 __init init_unavailable_range(unsigned long spfn, unsigned long epfn, } /* - * Only struct pages that correspond to ranges defined by memblock.memory - * are zeroed and initialized by going through __init_single_page() during - * memmap_init(). - * - * But, there could be struct pages that correspond to holes in - * memblock.memory. This can happen because of the following reasons: - * - phyiscal memory bank size is not necessarily the exact multiple of the - * arbitrary section size - * - early reserved memory may not be listed in memblock.memory - * - memory layouts defined with memmap= kernel parameter may not align - * nicely with memmap sections + * Only struct pages that are backed by physical memory are zeroed and + * initialized by going through __init_single_page(). But, there are some + * struct pages which are reserved in memblock allocator and their fields + * may be accessed (for example page_to_pfn() on some configuration accesses + * flags). We must explicitly initialize those struct pages. * - * Explicitly initialize those struct pages so that: - * - PG_Reserved is set - * - zone link is set accorging to the architecture constrains - * - node is set to node id of the next populated region except for the - * trailing hole where last node id is used + * This function also addresses a similar issue where struct pages are left + * uninitialized because the physical address range is not covered by + * memblock.memory or memblock.reserved. That could happen when memblock + * layout is manually configured via memmap=, or when the highest physical + * address (max_pfn) does not end on a section boundary. */ -static void __init init_zone_unavailable_mem(int zone) +static void __init init_unavailable_mem(void) { - unsigned long start, end; - int i, nid; - u64 pgcnt; - unsigned long next = 0; + phys_addr_t start, end; + u64 i, pgcnt; + phys_addr_t next = 0; /* - * Loop through holes in memblock.memory and initialize struct - * pages corresponding to these holes + * Loop through unavailable ranges not covered by memblock.memory. */ pgcnt = 0; - for_each_mem_pfn_range(i, MAX_NUMNODES, &start, &end, &nid) { + for_each_mem_range(i, &start, &end) { if (next < start) - pgcnt += init_unavailable_range(next, start, zone, nid); + pgcnt += init_unavailable_range(PFN_DOWN(next), + PFN_UP(start)); next = end; } /* - * Last section may surpass the actual end of memory (e.g. we can - * have 1Gb section and 512Mb of RAM pouplated). - * Make sure that memmap has a well defined state in this case. + * Early sections always have a fully populated memmap for the whole + * section - see pfn_valid(). If the last section has holes at the + * end and that section is marked "online", the memmap will be + * considered initialized. Make sure that memmap has a well defined + * state. */ - end = round_up(max_pfn, PAGES_PER_SECTION); - pgcnt += init_unavailable_range(next, end, zone, nid); + pgcnt += init_unavailable_range(PFN_DOWN(next), + round_up(max_pfn, PAGES_PER_SECTION)); /* * Struct pages that do not have backing memory. This could be because * firmware is using some of this memory, or for some other reasons. */ if (pgcnt) - pr_info("Zone %s: zeroed struct page in unavailable ranges: %lld pages", zone_names[zone], pgcnt); -} - -static void __init init_unavailable_mem(void) -{ - int zone; - - for (zone = 0; zone < ZONE_MOVABLE; zone++) - init_zone_unavailable_mem(zone); + pr_info("Zeroed struct page in unavailable ranges: %lld pages", pgcnt); } #else static inline void __init init_unavailable_mem(void) -- GitLab From 29e2d9eb82647654abff150ff02fa1e07362214f Mon Sep 17 00:00:00 2001 From: Henry Tieman Date: Fri, 20 Nov 2020 16:38:30 -0800 Subject: [PATCH 1503/2012] ice: fix FDir IPv6 flexbyte The packet classifier would occasionally misrecognize an IPv6 training packet when the next protocol field was 0. The correct value for unspecified protocol is IPPROTO_NONE. Fixes: 165d80d6adab ("ice: Support IPv6 Flow Director filters") Signed-off-by: Henry Tieman Reviewed-by: Paul Menzel Tested-by: Tony Brelinski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c b/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c index 2d27f66ac8534..192729546bbfc 100644 --- a/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c +++ b/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c @@ -1576,7 +1576,13 @@ ice_set_fdir_input_set(struct ice_vsi *vsi, struct ethtool_rx_flow_spec *fsp, sizeof(struct in6_addr)); input->ip.v6.l4_header = fsp->h_u.usr_ip6_spec.l4_4_bytes; input->ip.v6.tc = fsp->h_u.usr_ip6_spec.tclass; - input->ip.v6.proto = fsp->h_u.usr_ip6_spec.l4_proto; + + /* if no protocol requested, use IPPROTO_NONE */ + if (!fsp->m_u.usr_ip6_spec.l4_proto) + input->ip.v6.proto = IPPROTO_NONE; + else + input->ip.v6.proto = fsp->h_u.usr_ip6_spec.l4_proto; + memcpy(input->mask.v6.dst_ip, fsp->m_u.usr_ip6_spec.ip6dst, sizeof(struct in6_addr)); memcpy(input->mask.v6.src_ip, fsp->m_u.usr_ip6_spec.ip6src, -- GitLab From 1b0b0b581b945ee27beb70e8199270a22dd5a2f6 Mon Sep 17 00:00:00 2001 From: Nick Nunley Date: Fri, 20 Nov 2020 16:38:31 -0800 Subject: [PATCH 1504/2012] ice: Implement flow for IPv6 next header (extension header) This patch is based on a similar change to i40e by Slawomir Laba: "i40e: Implement flow for IPv6 next header (extension header)". When a packet contains an IPv6 header with next header which is an extension header and not a protocol one, the kernel function skb_transport_header called with such sk_buff will return a pointer to the extension header and not to the TCP one. The above explained call caused a problem with packet processing for skb with encapsulation for tunnel with ICE_TX_CTX_EIPT_IPV6. The extension header was not skipped at all. The ipv6_skip_exthdr function does check if next header of the IPV6 header is an extension header and doesn't modify the l4_proto pointer if it points to a protocol header value so its safe to omit the comparison of exthdr and l4.hdr pointers. The ipv6_skip_exthdr can return value -1. This means that the skipping process failed and there is something wrong with the packet so it will be dropped. Fixes: a4e82a81f573 ("ice: Add support for tunnel offloads") Signed-off-by: Nick Nunley Tested-by: Tony Brelinski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_txrx.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c index a2d0aad8cfdd7..b6fa83c619dd7 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx.c @@ -1923,12 +1923,15 @@ int ice_tx_csum(struct ice_tx_buf *first, struct ice_tx_offload_params *off) ICE_TX_CTX_EIPT_IPV4_NO_CSUM; l4_proto = ip.v4->protocol; } else if (first->tx_flags & ICE_TX_FLAGS_IPV6) { + int ret; + tunnel |= ICE_TX_CTX_EIPT_IPV6; exthdr = ip.hdr + sizeof(*ip.v6); l4_proto = ip.v6->nexthdr; - if (l4.hdr != exthdr) - ipv6_skip_exthdr(skb, exthdr - skb->data, - &l4_proto, &frag_off); + ret = ipv6_skip_exthdr(skb, exthdr - skb->data, + &l4_proto, &frag_off); + if (ret < 0) + return -1; } /* define outer transport */ -- GitLab From 13ed5e8a9b9ccd140a79e80283f69d724c9bb2be Mon Sep 17 00:00:00 2001 From: Nick Nunley Date: Fri, 20 Nov 2020 16:38:33 -0800 Subject: [PATCH 1505/2012] ice: update dev_addr in ice_set_mac_address even if HW filter exists Fix the driver to copy the MAC address configured in ndo_set_mac_address into dev_addr, even if the MAC filter already exists in HW. In some situations (e.g. bonding) the netdev's dev_addr could have been modified outside of the driver, with no change to the HW filter, so the driver cannot assume that they match. Fixes: 757976ab16be ("ice: Fix check for removing/adding mac filters") Signed-off-by: Nick Nunley Tested-by: Tony Brelinski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_main.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index c52b9bb0e3ab1..fb81aa5979e3e 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -4884,9 +4884,15 @@ static int ice_set_mac_address(struct net_device *netdev, void *pi) goto err_update_filters; } - /* Add filter for new MAC. If filter exists, just return success */ + /* Add filter for new MAC. If filter exists, return success */ status = ice_fltr_add_mac(vsi, mac, ICE_FWD_TO_VSI); if (status == ICE_ERR_ALREADY_EXISTS) { + /* Although this MAC filter is already present in hardware it's + * possible in some cases (e.g. bonding) that dev_addr was + * modified outside of the driver and needs to be restored back + * to this value. + */ + memcpy(netdev->dev_addr, mac, netdev->addr_len); netdev_dbg(netdev, "filter for MAC %pM already exists\n", mac); return 0; } -- GitLab From 943b881e35829403da638fcb34a959125deafef3 Mon Sep 17 00:00:00 2001 From: Brett Creeley Date: Thu, 21 Jan 2021 10:38:05 -0800 Subject: [PATCH 1506/2012] ice: Don't allow more channels than LAN MSI-X available Currently users could create more channels than LAN MSI-X available. This is happening because there is no check against pf->num_lan_msix when checking the max allowed channels and will cause performance issues if multiple Tx and Rx queues are tied to a single MSI-X. Fix this by not allowing more channels than LAN MSI-X available in pf->num_lan_msix. Fixes: 87324e747fde ("ice: Implement ethtool ops for channels") Signed-off-by: Brett Creeley Tested-by: Tony Brelinski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_ethtool.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c index 9e8e9531cd871..69c113a4de7e6 100644 --- a/drivers/net/ethernet/intel/ice/ice_ethtool.c +++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c @@ -3258,8 +3258,8 @@ ice_set_rxfh(struct net_device *netdev, const u32 *indir, const u8 *key, */ static int ice_get_max_txq(struct ice_pf *pf) { - return min_t(int, num_online_cpus(), - pf->hw.func_caps.common_cap.num_txq); + return min3(pf->num_lan_msix, (u16)num_online_cpus(), + (u16)pf->hw.func_caps.common_cap.num_txq); } /** @@ -3268,8 +3268,8 @@ static int ice_get_max_txq(struct ice_pf *pf) */ static int ice_get_max_rxq(struct ice_pf *pf) { - return min_t(int, num_online_cpus(), - pf->hw.func_caps.common_cap.num_rxq); + return min3(pf->num_lan_msix, (u16)num_online_cpus(), + (u16)pf->hw.func_caps.common_cap.num_rxq); } /** -- GitLab From f3fe97f64384fa4073d9dc0278c4b351c92e295c Mon Sep 17 00:00:00 2001 From: Brett Creeley Date: Thu, 21 Jan 2021 10:38:06 -0800 Subject: [PATCH 1507/2012] ice: Fix MSI-X vector fallback logic The current MSI-X enablement logic tries to enable best-case MSI-X vectors and if that fails we only support a bare-minimum set. This includes a single MSI-X for 1 Tx and 1 Rx queue and a single MSI-X for the OICR interrupt. Unfortunately, the driver fails to load when we don't get as many MSI-X as requested for a couple reasons. First, the code to allocate MSI-X in the driver tries to allocate num_online_cpus() MSI-X for LAN traffic without caring about the number of MSI-X actually enabled/requested from the kernel for LAN traffic. So, when calling ice_get_res() for the PF VSI, it returns failure because the number of available vectors is less than requested. Fix this by not allowing the PF VSI to allocation more than pf->num_lan_msix MSI-X vectors and pf->num_lan_msix Rx/Tx queues. Limiting the number of queues is done because we don't want more than 1 Tx/Rx queue per interrupt due to performance conerns. Second, the driver assigns pf->num_lan_msix = 2, to account for LAN traffic and the OICR. However, pf->num_lan_msix is only meant for LAN MSI-X. This is causing a failure when the PF VSI tries to allocate/reserve the minimum pf->num_lan_msix because the OICR MSI-X has already been reserved, so there may not be enough MSI-X vectors left. Fix this by setting pf->num_lan_msix = 1 for the failure case. Then the ICE_MIN_MSIX accounts for the LAN MSI-X and the OICR MSI-X needed for the failure case. Update the related defines used in ice_ena_msix_range() to align with the above behavior and remove the unused RDMA defines because RDMA is currently not supported. Also, remove the now incorrect comment. Fixes: 152b978a1f90 ("ice: Rework ice_ena_msix_range") Signed-off-by: Brett Creeley Tested-by: Tony Brelinski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice.h | 4 +++- drivers/net/ethernet/intel/ice/ice_lib.c | 14 +++++++++----- drivers/net/ethernet/intel/ice/ice_main.c | 8 ++------ 3 files changed, 14 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h index 56725356a17b8..fa1e128c24eca 100644 --- a/drivers/net/ethernet/intel/ice/ice.h +++ b/drivers/net/ethernet/intel/ice/ice.h @@ -68,7 +68,9 @@ #define ICE_INT_NAME_STR_LEN (IFNAMSIZ + 16) #define ICE_AQ_LEN 64 #define ICE_MBXSQ_LEN 64 -#define ICE_MIN_MSIX 2 +#define ICE_MIN_LAN_TXRX_MSIX 1 +#define ICE_MIN_LAN_OICR_MSIX 1 +#define ICE_MIN_MSIX (ICE_MIN_LAN_TXRX_MSIX + ICE_MIN_LAN_OICR_MSIX) #define ICE_FDIR_MSIX 1 #define ICE_NO_VSI 0xffff #define ICE_VSI_MAP_CONTIG 0 diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c index 3df67486d42d9..ad9c22a1b97a0 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_lib.c @@ -161,8 +161,9 @@ static void ice_vsi_set_num_qs(struct ice_vsi *vsi, u16 vf_id) switch (vsi->type) { case ICE_VSI_PF: - vsi->alloc_txq = min_t(int, ice_get_avail_txq_count(pf), - num_online_cpus()); + vsi->alloc_txq = min3(pf->num_lan_msix, + ice_get_avail_txq_count(pf), + (u16)num_online_cpus()); if (vsi->req_txq) { vsi->alloc_txq = vsi->req_txq; vsi->num_txq = vsi->req_txq; @@ -174,8 +175,9 @@ static void ice_vsi_set_num_qs(struct ice_vsi *vsi, u16 vf_id) if (!test_bit(ICE_FLAG_RSS_ENA, pf->flags)) { vsi->alloc_rxq = 1; } else { - vsi->alloc_rxq = min_t(int, ice_get_avail_rxq_count(pf), - num_online_cpus()); + vsi->alloc_rxq = min3(pf->num_lan_msix, + ice_get_avail_rxq_count(pf), + (u16)num_online_cpus()); if (vsi->req_rxq) { vsi->alloc_rxq = vsi->req_rxq; vsi->num_rxq = vsi->req_rxq; @@ -184,7 +186,9 @@ static void ice_vsi_set_num_qs(struct ice_vsi *vsi, u16 vf_id) pf->num_lan_rx = vsi->alloc_rxq; - vsi->num_q_vectors = max_t(int, vsi->alloc_rxq, vsi->alloc_txq); + vsi->num_q_vectors = min_t(int, pf->num_lan_msix, + max_t(int, vsi->alloc_rxq, + vsi->alloc_txq)); break; case ICE_VSI_VF: vf = &pf->vf[vsi->vf_id]; diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index fb81aa5979e3e..e10ca8929f85e 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -3430,18 +3430,14 @@ static int ice_ena_msix_range(struct ice_pf *pf) if (v_actual < v_budget) { dev_warn(dev, "not enough OS MSI-X vectors. requested = %d, obtained = %d\n", v_budget, v_actual); -/* 2 vectors each for LAN and RDMA (traffic + OICR), one for flow director */ -#define ICE_MIN_LAN_VECS 2 -#define ICE_MIN_RDMA_VECS 2 -#define ICE_MIN_VECS (ICE_MIN_LAN_VECS + ICE_MIN_RDMA_VECS + 1) - if (v_actual < ICE_MIN_LAN_VECS) { + if (v_actual < ICE_MIN_MSIX) { /* error if we can't get minimum vectors */ pci_disable_msix(pf->pdev); err = -ERANGE; goto msix_err; } else { - pf->num_lan_msix = ICE_MIN_LAN_VECS; + pf->num_lan_msix = ICE_MIN_LAN_TXRX_MSIX; } } -- GitLab From 67a3c6b3cc40bb217c3ff947a55053151a00fea0 Mon Sep 17 00:00:00 2001 From: Stefan Assmann Date: Mon, 30 Nov 2020 14:12:57 +0100 Subject: [PATCH 1508/2012] i40e: acquire VSI pointer only after VF is initialized This change simplifies the VF initialization check and also minimizes the delay between acquiring the VSI pointer and using it. As known by the commit being fixed, there is a risk of the VSI pointer getting changed. Therefore minimize the delay between getting and using the pointer. Fixes: 9889707b06ac ("i40e: Fix crash caused by stress setting of VF MAC addresses") Signed-off-by: Stefan Assmann Reviewed-by: Jacob Keller Tested-by: Konrad Jankowski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c index 21ee56420c3ae..7efc61aacb0a5 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c @@ -4046,20 +4046,16 @@ int i40e_ndo_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac) goto error_param; vf = &pf->vf[vf_id]; - vsi = pf->vsi[vf->lan_vsi_idx]; /* When the VF is resetting wait until it is done. * It can take up to 200 milliseconds, * but wait for up to 300 milliseconds to be safe. - * If the VF is indeed in reset, the vsi pointer has - * to show on the newly loaded vsi under pf->vsi[id]. + * Acquire the VSI pointer only after the VF has been + * properly initialized. */ for (i = 0; i < 15; i++) { - if (test_bit(I40E_VF_STATE_INIT, &vf->vf_states)) { - if (i > 0) - vsi = pf->vsi[vf->lan_vsi_idx]; + if (test_bit(I40E_VF_STATE_INIT, &vf->vf_states)) break; - } msleep(20); } if (!test_bit(I40E_VF_STATE_INIT, &vf->vf_states)) { @@ -4068,6 +4064,7 @@ int i40e_ndo_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac) ret = -EAGAIN; goto error_param; } + vsi = pf->vsi[vf->lan_vsi_idx]; if (is_multicast_ether_addr(mac)) { dev_err(&pf->pdev->dev, -- GitLab From 0aa91f84b1804b59841c834128b2c15330a1ec59 Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Thu, 14 Jan 2021 16:14:48 -0800 Subject: [PATCH 1509/2012] parisc: Remove leftover reference to the power_tasklet This was removed long ago, back in: 6e16d9409e1 ([PARISC] Convert soft power switch driver to kthread) Signed-off-by: Davidlohr Bueso Signed-off-by: Helge Deller --- arch/parisc/include/asm/irq.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/arch/parisc/include/asm/irq.h b/arch/parisc/include/asm/irq.h index 959e79cd2c148..378f63c4015b4 100644 --- a/arch/parisc/include/asm/irq.h +++ b/arch/parisc/include/asm/irq.h @@ -47,7 +47,4 @@ extern unsigned long txn_affinity_addr(unsigned int irq, int cpu); extern int cpu_claim_irq(unsigned int irq, struct irq_chip *, void *); extern int cpu_check_affinity(struct irq_data *d, const struct cpumask *dest); -/* soft power switch support (power.c) */ -extern struct tasklet_struct power_tasklet; - #endif /* _ASM_PARISC_IRQ_H */ -- GitLab From 00e35f2b0e8acb88d4e1aa96ff0490e3bfe46580 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Tue, 26 Jan 2021 20:16:21 +0100 Subject: [PATCH 1510/2012] parisc: Enable -mlong-calls gcc option by default when !CONFIG_MODULES When building a kernel without module support, the CONFIG_MLONGCALL option needs to be enabled in order to reach symbols which are outside of a 22-bit branch. This patch changes the autodetection in the Kconfig script to always enable CONFIG_MLONGCALL when modules are disabled and uses a far call to preempt_schedule_irq() in intr_do_preempt() to reach the symbol in all cases. Signed-off-by: Helge Deller Reported-by: kernel test robot Cc: stable@vger.kernel.org # v5.6+ --- arch/parisc/Kconfig | 5 ++--- arch/parisc/kernel/entry.S | 13 ++++++++++--- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index 78b17621ee4a5..278462186ac47 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -202,9 +202,8 @@ config PREFETCH depends on PA8X00 || PA7200 config MLONGCALLS - bool "Enable the -mlong-calls compiler option for big kernels" - default y if !MODULES || UBSAN || FTRACE - default n + def_bool y if !MODULES || UBSAN || FTRACE + bool "Enable the -mlong-calls compiler option for big kernels" if MODULES && !UBSAN && !FTRACE depends on PA8X00 help If you configure the kernel to include many drivers built-in instead diff --git a/arch/parisc/kernel/entry.S b/arch/parisc/kernel/entry.S index beba9816cc6c1..4d37cc9cba37c 100644 --- a/arch/parisc/kernel/entry.S +++ b/arch/parisc/kernel/entry.S @@ -997,10 +997,17 @@ intr_do_preempt: bb,<,n %r20, 31 - PSW_SM_I, intr_restore nop + /* ssm PSW_SM_I done later in intr_restore */ +#ifdef CONFIG_MLONGCALLS + ldil L%intr_restore, %r2 + load32 preempt_schedule_irq, %r1 + bv %r0(%r1) + ldo R%intr_restore(%r2), %r2 +#else + ldil L%intr_restore, %r1 BL preempt_schedule_irq, %r2 - nop - - b,n intr_restore /* ssm PSW_SM_I done by intr_restore */ + ldo R%intr_restore(%r1), %r2 +#endif #endif /* CONFIG_PREEMPTION */ /* -- GitLab From 2ab38c17aac10bf55ab3efde4c4db3893d8691d2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= Date: Tue, 26 Jan 2021 01:04:38 +0100 Subject: [PATCH 1511/2012] mailmap: remove the "repo-abbrev" comment MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove the magical "repo-abbrev" comment added when this file was introduced in e0ab1ec9fcd3 ([PATCH] add .mailmap for proper git-shortlog output, 2007-02-14). It's been an undocumented feature of git-shortlog(1), originally added to git for Linus's use. Since then he's no longer using it[1], and I've removed the feature in git.git's 4e168333a87 (shortlog: remove unused(?) "repo-abbrev" feature, 2021-01-12). It's on the "master" branch, but not yet in a release version. Let's also remove it from linux.git, both as a heads-up to any potential users of it in linux.git whose use would be broken sooner than later by git itself, and because it'll eventually be entirely redundant. 1. https://lore.kernel.org/git/CAHk-=wixHyBKZVUcxq+NCWMbkrX0xnppb7UCopRWw1+oExYpYw@mail.gmail.com/ Acked-by: Linus Torvalds Signed-off-by: Ævar Arnfjörð Bjarmason Signed-off-by: Linus Torvalds --- .mailmap | 3 --- 1 file changed, 3 deletions(-) diff --git a/.mailmap b/.mailmap index b1ab0129c7d6b..cc4e91d3075e8 100644 --- a/.mailmap +++ b/.mailmap @@ -9,9 +9,6 @@ # # Please keep this list dictionary sorted. # -# This comment is parsed by git-shortlog: -# repo-abbrev: /pub/scm/linux/kernel/git/ -# Aaron Durbin Adam Oldham Adam Radford -- GitLab From 329a3678ec69962aa67c91397efbd46d36635f91 Mon Sep 17 00:00:00 2001 From: Corinna Vinschen Date: Tue, 17 Nov 2020 20:50:40 +0100 Subject: [PATCH 1512/2012] igc: fix link speed advertising Link speed advertising in igc has two problems: - When setting the advertisement via ethtool, the link speed is converted to the legacy 32 bit representation for the intel PHY code. This inadvertently drops ETHTOOL_LINK_MODE_2500baseT_Full_BIT (being beyond bit 31). As a result, any call to `ethtool -s ...' drops the 2500Mbit/s link speed from the PHY settings. Only reloading the driver alleviates that problem. Fix this by converting the ETHTOOL_LINK_MODE_2500baseT_Full_BIT to the Intel PHY ADVERTISE_2500_FULL bit explicitly. - Rather than checking the actual PHY setting, the .get_link_ksettings function always fills link_modes.advertising with all link speeds the device is capable of. Fix this by checking the PHY autoneg_advertised settings and report only the actually advertised speeds up to ethtool. Fixes: 8c5ad0dae93c ("igc: Add ethtool support") Signed-off-by: Corinna Vinschen Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igc/igc_ethtool.c | 24 +++++++++++++++----- 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/intel/igc/igc_ethtool.c b/drivers/net/ethernet/intel/igc/igc_ethtool.c index 61d331ce38cdd..831f2f09de5fb 100644 --- a/drivers/net/ethernet/intel/igc/igc_ethtool.c +++ b/drivers/net/ethernet/intel/igc/igc_ethtool.c @@ -1675,12 +1675,18 @@ static int igc_ethtool_get_link_ksettings(struct net_device *netdev, cmd->base.phy_address = hw->phy.addr; /* advertising link modes */ - ethtool_link_ksettings_add_link_mode(cmd, advertising, 10baseT_Half); - ethtool_link_ksettings_add_link_mode(cmd, advertising, 10baseT_Full); - ethtool_link_ksettings_add_link_mode(cmd, advertising, 100baseT_Half); - ethtool_link_ksettings_add_link_mode(cmd, advertising, 100baseT_Full); - ethtool_link_ksettings_add_link_mode(cmd, advertising, 1000baseT_Full); - ethtool_link_ksettings_add_link_mode(cmd, advertising, 2500baseT_Full); + if (hw->phy.autoneg_advertised & ADVERTISE_10_HALF) + ethtool_link_ksettings_add_link_mode(cmd, advertising, 10baseT_Half); + if (hw->phy.autoneg_advertised & ADVERTISE_10_FULL) + ethtool_link_ksettings_add_link_mode(cmd, advertising, 10baseT_Full); + if (hw->phy.autoneg_advertised & ADVERTISE_100_HALF) + ethtool_link_ksettings_add_link_mode(cmd, advertising, 100baseT_Half); + if (hw->phy.autoneg_advertised & ADVERTISE_100_FULL) + ethtool_link_ksettings_add_link_mode(cmd, advertising, 100baseT_Full); + if (hw->phy.autoneg_advertised & ADVERTISE_1000_FULL) + ethtool_link_ksettings_add_link_mode(cmd, advertising, 1000baseT_Full); + if (hw->phy.autoneg_advertised & ADVERTISE_2500_FULL) + ethtool_link_ksettings_add_link_mode(cmd, advertising, 2500baseT_Full); /* set autoneg settings */ if (hw->mac.autoneg == 1) { @@ -1792,6 +1798,12 @@ igc_ethtool_set_link_ksettings(struct net_device *netdev, ethtool_convert_link_mode_to_legacy_u32(&advertising, cmd->link_modes.advertising); + /* Converting to legacy u32 drops ETHTOOL_LINK_MODE_2500baseT_Full_BIT. + * We have to check this and convert it to ADVERTISE_2500_FULL + * (aka ETHTOOL_LINK_MODE_2500baseX_Full_BIT) explicitly. + */ + if (ethtool_link_ksettings_test_link_mode(cmd, advertising, 2500baseT_Full)) + advertising |= ADVERTISE_2500_FULL; if (cmd->base.autoneg == AUTONEG_ENABLE) { hw->mac.autoneg = 1; -- GitLab From 150a27328b681425c8cab239894a48f2aeb870e9 Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Tue, 26 Jan 2021 16:13:20 +0000 Subject: [PATCH 1513/2012] bpf, preload: Fix build when $(O) points to a relative path Building the kernel with CONFIG_BPF_PRELOAD, and by providing a relative path for the output directory, may fail with the following error: $ make O=build bindeb-pkg ... /.../linux/tools/scripts/Makefile.include:5: *** O=build does not exist. Stop. make[7]: *** [/.../linux/kernel/bpf/preload/Makefile:9: kernel/bpf/preload/libbpf.a] Error 2 make[6]: *** [/.../linux/scripts/Makefile.build:500: kernel/bpf/preload] Error 2 make[5]: *** [/.../linux/scripts/Makefile.build:500: kernel/bpf] Error 2 make[4]: *** [/.../linux/Makefile:1799: kernel] Error 2 make[4]: *** Waiting for unfinished jobs.... In the case above, for the "bindeb-pkg" target, the error is produced by the "dummy" check in Makefile.include, called from libbpf's Makefile. This check changes directory to $(PWD) before checking for the existence of $(O). But at this step we have $(PWD) pointing to "/.../linux/build", and $(O) pointing to "build". So the Makefile.include tries in fact to assert the existence of a directory named "/.../linux/build/build", which does not exist. Note that the error does not occur for all make targets and architectures combinations. This was observed on x86 for "bindeb-pkg", or for a regular build for UML [0]. Here are some details. The root Makefile recursively calls itself once, after changing directory to $(O). The content for the variable $(PWD) is preserved across recursive calls to make, so it is unchanged at this step. For "bindeb-pkg", $(PWD) is eventually updated because the target writes a new Makefile (as debian/rules) and calls it indirectly through dpkg-buildpackage. This script does not preserve $(PWD), which is reset to the current working directory when the target in debian/rules is called. Although not investigated, it seems likely that something similar causes UML to change its value for $(PWD). Non-trivial fixes could be to remove the use of $(PWD) from the "dummy" check, or to make sure that $(PWD) and $(O) are preserved or updated to always play well and form a valid $(PWD)/$(O) path across the different targets and architectures. Instead, we take a simpler approach and just update $(O) when calling libbpf's Makefile, so it points to an absolute path which should always resolve for the "dummy" check run (through includes) by that Makefile. David Gow previously posted a slightly different version of this patch as a RFC [0], two months ago or so. [0] https://lore.kernel.org/bpf/20201119085022.3606135-1-davidgow@google.com/t/#u Fixes: d71fa5c9763c ("bpf: Add kernel module with user mode driver that populates bpffs.") Reported-by: David Gow Signed-off-by: Quentin Monnet Signed-off-by: Daniel Borkmann Acked-by: Andrii Nakryiko Cc: Brendan Higgins Cc: Masahiro Yamada Link: https://lore.kernel.org/bpf/20210126161320.24561-1-quentin@isovalent.com --- kernel/bpf/preload/Makefile | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/kernel/bpf/preload/Makefile b/kernel/bpf/preload/Makefile index 23ee310b6eb49..1951332dd15f5 100644 --- a/kernel/bpf/preload/Makefile +++ b/kernel/bpf/preload/Makefile @@ -4,8 +4,11 @@ LIBBPF_SRCS = $(srctree)/tools/lib/bpf/ LIBBPF_A = $(obj)/libbpf.a LIBBPF_OUT = $(abspath $(obj)) +# Although not in use by libbpf's Makefile, set $(O) so that the "dummy" test +# in tools/scripts/Makefile.include always succeeds when building the kernel +# with $(O) pointing to a relative path, as in "make O=build bindeb-pkg". $(LIBBPF_A): - $(Q)$(MAKE) -C $(LIBBPF_SRCS) OUTPUT=$(LIBBPF_OUT)/ $(LIBBPF_OUT)/libbpf.a + $(Q)$(MAKE) -C $(LIBBPF_SRCS) O=$(LIBBPF_OUT)/ OUTPUT=$(LIBBPF_OUT)/ $(LIBBPF_OUT)/libbpf.a userccflags += -I $(srctree)/tools/include/ -I $(srctree)/tools/include/uapi \ -I $(srctree)/tools/lib/ -Wno-unused-result -- GitLab From 81a86e1bd8e7060ebba1718b284d54f1238e9bf9 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 25 Jan 2021 07:09:49 -0800 Subject: [PATCH 1514/2012] iwlwifi: provide gso_type to GSO packets net/core/tso.c got recent support for USO, and this broke iwlfifi because the driver implemented a limited form of GSO. Providing ->gso_type allows for skb_is_gso_tcp() to provide a correct result. Fixes: 3d5b459ba0e3 ("net: tso: add UDP segmentation support") Signed-off-by: Eric Dumazet Reported-by: Ben Greear Tested-by: Ben Greear Cc: Luca Coelho Cc: Johannes Berg Link: https://bugzilla.kernel.org/show_bug.cgi?id=209913 Link: https://lore.kernel.org/r/20210125150949.619309-1-eric.dumazet@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/wireless/intel/iwlwifi/mvm/tx.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c index a983c215df310..3712adc3ccc25 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c @@ -773,6 +773,7 @@ iwl_mvm_tx_tso_segment(struct sk_buff *skb, unsigned int num_subframes, next = skb_gso_segment(skb, netdev_flags); skb_shinfo(skb)->gso_size = mss; + skb_shinfo(skb)->gso_type = ipv4 ? SKB_GSO_TCPV4 : SKB_GSO_TCPV6; if (WARN_ON_ONCE(IS_ERR(next))) return -EINVAL; else if (next) @@ -795,6 +796,8 @@ iwl_mvm_tx_tso_segment(struct sk_buff *skb, unsigned int num_subframes, if (tcp_payload_len > mss) { skb_shinfo(tmp)->gso_size = mss; + skb_shinfo(tmp)->gso_type = ipv4 ? SKB_GSO_TCPV4 : + SKB_GSO_TCPV6; } else { if (qos) { u8 *qc; -- GitLab From 487c6ef81eb98d0a43cb08be91b1fcc9b4250626 Mon Sep 17 00:00:00 2001 From: Roi Dayan Date: Tue, 12 Jan 2021 14:04:29 +0200 Subject: [PATCH 1515/2012] net/mlx5: Fix memory leak on flow table creation error flow When we create the ft object we also init rhltable in ft->fgs_hash. So in error flow before kfree of ft we need to destroy that rhltable. Fixes: 693c6883bbc4 ("net/mlx5: Add hash table for flow groups in flow table") Signed-off-by: Roi Dayan Reviewed-by: Maor Dickman Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index b899539a07860..0fcee702b808b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -1141,6 +1141,7 @@ static struct mlx5_flow_table *__mlx5_create_flow_table(struct mlx5_flow_namespa destroy_ft: root->cmds->destroy_flow_table(root, ft); free_ft: + rhltable_destroy(&ft->fgs_hash); kfree(ft); unlock_root: mutex_unlock(&root->chain_lock); -- GitLab From 1fe3e3166b35240615ab7f8276af2bbf2e51f559 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Tue, 12 Jan 2021 16:13:22 +0200 Subject: [PATCH 1516/2012] net/mlx5e: E-switch, Fix rate calculation for overflow rate_bytes_ps is a 64-bit field. It passed as 32-bit field to apply_police_params(). Due to this when police rate is higher than 4Gbps, 32-bit calculation ignores the carry. This results in incorrect rate configurationn the device. Fix it by performing 64-bit calculation. Fixes: fcb64c0f5640 ("net/mlx5: E-Switch, add ingress rate support") Signed-off-by: Parav Pandit Reviewed-by: Eli Cohen Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 4cdf834fa74af..661235027b476 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -67,6 +67,7 @@ #include "lib/geneve.h" #include "lib/fs_chains.h" #include "diag/en_tc_tracepoint.h" +#include #define nic_chains(priv) ((priv)->fs.tc.chains) #define MLX5_MH_ACT_SZ MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto) @@ -5007,13 +5008,13 @@ errout: return err; } -static int apply_police_params(struct mlx5e_priv *priv, u32 rate, +static int apply_police_params(struct mlx5e_priv *priv, u64 rate, struct netlink_ext_ack *extack) { struct mlx5e_rep_priv *rpriv = priv->ppriv; struct mlx5_eswitch *esw; + u32 rate_mbps = 0; u16 vport_num; - u32 rate_mbps; int err; vport_num = rpriv->rep->vport; @@ -5030,7 +5031,11 @@ static int apply_police_params(struct mlx5e_priv *priv, u32 rate, * Moreover, if rate is non zero we choose to configure to a minimum of * 1 mbit/sec. */ - rate_mbps = rate ? max_t(u32, (rate * 8 + 500000) / 1000000, 1) : 0; + if (rate) { + rate = (rate * BITS_PER_BYTE) + 500000; + rate_mbps = max_t(u32, do_div(rate, 1000000), 1); + } + err = mlx5_esw_modify_vport_rate(esw, vport_num, rate_mbps); if (err) NL_SET_ERR_MSG_MOD(extack, "failed applying action to hardware"); -- GitLab From 258ed19f075fbc834fe5d69d8b54983fc11e0d4a Mon Sep 17 00:00:00 2001 From: Pan Bian Date: Wed, 20 Jan 2021 20:58:30 -0800 Subject: [PATCH 1517/2012] net/mlx5e: free page before return Instead of directly return, goto the error handling label to free allocated page. Fixes: 5f29458b77d5 ("net/mlx5e: Support dump callback in TX reporter") Signed-off-by: Pan Bian Reviewed-by: Leon Romanovsky Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/health.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/health.c b/drivers/net/ethernet/mellanox/mlx5/core/en/health.c index 718f8c0a4f6b7..84e501e057b4f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/health.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/health.c @@ -273,7 +273,7 @@ int mlx5e_health_rsc_fmsg_dump(struct mlx5e_priv *priv, struct mlx5_rsc_key *key err = devlink_fmsg_binary_pair_nest_start(fmsg, "data"); if (err) - return err; + goto free_page; cmd = mlx5_rsc_dump_cmd_create(mdev, key); if (IS_ERR(cmd)) { -- GitLab From 48470a90a42a64dd2f70743a149894a292b356e0 Mon Sep 17 00:00:00 2001 From: Maor Dickman Date: Tue, 19 Jan 2021 17:21:38 +0200 Subject: [PATCH 1518/2012] net/mlx5e: Reduce tc unsupported key print level "Unsupported key used:" appears in kernel log when flows with unsupported key are used, arp fields for example. OpenVSwitch was changed to match on arp fields by default that caused this warning to appear in kernel log for every arp rule, which can be a lot. Fix by lowering print level from warning to debug. Fixes: e3a2b7ed018e ("net/mlx5e: Support offload cls_flower with drop action") Signed-off-by: Maor Dickman Reviewed-by: Roi Dayan Reviewed-by: Saeed Mahameed Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 661235027b476..f4ce5e208e02b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -2270,8 +2270,8 @@ static int __parse_cls_flower(struct mlx5e_priv *priv, BIT(FLOW_DISSECTOR_KEY_ENC_OPTS) | BIT(FLOW_DISSECTOR_KEY_MPLS))) { NL_SET_ERR_MSG_MOD(extack, "Unsupported key"); - netdev_warn(priv->netdev, "Unsupported key used: 0x%x\n", - dissector->used_keys); + netdev_dbg(priv->netdev, "Unsupported key used: 0x%x\n", + dissector->used_keys); return -EOPNOTSUPP; } -- GitLab From 45c9a30835d84009dfe711f5c8836720767c286e Mon Sep 17 00:00:00 2001 From: Maxim Mikityanskiy Date: Wed, 25 Nov 2020 13:52:36 +0200 Subject: [PATCH 1519/2012] net/mlx5e: Fix IPSEC stats When IPSEC offload isn't active, the number of stats is not zero, but the strings are not filled, leading to exposing stats with empty names. Fix this by using the same condition for NUM_STATS and FILL_STRS. Fixes: 0aab3e1b04ae ("net/mlx5e: IPSec, Expose IPsec HW stat only for supporting HW") Signed-off-by: Maxim Mikityanskiy Reviewed-by: Raed Salem Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/en_accel/ipsec_stats.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_stats.c index 6c5c54bcd9be0..5cb936541b9e9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_stats.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_stats.c @@ -76,7 +76,7 @@ static const struct counter_desc mlx5e_ipsec_sw_stats_desc[] = { static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(ipsec_sw) { - return NUM_IPSEC_SW_COUNTERS; + return priv->ipsec ? NUM_IPSEC_SW_COUNTERS : 0; } static inline MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(ipsec_sw) {} @@ -105,7 +105,7 @@ static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(ipsec_sw) static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(ipsec_hw) { - return (mlx5_fpga_ipsec_device_caps(priv->mdev)) ? NUM_IPSEC_HW_COUNTERS : 0; + return (priv->ipsec && mlx5_fpga_ipsec_device_caps(priv->mdev)) ? NUM_IPSEC_HW_COUNTERS : 0; } static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(ipsec_hw) -- GitLab From 0aa128475d33d2d0095947eeab6b3e4d22dbd578 Mon Sep 17 00:00:00 2001 From: Daniel Jurgens Date: Fri, 22 Jan 2021 23:13:53 +0200 Subject: [PATCH 1520/2012] net/mlx5: Maintain separate page trees for ECPF and PF functions Pages for the host PF and ECPF were stored in the same tree, so the ECPF pages were being freed along with the host PF's when the host driver unloaded. Combine the function ID and ECPF flag to use as an index into the x-array containing the trees to get a different tree for the host PF and ECPF. Fixes: c6168161f693 ("net/mlx5: Add support for release all pages event") Signed-off-by: Daniel Jurgens Signed-off-by: Saeed Mahameed --- .../ethernet/mellanox/mlx5/core/pagealloc.c | 58 +++++++++++-------- 1 file changed, 34 insertions(+), 24 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c index eb956ce904bc0..eaa8958e24d79 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c @@ -58,7 +58,7 @@ struct fw_page { struct rb_node rb_node; u64 addr; struct page *page; - u16 func_id; + u32 function; unsigned long bitmask; struct list_head list; unsigned free_count; @@ -74,12 +74,17 @@ enum { MLX5_NUM_4K_IN_PAGE = PAGE_SIZE / MLX5_ADAPTER_PAGE_SIZE, }; -static struct rb_root *page_root_per_func_id(struct mlx5_core_dev *dev, u16 func_id) +static u32 get_function(u16 func_id, bool ec_function) +{ + return func_id & (ec_function << 16); +} + +static struct rb_root *page_root_per_function(struct mlx5_core_dev *dev, u32 function) { struct rb_root *root; int err; - root = xa_load(&dev->priv.page_root_xa, func_id); + root = xa_load(&dev->priv.page_root_xa, function); if (root) return root; @@ -87,7 +92,7 @@ static struct rb_root *page_root_per_func_id(struct mlx5_core_dev *dev, u16 func if (!root) return ERR_PTR(-ENOMEM); - err = xa_insert(&dev->priv.page_root_xa, func_id, root, GFP_KERNEL); + err = xa_insert(&dev->priv.page_root_xa, function, root, GFP_KERNEL); if (err) { kfree(root); return ERR_PTR(err); @@ -98,7 +103,7 @@ static struct rb_root *page_root_per_func_id(struct mlx5_core_dev *dev, u16 func return root; } -static int insert_page(struct mlx5_core_dev *dev, u64 addr, struct page *page, u16 func_id) +static int insert_page(struct mlx5_core_dev *dev, u64 addr, struct page *page, u32 function) { struct rb_node *parent = NULL; struct rb_root *root; @@ -107,7 +112,7 @@ static int insert_page(struct mlx5_core_dev *dev, u64 addr, struct page *page, u struct fw_page *tfp; int i; - root = page_root_per_func_id(dev, func_id); + root = page_root_per_function(dev, function); if (IS_ERR(root)) return PTR_ERR(root); @@ -130,7 +135,7 @@ static int insert_page(struct mlx5_core_dev *dev, u64 addr, struct page *page, u nfp->addr = addr; nfp->page = page; - nfp->func_id = func_id; + nfp->function = function; nfp->free_count = MLX5_NUM_4K_IN_PAGE; for (i = 0; i < MLX5_NUM_4K_IN_PAGE; i++) set_bit(i, &nfp->bitmask); @@ -143,14 +148,14 @@ static int insert_page(struct mlx5_core_dev *dev, u64 addr, struct page *page, u } static struct fw_page *find_fw_page(struct mlx5_core_dev *dev, u64 addr, - u32 func_id) + u32 function) { struct fw_page *result = NULL; struct rb_root *root; struct rb_node *tmp; struct fw_page *tfp; - root = xa_load(&dev->priv.page_root_xa, func_id); + root = xa_load(&dev->priv.page_root_xa, function); if (WARN_ON_ONCE(!root)) return NULL; @@ -194,14 +199,14 @@ static int mlx5_cmd_query_pages(struct mlx5_core_dev *dev, u16 *func_id, return err; } -static int alloc_4k(struct mlx5_core_dev *dev, u64 *addr, u16 func_id) +static int alloc_4k(struct mlx5_core_dev *dev, u64 *addr, u32 function) { struct fw_page *fp = NULL; struct fw_page *iter; unsigned n; list_for_each_entry(iter, &dev->priv.free_list, list) { - if (iter->func_id != func_id) + if (iter->function != function) continue; fp = iter; } @@ -231,7 +236,7 @@ static void free_fwp(struct mlx5_core_dev *dev, struct fw_page *fwp, { struct rb_root *root; - root = xa_load(&dev->priv.page_root_xa, fwp->func_id); + root = xa_load(&dev->priv.page_root_xa, fwp->function); if (WARN_ON_ONCE(!root)) return; @@ -244,12 +249,12 @@ static void free_fwp(struct mlx5_core_dev *dev, struct fw_page *fwp, kfree(fwp); } -static void free_4k(struct mlx5_core_dev *dev, u64 addr, u32 func_id) +static void free_4k(struct mlx5_core_dev *dev, u64 addr, u32 function) { struct fw_page *fwp; int n; - fwp = find_fw_page(dev, addr & MLX5_U64_4K_PAGE_MASK, func_id); + fwp = find_fw_page(dev, addr & MLX5_U64_4K_PAGE_MASK, function); if (!fwp) { mlx5_core_warn_rl(dev, "page not found\n"); return; @@ -263,7 +268,7 @@ static void free_4k(struct mlx5_core_dev *dev, u64 addr, u32 func_id) list_add(&fwp->list, &dev->priv.free_list); } -static int alloc_system_page(struct mlx5_core_dev *dev, u16 func_id) +static int alloc_system_page(struct mlx5_core_dev *dev, u32 function) { struct device *device = mlx5_core_dma_dev(dev); int nid = dev_to_node(device); @@ -291,7 +296,7 @@ map: goto map; } - err = insert_page(dev, addr, page, func_id); + err = insert_page(dev, addr, page, function); if (err) { mlx5_core_err(dev, "failed to track allocated page\n"); dma_unmap_page(device, addr, PAGE_SIZE, DMA_BIDIRECTIONAL); @@ -328,6 +333,7 @@ static void page_notify_fail(struct mlx5_core_dev *dev, u16 func_id, static int give_pages(struct mlx5_core_dev *dev, u16 func_id, int npages, int notify_fail, bool ec_function) { + u32 function = get_function(func_id, ec_function); u32 out[MLX5_ST_SZ_DW(manage_pages_out)] = {0}; int inlen = MLX5_ST_SZ_BYTES(manage_pages_in); u64 addr; @@ -345,10 +351,10 @@ static int give_pages(struct mlx5_core_dev *dev, u16 func_id, int npages, for (i = 0; i < npages; i++) { retry: - err = alloc_4k(dev, &addr, func_id); + err = alloc_4k(dev, &addr, function); if (err) { if (err == -ENOMEM) - err = alloc_system_page(dev, func_id); + err = alloc_system_page(dev, function); if (err) goto out_4k; @@ -384,7 +390,7 @@ retry: out_4k: for (i--; i >= 0; i--) - free_4k(dev, MLX5_GET64(manage_pages_in, in, pas[i]), func_id); + free_4k(dev, MLX5_GET64(manage_pages_in, in, pas[i]), function); out_free: kvfree(in); if (notify_fail) @@ -392,14 +398,15 @@ out_free: return err; } -static void release_all_pages(struct mlx5_core_dev *dev, u32 func_id, +static void release_all_pages(struct mlx5_core_dev *dev, u16 func_id, bool ec_function) { + u32 function = get_function(func_id, ec_function); struct rb_root *root; struct rb_node *p; int npages = 0; - root = xa_load(&dev->priv.page_root_xa, func_id); + root = xa_load(&dev->priv.page_root_xa, function); if (WARN_ON_ONCE(!root)) return; @@ -446,6 +453,7 @@ static int reclaim_pages_cmd(struct mlx5_core_dev *dev, struct rb_root *root; struct fw_page *fwp; struct rb_node *p; + bool ec_function; u32 func_id; u32 npages; u32 i = 0; @@ -456,8 +464,9 @@ static int reclaim_pages_cmd(struct mlx5_core_dev *dev, /* No hard feelings, we want our pages back! */ npages = MLX5_GET(manage_pages_in, in, input_num_entries); func_id = MLX5_GET(manage_pages_in, in, function_id); + ec_function = MLX5_GET(manage_pages_in, in, embedded_cpu_function); - root = xa_load(&dev->priv.page_root_xa, func_id); + root = xa_load(&dev->priv.page_root_xa, get_function(func_id, ec_function)); if (WARN_ON_ONCE(!root)) return -EEXIST; @@ -473,9 +482,10 @@ static int reclaim_pages_cmd(struct mlx5_core_dev *dev, return 0; } -static int reclaim_pages(struct mlx5_core_dev *dev, u32 func_id, int npages, +static int reclaim_pages(struct mlx5_core_dev *dev, u16 func_id, int npages, int *nclaimed, bool ec_function) { + u32 function = get_function(func_id, ec_function); int outlen = MLX5_ST_SZ_BYTES(manage_pages_out); u32 in[MLX5_ST_SZ_DW(manage_pages_in)] = {}; int num_claimed; @@ -514,7 +524,7 @@ static int reclaim_pages(struct mlx5_core_dev *dev, u32 func_id, int npages, } for (i = 0; i < num_claimed; i++) - free_4k(dev, MLX5_GET64(manage_pages_out, out, pas[i]), func_id); + free_4k(dev, MLX5_GET64(manage_pages_out, out, pas[i]), function); if (nclaimed) *nclaimed = num_claimed; -- GitLab From 156878d0e697187c7d207ee6c22afe50b7f3678c Mon Sep 17 00:00:00 2001 From: Maor Dickman Date: Sun, 24 Jan 2021 17:21:25 +0200 Subject: [PATCH 1521/2012] net/mlx5e: Disable hw-tc-offload when MLX5_CLS_ACT config is disabled The cited commit introduce new CONFIG_MLX5_CLS_ACT kconfig variable to control compilation of TC hardware offloads implementation. When this configuration is disabled the driver is still wrongly reports in ethtool that hw-tc-offload is supported. Fixed by reporting hw-tc-offload is supported only when CONFIG_MLX5_CLS_ACT is enabled. Fixes: d956873f908c ("net/mlx5e: Introduce kconfig var for TC support") Signed-off-by: Maor Dickman Reviewed-by: Vlad Buslov Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/en_rep.c | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 6a852b4901aa0..300e0e9f96b60 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -5027,7 +5027,7 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev) FT_CAP(modify_root) && FT_CAP(identified_miss_table_mode) && FT_CAP(flow_table_modify)) { -#ifdef CONFIG_MLX5_ESWITCH +#if IS_ENABLED(CONFIG_MLX5_CLS_ACT) netdev->hw_features |= NETIF_F_HW_TC; #endif #ifdef CONFIG_MLX5_EN_ARFS diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 989c70c1eda37..f0ceae65f6cfa 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -737,7 +737,9 @@ static void mlx5e_build_rep_netdev(struct net_device *netdev) netdev->features |= NETIF_F_NETNS_LOCAL; +#if IS_ENABLED(CONFIG_MLX5_CLS_ACT) netdev->hw_features |= NETIF_F_HW_TC; +#endif netdev->hw_features |= NETIF_F_SG; netdev->hw_features |= NETIF_F_IP_CSUM; netdev->hw_features |= NETIF_F_IPV6_CSUM; -- GitLab From 89e394675818bde8e30e135611c506455fa03fb7 Mon Sep 17 00:00:00 2001 From: Paul Blakey Date: Thu, 21 Jan 2021 10:06:45 +0200 Subject: [PATCH 1522/2012] net/mlx5e: Fix CT rule + encap slow path offload and deletion Currently, if a neighbour isn't valid when offloading tunnel encap rules, we offload the original match and replace the original action with "goto slow path" action. For this we use a temporary flow attribute based on the original flow attribute and then change the action. Flow flags, which among those is the CT flag, are still shared for the slow path rule offload, so we end up parsing this flow as a CT + goto slow path rule. Besides being unnecessary, CT action offload saves extra information in the passed flow attribute, such as created ct_flow and mod_hdr, which is lost onces the temporary flow attribute is freed. When a neigh is updated and is valid, we offload the original CT rule with original CT action, which again creates a ct_flow and mod_hdr and saves it in the flow's original attribute. Then we delete the slow path rule with a temporary flow attribute based on original updated flow attribute, and we free the relevant ct_flow and mod_hdr. Then when tc deletes this flow, we try to free the ct_flow and mod_hdr on the flow's attribute again. To fix the issue, skip all furture proccesing (CT/Sample/Split rules) in offload/unoffload of slow path rules. Call trace: [ 758.850525] Unable to handle kernel NULL pointer dereference at virtual address 0000000000000218 [ 758.952987] Internal error: Oops: 96000005 [#1] PREEMPT SMP [ 758.964170] Modules linked in: act_csum(E) act_pedit(E) act_tunnel_key(E) act_ct(E) nf_flow_table(E) xt_nat(E) ip6table_filter(E) ip6table_nat(E) xt_comment(E) ip6_tables(E) xt_conntrack(E) xt_MASQUERADE(E) nf_conntrack_netlink(E) xt_addrtype(E) iptable_filter(E) iptable_nat(E) bpfilter(E) br_netfilter(E) bridge(E) stp(E) llc(E) xfrm_user(E) overlay(E) act_mirred(E) act_skbedit(E) rdma_ucm(OE) rdma_cm(OE) iw_cm(OE) ib_ipoib(OE) ib_cm(OE) ib_umad(OE) esp6_offload(E) esp6(E) esp4_offload(E) esp4(E) xfrm_algo(E) mlx5_ib(OE) ib_uverbs(OE) geneve(E) ip6_udp_tunnel(E) udp_tunnel(E) nfnetlink_cttimeout(E) nfnetlink(E) mlx5_core(OE) act_gact(E) cls_flower(E) sch_ingress(E) openvswitch(E) nsh(E) nf_conncount(E) nf_nat(E) mlxfw(OE) psample(E) nf_conntrack(E) nf_defrag_ipv4(E) vfio_mdev(E) mdev(E) ib_core(OE) mlx_compat(OE) crct10dif_ce(E) uio_pdrv_genirq(E) uio(E) i2c_mlx(E) mlxbf_pmc(E) sbsa_gwdt(E) mlxbf_gige(E) gpio_mlxbf2(E) mlxbf_pka(E) mlx_trio(E) mlx_bootctl(E) bluefield_edac(E) knem(O) [ 758.964225] ip_tables(E) mlxbf_tmfifo(E) ipv6(E) crc_ccitt(E) nf_defrag_ipv6(E) [ 759.154186] CPU: 5 PID: 122 Comm: kworker/u16:1 Tainted: G OE 5.4.60-mlnx.52.gde81e85 #1 [ 759.172870] Hardware name: https://www.mellanox.com BlueField SoC/BlueField SoC, BIOS BlueField:3.5.0-2-gc1b5d64 Jan 4 2021 [ 759.195466] Workqueue: mlx5e mlx5e_rep_neigh_update [mlx5_core] [ 759.207344] pstate: a0000005 (NzCv daif -PAN -UAO) [ 759.217003] pc : mlx5_del_flow_rules+0x5c/0x160 [mlx5_core] [ 759.228229] lr : mlx5_del_flow_rules+0x34/0x160 [mlx5_core] [ 759.405858] Call trace: [ 759.410804] mlx5_del_flow_rules+0x5c/0x160 [mlx5_core] [ 759.421337] __mlx5_eswitch_del_rule.isra.43+0x5c/0x1c8 [mlx5_core] [ 759.433963] mlx5_eswitch_del_offloaded_rule_ct+0x34/0x40 [mlx5_core] [ 759.446942] mlx5_tc_rule_delete_ct+0x68/0x74 [mlx5_core] [ 759.457821] mlx5_tc_ct_delete_flow+0x160/0x21c [mlx5_core] [ 759.469051] mlx5e_tc_unoffload_fdb_rules+0x158/0x168 [mlx5_core] [ 759.481325] mlx5e_tc_encap_flows_del+0x140/0x26c [mlx5_core] [ 759.492901] mlx5e_rep_update_flows+0x11c/0x1ec [mlx5_core] [ 759.504127] mlx5e_rep_neigh_update+0x160/0x200 [mlx5_core] [ 759.515314] process_one_work+0x178/0x400 [ 759.523350] worker_thread+0x58/0x3e8 [ 759.530685] kthread+0x100/0x12c [ 759.537152] ret_from_fork+0x10/0x18 [ 759.544320] Code: 97ffef55 51000673 3100067f 54ffff41 (b9421ab3) [ 759.556548] ---[ end trace fab818bb1085832d ]--- Fixes: 4c3844d9e97e ("net/mlx5e: CT: Introduce connection tracking") Signed-off-by: Paul Blakey Reviewed-by: Roi Dayan Reviewed-by: Vlad Buslov Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index f4ce5e208e02b..dd0bfbacad474 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -1163,6 +1163,9 @@ mlx5e_tc_offload_fdb_rules(struct mlx5_eswitch *esw, struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts; struct mlx5_flow_handle *rule; + if (attr->flags & MLX5_ESW_ATTR_FLAG_SLOW_PATH) + return mlx5_eswitch_add_offloaded_rule(esw, spec, attr); + if (flow_flag_test(flow, CT)) { mod_hdr_acts = &attr->parse_attr->mod_hdr_acts; @@ -1193,6 +1196,9 @@ mlx5e_tc_unoffload_fdb_rules(struct mlx5_eswitch *esw, { flow_flag_clear(flow, OFFLOADED); + if (attr->flags & MLX5_ESW_ATTR_FLAG_SLOW_PATH) + goto offload_rule_0; + if (flow_flag_test(flow, CT)) { mlx5_tc_ct_delete_flow(get_ct_priv(flow->priv), flow, attr); return; @@ -1201,6 +1207,7 @@ mlx5e_tc_unoffload_fdb_rules(struct mlx5_eswitch *esw, if (attr->esw_attr->split_count) mlx5_eswitch_del_fwd_rule(esw, flow->rule[1], attr); +offload_rule_0: mlx5_eswitch_del_offloaded_rule(esw, flow->rule[0], attr); } -- GitLab From 57ac4a31c48377a3e675b2a731ceacbefefcd34d Mon Sep 17 00:00:00 2001 From: Maxim Mikityanskiy Date: Fri, 11 Dec 2020 12:56:56 +0200 Subject: [PATCH 1523/2012] net/mlx5e: Correctly handle changing the number of queues when the interface is down This commit addresses two issues related to changing the number of queues when the channels are closed: 1. Missing call to mlx5e_num_channels_changed to update real_num_tx_queues when the number of TCs is changed. 2. When mlx5e_num_channels_changed returns an error, the channel parameters must be reverted. Two Fixes: tags correspond to the first commits where these two issues were introduced. Fixes: 3909a12e7913 ("net/mlx5e: Fix configuration of XPS cpumasks and netdev queues in corner cases") Fixes: fa3748775b92 ("net/mlx5e: Handle errors from netif_set_real_num_{tx,rx}_queues") Signed-off-by: Maxim Mikityanskiy Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c | 8 +++++++- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 7 +++++++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index 2d37742a888c1..302001d6661ea 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -447,12 +447,18 @@ int mlx5e_ethtool_set_channels(struct mlx5e_priv *priv, goto out; } - new_channels.params = priv->channels.params; + new_channels.params = *cur_params; new_channels.params.num_channels = count; if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) { + struct mlx5e_params old_params; + + old_params = *cur_params; *cur_params = new_channels.params; err = mlx5e_num_channels_changed(priv); + if (err) + *cur_params = old_params; + goto out; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 300e0e9f96b60..ac76d32bad7de 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -3614,7 +3614,14 @@ static int mlx5e_setup_tc_mqprio(struct mlx5e_priv *priv, new_channels.params.num_tc = tc ? tc : 1; if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) { + struct mlx5e_params old_params; + + old_params = priv->channels.params; priv->channels.params = new_channels.params; + err = mlx5e_num_channels_changed(priv); + if (err) + priv->channels.params = old_params; + goto out; } -- GitLab From 912c9b5fcca1ab65b806c19dd3b3cb12d73c6fe2 Mon Sep 17 00:00:00 2001 From: Maxim Mikityanskiy Date: Thu, 14 Jan 2021 12:34:01 +0200 Subject: [PATCH 1524/2012] net/mlx5e: Revert parameters on errors when changing trust state without reset Trust state may be changed without recreating the channels. It happens when the channels are closed, and when channel parameters (min inline mode) stay the same after changing the trust state. Changing the trust state is a hardware command that may fail. The current code didn't restore the channel parameters to their old values if an error happened and the channels were closed. This commit adds handling for this case. Fixes: 6e0504c69811 ("net/mlx5e: Change inline mode correctly when changing trust state") Signed-off-by: Maxim Mikityanskiy Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c index d20243d6a0326..f23c67575073a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c @@ -1151,6 +1151,7 @@ static int mlx5e_set_trust_state(struct mlx5e_priv *priv, u8 trust_state) { struct mlx5e_channels new_channels = {}; bool reset_channels = true; + bool opened; int err = 0; mutex_lock(&priv->state_lock); @@ -1159,22 +1160,24 @@ static int mlx5e_set_trust_state(struct mlx5e_priv *priv, u8 trust_state) mlx5e_params_calc_trust_tx_min_inline_mode(priv->mdev, &new_channels.params, trust_state); - if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) { - priv->channels.params = new_channels.params; + opened = test_bit(MLX5E_STATE_OPENED, &priv->state); + if (!opened) reset_channels = false; - } /* Skip if tx_min_inline is the same */ if (new_channels.params.tx_min_inline_mode == priv->channels.params.tx_min_inline_mode) reset_channels = false; - if (reset_channels) + if (reset_channels) { err = mlx5e_safe_switch_channels(priv, &new_channels, mlx5e_update_trust_state_hw, &trust_state); - else + } else { err = mlx5e_update_trust_state_hw(priv, &trust_state); + if (!err && !opened) + priv->channels.params = new_channels.params; + } mutex_unlock(&priv->state_lock); -- GitLab From 8355060f5ec381abda77659f91f56302203df535 Mon Sep 17 00:00:00 2001 From: Maxim Mikityanskiy Date: Fri, 11 Dec 2020 16:05:01 +0200 Subject: [PATCH 1525/2012] net/mlx5e: Revert parameters on errors when changing MTU and LRO state without reset Sometimes, channel params are changed without recreating the channels. It happens in two basic cases: when the channels are closed, and when the parameter being changed doesn't affect how channels are configured. Such changes invoke a hardware command that might fail. The whole operation should be reverted in such cases, but the code that restores the parameters' values in the driver was missing. This commit adds this handling. Fixes: 2e20a151205b ("net/mlx5e: Fail safe mtu and lro setting") Signed-off-by: Maxim Mikityanskiy Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/en_main.c | 30 +++++++++++++------ 1 file changed, 21 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index ac76d32bad7de..a9d824a9cb05a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -3764,7 +3764,7 @@ static int set_feature_lro(struct net_device *netdev, bool enable) struct mlx5e_priv *priv = netdev_priv(netdev); struct mlx5_core_dev *mdev = priv->mdev; struct mlx5e_channels new_channels = {}; - struct mlx5e_params *old_params; + struct mlx5e_params *cur_params; int err = 0; bool reset; @@ -3777,8 +3777,8 @@ static int set_feature_lro(struct net_device *netdev, bool enable) goto out; } - old_params = &priv->channels.params; - if (enable && !MLX5E_GET_PFLAG(old_params, MLX5E_PFLAG_RX_STRIDING_RQ)) { + cur_params = &priv->channels.params; + if (enable && !MLX5E_GET_PFLAG(cur_params, MLX5E_PFLAG_RX_STRIDING_RQ)) { netdev_warn(netdev, "can't set LRO with legacy RQ\n"); err = -EINVAL; goto out; @@ -3786,18 +3786,23 @@ static int set_feature_lro(struct net_device *netdev, bool enable) reset = test_bit(MLX5E_STATE_OPENED, &priv->state); - new_channels.params = *old_params; + new_channels.params = *cur_params; new_channels.params.lro_en = enable; - if (old_params->rq_wq_type != MLX5_WQ_TYPE_CYCLIC) { - if (mlx5e_rx_mpwqe_is_linear_skb(mdev, old_params, NULL) == + if (cur_params->rq_wq_type != MLX5_WQ_TYPE_CYCLIC) { + if (mlx5e_rx_mpwqe_is_linear_skb(mdev, cur_params, NULL) == mlx5e_rx_mpwqe_is_linear_skb(mdev, &new_channels.params, NULL)) reset = false; } if (!reset) { - *old_params = new_channels.params; + struct mlx5e_params old_params; + + old_params = *cur_params; + *cur_params = new_channels.params; err = mlx5e_modify_tirs_lro(priv); + if (err) + *cur_params = old_params; goto out; } @@ -4074,9 +4079,16 @@ int mlx5e_change_mtu(struct net_device *netdev, int new_mtu, } if (!reset) { + unsigned int old_mtu = params->sw_mtu; + params->sw_mtu = new_mtu; - if (preactivate) - preactivate(priv, NULL); + if (preactivate) { + err = preactivate(priv, NULL); + if (err) { + params->sw_mtu = old_mtu; + goto out; + } + } netdev->mtu = params->sw_mtu; goto out; } -- GitLab From e2194a1744e8594e82a861687808c1adca419b85 Mon Sep 17 00:00:00 2001 From: Paul Blakey Date: Mon, 25 Jan 2021 17:31:26 +0200 Subject: [PATCH 1526/2012] net/mlx5: CT: Fix incorrect removal of tuple_nat_node from nat rhashtable If a non nat tuple entry is inserted just to the regular tuples rhashtable (ct_tuples_ht) and not to natted tuples rhashtable (ct_nat_tuples_ht). Commit bc562be9674b ("net/mlx5e: CT: Save ct entries tuples in hashtables") mixed up the return labels and names sot that on cleanup or failure we still try to remove for the natted tuples rhashtable. Fix that by correctly checking if a natted tuples insertion before removing it. While here make it more readable. Fixes: bc562be9674b ("net/mlx5e: CT: Save ct entries tuples in hashtables") Reviewed-by: Roi Dayan Signed-off-by: Paul Blakey Signed-off-by: Saeed Mahameed --- .../ethernet/mellanox/mlx5/core/en/tc_ct.c | 20 ++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c index 072363e73f1ce..6bc6b48a56dc7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c @@ -167,6 +167,12 @@ static const struct rhashtable_params tuples_nat_ht_params = { .min_size = 16 * 1024, }; +static bool +mlx5_tc_ct_entry_has_nat(struct mlx5_ct_entry *entry) +{ + return !!(entry->tuple_nat_node.next); +} + static int mlx5_tc_ct_rule_to_tuple(struct mlx5_ct_tuple *tuple, struct flow_rule *rule) { @@ -911,13 +917,13 @@ mlx5_tc_ct_block_flow_offload_add(struct mlx5_ct_ft *ft, err_insert: mlx5_tc_ct_entry_del_rules(ct_priv, entry); err_rules: - rhashtable_remove_fast(&ct_priv->ct_tuples_nat_ht, - &entry->tuple_nat_node, tuples_nat_ht_params); + if (mlx5_tc_ct_entry_has_nat(entry)) + rhashtable_remove_fast(&ct_priv->ct_tuples_nat_ht, + &entry->tuple_nat_node, tuples_nat_ht_params); err_tuple_nat: - if (entry->tuple_node.next) - rhashtable_remove_fast(&ct_priv->ct_tuples_ht, - &entry->tuple_node, - tuples_ht_params); + rhashtable_remove_fast(&ct_priv->ct_tuples_ht, + &entry->tuple_node, + tuples_ht_params); err_tuple: err_set: kfree(entry); @@ -932,7 +938,7 @@ mlx5_tc_ct_del_ft_entry(struct mlx5_tc_ct_priv *ct_priv, { mlx5_tc_ct_entry_del_rules(ct_priv, entry); mutex_lock(&ct_priv->shared_counter_lock); - if (entry->tuple_node.next) + if (mlx5_tc_ct_entry_has_nat(entry)) rhashtable_remove_fast(&ct_priv->ct_tuples_nat_ht, &entry->tuple_nat_node, tuples_nat_ht_params); -- GitLab From 3e841bacf72fd5fd98172c42bbc9ae7d461b6304 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Mon, 25 Jan 2021 10:20:40 +0200 Subject: [PATCH 1527/2012] net: bridge: multicast: fix br_multicast_eht_set_entry_lookup indentation Fix the messed up indentation in br_multicast_eht_set_entry_lookup(). Fixes: baa74d39ca39 ("net: bridge: multicast: add EHT source set handling functions") Reported-by: kernel test robot Signed-off-by: Nikolay Aleksandrov Link: https://lore.kernel.org/r/20210125082040.13022-1-razor@blackwall.org Signed-off-by: Jakub Kicinski --- net/bridge/br_multicast_eht.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/net/bridge/br_multicast_eht.c b/net/bridge/br_multicast_eht.c index a4fa1760bc8a2..ff9b3ba37cabb 100644 --- a/net/bridge/br_multicast_eht.c +++ b/net/bridge/br_multicast_eht.c @@ -85,15 +85,15 @@ br_multicast_eht_set_entry_lookup(struct net_bridge_group_eht_set *eht_set, struct net_bridge_group_eht_set_entry *this; int result; - this = rb_entry(node, struct net_bridge_group_eht_set_entry, - rb_node); - result = memcmp(h_addr, &this->h_addr, sizeof(*h_addr)); - if (result < 0) - node = node->rb_left; - else if (result > 0) - node = node->rb_right; - else - return this; + this = rb_entry(node, struct net_bridge_group_eht_set_entry, + rb_node); + result = memcmp(h_addr, &this->h_addr, sizeof(*h_addr)); + if (result < 0) + node = node->rb_left; + else if (result > 0) + node = node->rb_right; + else + return this; } return NULL; -- GitLab From 5cfeb5626d4acef8df993eceec442f7b54943976 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 22 Jan 2021 09:32:20 -0800 Subject: [PATCH 1528/2012] MAINTAINERS: add David Ahern to IPv4/IPv6 maintainers David has been the de-facto maintainer for much of the IP code for the last couple of years, let's make it official. Link: https://lore.kernel.org/r/20210122173220.3579491-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 705776b31c8de..1a5facde5a908 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -12413,6 +12413,7 @@ F: tools/testing/selftests/net/ipsec.c NETWORKING [IPv4/IPv6] M: "David S. Miller" M: Hideaki YOSHIFUJI +M: David Ahern L: netdev@vger.kernel.org S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net.git -- GitLab From 2961f562bb7b8b3cbaeaf5d9f0ea0fa8e72cc066 Mon Sep 17 00:00:00 2001 From: Dongliang Mu Date: Sat, 23 Jan 2021 13:11:02 +0800 Subject: [PATCH 1529/2012] usbnet: fix the indentation of one code snippet Every line of code should start with tab (8 characters) Signed-off-by: Dongliang Mu Link: https://lore.kernel.org/r/20210123051102.1091541-1-mudongliangabcd@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/usb/usbnet.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c index 1447da1d5729a..305c5f7b9a9ba 100644 --- a/drivers/net/usb/usbnet.c +++ b/drivers/net/usb/usbnet.c @@ -1964,12 +1964,12 @@ static int __usbnet_read_cmd(struct usbnet *dev, u8 cmd, u8 reqtype, cmd, reqtype, value, index, buf, size, USB_CTRL_GET_TIMEOUT); if (err > 0 && err <= size) { - if (data) - memcpy(data, buf, err); - else - netdev_dbg(dev->net, - "Huh? Data requested but thrown away.\n"); - } + if (data) + memcpy(data, buf, err); + else + netdev_dbg(dev->net, + "Huh? Data requested but thrown away.\n"); + } kfree(buf); out: return err; -- GitLab From f0947d0d21b219e03940b9be6628a43445c0de7a Mon Sep 17 00:00:00 2001 From: Ivan Vecera Date: Mon, 25 Jan 2021 08:44:16 +0100 Subject: [PATCH 1530/2012] team: protect features update by RCU to avoid deadlock Function __team_compute_features() is protected by team->lock mutex when it is called from team_compute_features() used when features of an underlying device is changed. This causes a deadlock when NETDEV_FEAT_CHANGE notifier for underlying device is fired due to change propagated from team driver (e.g. MTU change). It's because callbacks like team_change_mtu() or team_vlan_rx_{add,del}_vid() protect their port list traversal by team->lock mutex. Example (r8169 case where this driver disables TSO for certain MTU values): ... [ 6391.348202] __mutex_lock.isra.6+0x2d0/0x4a0 [ 6391.358602] team_device_event+0x9d/0x160 [team] [ 6391.363756] notifier_call_chain+0x47/0x70 [ 6391.368329] netdev_update_features+0x56/0x60 [ 6391.373207] rtl8169_change_mtu+0x14/0x50 [r8169] [ 6391.378457] dev_set_mtu_ext+0xe1/0x1d0 [ 6391.387022] dev_set_mtu+0x52/0x90 [ 6391.390820] team_change_mtu+0x64/0xf0 [team] [ 6391.395683] dev_set_mtu_ext+0xe1/0x1d0 [ 6391.399963] do_setlink+0x231/0xf50 ... In fact team_compute_features() called from team_device_event() does not need to be protected by team->lock mutex and rcu_read_lock() is sufficient there for port list traversal. Fixes: 3d249d4ca7d0 ("net: introduce ethernet teaming device") Cc: Saeed Mahameed Signed-off-by: Ivan Vecera Reviewed-by: Cong Wang Reviewed-by: Jiri Pirko Link: https://lore.kernel.org/r/20210125074416.4056484-1-ivecera@redhat.com Signed-off-by: Jakub Kicinski --- drivers/net/team/team.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index c19dac21c468b..dd7917cab2b12 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -992,7 +992,8 @@ static void __team_compute_features(struct team *team) unsigned int dst_release_flag = IFF_XMIT_DST_RELEASE | IFF_XMIT_DST_RELEASE_PERM; - list_for_each_entry(port, &team->port_list, list) { + rcu_read_lock(); + list_for_each_entry_rcu(port, &team->port_list, list) { vlan_features = netdev_increment_features(vlan_features, port->dev->vlan_features, TEAM_VLAN_FEATURES); @@ -1006,6 +1007,7 @@ static void __team_compute_features(struct team *team) if (port->dev->hard_header_len > max_hard_header_len) max_hard_header_len = port->dev->hard_header_len; } + rcu_read_unlock(); team->dev->vlan_features = vlan_features; team->dev->hw_enc_features = enc_features | NETIF_F_GSO_ENCAP_ALL | @@ -1020,9 +1022,7 @@ static void __team_compute_features(struct team *team) static void team_compute_features(struct team *team) { - mutex_lock(&team->lock); __team_compute_features(team); - mutex_unlock(&team->lock); netdev_change_features(team->dev); } -- GitLab From 30596ae0547dbda469d31a2678d9072fb0a3fa27 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Mon, 25 Jan 2021 20:39:05 -0600 Subject: [PATCH 1531/2012] ARM: zImage: atags_to_fdt: Fix node names on added root nodes Commit 7536c7e03e74 ("of/fdt: Remove redundant kbasename function call") exposed a bug creating DT nodes in the ATAGS to DT fixup code. Non-existent nodes would mistaken get created with a leading '/'. The problem was fdt_path_offset() takes a full path while creating a node with fdt_add_subnode() takes just the basename. Since this we only add root child nodes, we can just skip over the '/'. Fixes: 7536c7e03e74 ("of/fdt: Remove redundant kbasename function call") Reported-by: Chris Packham Cc: Qi Zheng Cc: Russell King Signed-off-by: Rob Herring Tested-by: Chris Packham Link: https://lore.kernel.org/r/20210126023905.1631161-1-robh@kernel.org --- arch/arm/boot/compressed/atags_to_fdt.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm/boot/compressed/atags_to_fdt.c b/arch/arm/boot/compressed/atags_to_fdt.c index 8452753efebe5..31927d2fe2972 100644 --- a/arch/arm/boot/compressed/atags_to_fdt.c +++ b/arch/arm/boot/compressed/atags_to_fdt.c @@ -15,7 +15,8 @@ static int node_offset(void *fdt, const char *node_path) { int offset = fdt_path_offset(fdt, node_path); if (offset == -FDT_ERR_NOTFOUND) - offset = fdt_add_subnode(fdt, 0, node_path); + /* Add the node to root if not found, dropping the leading '/' */ + offset = fdt_add_subnode(fdt, 0, node_path + 1); return offset; } -- GitLab From 871127e6ab0d6abb904cec81fc022baf6953be1f Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Tue, 26 Jan 2021 01:20:24 -0500 Subject: [PATCH 1532/2012] bnxt_en: Convert to use netif_level() helpers. Use the various netif_level() helpers to simplify the C code. This was suggested by Joe Perches. Cc: Joe Perches Signed-off-by: Michael Chan Link: https://lore.kernel.org/r/1611642024-3166-1-git-send-email-michael.chan@broadcom.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 34 ++++++++++------------- 1 file changed, 14 insertions(+), 20 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index dd7d2caa57a21..f508c5c61a303 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -1266,8 +1266,7 @@ static void bnxt_tpa_start(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, } else { tpa_info->hash_type = PKT_HASH_TYPE_NONE; tpa_info->gso_type = 0; - if (netif_msg_rx_err(bp)) - netdev_warn(bp->dev, "TPA packet without valid hash\n"); + netif_warn(bp, rx_err, bp->dev, "TPA packet without valid hash\n"); } tpa_info->flags2 = le32_to_cpu(tpa_start1->rx_tpa_start_cmp_flags2); tpa_info->metadata = le32_to_cpu(tpa_start1->rx_tpa_start_cmp_metadata); @@ -2039,12 +2038,11 @@ static int bnxt_async_event_process(struct bnxt *bp, fatal_str = "fatal"; set_bit(BNXT_STATE_FW_FATAL_COND, &bp->state); } - if (netif_msg_hw(bp)) { - netdev_warn(bp->dev, "Firmware %s reset event, data1: 0x%x, data2: 0x%x, min wait %u ms, max wait %u ms\n", - fatal_str, data1, data2, - bp->fw_reset_min_dsecs * 100, - bp->fw_reset_max_dsecs * 100); - } + netif_warn(bp, hw, bp->dev, + "Firmware %s reset event, data1: 0x%x, data2: 0x%x, min wait %u ms, max wait %u ms\n", + fatal_str, data1, data2, + bp->fw_reset_min_dsecs * 100, + bp->fw_reset_max_dsecs * 100); set_bit(BNXT_FW_RESET_NOTIFY_SP_EVENT, &bp->sp_event); break; } @@ -2059,13 +2057,11 @@ static int bnxt_async_event_process(struct bnxt *bp, if (!fw_health->enabled) break; - if (netif_msg_drv(bp)) - netdev_info(bp->dev, "Error recovery info: error recovery[%d], master[%d], reset count[0x%x], health status: 0x%x\n", - fw_health->enabled, fw_health->master, - bnxt_fw_health_readl(bp, - BNXT_FW_RESET_CNT_REG), - bnxt_fw_health_readl(bp, - BNXT_FW_HEALTH_REG)); + netif_info(bp, drv, bp->dev, + "Error recovery info: error recovery[%d], master[%d], reset count[0x%x], health status: 0x%x\n", + fw_health->enabled, fw_health->master, + bnxt_fw_health_readl(bp, BNXT_FW_RESET_CNT_REG), + bnxt_fw_health_readl(bp, BNXT_FW_HEALTH_REG)); fw_health->tmr_multiplier = DIV_ROUND_UP(fw_health->polling_dsecs * HZ, bp->current_interval * 10); @@ -2077,11 +2073,9 @@ static int bnxt_async_event_process(struct bnxt *bp, goto async_event_process_exit; } case ASYNC_EVENT_CMPL_EVENT_ID_DEBUG_NOTIFICATION: - if (netif_msg_hw(bp)) { - netdev_notice(bp->dev, - "Received firmware debug notification, data1: 0x%x, data2: 0x%x\n", - data1, data2); - } + netif_notice(bp, hw, bp->dev, + "Received firmware debug notification, data1: 0x%x, data2: 0x%x\n", + data1, data2); goto async_event_process_exit; case ASYNC_EVENT_CMPL_EVENT_ID_RING_MONITOR_MSG: { struct bnxt_rx_ring_info *rxr; -- GitLab From 285715ac9a81209540bfdb2495e3a005b701bef8 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 25 Jan 2021 12:31:59 +0100 Subject: [PATCH 1533/2012] bonding: add TLS dependency When TLS is a module, the built-in bonding driver may cause a link error: x86_64-linux-ld: drivers/net/bonding/bond_main.o: in function `bond_start_xmit': bond_main.c:(.text+0xc451): undefined reference to `tls_validate_xmit_skb' Add a dependency to avoid the problem. Signed-off-by: Arnd Bergmann Link: https://lore.kernel.org/r/20210125113209.2248522-1-arnd@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index 260f9f46668b8..1ebb4b9438769 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -42,6 +42,7 @@ config BONDING tristate "Bonding driver support" depends on INET depends on IPV6 || IPV6=n + depends on TLS || TLS_DEVICE=n help Say 'Y' or 'M' if you wish to be able to 'bond' multiple Ethernet Channels together. This is called 'Etherchannel' by Cisco, -- GitLab From 9b0b7837b9f1b29e89de8c5e321dc94601fda4b5 Mon Sep 17 00:00:00 2001 From: Seth David Schoen Date: Mon, 25 Jan 2021 20:08:34 -0800 Subject: [PATCH 1534/2012] selftests: add IPv4 unicast extensions tests Add selftests for kernel behavior with regard to various classes of unallocated/reserved IPv4 addresses, checking whether or not these addresses can be assigned as unicast addresses on links and used in routing. Expect the current kernel behavior at the time of this patch. That is: * 0/8 and 240/4 may be used as unicast, with the exceptions of 0.0.0.0 and 255.255.255.255; * the lowest address in a subnet may only be used as a broadcast address; * 127/8 may not be used as unicast (the route_localnet option, which is disabled by default, still leaves it treated slightly specially); * 224/4 may not be used as unicast. Signed-off-by: Seth David Schoen Suggested-by: John Gilmore Acked-by: Dave Taht Reviewed-by: David Ahern Link: https://lore.kernel.org/r/20210126040834.GR24989@frotz.zork.net Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/Makefile | 1 + .../selftests/net/unicast_extensions.sh | 228 ++++++++++++++++++ 2 files changed, 229 insertions(+) create mode 100755 tools/testing/selftests/net/unicast_extensions.sh diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index fa5fa425d1487..25f198bec0b25 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -22,6 +22,7 @@ TEST_PROGS += devlink_port_split.py TEST_PROGS += drop_monitor_tests.sh TEST_PROGS += vrf_route_leaking.sh TEST_PROGS += bareudp.sh +TEST_PROGS += unicast_extensions.sh TEST_PROGS_EXTENDED := in_netns.sh TEST_GEN_FILES = socket nettest TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy reuseport_addr_any diff --git a/tools/testing/selftests/net/unicast_extensions.sh b/tools/testing/selftests/net/unicast_extensions.sh new file mode 100755 index 0000000000000..dbf0421986df6 --- /dev/null +++ b/tools/testing/selftests/net/unicast_extensions.sh @@ -0,0 +1,228 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# +# By Seth Schoen (c) 2021, for the IPv4 Unicast Extensions Project +# Thanks to David Ahern for help and advice on nettest modifications. +# +# Self-tests for IPv4 address extensions: the kernel's ability to accept +# certain traditionally unused or unallocated IPv4 addresses. For each kind +# of address, we test for interface assignment, ping, TCP, and forwarding. +# Must be run as root (to manipulate network namespaces and virtual +# interfaces). +# +# Things we test for here: +# +# * Currently the kernel accepts addresses in 0/8 and 240/4 as valid. +# +# * Notwithstanding that, 0.0.0.0 and 255.255.255.255 cannot be assigned. +# +# * Currently the kernel DOES NOT accept unicast use of the lowest +# address in an IPv4 subnet (e.g. 192.168.100.0/32 in 192.168.100.0/24). +# This is treated as a second broadcast address, for compatibility +# with 4.2BSD (!). +# +# * Currently the kernel DOES NOT accept unicast use of any of 127/8. +# +# * Currently the kernel DOES NOT accept unicast use of any of 224/4. +# +# These tests provide an easy way to flip the expected result of any +# of these behaviors for testing kernel patches that change them. + +# nettest can be run from PATH or from same directory as this selftest +if ! which nettest >/dev/null; then + PATH=$PWD:$PATH + if ! which nettest >/dev/null; then + echo "'nettest' command not found; skipping tests" + exit 0 + fi +fi + +result=0 + +hide_output(){ exec 3>&1 4>&2 >/dev/null 2>/dev/null; } +show_output(){ exec >&3 2>&4; } + +show_result(){ + if [ $1 -eq 0 ]; then + printf "TEST: %-60s [ OK ]\n" "${2}" + else + printf "TEST: %-60s [FAIL]\n" "${2}" + result=1 + fi +} + +_do_segmenttest(){ + # Perform a simple set of link tests between a pair of + # IP addresses on a shared (virtual) segment, using + # ping and nettest. + # foo --- bar + # Arguments: ip_a ip_b prefix_length test_description + # + # Caller must set up foo-ns and bar-ns namespaces + # containing linked veth devices foo and bar, + # respectively. + + ip -n foo-ns address add $1/$3 dev foo || return 1 + ip -n foo-ns link set foo up || return 1 + ip -n bar-ns address add $2/$3 dev bar || return 1 + ip -n bar-ns link set bar up || return 1 + + ip netns exec foo-ns timeout 2 ping -c 1 $2 || return 1 + ip netns exec bar-ns timeout 2 ping -c 1 $1 || return 1 + + nettest -B -N bar-ns -O foo-ns -r $1 || return 1 + nettest -B -N foo-ns -O bar-ns -r $2 || return 1 + + return 0 +} + +_do_route_test(){ + # Perform a simple set of gateway tests. + # + # [foo] <---> [foo1]-[bar1] <---> [bar] /prefix + # host gateway host + # + # Arguments: foo_ip foo1_ip bar1_ip bar_ip prefix_len test_description + # Displays test result and returns success or failure. + + # Caller must set up foo-ns, bar-ns, and router-ns + # containing linked veth devices foo-foo1, bar1-bar + # (foo in foo-ns, foo1 and bar1 in router-ns, and + # bar in bar-ns). + + ip -n foo-ns address add $1/$5 dev foo || return 1 + ip -n foo-ns link set foo up || return 1 + ip -n foo-ns route add default via $2 || return 1 + ip -n bar-ns address add $4/$5 dev bar || return 1 + ip -n bar-ns link set bar up || return 1 + ip -n bar-ns route add default via $3 || return 1 + ip -n router-ns address add $2/$5 dev foo1 || return 1 + ip -n router-ns link set foo1 up || return 1 + ip -n router-ns address add $3/$5 dev bar1 || return 1 + ip -n router-ns link set bar1 up || return 1 + + echo 1 | ip netns exec router-ns tee /proc/sys/net/ipv4/ip_forward + + ip netns exec foo-ns timeout 2 ping -c 1 $2 || return 1 + ip netns exec foo-ns timeout 2 ping -c 1 $4 || return 1 + ip netns exec bar-ns timeout 2 ping -c 1 $3 || return 1 + ip netns exec bar-ns timeout 2 ping -c 1 $1 || return 1 + + nettest -B -N bar-ns -O foo-ns -r $1 || return 1 + nettest -B -N foo-ns -O bar-ns -r $4 || return 1 + + return 0 +} + +segmenttest(){ + # Sets up veth link and tries to connect over it. + # Arguments: ip_a ip_b prefix_len test_description + hide_output + ip netns add foo-ns + ip netns add bar-ns + ip link add foo netns foo-ns type veth peer name bar netns bar-ns + + test_result=0 + _do_segmenttest "$@" || test_result=1 + + ip netns pids foo-ns | xargs -r kill -9 + ip netns pids bar-ns | xargs -r kill -9 + ip netns del foo-ns + ip netns del bar-ns + show_output + + # inverted tests will expect failure instead of success + [ -n "$expect_failure" ] && test_result=`expr 1 - $test_result` + + show_result $test_result "$4" +} + +route_test(){ + # Sets up a simple gateway and tries to connect through it. + # [foo] <---> [foo1]-[bar1] <---> [bar] /prefix + # Arguments: foo_ip foo1_ip bar1_ip bar_ip prefix_len test_description + # Returns success or failure. + + hide_output + ip netns add foo-ns + ip netns add bar-ns + ip netns add router-ns + ip link add foo netns foo-ns type veth peer name foo1 netns router-ns + ip link add bar netns bar-ns type veth peer name bar1 netns router-ns + + test_result=0 + _do_route_test "$@" || test_result=1 + + ip netns pids foo-ns | xargs -r kill -9 + ip netns pids bar-ns | xargs -r kill -9 + ip netns pids router-ns | xargs -r kill -9 + ip netns del foo-ns + ip netns del bar-ns + ip netns del router-ns + + show_output + + # inverted tests will expect failure instead of success + [ -n "$expect_failure" ] && test_result=`expr 1 - $test_result` + show_result $test_result "$6" +} + +echo "###########################################################################" +echo "Unicast address extensions tests (behavior of reserved IPv4 addresses)" +echo "###########################################################################" +# +# Test support for 240/4 +segmenttest 240.1.2.1 240.1.2.4 24 "assign and ping within 240/4 (1 of 2) (is allowed)" +segmenttest 250.100.2.1 250.100.30.4 16 "assign and ping within 240/4 (2 of 2) (is allowed)" +# +# Test support for 0/8 +segmenttest 0.1.2.17 0.1.2.23 24 "assign and ping within 0/8 (1 of 2) (is allowed)" +segmenttest 0.77.240.17 0.77.2.23 16 "assign and ping within 0/8 (2 of 2) (is allowed)" +# +# Even 255.255/16 is OK! +segmenttest 255.255.3.1 255.255.50.77 16 "assign and ping inside 255.255/16 (is allowed)" +# +# Or 255.255.255/24 +segmenttest 255.255.255.1 255.255.255.254 24 "assign and ping inside 255.255.255/24 (is allowed)" +# +# Routing between different networks +route_test 240.5.6.7 240.5.6.1 255.1.2.1 255.1.2.3 24 "route between 240.5.6/24 and 255.1.2/24 (is allowed)" +route_test 0.200.6.7 0.200.38.1 245.99.101.1 245.99.200.111 16 "route between 0.200/16 and 245.99/16 (is allowed)" +# +# ============================================== +# ==== TESTS THAT CURRENTLY EXPECT FAILURE ===== +# ============================================== +expect_failure=true +# It should still not be possible to use 0.0.0.0 or 255.255.255.255 +# as a unicast address. Thus, these tests expect failure. +segmenttest 0.0.1.5 0.0.0.0 16 "assigning 0.0.0.0 (is forbidden)" +segmenttest 255.255.255.1 255.255.255.255 16 "assigning 255.255.255.255 (is forbidden)" +# +# Test support for not having all of 127 be loopback +# Currently Linux does not allow this, so this should fail too +segmenttest 127.99.4.5 127.99.4.6 16 "assign and ping inside 127/8 (is forbidden)" +# +# Test support for lowest address +# Currently Linux does not allow this, so this should fail too +segmenttest 5.10.15.20 5.10.15.0 24 "assign and ping lowest address (is forbidden)" +# +# Routing using lowest address as a gateway/endpoint +# Currently Linux does not allow this, so this should fail too +route_test 192.168.42.1 192.168.42.0 9.8.7.6 9.8.7.0 24 "routing using lowest address (is forbidden)" +# +# Test support for unicast use of class D +# Currently Linux does not allow this, so this should fail too +segmenttest 225.1.2.3 225.1.2.200 24 "assign and ping class D address (is forbidden)" +# +# Routing using class D as a gateway +route_test 225.1.42.1 225.1.42.2 9.8.7.6 9.8.7.1 24 "routing using class D (is forbidden)" +# +# Routing using 127/8 +# Currently Linux does not allow this, so this should fail too +route_test 127.99.2.3 127.99.2.4 200.1.2.3 200.1.2.4 24 "routing using 127/8 (is forbidden)" +# +unset expect_failure +# ===================================================== +# ==== END OF TESTS THAT CURRENTLY EXPECT FAILURE ===== +# ===================================================== +exit ${result} -- GitLab From b491e6a7391e3ecdebdd7a097550195cc878924a Mon Sep 17 00:00:00 2001 From: Xie He Date: Mon, 25 Jan 2021 20:09:39 -0800 Subject: [PATCH 1535/2012] net: lapb: Add locking to the lapb module In the lapb module, the timers may run concurrently with other code in this module, and there is currently no locking to prevent the code from racing on "struct lapb_cb". This patch adds locking to prevent racing. 1. Add "spinlock_t lock" to "struct lapb_cb"; Add "spin_lock_bh" and "spin_unlock_bh" to APIs, timer functions and notifier functions. 2. Add "bool t1timer_stop, t2timer_stop" to "struct lapb_cb" to make us able to ask running timers to abort; Modify "lapb_stop_t1timer" and "lapb_stop_t2timer" to make them able to abort running timers; Modify "lapb_t2timer_expiry" and "lapb_t1timer_expiry" to make them abort after they are stopped by "lapb_stop_t1timer", "lapb_stop_t2timer", and "lapb_start_t1timer", "lapb_start_t2timer". 3. Let lapb_unregister wait for other API functions and running timers to stop. 4. The lapb_device_event function calls lapb_disconnect_request. In order to avoid trying to hold the lock twice, add a new function named "__lapb_disconnect_request" which assumes the lock is held, and make it called by lapb_disconnect_request and lapb_device_event. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Cc: Martin Schiller Signed-off-by: Xie He Link: https://lore.kernel.org/r/20210126040939.69995-1-xie.he.0141@gmail.com Signed-off-by: Jakub Kicinski --- include/net/lapb.h | 2 ++ net/lapb/lapb_iface.c | 70 +++++++++++++++++++++++++++++++++---------- net/lapb/lapb_timer.c | 30 ++++++++++++++++--- 3 files changed, 82 insertions(+), 20 deletions(-) diff --git a/include/net/lapb.h b/include/net/lapb.h index ccc3d1f020b0c..eee73442a1ba1 100644 --- a/include/net/lapb.h +++ b/include/net/lapb.h @@ -92,6 +92,7 @@ struct lapb_cb { unsigned short n2, n2count; unsigned short t1, t2; struct timer_list t1timer, t2timer; + bool t1timer_stop, t2timer_stop; /* Internal control information */ struct sk_buff_head write_queue; @@ -103,6 +104,7 @@ struct lapb_cb { struct lapb_frame frmr_data; unsigned char frmr_type; + spinlock_t lock; refcount_t refcnt; }; diff --git a/net/lapb/lapb_iface.c b/net/lapb/lapb_iface.c index 40961889e9c01..0511bbe4af7b4 100644 --- a/net/lapb/lapb_iface.c +++ b/net/lapb/lapb_iface.c @@ -122,6 +122,8 @@ static struct lapb_cb *lapb_create_cb(void) timer_setup(&lapb->t1timer, NULL, 0); timer_setup(&lapb->t2timer, NULL, 0); + lapb->t1timer_stop = true; + lapb->t2timer_stop = true; lapb->t1 = LAPB_DEFAULT_T1; lapb->t2 = LAPB_DEFAULT_T2; @@ -129,6 +131,8 @@ static struct lapb_cb *lapb_create_cb(void) lapb->mode = LAPB_DEFAULT_MODE; lapb->window = LAPB_DEFAULT_WINDOW; lapb->state = LAPB_STATE_0; + + spin_lock_init(&lapb->lock); refcount_set(&lapb->refcnt, 1); out: return lapb; @@ -178,11 +182,23 @@ int lapb_unregister(struct net_device *dev) goto out; lapb_put(lapb); + /* Wait for other refs to "lapb" to drop */ + while (refcount_read(&lapb->refcnt) > 2) + usleep_range(1, 10); + + spin_lock_bh(&lapb->lock); + lapb_stop_t1timer(lapb); lapb_stop_t2timer(lapb); lapb_clear_queues(lapb); + spin_unlock_bh(&lapb->lock); + + /* Wait for running timers to stop */ + del_timer_sync(&lapb->t1timer); + del_timer_sync(&lapb->t2timer); + __lapb_remove_cb(lapb); lapb_put(lapb); @@ -201,6 +217,8 @@ int lapb_getparms(struct net_device *dev, struct lapb_parms_struct *parms) if (!lapb) goto out; + spin_lock_bh(&lapb->lock); + parms->t1 = lapb->t1 / HZ; parms->t2 = lapb->t2 / HZ; parms->n2 = lapb->n2; @@ -219,6 +237,7 @@ int lapb_getparms(struct net_device *dev, struct lapb_parms_struct *parms) else parms->t2timer = (lapb->t2timer.expires - jiffies) / HZ; + spin_unlock_bh(&lapb->lock); lapb_put(lapb); rc = LAPB_OK; out: @@ -234,6 +253,8 @@ int lapb_setparms(struct net_device *dev, struct lapb_parms_struct *parms) if (!lapb) goto out; + spin_lock_bh(&lapb->lock); + rc = LAPB_INVALUE; if (parms->t1 < 1 || parms->t2 < 1 || parms->n2 < 1) goto out_put; @@ -256,6 +277,7 @@ int lapb_setparms(struct net_device *dev, struct lapb_parms_struct *parms) rc = LAPB_OK; out_put: + spin_unlock_bh(&lapb->lock); lapb_put(lapb); out: return rc; @@ -270,6 +292,8 @@ int lapb_connect_request(struct net_device *dev) if (!lapb) goto out; + spin_lock_bh(&lapb->lock); + rc = LAPB_OK; if (lapb->state == LAPB_STATE_1) goto out_put; @@ -285,24 +309,18 @@ int lapb_connect_request(struct net_device *dev) rc = LAPB_OK; out_put: + spin_unlock_bh(&lapb->lock); lapb_put(lapb); out: return rc; } EXPORT_SYMBOL(lapb_connect_request); -int lapb_disconnect_request(struct net_device *dev) +static int __lapb_disconnect_request(struct lapb_cb *lapb) { - struct lapb_cb *lapb = lapb_devtostruct(dev); - int rc = LAPB_BADTOKEN; - - if (!lapb) - goto out; - switch (lapb->state) { case LAPB_STATE_0: - rc = LAPB_NOTCONNECTED; - goto out_put; + return LAPB_NOTCONNECTED; case LAPB_STATE_1: lapb_dbg(1, "(%p) S1 TX DISC(1)\n", lapb->dev); @@ -310,12 +328,10 @@ int lapb_disconnect_request(struct net_device *dev) lapb_send_control(lapb, LAPB_DISC, LAPB_POLLON, LAPB_COMMAND); lapb->state = LAPB_STATE_0; lapb_start_t1timer(lapb); - rc = LAPB_NOTCONNECTED; - goto out_put; + return LAPB_NOTCONNECTED; case LAPB_STATE_2: - rc = LAPB_OK; - goto out_put; + return LAPB_OK; } lapb_clear_queues(lapb); @@ -328,8 +344,22 @@ int lapb_disconnect_request(struct net_device *dev) lapb_dbg(1, "(%p) S3 DISC(1)\n", lapb->dev); lapb_dbg(0, "(%p) S3 -> S2\n", lapb->dev); - rc = LAPB_OK; -out_put: + return LAPB_OK; +} + +int lapb_disconnect_request(struct net_device *dev) +{ + struct lapb_cb *lapb = lapb_devtostruct(dev); + int rc = LAPB_BADTOKEN; + + if (!lapb) + goto out; + + spin_lock_bh(&lapb->lock); + + rc = __lapb_disconnect_request(lapb); + + spin_unlock_bh(&lapb->lock); lapb_put(lapb); out: return rc; @@ -344,6 +374,8 @@ int lapb_data_request(struct net_device *dev, struct sk_buff *skb) if (!lapb) goto out; + spin_lock_bh(&lapb->lock); + rc = LAPB_NOTCONNECTED; if (lapb->state != LAPB_STATE_3 && lapb->state != LAPB_STATE_4) goto out_put; @@ -352,6 +384,7 @@ int lapb_data_request(struct net_device *dev, struct sk_buff *skb) lapb_kick(lapb); rc = LAPB_OK; out_put: + spin_unlock_bh(&lapb->lock); lapb_put(lapb); out: return rc; @@ -364,7 +397,9 @@ int lapb_data_received(struct net_device *dev, struct sk_buff *skb) int rc = LAPB_BADTOKEN; if (lapb) { + spin_lock_bh(&lapb->lock); lapb_data_input(lapb, skb); + spin_unlock_bh(&lapb->lock); lapb_put(lapb); rc = LAPB_OK; } @@ -435,6 +470,8 @@ static int lapb_device_event(struct notifier_block *this, unsigned long event, if (!lapb) return NOTIFY_DONE; + spin_lock_bh(&lapb->lock); + switch (event) { case NETDEV_UP: lapb_dbg(0, "(%p) Interface up: %s\n", dev, dev->name); @@ -454,7 +491,7 @@ static int lapb_device_event(struct notifier_block *this, unsigned long event, break; case NETDEV_GOING_DOWN: if (netif_carrier_ok(dev)) - lapb_disconnect_request(dev); + __lapb_disconnect_request(lapb); break; case NETDEV_DOWN: lapb_dbg(0, "(%p) Interface down: %s\n", dev, dev->name); @@ -489,6 +526,7 @@ static int lapb_device_event(struct notifier_block *this, unsigned long event, break; } + spin_unlock_bh(&lapb->lock); lapb_put(lapb); return NOTIFY_DONE; } diff --git a/net/lapb/lapb_timer.c b/net/lapb/lapb_timer.c index baa247fe4ed05..0230b272b7d1d 100644 --- a/net/lapb/lapb_timer.c +++ b/net/lapb/lapb_timer.c @@ -40,6 +40,7 @@ void lapb_start_t1timer(struct lapb_cb *lapb) lapb->t1timer.function = lapb_t1timer_expiry; lapb->t1timer.expires = jiffies + lapb->t1; + lapb->t1timer_stop = false; add_timer(&lapb->t1timer); } @@ -50,16 +51,19 @@ void lapb_start_t2timer(struct lapb_cb *lapb) lapb->t2timer.function = lapb_t2timer_expiry; lapb->t2timer.expires = jiffies + lapb->t2; + lapb->t2timer_stop = false; add_timer(&lapb->t2timer); } void lapb_stop_t1timer(struct lapb_cb *lapb) { + lapb->t1timer_stop = true; del_timer(&lapb->t1timer); } void lapb_stop_t2timer(struct lapb_cb *lapb) { + lapb->t2timer_stop = true; del_timer(&lapb->t2timer); } @@ -72,16 +76,31 @@ static void lapb_t2timer_expiry(struct timer_list *t) { struct lapb_cb *lapb = from_timer(lapb, t, t2timer); + spin_lock_bh(&lapb->lock); + if (timer_pending(&lapb->t2timer)) /* A new timer has been set up */ + goto out; + if (lapb->t2timer_stop) /* The timer has been stopped */ + goto out; + if (lapb->condition & LAPB_ACK_PENDING_CONDITION) { lapb->condition &= ~LAPB_ACK_PENDING_CONDITION; lapb_timeout_response(lapb); } + +out: + spin_unlock_bh(&lapb->lock); } static void lapb_t1timer_expiry(struct timer_list *t) { struct lapb_cb *lapb = from_timer(lapb, t, t1timer); + spin_lock_bh(&lapb->lock); + if (timer_pending(&lapb->t1timer)) /* A new timer has been set up */ + goto out; + if (lapb->t1timer_stop) /* The timer has been stopped */ + goto out; + switch (lapb->state) { /* @@ -108,7 +127,7 @@ static void lapb_t1timer_expiry(struct timer_list *t) lapb->state = LAPB_STATE_0; lapb_disconnect_indication(lapb, LAPB_TIMEDOUT); lapb_dbg(0, "(%p) S1 -> S0\n", lapb->dev); - return; + goto out; } else { lapb->n2count++; if (lapb->mode & LAPB_EXTENDED) { @@ -132,7 +151,7 @@ static void lapb_t1timer_expiry(struct timer_list *t) lapb->state = LAPB_STATE_0; lapb_disconnect_confirmation(lapb, LAPB_TIMEDOUT); lapb_dbg(0, "(%p) S2 -> S0\n", lapb->dev); - return; + goto out; } else { lapb->n2count++; lapb_dbg(1, "(%p) S2 TX DISC(1)\n", lapb->dev); @@ -150,7 +169,7 @@ static void lapb_t1timer_expiry(struct timer_list *t) lapb_stop_t2timer(lapb); lapb_disconnect_indication(lapb, LAPB_TIMEDOUT); lapb_dbg(0, "(%p) S3 -> S0\n", lapb->dev); - return; + goto out; } else { lapb->n2count++; lapb_requeue_frames(lapb); @@ -167,7 +186,7 @@ static void lapb_t1timer_expiry(struct timer_list *t) lapb->state = LAPB_STATE_0; lapb_disconnect_indication(lapb, LAPB_TIMEDOUT); lapb_dbg(0, "(%p) S4 -> S0\n", lapb->dev); - return; + goto out; } else { lapb->n2count++; lapb_transmit_frmr(lapb); @@ -176,4 +195,7 @@ static void lapb_t1timer_expiry(struct timer_list *t) } lapb_start_t1timer(lapb); + +out: + spin_unlock_bh(&lapb->lock); } -- GitLab From 67c9ed1c8809d554082a30a1b38b460a6e8405c0 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Mon, 25 Jan 2021 16:04:48 +0100 Subject: [PATCH 1536/2012] net: dsa: mv88e6xxx: use mv88e6185_g1_vtu_getnext() for the 6250 mv88e6250_g1_vtu_getnext is almost identical to mv88e6185_g1_vtu_getnext, except for the 6250 only having 64 databases instead of 256. We can reduce code duplication by simply masking off the extra two garbage bits when assembling the fid from VTU op [3:0] and [11:8]. Reviewed-by: Tobias Waldekranz Tested-by: Tobias Waldekranz Reviewed-by: Florian Fainelli Signed-off-by: Rasmus Villemoes Reviewed-by: Vladimir Oltean Signed-off-by: Jakub Kicinski --- drivers/net/dsa/mv88e6xxx/chip.c | 2 +- drivers/net/dsa/mv88e6xxx/global1.h | 2 -- drivers/net/dsa/mv88e6xxx/global1_vtu.c | 36 ++----------------------- 3 files changed, 3 insertions(+), 37 deletions(-) diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index 2f976050a0d71..a579518d73332 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -4049,7 +4049,7 @@ static const struct mv88e6xxx_ops mv88e6250_ops = { .mgmt_rsvd2cpu = mv88e6352_g2_mgmt_rsvd2cpu, .pot_clear = mv88e6xxx_g2_pot_clear, .reset = mv88e6250_g1_reset, - .vtu_getnext = mv88e6250_g1_vtu_getnext, + .vtu_getnext = mv88e6185_g1_vtu_getnext, .vtu_loadpurge = mv88e6250_g1_vtu_loadpurge, .avb_ops = &mv88e6352_avb_ops, .ptp_ops = &mv88e6250_ptp_ops, diff --git a/drivers/net/dsa/mv88e6xxx/global1.h b/drivers/net/dsa/mv88e6xxx/global1.h index 80a182c5b98af..d2dd2f4e4730d 100644 --- a/drivers/net/dsa/mv88e6xxx/global1.h +++ b/drivers/net/dsa/mv88e6xxx/global1.h @@ -336,8 +336,6 @@ int mv88e6185_g1_vtu_getnext(struct mv88e6xxx_chip *chip, struct mv88e6xxx_vtu_entry *entry); int mv88e6185_g1_vtu_loadpurge(struct mv88e6xxx_chip *chip, struct mv88e6xxx_vtu_entry *entry); -int mv88e6250_g1_vtu_getnext(struct mv88e6xxx_chip *chip, - struct mv88e6xxx_vtu_entry *entry); int mv88e6250_g1_vtu_loadpurge(struct mv88e6xxx_chip *chip, struct mv88e6xxx_vtu_entry *entry); int mv88e6352_g1_vtu_getnext(struct mv88e6xxx_chip *chip, diff --git a/drivers/net/dsa/mv88e6xxx/global1_vtu.c b/drivers/net/dsa/mv88e6xxx/global1_vtu.c index 7b96396be609e..519ae48ba96e9 100644 --- a/drivers/net/dsa/mv88e6xxx/global1_vtu.c +++ b/drivers/net/dsa/mv88e6xxx/global1_vtu.c @@ -336,39 +336,6 @@ int mv88e6xxx_g1_vtu_getnext(struct mv88e6xxx_chip *chip, return mv88e6xxx_g1_vtu_vid_read(chip, entry); } -int mv88e6250_g1_vtu_getnext(struct mv88e6xxx_chip *chip, - struct mv88e6xxx_vtu_entry *entry) -{ - u16 val; - int err; - - err = mv88e6xxx_g1_vtu_getnext(chip, entry); - if (err) - return err; - - if (entry->valid) { - err = mv88e6185_g1_vtu_data_read(chip, entry); - if (err) - return err; - - err = mv88e6185_g1_stu_data_read(chip, entry); - if (err) - return err; - - /* VTU DBNum[3:0] are located in VTU Operation 3:0 - * VTU DBNum[5:4] are located in VTU Operation 9:8 - */ - err = mv88e6xxx_g1_read(chip, MV88E6XXX_G1_VTU_OP, &val); - if (err) - return err; - - entry->fid = val & 0x000f; - entry->fid |= (val & 0x0300) >> 4; - } - - return 0; -} - int mv88e6185_g1_vtu_getnext(struct mv88e6xxx_chip *chip, struct mv88e6xxx_vtu_entry *entry) { @@ -389,7 +356,7 @@ int mv88e6185_g1_vtu_getnext(struct mv88e6xxx_chip *chip, return err; /* VTU DBNum[3:0] are located in VTU Operation 3:0 - * VTU DBNum[7:4] are located in VTU Operation 11:8 + * VTU DBNum[7:4] ([5:4] for 6250) are located in VTU Operation 11:8 (9:8) */ err = mv88e6xxx_g1_read(chip, MV88E6XXX_G1_VTU_OP, &val); if (err) @@ -397,6 +364,7 @@ int mv88e6185_g1_vtu_getnext(struct mv88e6xxx_chip *chip, entry->fid = val & 0x000f; entry->fid |= (val & 0x0f00) >> 4; + entry->fid &= mv88e6xxx_num_databases(chip) - 1; } return 0; -- GitLab From b28f3f3c3f30552285f4a420acb7afba2322d668 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Mon, 25 Jan 2021 16:04:49 +0100 Subject: [PATCH 1537/2012] net: dsa: mv88e6xxx: use mv88e6185_g1_vtu_loadpurge() for the 6250 Apart from the mask used to get the high bits of the fid, mv88e6185_g1_vtu_loadpurge() and mv88e6250_g1_vtu_loadpurge() are identical. Since the entry->fid passed in should never exceed the number of databases, we can simply use the former as-is as replacement for the latter. Suggested-by: Tobias Waldekranz Signed-off-by: Rasmus Villemoes Reviewed-by: Vladimir Oltean Signed-off-by: Jakub Kicinski --- drivers/net/dsa/mv88e6xxx/chip.c | 2 +- drivers/net/dsa/mv88e6xxx/global1.h | 2 -- drivers/net/dsa/mv88e6xxx/global1_vtu.c | 33 +++---------------------- 3 files changed, 5 insertions(+), 32 deletions(-) diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index a579518d73332..5143649479442 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -4050,7 +4050,7 @@ static const struct mv88e6xxx_ops mv88e6250_ops = { .pot_clear = mv88e6xxx_g2_pot_clear, .reset = mv88e6250_g1_reset, .vtu_getnext = mv88e6185_g1_vtu_getnext, - .vtu_loadpurge = mv88e6250_g1_vtu_loadpurge, + .vtu_loadpurge = mv88e6185_g1_vtu_loadpurge, .avb_ops = &mv88e6352_avb_ops, .ptp_ops = &mv88e6250_ptp_ops, .phylink_validate = mv88e6065_phylink_validate, diff --git a/drivers/net/dsa/mv88e6xxx/global1.h b/drivers/net/dsa/mv88e6xxx/global1.h index d2dd2f4e4730d..7c396964d0b26 100644 --- a/drivers/net/dsa/mv88e6xxx/global1.h +++ b/drivers/net/dsa/mv88e6xxx/global1.h @@ -336,8 +336,6 @@ int mv88e6185_g1_vtu_getnext(struct mv88e6xxx_chip *chip, struct mv88e6xxx_vtu_entry *entry); int mv88e6185_g1_vtu_loadpurge(struct mv88e6xxx_chip *chip, struct mv88e6xxx_vtu_entry *entry); -int mv88e6250_g1_vtu_loadpurge(struct mv88e6xxx_chip *chip, - struct mv88e6xxx_vtu_entry *entry); int mv88e6352_g1_vtu_getnext(struct mv88e6xxx_chip *chip, struct mv88e6xxx_vtu_entry *entry); int mv88e6352_g1_vtu_loadpurge(struct mv88e6xxx_chip *chip, diff --git a/drivers/net/dsa/mv88e6xxx/global1_vtu.c b/drivers/net/dsa/mv88e6xxx/global1_vtu.c index 519ae48ba96e9..ae12c981923e8 100644 --- a/drivers/net/dsa/mv88e6xxx/global1_vtu.c +++ b/drivers/net/dsa/mv88e6xxx/global1_vtu.c @@ -434,35 +434,6 @@ int mv88e6390_g1_vtu_getnext(struct mv88e6xxx_chip *chip, return 0; } -int mv88e6250_g1_vtu_loadpurge(struct mv88e6xxx_chip *chip, - struct mv88e6xxx_vtu_entry *entry) -{ - u16 op = MV88E6XXX_G1_VTU_OP_VTU_LOAD_PURGE; - int err; - - err = mv88e6xxx_g1_vtu_op_wait(chip); - if (err) - return err; - - err = mv88e6xxx_g1_vtu_vid_write(chip, entry); - if (err) - return err; - - if (entry->valid) { - err = mv88e6185_g1_vtu_data_write(chip, entry); - if (err) - return err; - - /* VTU DBNum[3:0] are located in VTU Operation 3:0 - * VTU DBNum[5:4] are located in VTU Operation 9:8 - */ - op |= entry->fid & 0x000f; - op |= (entry->fid & 0x0030) << 4; - } - - return mv88e6xxx_g1_vtu_op(chip, op); -} - int mv88e6185_g1_vtu_loadpurge(struct mv88e6xxx_chip *chip, struct mv88e6xxx_vtu_entry *entry) { @@ -484,6 +455,10 @@ int mv88e6185_g1_vtu_loadpurge(struct mv88e6xxx_chip *chip, /* VTU DBNum[3:0] are located in VTU Operation 3:0 * VTU DBNum[7:4] are located in VTU Operation 11:8 + * + * For the 6250/6220, the latter are really [5:4] and + * 9:8, but in those cases bits 7:6 of entry->fid are + * 0 since they have num_databases = 64. */ op |= entry->fid & 0x000f; op |= (entry->fid & 0x00f0) << 4; -- GitLab From 90a586b8d741b4b8a1368f6a2ccf858e1a0fe23c Mon Sep 17 00:00:00 2001 From: Emil Renner Berthing Date: Sat, 23 Jan 2021 18:32:20 +0100 Subject: [PATCH 1538/2012] net: usbnet: initialize tasklet using tasklet_init Initialize tasklet using tasklet_init() rather than open-coding it. Signed-off-by: Emil Renner Berthing Signed-off-by: Jakub Kicinski --- drivers/net/usb/usbnet.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c index 305c5f7b9a9ba..0fa31f1e9f7e9 100644 --- a/drivers/net/usb/usbnet.c +++ b/drivers/net/usb/usbnet.c @@ -1673,8 +1673,7 @@ usbnet_probe (struct usb_interface *udev, const struct usb_device_id *prod) skb_queue_head_init (&dev->txq); skb_queue_head_init (&dev->done); skb_queue_head_init(&dev->rxq_pause); - dev->bh.func = usbnet_bh_tasklet; - dev->bh.data = (unsigned long)&dev->delay; + tasklet_init(&dev->bh, usbnet_bh_tasklet, (unsigned long)&dev->delay); INIT_WORK (&dev->kevent, usbnet_deferred_kevent); init_usb_anchor(&dev->deferred); timer_setup(&dev->delay, usbnet_bh, 0); -- GitLab From c955e329bb9d44fab75cf2116542fcc0de0473c5 Mon Sep 17 00:00:00 2001 From: Emil Renner Berthing Date: Sat, 23 Jan 2021 18:32:21 +0100 Subject: [PATCH 1539/2012] net: usbnet: use new tasklet API This converts the driver to use the new tasklet API introduced in commit 12cc923f1ccc ("tasklet: Introduce new initialization API") Signed-off-by: Emil Renner Berthing Signed-off-by: Jakub Kicinski --- drivers/net/usb/usbnet.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c index 0fa31f1e9f7e9..b4c8080e6f87a 100644 --- a/drivers/net/usb/usbnet.c +++ b/drivers/net/usb/usbnet.c @@ -1539,11 +1539,11 @@ static void usbnet_bh (struct timer_list *t) } } -static void usbnet_bh_tasklet(unsigned long data) +static void usbnet_bh_tasklet(struct tasklet_struct *t) { - struct timer_list *t = (struct timer_list *)data; + struct usbnet *dev = from_tasklet(dev, t, bh); - usbnet_bh(t); + usbnet_bh(&dev->delay); } @@ -1673,7 +1673,7 @@ usbnet_probe (struct usb_interface *udev, const struct usb_device_id *prod) skb_queue_head_init (&dev->txq); skb_queue_head_init (&dev->done); skb_queue_head_init(&dev->rxq_pause); - tasklet_init(&dev->bh, usbnet_bh_tasklet, (unsigned long)&dev->delay); + tasklet_setup(&dev->bh, usbnet_bh_tasklet); INIT_WORK (&dev->kevent, usbnet_deferred_kevent); init_usb_anchor(&dev->deferred); timer_setup(&dev->delay, usbnet_bh, 0); -- GitLab From c730ab423bfa1ae99b688a9abdedf74477d44955 Mon Sep 17 00:00:00 2001 From: Laurent Badel Date: Mon, 25 Jan 2021 11:07:45 +0100 Subject: [PATCH 1540/2012] net: fec: Fix temporary RMII clock reset on link up MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit fec_restart() does a hard reset of the MAC module when the link status changes to up. This temporarily resets the R_CNTRL register which controls the MII mode of the ENET_OUT clock. In the case of RMII, the clock frequency momentarily drops from 50MHz to 25MHz until the register is reconfigured. Some link partners do not tolerate this glitch and invalidate the link causing failure to establish a stable link when using PHY polling mode. Since as per IEEE802.3 the criteria for link validity are PHY-specific, what the partner should tolerate cannot be assumed, so avoid resetting the MII clock by using software reset instead of hardware reset when the link is up. This is generally relevant only if the SoC provides the clock to an external PHY and the PHY is configured for RMII. Signed-off-by: Laurent Badel Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/freescale/fec.h | 5 +++++ drivers/net/ethernet/freescale/fec_main.c | 6 ++++-- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/freescale/fec.h b/drivers/net/ethernet/freescale/fec.h index c527f4ee1d3ae..0602d5d5d2eee 100644 --- a/drivers/net/ethernet/freescale/fec.h +++ b/drivers/net/ethernet/freescale/fec.h @@ -462,6 +462,11 @@ struct bufdesc_ex { */ #define FEC_QUIRK_CLEAR_SETUP_MII (1 << 17) +/* Some link partners do not tolerate the momentary reset of the REF_CLK + * frequency when the RNCTL register is cleared by hardware reset. + */ +#define FEC_QUIRK_NO_HARD_RESET (1 << 18) + struct bufdesc_prop { int qid; /* Address of Rx and Tx buffers */ diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 55c28fbc5f9ea..9ebdb0e54291b 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -100,7 +100,8 @@ static const struct fec_devinfo fec_imx27_info = { static const struct fec_devinfo fec_imx28_info = { .quirks = FEC_QUIRK_ENET_MAC | FEC_QUIRK_SWAP_FRAME | FEC_QUIRK_SINGLE_MDIO | FEC_QUIRK_HAS_RACC | - FEC_QUIRK_HAS_FRREG | FEC_QUIRK_CLEAR_SETUP_MII, + FEC_QUIRK_HAS_FRREG | FEC_QUIRK_CLEAR_SETUP_MII | + FEC_QUIRK_NO_HARD_RESET, }; static const struct fec_devinfo fec_imx6q_info = { @@ -953,7 +954,8 @@ fec_restart(struct net_device *ndev) * For i.MX6SX SOC, enet use AXI bus, we use disable MAC * instead of reset MAC itself. */ - if (fep->quirks & FEC_QUIRK_HAS_AVB) { + if (fep->quirks & FEC_QUIRK_HAS_AVB || + ((fep->quirks & FEC_QUIRK_NO_HARD_RESET) && fep->link)) { writel(0, fep->hwp + FEC_ECNTRL); } else { writel(1, fep->hwp + FEC_ECNTRL); -- GitLab From 907d1df30a51cc1a1d25414a00cde0494b83df7b Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Tue, 26 Jan 2021 23:35:10 +0000 Subject: [PATCH 1541/2012] io_uring: fix wqe->lock/completion_lock deadlock Joseph reports following deadlock: CPU0: ... io_kill_linked_timeout // &ctx->completion_lock io_commit_cqring __io_queue_deferred __io_queue_async_work io_wq_enqueue io_wqe_enqueue // &wqe->lock CPU1: ... __io_uring_files_cancel io_wq_cancel_cb io_wqe_cancel_pending_work // &wqe->lock io_cancel_task_cb // &ctx->completion_lock Only __io_queue_deferred() calls queue_async_work() while holding ctx->completion_lock, enqueue drained requests via io_req_task_queue() instead. Cc: stable@vger.kernel.org # 5.9+ Reported-by: Joseph Qi Tested-by: Joseph Qi Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io_uring.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index bb0270eeb8cb6..c218deaf73a9f 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -1026,6 +1026,7 @@ static int io_setup_async_rw(struct io_kiocb *req, const struct iovec *iovec, const struct iovec *fast_iov, struct iov_iter *iter, bool force); static void io_req_drop_files(struct io_kiocb *req); +static void io_req_task_queue(struct io_kiocb *req); static struct kmem_cache *req_cachep; @@ -1634,18 +1635,11 @@ static void __io_queue_deferred(struct io_ring_ctx *ctx) do { struct io_defer_entry *de = list_first_entry(&ctx->defer_list, struct io_defer_entry, list); - struct io_kiocb *link; if (req_need_defer(de->req, de->seq)) break; list_del_init(&de->list); - /* punt-init is done before queueing for defer */ - link = __io_queue_async_work(de->req); - if (link) { - __io_queue_linked_timeout(link); - /* drop submission reference */ - io_put_req_deferred(link, 1); - } + io_req_task_queue(de->req); kfree(de); } while (!list_empty(&ctx->defer_list)); } -- GitLab From 6b2e04bc240fe9be9e690059f710e9f95346d34d Mon Sep 17 00:00:00 2001 From: Praveen Chaudhary Date: Mon, 25 Jan 2021 13:44:30 -0800 Subject: [PATCH 1542/2012] net: allow user to set metric on default route learned via Router Advertisement For IPv4, default route is learned via DHCPv4 and user is allowed to change metric using config etc/network/interfaces. But for IPv6, default route can be learned via RA, for which, currently a fixed metric value 1024 is used. Ideally, user should be able to configure metric on default route for IPv6 similar to IPv4. This patch adds sysctl for the same. Logs: For IPv4: Config in etc/network/interfaces: auto eth0 iface eth0 inet dhcp metric 4261413864 IPv4 Kernel Route Table: $ ip route list default via 172.21.47.1 dev eth0 metric 4261413864 FRR Table, if a static route is configured: [In real scenario, it is useful to prefer BGP learned default route over DHCPv4 default route.] Codes: K - kernel route, C - connected, S - static, R - RIP, O - OSPF, I - IS-IS, B - BGP, P - PIM, E - EIGRP, N - NHRP, T - Table, v - VNC, V - VNC-Direct, A - Babel, D - SHARP, > - selected route, * - FIB route S>* 0.0.0.0/0 [20/0] is directly connected, eth0, 00:00:03 K 0.0.0.0/0 [254/1000] via 172.21.47.1, eth0, 6d08h51m i.e. User can prefer Default Router learned via Routing Protocol in IPv4. Similar behavior is not possible for IPv6, without this fix. After fix [for IPv6]: sudo sysctl -w net.ipv6.conf.eth0.net.ipv6.conf.eth0.ra_defrtr_metric=1996489705 IP monitor: [When IPv6 RA is received] default via fe80::xx16:xxxx:feb3:ce8e dev eth0 proto ra metric 1996489705 pref high Kernel IPv6 routing table $ ip -6 route list default via fe80::be16:65ff:feb3:ce8e dev eth0 proto ra metric 1996489705 expires 21sec hoplimit 64 pref high FRR Table, if a static route is configured: [In real scenario, it is useful to prefer BGP learned default route over IPv6 RA default route.] Codes: K - kernel route, C - connected, S - static, R - RIPng, O - OSPFv3, I - IS-IS, B - BGP, N - NHRP, T - Table, v - VNC, V - VNC-Direct, A - Babel, D - SHARP, > - selected route, * - FIB route S>* ::/0 [20/0] is directly connected, eth0, 00:00:06 K ::/0 [119/1001] via fe80::xx16:xxxx:feb3:ce8e, eth0, 6d07h43m If the metric is changed later, the effect will be seen only when next IPv6 RA is received, because the default route must be fully controlled by RA msg. Below metric is changed from 1996489705 to 1996489704. $ sudo sysctl -w net.ipv6.conf.eth0.ra_defrtr_metric=1996489704 net.ipv6.conf.eth0.ra_defrtr_metric = 1996489704 IP monitor: [On next IPv6 RA msg, Kernel deletes prev route and installs new route with updated metric] Deleted default via fe80::xx16:xxxx:feb3:ce8e dev eth0 proto ra metric 1996489705 expires 3sec hoplimit 64 pref high default via fe80::xx16:xxxx:feb3:ce8e dev eth0 proto ra metric 1996489704 pref high Signed-off-by: Praveen Chaudhary Signed-off-by: Zhenggen Xu Reviewed-by: David Ahern Link: https://lore.kernel.org/r/20210125214430.24079-1-pchaudhary@linkedin.com Signed-off-by: Jakub Kicinski --- Documentation/networking/ip-sysctl.rst | 10 ++++++++++ include/linux/ipv6.h | 1 + include/net/ip6_route.h | 3 ++- include/uapi/linux/ipv6.h | 1 + include/uapi/linux/sysctl.h | 1 + net/ipv6/addrconf.c | 11 +++++++++++ net/ipv6/ndisc.c | 12 ++++++++---- net/ipv6/route.c | 5 +++-- 8 files changed, 37 insertions(+), 7 deletions(-) diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst index dd2b12a32b739..0e51ddd9a2f13 100644 --- a/Documentation/networking/ip-sysctl.rst +++ b/Documentation/networking/ip-sysctl.rst @@ -1871,6 +1871,16 @@ accept_ra_defrtr - BOOLEAN - enabled if accept_ra is enabled. - disabled if accept_ra is disabled. +ra_defrtr_metric - UNSIGNED INTEGER + Route metric for default route learned in Router Advertisement. This value + will be assigned as metric for the default route learned via IPv6 Router + Advertisement. Takes affect only if accept_ra_defrtr is enabled. + + Possible values: + 1 to 0xFFFFFFFF + + Default: IP6_RT_PRIO_USER i.e. 1024. + accept_ra_from_local - BOOLEAN Accept RA with source-address that is found on local machine if the RA is otherwise proper and able to be accepted. diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index dda61d150a138..9d1f29f0c5129 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -31,6 +31,7 @@ struct ipv6_devconf { __s32 max_desync_factor; __s32 max_addresses; __s32 accept_ra_defrtr; + __u32 ra_defrtr_metric; __s32 accept_ra_min_hop_limit; __s32 accept_ra_pinfo; __s32 ignore_routes_with_linkdown; diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 2a5277758379e..f51a118bfce8b 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -174,7 +174,8 @@ struct fib6_info *rt6_get_dflt_router(struct net *net, struct net_device *dev); struct fib6_info *rt6_add_dflt_router(struct net *net, const struct in6_addr *gwaddr, - struct net_device *dev, unsigned int pref); + struct net_device *dev, unsigned int pref, + u32 defrtr_usr_metric); void rt6_purge_dflt_routers(struct net *net); diff --git a/include/uapi/linux/ipv6.h b/include/uapi/linux/ipv6.h index 13e8751bf24a0..70603775fe91d 100644 --- a/include/uapi/linux/ipv6.h +++ b/include/uapi/linux/ipv6.h @@ -189,6 +189,7 @@ enum { DEVCONF_ACCEPT_RA_RT_INFO_MIN_PLEN, DEVCONF_NDISC_TCLASS, DEVCONF_RPL_SEG_ENABLED, + DEVCONF_RA_DEFRTR_METRIC, DEVCONF_MAX }; diff --git a/include/uapi/linux/sysctl.h b/include/uapi/linux/sysctl.h index 458179df9b271..1e05d3caa712f 100644 --- a/include/uapi/linux/sysctl.h +++ b/include/uapi/linux/sysctl.h @@ -571,6 +571,7 @@ enum { NET_IPV6_ACCEPT_SOURCE_ROUTE=25, NET_IPV6_ACCEPT_RA_FROM_LOCAL=26, NET_IPV6_ACCEPT_RA_RT_INFO_MIN_PLEN=27, + NET_IPV6_RA_DEFRTR_METRIC=28, __NET_IPV6_MAX }; diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 9edc5bb2d531a..f2337fb756ac7 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -205,6 +205,7 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = { .max_desync_factor = MAX_DESYNC_FACTOR, .max_addresses = IPV6_MAX_ADDRESSES, .accept_ra_defrtr = 1, + .ra_defrtr_metric = IP6_RT_PRIO_USER, .accept_ra_from_local = 0, .accept_ra_min_hop_limit= 1, .accept_ra_pinfo = 1, @@ -260,6 +261,7 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = { .max_desync_factor = MAX_DESYNC_FACTOR, .max_addresses = IPV6_MAX_ADDRESSES, .accept_ra_defrtr = 1, + .ra_defrtr_metric = IP6_RT_PRIO_USER, .accept_ra_from_local = 0, .accept_ra_min_hop_limit= 1, .accept_ra_pinfo = 1, @@ -5476,6 +5478,7 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf, array[DEVCONF_MAX_DESYNC_FACTOR] = cnf->max_desync_factor; array[DEVCONF_MAX_ADDRESSES] = cnf->max_addresses; array[DEVCONF_ACCEPT_RA_DEFRTR] = cnf->accept_ra_defrtr; + array[DEVCONF_RA_DEFRTR_METRIC] = cnf->ra_defrtr_metric; array[DEVCONF_ACCEPT_RA_MIN_HOP_LIMIT] = cnf->accept_ra_min_hop_limit; array[DEVCONF_ACCEPT_RA_PINFO] = cnf->accept_ra_pinfo; #ifdef CONFIG_IPV6_ROUTER_PREF @@ -6668,6 +6671,14 @@ static const struct ctl_table addrconf_sysctl[] = { .mode = 0644, .proc_handler = proc_dointvec, }, + { + .procname = "ra_defrtr_metric", + .data = &ipv6_devconf.ra_defrtr_metric, + .maxlen = sizeof(u32), + .mode = 0644, + .proc_handler = proc_douintvec_minmax, + .extra1 = (void *)SYSCTL_ONE, + }, { .procname = "accept_ra_min_hop_limit", .data = &ipv6_devconf.accept_ra_min_hop_limit, diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 76717478f1733..c467c6419893c 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -1173,6 +1173,7 @@ static void ndisc_router_discovery(struct sk_buff *skb) struct neighbour *neigh = NULL; struct inet6_dev *in6_dev; struct fib6_info *rt = NULL; + u32 defrtr_usr_metric; struct net *net; int lifetime; struct ndisc_options ndopts; @@ -1303,18 +1304,21 @@ static void ndisc_router_discovery(struct sk_buff *skb) return; } } - if (rt && lifetime == 0) { + /* Set default route metric as specified by user */ + defrtr_usr_metric = in6_dev->cnf.ra_defrtr_metric; + /* delete the route if lifetime is 0 or if metric needs change */ + if (rt && (lifetime == 0 || rt->fib6_metric != defrtr_usr_metric)) { ip6_del_rt(net, rt, false); rt = NULL; } - ND_PRINTK(3, info, "RA: rt: %p lifetime: %d, for dev: %s\n", - rt, lifetime, skb->dev->name); + ND_PRINTK(3, info, "RA: rt: %p lifetime: %d, metric: %d, for dev: %s\n", + rt, lifetime, defrtr_usr_metric, skb->dev->name); if (!rt && lifetime) { ND_PRINTK(3, info, "RA: adding default router\n"); rt = rt6_add_dflt_router(net, &ipv6_hdr(skb)->saddr, - skb->dev, pref); + skb->dev, pref, defrtr_usr_metric); if (!rt) { ND_PRINTK(0, err, "RA: %s failed to add default route\n", diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 188e114b29b4a..41d8f801b75fe 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -4252,11 +4252,12 @@ struct fib6_info *rt6_get_dflt_router(struct net *net, struct fib6_info *rt6_add_dflt_router(struct net *net, const struct in6_addr *gwaddr, struct net_device *dev, - unsigned int pref) + unsigned int pref, + u32 defrtr_usr_metric) { struct fib6_config cfg = { .fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT, - .fc_metric = IP6_RT_PRIO_USER, + .fc_metric = defrtr_usr_metric, .fc_ifindex = dev->ifindex, .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT | RTF_UP | RTF_EXPIRES | RTF_PREF(pref), -- GitLab From 5f46400f7a6a4fad635d5a79e2aa5a04a30ffea1 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Tue, 26 Jan 2021 17:01:49 +0000 Subject: [PATCH 1543/2012] xen: Fix XenStore initialisation for XS_LOCAL In commit 3499ba8198ca ("xen: Fix event channel callback via INTX/GSI") I reworked the triggering of xenbus_probe(). I tried to simplify things by taking out the workqueue based startup triggered from wake_waiting(); the somewhat poorly named xenbus IRQ handler. I missed the fact that in the XS_LOCAL case (Dom0 starting its own xenstored or xenstore-stubdom, which happens after the kernel is booted completely), that IRQ-based trigger is still actually needed. So... put it back, except more cleanly. By just spawning a xenbus_probe thread which waits on xb_waitq and runs the probe the first time it gets woken, just as the workqueue-based hack did. This is actually a nicer approach for *all* the back ends with different interrupt methods, and we can switch them all over to that without the complex conditions for when to trigger it. But not in -rc6. This is the minimal fix for the regression, although it's a step in the right direction instead of doing a partial revert and actually putting the workqueue back. It's also simpler than the workqueue. Fixes: 3499ba8198ca ("xen: Fix event channel callback via INTX/GSI") Reported-by: Juergen Gross Signed-off-by: David Woodhouse Reviewed-by: Juergen Gross Link: https://lore.kernel.org/r/4c9af052a6e0f6485d1de43f2c38b1461996db99.camel@infradead.org Signed-off-by: Juergen Gross --- drivers/xen/xenbus/xenbus_probe.c | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c index c8f0282bb6497..18ffd0551b542 100644 --- a/drivers/xen/xenbus/xenbus_probe.c +++ b/drivers/xen/xenbus/xenbus_probe.c @@ -714,6 +714,23 @@ static bool xs_hvm_defer_init_for_callback(void) #endif } +static int xenbus_probe_thread(void *unused) +{ + DEFINE_WAIT(w); + + /* + * We actually just want to wait for *any* trigger of xb_waitq, + * and run xenbus_probe() the moment it occurs. + */ + prepare_to_wait(&xb_waitq, &w, TASK_INTERRUPTIBLE); + schedule(); + finish_wait(&xb_waitq, &w); + + DPRINTK("probing"); + xenbus_probe(); + return 0; +} + static int __init xenbus_probe_initcall(void) { /* @@ -725,6 +742,20 @@ static int __init xenbus_probe_initcall(void) !xs_hvm_defer_init_for_callback())) xenbus_probe(); + /* + * For XS_LOCAL, spawn a thread which will wait for xenstored + * or a xenstore-stubdom to be started, then probe. It will be + * triggered when communication starts happening, by waiting + * on xb_waitq. + */ + if (xen_store_domain_type == XS_LOCAL) { + struct task_struct *probe_task; + + probe_task = kthread_run(xenbus_probe_thread, NULL, + "xenbus_probe"); + if (IS_ERR(probe_task)) + return PTR_ERR(probe_task); + } return 0; } device_initcall(xenbus_probe_initcall); -- GitLab From 211e5db19d15a721b2953ea54b8f26c2963720eb Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 26 Jan 2021 18:02:11 +0100 Subject: [PATCH 1544/2012] rtc: mc146818: Detect and handle broken RTCs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The recent fix for handling the UIP bit unearthed another issue in the RTC code. If the RTC is advertised but the readout is straight 0xFF because it's not available, the old code just proceeded with crappy values, but the new code hangs because it waits for the UIP bit to become low. Add a sanity check in the RTC CMOS probe function which reads the RTC_VALID register (Register D) which should have bit 0-6 cleared. If that's not the case then fail to register the CMOS. Add the same check to mc146818_get_time(), warn once when the condition is true and invalidate the rtc_time data. Reported-by: Mickaël Salaün Signed-off-by: Thomas Gleixner Tested-by: Mickaël Salaün Acked-by: Alexandre Belloni Link: https://lore.kernel.org/r/87tur3fx7w.fsf@nanos.tec.linutronix.de --- drivers/rtc/rtc-cmos.c | 8 ++++++++ drivers/rtc/rtc-mc146818-lib.c | 7 +++++++ 2 files changed, 15 insertions(+) diff --git a/drivers/rtc/rtc-cmos.c b/drivers/rtc/rtc-cmos.c index 51e80bc70d423..68a9ac6f2fe17 100644 --- a/drivers/rtc/rtc-cmos.c +++ b/drivers/rtc/rtc-cmos.c @@ -805,6 +805,14 @@ cmos_do_probe(struct device *dev, struct resource *ports, int rtc_irq) spin_lock_irq(&rtc_lock); + /* Ensure that the RTC is accessible. Bit 0-6 must be 0! */ + if ((CMOS_READ(RTC_VALID) & 0x7f) != 0) { + spin_unlock_irq(&rtc_lock); + dev_warn(dev, "not accessible\n"); + retval = -ENXIO; + goto cleanup1; + } + if (!(flags & CMOS_RTC_FLAGS_NOFREQ)) { /* force periodic irq to CMOS reset default of 1024Hz; * diff --git a/drivers/rtc/rtc-mc146818-lib.c b/drivers/rtc/rtc-mc146818-lib.c index 972a5b9a629d3..f83c13818af3b 100644 --- a/drivers/rtc/rtc-mc146818-lib.c +++ b/drivers/rtc/rtc-mc146818-lib.c @@ -21,6 +21,13 @@ unsigned int mc146818_get_time(struct rtc_time *time) again: spin_lock_irqsave(&rtc_lock, flags); + /* Ensure that the RTC is accessible. Bit 0-6 must be 0! */ + if (WARN_ON_ONCE((CMOS_READ(RTC_VALID) & 0x7f) != 0)) { + spin_unlock_irqrestore(&rtc_lock, flags); + memset(time, 0xff, sizeof(*time)); + return 0; + } + /* * Check whether there is an update in progress during which the * readout is unspecified. The maximum update time is ~2ms. Poll -- GitLab From 12da7a1f3cb6ec5c8a1256bbc17cc931f72f7329 Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Wed, 27 Jan 2021 09:13:03 +0100 Subject: [PATCH 1545/2012] can: gw: fix typo This patch fixes a typo found by codespell. Fixes: 94c23097f991 ("can: gw: support modification of Classical CAN DLCs") Link: https://lore.kernel.org/r/20210127085529.2768537-3-mkl@pengutronix.de Signed-off-by: Marc Kleine-Budde --- net/can/gw.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/can/gw.c b/net/can/gw.c index 8598d9da0e5fc..ba41248056029 100644 --- a/net/can/gw.c +++ b/net/can/gw.c @@ -225,7 +225,7 @@ static void mod_store_ccdlc(struct canfd_frame *cf) if (ccf->len <= CAN_MAX_DLEN) return; - /* potentially broken values are catched in can_can_gw_rcv() */ + /* potentially broken values are caught in can_can_gw_rcv() */ if (ccf->len > CAN_MAX_RAW_DLC) return; -- GitLab From 02ee6808179100b46345f3b4e6ecc083b9d08e9d Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Wed, 27 Jan 2021 09:13:03 +0100 Subject: [PATCH 1546/2012] can: flexcan: fix typos This patch fixes two typos found by codespell. Fixes: 812f0116c66a ("can: flexcan: add CAN wakeup function for i.MX8QM") Link: https://lore.kernel.org/r/20210127085529.2768537-2-mkl@pengutronix.de Signed-off-by: Marc Kleine-Budde --- drivers/net/can/flexcan.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/can/flexcan.c b/drivers/net/can/flexcan.c index 5d9157c655e9a..971ada36e37fa 100644 --- a/drivers/net/can/flexcan.c +++ b/drivers/net/can/flexcan.c @@ -1975,14 +1975,14 @@ static int flexcan_setup_stop_mode_scfw(struct platform_device *pdev) priv = netdev_priv(dev); priv->scu_idx = scu_idx; - /* this function could be defered probe, return -EPROBE_DEFER */ + /* this function could be deferred probe, return -EPROBE_DEFER */ return imx_scu_get_handle(&priv->sc_ipc_handle); } /* flexcan_setup_stop_mode - Setup stop mode for wakeup * * Return: = 0 setup stop mode successfully or doesn't support this feature - * < 0 fail to setup stop mode (could be defered probe) + * < 0 fail to setup stop mode (could be deferred probe) */ static int flexcan_setup_stop_mode(struct platform_device *pdev) { -- GitLab From 6fe27d68b45666381691b6a841a2adbda8c8d153 Mon Sep 17 00:00:00 2001 From: Vincent Mailhol Date: Wed, 20 Jan 2021 02:03:55 +0900 Subject: [PATCH 1547/2012] can: dev: export can_get_state_str() function The can_get_state_str() function is also relevant to the drivers. Export the symbol and make it visible in the can/dev.h header. Link: https://lore.kernel.org/r/20210119170355.12040-1-mailhol.vincent@wanadoo.fr Signed-off-by: Vincent Mailhol Signed-off-by: Marc Kleine-Budde --- drivers/net/can/dev/dev.c | 3 ++- include/linux/can/dev.h | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/can/dev/dev.c b/drivers/net/can/dev/dev.c index 01e4a194f187e..d9281ae853f8e 100644 --- a/drivers/net/can/dev/dev.c +++ b/drivers/net/can/dev/dev.c @@ -74,7 +74,7 @@ static int can_rx_state_to_frame(struct net_device *dev, enum can_state state) } } -static const char *can_get_state_str(const enum can_state state) +const char *can_get_state_str(const enum can_state state) { switch (state) { case CAN_STATE_ERROR_ACTIVE: @@ -95,6 +95,7 @@ static const char *can_get_state_str(const enum can_state state) return ""; } +EXPORT_SYMBOL_GPL(can_get_state_str); void can_change_state(struct net_device *dev, struct can_frame *cf, enum can_state tx_state, enum can_state rx_state) diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h index 7faf6a37d5b2b..ac4d83a1ab81c 100644 --- a/include/linux/can/dev.h +++ b/include/linux/can/dev.h @@ -123,6 +123,7 @@ void unregister_candev(struct net_device *dev); int can_restart_now(struct net_device *dev); void can_bus_off(struct net_device *dev); +const char *can_get_state_str(const enum can_state state); void can_change_state(struct net_device *dev, struct can_frame *cf, enum can_state tx_state, enum can_state rx_state); -- GitLab From 54eca60b1c94fb1de3e59a936d428a291879de8a Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Sun, 17 Jan 2021 23:06:28 +0100 Subject: [PATCH 1548/2012] can: length: can_fd_len2dlc(): make legnth calculation readable again In commit 652562e5ff06 ("can: length: can_fd_len2dlc(): simplify length calculcation") the readability of the code degraded and became more error prone. To counteract this, partially convert that patch and replace open coded values (of the original code) with proper defines. Fixes: 652562e5ff06 ("can: length: can_fd_len2dlc(): simplify length calculcation") Cc: Vincent MAILHOL Link: https://lore.kernel.org/r/20210118201346.79422-1-socketcan@hartkopp.net Signed-off-by: Oliver Hartkopp Signed-off-by: Marc Kleine-Budde --- drivers/net/can/dev/length.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/net/can/dev/length.c b/drivers/net/can/dev/length.c index d35c4e82314d1..b48140b1102ea 100644 --- a/drivers/net/can/dev/length.c +++ b/drivers/net/can/dev/length.c @@ -27,12 +27,17 @@ static const u8 len2dlc[] = { 13, 13, 13, 13, 13, 13, 13, 13, /* 25 - 32 */ 14, 14, 14, 14, 14, 14, 14, 14, /* 33 - 40 */ 14, 14, 14, 14, 14, 14, 14, 14, /* 41 - 48 */ + 15, 15, 15, 15, 15, 15, 15, 15, /* 49 - 56 */ + 15, 15, 15, 15, 15, 15, 15, 15 /* 57 - 64 */ }; /* map the sanitized data length to an appropriate data length code */ u8 can_fd_len2dlc(u8 len) { - if (len >= ARRAY_SIZE(len2dlc)) + /* check for length mapping table size at build time */ + BUILD_BUG_ON(ARRAY_SIZE(len2dlc) != CANFD_MAX_DLEN + 1); + + if (unlikely(len > CANFD_MAX_DLEN)) return CANFD_MAX_DLC; return len2dlc[len]; -- GitLab From 22d63be91c5016207d0de25ed279707865f07e4f Mon Sep 17 00:00:00 2001 From: Tom Rix Date: Sun, 24 Jan 2021 07:09:16 -0800 Subject: [PATCH 1549/2012] can: mcba_usb: remove h from printk format specifier This change fixes the checkpatch warning described in this commit commit cbacb5ab0aa0 ("docs: printk-formats: Stop encouraging use of unnecessary %h[xudi] and %hh[xudi]") Standard integer promotion is already done and %hx and %hhx is useless so do not encourage the use of %hh[xudi] or %h[xudi]. Link: https://lore.kernel.org/r/20210124150916.1920434-1-trix@redhat.com Signed-off-by: Tom Rix Signed-off-by: Marc Kleine-Budde --- drivers/net/can/usb/mcba_usb.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/can/usb/mcba_usb.c b/drivers/net/can/usb/mcba_usb.c index 4232a7126c1bd..1f649d1780107 100644 --- a/drivers/net/can/usb/mcba_usb.c +++ b/drivers/net/can/usb/mcba_usb.c @@ -466,7 +466,7 @@ static void mcba_usb_process_ka_usb(struct mcba_priv *priv, struct mcba_usb_msg_ka_usb *msg) { if (unlikely(priv->usb_ka_first_pass)) { - netdev_info(priv->netdev, "PIC USB version %hhu.%hhu\n", + netdev_info(priv->netdev, "PIC USB version %u.%u\n", msg->soft_ver_major, msg->soft_ver_minor); priv->usb_ka_first_pass = false; @@ -492,7 +492,7 @@ static void mcba_usb_process_ka_can(struct mcba_priv *priv, struct mcba_usb_msg_ka_can *msg) { if (unlikely(priv->can_ka_first_pass)) { - netdev_info(priv->netdev, "PIC CAN version %hhu.%hhu\n", + netdev_info(priv->netdev, "PIC CAN version %u.%u\n", msg->soft_ver_major, msg->soft_ver_minor); priv->can_ka_first_pass = false; @@ -554,7 +554,7 @@ static void mcba_usb_process_rx(struct mcba_priv *priv, break; default: - netdev_warn(priv->netdev, "Unsupported msg (0x%hhX)", + netdev_warn(priv->netdev, "Unsupported msg (0x%X)", msg->cmd_id); break; } -- GitLab From cdc4c698e4be256634e722494dac0fa40e4137e2 Mon Sep 17 00:00:00 2001 From: Su Yanjun Date: Fri, 22 Jan 2021 16:13:34 +0800 Subject: [PATCH 1550/2012] can: mcp251xfd: replace sizeof(u32) with val_bytes in regmap The sizeof(u32) is hardcoded. It's better to use the config value from the regmap. It increases the size of target object, but it's flexible when new mcp chip need other val_bytes. Link: https://lore.kernel.org/r/20210122081334.213957-1-suyanjun218@gmail.com Signed-off-by: Su Yanjun Signed-off-by: Marc Kleine-Budde --- drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c index 00e9855c23d1f..897c9310266a4 100644 --- a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c +++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c @@ -1308,6 +1308,7 @@ mcp251xfd_tef_obj_read(const struct mcp251xfd_priv *priv, const u8 offset, const u8 len) { const struct mcp251xfd_tx_ring *tx_ring = priv->tx; + const int val_bytes = regmap_get_val_bytes(priv->map_rx); if (IS_ENABLED(CONFIG_CAN_MCP251XFD_SANITY) && (offset > tx_ring->obj_num || @@ -1322,7 +1323,7 @@ mcp251xfd_tef_obj_read(const struct mcp251xfd_priv *priv, return regmap_bulk_read(priv->map_rx, mcp251xfd_get_tef_obj_addr(offset), hw_tef_obj, - sizeof(*hw_tef_obj) / sizeof(u32) * len); + sizeof(*hw_tef_obj) / val_bytes * len); } static int mcp251xfd_handle_tefif(struct mcp251xfd_priv *priv) @@ -1510,12 +1511,13 @@ mcp251xfd_rx_obj_read(const struct mcp251xfd_priv *priv, struct mcp251xfd_hw_rx_obj_canfd *hw_rx_obj, const u8 offset, const u8 len) { + const int val_bytes = regmap_get_val_bytes(priv->map_rx); int err; err = regmap_bulk_read(priv->map_rx, mcp251xfd_get_rx_obj_addr(ring, offset), hw_rx_obj, - len * ring->obj_size / sizeof(u32)); + len * ring->obj_size / val_bytes); return err; } @@ -2137,6 +2139,7 @@ static int mcp251xfd_handle_spicrcif(struct mcp251xfd_priv *priv) static irqreturn_t mcp251xfd_irq(int irq, void *dev_id) { struct mcp251xfd_priv *priv = dev_id; + const int val_bytes = regmap_get_val_bytes(priv->map_reg); irqreturn_t handled = IRQ_NONE; int err; @@ -2162,7 +2165,7 @@ static irqreturn_t mcp251xfd_irq(int irq, void *dev_id) err = regmap_bulk_read(priv->map_reg, MCP251XFD_REG_INT, &priv->regs_status, sizeof(priv->regs_status) / - sizeof(u32)); + val_bytes); if (err) goto out_fail; -- GitLab From 9845b8f530196fd86fcc46c1c1298bf94b1604bf Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Sun, 20 Dec 2020 14:02:29 +0100 Subject: [PATCH 1551/2012] can: mcp251xfd: mcp251xfd_start_xmit(): use mcp251xfd_get_tx_free() to check TX is is full This patch replaces an open coded check if the TX ring is full by a check if mcp251xfd_get_tx_free() returns 0. Link: https://lore.kernel.org/r/20210114153448.1506901-2-mkl@pengutronix.de Signed-off-by: Marc Kleine-Budde --- drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c index 897c9310266a4..1dbb87c280496 100644 --- a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c +++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c @@ -2436,7 +2436,7 @@ static netdev_tx_t mcp251xfd_start_xmit(struct sk_buff *skb, /* Stop queue if we occupy the complete TX FIFO */ tx_head = mcp251xfd_get_tx_head(tx_ring); tx_ring->head++; - if (tx_ring->head - tx_ring->tail >= tx_ring->obj_num) + if (mcp251xfd_get_tx_free(tx_ring) == 0) netif_stop_queue(ndev); can_put_echo_skb(skb, ndev, tx_head, 0); -- GitLab From 561aa5b4ce223064ea655da899013bec5326ec45 Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Mon, 21 Dec 2020 21:48:20 +0100 Subject: [PATCH 1552/2012] can: mcp251xfd: mcp251xfd_tx_obj_from_skb(): clean up padding of CAN-FD frames CAN-FD frames have only specific frame length (0, 1, 2, 3, 4, 5, 6, 7, 8, 12, 16, 20, 24, 32, 48, 64). A CAN-FD frame provided by user space might not cover the whole CAN-FD frame. To avoid sending garbage over the CAN bus the driver pads the CAN frame with 0x0 (if MCP251XFD_SANITIZE_CAN is activated). This patch cleans up the pad len calculation. Rounding to full u32 brings no benefit, in case of CRC transfers, the hw_tx_obj->data is not aligned to u32 anyway. Link: https://lore.kernel.org/r/20210114153448.1506901-3-mkl@pengutronix.de Signed-off-by: Marc Kleine-Budde --- drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c index 1dbb87c280496..aa992e71d7873 100644 --- a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c +++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c @@ -2303,7 +2303,7 @@ mcp251xfd_tx_obj_from_skb(const struct mcp251xfd_priv *priv, union mcp251xfd_tx_obj_load_buf *load_buf; u8 dlc; u32 id, flags; - int offset, len; + int pad_len, len; if (cfd->can_id & CAN_EFF_FLAG) { u32 sid, eid; @@ -2351,13 +2351,14 @@ mcp251xfd_tx_obj_from_skb(const struct mcp251xfd_priv *priv, put_unaligned_le32(id, &hw_tx_obj->id); put_unaligned_le32(flags, &hw_tx_obj->flags); - /* Clear data at end of CAN frame */ - offset = round_down(cfd->len, sizeof(u32)); - len = round_up(can_fd_dlc2len(dlc), sizeof(u32)) - offset; - if (MCP251XFD_SANITIZE_CAN && len) - memset(hw_tx_obj->data + offset, 0x0, len); + /* Copy data */ memcpy(hw_tx_obj->data, cfd->data, cfd->len); + /* Clear unused data at end of CAN frame */ + pad_len = can_fd_dlc2len(dlc) - cfd->len; + if (MCP251XFD_SANITIZE_CAN && pad_len) + memset(hw_tx_obj->data + cfd->len, 0x0, pad_len); + /* Number of bytes to be written into the RAM of the controller */ len = sizeof(hw_tx_obj->id) + sizeof(hw_tx_obj->flags); if (MCP251XFD_SANITIZE_CAN) -- GitLab From e20b85c7eb2e91d9db166ac8b08eec61c0164e9b Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Mon, 21 Dec 2020 21:34:50 +0100 Subject: [PATCH 1553/2012] can: mcp251xfd: mcp251xfd_hw_rx_obj_to_skb(): don't copy data for RTR CAN frames in RX-path In Classical CAN there are RTR frames. RTR frames have the RTR bit set, may have a dlc != 0, but contain no data. This patch changes the RX-path to no copy any data for RTR frames, so that the data field in the CAN frame stays 0x0. Link: https://lore.kernel.org/r/20210114153448.1506901-4-mkl@pengutronix.de Signed-off-by: Marc Kleine-Budde --- drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c index aa992e71d7873..92816be4f3d44 100644 --- a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c +++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c @@ -1474,7 +1474,8 @@ mcp251xfd_hw_rx_obj_to_skb(const struct mcp251xfd_priv *priv, hw_rx_obj->flags)); } - memcpy(cfd->data, hw_rx_obj->data, cfd->len); + if (!(hw_rx_obj->flags & MCP251XFD_OBJ_FLAGS_RTR)) + memcpy(cfd->data, hw_rx_obj->data, cfd->len); } static int -- GitLab From a68eda203676d7504dbf02f50366d81928ab45bf Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Mon, 21 Dec 2020 21:34:50 +0100 Subject: [PATCH 1554/2012] can: mcp251xfd: mcp251xfd_tx_obj_from_skb(): don't copy data for RTR CAN frames in TX-path In Classical CAN there are RTR frames. RTR frames have the RTR bit set, may have a dlc != 0, but contain no data. This patch optimizes the TX-path to not copy any data for RTR frames. Link: https://lore.kernel.org/r/20210114153448.1506901-5-mkl@pengutronix.de Signed-off-by: Marc Kleine-Budde --- drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c index 92816be4f3d44..e6d98e172a47e 100644 --- a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c +++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c @@ -2304,7 +2304,7 @@ mcp251xfd_tx_obj_from_skb(const struct mcp251xfd_priv *priv, union mcp251xfd_tx_obj_load_buf *load_buf; u8 dlc; u32 id, flags; - int pad_len, len; + int len_sanitized = 0, len; if (cfd->can_id & CAN_EFF_FLAG) { u32 sid, eid; @@ -2331,6 +2331,8 @@ mcp251xfd_tx_obj_from_skb(const struct mcp251xfd_priv *priv, if (cfd->can_id & CAN_RTR_FLAG) flags |= MCP251XFD_OBJ_FLAGS_RTR; + else + len_sanitized = canfd_sanitize_len(cfd->len); /* CANFD */ if (can_is_canfd_skb(skb)) { @@ -2356,14 +2358,18 @@ mcp251xfd_tx_obj_from_skb(const struct mcp251xfd_priv *priv, memcpy(hw_tx_obj->data, cfd->data, cfd->len); /* Clear unused data at end of CAN frame */ - pad_len = can_fd_dlc2len(dlc) - cfd->len; - if (MCP251XFD_SANITIZE_CAN && pad_len) - memset(hw_tx_obj->data + cfd->len, 0x0, pad_len); + if (MCP251XFD_SANITIZE_CAN && len_sanitized) { + int pad_len; + + pad_len = len_sanitized - cfd->len; + if (pad_len) + memset(hw_tx_obj->data + cfd->len, 0x0, pad_len); + } /* Number of bytes to be written into the RAM of the controller */ len = sizeof(hw_tx_obj->id) + sizeof(hw_tx_obj->flags); if (MCP251XFD_SANITIZE_CAN) - len += round_up(can_fd_dlc2len(dlc), sizeof(u32)); + len += round_up(len_sanitized, sizeof(u32)); else len += round_up(cfd->len, sizeof(u32)); -- GitLab From 86f1e3b1dd9f08408b12405059e2ab3cf9690066 Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Sun, 20 Dec 2020 18:47:51 +0100 Subject: [PATCH 1555/2012] can: mcp251xfd: add len8_dlc support This patch adds support for the Classical CAN raw DLC functionality to send and receive DLC values from 9 ... 15 to the mcp251xfd driver. Link: https://lore.kernel.org/r/20210114153448.1506901-6-mkl@pengutronix.de Signed-off-by: Marc Kleine-Budde --- .../net/can/spi/mcp251xfd/mcp251xfd-core.c | 23 ++++++++++++------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c index e6d98e172a47e..8f78a29db39b9 100644 --- a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c +++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c @@ -1439,6 +1439,7 @@ mcp251xfd_hw_rx_obj_to_skb(const struct mcp251xfd_priv *priv, struct sk_buff *skb) { struct canfd_frame *cfd = (struct canfd_frame *)skb->data; + u8 dlc; if (hw_rx_obj->flags & MCP251XFD_OBJ_FLAGS_IDE) { u32 sid, eid; @@ -1454,9 +1455,10 @@ mcp251xfd_hw_rx_obj_to_skb(const struct mcp251xfd_priv *priv, hw_rx_obj->id); } + dlc = FIELD_GET(MCP251XFD_OBJ_FLAGS_DLC, hw_rx_obj->flags); + /* CANFD */ if (hw_rx_obj->flags & MCP251XFD_OBJ_FLAGS_FDF) { - u8 dlc; if (hw_rx_obj->flags & MCP251XFD_OBJ_FLAGS_ESI) cfd->flags |= CANFD_ESI; @@ -1464,14 +1466,13 @@ mcp251xfd_hw_rx_obj_to_skb(const struct mcp251xfd_priv *priv, if (hw_rx_obj->flags & MCP251XFD_OBJ_FLAGS_BRS) cfd->flags |= CANFD_BRS; - dlc = FIELD_GET(MCP251XFD_OBJ_FLAGS_DLC, hw_rx_obj->flags); cfd->len = can_fd_dlc2len(dlc); } else { if (hw_rx_obj->flags & MCP251XFD_OBJ_FLAGS_RTR) cfd->can_id |= CAN_RTR_FLAG; - cfd->len = can_cc_dlc2len(FIELD_GET(MCP251XFD_OBJ_FLAGS_DLC, - hw_rx_obj->flags)); + can_frame_set_cc_len((struct can_frame *)cfd, dlc, + priv->can.ctrlmode); } if (!(hw_rx_obj->flags & MCP251XFD_OBJ_FLAGS_RTR)) @@ -2325,9 +2326,7 @@ mcp251xfd_tx_obj_from_skb(const struct mcp251xfd_priv *priv, * harm, only the lower 7 bits will be transferred into the * TEF object. */ - dlc = can_fd_len2dlc(cfd->len); - flags |= FIELD_PREP(MCP251XFD_OBJ_FLAGS_SEQ_MCP2518FD_MASK, seq) | - FIELD_PREP(MCP251XFD_OBJ_FLAGS_DLC, dlc); + flags |= FIELD_PREP(MCP251XFD_OBJ_FLAGS_SEQ_MCP2518FD_MASK, seq); if (cfd->can_id & CAN_RTR_FLAG) flags |= MCP251XFD_OBJ_FLAGS_RTR; @@ -2343,8 +2342,15 @@ mcp251xfd_tx_obj_from_skb(const struct mcp251xfd_priv *priv, if (cfd->flags & CANFD_BRS) flags |= MCP251XFD_OBJ_FLAGS_BRS; + + dlc = can_fd_len2dlc(cfd->len); + } else { + dlc = can_get_cc_dlc((struct can_frame *)cfd, + priv->can.ctrlmode); } + flags |= FIELD_PREP(MCP251XFD_OBJ_FLAGS_DLC, dlc); + load_buf = &tx_obj->buf; if (priv->devtype_data.quirks & MCP251XFD_QUIRK_CRC_TX) hw_tx_obj = &load_buf->crc.hw_tx_obj; @@ -2896,7 +2902,8 @@ static int mcp251xfd_probe(struct spi_device *spi) priv->can.data_bittiming_const = &mcp251xfd_data_bittiming_const; priv->can.ctrlmode_supported = CAN_CTRLMODE_LOOPBACK | CAN_CTRLMODE_LISTENONLY | CAN_CTRLMODE_BERR_REPORTING | - CAN_CTRLMODE_FD | CAN_CTRLMODE_FD_NON_ISO; + CAN_CTRLMODE_FD | CAN_CTRLMODE_FD_NON_ISO | + CAN_CTRLMODE_CC_LEN8_DLC; priv->ndev = ndev; priv->spi = spi; priv->rx_int = rx_int; -- GitLab From 4162e18e949ba520d5116ac0323500355479a00e Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Sun, 13 Dec 2020 17:25:15 +0100 Subject: [PATCH 1556/2012] can: mcp251xfd: add BQL support This patch adds BQL support to the driver. Support for netdev_xmit_more() will be added in a separate patch series. Link: https://lore.kernel.org/r/20210114153448.1506901-7-mkl@pengutronix.de Signed-off-by: Marc Kleine-Budde --- .../net/can/spi/mcp251xfd/mcp251xfd-core.c | 21 +++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c index 8f78a29db39b9..3638b474d86b9 100644 --- a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c +++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c @@ -335,6 +335,8 @@ static void mcp251xfd_ring_init(struct mcp251xfd_priv *priv) u8 len; int i, j; + netdev_reset_queue(priv->ndev); + /* TEF */ tef_ring = priv->tef; tef_ring->head = 0; @@ -1249,7 +1251,8 @@ mcp251xfd_handle_tefif_recover(const struct mcp251xfd_priv *priv, const u32 seq) static int mcp251xfd_handle_tefif_one(struct mcp251xfd_priv *priv, - const struct mcp251xfd_hw_tef_obj *hw_tef_obj) + const struct mcp251xfd_hw_tef_obj *hw_tef_obj, + unsigned int *frame_len_ptr) { struct net_device_stats *stats = &priv->ndev->stats; u32 seq, seq_masked, tef_tail_masked; @@ -1271,7 +1274,8 @@ mcp251xfd_handle_tefif_one(struct mcp251xfd_priv *priv, stats->tx_bytes += can_rx_offload_get_echo_skb(&priv->offload, mcp251xfd_get_tef_tail(priv), - hw_tef_obj->ts, NULL); + hw_tef_obj->ts, + frame_len_ptr); stats->tx_packets++; priv->tef->tail++; @@ -1329,6 +1333,7 @@ mcp251xfd_tef_obj_read(const struct mcp251xfd_priv *priv, static int mcp251xfd_handle_tefif(struct mcp251xfd_priv *priv) { struct mcp251xfd_hw_tef_obj hw_tef_obj[MCP251XFD_TX_OBJ_NUM_MAX]; + unsigned int total_frame_len = 0; u8 tef_tail, len, l; int err, i; @@ -1350,7 +1355,9 @@ static int mcp251xfd_handle_tefif(struct mcp251xfd_priv *priv) } for (i = 0; i < len; i++) { - err = mcp251xfd_handle_tefif_one(priv, &hw_tef_obj[i]); + unsigned int frame_len; + + err = mcp251xfd_handle_tefif_one(priv, &hw_tef_obj[i], &frame_len); /* -EAGAIN means the Sequence Number in the TEF * doesn't match our tef_tail. This can happen if we * read the TEF objects too early. Leave loop let the @@ -1360,6 +1367,8 @@ static int mcp251xfd_handle_tefif(struct mcp251xfd_priv *priv) goto out_netif_wake_queue; if (err) return err; + + total_frame_len += frame_len; } out_netif_wake_queue: @@ -1390,6 +1399,7 @@ static int mcp251xfd_handle_tefif(struct mcp251xfd_priv *priv) return err; tx_ring->tail += len; + netdev_completed_queue(priv->ndev, len, total_frame_len); err = mcp251xfd_check_tef_tail(priv); if (err) @@ -2435,6 +2445,7 @@ static netdev_tx_t mcp251xfd_start_xmit(struct sk_buff *skb, struct mcp251xfd_priv *priv = netdev_priv(ndev); struct mcp251xfd_tx_ring *tx_ring = priv->tx; struct mcp251xfd_tx_obj *tx_obj; + unsigned int frame_len; u8 tx_head; int err; @@ -2453,7 +2464,9 @@ static netdev_tx_t mcp251xfd_start_xmit(struct sk_buff *skb, if (mcp251xfd_get_tx_free(tx_ring) == 0) netif_stop_queue(ndev); - can_put_echo_skb(skb, ndev, tx_head, 0); + frame_len = can_skb_get_frame_len(skb); + can_put_echo_skb(skb, ndev, tx_head, frame_len); + netdev_sent_queue(priv->ndev, frame_len); err = mcp251xfd_tx_obj_write(priv, tx_obj); if (err) -- GitLab From b552766c872f5b0d90323b24e4c9e8fa67486dd5 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 21 Jan 2021 09:08:05 +0300 Subject: [PATCH 1557/2012] can: dev: prevent potential information leak in can_fill_info() The "bec" struct isn't necessarily always initialized. For example, the mcp251xfd_get_berr_counter() function doesn't initialize anything if the interface is down. Fixes: 52c793f24054 ("can: netlink support for bus-error reporting and counters") Link: https://lore.kernel.org/r/YAkaRdRJncsJO8Ve@mwanda Signed-off-by: Dan Carpenter Signed-off-by: Marc Kleine-Budde --- drivers/net/can/dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/can/dev.c b/drivers/net/can/dev.c index 8b1ae023cb218..c73e2a65c9044 100644 --- a/drivers/net/can/dev.c +++ b/drivers/net/can/dev.c @@ -1163,7 +1163,7 @@ static int can_fill_info(struct sk_buff *skb, const struct net_device *dev) { struct can_priv *priv = netdev_priv(dev); struct can_ctrlmode cm = {.flags = priv->ctrlmode}; - struct can_berr_counter bec; + struct can_berr_counter bec = { }; enum can_state state = priv->state; if (priv->do_get_state) -- GitLab From 87baa23e0236bc1c41ce744f524bf12dbe7fde66 Mon Sep 17 00:00:00 2001 From: Hemant Kumar Date: Mon, 11 Jan 2021 19:07:40 +0100 Subject: [PATCH 1558/2012] bus: mhi: core: Add helper API to return number of free TREs Introduce mhi_get_free_desc_count() API to return number of TREs available to queue buffer. MHI clients can use this API to know before hand if ring is full without calling queue API. Signed-off-by: Hemant Kumar Reviewed-by: Jeffrey Hugo Reviewed-by: Manivannan Sadhasivam Link: https://lore.kernel.org/r/1610388462-16322-1-git-send-email-loic.poulain@linaro.org Signed-off-by: Manivannan Sadhasivam --- drivers/bus/mhi/core/main.c | 12 ++++++++++++ include/linux/mhi.h | 9 +++++++++ 2 files changed, 21 insertions(+) diff --git a/drivers/bus/mhi/core/main.c b/drivers/bus/mhi/core/main.c index d34d7e90e38d9..1202433ecf981 100644 --- a/drivers/bus/mhi/core/main.c +++ b/drivers/bus/mhi/core/main.c @@ -260,6 +260,18 @@ int mhi_destroy_device(struct device *dev, void *data) return 0; } +int mhi_get_free_desc_count(struct mhi_device *mhi_dev, + enum dma_data_direction dir) +{ + struct mhi_controller *mhi_cntrl = mhi_dev->mhi_cntrl; + struct mhi_chan *mhi_chan = (dir == DMA_TO_DEVICE) ? + mhi_dev->ul_chan : mhi_dev->dl_chan; + struct mhi_ring *tre_ring = &mhi_chan->tre_ring; + + return get_nr_avail_ring_elements(mhi_cntrl, tre_ring); +} +EXPORT_SYMBOL_GPL(mhi_get_free_desc_count); + void mhi_notify(struct mhi_device *mhi_dev, enum mhi_callback cb_reason) { struct mhi_driver *mhi_drv; diff --git a/include/linux/mhi.h b/include/linux/mhi.h index 562862ff819c4..ece53a2522176 100644 --- a/include/linux/mhi.h +++ b/include/linux/mhi.h @@ -598,6 +598,15 @@ void mhi_set_mhi_state(struct mhi_controller *mhi_cntrl, */ void mhi_notify(struct mhi_device *mhi_dev, enum mhi_callback cb_reason); +/** + * mhi_get_free_desc_count - Get transfer ring length + * Get # of TD available to queue buffers + * @mhi_dev: Device associated with the channels + * @dir: Direction of the channel + */ +int mhi_get_free_desc_count(struct mhi_device *mhi_dev, + enum dma_data_direction dir); + /** * mhi_prepare_for_power_up - Do pre-initialization before power up. * This is optional, call this before power up if -- GitLab From f21916ec4826766463fe9fb55a5f43d2a365811d Mon Sep 17 00:00:00 2001 From: Tony Krowiak Date: Tue, 22 Dec 2020 20:20:13 -0500 Subject: [PATCH 1559/2012] s390/vfio-ap: clean up vfio_ap resources when KVM pointer invalidated The vfio_ap device driver registers a group notifier with VFIO when the file descriptor for a VFIO mediated device for a KVM guest is opened to receive notification that the KVM pointer is set (VFIO_GROUP_NOTIFY_SET_KVM event). When the KVM pointer is set, the vfio_ap driver takes the following actions: 1. Stashes the KVM pointer in the vfio_ap_mdev struct that holds the state of the mediated device. 2. Calls the kvm_get_kvm() function to increment its reference counter. 3. Sets the function pointer to the function that handles interception of the instruction that enables/disables interrupt processing. 4. Sets the masks in the KVM guest's CRYCB to pass AP resources through to the guest. In order to avoid memory leaks, when the notifier is called to receive notification that the KVM pointer has been set to NULL, the vfio_ap device driver should reverse the actions taken when the KVM pointer was set. Fixes: 258287c994de ("s390: vfio-ap: implement mediated device open callback") Signed-off-by: Tony Krowiak Reviewed-by: Halil Pasic Reviewed-by: Cornelia Huck Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20201223012013.5418-1-akrowiak@linux.ibm.com Signed-off-by: Christian Borntraeger Signed-off-by: Vasily Gorbik --- drivers/s390/crypto/vfio_ap_ops.c | 49 ++++++++++++++++++------------- 1 file changed, 28 insertions(+), 21 deletions(-) diff --git a/drivers/s390/crypto/vfio_ap_ops.c b/drivers/s390/crypto/vfio_ap_ops.c index e0bde85187451..7339043906cf9 100644 --- a/drivers/s390/crypto/vfio_ap_ops.c +++ b/drivers/s390/crypto/vfio_ap_ops.c @@ -1037,19 +1037,14 @@ static int vfio_ap_mdev_set_kvm(struct ap_matrix_mdev *matrix_mdev, { struct ap_matrix_mdev *m; - mutex_lock(&matrix_dev->lock); - list_for_each_entry(m, &matrix_dev->mdev_list, node) { - if ((m != matrix_mdev) && (m->kvm == kvm)) { - mutex_unlock(&matrix_dev->lock); + if ((m != matrix_mdev) && (m->kvm == kvm)) return -EPERM; - } } matrix_mdev->kvm = kvm; kvm_get_kvm(kvm); kvm->arch.crypto.pqap_hook = &matrix_mdev->pqap_hook; - mutex_unlock(&matrix_dev->lock); return 0; } @@ -1083,35 +1078,52 @@ static int vfio_ap_mdev_iommu_notifier(struct notifier_block *nb, return NOTIFY_DONE; } +static void vfio_ap_mdev_unset_kvm(struct ap_matrix_mdev *matrix_mdev) +{ + kvm_arch_crypto_clear_masks(matrix_mdev->kvm); + matrix_mdev->kvm->arch.crypto.pqap_hook = NULL; + vfio_ap_mdev_reset_queues(matrix_mdev->mdev); + kvm_put_kvm(matrix_mdev->kvm); + matrix_mdev->kvm = NULL; +} + static int vfio_ap_mdev_group_notifier(struct notifier_block *nb, unsigned long action, void *data) { - int ret; + int ret, notify_rc = NOTIFY_OK; struct ap_matrix_mdev *matrix_mdev; if (action != VFIO_GROUP_NOTIFY_SET_KVM) return NOTIFY_OK; matrix_mdev = container_of(nb, struct ap_matrix_mdev, group_notifier); + mutex_lock(&matrix_dev->lock); if (!data) { - matrix_mdev->kvm = NULL; - return NOTIFY_OK; + if (matrix_mdev->kvm) + vfio_ap_mdev_unset_kvm(matrix_mdev); + goto notify_done; } ret = vfio_ap_mdev_set_kvm(matrix_mdev, data); - if (ret) - return NOTIFY_DONE; + if (ret) { + notify_rc = NOTIFY_DONE; + goto notify_done; + } /* If there is no CRYCB pointer, then we can't copy the masks */ - if (!matrix_mdev->kvm->arch.crypto.crycbd) - return NOTIFY_DONE; + if (!matrix_mdev->kvm->arch.crypto.crycbd) { + notify_rc = NOTIFY_DONE; + goto notify_done; + } kvm_arch_crypto_set_masks(matrix_mdev->kvm, matrix_mdev->matrix.apm, matrix_mdev->matrix.aqm, matrix_mdev->matrix.adm); - return NOTIFY_OK; +notify_done: + mutex_unlock(&matrix_dev->lock); + return notify_rc; } static void vfio_ap_irq_disable_apqn(int apqn) @@ -1222,13 +1234,8 @@ static void vfio_ap_mdev_release(struct mdev_device *mdev) struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev); mutex_lock(&matrix_dev->lock); - if (matrix_mdev->kvm) { - kvm_arch_crypto_clear_masks(matrix_mdev->kvm); - matrix_mdev->kvm->arch.crypto.pqap_hook = NULL; - vfio_ap_mdev_reset_queues(mdev); - kvm_put_kvm(matrix_mdev->kvm); - matrix_mdev->kvm = NULL; - } + if (matrix_mdev->kvm) + vfio_ap_mdev_unset_kvm(matrix_mdev); mutex_unlock(&matrix_dev->lock); vfio_unregister_notifier(mdev_dev(mdev), VFIO_IOMMU_NOTIFY, -- GitLab From 6c12a6384e0c0b96debd88b24028e58f2ebd417b Mon Sep 17 00:00:00 2001 From: Tony Krowiak Date: Tue, 22 Dec 2020 20:15:53 -0500 Subject: [PATCH 1560/2012] s390/vfio-ap: No need to disable IRQ after queue reset The queues assigned to a matrix mediated device are currently reset when: * The VFIO_DEVICE_RESET ioctl is invoked * The mdev fd is closed by userspace (QEMU) * The mdev is removed from sysfs. Immediately after the reset of a queue, a call is made to disable interrupts for the queue. This is entirely unnecessary because the reset of a queue disables interrupts, so this will be removed. Furthermore, vfio_ap_irq_disable() does an unconditional PQAP/AQIC which can result in a specification exception (when the corresponding facility is not available), so this is actually a bugfix. Signed-off-by: Tony Krowiak [pasic@linux.ibm.com: minor rework before merging] Signed-off-by: Halil Pasic Fixes: ec89b55e3bce ("s390: ap: implement PAPQ AQIC interception in kernel") Cc: Signed-off-by: Vasily Gorbik --- drivers/s390/crypto/vfio_ap_drv.c | 6 +- drivers/s390/crypto/vfio_ap_ops.c | 100 ++++++++++++++++---------- drivers/s390/crypto/vfio_ap_private.h | 12 ++-- 3 files changed, 69 insertions(+), 49 deletions(-) diff --git a/drivers/s390/crypto/vfio_ap_drv.c b/drivers/s390/crypto/vfio_ap_drv.c index be2520cc010be..7dc72cb718b0e 100644 --- a/drivers/s390/crypto/vfio_ap_drv.c +++ b/drivers/s390/crypto/vfio_ap_drv.c @@ -71,15 +71,11 @@ static int vfio_ap_queue_dev_probe(struct ap_device *apdev) static void vfio_ap_queue_dev_remove(struct ap_device *apdev) { struct vfio_ap_queue *q; - int apid, apqi; mutex_lock(&matrix_dev->lock); q = dev_get_drvdata(&apdev->device); + vfio_ap_mdev_reset_queue(q, 1); dev_set_drvdata(&apdev->device, NULL); - apid = AP_QID_CARD(q->apqn); - apqi = AP_QID_QUEUE(q->apqn); - vfio_ap_mdev_reset_queue(apid, apqi, 1); - vfio_ap_irq_disable(q); kfree(q); mutex_unlock(&matrix_dev->lock); } diff --git a/drivers/s390/crypto/vfio_ap_ops.c b/drivers/s390/crypto/vfio_ap_ops.c index 7339043906cf9..41fc2e4135fe1 100644 --- a/drivers/s390/crypto/vfio_ap_ops.c +++ b/drivers/s390/crypto/vfio_ap_ops.c @@ -25,6 +25,7 @@ #define VFIO_AP_MDEV_NAME_HWVIRT "VFIO AP Passthrough Device" static int vfio_ap_mdev_reset_queues(struct mdev_device *mdev); +static struct vfio_ap_queue *vfio_ap_find_queue(int apqn); static int match_apqn(struct device *dev, const void *data) { @@ -49,20 +50,15 @@ static struct vfio_ap_queue *vfio_ap_get_queue( int apqn) { struct vfio_ap_queue *q; - struct device *dev; if (!test_bit_inv(AP_QID_CARD(apqn), matrix_mdev->matrix.apm)) return NULL; if (!test_bit_inv(AP_QID_QUEUE(apqn), matrix_mdev->matrix.aqm)) return NULL; - dev = driver_find_device(&matrix_dev->vfio_ap_drv->driver, NULL, - &apqn, match_apqn); - if (!dev) - return NULL; - q = dev_get_drvdata(dev); - q->matrix_mdev = matrix_mdev; - put_device(dev); + q = vfio_ap_find_queue(apqn); + if (q) + q->matrix_mdev = matrix_mdev; return q; } @@ -119,13 +115,18 @@ static void vfio_ap_wait_for_irqclear(int apqn) */ static void vfio_ap_free_aqic_resources(struct vfio_ap_queue *q) { - if (q->saved_isc != VFIO_AP_ISC_INVALID && q->matrix_mdev) + if (!q) + return; + if (q->saved_isc != VFIO_AP_ISC_INVALID && + !WARN_ON(!(q->matrix_mdev && q->matrix_mdev->kvm))) { kvm_s390_gisc_unregister(q->matrix_mdev->kvm, q->saved_isc); - if (q->saved_pfn && q->matrix_mdev) + q->saved_isc = VFIO_AP_ISC_INVALID; + } + if (q->saved_pfn && !WARN_ON(!q->matrix_mdev)) { vfio_unpin_pages(mdev_dev(q->matrix_mdev->mdev), &q->saved_pfn, 1); - q->saved_pfn = 0; - q->saved_isc = VFIO_AP_ISC_INVALID; + q->saved_pfn = 0; + } } /** @@ -144,7 +145,7 @@ static void vfio_ap_free_aqic_resources(struct vfio_ap_queue *q) * Returns if ap_aqic function failed with invalid, deconfigured or * checkstopped AP. */ -struct ap_queue_status vfio_ap_irq_disable(struct vfio_ap_queue *q) +static struct ap_queue_status vfio_ap_irq_disable(struct vfio_ap_queue *q) { struct ap_qirq_ctrl aqic_gisa = {}; struct ap_queue_status status; @@ -1126,48 +1127,70 @@ notify_done: return notify_rc; } -static void vfio_ap_irq_disable_apqn(int apqn) +static struct vfio_ap_queue *vfio_ap_find_queue(int apqn) { struct device *dev; - struct vfio_ap_queue *q; + struct vfio_ap_queue *q = NULL; dev = driver_find_device(&matrix_dev->vfio_ap_drv->driver, NULL, &apqn, match_apqn); if (dev) { q = dev_get_drvdata(dev); - vfio_ap_irq_disable(q); put_device(dev); } + + return q; } -int vfio_ap_mdev_reset_queue(unsigned int apid, unsigned int apqi, +int vfio_ap_mdev_reset_queue(struct vfio_ap_queue *q, unsigned int retry) { struct ap_queue_status status; + int ret; int retry2 = 2; - int apqn = AP_MKQID(apid, apqi); - do { - status = ap_zapq(apqn); - switch (status.response_code) { - case AP_RESPONSE_NORMAL: - while (!status.queue_empty && retry2--) { - msleep(20); - status = ap_tapq(apqn, NULL); - } - WARN_ON_ONCE(retry2 <= 0); - return 0; - case AP_RESPONSE_RESET_IN_PROGRESS: - case AP_RESPONSE_BUSY: + if (!q) + return 0; + +retry_zapq: + status = ap_zapq(q->apqn); + switch (status.response_code) { + case AP_RESPONSE_NORMAL: + ret = 0; + break; + case AP_RESPONSE_RESET_IN_PROGRESS: + if (retry--) { msleep(20); - break; - default: - /* things are really broken, give up */ - return -EIO; + goto retry_zapq; } - } while (retry--); + ret = -EBUSY; + break; + case AP_RESPONSE_Q_NOT_AVAIL: + case AP_RESPONSE_DECONFIGURED: + case AP_RESPONSE_CHECKSTOPPED: + WARN_ON_ONCE(status.irq_enabled); + ret = -EBUSY; + goto free_resources; + default: + /* things are really broken, give up */ + WARN(true, "PQAP/ZAPQ completed with invalid rc (%x)\n", + status.response_code); + return -EIO; + } + + /* wait for the reset to take effect */ + while (retry2--) { + if (status.queue_empty && !status.irq_enabled) + break; + msleep(20); + status = ap_tapq(q->apqn, NULL); + } + WARN_ON_ONCE(retry2 <= 0); - return -EBUSY; +free_resources: + vfio_ap_free_aqic_resources(q); + + return ret; } static int vfio_ap_mdev_reset_queues(struct mdev_device *mdev) @@ -1175,13 +1198,15 @@ static int vfio_ap_mdev_reset_queues(struct mdev_device *mdev) int ret; int rc = 0; unsigned long apid, apqi; + struct vfio_ap_queue *q; struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev); for_each_set_bit_inv(apid, matrix_mdev->matrix.apm, matrix_mdev->matrix.apm_max + 1) { for_each_set_bit_inv(apqi, matrix_mdev->matrix.aqm, matrix_mdev->matrix.aqm_max + 1) { - ret = vfio_ap_mdev_reset_queue(apid, apqi, 1); + q = vfio_ap_find_queue(AP_MKQID(apid, apqi)); + ret = vfio_ap_mdev_reset_queue(q, 1); /* * Regardless whether a queue turns out to be busy, or * is not operational, we need to continue resetting @@ -1189,7 +1214,6 @@ static int vfio_ap_mdev_reset_queues(struct mdev_device *mdev) */ if (ret) rc = ret; - vfio_ap_irq_disable_apqn(AP_MKQID(apid, apqi)); } } diff --git a/drivers/s390/crypto/vfio_ap_private.h b/drivers/s390/crypto/vfio_ap_private.h index f46dde56b4644..28e9d99897682 100644 --- a/drivers/s390/crypto/vfio_ap_private.h +++ b/drivers/s390/crypto/vfio_ap_private.h @@ -88,11 +88,6 @@ struct ap_matrix_mdev { struct mdev_device *mdev; }; -extern int vfio_ap_mdev_register(void); -extern void vfio_ap_mdev_unregister(void); -int vfio_ap_mdev_reset_queue(unsigned int apid, unsigned int apqi, - unsigned int retry); - struct vfio_ap_queue { struct ap_matrix_mdev *matrix_mdev; unsigned long saved_pfn; @@ -100,5 +95,10 @@ struct vfio_ap_queue { #define VFIO_AP_ISC_INVALID 0xff unsigned char saved_isc; }; -struct ap_queue_status vfio_ap_irq_disable(struct vfio_ap_queue *q); + +int vfio_ap_mdev_register(void); +void vfio_ap_mdev_unregister(void); +int vfio_ap_mdev_reset_queue(struct vfio_ap_queue *q, + unsigned int retry); + #endif /* _VFIO_AP_PRIVATE_H_ */ -- GitLab From e82080e1f456467cc185fe65ee69fe9f9bd0b576 Mon Sep 17 00:00:00 2001 From: Janosch Frank Date: Wed, 13 Jan 2021 11:56:26 -0500 Subject: [PATCH 1561/2012] s390: uv: Fix sysfs max number of VCPUs reporting The number reported by the query is N-1 and I think people reading the sysfs file would expect N instead. For users creating VMs there's no actual difference because KVM's limit is currently below the UV's limit. Signed-off-by: Janosch Frank Fixes: a0f60f8431999 ("s390/protvirt: Add sysfs firmware interface for Ultravisor information") Cc: stable@vger.kernel.org Reviewed-by: Claudio Imbrenda Acked-by: Cornelia Huck Signed-off-by: Vasily Gorbik --- arch/s390/boot/uv.c | 2 +- arch/s390/include/asm/uv.h | 4 ++-- arch/s390/kernel/uv.c | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/s390/boot/uv.c b/arch/s390/boot/uv.c index a15c033f53ca4..87641dd65ccf9 100644 --- a/arch/s390/boot/uv.c +++ b/arch/s390/boot/uv.c @@ -35,7 +35,7 @@ void uv_query_info(void) uv_info.guest_cpu_stor_len = uvcb.cpu_stor_len; uv_info.max_sec_stor_addr = ALIGN(uvcb.max_guest_stor_addr, PAGE_SIZE); uv_info.max_num_sec_conf = uvcb.max_num_sec_conf; - uv_info.max_guest_cpus = uvcb.max_guest_cpus; + uv_info.max_guest_cpu_id = uvcb.max_guest_cpu_id; } #ifdef CONFIG_PROTECTED_VIRTUALIZATION_GUEST diff --git a/arch/s390/include/asm/uv.h b/arch/s390/include/asm/uv.h index 0325fc0469b7b..7b98d4caee779 100644 --- a/arch/s390/include/asm/uv.h +++ b/arch/s390/include/asm/uv.h @@ -96,7 +96,7 @@ struct uv_cb_qui { u32 max_num_sec_conf; u64 max_guest_stor_addr; u8 reserved88[158 - 136]; - u16 max_guest_cpus; + u16 max_guest_cpu_id; u8 reserveda0[200 - 160]; } __packed __aligned(8); @@ -273,7 +273,7 @@ struct uv_info { unsigned long guest_cpu_stor_len; unsigned long max_sec_stor_addr; unsigned int max_num_sec_conf; - unsigned short max_guest_cpus; + unsigned short max_guest_cpu_id; }; extern struct uv_info uv_info; diff --git a/arch/s390/kernel/uv.c b/arch/s390/kernel/uv.c index 883bfed9f5c2c..b2d2ad1530676 100644 --- a/arch/s390/kernel/uv.c +++ b/arch/s390/kernel/uv.c @@ -368,7 +368,7 @@ static ssize_t uv_query_max_guest_cpus(struct kobject *kobj, struct kobj_attribute *attr, char *page) { return scnprintf(page, PAGE_SIZE, "%d\n", - uv_info.max_guest_cpus); + uv_info.max_guest_cpu_id + 1); } static struct kobj_attribute uv_query_max_guest_cpus_attr = -- GitLab From a1df829ead5877d4a1061e976a50e2e665a16f24 Mon Sep 17 00:00:00 2001 From: Moritz Fischer Date: Thu, 21 Jan 2021 17:24:19 -0800 Subject: [PATCH 1562/2012] ACPI/IORT: Do not blindly trust DMA masks from firmware Address issue observed on real world system with suboptimal IORT table where DMA masks of PCI devices would get set to 0 as result. iort_dma_setup() would query the root complex'/named component IORT entry for a DMA mask, and use that over the one the device has been configured with earlier. Ideally we want to use the minimum mask of what the IORT contains for the root complex and what the device was configured with. Fixes: 5ac65e8c8941 ("ACPI/IORT: Support address size limit for root complexes") Signed-off-by: Moritz Fischer Reviewed-by: Robin Murphy Acked-by: Lorenzo Pieralisi Link: https://lore.kernel.org/r/20210122012419.95010-1-mdf@kernel.org Signed-off-by: Catalin Marinas --- drivers/acpi/arm64/iort.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c index d4eac6d7e9fbc..2494138a6905e 100644 --- a/drivers/acpi/arm64/iort.c +++ b/drivers/acpi/arm64/iort.c @@ -1107,6 +1107,11 @@ static int nc_dma_get_range(struct device *dev, u64 *size) ncomp = (struct acpi_iort_named_component *)node->node_data; + if (!ncomp->memory_address_limit) { + pr_warn(FW_BUG "Named component missing memory address limit\n"); + return -EINVAL; + } + *size = ncomp->memory_address_limit >= 64 ? U64_MAX : 1ULL<memory_address_limit; @@ -1126,6 +1131,11 @@ static int rc_dma_get_range(struct device *dev, u64 *size) rc = (struct acpi_iort_root_complex *)node->node_data; + if (!rc->memory_address_limit) { + pr_warn(FW_BUG "Root complex missing memory address limit\n"); + return -EINVAL; + } + *size = rc->memory_address_limit >= 64 ? U64_MAX : 1ULL<memory_address_limit; @@ -1173,8 +1183,8 @@ void iort_dma_setup(struct device *dev, u64 *dma_addr, u64 *dma_size) end = dmaaddr + size - 1; mask = DMA_BIT_MASK(ilog2(end) + 1); dev->bus_dma_limit = end; - dev->coherent_dma_mask = mask; - *dev->dma_mask = mask; + dev->coherent_dma_mask = min(dev->coherent_dma_mask, mask); + *dev->dma_mask = min(*dev->dma_mask, mask); } *dma_addr = dmaaddr; -- GitLab From 2e92493637a09547734f92c62a2471f6f0cb9a2c Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Mon, 25 Jan 2021 14:42:07 +0100 Subject: [PATCH 1563/2012] x86/xen: avoid warning in Xen pv guest with CONFIG_AMD_MEM_ENCRYPT enabled When booting a kernel which has been built with CONFIG_AMD_MEM_ENCRYPT enabled as a Xen pv guest a warning is issued for each processor: [ 5.964347] ------------[ cut here ]------------ [ 5.968314] WARNING: CPU: 0 PID: 1 at /home/gross/linux/head/arch/x86/xen/enlighten_pv.c:660 get_trap_addr+0x59/0x90 [ 5.972321] Modules linked in: [ 5.976313] CPU: 0 PID: 1 Comm: swapper/0 Tainted: G W 5.11.0-rc5-default #75 [ 5.980313] Hardware name: Dell Inc. OptiPlex 9020/0PC5F7, BIOS A05 12/05/2013 [ 5.984313] RIP: e030:get_trap_addr+0x59/0x90 [ 5.988313] Code: 42 10 83 f0 01 85 f6 74 04 84 c0 75 1d b8 01 00 00 00 c3 48 3d 00 80 83 82 72 08 48 3d 20 81 83 82 72 0c b8 01 00 00 00 eb db <0f> 0b 31 c0 c3 48 2d 00 80 83 82 48 ba 72 1c c7 71 1c c7 71 1c 48 [ 5.992313] RSP: e02b:ffffc90040033d38 EFLAGS: 00010202 [ 5.996313] RAX: 0000000000000001 RBX: ffffffff82a141d0 RCX: ffffffff8222ec38 [ 6.000312] RDX: ffffffff8222ec38 RSI: 0000000000000005 RDI: ffffc90040033d40 [ 6.004313] RBP: ffff8881003984a0 R08: 0000000000000007 R09: ffff888100398000 [ 6.008312] R10: 0000000000000007 R11: ffffc90040246000 R12: ffff8884082182a8 [ 6.012313] R13: 0000000000000100 R14: 000000000000001d R15: ffff8881003982d0 [ 6.016316] FS: 0000000000000000(0000) GS:ffff888408200000(0000) knlGS:0000000000000000 [ 6.020313] CS: e030 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 6.024313] CR2: ffffc900020ef000 CR3: 000000000220a000 CR4: 0000000000050660 [ 6.028314] Call Trace: [ 6.032313] cvt_gate_to_trap.part.7+0x3f/0x90 [ 6.036313] ? asm_exc_double_fault+0x30/0x30 [ 6.040313] xen_convert_trap_info+0x87/0xd0 [ 6.044313] xen_pv_cpu_up+0x17a/0x450 [ 6.048313] bringup_cpu+0x2b/0xc0 [ 6.052313] ? cpus_read_trylock+0x50/0x50 [ 6.056313] cpuhp_invoke_callback+0x80/0x4c0 [ 6.060313] _cpu_up+0xa7/0x140 [ 6.064313] cpu_up+0x98/0xd0 [ 6.068313] bringup_nonboot_cpus+0x4f/0x60 [ 6.072313] smp_init+0x26/0x79 [ 6.076313] kernel_init_freeable+0x103/0x258 [ 6.080313] ? rest_init+0xd0/0xd0 [ 6.084313] kernel_init+0xa/0x110 [ 6.088313] ret_from_fork+0x1f/0x30 [ 6.092313] ---[ end trace be9ecf17dceeb4f3 ]--- Reason is that there is no Xen pv trap entry for X86_TRAP_VC. Fix that by adding a generic trap handler for unknown traps and wire all unknown bare metal handlers to this generic handler, which will just crash the system in case such a trap will ever happen. Fixes: 0786138c78e793 ("x86/sev-es: Add a Runtime #VC Exception Handler") Cc: # v5.10 Signed-off-by: Juergen Gross Reviewed-by: Andrew Cooper Signed-off-by: Juergen Gross --- arch/x86/include/asm/idtentry.h | 1 + arch/x86/xen/enlighten_pv.c | 15 ++++++++++++++- arch/x86/xen/xen-asm.S | 1 + 3 files changed, 16 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/idtentry.h b/arch/x86/include/asm/idtentry.h index b2442eb0ac2f8..eb01c2618a9df 100644 --- a/arch/x86/include/asm/idtentry.h +++ b/arch/x86/include/asm/idtentry.h @@ -616,6 +616,7 @@ DECLARE_IDTENTRY_VC(X86_TRAP_VC, exc_vmm_communication); #ifdef CONFIG_XEN_PV DECLARE_IDTENTRY_XENCB(X86_TRAP_OTHER, exc_xen_hypervisor_callback); +DECLARE_IDTENTRY_RAW(X86_TRAP_OTHER, exc_xen_unknown_trap); #endif /* Device interrupts common/spurious */ diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index 4409306364dc3..9a5a50cdaab59 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -583,6 +583,13 @@ DEFINE_IDTENTRY_RAW(xenpv_exc_debug) exc_debug(regs); } +DEFINE_IDTENTRY_RAW(exc_xen_unknown_trap) +{ + /* This should never happen and there is no way to handle it. */ + pr_err("Unknown trap in Xen PV mode."); + BUG(); +} + struct trap_array_entry { void (*orig)(void); void (*xen)(void); @@ -631,6 +638,7 @@ static bool __ref get_trap_addr(void **addr, unsigned int ist) { unsigned int nr; bool ist_okay = false; + bool found = false; /* * Replace trap handler addresses by Xen specific ones. @@ -645,6 +653,7 @@ static bool __ref get_trap_addr(void **addr, unsigned int ist) if (*addr == entry->orig) { *addr = entry->xen; ist_okay = entry->ist_okay; + found = true; break; } } @@ -655,9 +664,13 @@ static bool __ref get_trap_addr(void **addr, unsigned int ist) nr = (*addr - (void *)early_idt_handler_array[0]) / EARLY_IDT_HANDLER_SIZE; *addr = (void *)xen_early_idt_handler_array[nr]; + found = true; } - if (WARN_ON(ist != 0 && !ist_okay)) + if (!found) + *addr = (void *)xen_asm_exc_xen_unknown_trap; + + if (WARN_ON(found && ist != 0 && !ist_okay)) return false; return true; diff --git a/arch/x86/xen/xen-asm.S b/arch/x86/xen/xen-asm.S index 1cb0e84b91610..53cf8aa35032d 100644 --- a/arch/x86/xen/xen-asm.S +++ b/arch/x86/xen/xen-asm.S @@ -178,6 +178,7 @@ xen_pv_trap asm_exc_simd_coprocessor_error #ifdef CONFIG_IA32_EMULATION xen_pv_trap entry_INT80_compat #endif +xen_pv_trap asm_exc_xen_unknown_trap xen_pv_trap asm_exc_xen_hypervisor_callback __INIT -- GitLab From 8dc932d3e8afb65e12eba7495f046c83884c49bf Mon Sep 17 00:00:00 2001 From: Maxim Mikityanskiy Date: Tue, 26 Jan 2021 21:59:07 +0200 Subject: [PATCH 1564/2012] Revert "block: simplify set_init_blocksize" to regain lost performance The cited commit introduced a serious regression with SATA write speed, as found by bisecting. This patch reverts this commit, which restores write speed back to the values observed before this commit. The performance tests were done on a Helios4 NAS (2nd batch) with 4 HDDs (WD8003FFBX) using dd (bs=1M count=2000). "Direct" is a test with a single HDD, the rest are different RAID levels built over the first partitions of 4 HDDs. Test results are in MB/s, R is read, W is write. | Direct | RAID0 | RAID10 f2 | RAID10 n2 | RAID6 ----------------+--------+-------+-----------+-----------+-------- 9011495c9466 | R:256 | R:313 | R:276 | R:313 | R:323 (before faulty) | W:254 | W:253 | W:195 | W:204 | W:117 ----------------+--------+-------+-----------+-----------+-------- 5ff9f19231a0 | R:257 | R:398 | R:312 | R:344 | R:391 (faulty commit) | W:154 | W:122 | W:67.7 | W:66.6 | W:67.2 ----------------+--------+-------+-----------+-----------+-------- 5.10.10 | R:256 | R:401 | R:312 | R:356 | R:375 unpatched | W:149 | W:123 | W:64 | W:64.1 | W:61.5 ----------------+--------+-------+-----------+-----------+-------- 5.10.10 | R:255 | R:396 | R:312 | R:340 | R:393 patched | W:247 | W:274 | W:220 | W:225 | W:121 Applying this patch doesn't hurt read performance, while improves the write speed by 1.5x - 3.5x (more impact on RAID tests). The write speed is restored back to the state before the faulty commit, and even a bit higher in RAID tests (which aren't HDD-bound on this device) - that is likely related to other optimizations done between the faulty commit and 5.10.10 which also improved the read speed. Signed-off-by: Maxim Mikityanskiy Fixes: 5ff9f19231a0 ("block: simplify set_init_blocksize") Cc: Christoph Hellwig Cc: Jens Axboe Acked-by: Christoph Hellwig Signed-off-by: Jens Axboe --- fs/block_dev.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/fs/block_dev.c b/fs/block_dev.c index 3b8963e228a1b..235b5042672e9 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -130,7 +130,15 @@ EXPORT_SYMBOL(truncate_bdev_range); static void set_init_blocksize(struct block_device *bdev) { - bdev->bd_inode->i_blkbits = blksize_bits(bdev_logical_block_size(bdev)); + unsigned int bsize = bdev_logical_block_size(bdev); + loff_t size = i_size_read(bdev->bd_inode); + + while (bsize < PAGE_SIZE) { + if (size & bsize) + break; + bsize <<= 1; + } + bdev->bd_inode->i_blkbits = blksize_bits(bsize); } int set_blocksize(struct block_device *bdev, int size) -- GitLab From 6195ba09822c87cad09189bbf550d0fbe714687a Mon Sep 17 00:00:00 2001 From: Hao Xu Date: Wed, 27 Jan 2021 15:14:09 +0800 Subject: [PATCH 1565/2012] io_uring: fix flush cqring overflow list while TASK_INTERRUPTIBLE Abaci reported the follow warning: [ 27.073425] do not call blocking ops when !TASK_RUNNING; state=1 set at [] prepare_to_wait_exclusive+0x3a/0xc0 [ 27.075805] WARNING: CPU: 0 PID: 951 at kernel/sched/core.c:7853 __might_sleep+0x80/0xa0 [ 27.077604] Modules linked in: [ 27.078379] CPU: 0 PID: 951 Comm: a.out Not tainted 5.11.0-rc3+ #1 [ 27.079637] Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2011 [ 27.080852] RIP: 0010:__might_sleep+0x80/0xa0 [ 27.081835] Code: 65 48 8b 04 25 80 71 01 00 48 8b 90 c0 15 00 00 48 8b 70 18 48 c7 c7 08 39 95 82 c6 05 f9 5f de 08 01 48 89 d1 e8 00 c6 fa ff 0b eb bf 41 0f b6 f5 48 c7 c7 40 23 c9 82 e8 f3 48 ec 00 eb a7 [ 27.084521] RSP: 0018:ffffc90000fe3ce8 EFLAGS: 00010286 [ 27.085350] RAX: 0000000000000000 RBX: ffffffff82956083 RCX: 0000000000000000 [ 27.086348] RDX: ffff8881057a0000 RSI: ffffffff8118cc9e RDI: ffff88813bc28570 [ 27.087598] RBP: 00000000000003a7 R08: 0000000000000001 R09: 0000000000000001 [ 27.088819] R10: ffffc90000fe3e00 R11: 00000000fffef9f0 R12: 0000000000000000 [ 27.089819] R13: 0000000000000000 R14: ffff88810576eb80 R15: ffff88810576e800 [ 27.091058] FS: 00007f7b144cf740(0000) GS:ffff88813bc00000(0000) knlGS:0000000000000000 [ 27.092775] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 27.093796] CR2: 00000000022da7b8 CR3: 000000010b928002 CR4: 00000000003706f0 [ 27.094778] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 27.095780] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 27.097011] Call Trace: [ 27.097685] __mutex_lock+0x5d/0xa30 [ 27.098565] ? prepare_to_wait_exclusive+0x71/0xc0 [ 27.099412] ? io_cqring_overflow_flush.part.101+0x6d/0x70 [ 27.100441] ? lockdep_hardirqs_on_prepare+0xe9/0x1c0 [ 27.101537] ? _raw_spin_unlock_irqrestore+0x2d/0x40 [ 27.102656] ? trace_hardirqs_on+0x46/0x110 [ 27.103459] ? io_cqring_overflow_flush.part.101+0x6d/0x70 [ 27.104317] io_cqring_overflow_flush.part.101+0x6d/0x70 [ 27.105113] io_cqring_wait+0x36e/0x4d0 [ 27.105770] ? find_held_lock+0x28/0xb0 [ 27.106370] ? io_uring_remove_task_files+0xa0/0xa0 [ 27.107076] __x64_sys_io_uring_enter+0x4fb/0x640 [ 27.107801] ? rcu_read_lock_sched_held+0x59/0xa0 [ 27.108562] ? lockdep_hardirqs_on_prepare+0xe9/0x1c0 [ 27.109684] ? syscall_enter_from_user_mode+0x26/0x70 [ 27.110731] do_syscall_64+0x2d/0x40 [ 27.111296] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 27.112056] RIP: 0033:0x7f7b13dc8239 [ 27.112663] Code: 01 00 48 81 c4 80 00 00 00 e9 f1 fe ff ff 0f 1f 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 3d 01 f0 ff ff 73 01 c3 48 8b 0d 27 ec 2c 00 f7 d8 64 89 01 48 [ 27.115113] RSP: 002b:00007ffd6d7f5c88 EFLAGS: 00000286 ORIG_RAX: 00000000000001aa [ 27.116562] RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f7b13dc8239 [ 27.117961] RDX: 000000000000478e RSI: 0000000000000000 RDI: 0000000000000003 [ 27.118925] RBP: 00007ffd6d7f5cb0 R08: 0000000020000040 R09: 0000000000000008 [ 27.119773] R10: 0000000000000001 R11: 0000000000000286 R12: 0000000000400480 [ 27.120614] R13: 00007ffd6d7f5d90 R14: 0000000000000000 R15: 0000000000000000 [ 27.121490] irq event stamp: 5635 [ 27.121946] hardirqs last enabled at (5643): [] console_unlock+0x5c4/0x740 [ 27.123476] hardirqs last disabled at (5652): [] console_unlock+0x4e7/0x740 [ 27.125192] softirqs last enabled at (5272): [] __do_softirq+0x3c5/0x5aa [ 27.126430] softirqs last disabled at (5267): [] asm_call_irq_on_stack+0xf/0x20 [ 27.127634] ---[ end trace 289d7e28fa60f928 ]--- This is caused by calling io_cqring_overflow_flush() which may sleep after calling prepare_to_wait_exclusive() which set task state to TASK_INTERRUPTIBLE Reported-by: Abaci Fixes: 6c503150ae33 ("io_uring: patch up IOPOLL overflow_flush sync") Reviewed-by: Pavel Begunkov Signed-off-by: Hao Xu Signed-off-by: Jens Axboe --- fs/io_uring.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index c218deaf73a9f..ae388cc528436 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -7268,14 +7268,18 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, TASK_INTERRUPTIBLE); /* make sure we run task_work before checking for signals */ ret = io_run_task_work_sig(); - if (ret > 0) + if (ret > 0) { + finish_wait(&ctx->wait, &iowq.wq); continue; + } else if (ret < 0) break; if (io_should_wake(&iowq)) break; - if (test_bit(0, &ctx->cq_check_overflow)) + if (test_bit(0, &ctx->cq_check_overflow)) { + finish_wait(&ctx->wait, &iowq.wq); continue; + } if (uts) { timeout = schedule_timeout(timeout); if (timeout == 0) { -- GitLab From d17405d52bacd14fe7fdbb10c0434934ea496914 Mon Sep 17 00:00:00 2001 From: Barry Song Date: Mon, 25 Jan 2021 14:13:06 +1300 Subject: [PATCH 1566/2012] dma-mapping: benchmark: fix kernel crash when dma_map_single fails if dma_map_single() fails, kernel will give the below oops since task_struct has been destroyed and we are running into the memory corruption due to use-after-free in kthread_stop(): [ 48.095310] Unable to handle kernel paging request at virtual address 000000c473548040 [ 48.095736] Mem abort info: [ 48.095864] ESR = 0x96000004 [ 48.096025] EC = 0x25: DABT (current EL), IL = 32 bits [ 48.096268] SET = 0, FnV = 0 [ 48.096401] EA = 0, S1PTW = 0 [ 48.096538] Data abort info: [ 48.096659] ISV = 0, ISS = 0x00000004 [ 48.096820] CM = 0, WnR = 0 [ 48.097079] user pgtable: 4k pages, 48-bit VAs, pgdp=0000000104639000 [ 48.098099] [000000c473548040] pgd=0000000000000000, p4d=0000000000000000 [ 48.098832] Internal error: Oops: 96000004 [#1] PREEMPT SMP [ 48.099232] Modules linked in: [ 48.099387] CPU: 0 PID: 2 Comm: kthreadd Tainted: G W [ 48.099887] Hardware name: linux,dummy-virt (DT) [ 48.100078] pstate: 60000005 (nZCv daif -PAN -UAO -TCO BTYPE=--) [ 48.100516] pc : __kmalloc_node+0x214/0x368 [ 48.100944] lr : __kmalloc_node+0x1f4/0x368 [ 48.101458] sp : ffff800011f0bb80 [ 48.101843] x29: ffff800011f0bb80 x28: ffff0000c0098ec0 [ 48.102330] x27: 0000000000000000 x26: 00000000001d4600 [ 48.102648] x25: ffff0000c0098ec0 x24: ffff800011b6a000 [ 48.102988] x23: 00000000ffffffff x22: ffff0000c0098ec0 [ 48.103333] x21: ffff8000101d7a54 x20: 0000000000000dc0 [ 48.103657] x19: ffff0000c0001e00 x18: 0000000000000000 [ 48.104069] x17: 0000000000000000 x16: 0000000000000000 [ 48.105449] x15: 000001aa0304e7b9 x14: 00000000000003b1 [ 48.106401] x13: ffff8000122d5000 x12: ffff80001228d000 [ 48.107296] x11: ffff0000c0154340 x10: 0000000000000000 [ 48.107862] x9 : ffff80000fffffff x8 : ffff0000c473527f [ 48.108326] x7 : ffff800011e62f58 x6 : ffff0000c01c8ed8 [ 48.108778] x5 : ffff0000c0098ec0 x4 : 0000000000000000 [ 48.109223] x3 : 00000000001d4600 x2 : 0000000000000040 [ 48.109656] x1 : 0000000000000001 x0 : ff0000c473548000 [ 48.110104] Call trace: [ 48.110287] __kmalloc_node+0x214/0x368 [ 48.110493] __vmalloc_node_range+0xc4/0x298 [ 48.110805] copy_process+0x2c8/0x15c8 [ 48.111133] kernel_clone+0x5c/0x3c0 [ 48.111373] kernel_thread+0x64/0x90 [ 48.111604] kthreadd+0x158/0x368 [ 48.111810] ret_from_fork+0x10/0x30 [ 48.112336] Code: 17ffffe9 b9402a62 b94008a1 11000421 (f8626802) [ 48.112884] ---[ end trace d4890e21e75419d5 ]--- Signed-off-by: Barry Song Signed-off-by: Christoph Hellwig --- kernel/dma/map_benchmark.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/kernel/dma/map_benchmark.c b/kernel/dma/map_benchmark.c index b1496e744c687..1b1b8ff875cb3 100644 --- a/kernel/dma/map_benchmark.c +++ b/kernel/dma/map_benchmark.c @@ -147,8 +147,10 @@ static int do_map_benchmark(struct map_benchmark_data *map) atomic64_set(&map->sum_sq_unmap, 0); atomic64_set(&map->loops, 0); - for (i = 0; i < threads; i++) + for (i = 0; i < threads; i++) { + get_task_struct(tsk[i]); wake_up_process(tsk[i]); + } msleep_interruptible(map->bparam.seconds * 1000); @@ -183,6 +185,8 @@ static int do_map_benchmark(struct map_benchmark_data *map) } out: + for (i = 0; i < threads; i++) + put_task_struct(tsk[i]); put_device(map->dev); kfree(tsk); return ret; -- GitLab From 89c7cb1608ac3c7ecc19436469f35ed12da97e1d Mon Sep 17 00:00:00 2001 From: Yong Wu Date: Tue, 19 Jan 2021 18:52:03 +0800 Subject: [PATCH 1567/2012] of/device: Update dma_range_map only when dev has valid dma-ranges The commit e0d072782c73 ("dma-mapping: introduce DMA range map, supplanting dma_pfn_offset") always update dma_range_map even though it was already set, like in the sunxi_mbus driver. the issue is reported at [1]. This patch avoid this(Updating it only when dev has valid dma-ranges). Meanwhile, dma_range_map contains the devices' dma_ranges information, This patch moves dma_range_map before of_iommu_configure. The iommu driver may need to know the dma_address requirements of its iommu consumer devices. [1] https://lore.kernel.org/linux-arm-kernel/5c7946f3-b56e-da00-a750-be097c7ceb32@arm.com/ CC: Frank Rowand Fixes: e0d072782c73 ("dma-mapping: introduce DMA range map, supplanting dma_pfn_offset"), Suggested-by: Robin Murphy Signed-off-by: Yong Wu Signed-off-by: Paul Kocialkowski Reviewed-by: Rob Herring Reviewed-by: Robin Murphy Signed-off-by: Rob Herring Link: https://lore.kernel.org/r/20210119105203.15530-1-yong.wu@mediatek.com --- drivers/of/device.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/of/device.c b/drivers/of/device.c index aedfaaafd3e7e..1122daa8e2736 100644 --- a/drivers/of/device.c +++ b/drivers/of/device.c @@ -162,9 +162,11 @@ int of_dma_configure_id(struct device *dev, struct device_node *np, mask = DMA_BIT_MASK(ilog2(end) + 1); dev->coherent_dma_mask &= mask; *dev->dma_mask &= mask; - /* ...but only set bus limit if we found valid dma-ranges earlier */ - if (!ret) + /* ...but only set bus limit and range map if we found valid dma-ranges earlier */ + if (!ret) { dev->bus_dma_limit = end; + dev->dma_range_map = map; + } coherent = of_dma_is_coherent(np); dev_dbg(dev, "device is%sdma coherent\n", @@ -172,6 +174,9 @@ int of_dma_configure_id(struct device *dev, struct device_node *np, iommu = of_iommu_configure(dev, np, id); if (PTR_ERR(iommu) == -EPROBE_DEFER) { + /* Don't touch range map if it wasn't set from a valid dma-ranges */ + if (!ret) + dev->dma_range_map = NULL; kfree(map); return -EPROBE_DEFER; } @@ -181,7 +186,6 @@ int of_dma_configure_id(struct device *dev, struct device_node *np, arch_setup_dma_ops(dev, dma_start, size, iommu, coherent); - dev->dma_range_map = map; return 0; } EXPORT_SYMBOL_GPL(of_dma_configure_id); -- GitLab From 38ec7c6b6bd69b9ccc1873b9f465d4f16b46b26e Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 27 Jan 2021 21:59:42 +0100 Subject: [PATCH 1568/2012] virt_wifi: fix deadlock on RTNL Fix a regression where everything in virt_wifi would just hang. This happened due to overlapping changes between commit a05829a7222e ("cfg80211: avoid holding the RTNL when calling the driver") which had originally needed to change the locking, but then I introduced commit 2fe8ef106238 ("cfg80211: change netdev registration/unregistration semantics") instead. virt_wifi somehow fell through the cracks when I undid all the previous locking changes. Fix it now. Fixes: a05829a7222e ("cfg80211: avoid holding the RTNL when calling the driver") Reported-by: syzbot+3d2d5e6cc3fb15c6a0fd@syzkaller.appspotmail.com Link: https://lore.kernel.org/r/20210127215941.2d6a97b09784.I4f1fac32f67045171be50931f44d77e150911bee@changeid Signed-off-by: Johannes Berg --- drivers/net/wireless/virt_wifi.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/drivers/net/wireless/virt_wifi.c b/drivers/net/wireless/virt_wifi.c index 4b455a4ae15b8..c878097f0ddaf 100644 --- a/drivers/net/wireless/virt_wifi.c +++ b/drivers/net/wireless/virt_wifi.c @@ -537,9 +537,7 @@ static int virt_wifi_newlink(struct net *src_net, struct net_device *dev, dev->ieee80211_ptr->iftype = NL80211_IFTYPE_STATION; dev->ieee80211_ptr->wiphy = common_wiphy; - wiphy_lock(common_wiphy); err = register_netdevice(dev); - wiphy_unlock(common_wiphy); if (err) { dev_err(&priv->lowerdev->dev, "can't register_netdevice: %d\n", err); @@ -562,9 +560,7 @@ static int virt_wifi_newlink(struct net *src_net, struct net_device *dev, return 0; unregister_netdev: - wiphy_lock(common_wiphy); unregister_netdevice(dev); - wiphy_unlock(common_wiphy); free_wireless_dev: kfree(dev->ieee80211_ptr); dev->ieee80211_ptr = NULL; @@ -590,9 +586,7 @@ static void virt_wifi_dellink(struct net_device *dev, netdev_rx_handler_unregister(priv->lowerdev); netdev_upper_dev_unlink(priv->lowerdev, dev); - wiphy_lock(common_wiphy); unregister_netdevice_queue(dev, head); - wiphy_unlock(common_wiphy); module_put(THIS_MODULE); /* Deleting the wiphy is handled in the module destructor. */ @@ -631,9 +625,7 @@ static int virt_wifi_event(struct notifier_block *this, unsigned long event, upper_dev = priv->upperdev; upper_dev->rtnl_link_ops->dellink(upper_dev, &list_kill); - wiphy_lock(common_wiphy); unregister_netdevice_many(&list_kill); - wiphy_unlock(common_wiphy); break; } -- GitLab From d3b9b45f7e981bcc6355414c63633fe33d95660c Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Tue, 26 Jan 2021 16:44:09 +0100 Subject: [PATCH 1569/2012] mac80211: minstrel_ht: fix regression in the max_prob_rate fix Since mi->max_prob_rate is overwritten after the loop that calls minstrel_ht_set_best_prob_rate, the new best rate needs to be written to *dest Fixes: a7fca4e4037f ("mac80211: minstrel_ht: fix max probability rate selection") Signed-off-by: Felix Fietkau Link: https://lore.kernel.org/r/20210126154409.6755-1-nbd@nbd.name Signed-off-by: Johannes Berg --- net/mac80211/rc80211_minstrel_ht.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c index e35948f4e1bf9..bfa550068de4b 100644 --- a/net/mac80211/rc80211_minstrel_ht.c +++ b/net/mac80211/rc80211_minstrel_ht.c @@ -537,7 +537,7 @@ minstrel_ht_set_best_prob_rate(struct minstrel_ht_sta *mi, u16 *dest, u16 index) cur_tp_avg = minstrel_ht_get_tp_avg(mi, cur_group, cur_idx, mrs->prob_avg); if (cur_tp_avg > tmp_tp_avg) - mi->max_prob_rate = index; + *dest = index; max_gpr_tp_avg = minstrel_ht_get_tp_avg(mi, max_gpr_group, max_gpr_idx, -- GitLab From 4f16d25c68ec844299a4df6ecbb0234eaf88a935 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 25 Jan 2021 17:28:18 +0100 Subject: [PATCH 1570/2012] netfilter: nftables: add nft_parse_register_load() and use it MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This new function combines the netlink register attribute parser and the load validation function. This update requires to replace: enum nft_registers sreg:8; in many of the expression private areas otherwise compiler complains with: error: cannot take address of bit-field ‘sreg’ when passing the register field as reference. Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 2 +- include/net/netfilter/nf_tables_core.h | 6 ++--- include/net/netfilter/nft_meta.h | 2 +- net/ipv4/netfilter/nft_dup_ipv4.c | 18 ++++++------- net/ipv6/netfilter/nft_dup_ipv6.c | 18 ++++++------- net/netfilter/nf_tables_api.c | 18 +++++++++++-- net/netfilter/nft_bitwise.c | 10 ++++---- net/netfilter/nft_byteorder.c | 6 ++--- net/netfilter/nft_cmp.c | 8 +++--- net/netfilter/nft_ct.c | 5 ++-- net/netfilter/nft_dup_netdev.c | 6 ++--- net/netfilter/nft_dynset.c | 12 ++++----- net/netfilter/nft_exthdr.c | 6 ++--- net/netfilter/nft_fwd_netdev.c | 18 ++++++------- net/netfilter/nft_hash.c | 10 +++++--- net/netfilter/nft_lookup.c | 6 ++--- net/netfilter/nft_masq.c | 18 ++++++------- net/netfilter/nft_meta.c | 3 +-- net/netfilter/nft_nat.c | 35 +++++++++++--------------- net/netfilter/nft_objref.c | 6 ++--- net/netfilter/nft_payload.c | 4 +-- net/netfilter/nft_queue.c | 12 ++++----- net/netfilter/nft_range.c | 6 ++--- net/netfilter/nft_redir.c | 18 ++++++------- net/netfilter/nft_tproxy.c | 14 +++++------ 25 files changed, 132 insertions(+), 135 deletions(-) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index f4af8362d2348..033b31aa38ccf 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -203,7 +203,7 @@ int nft_parse_u32_check(const struct nlattr *attr, int max, u32 *dest); unsigned int nft_parse_register(const struct nlattr *attr); int nft_dump_register(struct sk_buff *skb, unsigned int attr, unsigned int reg); -int nft_validate_register_load(enum nft_registers reg, unsigned int len); +int nft_parse_register_load(const struct nlattr *attr, u8 *sreg, u32 len); int nft_validate_register_store(const struct nft_ctx *ctx, enum nft_registers reg, const struct nft_data *data, diff --git a/include/net/netfilter/nf_tables_core.h b/include/net/netfilter/nf_tables_core.h index 8657e6815b07c..b7aff03a3f0f9 100644 --- a/include/net/netfilter/nf_tables_core.h +++ b/include/net/netfilter/nf_tables_core.h @@ -26,14 +26,14 @@ void nf_tables_core_module_exit(void); struct nft_bitwise_fast_expr { u32 mask; u32 xor; - enum nft_registers sreg:8; + u8 sreg; enum nft_registers dreg:8; }; struct nft_cmp_fast_expr { u32 data; u32 mask; - enum nft_registers sreg:8; + u8 sreg; u8 len; bool inv; }; @@ -67,7 +67,7 @@ struct nft_payload_set { enum nft_payload_bases base:8; u8 offset; u8 len; - enum nft_registers sreg:8; + u8 sreg; u8 csum_type; u8 csum_offset; u8 csum_flags; diff --git a/include/net/netfilter/nft_meta.h b/include/net/netfilter/nft_meta.h index 07e2fd507963a..946fa8c83798e 100644 --- a/include/net/netfilter/nft_meta.h +++ b/include/net/netfilter/nft_meta.h @@ -8,7 +8,7 @@ struct nft_meta { enum nft_meta_keys key:8; union { enum nft_registers dreg:8; - enum nft_registers sreg:8; + u8 sreg; }; }; diff --git a/net/ipv4/netfilter/nft_dup_ipv4.c b/net/ipv4/netfilter/nft_dup_ipv4.c index bcdb37f86a949..aeb631760eb9e 100644 --- a/net/ipv4/netfilter/nft_dup_ipv4.c +++ b/net/ipv4/netfilter/nft_dup_ipv4.c @@ -13,8 +13,8 @@ #include struct nft_dup_ipv4 { - enum nft_registers sreg_addr:8; - enum nft_registers sreg_dev:8; + u8 sreg_addr; + u8 sreg_dev; }; static void nft_dup_ipv4_eval(const struct nft_expr *expr, @@ -40,16 +40,16 @@ static int nft_dup_ipv4_init(const struct nft_ctx *ctx, if (tb[NFTA_DUP_SREG_ADDR] == NULL) return -EINVAL; - priv->sreg_addr = nft_parse_register(tb[NFTA_DUP_SREG_ADDR]); - err = nft_validate_register_load(priv->sreg_addr, sizeof(struct in_addr)); + err = nft_parse_register_load(tb[NFTA_DUP_SREG_ADDR], &priv->sreg_addr, + sizeof(struct in_addr)); if (err < 0) return err; - if (tb[NFTA_DUP_SREG_DEV] != NULL) { - priv->sreg_dev = nft_parse_register(tb[NFTA_DUP_SREG_DEV]); - return nft_validate_register_load(priv->sreg_dev, sizeof(int)); - } - return 0; + if (tb[NFTA_DUP_SREG_DEV]) + err = nft_parse_register_load(tb[NFTA_DUP_SREG_DEV], + &priv->sreg_dev, sizeof(int)); + + return err; } static int nft_dup_ipv4_dump(struct sk_buff *skb, const struct nft_expr *expr) diff --git a/net/ipv6/netfilter/nft_dup_ipv6.c b/net/ipv6/netfilter/nft_dup_ipv6.c index 8b5193efb1f1b..3a00d95e964e9 100644 --- a/net/ipv6/netfilter/nft_dup_ipv6.c +++ b/net/ipv6/netfilter/nft_dup_ipv6.c @@ -13,8 +13,8 @@ #include struct nft_dup_ipv6 { - enum nft_registers sreg_addr:8; - enum nft_registers sreg_dev:8; + u8 sreg_addr; + u8 sreg_dev; }; static void nft_dup_ipv6_eval(const struct nft_expr *expr, @@ -38,16 +38,16 @@ static int nft_dup_ipv6_init(const struct nft_ctx *ctx, if (tb[NFTA_DUP_SREG_ADDR] == NULL) return -EINVAL; - priv->sreg_addr = nft_parse_register(tb[NFTA_DUP_SREG_ADDR]); - err = nft_validate_register_load(priv->sreg_addr, sizeof(struct in6_addr)); + err = nft_parse_register_load(tb[NFTA_DUP_SREG_ADDR], &priv->sreg_addr, + sizeof(struct in6_addr)); if (err < 0) return err; - if (tb[NFTA_DUP_SREG_DEV] != NULL) { - priv->sreg_dev = nft_parse_register(tb[NFTA_DUP_SREG_DEV]); - return nft_validate_register_load(priv->sreg_dev, sizeof(int)); - } - return 0; + if (tb[NFTA_DUP_SREG_DEV]) + err = nft_parse_register_load(tb[NFTA_DUP_SREG_DEV], + &priv->sreg_dev, sizeof(int)); + + return err; } static int nft_dup_ipv6_dump(struct sk_buff *skb, const struct nft_expr *expr) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 15c467f1a9dd9..1e82ebba230dc 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -8634,7 +8634,7 @@ EXPORT_SYMBOL_GPL(nft_dump_register); * Validate that the input register is one of the general purpose * registers and that the length of the load is within the bounds. */ -int nft_validate_register_load(enum nft_registers reg, unsigned int len) +static int nft_validate_register_load(enum nft_registers reg, unsigned int len) { if (reg < NFT_REG_1 * NFT_REG_SIZE / NFT_REG32_SIZE) return -EINVAL; @@ -8645,7 +8645,21 @@ int nft_validate_register_load(enum nft_registers reg, unsigned int len) return 0; } -EXPORT_SYMBOL_GPL(nft_validate_register_load); + +int nft_parse_register_load(const struct nlattr *attr, u8 *sreg, u32 len) +{ + u32 reg; + int err; + + reg = nft_parse_register(attr); + err = nft_validate_register_load(reg, len); + if (err < 0) + return err; + + *sreg = reg; + return 0; +} +EXPORT_SYMBOL_GPL(nft_parse_register_load); /** * nft_validate_register_store - validate an expressions' register store diff --git a/net/netfilter/nft_bitwise.c b/net/netfilter/nft_bitwise.c index bbd773d743773..2157970b3cd37 100644 --- a/net/netfilter/nft_bitwise.c +++ b/net/netfilter/nft_bitwise.c @@ -16,7 +16,7 @@ #include struct nft_bitwise { - enum nft_registers sreg:8; + u8 sreg; enum nft_registers dreg:8; enum nft_bitwise_ops op:8; u8 len; @@ -169,8 +169,8 @@ static int nft_bitwise_init(const struct nft_ctx *ctx, priv->len = len; - priv->sreg = nft_parse_register(tb[NFTA_BITWISE_SREG]); - err = nft_validate_register_load(priv->sreg, priv->len); + err = nft_parse_register_load(tb[NFTA_BITWISE_SREG], &priv->sreg, + priv->len); if (err < 0) return err; @@ -315,8 +315,8 @@ static int nft_bitwise_fast_init(const struct nft_ctx *ctx, struct nft_bitwise_fast_expr *priv = nft_expr_priv(expr); int err; - priv->sreg = nft_parse_register(tb[NFTA_BITWISE_SREG]); - err = nft_validate_register_load(priv->sreg, sizeof(u32)); + err = nft_parse_register_load(tb[NFTA_BITWISE_SREG], &priv->sreg, + sizeof(u32)); if (err < 0) return err; diff --git a/net/netfilter/nft_byteorder.c b/net/netfilter/nft_byteorder.c index 12bed3f7bbc6d..0960563cd5a19 100644 --- a/net/netfilter/nft_byteorder.c +++ b/net/netfilter/nft_byteorder.c @@ -16,7 +16,7 @@ #include struct nft_byteorder { - enum nft_registers sreg:8; + u8 sreg; enum nft_registers dreg:8; enum nft_byteorder_ops op:8; u8 len; @@ -131,14 +131,14 @@ static int nft_byteorder_init(const struct nft_ctx *ctx, return -EINVAL; } - priv->sreg = nft_parse_register(tb[NFTA_BYTEORDER_SREG]); err = nft_parse_u32_check(tb[NFTA_BYTEORDER_LEN], U8_MAX, &len); if (err < 0) return err; priv->len = len; - err = nft_validate_register_load(priv->sreg, priv->len); + err = nft_parse_register_load(tb[NFTA_BYTEORDER_SREG], &priv->sreg, + priv->len); if (err < 0) return err; diff --git a/net/netfilter/nft_cmp.c b/net/netfilter/nft_cmp.c index 00e563a72d3d7..3640eea8a87b9 100644 --- a/net/netfilter/nft_cmp.c +++ b/net/netfilter/nft_cmp.c @@ -18,7 +18,7 @@ struct nft_cmp_expr { struct nft_data data; - enum nft_registers sreg:8; + u8 sreg; u8 len; enum nft_cmp_ops op:8; }; @@ -87,8 +87,7 @@ static int nft_cmp_init(const struct nft_ctx *ctx, const struct nft_expr *expr, return err; } - priv->sreg = nft_parse_register(tb[NFTA_CMP_SREG]); - err = nft_validate_register_load(priv->sreg, desc.len); + err = nft_parse_register_load(tb[NFTA_CMP_SREG], &priv->sreg, desc.len); if (err < 0) return err; @@ -174,8 +173,7 @@ static int nft_cmp_fast_init(const struct nft_ctx *ctx, if (err < 0) return err; - priv->sreg = nft_parse_register(tb[NFTA_CMP_SREG]); - err = nft_validate_register_load(priv->sreg, desc.len); + err = nft_parse_register_load(tb[NFTA_CMP_SREG], &priv->sreg, desc.len); if (err < 0) return err; diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c index 8bcd49f147971..c1d7a3c615d8e 100644 --- a/net/netfilter/nft_ct.c +++ b/net/netfilter/nft_ct.c @@ -28,7 +28,7 @@ struct nft_ct { enum ip_conntrack_dir dir:8; union { enum nft_registers dreg:8; - enum nft_registers sreg:8; + u8 sreg; }; }; @@ -600,8 +600,7 @@ static int nft_ct_set_init(const struct nft_ctx *ctx, } } - priv->sreg = nft_parse_register(tb[NFTA_CT_SREG]); - err = nft_validate_register_load(priv->sreg, len); + err = nft_parse_register_load(tb[NFTA_CT_SREG], &priv->sreg, len); if (err < 0) goto err1; diff --git a/net/netfilter/nft_dup_netdev.c b/net/netfilter/nft_dup_netdev.c index 40788b3f1071a..bbf3fcba3df40 100644 --- a/net/netfilter/nft_dup_netdev.c +++ b/net/netfilter/nft_dup_netdev.c @@ -14,7 +14,7 @@ #include struct nft_dup_netdev { - enum nft_registers sreg_dev:8; + u8 sreg_dev; }; static void nft_dup_netdev_eval(const struct nft_expr *expr, @@ -40,8 +40,8 @@ static int nft_dup_netdev_init(const struct nft_ctx *ctx, if (tb[NFTA_DUP_SREG_DEV] == NULL) return -EINVAL; - priv->sreg_dev = nft_parse_register(tb[NFTA_DUP_SREG_DEV]); - return nft_validate_register_load(priv->sreg_dev, sizeof(int)); + return nft_parse_register_load(tb[NFTA_DUP_SREG_DEV], &priv->sreg_dev, + sizeof(int)); } static int nft_dup_netdev_dump(struct sk_buff *skb, const struct nft_expr *expr) diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c index 0b053f75cd604..b20c4c7ea57bd 100644 --- a/net/netfilter/nft_dynset.c +++ b/net/netfilter/nft_dynset.c @@ -16,8 +16,8 @@ struct nft_dynset { struct nft_set *set; struct nft_set_ext_tmpl tmpl; enum nft_dynset_ops op:8; - enum nft_registers sreg_key:8; - enum nft_registers sreg_data:8; + u8 sreg_key; + u8 sreg_data; bool invert; bool expr; u8 num_exprs; @@ -219,8 +219,8 @@ static int nft_dynset_init(const struct nft_ctx *ctx, return err; } - priv->sreg_key = nft_parse_register(tb[NFTA_DYNSET_SREG_KEY]); - err = nft_validate_register_load(priv->sreg_key, set->klen); + err = nft_parse_register_load(tb[NFTA_DYNSET_SREG_KEY], &priv->sreg_key, + set->klen); if (err < 0) return err; @@ -230,8 +230,8 @@ static int nft_dynset_init(const struct nft_ctx *ctx, if (set->dtype == NFT_DATA_VERDICT) return -EOPNOTSUPP; - priv->sreg_data = nft_parse_register(tb[NFTA_DYNSET_SREG_DATA]); - err = nft_validate_register_load(priv->sreg_data, set->dlen); + err = nft_parse_register_load(tb[NFTA_DYNSET_SREG_DATA], + &priv->sreg_data, set->dlen); if (err < 0) return err; } else if (set->flags & NFT_SET_MAP) diff --git a/net/netfilter/nft_exthdr.c b/net/netfilter/nft_exthdr.c index 3c48cdc8935df..9c2b3bde1ac9a 100644 --- a/net/netfilter/nft_exthdr.c +++ b/net/netfilter/nft_exthdr.c @@ -20,7 +20,7 @@ struct nft_exthdr { u8 len; u8 op; enum nft_registers dreg:8; - enum nft_registers sreg:8; + u8 sreg; u8 flags; }; @@ -400,11 +400,11 @@ static int nft_exthdr_tcp_set_init(const struct nft_ctx *ctx, priv->type = nla_get_u8(tb[NFTA_EXTHDR_TYPE]); priv->offset = offset; priv->len = len; - priv->sreg = nft_parse_register(tb[NFTA_EXTHDR_SREG]); priv->flags = flags; priv->op = op; - return nft_validate_register_load(priv->sreg, priv->len); + return nft_parse_register_load(tb[NFTA_EXTHDR_SREG], &priv->sreg, + priv->len); } static int nft_exthdr_ipv4_init(const struct nft_ctx *ctx, diff --git a/net/netfilter/nft_fwd_netdev.c b/net/netfilter/nft_fwd_netdev.c index b77985986b24e..cd59afde5b2f8 100644 --- a/net/netfilter/nft_fwd_netdev.c +++ b/net/netfilter/nft_fwd_netdev.c @@ -18,7 +18,7 @@ #include struct nft_fwd_netdev { - enum nft_registers sreg_dev:8; + u8 sreg_dev; }; static void nft_fwd_netdev_eval(const struct nft_expr *expr, @@ -50,8 +50,8 @@ static int nft_fwd_netdev_init(const struct nft_ctx *ctx, if (tb[NFTA_FWD_SREG_DEV] == NULL) return -EINVAL; - priv->sreg_dev = nft_parse_register(tb[NFTA_FWD_SREG_DEV]); - return nft_validate_register_load(priv->sreg_dev, sizeof(int)); + return nft_parse_register_load(tb[NFTA_FWD_SREG_DEV], &priv->sreg_dev, + sizeof(int)); } static int nft_fwd_netdev_dump(struct sk_buff *skb, const struct nft_expr *expr) @@ -78,8 +78,8 @@ static int nft_fwd_netdev_offload(struct nft_offload_ctx *ctx, } struct nft_fwd_neigh { - enum nft_registers sreg_dev:8; - enum nft_registers sreg_addr:8; + u8 sreg_dev; + u8 sreg_addr; u8 nfproto; }; @@ -157,8 +157,6 @@ static int nft_fwd_neigh_init(const struct nft_ctx *ctx, !tb[NFTA_FWD_NFPROTO]) return -EINVAL; - priv->sreg_dev = nft_parse_register(tb[NFTA_FWD_SREG_DEV]); - priv->sreg_addr = nft_parse_register(tb[NFTA_FWD_SREG_ADDR]); priv->nfproto = ntohl(nla_get_be32(tb[NFTA_FWD_NFPROTO])); switch (priv->nfproto) { @@ -172,11 +170,13 @@ static int nft_fwd_neigh_init(const struct nft_ctx *ctx, return -EOPNOTSUPP; } - err = nft_validate_register_load(priv->sreg_dev, sizeof(int)); + err = nft_parse_register_load(tb[NFTA_FWD_SREG_DEV], &priv->sreg_dev, + sizeof(int)); if (err < 0) return err; - return nft_validate_register_load(priv->sreg_addr, addr_len); + return nft_parse_register_load(tb[NFTA_FWD_SREG_ADDR], &priv->sreg_addr, + addr_len); } static int nft_fwd_neigh_dump(struct sk_buff *skb, const struct nft_expr *expr) diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c index 96371d878e7e5..7ee6c6da50ae9 100644 --- a/net/netfilter/nft_hash.c +++ b/net/netfilter/nft_hash.c @@ -14,7 +14,7 @@ #include struct nft_jhash { - enum nft_registers sreg:8; + u8 sreg; enum nft_registers dreg:8; u8 len; bool autogen_seed:1; @@ -83,7 +83,6 @@ static int nft_jhash_init(const struct nft_ctx *ctx, if (tb[NFTA_HASH_OFFSET]) priv->offset = ntohl(nla_get_be32(tb[NFTA_HASH_OFFSET])); - priv->sreg = nft_parse_register(tb[NFTA_HASH_SREG]); priv->dreg = nft_parse_register(tb[NFTA_HASH_DREG]); err = nft_parse_u32_check(tb[NFTA_HASH_LEN], U8_MAX, &len); @@ -94,6 +93,10 @@ static int nft_jhash_init(const struct nft_ctx *ctx, priv->len = len; + err = nft_parse_register_load(tb[NFTA_HASH_SREG], &priv->sreg, len); + if (err < 0) + return err; + priv->modulus = ntohl(nla_get_be32(tb[NFTA_HASH_MODULUS])); if (priv->modulus < 1) return -ERANGE; @@ -108,8 +111,7 @@ static int nft_jhash_init(const struct nft_ctx *ctx, get_random_bytes(&priv->seed, sizeof(priv->seed)); } - return nft_validate_register_load(priv->sreg, len) && - nft_validate_register_store(ctx, priv->dreg, NULL, + return nft_validate_register_store(ctx, priv->dreg, NULL, NFT_DATA_VALUE, sizeof(u32)); } diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c index f1363b8aabba8..9e87b6d39f517 100644 --- a/net/netfilter/nft_lookup.c +++ b/net/netfilter/nft_lookup.c @@ -17,7 +17,7 @@ struct nft_lookup { struct nft_set *set; - enum nft_registers sreg:8; + u8 sreg; enum nft_registers dreg:8; bool invert; struct nft_set_binding binding; @@ -76,8 +76,8 @@ static int nft_lookup_init(const struct nft_ctx *ctx, if (IS_ERR(set)) return PTR_ERR(set); - priv->sreg = nft_parse_register(tb[NFTA_LOOKUP_SREG]); - err = nft_validate_register_load(priv->sreg, set->klen); + err = nft_parse_register_load(tb[NFTA_LOOKUP_SREG], &priv->sreg, + set->klen); if (err < 0) return err; diff --git a/net/netfilter/nft_masq.c b/net/netfilter/nft_masq.c index 71390b7270405..9953e80537536 100644 --- a/net/netfilter/nft_masq.c +++ b/net/netfilter/nft_masq.c @@ -15,8 +15,8 @@ struct nft_masq { u32 flags; - enum nft_registers sreg_proto_min:8; - enum nft_registers sreg_proto_max:8; + u8 sreg_proto_min; + u8 sreg_proto_max; }; static const struct nla_policy nft_masq_policy[NFTA_MASQ_MAX + 1] = { @@ -54,19 +54,15 @@ static int nft_masq_init(const struct nft_ctx *ctx, } if (tb[NFTA_MASQ_REG_PROTO_MIN]) { - priv->sreg_proto_min = - nft_parse_register(tb[NFTA_MASQ_REG_PROTO_MIN]); - - err = nft_validate_register_load(priv->sreg_proto_min, plen); + err = nft_parse_register_load(tb[NFTA_MASQ_REG_PROTO_MIN], + &priv->sreg_proto_min, plen); if (err < 0) return err; if (tb[NFTA_MASQ_REG_PROTO_MAX]) { - priv->sreg_proto_max = - nft_parse_register(tb[NFTA_MASQ_REG_PROTO_MAX]); - - err = nft_validate_register_load(priv->sreg_proto_max, - plen); + err = nft_parse_register_load(tb[NFTA_MASQ_REG_PROTO_MAX], + &priv->sreg_proto_max, + plen); if (err < 0) return err; } else { diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c index bf4b3ad5314c3..65e231ec1884e 100644 --- a/net/netfilter/nft_meta.c +++ b/net/netfilter/nft_meta.c @@ -661,8 +661,7 @@ int nft_meta_set_init(const struct nft_ctx *ctx, return -EOPNOTSUPP; } - priv->sreg = nft_parse_register(tb[NFTA_META_SREG]); - err = nft_validate_register_load(priv->sreg, len); + err = nft_parse_register_load(tb[NFTA_META_SREG], &priv->sreg, len); if (err < 0) return err; diff --git a/net/netfilter/nft_nat.c b/net/netfilter/nft_nat.c index 4bcf33b049c47..0840c635b752e 100644 --- a/net/netfilter/nft_nat.c +++ b/net/netfilter/nft_nat.c @@ -21,10 +21,10 @@ #include struct nft_nat { - enum nft_registers sreg_addr_min:8; - enum nft_registers sreg_addr_max:8; - enum nft_registers sreg_proto_min:8; - enum nft_registers sreg_proto_max:8; + u8 sreg_addr_min; + u8 sreg_addr_max; + u8 sreg_proto_min; + u8 sreg_proto_max; enum nf_nat_manip_type type:8; u8 family; u16 flags; @@ -206,18 +206,15 @@ static int nft_nat_init(const struct nft_ctx *ctx, const struct nft_expr *expr, priv->family = family; if (tb[NFTA_NAT_REG_ADDR_MIN]) { - priv->sreg_addr_min = - nft_parse_register(tb[NFTA_NAT_REG_ADDR_MIN]); - err = nft_validate_register_load(priv->sreg_addr_min, alen); + err = nft_parse_register_load(tb[NFTA_NAT_REG_ADDR_MIN], + &priv->sreg_addr_min, alen); if (err < 0) return err; if (tb[NFTA_NAT_REG_ADDR_MAX]) { - priv->sreg_addr_max = - nft_parse_register(tb[NFTA_NAT_REG_ADDR_MAX]); - - err = nft_validate_register_load(priv->sreg_addr_max, - alen); + err = nft_parse_register_load(tb[NFTA_NAT_REG_ADDR_MAX], + &priv->sreg_addr_max, + alen); if (err < 0) return err; } else { @@ -229,19 +226,15 @@ static int nft_nat_init(const struct nft_ctx *ctx, const struct nft_expr *expr, plen = sizeof_field(struct nf_nat_range, min_addr.all); if (tb[NFTA_NAT_REG_PROTO_MIN]) { - priv->sreg_proto_min = - nft_parse_register(tb[NFTA_NAT_REG_PROTO_MIN]); - - err = nft_validate_register_load(priv->sreg_proto_min, plen); + err = nft_parse_register_load(tb[NFTA_NAT_REG_PROTO_MIN], + &priv->sreg_proto_min, plen); if (err < 0) return err; if (tb[NFTA_NAT_REG_PROTO_MAX]) { - priv->sreg_proto_max = - nft_parse_register(tb[NFTA_NAT_REG_PROTO_MAX]); - - err = nft_validate_register_load(priv->sreg_proto_max, - plen); + err = nft_parse_register_load(tb[NFTA_NAT_REG_PROTO_MAX], + &priv->sreg_proto_max, + plen); if (err < 0) return err; } else { diff --git a/net/netfilter/nft_objref.c b/net/netfilter/nft_objref.c index 5f9207a9f4851..bc104d36d3bb2 100644 --- a/net/netfilter/nft_objref.c +++ b/net/netfilter/nft_objref.c @@ -95,7 +95,7 @@ static const struct nft_expr_ops nft_objref_ops = { struct nft_objref_map { struct nft_set *set; - enum nft_registers sreg:8; + u8 sreg; struct nft_set_binding binding; }; @@ -137,8 +137,8 @@ static int nft_objref_map_init(const struct nft_ctx *ctx, if (!(set->flags & NFT_SET_OBJECT)) return -EINVAL; - priv->sreg = nft_parse_register(tb[NFTA_OBJREF_SET_SREG]); - err = nft_validate_register_load(priv->sreg, set->klen); + err = nft_parse_register_load(tb[NFTA_OBJREF_SET_SREG], &priv->sreg, + set->klen); if (err < 0) return err; diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c index 47d4e0e216514..d4e88db4116df 100644 --- a/net/netfilter/nft_payload.c +++ b/net/netfilter/nft_payload.c @@ -658,7 +658,6 @@ static int nft_payload_set_init(const struct nft_ctx *ctx, priv->base = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_BASE])); priv->offset = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_OFFSET])); priv->len = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_LEN])); - priv->sreg = nft_parse_register(tb[NFTA_PAYLOAD_SREG]); if (tb[NFTA_PAYLOAD_CSUM_TYPE]) priv->csum_type = @@ -691,7 +690,8 @@ static int nft_payload_set_init(const struct nft_ctx *ctx, return -EOPNOTSUPP; } - return nft_validate_register_load(priv->sreg, priv->len); + return nft_parse_register_load(tb[NFTA_PAYLOAD_SREG], &priv->sreg, + priv->len); } static int nft_payload_set_dump(struct sk_buff *skb, const struct nft_expr *expr) diff --git a/net/netfilter/nft_queue.c b/net/netfilter/nft_queue.c index 23265d757acbc..9ba1de51ac070 100644 --- a/net/netfilter/nft_queue.c +++ b/net/netfilter/nft_queue.c @@ -19,10 +19,10 @@ static u32 jhash_initval __read_mostly; struct nft_queue { - enum nft_registers sreg_qnum:8; - u16 queuenum; - u16 queues_total; - u16 flags; + u8 sreg_qnum; + u16 queuenum; + u16 queues_total; + u16 flags; }; static void nft_queue_eval(const struct nft_expr *expr, @@ -111,8 +111,8 @@ static int nft_queue_sreg_init(const struct nft_ctx *ctx, struct nft_queue *priv = nft_expr_priv(expr); int err; - priv->sreg_qnum = nft_parse_register(tb[NFTA_QUEUE_SREG_QNUM]); - err = nft_validate_register_load(priv->sreg_qnum, sizeof(u32)); + err = nft_parse_register_load(tb[NFTA_QUEUE_SREG_QNUM], + &priv->sreg_qnum, sizeof(u32)); if (err < 0) return err; diff --git a/net/netfilter/nft_range.c b/net/netfilter/nft_range.c index 89efcc5a533d2..e4a1c44d7f513 100644 --- a/net/netfilter/nft_range.c +++ b/net/netfilter/nft_range.c @@ -15,7 +15,7 @@ struct nft_range_expr { struct nft_data data_from; struct nft_data data_to; - enum nft_registers sreg:8; + u8 sreg; u8 len; enum nft_range_ops op:8; }; @@ -86,8 +86,8 @@ static int nft_range_init(const struct nft_ctx *ctx, const struct nft_expr *expr goto err2; } - priv->sreg = nft_parse_register(tb[NFTA_RANGE_SREG]); - err = nft_validate_register_load(priv->sreg, desc_from.len); + err = nft_parse_register_load(tb[NFTA_RANGE_SREG], &priv->sreg, + desc_from.len); if (err < 0) goto err2; diff --git a/net/netfilter/nft_redir.c b/net/netfilter/nft_redir.c index 2056051c0af0d..ba09890dddb50 100644 --- a/net/netfilter/nft_redir.c +++ b/net/netfilter/nft_redir.c @@ -14,8 +14,8 @@ #include struct nft_redir { - enum nft_registers sreg_proto_min:8; - enum nft_registers sreg_proto_max:8; + u8 sreg_proto_min; + u8 sreg_proto_max; u16 flags; }; @@ -50,19 +50,15 @@ static int nft_redir_init(const struct nft_ctx *ctx, plen = sizeof_field(struct nf_nat_range, min_addr.all); if (tb[NFTA_REDIR_REG_PROTO_MIN]) { - priv->sreg_proto_min = - nft_parse_register(tb[NFTA_REDIR_REG_PROTO_MIN]); - - err = nft_validate_register_load(priv->sreg_proto_min, plen); + err = nft_parse_register_load(tb[NFTA_REDIR_REG_PROTO_MIN], + &priv->sreg_proto_min, plen); if (err < 0) return err; if (tb[NFTA_REDIR_REG_PROTO_MAX]) { - priv->sreg_proto_max = - nft_parse_register(tb[NFTA_REDIR_REG_PROTO_MAX]); - - err = nft_validate_register_load(priv->sreg_proto_max, - plen); + err = nft_parse_register_load(tb[NFTA_REDIR_REG_PROTO_MAX], + &priv->sreg_proto_max, + plen); if (err < 0) return err; } else { diff --git a/net/netfilter/nft_tproxy.c b/net/netfilter/nft_tproxy.c index d67f83a0958d3..43a5a780a6d3b 100644 --- a/net/netfilter/nft_tproxy.c +++ b/net/netfilter/nft_tproxy.c @@ -13,9 +13,9 @@ #endif struct nft_tproxy { - enum nft_registers sreg_addr:8; - enum nft_registers sreg_port:8; - u8 family; + u8 sreg_addr; + u8 sreg_port; + u8 family; }; static void nft_tproxy_eval_v4(const struct nft_expr *expr, @@ -247,15 +247,15 @@ static int nft_tproxy_init(const struct nft_ctx *ctx, } if (tb[NFTA_TPROXY_REG_ADDR]) { - priv->sreg_addr = nft_parse_register(tb[NFTA_TPROXY_REG_ADDR]); - err = nft_validate_register_load(priv->sreg_addr, alen); + err = nft_parse_register_load(tb[NFTA_TPROXY_REG_ADDR], + &priv->sreg_addr, alen); if (err < 0) return err; } if (tb[NFTA_TPROXY_REG_PORT]) { - priv->sreg_port = nft_parse_register(tb[NFTA_TPROXY_REG_PORT]); - err = nft_validate_register_load(priv->sreg_port, sizeof(u16)); + err = nft_parse_register_load(tb[NFTA_TPROXY_REG_PORT], + &priv->sreg_port, sizeof(u16)); if (err < 0) return err; } -- GitLab From 345023b0db315648ccc3c1a36aee88304a8b4d91 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 25 Jan 2021 18:27:22 +0100 Subject: [PATCH 1571/2012] netfilter: nftables: add nft_parse_register_store() and use it MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This new function combines the netlink register attribute parser and the store validation function. This update requires to replace: enum nft_registers dreg:8; in many of the expression private areas otherwise compiler complains with: error: cannot take address of bit-field ‘dreg’ when passing the register field as reference. Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 8 +++--- include/net/netfilter/nf_tables_core.h | 6 ++--- include/net/netfilter/nft_fib.h | 2 +- include/net/netfilter/nft_meta.h | 2 +- net/bridge/netfilter/nft_meta_bridge.c | 5 ++-- net/netfilter/nf_tables_api.c | 34 ++++++++++++++++++++++---- net/netfilter/nft_bitwise.c | 13 +++++----- net/netfilter/nft_byteorder.c | 8 +++--- net/netfilter/nft_ct.c | 7 +++--- net/netfilter/nft_exthdr.c | 8 +++--- net/netfilter/nft_fib.c | 5 ++-- net/netfilter/nft_hash.c | 17 ++++++------- net/netfilter/nft_immediate.c | 6 ++--- net/netfilter/nft_lookup.c | 8 +++--- net/netfilter/nft_meta.c | 5 ++-- net/netfilter/nft_numgen.c | 15 +++++------- net/netfilter/nft_osf.c | 8 +++--- net/netfilter/nft_payload.c | 6 ++--- net/netfilter/nft_rt.c | 7 +++--- net/netfilter/nft_socket.c | 7 +++--- net/netfilter/nft_tunnel.c | 8 +++--- net/netfilter/nft_xfrm.c | 7 +++--- 22 files changed, 100 insertions(+), 92 deletions(-) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 033b31aa38ccf..78d174517749a 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -204,10 +204,10 @@ unsigned int nft_parse_register(const struct nlattr *attr); int nft_dump_register(struct sk_buff *skb, unsigned int attr, unsigned int reg); int nft_parse_register_load(const struct nlattr *attr, u8 *sreg, u32 len); -int nft_validate_register_store(const struct nft_ctx *ctx, - enum nft_registers reg, - const struct nft_data *data, - enum nft_data_types type, unsigned int len); +int nft_parse_register_store(const struct nft_ctx *ctx, + const struct nlattr *attr, u8 *dreg, + const struct nft_data *data, + enum nft_data_types type, unsigned int len); /** * struct nft_userdata - user defined data associated with an object diff --git a/include/net/netfilter/nf_tables_core.h b/include/net/netfilter/nf_tables_core.h index b7aff03a3f0f9..fd10a7862fdc6 100644 --- a/include/net/netfilter/nf_tables_core.h +++ b/include/net/netfilter/nf_tables_core.h @@ -27,7 +27,7 @@ struct nft_bitwise_fast_expr { u32 mask; u32 xor; u8 sreg; - enum nft_registers dreg:8; + u8 dreg; }; struct nft_cmp_fast_expr { @@ -40,7 +40,7 @@ struct nft_cmp_fast_expr { struct nft_immediate_expr { struct nft_data data; - enum nft_registers dreg:8; + u8 dreg; u8 dlen; }; @@ -60,7 +60,7 @@ struct nft_payload { enum nft_payload_bases base:8; u8 offset; u8 len; - enum nft_registers dreg:8; + u8 dreg; }; struct nft_payload_set { diff --git a/include/net/netfilter/nft_fib.h b/include/net/netfilter/nft_fib.h index 628b6fa579cd8..237f3757637e1 100644 --- a/include/net/netfilter/nft_fib.h +++ b/include/net/netfilter/nft_fib.h @@ -5,7 +5,7 @@ #include struct nft_fib { - enum nft_registers dreg:8; + u8 dreg; u8 result; u32 flags; }; diff --git a/include/net/netfilter/nft_meta.h b/include/net/netfilter/nft_meta.h index 946fa8c83798e..2dce55c736f40 100644 --- a/include/net/netfilter/nft_meta.h +++ b/include/net/netfilter/nft_meta.h @@ -7,7 +7,7 @@ struct nft_meta { enum nft_meta_keys key:8; union { - enum nft_registers dreg:8; + u8 dreg; u8 sreg; }; }; diff --git a/net/bridge/netfilter/nft_meta_bridge.c b/net/bridge/netfilter/nft_meta_bridge.c index 8e8ffac037cd4..97805ec424c19 100644 --- a/net/bridge/netfilter/nft_meta_bridge.c +++ b/net/bridge/netfilter/nft_meta_bridge.c @@ -87,9 +87,8 @@ static int nft_meta_bridge_get_init(const struct nft_ctx *ctx, return nft_meta_get_init(ctx, expr, tb); } - priv->dreg = nft_parse_register(tb[NFTA_META_DREG]); - return nft_validate_register_store(ctx, priv->dreg, NULL, - NFT_DATA_VALUE, len); + return nft_parse_register_store(ctx, tb[NFTA_META_DREG], &priv->dreg, + NULL, NFT_DATA_VALUE, len); } static struct nft_expr_type nft_meta_bridge_type; diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 1e82ebba230dc..c23163ffb5a1f 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -4438,6 +4438,12 @@ static int nf_tables_delset(struct net *net, struct sock *nlsk, return nft_delset(&ctx, set); } +static int nft_validate_register_store(const struct nft_ctx *ctx, + enum nft_registers reg, + const struct nft_data *data, + enum nft_data_types type, + unsigned int len); + static int nf_tables_bind_check_setelem(const struct nft_ctx *ctx, struct nft_set *set, const struct nft_set_iter *iter, @@ -8675,10 +8681,11 @@ EXPORT_SYMBOL_GPL(nft_parse_register_load); * A value of NULL for the data means that its runtime gathered * data. */ -int nft_validate_register_store(const struct nft_ctx *ctx, - enum nft_registers reg, - const struct nft_data *data, - enum nft_data_types type, unsigned int len) +static int nft_validate_register_store(const struct nft_ctx *ctx, + enum nft_registers reg, + const struct nft_data *data, + enum nft_data_types type, + unsigned int len) { int err; @@ -8710,7 +8717,24 @@ int nft_validate_register_store(const struct nft_ctx *ctx, return 0; } } -EXPORT_SYMBOL_GPL(nft_validate_register_store); + +int nft_parse_register_store(const struct nft_ctx *ctx, + const struct nlattr *attr, u8 *dreg, + const struct nft_data *data, + enum nft_data_types type, unsigned int len) +{ + int err; + u32 reg; + + reg = nft_parse_register(attr); + err = nft_validate_register_store(ctx, reg, data, type, len); + if (err < 0) + return err; + + *dreg = reg; + return 0; +} +EXPORT_SYMBOL_GPL(nft_parse_register_store); static const struct nla_policy nft_verdict_policy[NFTA_VERDICT_MAX + 1] = { [NFTA_VERDICT_CODE] = { .type = NLA_U32 }, diff --git a/net/netfilter/nft_bitwise.c b/net/netfilter/nft_bitwise.c index 2157970b3cd37..47b0dba95054f 100644 --- a/net/netfilter/nft_bitwise.c +++ b/net/netfilter/nft_bitwise.c @@ -17,7 +17,7 @@ struct nft_bitwise { u8 sreg; - enum nft_registers dreg:8; + u8 dreg; enum nft_bitwise_ops op:8; u8 len; struct nft_data mask; @@ -174,9 +174,9 @@ static int nft_bitwise_init(const struct nft_ctx *ctx, if (err < 0) return err; - priv->dreg = nft_parse_register(tb[NFTA_BITWISE_DREG]); - err = nft_validate_register_store(ctx, priv->dreg, NULL, - NFT_DATA_VALUE, priv->len); + err = nft_parse_register_store(ctx, tb[NFTA_BITWISE_DREG], + &priv->dreg, NULL, NFT_DATA_VALUE, + priv->len); if (err < 0) return err; @@ -320,9 +320,8 @@ static int nft_bitwise_fast_init(const struct nft_ctx *ctx, if (err < 0) return err; - priv->dreg = nft_parse_register(tb[NFTA_BITWISE_DREG]); - err = nft_validate_register_store(ctx, priv->dreg, NULL, - NFT_DATA_VALUE, sizeof(u32)); + err = nft_parse_register_store(ctx, tb[NFTA_BITWISE_DREG], &priv->dreg, + NULL, NFT_DATA_VALUE, sizeof(u32)); if (err < 0) return err; diff --git a/net/netfilter/nft_byteorder.c b/net/netfilter/nft_byteorder.c index 0960563cd5a19..9d5947ab8d4ef 100644 --- a/net/netfilter/nft_byteorder.c +++ b/net/netfilter/nft_byteorder.c @@ -17,7 +17,7 @@ struct nft_byteorder { u8 sreg; - enum nft_registers dreg:8; + u8 dreg; enum nft_byteorder_ops op:8; u8 len; u8 size; @@ -142,9 +142,9 @@ static int nft_byteorder_init(const struct nft_ctx *ctx, if (err < 0) return err; - priv->dreg = nft_parse_register(tb[NFTA_BYTEORDER_DREG]); - return nft_validate_register_store(ctx, priv->dreg, NULL, - NFT_DATA_VALUE, priv->len); + return nft_parse_register_store(ctx, tb[NFTA_BYTEORDER_DREG], + &priv->dreg, NULL, NFT_DATA_VALUE, + priv->len); } static int nft_byteorder_dump(struct sk_buff *skb, const struct nft_expr *expr) diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c index c1d7a3c615d8e..882fe8648653d 100644 --- a/net/netfilter/nft_ct.c +++ b/net/netfilter/nft_ct.c @@ -27,7 +27,7 @@ struct nft_ct { enum nft_ct_keys key:8; enum ip_conntrack_dir dir:8; union { - enum nft_registers dreg:8; + u8 dreg; u8 sreg; }; }; @@ -498,9 +498,8 @@ static int nft_ct_get_init(const struct nft_ctx *ctx, } } - priv->dreg = nft_parse_register(tb[NFTA_CT_DREG]); - err = nft_validate_register_store(ctx, priv->dreg, NULL, - NFT_DATA_VALUE, len); + err = nft_parse_register_store(ctx, tb[NFTA_CT_DREG], &priv->dreg, NULL, + NFT_DATA_VALUE, len); if (err < 0) return err; diff --git a/net/netfilter/nft_exthdr.c b/net/netfilter/nft_exthdr.c index 9c2b3bde1ac9a..f64f0017e9a53 100644 --- a/net/netfilter/nft_exthdr.c +++ b/net/netfilter/nft_exthdr.c @@ -19,7 +19,7 @@ struct nft_exthdr { u8 offset; u8 len; u8 op; - enum nft_registers dreg:8; + u8 dreg; u8 sreg; u8 flags; }; @@ -350,12 +350,12 @@ static int nft_exthdr_init(const struct nft_ctx *ctx, priv->type = nla_get_u8(tb[NFTA_EXTHDR_TYPE]); priv->offset = offset; priv->len = len; - priv->dreg = nft_parse_register(tb[NFTA_EXTHDR_DREG]); priv->flags = flags; priv->op = op; - return nft_validate_register_store(ctx, priv->dreg, NULL, - NFT_DATA_VALUE, priv->len); + return nft_parse_register_store(ctx, tb[NFTA_EXTHDR_DREG], + &priv->dreg, NULL, NFT_DATA_VALUE, + priv->len); } static int nft_exthdr_tcp_set_init(const struct nft_ctx *ctx, diff --git a/net/netfilter/nft_fib.c b/net/netfilter/nft_fib.c index 4dfdaeaf09a5b..b10ce732b337c 100644 --- a/net/netfilter/nft_fib.c +++ b/net/netfilter/nft_fib.c @@ -86,7 +86,6 @@ int nft_fib_init(const struct nft_ctx *ctx, const struct nft_expr *expr, return -EINVAL; priv->result = ntohl(nla_get_be32(tb[NFTA_FIB_RESULT])); - priv->dreg = nft_parse_register(tb[NFTA_FIB_DREG]); switch (priv->result) { case NFT_FIB_RESULT_OIF: @@ -106,8 +105,8 @@ int nft_fib_init(const struct nft_ctx *ctx, const struct nft_expr *expr, return -EINVAL; } - err = nft_validate_register_store(ctx, priv->dreg, NULL, - NFT_DATA_VALUE, len); + err = nft_parse_register_store(ctx, tb[NFTA_FIB_DREG], &priv->dreg, + NULL, NFT_DATA_VALUE, len); if (err < 0) return err; diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c index 7ee6c6da50ae9..f829f5289e162 100644 --- a/net/netfilter/nft_hash.c +++ b/net/netfilter/nft_hash.c @@ -15,7 +15,7 @@ struct nft_jhash { u8 sreg; - enum nft_registers dreg:8; + u8 dreg; u8 len; bool autogen_seed:1; u32 modulus; @@ -38,7 +38,7 @@ static void nft_jhash_eval(const struct nft_expr *expr, } struct nft_symhash { - enum nft_registers dreg:8; + u8 dreg; u32 modulus; u32 offset; }; @@ -83,8 +83,6 @@ static int nft_jhash_init(const struct nft_ctx *ctx, if (tb[NFTA_HASH_OFFSET]) priv->offset = ntohl(nla_get_be32(tb[NFTA_HASH_OFFSET])); - priv->dreg = nft_parse_register(tb[NFTA_HASH_DREG]); - err = nft_parse_u32_check(tb[NFTA_HASH_LEN], U8_MAX, &len); if (err < 0) return err; @@ -111,8 +109,8 @@ static int nft_jhash_init(const struct nft_ctx *ctx, get_random_bytes(&priv->seed, sizeof(priv->seed)); } - return nft_validate_register_store(ctx, priv->dreg, NULL, - NFT_DATA_VALUE, sizeof(u32)); + return nft_parse_register_store(ctx, tb[NFTA_HASH_DREG], &priv->dreg, + NULL, NFT_DATA_VALUE, sizeof(u32)); } static int nft_symhash_init(const struct nft_ctx *ctx, @@ -128,8 +126,6 @@ static int nft_symhash_init(const struct nft_ctx *ctx, if (tb[NFTA_HASH_OFFSET]) priv->offset = ntohl(nla_get_be32(tb[NFTA_HASH_OFFSET])); - priv->dreg = nft_parse_register(tb[NFTA_HASH_DREG]); - priv->modulus = ntohl(nla_get_be32(tb[NFTA_HASH_MODULUS])); if (priv->modulus < 1) return -ERANGE; @@ -137,8 +133,9 @@ static int nft_symhash_init(const struct nft_ctx *ctx, if (priv->offset + priv->modulus - 1 < priv->offset) return -EOVERFLOW; - return nft_validate_register_store(ctx, priv->dreg, NULL, - NFT_DATA_VALUE, sizeof(u32)); + return nft_parse_register_store(ctx, tb[NFTA_HASH_DREG], + &priv->dreg, NULL, NFT_DATA_VALUE, + sizeof(u32)); } static int nft_jhash_dump(struct sk_buff *skb, diff --git a/net/netfilter/nft_immediate.c b/net/netfilter/nft_immediate.c index c63eb3b171784..90c64d27ae532 100644 --- a/net/netfilter/nft_immediate.c +++ b/net/netfilter/nft_immediate.c @@ -48,9 +48,9 @@ static int nft_immediate_init(const struct nft_ctx *ctx, priv->dlen = desc.len; - priv->dreg = nft_parse_register(tb[NFTA_IMMEDIATE_DREG]); - err = nft_validate_register_store(ctx, priv->dreg, &priv->data, - desc.type, desc.len); + err = nft_parse_register_store(ctx, tb[NFTA_IMMEDIATE_DREG], + &priv->dreg, &priv->data, desc.type, + desc.len); if (err < 0) goto err1; diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c index 9e87b6d39f517..b0f558b4fea54 100644 --- a/net/netfilter/nft_lookup.c +++ b/net/netfilter/nft_lookup.c @@ -18,7 +18,7 @@ struct nft_lookup { struct nft_set *set; u8 sreg; - enum nft_registers dreg:8; + u8 dreg; bool invert; struct nft_set_binding binding; }; @@ -100,9 +100,9 @@ static int nft_lookup_init(const struct nft_ctx *ctx, if (!(set->flags & NFT_SET_MAP)) return -EINVAL; - priv->dreg = nft_parse_register(tb[NFTA_LOOKUP_DREG]); - err = nft_validate_register_store(ctx, priv->dreg, NULL, - set->dtype, set->dlen); + err = nft_parse_register_store(ctx, tb[NFTA_LOOKUP_DREG], + &priv->dreg, NULL, set->dtype, + set->dlen); if (err < 0) return err; } else if (set->flags & NFT_SET_MAP) diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c index 65e231ec1884e..a7e01e9952f17 100644 --- a/net/netfilter/nft_meta.c +++ b/net/netfilter/nft_meta.c @@ -535,9 +535,8 @@ int nft_meta_get_init(const struct nft_ctx *ctx, return -EOPNOTSUPP; } - priv->dreg = nft_parse_register(tb[NFTA_META_DREG]); - return nft_validate_register_store(ctx, priv->dreg, NULL, - NFT_DATA_VALUE, len); + return nft_parse_register_store(ctx, tb[NFTA_META_DREG], &priv->dreg, + NULL, NFT_DATA_VALUE, len); } EXPORT_SYMBOL_GPL(nft_meta_get_init); diff --git a/net/netfilter/nft_numgen.c b/net/netfilter/nft_numgen.c index f1fc824f97370..722cac1e90e0e 100644 --- a/net/netfilter/nft_numgen.c +++ b/net/netfilter/nft_numgen.c @@ -16,7 +16,7 @@ static DEFINE_PER_CPU(struct rnd_state, nft_numgen_prandom_state); struct nft_ng_inc { - enum nft_registers dreg:8; + u8 dreg; u32 modulus; atomic_t counter; u32 offset; @@ -66,11 +66,10 @@ static int nft_ng_inc_init(const struct nft_ctx *ctx, if (priv->offset + priv->modulus - 1 < priv->offset) return -EOVERFLOW; - priv->dreg = nft_parse_register(tb[NFTA_NG_DREG]); atomic_set(&priv->counter, priv->modulus - 1); - return nft_validate_register_store(ctx, priv->dreg, NULL, - NFT_DATA_VALUE, sizeof(u32)); + return nft_parse_register_store(ctx, tb[NFTA_NG_DREG], &priv->dreg, + NULL, NFT_DATA_VALUE, sizeof(u32)); } static int nft_ng_dump(struct sk_buff *skb, enum nft_registers dreg, @@ -100,7 +99,7 @@ static int nft_ng_inc_dump(struct sk_buff *skb, const struct nft_expr *expr) } struct nft_ng_random { - enum nft_registers dreg:8; + u8 dreg; u32 modulus; u32 offset; }; @@ -140,10 +139,8 @@ static int nft_ng_random_init(const struct nft_ctx *ctx, prandom_init_once(&nft_numgen_prandom_state); - priv->dreg = nft_parse_register(tb[NFTA_NG_DREG]); - - return nft_validate_register_store(ctx, priv->dreg, NULL, - NFT_DATA_VALUE, sizeof(u32)); + return nft_parse_register_store(ctx, tb[NFTA_NG_DREG], &priv->dreg, + NULL, NFT_DATA_VALUE, sizeof(u32)); } static int nft_ng_random_dump(struct sk_buff *skb, const struct nft_expr *expr) diff --git a/net/netfilter/nft_osf.c b/net/netfilter/nft_osf.c index c261d57a666ab..ac61f708b82d2 100644 --- a/net/netfilter/nft_osf.c +++ b/net/netfilter/nft_osf.c @@ -6,7 +6,7 @@ #include struct nft_osf { - enum nft_registers dreg:8; + u8 dreg; u8 ttl; u32 flags; }; @@ -78,9 +78,9 @@ static int nft_osf_init(const struct nft_ctx *ctx, priv->flags = flags; } - priv->dreg = nft_parse_register(tb[NFTA_OSF_DREG]); - err = nft_validate_register_store(ctx, priv->dreg, NULL, - NFT_DATA_VALUE, NFT_OSF_MAXGENRELEN); + err = nft_parse_register_store(ctx, tb[NFTA_OSF_DREG], &priv->dreg, + NULL, NFT_DATA_VALUE, + NFT_OSF_MAXGENRELEN); if (err < 0) return err; diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c index d4e88db4116df..cb1c8c2318803 100644 --- a/net/netfilter/nft_payload.c +++ b/net/netfilter/nft_payload.c @@ -144,10 +144,10 @@ static int nft_payload_init(const struct nft_ctx *ctx, priv->base = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_BASE])); priv->offset = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_OFFSET])); priv->len = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_LEN])); - priv->dreg = nft_parse_register(tb[NFTA_PAYLOAD_DREG]); - return nft_validate_register_store(ctx, priv->dreg, NULL, - NFT_DATA_VALUE, priv->len); + return nft_parse_register_store(ctx, tb[NFTA_PAYLOAD_DREG], + &priv->dreg, NULL, NFT_DATA_VALUE, + priv->len); } static int nft_payload_dump(struct sk_buff *skb, const struct nft_expr *expr) diff --git a/net/netfilter/nft_rt.c b/net/netfilter/nft_rt.c index 7cfcb0e2f7ee1..bcd01a63e38f1 100644 --- a/net/netfilter/nft_rt.c +++ b/net/netfilter/nft_rt.c @@ -15,7 +15,7 @@ struct nft_rt { enum nft_rt_keys key:8; - enum nft_registers dreg:8; + u8 dreg; }; static u16 get_tcpmss(const struct nft_pktinfo *pkt, const struct dst_entry *skbdst) @@ -141,9 +141,8 @@ static int nft_rt_get_init(const struct nft_ctx *ctx, return -EOPNOTSUPP; } - priv->dreg = nft_parse_register(tb[NFTA_RT_DREG]); - return nft_validate_register_store(ctx, priv->dreg, NULL, - NFT_DATA_VALUE, len); + return nft_parse_register_store(ctx, tb[NFTA_RT_DREG], &priv->dreg, + NULL, NFT_DATA_VALUE, len); } static int nft_rt_get_dump(struct sk_buff *skb, diff --git a/net/netfilter/nft_socket.c b/net/netfilter/nft_socket.c index a28aca5124cef..c9b8a2b03b713 100644 --- a/net/netfilter/nft_socket.c +++ b/net/netfilter/nft_socket.c @@ -10,7 +10,7 @@ struct nft_socket { enum nft_socket_keys key:8; union { - enum nft_registers dreg:8; + u8 dreg; }; }; @@ -133,9 +133,8 @@ static int nft_socket_init(const struct nft_ctx *ctx, return -EOPNOTSUPP; } - priv->dreg = nft_parse_register(tb[NFTA_SOCKET_DREG]); - return nft_validate_register_store(ctx, priv->dreg, NULL, - NFT_DATA_VALUE, len); + return nft_parse_register_store(ctx, tb[NFTA_SOCKET_DREG], &priv->dreg, + NULL, NFT_DATA_VALUE, len); } static int nft_socket_dump(struct sk_buff *skb, diff --git a/net/netfilter/nft_tunnel.c b/net/netfilter/nft_tunnel.c index d3eb953d0333b..3b27926d5382c 100644 --- a/net/netfilter/nft_tunnel.c +++ b/net/netfilter/nft_tunnel.c @@ -15,7 +15,7 @@ struct nft_tunnel { enum nft_tunnel_keys key:8; - enum nft_registers dreg:8; + u8 dreg; enum nft_tunnel_mode mode:8; }; @@ -93,8 +93,6 @@ static int nft_tunnel_get_init(const struct nft_ctx *ctx, return -EOPNOTSUPP; } - priv->dreg = nft_parse_register(tb[NFTA_TUNNEL_DREG]); - if (tb[NFTA_TUNNEL_MODE]) { priv->mode = ntohl(nla_get_be32(tb[NFTA_TUNNEL_MODE])); if (priv->mode > NFT_TUNNEL_MODE_MAX) @@ -103,8 +101,8 @@ static int nft_tunnel_get_init(const struct nft_ctx *ctx, priv->mode = NFT_TUNNEL_MODE_NONE; } - return nft_validate_register_store(ctx, priv->dreg, NULL, - NFT_DATA_VALUE, len); + return nft_parse_register_store(ctx, tb[NFTA_TUNNEL_DREG], &priv->dreg, + NULL, NFT_DATA_VALUE, len); } static int nft_tunnel_get_dump(struct sk_buff *skb, diff --git a/net/netfilter/nft_xfrm.c b/net/netfilter/nft_xfrm.c index 06d5cabf1d7c4..cbbbc4ecad3ae 100644 --- a/net/netfilter/nft_xfrm.c +++ b/net/netfilter/nft_xfrm.c @@ -24,7 +24,7 @@ static const struct nla_policy nft_xfrm_policy[NFTA_XFRM_MAX + 1] = { struct nft_xfrm { enum nft_xfrm_keys key:8; - enum nft_registers dreg:8; + u8 dreg; u8 dir; u8 spnum; }; @@ -86,9 +86,8 @@ static int nft_xfrm_get_init(const struct nft_ctx *ctx, priv->spnum = spnum; - priv->dreg = nft_parse_register(tb[NFTA_XFRM_DREG]); - return nft_validate_register_store(ctx, priv->dreg, NULL, - NFT_DATA_VALUE, len); + return nft_parse_register_store(ctx, tb[NFTA_XFRM_DREG], &priv->dreg, + NULL, NFT_DATA_VALUE, len); } /* Return true if key asks for daddr/saddr and current -- GitLab From 08a01c11a5bb3de9b0a9c9b2685867e50eda9910 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 25 Jan 2021 23:19:17 +0100 Subject: [PATCH 1572/2012] netfilter: nftables: statify nft_parse_register() This function is not used anymore by any extension, statify it. Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 1 - net/netfilter/nf_tables_api.c | 3 +-- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 78d174517749a..99d4571fef460 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -200,7 +200,6 @@ static inline enum nft_registers nft_type_to_reg(enum nft_data_types type) } int nft_parse_u32_check(const struct nlattr *attr, int max, u32 *dest); -unsigned int nft_parse_register(const struct nlattr *attr); int nft_dump_register(struct sk_buff *skb, unsigned int attr, unsigned int reg); int nft_parse_register_load(const struct nlattr *attr, u8 *sreg, u32 len); diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index c23163ffb5a1f..1d76d07592bf8 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -8595,7 +8595,7 @@ EXPORT_SYMBOL_GPL(nft_parse_u32_check); * Registers used to be 128 bit wide, these register numbers will be * mapped to the corresponding 32 bit register numbers. */ -unsigned int nft_parse_register(const struct nlattr *attr) +static unsigned int nft_parse_register(const struct nlattr *attr) { unsigned int reg; @@ -8607,7 +8607,6 @@ unsigned int nft_parse_register(const struct nlattr *attr) return reg + NFT_REG_SIZE / NFT_REG32_SIZE - NFT_REG32_00; } } -EXPORT_SYMBOL_GPL(nft_parse_register); /** * nft_dump_register - dump a register value to a netlink attribute -- GitLab From 20776b465c0c249f5e5b5b4fe077cd24ef1cda86 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Mon, 25 Jan 2021 13:41:16 +0100 Subject: [PATCH 1573/2012] net: switchdev: don't set port_obj_info->handled true when -EOPNOTSUPP It's not true that switchdev_port_obj_notify() only inspects the ->handled field of "struct switchdev_notifier_port_obj_info" if call_switchdev_blocking_notifiers() returns 0 - there's a WARN_ON() triggering for a non-zero return combined with ->handled not being true. But the real problem here is that -EOPNOTSUPP is not being properly handled. The wrapper functions switchdev_handle_port_obj_add() et al change a return value of -EOPNOTSUPP to 0, and the treatment of ->handled in switchdev_port_obj_notify() seems to be designed to change that back to -EOPNOTSUPP in case nobody actually acted on the notifier (i.e., everybody returned -EOPNOTSUPP). Currently, as soon as some device down the stack passes the check_cb() check, ->handled gets set to true, which means that switchdev_port_obj_notify() cannot actually ever return -EOPNOTSUPP. This, for example, means that the detection of hardware offload support in the MRP code is broken: switchdev_port_obj_add() used by br_mrp_switchdev_send_ring_test() always returns 0, so since the MRP code thinks the generation of MRP test frames has been offloaded, no such frames are actually put on the wire. Similarly, br_mrp_switchdev_set_ring_role() also always returns 0, causing mrp->ring_role_offloaded to be set to 1. To fix this, continue to set ->handled true if any callback returns success or any error distinct from -EOPNOTSUPP. But if all the callbacks return -EOPNOTSUPP, make sure that ->handled stays false, so the logic in switchdev_port_obj_notify() can propagate that information. Fixes: 9a9f26e8f7ea ("bridge: mrp: Connect MRP API with the switchdev API") Fixes: f30f0601eb93 ("switchdev: Add helpers to aid traversal through lower devices") Reviewed-by: Petr Machata Signed-off-by: Rasmus Villemoes Link: https://lore.kernel.org/r/20210125124116.102928-1-rasmus.villemoes@prevas.dk Signed-off-by: Jakub Kicinski --- net/switchdev/switchdev.c | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index 23d8685453627..2c1ffc9ba2eb2 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -460,10 +460,11 @@ static int __switchdev_handle_port_obj_add(struct net_device *dev, extack = switchdev_notifier_info_to_extack(&port_obj_info->info); if (check_cb(dev)) { - /* This flag is only checked if the return value is success. */ - port_obj_info->handled = true; - return add_cb(dev, port_obj_info->obj, port_obj_info->trans, - extack); + err = add_cb(dev, port_obj_info->obj, port_obj_info->trans, + extack); + if (err != -EOPNOTSUPP) + port_obj_info->handled = true; + return err; } /* Switch ports might be stacked under e.g. a LAG. Ignore the @@ -515,9 +516,10 @@ static int __switchdev_handle_port_obj_del(struct net_device *dev, int err = -EOPNOTSUPP; if (check_cb(dev)) { - /* This flag is only checked if the return value is success. */ - port_obj_info->handled = true; - return del_cb(dev, port_obj_info->obj); + err = del_cb(dev, port_obj_info->obj); + if (err != -EOPNOTSUPP) + port_obj_info->handled = true; + return err; } /* Switch ports might be stacked under e.g. a LAG. Ignore the @@ -568,9 +570,10 @@ static int __switchdev_handle_port_attr_set(struct net_device *dev, int err = -EOPNOTSUPP; if (check_cb(dev)) { - port_attr_info->handled = true; - return set_cb(dev, port_attr_info->attr, - port_attr_info->trans); + err = set_cb(dev, port_attr_info->attr, port_attr_info->trans); + if (err != -EOPNOTSUPP) + port_attr_info->handled = true; + return err; } /* Switch ports might be stacked under e.g. a LAG. Ignore the -- GitLab From 275b1e88cabb34dbcbe99756b67e9939d34a99b6 Mon Sep 17 00:00:00 2001 From: Di Zhu Date: Mon, 25 Jan 2021 20:42:29 +0800 Subject: [PATCH 1574/2012] pktgen: fix misuse of BUG_ON() in pktgen_thread_worker() pktgen create threads for all online cpus and bond these threads to relevant cpu repecivtily. when this thread firstly be woken up, it will compare cpu currently running with the cpu specified at the time of creation and if the two cpus are not equal, BUG_ON() will take effect causing panic on the system. Notice that these threads could be migrated to other cpus before start running because of the cpu hotplug after these threads have created. so the BUG_ON() used here seems unreasonable and we can replace it with WARN_ON() to just printf a warning other than panic the system. Signed-off-by: Di Zhu Link: https://lore.kernel.org/r/20210125124229.19334-1-zhudi21@huawei.com Signed-off-by: Jakub Kicinski --- net/core/pktgen.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 105978604ffdb..3fba429f1f57b 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -3464,7 +3464,7 @@ static int pktgen_thread_worker(void *arg) struct pktgen_dev *pkt_dev = NULL; int cpu = t->cpu; - BUG_ON(smp_processor_id() != cpu); + WARN_ON(smp_processor_id() != cpu); init_waitqueue_head(&t->queue); complete(&t->start_done); -- GitLab From 17ce76c4985f166572c7870b86b41dae7b95cb9e Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Mon, 25 Jan 2021 17:55:12 +0100 Subject: [PATCH 1575/2012] r8169: remove not needed call to rtl_wol_enable_rx from rtl_shutdown rtl_wol_enable_rx() is called via the following call chain if WoL is enabled: rtl8169_down() -> rtl_prepare_power_down() -> rtl_wol_enable_rx() Therefore we don't have to call this function here. Signed-off-by: Heiner Kallweit Link: https://lore.kernel.org/r/34ce78e2-596c-e2ac-16aa-c550fa624c22@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/realtek/r8169_main.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index fb67d8f797ec5..475e6f01ea103 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -4850,10 +4850,8 @@ static void rtl_shutdown(struct pci_dev *pdev) rtl_rar_set(tp, tp->dev->perm_addr); if (system_state == SYSTEM_POWER_OFF) { - if (tp->saved_wolopts) { - rtl_wol_enable_rx(tp); + if (tp->saved_wolopts) rtl_wol_shutdown_quirk(tp); - } pci_wake_from_d3(pdev, tp->saved_wolopts); pci_set_power_state(pdev, PCI_D3hot); -- GitLab From 50a13bc3945c9004cf418d181d55ec54cfe4c8ec Mon Sep 17 00:00:00 2001 From: Matthieu Baerts Date: Mon, 25 Jan 2021 10:59:00 -0800 Subject: [PATCH 1576/2012] mptcp: support MPJoin with IPv4 mapped in v6 sk With an IPv4 mapped in v6 socket, we were trying to call inet6_bind() with an IPv4 address resulting in a -EINVAL error because the given addr_len -- size of the address structure -- was too short. We now make sure to use address structures for the same family as the MPTCP socket for both the bind() and the connect(). It means we convert v4 addresses to v4 mapped in v6 or the opposite if needed. Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/122 Co-developed-by: Geliang Tang Signed-off-by: Geliang Tang Signed-off-by: Matthieu Baerts Signed-off-by: Mat Martineau Signed-off-by: Jakub Kicinski --- net/mptcp/subflow.c | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 721059916c968..586156281e5a0 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -1085,21 +1085,31 @@ void mptcpv6_handle_mapped(struct sock *sk, bool mapped) #endif static void mptcp_info2sockaddr(const struct mptcp_addr_info *info, - struct sockaddr_storage *addr) + struct sockaddr_storage *addr, + unsigned short family) { memset(addr, 0, sizeof(*addr)); - addr->ss_family = info->family; + addr->ss_family = family; if (addr->ss_family == AF_INET) { struct sockaddr_in *in_addr = (struct sockaddr_in *)addr; - in_addr->sin_addr = info->addr; + if (info->family == AF_INET) + in_addr->sin_addr = info->addr; +#if IS_ENABLED(CONFIG_MPTCP_IPV6) + else if (ipv6_addr_v4mapped(&info->addr6)) + in_addr->sin_addr.s_addr = info->addr6.s6_addr32[3]; +#endif in_addr->sin_port = info->port; } #if IS_ENABLED(CONFIG_MPTCP_IPV6) else if (addr->ss_family == AF_INET6) { struct sockaddr_in6 *in6_addr = (struct sockaddr_in6 *)addr; - in6_addr->sin6_addr = info->addr6; + if (info->family == AF_INET) + ipv6_addr_set_v4mapped(info->addr.s_addr, + &in6_addr->sin6_addr); + else + in6_addr->sin6_addr = info->addr6; in6_addr->sin6_port = info->port; } #endif @@ -1143,11 +1153,11 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc, subflow->remote_key = msk->remote_key; subflow->local_key = msk->local_key; subflow->token = msk->token; - mptcp_info2sockaddr(loc, &addr); + mptcp_info2sockaddr(loc, &addr, ssk->sk_family); addrlen = sizeof(struct sockaddr_in); #if IS_ENABLED(CONFIG_MPTCP_IPV6) - if (loc->family == AF_INET6) + if (addr.ss_family == AF_INET6) addrlen = sizeof(struct sockaddr_in6); #endif ssk->sk_bound_dev_if = loc->ifindex; @@ -1163,7 +1173,7 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc, subflow->remote_id = remote_id; subflow->request_join = 1; subflow->request_bkup = !!(loc->flags & MPTCP_PM_ADDR_FLAG_BACKUP); - mptcp_info2sockaddr(remote, &addr); + mptcp_info2sockaddr(remote, &addr, ssk->sk_family); mptcp_add_pending_subflow(msk, subflow); err = kernel_connect(sf, (struct sockaddr *)&addr, addrlen, O_NONBLOCK); -- GitLab From 7b9b0f7e1230adde95be4958469d60c6fc06bc36 Mon Sep 17 00:00:00 2001 From: Matthieu Baerts Date: Mon, 25 Jan 2021 10:59:01 -0800 Subject: [PATCH 1577/2012] mptcp: pm nl: support IPv4 mapped in v6 addresses On one side, we can allow the creation of subflows between v4 mapped in v6 and v4 addresses. For that we look for v4mapped addresses between the local address we want to select and the remote one. On the other side, we also properly deal with received v4mapped addresses, either announced ones or set via Netlink. Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/122 Suggested-by: Mat Martineau Co-developed-by: Geliang Tang Signed-off-by: Geliang Tang Signed-off-by: Matthieu Baerts Signed-off-by: Mat Martineau Signed-off-by: Jakub Kicinski --- net/mptcp/pm_netlink.c | 33 ++++++++++++++++++++++++--------- 1 file changed, 24 insertions(+), 9 deletions(-) diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index 9b1f6298bbdba..f0afff6ba015f 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -60,15 +60,20 @@ static bool addresses_equal(const struct mptcp_addr_info *a, { bool addr_equals = false; - if (a->family != b->family) - return false; - - if (a->family == AF_INET) - addr_equals = a->addr.s_addr == b->addr.s_addr; + if (a->family == b->family) { + if (a->family == AF_INET) + addr_equals = a->addr.s_addr == b->addr.s_addr; #if IS_ENABLED(CONFIG_MPTCP_IPV6) - else - addr_equals = !ipv6_addr_cmp(&a->addr6, &b->addr6); + else + addr_equals = !ipv6_addr_cmp(&a->addr6, &b->addr6); + } else if (a->family == AF_INET) { + if (ipv6_addr_v4mapped(&b->addr6)) + addr_equals = a->addr.s_addr == b->addr6.s6_addr32[3]; + } else if (b->family == AF_INET) { + if (ipv6_addr_v4mapped(&a->addr6)) + addr_equals = a->addr6.s6_addr32[3] == b->addr.s_addr; #endif + } if (!addr_equals) return false; @@ -137,6 +142,7 @@ select_local_address(const struct pm_nl_pernet *pernet, struct mptcp_sock *msk) { struct mptcp_pm_addr_entry *entry, *ret = NULL; + struct sock *sk = (struct sock *)msk; rcu_read_lock(); __mptcp_flush_join_list(msk); @@ -144,11 +150,20 @@ select_local_address(const struct pm_nl_pernet *pernet, if (!(entry->addr.flags & MPTCP_PM_ADDR_FLAG_SUBFLOW)) continue; + if (entry->addr.family != sk->sk_family) { +#if IS_ENABLED(CONFIG_MPTCP_IPV6) + if ((entry->addr.family == AF_INET && + !ipv6_addr_v4mapped(&sk->sk_v6_daddr)) || + (sk->sk_family == AF_INET && + !ipv6_addr_v4mapped(&entry->addr.addr6))) +#endif + continue; + } + /* avoid any address already in use by subflows and * pending join */ - if (entry->addr.family == ((struct sock *)msk)->sk_family && - !lookup_subflow_by_saddr(&msk->conn_list, &entry->addr)) { + if (!lookup_subflow_by_saddr(&msk->conn_list, &entry->addr)) { ret = entry; break; } -- GitLab From 1f2f1931b2a8864ed30e445e360ba793c2ac5224 Mon Sep 17 00:00:00 2001 From: Matthieu Baerts Date: Mon, 25 Jan 2021 10:59:02 -0800 Subject: [PATCH 1578/2012] mptcp: pm nl: reduce variable scope To avoid confusions like when working on the previous patch, better to declare and assign this variable only where it is needed. Signed-off-by: Matthieu Baerts Signed-off-by: Mat Martineau Signed-off-by: Jakub Kicinski --- net/mptcp/pm_netlink.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index f0afff6ba015f..83976b9ee99bc 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -325,7 +325,6 @@ void mptcp_pm_free_anno_list(struct mptcp_sock *msk) static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk) { - struct mptcp_addr_info remote = { 0 }; struct sock *sk = (struct sock *)msk; struct mptcp_pm_addr_entry *local; struct pm_nl_pernet *pernet; @@ -359,13 +358,14 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk) /* check if should create a new subflow */ if (msk->pm.local_addr_used < msk->pm.local_addr_max && msk->pm.subflows < msk->pm.subflows_max) { - remote_address((struct sock_common *)sk, &remote); - local = select_local_address(pernet, msk); if (local) { + struct mptcp_addr_info remote = { 0 }; + msk->pm.local_addr_used++; msk->pm.subflows++; check_work_pending(msk); + remote_address((struct sock_common *)sk, &remote); spin_unlock_bh(&msk->pm.lock); __mptcp_subflow_connect(sk, &local->addr, &remote); spin_lock_bh(&msk->pm.lock); -- GitLab From a6094788031d4eaf69f01d512df27bce2a08d156 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Mon, 25 Jan 2021 10:59:03 -0800 Subject: [PATCH 1579/2012] selftests: mptcp: add IPv4-mapped IPv6 testcases Here, we make sure we support IPv4-mapped in IPv6 addresses in different contexts: - a v4-mapped address is received by the PM and can be used as v4. - a v4 address is received by the PM and can be used even with a v4 mapped socket. We also make sure we don't try to establish subflows between v4 and v6 addresses, e.g. if a real v6 address ends with a valid v4 address. Co-developed-by: Matthieu Baerts Signed-off-by: Matthieu Baerts Signed-off-by: Geliang Tang Signed-off-by: Mat Martineau Signed-off-by: Jakub Kicinski --- .../testing/selftests/net/mptcp/mptcp_join.sh | 75 +++++++++++++++++++ 1 file changed, 75 insertions(+) diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index f74cd993b168e..be34b9ccbd202 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -790,6 +790,81 @@ chk_join_nr "remove subflow and signal IPv6" 2 2 2 chk_add_nr 1 1 chk_rm_nr 1 1 +# subflow IPv4-mapped to IPv4-mapped +reset +ip netns exec $ns1 ./pm_nl_ctl limits 0 1 +ip netns exec $ns2 ./pm_nl_ctl limits 0 1 +ip netns exec $ns2 ./pm_nl_ctl add "::ffff:10.0.3.2" flags subflow +run_tests $ns1 $ns2 "::ffff:10.0.1.1" +chk_join_nr "single subflow IPv4-mapped" 1 1 1 + +# signal address IPv4-mapped with IPv4-mapped sk +reset +ip netns exec $ns1 ./pm_nl_ctl limits 0 1 +ip netns exec $ns2 ./pm_nl_ctl limits 1 1 +ip netns exec $ns1 ./pm_nl_ctl add "::ffff:10.0.2.1" flags signal +run_tests $ns1 $ns2 "::ffff:10.0.1.1" +chk_join_nr "signal address IPv4-mapped" 1 1 1 +chk_add_nr 1 1 + +# subflow v4-map-v6 +reset +ip netns exec $ns1 ./pm_nl_ctl limits 0 1 +ip netns exec $ns2 ./pm_nl_ctl limits 0 1 +ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow +run_tests $ns1 $ns2 "::ffff:10.0.1.1" +chk_join_nr "single subflow v4-map-v6" 1 1 1 + +# signal address v4-map-v6 +reset +ip netns exec $ns1 ./pm_nl_ctl limits 0 1 +ip netns exec $ns2 ./pm_nl_ctl limits 1 1 +ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal +run_tests $ns1 $ns2 "::ffff:10.0.1.1" +chk_join_nr "signal address v4-map-v6" 1 1 1 +chk_add_nr 1 1 + +# subflow v6-map-v4 +reset +ip netns exec $ns1 ./pm_nl_ctl limits 0 1 +ip netns exec $ns2 ./pm_nl_ctl limits 0 1 +ip netns exec $ns2 ./pm_nl_ctl add "::ffff:10.0.3.2" flags subflow +run_tests $ns1 $ns2 10.0.1.1 +chk_join_nr "single subflow v6-map-v4" 1 1 1 + +# signal address v6-map-v4 +reset +ip netns exec $ns1 ./pm_nl_ctl limits 0 1 +ip netns exec $ns2 ./pm_nl_ctl limits 1 1 +ip netns exec $ns1 ./pm_nl_ctl add "::ffff:10.0.2.1" flags signal +run_tests $ns1 $ns2 10.0.1.1 +chk_join_nr "signal address v6-map-v4" 1 1 1 +chk_add_nr 1 1 + +# no subflow IPv6 to v4 address +reset +ip netns exec $ns1 ./pm_nl_ctl limits 0 1 +ip netns exec $ns2 ./pm_nl_ctl limits 0 1 +ip netns exec $ns2 ./pm_nl_ctl add dead:beef:2::2 flags subflow +run_tests $ns1 $ns2 10.0.1.1 +chk_join_nr "no JOIN with diff families v4-v6" 0 0 0 + +# no subflow IPv6 to v4 address even if v6 has a valid v4 at the end +reset +ip netns exec $ns1 ./pm_nl_ctl limits 0 1 +ip netns exec $ns2 ./pm_nl_ctl limits 0 1 +ip netns exec $ns2 ./pm_nl_ctl add dead:beef:2::10.0.3.2 flags subflow +run_tests $ns1 $ns2 10.0.1.1 +chk_join_nr "no JOIN with diff families v4-v6-2" 0 0 0 + +# no subflow IPv4 to v6 address, no need to slow down too then +reset +ip netns exec $ns1 ./pm_nl_ctl limits 0 1 +ip netns exec $ns2 ./pm_nl_ctl limits 0 1 +ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow +run_tests $ns1 $ns2 dead:beef:1::1 +chk_join_nr "no JOIN with diff families v6-v4" 0 0 0 + # single subflow, backup reset ip netns exec $ns1 ./pm_nl_ctl limits 0 1 -- GitLab From 9c2cadefde4872cb486f4933003e72cdfb64a2b4 Mon Sep 17 00:00:00 2001 From: Matthieu Baerts Date: Mon, 25 Jan 2021 10:59:04 -0800 Subject: [PATCH 1580/2012] selftests: increase timeout to 10 min On slow systems with kernel debug settings, we can reach the current timeout when all tests are executed. Likely some tests need be improved to remove some 'sleep' and wait (less) for a specific action. This can also improve stability. Signed-off-by: Matthieu Baerts Signed-off-by: Mat Martineau Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/settings | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/mptcp/settings b/tools/testing/selftests/net/mptcp/settings index 026384c189c91..a62d2fa1275c6 100644 --- a/tools/testing/selftests/net/mptcp/settings +++ b/tools/testing/selftests/net/mptcp/settings @@ -1 +1 @@ -timeout=450 +timeout=600 -- GitLab From 69783429cd1386306071c19c0f635423a50cc804 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Tue, 26 Jan 2021 08:14:21 +0900 Subject: [PATCH 1581/2012] net: sysctl: remove redundant #ifdef CONFIG_NET CONFIG_NET is a bool option, and this file is compiled only when CONFIG_NET=y. Remove #ifdef CONFIG_NET, which we know it is always met. Signed-off-by: Masahiro Yamada Link: https://lore.kernel.org/r/20210125231421.105936-1-masahiroy@kernel.org Signed-off-by: Jakub Kicinski --- net/core/sysctl_net_core.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index d86d8d11cfe4a..4567de519603b 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -309,7 +309,6 @@ proc_dolongvec_minmax_bpf_restricted(struct ctl_table *table, int write, #endif static struct ctl_table net_core_table[] = { -#ifdef CONFIG_NET { .procname = "wmem_max", .data = &sysctl_wmem_max, @@ -507,7 +506,6 @@ static struct ctl_table net_core_table[] = { .proc_handler = set_default_qdisc }, #endif -#endif /* CONFIG_NET */ { .procname = "netdev_budget", .data = &netdev_budget, -- GitLab From 8b5f4eb3ab700046b9f41d53544791fa02852551 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Tue, 26 Jan 2021 08:16:55 +0900 Subject: [PATCH 1582/2012] net: move CONFIG_NET guard to top Makefile When CONFIG_NET is disabled, nothing under the net/ directory is compiled. Move the CONFIG_NET guard to the top Makefile so the net/ directory is entirely skipped. When Kbuild visits net/Makefile, CONFIG_NET is obvioulsy 'y' because CONFIG_NET is a bool option. Clean up net/Makefile. Signed-off-by: Masahiro Yamada Link: https://lore.kernel.org/r/20210125231659.106201-1-masahiroy@kernel.org Signed-off-by: Jakub Kicinski --- Makefile | 3 ++- net/Makefile | 11 ++++------- 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/Makefile b/Makefile index b0e4767735dca..61357f7eb55f5 100644 --- a/Makefile +++ b/Makefile @@ -649,7 +649,8 @@ ifeq ($(KBUILD_EXTMOD),) core-y := init/ usr/ drivers-y := drivers/ sound/ drivers-$(CONFIG_SAMPLES) += samples/ -drivers-y += net/ virt/ +drivers-$(CONFIG_NET) += net/ +drivers-y += virt/ libs-y := lib/ endif # KBUILD_EXTMOD diff --git a/net/Makefile b/net/Makefile index d96b0aa8f39f7..6fa3b2e26cabc 100644 --- a/net/Makefile +++ b/net/Makefile @@ -6,20 +6,19 @@ # Rewritten to use lists instead of if-statements. # -obj-$(CONFIG_NET) := devres.o socket.o core/ +obj-y := devres.o socket.o core/ -tmp-$(CONFIG_COMPAT) := compat.o -obj-$(CONFIG_NET) += $(tmp-y) +obj-$(CONFIG_COMPAT) += compat.o # LLC has to be linked before the files in net/802/ obj-$(CONFIG_LLC) += llc/ -obj-$(CONFIG_NET) += ethernet/ 802/ sched/ netlink/ bpf/ ethtool/ +obj-y += ethernet/ 802/ sched/ netlink/ bpf/ ethtool/ obj-$(CONFIG_NETFILTER) += netfilter/ obj-$(CONFIG_INET) += ipv4/ obj-$(CONFIG_TLS) += tls/ obj-$(CONFIG_XFRM) += xfrm/ obj-$(CONFIG_UNIX_SCM) += unix/ -obj-$(CONFIG_NET) += ipv6/ +obj-y += ipv6/ obj-$(CONFIG_BPFILTER) += bpfilter/ obj-$(CONFIG_PACKET) += packet/ obj-$(CONFIG_NET_KEY) += key/ @@ -63,9 +62,7 @@ obj-$(CONFIG_6LOWPAN) += 6lowpan/ obj-$(CONFIG_IEEE802154) += ieee802154/ obj-$(CONFIG_MAC802154) += mac802154/ -ifeq ($(CONFIG_NET),y) obj-$(CONFIG_SYSCTL) += sysctl_net.o -endif obj-$(CONFIG_DNS_RESOLVER) += dns_resolver/ obj-$(CONFIG_CEPH_LIB) += ceph/ obj-$(CONFIG_BATMAN_ADV) += batman-adv/ -- GitLab From 1e328ed559201c4a3733b148b0afe3cfb4ad8b45 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Tue, 26 Jan 2021 08:16:56 +0900 Subject: [PATCH 1583/2012] net: dcb: use obj-$(CONFIG_DCB) form in net/Makefile CONFIG_DCB is a bool option. Change the ifeq conditional to the standard obj-$(CONFIG_DCB) form. Use obj-y in net/dcb/Makefile because Kbuild visits this Makefile only when CONFIG_DCB=y. Signed-off-by: Masahiro Yamada Link: https://lore.kernel.org/r/20210125231659.106201-2-masahiroy@kernel.org Signed-off-by: Jakub Kicinski --- net/Makefile | 4 +--- net/dcb/Makefile | 2 +- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/net/Makefile b/net/Makefile index 6fa3b2e26cabc..a7e38bd463a44 100644 --- a/net/Makefile +++ b/net/Makefile @@ -55,9 +55,7 @@ obj-$(CONFIG_SMC) += smc/ obj-$(CONFIG_RFKILL) += rfkill/ obj-$(CONFIG_NET_9P) += 9p/ obj-$(CONFIG_CAIF) += caif/ -ifneq ($(CONFIG_DCB),) -obj-y += dcb/ -endif +obj-$(CONFIG_DCB) += dcb/ obj-$(CONFIG_6LOWPAN) += 6lowpan/ obj-$(CONFIG_IEEE802154) += ieee802154/ obj-$(CONFIG_MAC802154) += mac802154/ diff --git a/net/dcb/Makefile b/net/dcb/Makefile index 3016e5a7716a0..2c0fa16ee2a92 100644 --- a/net/dcb/Makefile +++ b/net/dcb/Makefile @@ -1,2 +1,2 @@ # SPDX-License-Identifier: GPL-2.0-only -obj-$(CONFIG_DCB) += dcbnl.o dcbevent.o +obj-y += dcbnl.o dcbevent.o -- GitLab From 0cfd99b487f1f1b0661a966c2182115d2989e5c3 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Tue, 26 Jan 2021 08:16:57 +0900 Subject: [PATCH 1584/2012] net: switchdev: use obj-$(CONFIG_NET_SWITCHDEV) form in net/Makefile CONFIG_NET_SWITCHDEV is a bool option. Change the ifeq conditional to the standard obj-$(CONFIG_NET_SWITCHDEV) form. Use obj-y in net/switchdev/Makefile because Kbuild visits this Makefile only when CONFIG_NET_SWITCHDEV=y. Signed-off-by: Masahiro Yamada Link: https://lore.kernel.org/r/20210125231659.106201-3-masahiroy@kernel.org Signed-off-by: Jakub Kicinski --- net/Makefile | 4 +--- net/switchdev/Makefile | 2 +- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/net/Makefile b/net/Makefile index a7e38bd463a44..a18547c97cbb8 100644 --- a/net/Makefile +++ b/net/Makefile @@ -72,9 +72,7 @@ obj-$(CONFIG_VSOCKETS) += vmw_vsock/ obj-$(CONFIG_MPLS) += mpls/ obj-$(CONFIG_NET_NSH) += nsh/ obj-$(CONFIG_HSR) += hsr/ -ifneq ($(CONFIG_NET_SWITCHDEV),) -obj-y += switchdev/ -endif +obj-$(CONFIG_NET_SWITCHDEV) += switchdev/ ifneq ($(CONFIG_NET_L3_MASTER_DEV),) obj-y += l3mdev/ endif diff --git a/net/switchdev/Makefile b/net/switchdev/Makefile index bd69a3136e76a..c5561d7f3a7c5 100644 --- a/net/switchdev/Makefile +++ b/net/switchdev/Makefile @@ -3,4 +3,4 @@ # Makefile for the Switch device API # -obj-$(CONFIG_NET_SWITCHDEV) += switchdev.o +obj-y += switchdev.o -- GitLab From d32f834cd6873d9a5ed18ad028700f60d1688cf3 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Tue, 26 Jan 2021 08:16:58 +0900 Subject: [PATCH 1585/2012] net: l3mdev: use obj-$(CONFIG_NET_L3_MASTER_DEV) form in net/Makefile CONFIG_NET_L3_MASTER_DEV is a bool option. Change the ifeq conditional to the standard obj-$(CONFIG_NET_L3_MASTER_DEV) form. Use obj-y in net/l3mdev/Makefile because Kbuild visits this Makefile only when CONFIG_NET_L3_MASTER_DEV=y. Signed-off-by: Masahiro Yamada Reviewed-by: David Ahern Link: https://lore.kernel.org/r/20210125231659.106201-4-masahiroy@kernel.org Signed-off-by: Jakub Kicinski --- net/Makefile | 4 +--- net/l3mdev/Makefile | 2 +- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/net/Makefile b/net/Makefile index a18547c97cbb8..9ca9572188feb 100644 --- a/net/Makefile +++ b/net/Makefile @@ -73,9 +73,7 @@ obj-$(CONFIG_MPLS) += mpls/ obj-$(CONFIG_NET_NSH) += nsh/ obj-$(CONFIG_HSR) += hsr/ obj-$(CONFIG_NET_SWITCHDEV) += switchdev/ -ifneq ($(CONFIG_NET_L3_MASTER_DEV),) -obj-y += l3mdev/ -endif +obj-$(CONFIG_NET_L3_MASTER_DEV) += l3mdev/ obj-$(CONFIG_QRTR) += qrtr/ obj-$(CONFIG_NET_NCSI) += ncsi/ obj-$(CONFIG_XDP_SOCKETS) += xdp/ diff --git a/net/l3mdev/Makefile b/net/l3mdev/Makefile index 59755a9e2f9bb..9e7da0acc58cd 100644 --- a/net/l3mdev/Makefile +++ b/net/l3mdev/Makefile @@ -3,4 +3,4 @@ # Makefile for the L3 device API # -obj-$(CONFIG_NET_L3_MASTER_DEV) += l3mdev.o +obj-y += l3mdev.o -- GitLab From 864e898ba3f6a7974d35efb604a1345d50e45f91 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Tue, 26 Jan 2021 08:20:26 +0900 Subject: [PATCH 1586/2012] net: remove redundant 'depends on NET' These Kconfig files are included from net/Kconfig, inside the if NET ... endif. Remove 'depends on NET', which we know it is already met. Signed-off-by: Masahiro Yamada Link: https://lore.kernel.org/r/20210125232026.106855-1-masahiroy@kernel.org Signed-off-by: Jakub Kicinski --- net/9p/Kconfig | 1 - net/batman-adv/Kconfig | 1 - net/bluetooth/Kconfig | 2 +- net/bpfilter/Kconfig | 2 +- net/can/Kconfig | 1 - net/dns_resolver/Kconfig | 2 +- net/ife/Kconfig | 1 - net/llc/Kconfig | 1 - net/netfilter/Kconfig | 2 +- net/netfilter/ipvs/Kconfig | 2 +- net/nfc/Kconfig | 1 - net/psample/Kconfig | 1 - 12 files changed, 5 insertions(+), 12 deletions(-) diff --git a/net/9p/Kconfig b/net/9p/Kconfig index 3d11fec3a8dc7..64468c49791f1 100644 --- a/net/9p/Kconfig +++ b/net/9p/Kconfig @@ -4,7 +4,6 @@ # menuconfig NET_9P - depends on NET tristate "Plan 9 Resource Sharing Support (9P2000)" help If you say Y here, you will get experimental support for diff --git a/net/batman-adv/Kconfig b/net/batman-adv/Kconfig index 993afd5ff7bba..43ae3dcbbbeba 100644 --- a/net/batman-adv/Kconfig +++ b/net/batman-adv/Kconfig @@ -9,7 +9,6 @@ config BATMAN_ADV tristate "B.A.T.M.A.N. Advanced Meshing Protocol" - depends on NET select LIBCRC32C help B.A.T.M.A.N. (better approach to mobile ad-hoc networking) is diff --git a/net/bluetooth/Kconfig b/net/bluetooth/Kconfig index 64e669acd42f9..400c5130dc0a0 100644 --- a/net/bluetooth/Kconfig +++ b/net/bluetooth/Kconfig @@ -5,7 +5,7 @@ menuconfig BT tristate "Bluetooth subsystem support" - depends on NET && !S390 + depends on !S390 depends on RFKILL || !RFKILL select CRC16 select CRYPTO diff --git a/net/bpfilter/Kconfig b/net/bpfilter/Kconfig index 8ad0233ce4978..3d4a214624583 100644 --- a/net/bpfilter/Kconfig +++ b/net/bpfilter/Kconfig @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0-only menuconfig BPFILTER bool "BPF based packet filtering framework (BPFILTER)" - depends on NET && BPF && INET + depends on BPF && INET select USERMODE_DRIVER help This builds experimental bpfilter framework that is aiming to diff --git a/net/can/Kconfig b/net/can/Kconfig index 7c9958df91d35..a9ac5ffab286a 100644 --- a/net/can/Kconfig +++ b/net/can/Kconfig @@ -4,7 +4,6 @@ # menuconfig CAN - depends on NET tristate "CAN bus subsystem support" help Controller Area Network (CAN) is a slow (up to 1Mbit/s) serial diff --git a/net/dns_resolver/Kconfig b/net/dns_resolver/Kconfig index 255df9b6e9e8d..155b061634092 100644 --- a/net/dns_resolver/Kconfig +++ b/net/dns_resolver/Kconfig @@ -4,7 +4,7 @@ # config DNS_RESOLVER tristate "DNS Resolver support" - depends on NET && KEYS + depends on KEYS help Saying Y here will include support for the DNS Resolver key type which can be used to make upcalls to perform DNS lookups in diff --git a/net/ife/Kconfig b/net/ife/Kconfig index bcf650564db4a..de36a5b91e50a 100644 --- a/net/ife/Kconfig +++ b/net/ife/Kconfig @@ -4,7 +4,6 @@ # menuconfig NET_IFE - depends on NET tristate "Inter-FE based on IETF ForCES InterFE LFB" default n help diff --git a/net/llc/Kconfig b/net/llc/Kconfig index b0e646ac47eb5..7f79f5e134f9b 100644 --- a/net/llc/Kconfig +++ b/net/llc/Kconfig @@ -1,7 +1,6 @@ # SPDX-License-Identifier: GPL-2.0-only config LLC tristate - depends on NET config LLC2 tristate "ANSI/IEEE 802.2 LLC type 2 Support" diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 49fbef0d99bef..1a92063c73a41 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0-only menu "Core Netfilter Configuration" - depends on NET && INET && NETFILTER + depends on INET && NETFILTER config NETFILTER_INGRESS bool "Netfilter ingress support" diff --git a/net/netfilter/ipvs/Kconfig b/net/netfilter/ipvs/Kconfig index eb0e329f9b8d6..c39a1e35c104b 100644 --- a/net/netfilter/ipvs/Kconfig +++ b/net/netfilter/ipvs/Kconfig @@ -4,7 +4,7 @@ # menuconfig IP_VS tristate "IP virtual server support" - depends on NET && INET && NETFILTER + depends on INET && NETFILTER depends on (NF_CONNTRACK || NF_CONNTRACK=n) help IP Virtual Server support will let you build a high-performance diff --git a/net/nfc/Kconfig b/net/nfc/Kconfig index 96b91674dd37b..466a0279b93e1 100644 --- a/net/nfc/Kconfig +++ b/net/nfc/Kconfig @@ -4,7 +4,6 @@ # menuconfig NFC - depends on NET depends on RFKILL || !RFKILL tristate "NFC subsystem support" default n diff --git a/net/psample/Kconfig b/net/psample/Kconfig index 028f514a9c601..be0b839209ba0 100644 --- a/net/psample/Kconfig +++ b/net/psample/Kconfig @@ -4,7 +4,6 @@ # menuconfig PSAMPLE - depends on NET tristate "Packet-sampling netlink channel" default n help -- GitLab From 3f96d644976825986a93b7b9fe6a9900a80f2e11 Mon Sep 17 00:00:00 2001 From: Vadim Fedorenko Date: Tue, 26 Jan 2021 03:02:14 +0300 Subject: [PATCH 1587/2012] net: decnet: fix netdev refcount leaking on error path On building the route there is an assumption that the destination could be local. In this case loopback_dev is used to get the address. If the address is still cannot be retrieved dn_route_output_slow returns EADDRNOTAVAIL with loopback_dev reference taken. Cannot find hash for the fixes tag because this code was introduced long time ago. I don't think that this bug has ever fired but the patch is done just to have a consistent code base. Signed-off-by: Vadim Fedorenko Link: https://lore.kernel.org/r/1611619334-20955-1-git-send-email-vfedorenko@novek.ru Signed-off-by: Jakub Kicinski --- net/decnet/dn_route.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index 4cac31d22a502..2193ae529e752 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -1035,7 +1035,7 @@ source_ok: fld.saddr = dnet_select_source(dev_out, 0, RT_SCOPE_HOST); if (!fld.daddr) - goto out; + goto done; } fld.flowidn_oif = LOOPBACK_IFINDEX; res.type = RTN_LOCAL; -- GitLab From 1d96006dccf0994dd91f327f59493afd6398b01d Mon Sep 17 00:00:00 2001 From: Jiapeng Zhong Date: Tue, 26 Jan 2021 16:13:03 +0800 Subject: [PATCH 1588/2012] rocker: Simplify the calculation of variables Fix the following coccicheck warnings: ./drivers/net/ethernet/rocker/rocker_ofdpa.c:926:34-36: WARNING !A || A && B is equivalent to !A || B. Reported-by: Abaci Robot Signed-off-by: Jiapeng Zhong Link: https://lore.kernel.org/r/1611648783-3916-1-git-send-email-abaci-bugfix@linux.alibaba.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/rocker/rocker_ofdpa.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/rocker/rocker_ofdpa.c b/drivers/net/ethernet/rocker/rocker_ofdpa.c index d067da1ef0706..967a634ee9ac1 100644 --- a/drivers/net/ethernet/rocker/rocker_ofdpa.c +++ b/drivers/net/ethernet/rocker/rocker_ofdpa.c @@ -923,7 +923,7 @@ static int ofdpa_flow_tbl_bridge(struct ofdpa_port *ofdpa_port, struct ofdpa_flow_tbl_entry *entry; u32 priority; bool vlan_bridging = !!vlan_id; - bool dflt = !eth_dst || (eth_dst && eth_dst_mask); + bool dflt = !eth_dst || eth_dst_mask; bool wild = false; entry = kzalloc(sizeof(*entry), GFP_ATOMIC); -- GitLab From 89268b056ed116e13ba39f46481ad8bf5eef7bc4 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Tue, 26 Jan 2021 11:35:32 +0200 Subject: [PATCH 1589/2012] net: bridge: multicast: add per-port EHT hosts limit Add a default limit of 512 for number of tracked EHT hosts per-port. Signed-off-by: Nikolay Aleksandrov Signed-off-by: Jakub Kicinski --- net/bridge/br_multicast.c | 1 + net/bridge/br_multicast_eht.c | 7 +++++++ net/bridge/br_private.h | 2 ++ net/bridge/br_private_mcast_eht.h | 26 ++++++++++++++++++++++++++ 4 files changed, 36 insertions(+) diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 3aa2833f60c7c..6f672eb7ff33e 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -1608,6 +1608,7 @@ static void br_mc_disabled_update(struct net_device *dev, bool value) int br_multicast_add_port(struct net_bridge_port *port) { port->multicast_router = MDB_RTR_TYPE_TEMP_QUERY; + port->multicast_eht_hosts_limit = BR_MCAST_DEFAULT_EHT_HOSTS_LIMIT; timer_setup(&port->multicast_router_timer, br_multicast_router_expired, 0); diff --git a/net/bridge/br_multicast_eht.c b/net/bridge/br_multicast_eht.c index ff9b3ba37cabb..445768c8495f1 100644 --- a/net/bridge/br_multicast_eht.c +++ b/net/bridge/br_multicast_eht.c @@ -127,6 +127,8 @@ static void __eht_destroy_host(struct net_bridge_group_eht_host *eht_host) { WARN_ON(!hlist_empty(&eht_host->set_entries)); + br_multicast_eht_hosts_dec(eht_host->pg); + rb_erase(&eht_host->rb_node, &eht_host->pg->eht_host_tree); RB_CLEAR_NODE(&eht_host->rb_node); kfree(eht_host); @@ -257,6 +259,9 @@ __eht_lookup_create_host(struct net_bridge_port_group *pg, return this; } + if (br_multicast_eht_hosts_over_limit(pg)) + return NULL; + eht_host = kzalloc(sizeof(*eht_host), GFP_ATOMIC); if (!eht_host) return NULL; @@ -269,6 +274,8 @@ __eht_lookup_create_host(struct net_bridge_port_group *pg, rb_link_node(&eht_host->rb_node, parent, link); rb_insert_color(&eht_host->rb_node, &pg->eht_host_tree); + br_multicast_eht_hosts_inc(pg); + return eht_host; } diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 0e26ba623006c..d242ba668e473 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -310,6 +310,8 @@ struct net_bridge_port { #if IS_ENABLED(CONFIG_IPV6) struct bridge_mcast_own_query ip6_own_query; #endif /* IS_ENABLED(CONFIG_IPV6) */ + u32 multicast_eht_hosts_limit; + u32 multicast_eht_hosts_cnt; unsigned char multicast_router; struct bridge_mcast_stats __percpu *mcast_stats; struct timer_list multicast_router_timer; diff --git a/net/bridge/br_private_mcast_eht.h b/net/bridge/br_private_mcast_eht.h index 9daffa3ad8d5c..b2c8d988721f7 100644 --- a/net/bridge/br_private_mcast_eht.h +++ b/net/bridge/br_private_mcast_eht.h @@ -4,6 +4,8 @@ #ifndef _BR_PRIVATE_MCAST_EHT_H_ #define _BR_PRIVATE_MCAST_EHT_H_ +#define BR_MCAST_DEFAULT_EHT_HOSTS_LIMIT 512 + union net_bridge_eht_addr { __be32 ip4; #if IS_ENABLED(CONFIG_IPV6) @@ -47,6 +49,7 @@ struct net_bridge_group_eht_set { struct net_bridge_mcast_gc mcast_gc; }; +#ifdef CONFIG_BRIDGE_IGMP_SNOOPING void br_multicast_eht_clean_sets(struct net_bridge_port_group *pg); bool br_multicast_eht_handle(struct net_bridge_port_group *pg, void *h_addr, @@ -62,4 +65,27 @@ br_multicast_eht_should_del_pg(const struct net_bridge_port_group *pg) RB_EMPTY_ROOT(&pg->eht_host_tree)); } +static inline bool +br_multicast_eht_hosts_over_limit(const struct net_bridge_port_group *pg) +{ + const struct net_bridge_port *p = pg->key.port; + + return !!(p->multicast_eht_hosts_cnt >= p->multicast_eht_hosts_limit); +} + +static inline void br_multicast_eht_hosts_inc(struct net_bridge_port_group *pg) +{ + struct net_bridge_port *p = pg->key.port; + + p->multicast_eht_hosts_cnt++; +} + +static inline void br_multicast_eht_hosts_dec(struct net_bridge_port_group *pg) +{ + struct net_bridge_port *p = pg->key.port; + + p->multicast_eht_hosts_cnt--; +} +#endif /* CONFIG_BRIDGE_IGMP_SNOOPING */ + #endif /* _BR_PRIVATE_MCAST_EHT_H_ */ -- GitLab From 2dba407f994e5b0eb3b70a8cb280e014ec4a7ff3 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Tue, 26 Jan 2021 11:35:33 +0200 Subject: [PATCH 1590/2012] net: bridge: multicast: make tracked EHT hosts limit configurable Add two new port attributes which make EHT hosts limit configurable and export the current number of tracked EHT hosts: - IFLA_BRPORT_MCAST_EHT_HOSTS_LIMIT: configure/retrieve current limit - IFLA_BRPORT_MCAST_EHT_HOSTS_CNT: current number of tracked hosts Setting IFLA_BRPORT_MCAST_EHT_HOSTS_LIMIT to 0 is currently not allowed. Note that we have to increase RTNL_SLAVE_MAX_TYPE to 38 minimum, I've increased it to 40 to have space for two more future entries. v2: move br_multicast_eht_set_hosts_limit() to br_multicast_eht.c, no functional change Signed-off-by: Nikolay Aleksandrov Signed-off-by: Jakub Kicinski --- include/uapi/linux/if_link.h | 2 ++ net/bridge/br_multicast_eht.c | 15 +++++++++++++++ net/bridge/br_netlink.c | 19 ++++++++++++++++++- net/bridge/br_private_mcast_eht.h | 2 ++ net/bridge/br_sysfs_if.c | 26 ++++++++++++++++++++++++++ net/core/rtnetlink.c | 2 +- 6 files changed, 64 insertions(+), 2 deletions(-) diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 2bd0d8bbcdb25..eb8018c3a7372 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -525,6 +525,8 @@ enum { IFLA_BRPORT_BACKUP_PORT, IFLA_BRPORT_MRP_RING_OPEN, IFLA_BRPORT_MRP_IN_OPEN, + IFLA_BRPORT_MCAST_EHT_HOSTS_LIMIT, + IFLA_BRPORT_MCAST_EHT_HOSTS_CNT, __IFLA_BRPORT_MAX }; #define IFLA_BRPORT_MAX (__IFLA_BRPORT_MAX - 1) diff --git a/net/bridge/br_multicast_eht.c b/net/bridge/br_multicast_eht.c index 445768c8495f1..fea38b9a72688 100644 --- a/net/bridge/br_multicast_eht.c +++ b/net/bridge/br_multicast_eht.c @@ -861,3 +861,18 @@ bool br_multicast_eht_handle(struct net_bridge_port_group *pg, out: return changed; } + +int br_multicast_eht_set_hosts_limit(struct net_bridge_port *p, + u32 eht_hosts_limit) +{ + struct net_bridge *br = p->br; + + if (!eht_hosts_limit) + return -EINVAL; + + spin_lock_bh(&br->multicast_lock); + p->multicast_eht_hosts_limit = eht_hosts_limit; + spin_unlock_bh(&br->multicast_lock); + + return 0; +} diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 762f273802cda..bd3962da345a3 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -18,6 +18,7 @@ #include "br_private_stp.h" #include "br_private_cfm.h" #include "br_private_tunnel.h" +#include "br_private_mcast_eht.h" static int __get_num_vlan_infos(struct net_bridge_vlan_group *vg, u32 filter_mask) @@ -199,6 +200,8 @@ static inline size_t br_port_info_size(void) + nla_total_size(sizeof(u16)) /* IFLA_BRPORT_GROUP_FWD_MASK */ + nla_total_size(sizeof(u8)) /* IFLA_BRPORT_MRP_RING_OPEN */ + nla_total_size(sizeof(u8)) /* IFLA_BRPORT_MRP_IN_OPEN */ + + nla_total_size(sizeof(u32)) /* IFLA_BRPORT_MCAST_EHT_HOSTS_LIMIT */ + + nla_total_size(sizeof(u32)) /* IFLA_BRPORT_MCAST_EHT_HOSTS_CNT */ + 0; } @@ -283,7 +286,11 @@ static int br_port_fill_attrs(struct sk_buff *skb, #ifdef CONFIG_BRIDGE_IGMP_SNOOPING if (nla_put_u8(skb, IFLA_BRPORT_MULTICAST_ROUTER, - p->multicast_router)) + p->multicast_router) || + nla_put_u32(skb, IFLA_BRPORT_MCAST_EHT_HOSTS_LIMIT, + p->multicast_eht_hosts_limit) || + nla_put_u32(skb, IFLA_BRPORT_MCAST_EHT_HOSTS_CNT, + p->multicast_eht_hosts_cnt)) return -EMSGSIZE; #endif @@ -820,6 +827,7 @@ static const struct nla_policy br_port_policy[IFLA_BRPORT_MAX + 1] = { [IFLA_BRPORT_NEIGH_SUPPRESS] = { .type = NLA_U8 }, [IFLA_BRPORT_ISOLATED] = { .type = NLA_U8 }, [IFLA_BRPORT_BACKUP_PORT] = { .type = NLA_U32 }, + [IFLA_BRPORT_MCAST_EHT_HOSTS_LIMIT] = { .type = NLA_U32 }, }; /* Change the state of the port and notify spanning tree */ @@ -955,6 +963,15 @@ static int br_setport(struct net_bridge_port *p, struct nlattr *tb[]) if (err) return err; } + + if (tb[IFLA_BRPORT_MCAST_EHT_HOSTS_LIMIT]) { + u32 hlimit; + + hlimit = nla_get_u32(tb[IFLA_BRPORT_MCAST_EHT_HOSTS_LIMIT]); + err = br_multicast_eht_set_hosts_limit(p, hlimit); + if (err) + return err; + } #endif if (tb[IFLA_BRPORT_GROUP_FWD_MASK]) { diff --git a/net/bridge/br_private_mcast_eht.h b/net/bridge/br_private_mcast_eht.h index b2c8d988721f7..f89049f4892c9 100644 --- a/net/bridge/br_private_mcast_eht.h +++ b/net/bridge/br_private_mcast_eht.h @@ -57,6 +57,8 @@ bool br_multicast_eht_handle(struct net_bridge_port_group *pg, u32 nsrcs, size_t addr_size, int grec_type); +int br_multicast_eht_set_hosts_limit(struct net_bridge_port *p, + u32 eht_hosts_limit); static inline bool br_multicast_eht_should_del_pg(const struct net_bridge_port_group *pg) diff --git a/net/bridge/br_sysfs_if.c b/net/bridge/br_sysfs_if.c index 7a59cdddd3ce3..b66305fae26b0 100644 --- a/net/bridge/br_sysfs_if.c +++ b/net/bridge/br_sysfs_if.c @@ -16,6 +16,7 @@ #include #include "br_private.h" +#include "br_private_mcast_eht.h" struct brport_attribute { struct attribute attr; @@ -245,6 +246,29 @@ static int store_multicast_router(struct net_bridge_port *p, static BRPORT_ATTR(multicast_router, 0644, show_multicast_router, store_multicast_router); +static ssize_t show_multicast_eht_hosts_limit(struct net_bridge_port *p, + char *buf) +{ + return sprintf(buf, "%u\n", p->multicast_eht_hosts_limit); +} + +static int store_multicast_eht_hosts_limit(struct net_bridge_port *p, + unsigned long v) +{ + return br_multicast_eht_set_hosts_limit(p, v); +} +static BRPORT_ATTR(multicast_eht_hosts_limit, 0644, + show_multicast_eht_hosts_limit, + store_multicast_eht_hosts_limit); + +static ssize_t show_multicast_eht_hosts_cnt(struct net_bridge_port *p, + char *buf) +{ + return sprintf(buf, "%u\n", p->multicast_eht_hosts_cnt); +} +static BRPORT_ATTR(multicast_eht_hosts_cnt, 0444, show_multicast_eht_hosts_cnt, + NULL); + BRPORT_ATTR_FLAG(multicast_fast_leave, BR_MULTICAST_FAST_LEAVE); BRPORT_ATTR_FLAG(multicast_to_unicast, BR_MULTICAST_TO_UNICAST); #endif @@ -274,6 +298,8 @@ static const struct brport_attribute *brport_attrs[] = { &brport_attr_multicast_router, &brport_attr_multicast_fast_leave, &brport_attr_multicast_to_unicast, + &brport_attr_multicast_eht_hosts_limit, + &brport_attr_multicast_eht_hosts_cnt, #endif &brport_attr_proxyarp, &brport_attr_proxyarp_wifi, diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 3d6ab194d0f58..c313aaf2bce1b 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -55,7 +55,7 @@ #include #define RTNL_MAX_TYPE 50 -#define RTNL_SLAVE_MAX_TYPE 36 +#define RTNL_SLAVE_MAX_TYPE 40 struct rtnl_link { rtnl_doit_func doit; -- GitLab From b770753c7b08f1f6008d0d364180fc123f7b25e2 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 26 Jan 2021 18:18:44 -0800 Subject: [PATCH 1591/2012] MAINTAINERS: add missing header for bonding include/net/bonding.h is missing from bonding entry. Link: https://lore.kernel.org/r/20210127021844.4071706-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 1a5facde5a908..964d0c03d4388 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3239,6 +3239,7 @@ L: netdev@vger.kernel.org S: Supported W: http://sourceforge.net/projects/bonding/ F: drivers/net/bonding/ +F: include/net/bonding.h F: include/uapi/linux/if_bonding.h BOSCH SENSORTEC BMA400 ACCELEROMETER IIO DRIVER -- GitLab From 2a9063b7fface7e665e9be62e14aa8b0ed207e2f Mon Sep 17 00:00:00 2001 From: Hoang Huu Le Date: Wed, 27 Jan 2021 09:51:23 +0700 Subject: [PATCH 1592/2012] tipc: remove duplicated code in tipc_msg_create Remove a duplicate code checking for header size in tipc_msg_create() as it's already being done in tipc_msg_init(). Acked-by: Jon Maloy Signed-off-by: Hoang Huu Le Link: https://lore.kernel.org/r/20210127025123.6390-1-hoang.h.le@dektech.com.au Signed-off-by: Jakub Kicinski --- net/tipc/msg.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/net/tipc/msg.c b/net/tipc/msg.c index 2aca86021df5a..e9263280a2d4a 100644 --- a/net/tipc/msg.c +++ b/net/tipc/msg.c @@ -117,10 +117,6 @@ struct sk_buff *tipc_msg_create(uint user, uint type, msg_set_origport(msg, oport); msg_set_destport(msg, dport); msg_set_errcode(msg, errcode); - if (hdr_sz > SHORT_H_SIZE) { - msg_set_orignode(msg, onode); - msg_set_destnode(msg, dnode); - } return buf; } -- GitLab From e41aec79e62fa50f940cf222d1e9577f14e149dc Mon Sep 17 00:00:00 2001 From: Lijun Pan Date: Wed, 27 Jan 2021 19:34:42 -0600 Subject: [PATCH 1593/2012] ibmvnic: Ensure that CRQ entry read are correctly ordered Ensure that received Command-Response Queue (CRQ) entries are properly read in order by the driver. dma_rmb barrier has been added before accessing the CRQ descriptor to ensure the entire descriptor is read before processing. Fixes: 032c5e82847a ("Driver for IBM System i/p VNIC protocol") Signed-off-by: Lijun Pan Link: https://lore.kernel.org/r/20210128013442.88319-1-ljp@linux.ibm.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/ibm/ibmvnic.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index 9778c83150f1c..8820c98ea891e 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -5084,6 +5084,12 @@ static void ibmvnic_tasklet(struct tasklet_struct *t) while (!done) { /* Pull all the valid messages off the CRQ */ while ((crq = ibmvnic_next_crq(adapter)) != NULL) { + /* This barrier makes sure ibmvnic_next_crq()'s + * crq->generic.first & IBMVNIC_CRQ_CMD_RSP is loaded + * before ibmvnic_handle_crq()'s + * switch(gen_crq->first) and switch(gen_crq->cmd). + */ + dma_rmb(); ibmvnic_handle_crq(crq, adapter); crq->generic.first = 0; } -- GitLab From 63368a7416df144b713ef1a887dba11796907e05 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Wed, 27 Jan 2021 01:32:10 +0100 Subject: [PATCH 1594/2012] net: dsa: mv88e6xxx: Make global2 support mandatory Early generations of the mv88e6xxx did not have the global 2 registers. In order to keep the driver slim, it was decided to make the code for these registers optional. Over time, more generations of switches have been added, always supporting global 2 and adding more and more registers. No effort has been made to keep these additional registers also optional to slim the driver down when used for older generations. Optional global 2 now just gives additional development and maintenance burden for no real gain. Make global 2 support always compiled in. Signed-off-by: Andrew Lunn Reviewed-by: Florian Fainelli Reviewed-by: Vladimir Oltean Tested-by: Vladimir Oltean Link: https://lore.kernel.org/r/20210127003210.663173-1-andrew@lunn.ch Signed-off-by: Jakub Kicinski --- drivers/net/dsa/mv88e6xxx/Kconfig | 12 -- drivers/net/dsa/mv88e6xxx/Makefile | 6 +- drivers/net/dsa/mv88e6xxx/chip.c | 4 - drivers/net/dsa/mv88e6xxx/global2.h | 194 ---------------------------- 4 files changed, 3 insertions(+), 213 deletions(-) diff --git a/drivers/net/dsa/mv88e6xxx/Kconfig b/drivers/net/dsa/mv88e6xxx/Kconfig index b17540926c116..05af632b0f597 100644 --- a/drivers/net/dsa/mv88e6xxx/Kconfig +++ b/drivers/net/dsa/mv88e6xxx/Kconfig @@ -9,21 +9,9 @@ config NET_DSA_MV88E6XXX This driver adds support for most of the Marvell 88E6xxx models of Ethernet switch chips, except 88E6060. -config NET_DSA_MV88E6XXX_GLOBAL2 - bool "Switch Global 2 Registers support" - default y - depends on NET_DSA_MV88E6XXX - help - This registers set at internal SMI address 0x1C provides extended - features like EEPROM interface, trunking, cross-chip setup, etc. - - It is required on most chips. If the chip you compile the support for - doesn't have such registers set, say N here. In doubt, say Y. - config NET_DSA_MV88E6XXX_PTP bool "PTP support for Marvell 88E6xxx" default n - depends on NET_DSA_MV88E6XXX_GLOBAL2 depends on PTP_1588_CLOCK help Say Y to enable PTP hardware timestamping on Marvell 88E6xxx switch diff --git a/drivers/net/dsa/mv88e6xxx/Makefile b/drivers/net/dsa/mv88e6xxx/Makefile index 4b080b448ce74..c8eca2b6f9594 100644 --- a/drivers/net/dsa/mv88e6xxx/Makefile +++ b/drivers/net/dsa/mv88e6xxx/Makefile @@ -5,9 +5,9 @@ mv88e6xxx-objs += devlink.o mv88e6xxx-objs += global1.o mv88e6xxx-objs += global1_atu.o mv88e6xxx-objs += global1_vtu.o -mv88e6xxx-$(CONFIG_NET_DSA_MV88E6XXX_GLOBAL2) += global2.o -mv88e6xxx-$(CONFIG_NET_DSA_MV88E6XXX_GLOBAL2) += global2_avb.o -mv88e6xxx-$(CONFIG_NET_DSA_MV88E6XXX_GLOBAL2) += global2_scratch.o +mv88e6xxx-objs += global2.o +mv88e6xxx-objs += global2_avb.o +mv88e6xxx-objs += global2_scratch.o mv88e6xxx-$(CONFIG_NET_DSA_MV88E6XXX_PTP) += hwtstamp.o mv88e6xxx-objs += phy.o mv88e6xxx-objs += port.o diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index 5143649479442..b99f27b8c0848 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -5227,10 +5227,6 @@ static int mv88e6xxx_detect(struct mv88e6xxx_chip *chip) /* Update the compatible info with the probed one */ chip->info = info; - err = mv88e6xxx_g2_require(chip); - if (err) - return err; - dev_info(chip->dev, "switch 0x%x detected: %s, revision %u\n", chip->info->prod_num, chip->info->name, rev); diff --git a/drivers/net/dsa/mv88e6xxx/global2.h b/drivers/net/dsa/mv88e6xxx/global2.h index 253a79582a1d0..4127f82275ada 100644 --- a/drivers/net/dsa/mv88e6xxx/global2.h +++ b/drivers/net/dsa/mv88e6xxx/global2.h @@ -296,13 +296,6 @@ #define MV88E6352_G2_SCRATCH_GPIO_PCTL_TRIG 1 #define MV88E6352_G2_SCRATCH_GPIO_PCTL_EVREQ 2 -#ifdef CONFIG_NET_DSA_MV88E6XXX_GLOBAL2 - -static inline int mv88e6xxx_g2_require(struct mv88e6xxx_chip *chip) -{ - return 0; -} - int mv88e6xxx_g2_read(struct mv88e6xxx_chip *chip, int reg, u16 *val); int mv88e6xxx_g2_write(struct mv88e6xxx_chip *chip, int reg, u16 val); int mv88e6xxx_g2_wait_bit(struct mv88e6xxx_chip *chip, int reg, @@ -370,191 +363,4 @@ int mv88e6xxx_g2_scratch_gpio_set_smi(struct mv88e6xxx_chip *chip, int mv88e6xxx_g2_atu_stats_set(struct mv88e6xxx_chip *chip, u16 kind, u16 bin); int mv88e6xxx_g2_atu_stats_get(struct mv88e6xxx_chip *chip, u16 *stats); -#else /* !CONFIG_NET_DSA_MV88E6XXX_GLOBAL2 */ - -static inline int mv88e6xxx_g2_require(struct mv88e6xxx_chip *chip) -{ - if (chip->info->global2_addr) { - dev_err(chip->dev, "this chip requires CONFIG_NET_DSA_MV88E6XXX_GLOBAL2 enabled\n"); - return -EOPNOTSUPP; - } - - return 0; -} - -static inline int mv88e6xxx_g2_read(struct mv88e6xxx_chip *chip, int reg, u16 *val) -{ - return -EOPNOTSUPP; -} - -static inline int mv88e6xxx_g2_write(struct mv88e6xxx_chip *chip, int reg, u16 val) -{ - return -EOPNOTSUPP; -} - -static inline int mv88e6xxx_g2_wait_bit(struct mv88e6xxx_chip *chip, - int reg, int bit, int val) -{ - return -EOPNOTSUPP; -} - -static inline int mv88e6352_g2_irl_init_all(struct mv88e6xxx_chip *chip, - int port) -{ - return -EOPNOTSUPP; -} - -static inline int mv88e6390_g2_irl_init_all(struct mv88e6xxx_chip *chip, - int port) -{ - return -EOPNOTSUPP; -} - -static inline int mv88e6xxx_g2_smi_phy_read(struct mv88e6xxx_chip *chip, - struct mii_bus *bus, - int addr, int reg, u16 *val) -{ - return -EOPNOTSUPP; -} - -static inline int mv88e6xxx_g2_smi_phy_write(struct mv88e6xxx_chip *chip, - struct mii_bus *bus, - int addr, int reg, u16 val) -{ - return -EOPNOTSUPP; -} - -static inline int mv88e6xxx_g2_set_switch_mac(struct mv88e6xxx_chip *chip, - u8 *addr) -{ - return -EOPNOTSUPP; -} - -static inline int mv88e6xxx_g2_get_eeprom8(struct mv88e6xxx_chip *chip, - struct ethtool_eeprom *eeprom, - u8 *data) -{ - return -EOPNOTSUPP; -} - -static inline int mv88e6xxx_g2_set_eeprom8(struct mv88e6xxx_chip *chip, - struct ethtool_eeprom *eeprom, - u8 *data) -{ - return -EOPNOTSUPP; -} - -static inline int mv88e6xxx_g2_get_eeprom16(struct mv88e6xxx_chip *chip, - struct ethtool_eeprom *eeprom, - u8 *data) -{ - return -EOPNOTSUPP; -} - -static inline int mv88e6xxx_g2_set_eeprom16(struct mv88e6xxx_chip *chip, - struct ethtool_eeprom *eeprom, - u8 *data) -{ - return -EOPNOTSUPP; -} - -static inline int mv88e6xxx_g2_pvt_write(struct mv88e6xxx_chip *chip, - int src_dev, int src_port, u16 data) -{ - return -EOPNOTSUPP; -} - -static inline int mv88e6xxx_g2_misc_4_bit_port(struct mv88e6xxx_chip *chip) -{ - return -EOPNOTSUPP; -} - -static inline int mv88e6xxx_g2_irq_setup(struct mv88e6xxx_chip *chip) -{ - return -EOPNOTSUPP; -} - -static inline void mv88e6xxx_g2_irq_free(struct mv88e6xxx_chip *chip) -{ -} - -static inline int mv88e6xxx_g2_irq_mdio_setup(struct mv88e6xxx_chip *chip, - struct mii_bus *bus) -{ - return 0; -} - -static inline void mv88e6xxx_g2_irq_mdio_free(struct mv88e6xxx_chip *chip, - struct mii_bus *bus) -{ -} - -static inline int mv88e6185_g2_mgmt_rsvd2cpu(struct mv88e6xxx_chip *chip) -{ - return -EOPNOTSUPP; -} - -static inline int mv88e6352_g2_mgmt_rsvd2cpu(struct mv88e6xxx_chip *chip) -{ - return -EOPNOTSUPP; -} - -static inline int mv88e6xxx_g2_pot_clear(struct mv88e6xxx_chip *chip) -{ - return -EOPNOTSUPP; -} - -static const struct mv88e6xxx_irq_ops mv88e6097_watchdog_ops = {}; -static const struct mv88e6xxx_irq_ops mv88e6250_watchdog_ops = {}; -static const struct mv88e6xxx_irq_ops mv88e6390_watchdog_ops = {}; - -static const struct mv88e6xxx_avb_ops mv88e6165_avb_ops = {}; -static const struct mv88e6xxx_avb_ops mv88e6352_avb_ops = {}; -static const struct mv88e6xxx_avb_ops mv88e6390_avb_ops = {}; - -static const struct mv88e6xxx_gpio_ops mv88e6352_gpio_ops = {}; - -static inline int mv88e6xxx_g2_scratch_gpio_set_smi(struct mv88e6xxx_chip *chip, - bool external) -{ - return -EOPNOTSUPP; -} - -static inline int mv88e6xxx_g2_trunk_clear(struct mv88e6xxx_chip *chip) -{ - return -EOPNOTSUPP; -} - -static inline int mv88e6xxx_g2_trunk_mask_write(struct mv88e6xxx_chip *chip, - int num, bool hash, u16 mask) -{ - return -EOPNOTSUPP; -} - -static inline int mv88e6xxx_g2_trunk_mapping_write(struct mv88e6xxx_chip *chip, - int id, u16 map) -{ - return -EOPNOTSUPP; -} - -static inline int mv88e6xxx_g2_device_mapping_write(struct mv88e6xxx_chip *chip, - int target, int port) -{ - return -EOPNOTSUPP; -} - -static inline int mv88e6xxx_g2_atu_stats_set(struct mv88e6xxx_chip *chip, - u16 kind, u16 bin) -{ - return -EOPNOTSUPP; -} - -static inline int mv88e6xxx_g2_atu_stats_get(struct mv88e6xxx_chip *chip, - u16 *stats) -{ - return -EOPNOTSUPP; -} - -#endif /* CONFIG_NET_DSA_MV88E6XXX_GLOBAL2 */ - #endif /* _MV88E6XXX_GLOBAL2_H */ -- GitLab From e78ab164591ffd55d2771401ed0d9b083dad55fa Mon Sep 17 00:00:00 2001 From: Aya Levin Date: Tue, 26 Jan 2021 15:24:06 -0800 Subject: [PATCH 1595/2012] devlink: Add DMAC filter generic packet trap Add packet trap that can report packets that were dropped due to destination MAC filtering. Signed-off-by: Aya Levin Reviewed-by: Ido Schimmel Reviewed-by: Moshe Shemesh Reviewed-by: Tariq Toukan Signed-off-by: Tariq Toukan Signed-off-by: Saeed Mahameed Signed-off-by: Jakub Kicinski --- Documentation/networking/devlink/devlink-trap.rst | 5 +++++ include/net/devlink.h | 3 +++ net/core/devlink.c | 1 + 3 files changed, 9 insertions(+) diff --git a/Documentation/networking/devlink/devlink-trap.rst b/Documentation/networking/devlink/devlink-trap.rst index d875f3e1e9cf6..935b6397e8cf6 100644 --- a/Documentation/networking/devlink/devlink-trap.rst +++ b/Documentation/networking/devlink/devlink-trap.rst @@ -480,6 +480,11 @@ be added to the following table: - ``drop`` - Traps packets that the device decided to drop in case they hit a blackhole nexthop + * - ``dmac_filter`` + - ``drop`` + - Traps incoming packets that the device decided to drop because + the destination MAC is not configured in the MAC table and + the interface is not in promiscuous mode Driver-specific Packet Traps ============================ diff --git a/include/net/devlink.h b/include/net/devlink.h index d12ed2854c34b..426b98e74b6e0 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -838,6 +838,7 @@ enum devlink_trap_generic_id { DEVLINK_TRAP_GENERIC_ID_GTP_PARSING, DEVLINK_TRAP_GENERIC_ID_ESP_PARSING, DEVLINK_TRAP_GENERIC_ID_BLACKHOLE_NEXTHOP, + DEVLINK_TRAP_GENERIC_ID_DMAC_FILTER, /* Add new generic trap IDs above */ __DEVLINK_TRAP_GENERIC_ID_MAX, @@ -1063,6 +1064,8 @@ enum devlink_trap_group_generic_id { "esp_parsing" #define DEVLINK_TRAP_GENERIC_NAME_BLACKHOLE_NEXTHOP \ "blackhole_nexthop" +#define DEVLINK_TRAP_GENERIC_NAME_DMAC_FILTER \ + "dest_mac_filter" #define DEVLINK_TRAP_GROUP_GENERIC_NAME_L2_DROPS \ "l2_drops" diff --git a/net/core/devlink.c b/net/core/devlink.c index 72ea798797627..f6e6445b9801e 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -9512,6 +9512,7 @@ static const struct devlink_trap devlink_trap_generic[] = { DEVLINK_TRAP(GTP_PARSING, DROP), DEVLINK_TRAP(ESP_PARSING, DROP), DEVLINK_TRAP(BLACKHOLE_NEXTHOP, DROP), + DEVLINK_TRAP(DMAC_FILTER, DROP), }; #define DEVLINK_TRAP_GROUP(_id) \ -- GitLab From 3d347b1b19da20f973d1d3c6bb60c11185606afd Mon Sep 17 00:00:00 2001 From: Aya Levin Date: Tue, 26 Jan 2021 15:24:07 -0800 Subject: [PATCH 1596/2012] net/mlx5: Add support for devlink traps in mlx5 core driver Add devlink traps infra-structure to mlx5 core driver. Add traps list to mlx5_priv and corresponding API: - mlx5_devlink_trap_report() to wrap trap reports to devlink - mlx5_devlink_trap_get_num_active() to decide whether to open/close trap resources. Signed-off-by: Aya Levin Reviewed-by: Tariq Toukan Signed-off-by: Tariq Toukan Signed-off-by: Saeed Mahameed Signed-off-by: Jakub Kicinski --- .../net/ethernet/mellanox/mlx5/core/devlink.c | 84 +++++++++++++++++++ .../net/ethernet/mellanox/mlx5/core/devlink.h | 16 ++++ .../net/ethernet/mellanox/mlx5/core/main.c | 2 + include/linux/mlx5/driver.h | 1 + 4 files changed, 103 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c index 3261d0dc11044..f04afaf785cb1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c @@ -168,6 +168,56 @@ static int mlx5_devlink_reload_up(struct devlink *devlink, enum devlink_reload_a return 0; } +static struct mlx5_devlink_trap *mlx5_find_trap_by_id(struct mlx5_core_dev *dev, int trap_id) +{ + struct mlx5_devlink_trap *dl_trap; + + list_for_each_entry(dl_trap, &dev->priv.traps, list) + if (dl_trap->trap.id == trap_id) + return dl_trap; + + return NULL; +} + +static int mlx5_devlink_trap_init(struct devlink *devlink, const struct devlink_trap *trap, + void *trap_ctx) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + struct mlx5_devlink_trap *dl_trap; + + dl_trap = kzalloc(sizeof(*dl_trap), GFP_KERNEL); + if (!dl_trap) + return -ENOMEM; + + dl_trap->trap.id = trap->id; + dl_trap->trap.action = DEVLINK_TRAP_ACTION_DROP; + dl_trap->item = trap_ctx; + + if (mlx5_find_trap_by_id(dev, trap->id)) { + kfree(dl_trap); + mlx5_core_err(dev, "Devlink trap: Trap 0x%x already found", trap->id); + return -EEXIST; + } + + list_add_tail(&dl_trap->list, &dev->priv.traps); + return 0; +} + +static void mlx5_devlink_trap_fini(struct devlink *devlink, const struct devlink_trap *trap, + void *trap_ctx) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + struct mlx5_devlink_trap *dl_trap; + + dl_trap = mlx5_find_trap_by_id(dev, trap->id); + if (!dl_trap) { + mlx5_core_err(dev, "Devlink trap: Missing trap id 0x%x", trap->id); + return; + } + list_del(&dl_trap->list); + kfree(dl_trap); +} + static const struct devlink_ops mlx5_devlink_ops = { #ifdef CONFIG_MLX5_ESWITCH .eswitch_mode_set = mlx5_devlink_eswitch_mode_set, @@ -186,8 +236,42 @@ static const struct devlink_ops mlx5_devlink_ops = { .reload_limits = BIT(DEVLINK_RELOAD_LIMIT_NO_RESET), .reload_down = mlx5_devlink_reload_down, .reload_up = mlx5_devlink_reload_up, + .trap_init = mlx5_devlink_trap_init, + .trap_fini = mlx5_devlink_trap_fini, }; +void mlx5_devlink_trap_report(struct mlx5_core_dev *dev, int trap_id, struct sk_buff *skb, + struct devlink_port *dl_port) +{ + struct devlink *devlink = priv_to_devlink(dev); + struct mlx5_devlink_trap *dl_trap; + + dl_trap = mlx5_find_trap_by_id(dev, trap_id); + if (!dl_trap) { + mlx5_core_err(dev, "Devlink trap: Report on invalid trap id 0x%x", trap_id); + return; + } + + if (dl_trap->trap.action != DEVLINK_TRAP_ACTION_TRAP) { + mlx5_core_dbg(dev, "Devlink trap: Trap id %d has action %d", trap_id, + dl_trap->trap.action); + return; + } + devlink_trap_report(devlink, skb, dl_trap->item, dl_port, NULL); +} + +int mlx5_devlink_trap_get_num_active(struct mlx5_core_dev *dev) +{ + struct mlx5_devlink_trap *dl_trap; + int count = 0; + + list_for_each_entry(dl_trap, &dev->priv.traps, list) + if (dl_trap->trap.action == DEVLINK_TRAP_ACTION_TRAP) + count++; + + return count; +} + struct devlink *mlx5_devlink_alloc(void) { return devlink_alloc(&mlx5_devlink_ops, sizeof(struct mlx5_core_dev)); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.h b/drivers/net/ethernet/mellanox/mlx5/core/devlink.h index f0de327a59bed..a9829006fa789 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.h @@ -12,6 +12,22 @@ enum mlx5_devlink_param_id { MLX5_DEVLINK_PARAM_ID_ESW_LARGE_GROUP_NUM, }; +struct mlx5_trap_ctx { + int id; + int action; +}; + +struct mlx5_devlink_trap { + struct mlx5_trap_ctx trap; + void *item; + struct list_head list; +}; + +struct mlx5_core_dev; +void mlx5_devlink_trap_report(struct mlx5_core_dev *dev, int trap_id, struct sk_buff *skb, + struct devlink_port *dl_port); +int mlx5_devlink_trap_get_num_active(struct mlx5_core_dev *dev); + struct devlink *mlx5_devlink_alloc(void); void mlx5_devlink_free(struct devlink *devlink); int mlx5_devlink_register(struct devlink *devlink, struct device *dev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index ca6f2fc39ea0a..bfedf064db1ae 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -1305,6 +1305,8 @@ static int mlx5_mdev_init(struct mlx5_core_dev *dev, int profile_idx) priv->dbg_root = debugfs_create_dir(dev_name(dev->device), mlx5_debugfs_root); + INIT_LIST_HEAD(&priv->traps); + err = mlx5_health_init(dev); if (err) goto err_health_init; diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index f93bfe7473aa7..c4615dc51b6f8 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -564,6 +564,7 @@ struct mlx5_priv { int host_pf_pages; struct mlx5_core_health health; + struct list_head traps; /* start: qp staff */ struct dentry *qp_debugfs; -- GitLab From 82e6c96f04e13c72d91777455836ffd012853caa Mon Sep 17 00:00:00 2001 From: Aya Levin Date: Tue, 26 Jan 2021 15:24:08 -0800 Subject: [PATCH 1597/2012] net/mlx5: Register to devlink ingress VLAN filter trap Add traps registration to mlx5_core devlink register/unregister flow. This patch registers INGRESS_VLAN_FILTER trap. Signed-off-by: Aya Levin Reviewed-by: Tariq Toukan Signed-off-by: Tariq Toukan Signed-off-by: Saeed Mahameed Signed-off-by: Jakub Kicinski --- .../net/ethernet/mellanox/mlx5/core/devlink.c | 51 +++++++++++++++++++ 1 file changed, 51 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c index f04afaf785cb1..8c2f886ac78e8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c @@ -442,6 +442,48 @@ static void mlx5_devlink_set_params_init_values(struct devlink *devlink) #endif } +#define MLX5_TRAP_DROP(_id, _group_id) \ + DEVLINK_TRAP_GENERIC(DROP, DROP, _id, \ + DEVLINK_TRAP_GROUP_GENERIC_ID_##_group_id, \ + DEVLINK_TRAP_METADATA_TYPE_F_IN_PORT) + +static const struct devlink_trap mlx5_traps_arr[] = { + MLX5_TRAP_DROP(INGRESS_VLAN_FILTER, L2_DROPS), +}; + +static const struct devlink_trap_group mlx5_trap_groups_arr[] = { + DEVLINK_TRAP_GROUP_GENERIC(L2_DROPS, 0), +}; + +static int mlx5_devlink_traps_register(struct devlink *devlink) +{ + struct mlx5_core_dev *core_dev = devlink_priv(devlink); + int err; + + err = devlink_trap_groups_register(devlink, mlx5_trap_groups_arr, + ARRAY_SIZE(mlx5_trap_groups_arr)); + if (err) + return err; + + err = devlink_traps_register(devlink, mlx5_traps_arr, ARRAY_SIZE(mlx5_traps_arr), + &core_dev->priv); + if (err) + goto err_trap_group; + return 0; + +err_trap_group: + devlink_trap_groups_unregister(devlink, mlx5_trap_groups_arr, + ARRAY_SIZE(mlx5_trap_groups_arr)); + return err; +} + +static void mlx5_devlink_traps_unregister(struct devlink *devlink) +{ + devlink_traps_unregister(devlink, mlx5_traps_arr, ARRAY_SIZE(mlx5_traps_arr)); + devlink_trap_groups_unregister(devlink, mlx5_trap_groups_arr, + ARRAY_SIZE(mlx5_trap_groups_arr)); +} + int mlx5_devlink_register(struct devlink *devlink, struct device *dev) { int err; @@ -456,8 +498,16 @@ int mlx5_devlink_register(struct devlink *devlink, struct device *dev) goto params_reg_err; mlx5_devlink_set_params_init_values(devlink); devlink_params_publish(devlink); + + err = mlx5_devlink_traps_register(devlink); + if (err) + goto traps_reg_err; + return 0; +traps_reg_err: + devlink_params_unregister(devlink, mlx5_devlink_params, + ARRAY_SIZE(mlx5_devlink_params)); params_reg_err: devlink_unregister(devlink); return err; @@ -465,6 +515,7 @@ params_reg_err: void mlx5_devlink_unregister(struct devlink *devlink) { + mlx5_devlink_traps_unregister(devlink); devlink_params_unregister(devlink, mlx5_devlink_params, ARRAY_SIZE(mlx5_devlink_params)); devlink_unregister(devlink); -- GitLab From f679247f25b65cf71298e25d6850bc4bac2c9802 Mon Sep 17 00:00:00 2001 From: Aya Levin Date: Tue, 26 Jan 2021 15:24:09 -0800 Subject: [PATCH 1598/2012] net/mlx5: Register to devlink DMAC filter trap Core driver is registered to the devlink traps service, which enables the admin to redeem packets that were destined to be dropped due to a particular reason. Register to DMAC filter, allow visibility of packets that were filtered out by the MAC table. Signed-off-by: Aya Levin Reviewed-by: Moshe Shemesh Reviewed-by: Tariq Toukan Signed-off-by: Tariq Toukan Signed-off-by: Saeed Mahameed Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/devlink.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c index 8c2f886ac78e8..f081eff9be25d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c @@ -449,6 +449,7 @@ static void mlx5_devlink_set_params_init_values(struct devlink *devlink) static const struct devlink_trap mlx5_traps_arr[] = { MLX5_TRAP_DROP(INGRESS_VLAN_FILTER, L2_DROPS), + MLX5_TRAP_DROP(DMAC_FILTER, L2_DROPS), }; static const struct devlink_trap_group mlx5_trap_groups_arr[] = { -- GitLab From 3eac5d949afeca60982165e6fc4cece6f5882843 Mon Sep 17 00:00:00 2001 From: Aya Levin Date: Tue, 26 Jan 2021 15:24:10 -0800 Subject: [PATCH 1599/2012] net/mlx5: Rename events notifier header Change the naming of events notifier head to clarify that it handles only firmware events. Coming patches in the set, add event notifier for software events. Signed-off-by: Aya Levin Reviewed-by: Tariq Toukan Signed-off-by: Tariq Toukan Signed-off-by: Saeed Mahameed Signed-off-by: Jakub Kicinski --- .../net/ethernet/mellanox/mlx5/core/events.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/events.c b/drivers/net/ethernet/mellanox/mlx5/core/events.c index 3ce17c3d7a001..054c0bc36d241 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/events.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/events.c @@ -23,7 +23,7 @@ static int temp_warn(struct notifier_block *, unsigned long, void *); static int port_module(struct notifier_block *, unsigned long, void *); static int pcie_core(struct notifier_block *, unsigned long, void *); -/* handler which forwards the event to events->nh, driver notifiers */ +/* handler which forwards the event to events->fw_nh, driver notifiers */ static int forward_event(struct notifier_block *, unsigned long, void *); static struct mlx5_nb events_nbs_ref[] = { @@ -55,8 +55,8 @@ struct mlx5_events { struct mlx5_core_dev *dev; struct workqueue_struct *wq; struct mlx5_event_nb notifiers[ARRAY_SIZE(events_nbs_ref)]; - /* driver notifier chain */ - struct atomic_notifier_head nh; + /* driver notifier chain for fw events */ + struct atomic_notifier_head fw_nh; /* port module events stats */ struct mlx5_pme_stats pme_stats; /*pcie_core*/ @@ -331,7 +331,7 @@ static int forward_event(struct notifier_block *nb, unsigned long event, void *d mlx5_core_dbg(events->dev, "Async eqe type %s, subtype (%d) forward to interfaces\n", eqe_type_str(eqe->type), eqe->sub_type); - atomic_notifier_call_chain(&events->nh, event, data); + atomic_notifier_call_chain(&events->fw_nh, event, data); return NOTIFY_OK; } @@ -342,7 +342,7 @@ int mlx5_events_init(struct mlx5_core_dev *dev) if (!events) return -ENOMEM; - ATOMIC_INIT_NOTIFIER_HEAD(&events->nh); + ATOMIC_INIT_NOTIFIER_HEAD(&events->fw_nh); events->dev = dev; dev->priv.events = events; events->wq = create_singlethread_workqueue("mlx5_events"); @@ -383,11 +383,14 @@ void mlx5_events_stop(struct mlx5_core_dev *dev) flush_workqueue(events->wq); } +/* This API is used only for processing and forwarding firmware + * events to mlx5 consumer. + */ int mlx5_notifier_register(struct mlx5_core_dev *dev, struct notifier_block *nb) { struct mlx5_events *events = dev->priv.events; - return atomic_notifier_chain_register(&events->nh, nb); + return atomic_notifier_chain_register(&events->fw_nh, nb); } EXPORT_SYMBOL(mlx5_notifier_register); @@ -395,11 +398,11 @@ int mlx5_notifier_unregister(struct mlx5_core_dev *dev, struct notifier_block *n { struct mlx5_events *events = dev->priv.events; - return atomic_notifier_chain_unregister(&events->nh, nb); + return atomic_notifier_chain_unregister(&events->fw_nh, nb); } EXPORT_SYMBOL(mlx5_notifier_unregister); int mlx5_notifier_call_chain(struct mlx5_events *events, unsigned int event, void *data) { - return atomic_notifier_call_chain(&events->nh, event, data); + return atomic_notifier_call_chain(&events->fw_nh, event, data); } -- GitLab From 241dc159391fb9d351362d911a39dff84074cc92 Mon Sep 17 00:00:00 2001 From: Aya Levin Date: Tue, 26 Jan 2021 15:24:11 -0800 Subject: [PATCH 1600/2012] net/mlx5: Notify on trap action by blocking event In order to allow mlx5 core driver to trigger synchronous operations to its consumers, add a blocking events handler. Add wrappers to blocking_notifier_[call_chain/chain_register/chain_unregister]. Add trap callback for action set and notify about this change. Following patches in the set add a listener for this event. Signed-off-by: Aya Levin Reviewed-by: Tariq Toukan Signed-off-by: Tariq Toukan Signed-off-by: Saeed Mahameed Signed-off-by: Jakub Kicinski --- .../net/ethernet/mellanox/mlx5/core/devlink.c | 36 +++++++++++++++++++ .../net/ethernet/mellanox/mlx5/core/events.c | 28 +++++++++++++++ include/linux/mlx5/device.h | 4 +++ include/linux/mlx5/driver.h | 15 ++++++++ 4 files changed, 83 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c index f081eff9be25d..c47291467cb0c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c @@ -218,6 +218,41 @@ static void mlx5_devlink_trap_fini(struct devlink *devlink, const struct devlink kfree(dl_trap); } +static int mlx5_devlink_trap_action_set(struct devlink *devlink, + const struct devlink_trap *trap, + enum devlink_trap_action action, + struct netlink_ext_ack *extack) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + enum devlink_trap_action action_orig; + struct mlx5_devlink_trap *dl_trap; + int err = 0; + + dl_trap = mlx5_find_trap_by_id(dev, trap->id); + if (!dl_trap) { + mlx5_core_err(dev, "Devlink trap: Set action on invalid trap id 0x%x", trap->id); + err = -EINVAL; + goto out; + } + + if (action != DEVLINK_TRAP_ACTION_DROP && action != DEVLINK_TRAP_ACTION_TRAP) { + err = -EOPNOTSUPP; + goto out; + } + + if (action == dl_trap->trap.action) + goto out; + + action_orig = dl_trap->trap.action; + dl_trap->trap.action = action; + err = mlx5_blocking_notifier_call_chain(dev, MLX5_DRIVER_EVENT_TYPE_TRAP, + &dl_trap->trap); + if (err) + dl_trap->trap.action = action_orig; +out: + return err; +} + static const struct devlink_ops mlx5_devlink_ops = { #ifdef CONFIG_MLX5_ESWITCH .eswitch_mode_set = mlx5_devlink_eswitch_mode_set, @@ -238,6 +273,7 @@ static const struct devlink_ops mlx5_devlink_ops = { .reload_up = mlx5_devlink_reload_up, .trap_init = mlx5_devlink_trap_init, .trap_fini = mlx5_devlink_trap_fini, + .trap_action_set = mlx5_devlink_trap_action_set, }; void mlx5_devlink_trap_report(struct mlx5_core_dev *dev, int trap_id, struct sk_buff *skb, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/events.c b/drivers/net/ethernet/mellanox/mlx5/core/events.c index 054c0bc36d241..670f25f5ffd1f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/events.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/events.c @@ -61,6 +61,8 @@ struct mlx5_events { struct mlx5_pme_stats pme_stats; /*pcie_core*/ struct work_struct pcie_core_work; + /* driver notifier chain for sw events */ + struct blocking_notifier_head sw_nh; }; static const char *eqe_type_str(u8 type) @@ -351,6 +353,7 @@ int mlx5_events_init(struct mlx5_core_dev *dev) return -ENOMEM; } INIT_WORK(&events->pcie_core_work, mlx5_pcie_event); + BLOCKING_INIT_NOTIFIER_HEAD(&events->sw_nh); return 0; } @@ -406,3 +409,28 @@ int mlx5_notifier_call_chain(struct mlx5_events *events, unsigned int event, voi { return atomic_notifier_call_chain(&events->fw_nh, event, data); } + +/* This API is used only for processing and forwarding driver-specific + * events to mlx5 consumers. + */ +int mlx5_blocking_notifier_register(struct mlx5_core_dev *dev, struct notifier_block *nb) +{ + struct mlx5_events *events = dev->priv.events; + + return blocking_notifier_chain_register(&events->sw_nh, nb); +} + +int mlx5_blocking_notifier_unregister(struct mlx5_core_dev *dev, struct notifier_block *nb) +{ + struct mlx5_events *events = dev->priv.events; + + return blocking_notifier_chain_unregister(&events->sw_nh, nb); +} + +int mlx5_blocking_notifier_call_chain(struct mlx5_core_dev *dev, unsigned int event, + void *data) +{ + struct mlx5_events *events = dev->priv.events; + + return blocking_notifier_call_chain(&events->sw_nh, event, data); +} diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index f1de49d64a988..77ba54d38772d 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -359,6 +359,10 @@ enum mlx5_event { MLX5_EVENT_TYPE_MAX = 0x100, }; +enum mlx5_driver_event { + MLX5_DRIVER_EVENT_TYPE_TRAP = 0, +}; + enum { MLX5_TRACER_SUBTYPE_OWNERSHIP_CHANGE = 0x0, MLX5_TRACER_SUBTYPE_TRACES_AVAILABLE = 0x1, diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index c4615dc51b6f8..45df5b465ba84 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -1073,11 +1073,26 @@ enum { MAX_MR_CACHE_ENTRIES }; +/* Async-atomic event notifier used by mlx5 core to forward FW + * evetns recived from event queue to mlx5 consumers. + * Optimise event queue dipatching. + */ int mlx5_notifier_register(struct mlx5_core_dev *dev, struct notifier_block *nb); int mlx5_notifier_unregister(struct mlx5_core_dev *dev, struct notifier_block *nb); + +/* Async-atomic event notifier used for forwarding + * evetns from the event queue into the to mlx5 events dispatcher, + * eswitch, clock and others. + */ int mlx5_eq_notifier_register(struct mlx5_core_dev *dev, struct mlx5_nb *nb); int mlx5_eq_notifier_unregister(struct mlx5_core_dev *dev, struct mlx5_nb *nb); +/* Blocking event notifier used to forward SW events, used for slow path */ +int mlx5_blocking_notifier_register(struct mlx5_core_dev *dev, struct notifier_block *nb); +int mlx5_blocking_notifier_unregister(struct mlx5_core_dev *dev, struct notifier_block *nb); +int mlx5_blocking_notifier_call_chain(struct mlx5_core_dev *dev, unsigned int event, + void *data); + int mlx5_core_query_vendor_id(struct mlx5_core_dev *mdev, u32 *vendor_id); int mlx5_cmd_create_vport_lag(struct mlx5_core_dev *dev); -- GitLab From 1c46d7409f301592731f941a7ec6c51cb6b54b0b Mon Sep 17 00:00:00 2001 From: Aya Levin Date: Tue, 26 Jan 2021 15:24:12 -0800 Subject: [PATCH 1601/2012] net/mlx5e: Optimize promiscuous mode Change steering flow to optimize traffic in promiscuous mode. On demand, add a high priority table containing a catch-all rule. All incoming packets are caught by this rule and steered directly to the TTC table. Prior to this change, packets in promiscuous mode may suffer from up to 4 steering hops before reaching TTC table. In addition, this patch will allow us adding a catch-all rule at the end of MAC table to serve MAC trap, with no impact on promiscuous mode performance. Signed-off-by: Aya Levin Reviewed-by: Moshe Shemesh Reviewed-by: Maor Gottlieb Reviewed-by: Tariq Toukan Signed-off-by: Tariq Toukan Signed-off-by: Saeed Mahameed Signed-off-by: Jakub Kicinski --- .../net/ethernet/mellanox/mlx5/core/en/fs.h | 10 +- .../net/ethernet/mellanox/mlx5/core/en_fs.c | 120 +++++++++++++----- .../net/ethernet/mellanox/mlx5/core/fs_core.c | 4 +- 3 files changed, 100 insertions(+), 34 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h b/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h index 5749557749b0b..abe57f032b2de 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h @@ -44,6 +44,11 @@ struct mlx5e_l2_rule { #define MLX5E_L2_ADDR_HASH_SIZE BIT(BITS_PER_BYTE) +struct mlx5e_promisc_table { + struct mlx5e_flow_table ft; + struct mlx5_flow_handle *rule; +}; + struct mlx5e_vlan_table { struct mlx5e_flow_table ft; DECLARE_BITMAP(active_cvlans, VLAN_N_VID); @@ -62,7 +67,6 @@ struct mlx5e_l2_table { struct hlist_head netdev_mc[MLX5E_L2_ADDR_HASH_SIZE]; struct mlx5e_l2_rule broadcast; struct mlx5e_l2_rule allmulti; - struct mlx5e_l2_rule promisc; bool broadcast_enabled; bool allmulti_enabled; bool promisc_enabled; @@ -126,7 +130,8 @@ struct mlx5e_ttc_table { /* NIC prio FTS */ enum { - MLX5E_VLAN_FT_LEVEL = 0, + MLX5E_PROMISC_FT_LEVEL, + MLX5E_VLAN_FT_LEVEL, MLX5E_L2_FT_LEVEL, MLX5E_TTC_FT_LEVEL, MLX5E_INNER_TTC_FT_LEVEL, @@ -241,6 +246,7 @@ struct mlx5e_flow_steering { struct mlx5e_ethtool_steering ethtool; #endif struct mlx5e_tc_table tc; + struct mlx5e_promisc_table promisc; struct mlx5e_vlan_table vlan; struct mlx5e_l2_table l2; struct mlx5e_ttc_table ttc; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c index e02e5895703d5..a2db550c982ec 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c @@ -46,7 +46,6 @@ static void mlx5e_del_l2_flow_rule(struct mlx5e_priv *priv, enum { MLX5E_FULLMATCH = 0, MLX5E_ALLMULTI = 1, - MLX5E_PROMISC = 2, }; enum { @@ -596,6 +595,83 @@ static void mlx5e_handle_netdev_addr(struct mlx5e_priv *priv) mlx5e_apply_netdev_addr(priv); } +#define MLX5E_PROMISC_GROUP0_SIZE BIT(0) +#define MLX5E_PROMISC_TABLE_SIZE MLX5E_PROMISC_GROUP0_SIZE + +static int mlx5e_add_promisc_rule(struct mlx5e_priv *priv) +{ + struct mlx5_flow_table *ft = priv->fs.promisc.ft.t; + struct mlx5_flow_destination dest = {}; + struct mlx5_flow_handle **rule_p; + MLX5_DECLARE_FLOW_ACT(flow_act); + struct mlx5_flow_spec *spec; + int err = 0; + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) + return -ENOMEM; + dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + dest.ft = priv->fs.ttc.ft.t; + + rule_p = &priv->fs.promisc.rule; + *rule_p = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1); + if (IS_ERR(*rule_p)) { + err = PTR_ERR(*rule_p); + *rule_p = NULL; + netdev_err(priv->netdev, "%s: add promiscuous rule failed\n", __func__); + } + kvfree(spec); + return err; +} + +static int mlx5e_create_promisc_table(struct mlx5e_priv *priv) +{ + struct mlx5e_flow_table *ft = &priv->fs.promisc.ft; + struct mlx5_flow_table_attr ft_attr = {}; + int err; + + ft_attr.max_fte = MLX5E_PROMISC_TABLE_SIZE; + ft_attr.autogroup.max_num_groups = 1; + ft_attr.level = MLX5E_PROMISC_FT_LEVEL; + ft_attr.prio = MLX5E_NIC_PRIO; + + ft->t = mlx5_create_auto_grouped_flow_table(priv->fs.ns, &ft_attr); + if (IS_ERR(ft->t)) { + err = PTR_ERR(ft->t); + netdev_err(priv->netdev, "fail to create promisc table err=%d\n", err); + return err; + } + + err = mlx5e_add_promisc_rule(priv); + if (err) + goto err_destroy_promisc_table; + + return 0; + +err_destroy_promisc_table: + mlx5_destroy_flow_table(ft->t); + ft->t = NULL; + + return err; +} + +static void mlx5e_del_promisc_rule(struct mlx5e_priv *priv) +{ + if (WARN(!priv->fs.promisc.rule, "Trying to remove non-existing promiscuous rule")) + return; + mlx5_del_flow_rules(priv->fs.promisc.rule); + priv->fs.promisc.rule = NULL; +} + +static void mlx5e_destroy_promisc_table(struct mlx5e_priv *priv) +{ + if (WARN(!priv->fs.promisc.ft.t, "Trying to remove non-existing promiscuous table")) + return; + mlx5e_del_promisc_rule(priv); + mlx5_destroy_flow_table(priv->fs.promisc.ft.t); + priv->fs.promisc.ft.t = NULL; +} + void mlx5e_set_rx_mode_work(struct work_struct *work) { struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv, @@ -615,14 +691,15 @@ void mlx5e_set_rx_mode_work(struct work_struct *work) bool disable_allmulti = ea->allmulti_enabled && !allmulti_enabled; bool enable_broadcast = !ea->broadcast_enabled && broadcast_enabled; bool disable_broadcast = ea->broadcast_enabled && !broadcast_enabled; + int err; if (enable_promisc) { - if (!priv->channels.params.vlan_strip_disable) + err = mlx5e_create_promisc_table(priv); + if (err) + enable_promisc = false; + if (!priv->channels.params.vlan_strip_disable && !err) netdev_warn_once(ndev, "S-tagged traffic will be dropped while C-tag vlan stripping is enabled\n"); - mlx5e_add_l2_flow_rule(priv, &ea->promisc, MLX5E_PROMISC); - if (!priv->fs.vlan.cvlan_filter_disabled) - mlx5e_add_any_vid_rules(priv); } if (enable_allmulti) mlx5e_add_l2_flow_rule(priv, &ea->allmulti, MLX5E_ALLMULTI); @@ -635,11 +712,8 @@ void mlx5e_set_rx_mode_work(struct work_struct *work) mlx5e_del_l2_flow_rule(priv, &ea->broadcast); if (disable_allmulti) mlx5e_del_l2_flow_rule(priv, &ea->allmulti); - if (disable_promisc) { - if (!priv->fs.vlan.cvlan_filter_disabled) - mlx5e_del_any_vid_rules(priv); - mlx5e_del_l2_flow_rule(priv, &ea->promisc); - } + if (disable_promisc) + mlx5e_destroy_promisc_table(priv); ea->promisc_enabled = promisc_enabled; ea->allmulti_enabled = allmulti_enabled; @@ -1306,9 +1380,6 @@ static int mlx5e_add_l2_flow_rule(struct mlx5e_priv *priv, mc_dmac[0] = 0x01; mv_dmac[0] = 0x01; break; - - case MLX5E_PROMISC: - break; } ai->rule = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1); @@ -1324,13 +1395,11 @@ static int mlx5e_add_l2_flow_rule(struct mlx5e_priv *priv, return err; } -#define MLX5E_NUM_L2_GROUPS 3 -#define MLX5E_L2_GROUP1_SIZE BIT(0) -#define MLX5E_L2_GROUP2_SIZE BIT(15) -#define MLX5E_L2_GROUP3_SIZE BIT(0) +#define MLX5E_NUM_L2_GROUPS 2 +#define MLX5E_L2_GROUP1_SIZE BIT(15) +#define MLX5E_L2_GROUP2_SIZE BIT(0) #define MLX5E_L2_TABLE_SIZE (MLX5E_L2_GROUP1_SIZE +\ - MLX5E_L2_GROUP2_SIZE +\ - MLX5E_L2_GROUP3_SIZE) + MLX5E_L2_GROUP2_SIZE) static int mlx5e_create_l2_table_groups(struct mlx5e_l2_table *l2_table) { int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); @@ -1353,20 +1422,11 @@ static int mlx5e_create_l2_table_groups(struct mlx5e_l2_table *l2_table) mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria); mc_dmac = MLX5_ADDR_OF(fte_match_param, mc, outer_headers.dmac_47_16); - /* Flow Group for promiscuous */ - MLX5_SET_CFG(in, start_flow_index, ix); - ix += MLX5E_L2_GROUP1_SIZE; - MLX5_SET_CFG(in, end_flow_index, ix - 1); - ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); - if (IS_ERR(ft->g[ft->num_groups])) - goto err_destroy_groups; - ft->num_groups++; - /* Flow Group for full match */ eth_broadcast_addr(mc_dmac); MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); MLX5_SET_CFG(in, start_flow_index, ix); - ix += MLX5E_L2_GROUP2_SIZE; + ix += MLX5E_L2_GROUP1_SIZE; MLX5_SET_CFG(in, end_flow_index, ix - 1); ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); if (IS_ERR(ft->g[ft->num_groups])) @@ -1377,7 +1437,7 @@ static int mlx5e_create_l2_table_groups(struct mlx5e_l2_table *l2_table) eth_zero_addr(mc_dmac); mc_dmac[0] = 0x01; MLX5_SET_CFG(in, start_flow_index, ix); - ix += MLX5E_L2_GROUP3_SIZE; + ix += MLX5E_L2_GROUP2_SIZE; MLX5_SET_CFG(in, end_flow_index, ix - 1); ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); if (IS_ERR(ft->g[ft->num_groups])) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index b899539a07860..3dbd63b9845d5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -105,8 +105,8 @@ #define ETHTOOL_PRIO_NUM_LEVELS 1 #define ETHTOOL_NUM_PRIOS 11 #define ETHTOOL_MIN_LEVEL (KERNEL_MIN_LEVEL + ETHTOOL_NUM_PRIOS) -/* Vlan, mac, ttc, inner ttc, {aRFS/accel and esp/esp_err} */ -#define KERNEL_NIC_PRIO_NUM_LEVELS 6 +/* Promiscuous, Vlan, mac, ttc, inner ttc, {aRFS/accel and esp/esp_err} */ +#define KERNEL_NIC_PRIO_NUM_LEVELS 7 #define KERNEL_NIC_NUM_PRIOS 1 /* One more level for tc */ #define KERNEL_MIN_LEVEL (KERNEL_NIC_PRIO_NUM_LEVELS + 1) -- GitLab From e2a1a00498aea4e3bf31b65b8691d2e7fc7e3693 Mon Sep 17 00:00:00 2001 From: Aya Levin Date: Tue, 26 Jan 2021 15:24:13 -0800 Subject: [PATCH 1602/2012] net/mlx5e: Add flow steering VLAN trap rule Add flow group to the VLAN table to hold the catch-all VLAN rule. Add API which adds/removes VLAN trap rule. This rule catches packets that were destined to be dropped due to no-match with previous VLAN rules. The trap rule steer these packets to the trap tir related to the trap-RQ. Signed-off-by: Aya Levin Reviewed-by: Moshe Shemesh Reviewed-by: Tariq Toukan Signed-off-by: Tariq Toukan Signed-off-by: Saeed Mahameed Signed-off-by: Jakub Kicinski --- .../net/ethernet/mellanox/mlx5/core/en/fs.h | 3 + .../net/ethernet/mellanox/mlx5/core/en_fs.c | 64 ++++++++++++++++++- 2 files changed, 65 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h b/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h index abe57f032b2de..688183a03e23d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h @@ -58,6 +58,7 @@ struct mlx5e_vlan_table { struct mlx5_flow_handle *untagged_rule; struct mlx5_flow_handle *any_cvlan_rule; struct mlx5_flow_handle *any_svlan_rule; + struct mlx5_flow_handle *trap_rule; bool cvlan_filter_disabled; }; @@ -294,6 +295,8 @@ int mlx5e_create_flow_steering(struct mlx5e_priv *priv); void mlx5e_destroy_flow_steering(struct mlx5e_priv *priv); u8 mlx5e_get_proto_by_tunnel_type(enum mlx5e_tunnel_types tt); +int mlx5e_add_vlan_trap(struct mlx5e_priv *priv, int trap_id, int tir_num); +void mlx5e_remove_vlan_trap(struct mlx5e_priv *priv); #endif /* __MLX5E_FLOW_STEER_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c index a2db550c982ec..b7637a2ffd125 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c @@ -305,6 +305,53 @@ static int mlx5e_add_any_vid_rules(struct mlx5e_priv *priv) return mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_STAG_VID, 0); } +static struct mlx5_flow_handle * +mlx5e_add_trap_rule(struct mlx5_flow_table *ft, int trap_id, int tir_num) +{ + struct mlx5_flow_destination dest = {}; + MLX5_DECLARE_FLOW_ACT(flow_act); + struct mlx5_flow_handle *rule; + struct mlx5_flow_spec *spec; + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) + return ERR_PTR(-ENOMEM); + spec->flow_context.flags |= FLOW_CONTEXT_HAS_TAG; + spec->flow_context.flow_tag = trap_id; + dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR; + dest.tir_num = tir_num; + + rule = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1); + kvfree(spec); + return rule; +} + +int mlx5e_add_vlan_trap(struct mlx5e_priv *priv, int trap_id, int tir_num) +{ + struct mlx5_flow_table *ft = priv->fs.vlan.ft.t; + struct mlx5_flow_handle *rule; + int err; + + rule = mlx5e_add_trap_rule(ft, trap_id, tir_num); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + priv->fs.vlan.trap_rule = NULL; + netdev_err(priv->netdev, "%s: add VLAN trap rule failed, err %d\n", + __func__, err); + return err; + } + priv->fs.vlan.trap_rule = rule; + return 0; +} + +void mlx5e_remove_vlan_trap(struct mlx5e_priv *priv) +{ + if (priv->fs.vlan.trap_rule) { + mlx5_del_flow_rules(priv->fs.vlan.trap_rule); + priv->fs.vlan.trap_rule = NULL; + } +} + void mlx5e_enable_cvlan_filter(struct mlx5e_priv *priv) { if (!priv->fs.vlan.cvlan_filter_disabled) @@ -418,6 +465,8 @@ static void mlx5e_del_vlan_rules(struct mlx5e_priv *priv) WARN_ON_ONCE(!(test_bit(MLX5E_STATE_DESTROYING, &priv->state))); + mlx5e_remove_vlan_trap(priv); + /* must be called after DESTROY bit is set and * set_rx_mode is called and flushed */ @@ -1495,15 +1544,17 @@ err_destroy_flow_table: return err; } -#define MLX5E_NUM_VLAN_GROUPS 4 +#define MLX5E_NUM_VLAN_GROUPS 5 #define MLX5E_VLAN_GROUP0_SIZE BIT(12) #define MLX5E_VLAN_GROUP1_SIZE BIT(12) #define MLX5E_VLAN_GROUP2_SIZE BIT(1) #define MLX5E_VLAN_GROUP3_SIZE BIT(0) +#define MLX5E_VLAN_GROUP_TRAP_SIZE BIT(0) /* must be last */ #define MLX5E_VLAN_TABLE_SIZE (MLX5E_VLAN_GROUP0_SIZE +\ MLX5E_VLAN_GROUP1_SIZE +\ MLX5E_VLAN_GROUP2_SIZE +\ - MLX5E_VLAN_GROUP3_SIZE) + MLX5E_VLAN_GROUP3_SIZE +\ + MLX5E_VLAN_GROUP_TRAP_SIZE) static int __mlx5e_create_vlan_table_groups(struct mlx5e_flow_table *ft, u32 *in, int inlen) @@ -1558,6 +1609,15 @@ static int __mlx5e_create_vlan_table_groups(struct mlx5e_flow_table *ft, u32 *in goto err_destroy_groups; ft->num_groups++; + memset(in, 0, inlen); + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5E_VLAN_GROUP_TRAP_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); + if (IS_ERR(ft->g[ft->num_groups])) + goto err_destroy_groups; + ft->num_groups++; + return 0; err_destroy_groups: -- GitLab From ceef1b66bddaaee3124f66cd0279189e29bd3f56 Mon Sep 17 00:00:00 2001 From: Aya Levin Date: Tue, 26 Jan 2021 15:24:14 -0800 Subject: [PATCH 1603/2012] net/mlx5e: Add flow steering DMAC trap rule Add flow group to the L2 table to hold the catch-all DMAC rule. Add API which adds/removes DMAC trap rule. This rule catches packets that were destined to be dropped due to no-match with previous DMAC rules. The trap rule steer these packets to the trap tir related to the trap-RQ. Signed-off-by: Aya Levin Reviewed-by: Moshe Shemesh Reviewed-by: Tariq Toukan Signed-off-by: Tariq Toukan Signed-off-by: Saeed Mahameed Signed-off-by: Jakub Kicinski --- .../net/ethernet/mellanox/mlx5/core/en/fs.h | 3 ++ .../net/ethernet/mellanox/mlx5/core/en_fs.c | 42 ++++++++++++++++++- 2 files changed, 43 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h b/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h index 688183a03e23d..a16297e7e2ace 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h @@ -68,6 +68,7 @@ struct mlx5e_l2_table { struct hlist_head netdev_mc[MLX5E_L2_ADDR_HASH_SIZE]; struct mlx5e_l2_rule broadcast; struct mlx5e_l2_rule allmulti; + struct mlx5_flow_handle *trap_rule; bool broadcast_enabled; bool allmulti_enabled; bool promisc_enabled; @@ -297,6 +298,8 @@ void mlx5e_destroy_flow_steering(struct mlx5e_priv *priv); u8 mlx5e_get_proto_by_tunnel_type(enum mlx5e_tunnel_types tt); int mlx5e_add_vlan_trap(struct mlx5e_priv *priv, int trap_id, int tir_num); void mlx5e_remove_vlan_trap(struct mlx5e_priv *priv); +int mlx5e_add_mac_trap(struct mlx5e_priv *priv, int trap_id, int tir_num); +void mlx5e_remove_mac_trap(struct mlx5e_priv *priv); #endif /* __MLX5E_FLOW_STEER_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c index b7637a2ffd125..16ce7756ac43f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c @@ -352,6 +352,32 @@ void mlx5e_remove_vlan_trap(struct mlx5e_priv *priv) } } +int mlx5e_add_mac_trap(struct mlx5e_priv *priv, int trap_id, int tir_num) +{ + struct mlx5_flow_table *ft = priv->fs.l2.ft.t; + struct mlx5_flow_handle *rule; + int err; + + rule = mlx5e_add_trap_rule(ft, trap_id, tir_num); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + priv->fs.l2.trap_rule = NULL; + netdev_err(priv->netdev, "%s: add MAC trap rule failed, err %d\n", + __func__, err); + return err; + } + priv->fs.l2.trap_rule = rule; + return 0; +} + +void mlx5e_remove_mac_trap(struct mlx5e_priv *priv) +{ + if (priv->fs.l2.trap_rule) { + mlx5_del_flow_rules(priv->fs.l2.trap_rule); + priv->fs.l2.trap_rule = NULL; + } +} + void mlx5e_enable_cvlan_filter(struct mlx5e_priv *priv) { if (!priv->fs.vlan.cvlan_filter_disabled) @@ -1444,11 +1470,13 @@ static int mlx5e_add_l2_flow_rule(struct mlx5e_priv *priv, return err; } -#define MLX5E_NUM_L2_GROUPS 2 +#define MLX5E_NUM_L2_GROUPS 3 #define MLX5E_L2_GROUP1_SIZE BIT(15) #define MLX5E_L2_GROUP2_SIZE BIT(0) +#define MLX5E_L2_GROUP_TRAP_SIZE BIT(0) /* must be last */ #define MLX5E_L2_TABLE_SIZE (MLX5E_L2_GROUP1_SIZE +\ - MLX5E_L2_GROUP2_SIZE) + MLX5E_L2_GROUP2_SIZE +\ + MLX5E_L2_GROUP_TRAP_SIZE) static int mlx5e_create_l2_table_groups(struct mlx5e_l2_table *l2_table) { int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); @@ -1493,6 +1521,16 @@ static int mlx5e_create_l2_table_groups(struct mlx5e_l2_table *l2_table) goto err_destroy_groups; ft->num_groups++; + /* Flow Group for l2 traps */ + memset(in, 0, inlen); + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5E_L2_GROUP_TRAP_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); + if (IS_ERR(ft->g[ft->num_groups])) + goto err_destroy_groups; + ft->num_groups++; + kvfree(in); return 0; -- GitLab From cf74760932602fb25d16c57e49dbc445c81d0ff1 Mon Sep 17 00:00:00 2001 From: Aya Levin Date: Tue, 26 Jan 2021 15:24:15 -0800 Subject: [PATCH 1604/2012] net/mlx5e: Expose RX dma info helpers In order to support RQs outside of channel context, change mlx5e_init_di_list() signature to accept NUMA node instead of cpu. In addition, expose dma info helpers as API. This API will be used for RQ's creation in other files in downstream patches. Signed-off-by: Aya Levin Reviewed-by: Moshe Shemesh Reviewed-by: Tariq Toukan Signed-off-by: Tariq Toukan Signed-off-by: Saeed Mahameed Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 2 ++ drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 10 ++++------ 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 26e578a973e57..dc4895a1fa9bc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -1072,6 +1072,8 @@ void mlx5e_destroy_q_counters(struct mlx5e_priv *priv); int mlx5e_open_drop_rq(struct mlx5e_priv *priv, struct mlx5e_rq *drop_rq); void mlx5e_close_drop_rq(struct mlx5e_rq *drop_rq); +int mlx5e_init_di_list(struct mlx5e_rq *rq, int wq_sz, int node); +void mlx5e_free_di_list(struct mlx5e_rq *rq); int mlx5e_create_indirect_rqt(struct mlx5e_priv *priv); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index b9a175982801b..bed2f1a6d7303 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -343,13 +343,11 @@ static void mlx5e_init_frags_partition(struct mlx5e_rq *rq) prev->last_in_page = true; } -static int mlx5e_init_di_list(struct mlx5e_rq *rq, - int wq_sz, int cpu) +int mlx5e_init_di_list(struct mlx5e_rq *rq, int wq_sz, int node) { int len = wq_sz << rq->wqe.info.log_num_frags; - rq->wqe.di = kvzalloc_node(array_size(len, sizeof(*rq->wqe.di)), - GFP_KERNEL, cpu_to_node(cpu)); + rq->wqe.di = kvzalloc_node(array_size(len, sizeof(*rq->wqe.di)), GFP_KERNEL, node); if (!rq->wqe.di) return -ENOMEM; @@ -358,7 +356,7 @@ static int mlx5e_init_di_list(struct mlx5e_rq *rq, return 0; } -static void mlx5e_free_di_list(struct mlx5e_rq *rq) +void mlx5e_free_di_list(struct mlx5e_rq *rq) { kvfree(rq->wqe.di); } @@ -500,7 +498,7 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c, goto err_rq_wq_destroy; } - err = mlx5e_init_di_list(rq, wq_sz, c->cpu); + err = mlx5e_init_di_list(rq, wq_sz, cpu_to_node(c->cpu)); if (err) goto err_rq_frags; -- GitLab From 5543e989fe5e2fe6a5829ee42c00152cac2bb8a0 Mon Sep 17 00:00:00 2001 From: Aya Levin Date: Tue, 26 Jan 2021 15:24:16 -0800 Subject: [PATCH 1605/2012] net/mlx5e: Add trap entity to ETH driver Introduce mlx5e_trap which includes a dedicated RQ and NAPI for trapped packets. Trap-RQ processes packets that were destined to be dropped, but for debug and visibility sake these packets are trapped and reported to devlink. Trap-RQ connects between the HW and the driver and is not a part of a channel. Open mlx5e_create_rq() and mlx5_core_destroy_rq() as API and add dedicate RQ handlers which report to devlink of trapped packets. Signed-off-by: Aya Levin Reviewed-by: Tariq Toukan Signed-off-by: Tariq Toukan Signed-off-by: Saeed Mahameed Signed-off-by: Jakub Kicinski --- .../net/ethernet/mellanox/mlx5/core/Makefile | 2 +- drivers/net/ethernet/mellanox/mlx5/core/en.h | 7 + .../net/ethernet/mellanox/mlx5/core/en/trap.c | 409 ++++++++++++++++++ .../net/ethernet/mellanox/mlx5/core/en/trap.h | 35 ++ .../net/ethernet/mellanox/mlx5/core/en_main.c | 5 +- .../net/ethernet/mellanox/mlx5/core/en_rx.c | 46 ++ include/linux/mlx5/device.h | 5 + 7 files changed, 505 insertions(+), 4 deletions(-) create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/trap.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/trap.h diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index fcfc0b1149852..d44f5f6ee4498 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -27,7 +27,7 @@ mlx5_core-$(CONFIG_MLX5_CORE_EN) += en_main.o en_common.o en_fs.o en_ethtool.o \ en_selftest.o en/port.o en/monitor_stats.o en/health.o \ en/reporter_tx.o en/reporter_rx.o en/params.o en/xsk/pool.o \ en/xsk/setup.o en/xsk/rx.o en/xsk/tx.o en/devlink.o en/ptp.o \ - en/qos.o + en/qos.o en/trap.o # # Netdev extra diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index dc4895a1fa9bc..f439a977ad612 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -564,6 +564,7 @@ typedef bool (*mlx5e_fp_post_rx_wqes)(struct mlx5e_rq *rq); typedef void (*mlx5e_fp_dealloc_wqe)(struct mlx5e_rq*, u16); int mlx5e_rq_set_handlers(struct mlx5e_rq *rq, struct mlx5e_params *params, bool xsk); +void mlx5e_rq_set_trap_handlers(struct mlx5e_rq *rq, struct mlx5e_params *params); enum mlx5e_rq_flag { MLX5E_RQ_FLAG_XDP_XMIT, @@ -805,6 +806,8 @@ struct mlx5e_htb { u16 defcls; }; +struct mlx5e_trap; + struct mlx5e_priv { /* priv data path fields - start */ /* +1 for port ptp ts */ @@ -844,8 +847,10 @@ struct mlx5e_priv { struct mlx5_core_dev *mdev; struct net_device *netdev; + struct mlx5e_trap *en_trap; struct mlx5e_stats stats; struct mlx5e_channel_stats channel_stats[MLX5E_MAX_NUM_CHANNELS]; + struct mlx5e_channel_stats trap_stats; struct mlx5e_port_ptp_stats port_ptp_stats; u16 max_nch; u8 max_opened_tc; @@ -961,6 +966,8 @@ int mlx5e_open_rq(struct mlx5e_channel *c, struct mlx5e_params *params, int mlx5e_wait_for_min_rx_wqes(struct mlx5e_rq *rq, int wait_time); void mlx5e_deactivate_rq(struct mlx5e_rq *rq); void mlx5e_close_rq(struct mlx5e_rq *rq); +int mlx5e_create_rq(struct mlx5e_rq *rq, struct mlx5e_rq_param *param); +void mlx5e_destroy_rq(struct mlx5e_rq *rq); struct mlx5e_sq_param; int mlx5e_open_icosq(struct mlx5e_channel *c, struct mlx5e_params *params, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c b/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c new file mode 100644 index 0000000000000..5507efacb9dc4 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c @@ -0,0 +1,409 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2020 Mellanox Technologies */ + +#include +#include "en/txrx.h" +#include "en/params.h" +#include "en/trap.h" + +static int mlx5e_trap_napi_poll(struct napi_struct *napi, int budget) +{ + struct mlx5e_trap *trap_ctx = container_of(napi, struct mlx5e_trap, napi); + struct mlx5e_ch_stats *ch_stats = trap_ctx->stats; + struct mlx5e_rq *rq = &trap_ctx->rq; + bool busy = false; + int work_done = 0; + + ch_stats->poll++; + + work_done = mlx5e_poll_rx_cq(&rq->cq, budget); + busy |= work_done == budget; + busy |= rq->post_wqes(rq); + + if (busy) + return budget; + + if (unlikely(!napi_complete_done(napi, work_done))) + return work_done; + + mlx5e_cq_arm(&rq->cq); + return work_done; +} + +static int mlx5e_alloc_trap_rq(struct mlx5e_priv *priv, struct mlx5e_rq_param *rqp, + struct mlx5e_rq_stats *stats, struct mlx5e_params *params, + struct mlx5e_ch_stats *ch_stats, + struct mlx5e_rq *rq) +{ + void *rqc_wq = MLX5_ADDR_OF(rqc, rqp->rqc, wq); + struct mlx5_core_dev *mdev = priv->mdev; + struct page_pool_params pp_params = {}; + int node = dev_to_node(mdev->device); + u32 pool_size; + int wq_sz; + int err; + int i; + + rqp->wq.db_numa_node = node; + + rq->wq_type = params->rq_wq_type; + rq->pdev = mdev->device; + rq->netdev = priv->netdev; + rq->mdev = mdev; + rq->priv = priv; + rq->stats = stats; + rq->clock = &mdev->clock; + rq->tstamp = &priv->tstamp; + rq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu); + + xdp_rxq_info_unused(&rq->xdp_rxq); + + rq->buff.map_dir = DMA_FROM_DEVICE; + rq->buff.headroom = mlx5e_get_rq_headroom(mdev, params, NULL); + pool_size = 1 << params->log_rq_mtu_frames; + + err = mlx5_wq_cyc_create(mdev, &rqp->wq, rqc_wq, &rq->wqe.wq, &rq->wq_ctrl); + if (err) + return err; + + rq->wqe.wq.db = &rq->wqe.wq.db[MLX5_RCV_DBR]; + + wq_sz = mlx5_wq_cyc_get_size(&rq->wqe.wq); + + rq->wqe.info = rqp->frags_info; + rq->buff.frame0_sz = rq->wqe.info.arr[0].frag_stride; + rq->wqe.frags = kvzalloc_node(array_size(sizeof(*rq->wqe.frags), + (wq_sz << rq->wqe.info.log_num_frags)), + GFP_KERNEL, node); + if (!rq->wqe.frags) { + err = -ENOMEM; + goto err_wq_cyc_destroy; + } + + err = mlx5e_init_di_list(rq, wq_sz, node); + if (err) + goto err_free_frags; + + rq->mkey_be = cpu_to_be32(priv->mdev->mlx5e_res.mkey.key); + + mlx5e_rq_set_trap_handlers(rq, params); + + /* Create a page_pool and register it with rxq */ + pp_params.order = 0; + pp_params.flags = 0; /* No-internal DMA mapping in page_pool */ + pp_params.pool_size = pool_size; + pp_params.nid = node; + pp_params.dev = mdev->device; + pp_params.dma_dir = rq->buff.map_dir; + + /* page_pool can be used even when there is no rq->xdp_prog, + * given page_pool does not handle DMA mapping there is no + * required state to clear. And page_pool gracefully handle + * elevated refcnt. + */ + rq->page_pool = page_pool_create(&pp_params); + if (IS_ERR(rq->page_pool)) { + err = PTR_ERR(rq->page_pool); + rq->page_pool = NULL; + goto err_free_di_list; + } + for (i = 0; i < wq_sz; i++) { + struct mlx5e_rx_wqe_cyc *wqe = + mlx5_wq_cyc_get_wqe(&rq->wqe.wq, i); + int f; + + for (f = 0; f < rq->wqe.info.num_frags; f++) { + u32 frag_size = rq->wqe.info.arr[f].frag_size | + MLX5_HW_START_PADDING; + + wqe->data[f].byte_count = cpu_to_be32(frag_size); + wqe->data[f].lkey = rq->mkey_be; + } + /* check if num_frags is not a pow of two */ + if (rq->wqe.info.num_frags < (1 << rq->wqe.info.log_num_frags)) { + wqe->data[f].byte_count = 0; + wqe->data[f].lkey = cpu_to_be32(MLX5_INVALID_LKEY); + wqe->data[f].addr = 0; + } + } + return 0; + +err_free_di_list: + mlx5e_free_di_list(rq); +err_free_frags: + kvfree(rq->wqe.frags); +err_wq_cyc_destroy: + mlx5_wq_destroy(&rq->wq_ctrl); + + return err; +} + +static void mlx5e_free_trap_rq(struct mlx5e_rq *rq) +{ + page_pool_destroy(rq->page_pool); + mlx5e_free_di_list(rq); + kvfree(rq->wqe.frags); + mlx5_wq_destroy(&rq->wq_ctrl); +} + +static int mlx5e_open_trap_rq(struct mlx5e_priv *priv, struct napi_struct *napi, + struct mlx5e_rq_stats *stats, struct mlx5e_params *params, + struct mlx5e_rq_param *rq_param, + struct mlx5e_ch_stats *ch_stats, + struct mlx5e_rq *rq) +{ + struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5e_create_cq_param ccp = {}; + struct dim_cq_moder trap_moder = {}; + struct mlx5e_cq *cq = &rq->cq; + int err; + + ccp.node = dev_to_node(mdev->device); + ccp.ch_stats = ch_stats; + ccp.napi = napi; + ccp.ix = 0; + err = mlx5e_open_cq(priv, trap_moder, &rq_param->cqp, &ccp, cq); + if (err) + return err; + + err = mlx5e_alloc_trap_rq(priv, rq_param, stats, params, ch_stats, rq); + if (err) + goto err_destroy_cq; + + err = mlx5e_create_rq(rq, rq_param); + if (err) + goto err_free_rq; + + err = mlx5e_modify_rq_state(rq, MLX5_RQC_STATE_RST, MLX5_RQC_STATE_RDY); + if (err) + goto err_destroy_rq; + + return 0; + +err_destroy_rq: + mlx5e_destroy_rq(rq); + mlx5e_free_rx_descs(rq); +err_free_rq: + mlx5e_free_trap_rq(rq); +err_destroy_cq: + mlx5e_close_cq(cq); + + return err; +} + +static void mlx5e_close_trap_rq(struct mlx5e_rq *rq) +{ + mlx5e_destroy_rq(rq); + mlx5e_free_rx_descs(rq); + mlx5e_free_trap_rq(rq); + mlx5e_close_cq(&rq->cq); +} + +static int mlx5e_create_trap_direct_rq_tir(struct mlx5_core_dev *mdev, struct mlx5e_tir *tir, + u32 rqn) +{ + void *tirc; + int inlen; + u32 *in; + int err; + + inlen = MLX5_ST_SZ_BYTES(create_tir_in); + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + tirc = MLX5_ADDR_OF(create_tir_in, in, ctx); + MLX5_SET(tirc, tirc, transport_domain, mdev->mlx5e_res.td.tdn); + MLX5_SET(tirc, tirc, rx_hash_fn, MLX5_RX_HASH_FN_NONE); + MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_DIRECT); + MLX5_SET(tirc, tirc, inline_rqn, rqn); + err = mlx5e_create_tir(mdev, tir, in); + kvfree(in); + + return err; +} + +static void mlx5e_destroy_trap_direct_rq_tir(struct mlx5_core_dev *mdev, struct mlx5e_tir *tir) +{ + mlx5e_destroy_tir(mdev, tir); +} + +static void mlx5e_activate_trap_rq(struct mlx5e_rq *rq) +{ + set_bit(MLX5E_RQ_STATE_ENABLED, &rq->state); +} + +static void mlx5e_deactivate_trap_rq(struct mlx5e_rq *rq) +{ + clear_bit(MLX5E_RQ_STATE_ENABLED, &rq->state); +} + +static void mlx5e_build_trap_params(struct mlx5e_priv *priv, struct mlx5e_trap *t) +{ + struct mlx5e_params *params = &t->params; + + params->rq_wq_type = MLX5_WQ_TYPE_CYCLIC; + mlx5e_init_rq_type_params(priv->mdev, params); + params->sw_mtu = priv->netdev->max_mtu; + mlx5e_build_rq_param(priv, params, NULL, &t->rq_param); +} + +static struct mlx5e_trap *mlx5e_open_trap(struct mlx5e_priv *priv) +{ + int cpu = cpumask_first(mlx5_comp_irq_get_affinity_mask(priv->mdev, 0)); + struct net_device *netdev = priv->netdev; + struct mlx5e_trap *t; + int err; + + t = kvzalloc_node(sizeof(*t), GFP_KERNEL, cpu_to_node(cpu)); + if (!t) + return ERR_PTR(-ENOMEM); + + mlx5e_build_trap_params(priv, t); + + t->priv = priv; + t->mdev = priv->mdev; + t->tstamp = &priv->tstamp; + t->pdev = mlx5_core_dma_dev(priv->mdev); + t->netdev = priv->netdev; + t->mkey_be = cpu_to_be32(priv->mdev->mlx5e_res.mkey.key); + t->stats = &priv->trap_stats.ch; + + netif_napi_add(netdev, &t->napi, mlx5e_trap_napi_poll, 64); + + err = mlx5e_open_trap_rq(priv, &t->napi, + &priv->trap_stats.rq, + &t->params, &t->rq_param, + &priv->trap_stats.ch, + &t->rq); + if (unlikely(err)) + goto err_napi_del; + + err = mlx5e_create_trap_direct_rq_tir(t->mdev, &t->tir, t->rq.rqn); + if (err) + goto err_close_trap_rq; + + return t; + +err_close_trap_rq: + mlx5e_close_trap_rq(&t->rq); +err_napi_del: + netif_napi_del(&t->napi); + kvfree(t); + return ERR_PTR(err); +} + +void mlx5e_close_trap(struct mlx5e_trap *trap) +{ + mlx5e_destroy_trap_direct_rq_tir(trap->mdev, &trap->tir); + mlx5e_close_trap_rq(&trap->rq); + netif_napi_del(&trap->napi); + kvfree(trap); +} + +static void mlx5e_activate_trap(struct mlx5e_trap *trap) +{ + napi_enable(&trap->napi); + mlx5e_activate_trap_rq(&trap->rq); + napi_schedule(&trap->napi); +} + +void mlx5e_deactivate_trap(struct mlx5e_priv *priv) +{ + struct mlx5e_trap *trap = priv->en_trap; + + mlx5e_deactivate_trap_rq(&trap->rq); + napi_disable(&trap->napi); +} + +static struct mlx5e_trap *mlx5e_add_trap_queue(struct mlx5e_priv *priv) +{ + struct mlx5e_trap *trap; + + trap = mlx5e_open_trap(priv); + if (IS_ERR(trap)) + goto out; + + mlx5e_activate_trap(trap); +out: + return trap; +} + +static void mlx5e_del_trap_queue(struct mlx5e_priv *priv) +{ + mlx5e_deactivate_trap(priv); + mlx5e_close_trap(priv->en_trap); + priv->en_trap = NULL; +} + +static int mlx5e_trap_get_tirn(struct mlx5e_trap *en_trap) +{ + return en_trap->tir.tirn; +} + +static int mlx5e_handle_action_trap(struct mlx5e_priv *priv, int trap_id) +{ + bool open_queue = !priv->en_trap; + struct mlx5e_trap *trap; + int err; + + if (open_queue) { + trap = mlx5e_add_trap_queue(priv); + if (IS_ERR(trap)) + return PTR_ERR(trap); + priv->en_trap = trap; + } + + switch (trap_id) { + case DEVLINK_TRAP_GENERIC_ID_INGRESS_VLAN_FILTER: + err = mlx5e_add_vlan_trap(priv, trap_id, mlx5e_trap_get_tirn(priv->en_trap)); + if (err) + goto err_out; + break; + default: + netdev_warn(priv->netdev, "%s: Unknown trap id %d\n", __func__, trap_id); + err = -EINVAL; + goto err_out; + } + return 0; + +err_out: + if (open_queue) + mlx5e_del_trap_queue(priv); + return err; +} + +static int mlx5e_handle_action_drop(struct mlx5e_priv *priv, int trap_id) +{ + switch (trap_id) { + case DEVLINK_TRAP_GENERIC_ID_INGRESS_VLAN_FILTER: + mlx5e_remove_vlan_trap(priv); + break; + default: + netdev_warn(priv->netdev, "%s: Unknown trap id %d\n", __func__, trap_id); + return -EINVAL; + } + if (priv->en_trap && !mlx5_devlink_trap_get_num_active(priv->mdev)) + mlx5e_del_trap_queue(priv); + + return 0; +} + +int mlx5e_handle_trap_event(struct mlx5e_priv *priv, struct mlx5_trap_ctx *trap_ctx) +{ + int err = 0; + + switch (trap_ctx->action) { + case DEVLINK_TRAP_ACTION_TRAP: + err = mlx5e_handle_action_trap(priv, trap_ctx->id); + break; + case DEVLINK_TRAP_ACTION_DROP: + err = mlx5e_handle_action_drop(priv, trap_ctx->id); + break; + default: + netdev_warn(priv->netdev, "%s: Unsupported action %d\n", __func__, + trap_ctx->action); + err = -EINVAL; + } + return err; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/trap.h b/drivers/net/ethernet/mellanox/mlx5/core/en/trap.h new file mode 100644 index 0000000000000..cc1fa9f12c45b --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/trap.h @@ -0,0 +1,35 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2020, Mellanox Technologies */ + +#ifndef __MLX5E_TRAP_H__ +#define __MLX5E_TRAP_H__ + +#include "../en.h" +#include "../devlink.h" + +struct mlx5e_trap { + /* data path */ + struct mlx5e_rq rq; + struct mlx5e_tir tir; + struct napi_struct napi; + struct device *pdev; + struct net_device *netdev; + __be32 mkey_be; + + /* data path - accessed per napi poll */ + struct mlx5e_ch_stats *stats; + + /* control */ + struct mlx5e_priv *priv; + struct mlx5_core_dev *mdev; + struct hwtstamp_config *tstamp; + DECLARE_BITMAP(state, MLX5E_CHANNEL_NUM_STATES); + + struct mlx5e_params params; + struct mlx5e_rq_param rq_param; +}; + +void mlx5e_close_trap(struct mlx5e_trap *trap); +void mlx5e_deactivate_trap(struct mlx5e_priv *priv); +int mlx5e_handle_trap_event(struct mlx5e_priv *priv, struct mlx5_trap_ctx *trap_ctx); +#endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index bed2f1a6d7303..ec5bb48cb54a1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -649,8 +649,7 @@ static void mlx5e_free_rq(struct mlx5e_rq *rq) mlx5_wq_destroy(&rq->wq_ctrl); } -static int mlx5e_create_rq(struct mlx5e_rq *rq, - struct mlx5e_rq_param *param) +int mlx5e_create_rq(struct mlx5e_rq *rq, struct mlx5e_rq_param *param) { struct mlx5_core_dev *mdev = rq->mdev; @@ -773,7 +772,7 @@ static int mlx5e_modify_rq_vsd(struct mlx5e_rq *rq, bool vsd) return err; } -static void mlx5e_destroy_rq(struct mlx5e_rq *rq) +void mlx5e_destroy_rq(struct mlx5e_rq *rq) { mlx5_core_destroy_rq(rq->mdev, rq->rqn); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index dec93d57542f2..98b56f495b32c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -52,6 +52,7 @@ #include "en/xsk/rx.h" #include "en/health.h" #include "en/params.h" +#include "devlink.h" static struct sk_buff * mlx5e_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi, @@ -1815,3 +1816,48 @@ int mlx5e_rq_set_handlers(struct mlx5e_rq *rq, struct mlx5e_params *params, bool return 0; } + +static void mlx5e_trap_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) +{ + struct mlx5e_priv *priv = netdev_priv(rq->netdev); + struct mlx5_wq_cyc *wq = &rq->wqe.wq; + struct mlx5e_wqe_frag_info *wi; + struct sk_buff *skb; + u32 cqe_bcnt; + u16 trap_id; + u16 ci; + + trap_id = get_cqe_flow_tag(cqe); + ci = mlx5_wq_cyc_ctr2ix(wq, be16_to_cpu(cqe->wqe_counter)); + wi = get_frag(rq, ci); + cqe_bcnt = be32_to_cpu(cqe->byte_cnt); + + if (unlikely(MLX5E_RX_ERR_CQE(cqe))) { + rq->stats->wqe_err++; + goto free_wqe; + } + + skb = mlx5e_skb_from_cqe_nonlinear(rq, cqe, wi, cqe_bcnt); + if (!skb) + goto free_wqe; + + mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb); + skb_push(skb, ETH_HLEN); + + mlx5_devlink_trap_report(rq->mdev, trap_id, skb, &priv->dl_port); + dev_kfree_skb_any(skb); + +free_wqe: + mlx5e_free_rx_wqe(rq, wi, false); + mlx5_wq_cyc_pop(wq); +} + +void mlx5e_rq_set_trap_handlers(struct mlx5e_rq *rq, struct mlx5e_params *params) +{ + rq->wqe.skb_from_cqe = mlx5e_rx_is_linear_skb(params, NULL) ? + mlx5e_skb_from_cqe_linear : + mlx5e_skb_from_cqe_nonlinear; + rq->post_wqes = mlx5e_post_rx_wqes; + rq->dealloc_wqe = mlx5e_dealloc_rx_wqe; + rq->handle_rx_cqe = mlx5e_trap_handle_rx_cqe; +} diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 77ba54d38772d..00057eae89ab8 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -903,6 +903,11 @@ static inline u64 get_cqe_ts(struct mlx5_cqe64 *cqe) return (u64)lo | ((u64)hi << 32); } +static inline u16 get_cqe_flow_tag(struct mlx5_cqe64 *cqe) +{ + return be32_to_cpu(cqe->sop_drop_qpn) & 0xFFF; +} + #define MLX5_MPWQE_LOG_NUM_STRIDES_BASE (9) #define MLX5_MPWQE_LOG_STRIDE_SZ_BASE (6) -- GitLab From 70038b73e40e2ce6bc4c8f25bbf0747b7a07a61f Mon Sep 17 00:00:00 2001 From: Aya Levin Date: Tue, 26 Jan 2021 15:24:17 -0800 Subject: [PATCH 1606/2012] net/mlx5e: Add listener to trap event Add support for listening to blocking events in the ETH driver. Listen on trap event. If received, call mlx5e_handle_trap_event() which: 1) Verifies if driver needs open/close trap-RQ with respect to the active traps count. 2) Inspects trap id and its action (trap/drop) and add/remove the flow steering rule accordingly. Otherwise, return an error. Signed-off-by: Aya Levin Reviewed-by: Tariq Toukan Signed-off-by: Tariq Toukan Signed-off-by: Saeed Mahameed Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 1 + .../net/ethernet/mellanox/mlx5/core/en_main.c | 35 +++++++++++++++++++ 2 files changed, 36 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index f439a977ad612..39f389cc40fc9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -859,6 +859,7 @@ struct mlx5e_priv { u16 q_counter; u16 drop_rq_q_counter; struct notifier_block events_nb; + struct notifier_block blocking_events_nb; int num_tc_x_num_ch; struct udp_tunnel_nic_info nic_info; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index ec5bb48cb54a1..3252919ec7bfd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -66,6 +66,7 @@ #include "lib/mlx5.h" #include "en/ptp.h" #include "qos.h" +#include "en/trap.h" bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev) { @@ -212,6 +213,33 @@ static void mlx5e_disable_async_events(struct mlx5e_priv *priv) mlx5_notifier_unregister(priv->mdev, &priv->events_nb); } +static int blocking_event(struct notifier_block *nb, unsigned long event, void *data) +{ + struct mlx5e_priv *priv = container_of(nb, struct mlx5e_priv, blocking_events_nb); + int err; + + switch (event) { + case MLX5_DRIVER_EVENT_TYPE_TRAP: + err = mlx5e_handle_trap_event(priv, data); + break; + default: + netdev_warn(priv->netdev, "Sync event: Unknouwn event %ld\n", event); + err = -EINVAL; + } + return err; +} + +static void mlx5e_enable_blocking_events(struct mlx5e_priv *priv) +{ + priv->blocking_events_nb.notifier_call = blocking_event; + mlx5_blocking_notifier_register(priv->mdev, &priv->blocking_events_nb); +} + +static void mlx5e_disable_blocking_events(struct mlx5e_priv *priv) +{ + mlx5_blocking_notifier_unregister(priv->mdev, &priv->blocking_events_nb); +} + static inline void mlx5e_build_umr_wqe(struct mlx5e_rq *rq, struct mlx5e_icosq *sq, struct mlx5e_umr_wqe *wqe) @@ -5341,6 +5369,7 @@ static void mlx5e_nic_enable(struct mlx5e_priv *priv) mlx5_lag_add(mdev, netdev); mlx5e_enable_async_events(priv); + mlx5e_enable_blocking_events(priv); if (mlx5e_monitor_counter_supported(priv)) mlx5e_monitor_counter_init(priv); @@ -5378,6 +5407,12 @@ static void mlx5e_nic_disable(struct mlx5e_priv *priv) if (mlx5e_monitor_counter_supported(priv)) mlx5e_monitor_counter_cleanup(priv); + mlx5e_disable_blocking_events(priv); + if (priv->en_trap) { + mlx5e_deactivate_trap(priv); + mlx5e_close_trap(priv->en_trap); + priv->en_trap = NULL; + } mlx5e_disable_async_events(priv); mlx5_lag_remove(mdev); mlx5_vxlan_reset_to_default(mdev->vxlan); -- GitLab From 49fdbd23418f5b18536d02f257096bd71fc83086 Mon Sep 17 00:00:00 2001 From: Aya Levin Date: Tue, 26 Jan 2021 15:24:18 -0800 Subject: [PATCH 1607/2012] net/mlx5e: Add listener to DMAC filter trap event Add support for trapping packets which didn't match any DMAC in the MAC table. Add a listener which adds/removes MAC trap rule in the flow steering according to the trap's action trap/drop. Signed-off-by: Aya Levin Reviewed-by: Moshe Shemesh Reviewed-by: Tariq Toukan Signed-off-by: Tariq Toukan Signed-off-by: Saeed Mahameed Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/en/trap.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c b/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c index 5507efacb9dc4..d078281dbd1de 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c @@ -360,6 +360,11 @@ static int mlx5e_handle_action_trap(struct mlx5e_priv *priv, int trap_id) if (err) goto err_out; break; + case DEVLINK_TRAP_GENERIC_ID_DMAC_FILTER: + err = mlx5e_add_mac_trap(priv, trap_id, mlx5e_trap_get_tirn(priv->en_trap)); + if (err) + goto err_out; + break; default: netdev_warn(priv->netdev, "%s: Unknown trap id %d\n", __func__, trap_id); err = -EINVAL; @@ -379,6 +384,9 @@ static int mlx5e_handle_action_drop(struct mlx5e_priv *priv, int trap_id) case DEVLINK_TRAP_GENERIC_ID_INGRESS_VLAN_FILTER: mlx5e_remove_vlan_trap(priv); break; + case DEVLINK_TRAP_GENERIC_ID_DMAC_FILTER: + mlx5e_remove_mac_trap(priv); + break; default: netdev_warn(priv->netdev, "%s: Unknown trap id %d\n", __func__, trap_id); return -EINVAL; -- GitLab From eb3862a0525d26f0975ed4f750bc151920f2f25c Mon Sep 17 00:00:00 2001 From: Aya Levin Date: Tue, 26 Jan 2021 15:24:19 -0800 Subject: [PATCH 1608/2012] net/mlx5e: Enable traps according to link state Avoid trapping packets when the interface is down, and revive them when interface is back up. Add API to mlx5 core retrieving the action by trap id. Use it to apply traps when interface is up, and disable then when interface is down. Signed-off-by: Aya Levin Reviewed-by: Moshe Shemesh Signed-off-by: Tariq Toukan Signed-off-by: Saeed Mahameed Signed-off-by: Jakub Kicinski --- .../net/ethernet/mellanox/mlx5/core/devlink.c | 16 ++++++++ .../net/ethernet/mellanox/mlx5/core/devlink.h | 2 + .../net/ethernet/mellanox/mlx5/core/en/trap.c | 40 +++++++++++++++++++ .../net/ethernet/mellanox/mlx5/core/en/trap.h | 2 + .../net/ethernet/mellanox/mlx5/core/en_main.c | 2 + 5 files changed, 62 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c index c47291467cb0c..b23b548143569 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c @@ -308,6 +308,22 @@ int mlx5_devlink_trap_get_num_active(struct mlx5_core_dev *dev) return count; } +int mlx5_devlink_traps_get_action(struct mlx5_core_dev *dev, int trap_id, + enum devlink_trap_action *action) +{ + struct mlx5_devlink_trap *dl_trap; + + dl_trap = mlx5_find_trap_by_id(dev, trap_id); + if (!dl_trap) { + mlx5_core_err(dev, "Devlink trap: Get action on invalid trap id 0x%x", + trap_id); + return -EINVAL; + } + + *action = dl_trap->trap.action; + return 0; +} + struct devlink *mlx5_devlink_alloc(void) { return devlink_alloc(&mlx5_devlink_ops, sizeof(struct mlx5_core_dev)); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.h b/drivers/net/ethernet/mellanox/mlx5/core/devlink.h index a9829006fa789..eff107dad922d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.h @@ -27,6 +27,8 @@ struct mlx5_core_dev; void mlx5_devlink_trap_report(struct mlx5_core_dev *dev, int trap_id, struct sk_buff *skb, struct devlink_port *dl_port); int mlx5_devlink_trap_get_num_active(struct mlx5_core_dev *dev); +int mlx5_devlink_traps_get_action(struct mlx5_core_dev *dev, int trap_id, + enum devlink_trap_action *action); struct devlink *mlx5_devlink_alloc(void); void mlx5_devlink_free(struct devlink *devlink); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c b/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c index d078281dbd1de..37fc1d77ded7c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c @@ -401,6 +401,14 @@ int mlx5e_handle_trap_event(struct mlx5e_priv *priv, struct mlx5_trap_ctx *trap_ { int err = 0; + /* Traps are unarmed when interface is down, no need to update + * them. The configuration is saved in the core driver, + * queried and applied upon interface up operation in + * mlx5e_open_locked(). + */ + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) + return 0; + switch (trap_ctx->action) { case DEVLINK_TRAP_ACTION_TRAP: err = mlx5e_handle_action_trap(priv, trap_ctx->id); @@ -415,3 +423,35 @@ int mlx5e_handle_trap_event(struct mlx5e_priv *priv, struct mlx5_trap_ctx *trap_ } return err; } + +static int mlx5e_apply_trap(struct mlx5e_priv *priv, int trap_id, bool enable) +{ + enum devlink_trap_action action; + int err; + + err = mlx5_devlink_traps_get_action(priv->mdev, trap_id, &action); + if (err) + return err; + if (action == DEVLINK_TRAP_ACTION_TRAP) + err = enable ? mlx5e_handle_action_trap(priv, trap_id) : + mlx5e_handle_action_drop(priv, trap_id); + return err; +} + +static const int mlx5e_traps_arr[] = { + DEVLINK_TRAP_GENERIC_ID_INGRESS_VLAN_FILTER, + DEVLINK_TRAP_GENERIC_ID_DMAC_FILTER, +}; + +int mlx5e_apply_traps(struct mlx5e_priv *priv, bool enable) +{ + int err; + int i; + + for (i = 0; i < ARRAY_SIZE(mlx5e_traps_arr); i++) { + err = mlx5e_apply_trap(priv, mlx5e_traps_arr[i], enable); + if (err) + return err; + } + return 0; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/trap.h b/drivers/net/ethernet/mellanox/mlx5/core/en/trap.h index cc1fa9f12c45b..aa3f17658c6d4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/trap.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/trap.h @@ -32,4 +32,6 @@ struct mlx5e_trap { void mlx5e_close_trap(struct mlx5e_trap *trap); void mlx5e_deactivate_trap(struct mlx5e_priv *priv); int mlx5e_handle_trap_event(struct mlx5e_priv *priv, struct mlx5_trap_ctx *trap_ctx); +int mlx5e_apply_traps(struct mlx5e_priv *priv, bool enable); + #endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 3252919ec7bfd..f8619d3813452 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -3247,6 +3247,7 @@ int mlx5e_open_locked(struct net_device *netdev) priv->profile->update_rx(priv); mlx5e_activate_priv_channels(priv); + mlx5e_apply_traps(priv, true); if (priv->profile->update_carrier) priv->profile->update_carrier(priv); @@ -3282,6 +3283,7 @@ int mlx5e_close_locked(struct net_device *netdev) if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) return 0; + mlx5e_apply_traps(priv, false); clear_bit(MLX5E_STATE_OPENED, &priv->state); netif_carrier_off(priv->netdev); -- GitLab From d1f3bdd4eaae1222063c2f309625656108815915 Mon Sep 17 00:00:00 2001 From: Lorenzo Carletti Date: Wed, 27 Jan 2021 02:06:32 +0100 Subject: [PATCH 1609/2012] net: dsa: rtl8366rb: standardize init jam tables In the rtl8366rb driver there are some jam tables which contain undocumented values. While trying to understand what these tables actually do, I noticed a discrepancy in how one of those was treated. Most of them were plain u16 arrays, while the ethernet one was an u16 matrix. By looking at the vendor's droplets of source code these tables came from, I found out that they were all originally u16 matrixes. This commit standardizes the jam tables, turning them all into jam_tbl_entry arrays. Each entry contains 2 u16 values. This change makes it easier to understand how the jam tables are used and also makes it possible for a single function to handle all of them, removing some duplicated code. Signed-off-by: Lorenzo Carletti Reviewed-by: Vladimir Oltean Reviewed-by: Florian Fainelli Reviewed-by: Linus Walleij Signed-off-by: Jakub Kicinski --- drivers/net/dsa/rtl8366rb.c | 273 ++++++++++++++++++------------------ 1 file changed, 139 insertions(+), 134 deletions(-) diff --git a/drivers/net/dsa/rtl8366rb.c b/drivers/net/dsa/rtl8366rb.c index c6cc4938897ce..a89093bc6c6ad 100644 --- a/drivers/net/dsa/rtl8366rb.c +++ b/drivers/net/dsa/rtl8366rb.c @@ -601,108 +601,114 @@ static int rtl8366rb_set_addr(struct realtek_smi *smi) /* Found in a vendor driver */ +/* Struct for handling the jam tables' entries */ +struct rtl8366rb_jam_tbl_entry { + u16 reg; + u16 val; +}; + /* For the "version 0" early silicon, appear in most source releases */ -static const u16 rtl8366rb_init_jam_ver_0[] = { - 0x000B, 0x0001, 0x03A6, 0x0100, 0x03A7, 0x0001, 0x02D1, 0x3FFF, - 0x02D2, 0x3FFF, 0x02D3, 0x3FFF, 0x02D4, 0x3FFF, 0x02D5, 0x3FFF, - 0x02D6, 0x3FFF, 0x02D7, 0x3FFF, 0x02D8, 0x3FFF, 0x022B, 0x0688, - 0x022C, 0x0FAC, 0x03D0, 0x4688, 0x03D1, 0x01F5, 0x0000, 0x0830, - 0x02F9, 0x0200, 0x02F7, 0x7FFF, 0x02F8, 0x03FF, 0x0080, 0x03E8, - 0x0081, 0x00CE, 0x0082, 0x00DA, 0x0083, 0x0230, 0xBE0F, 0x2000, - 0x0231, 0x422A, 0x0232, 0x422A, 0x0233, 0x422A, 0x0234, 0x422A, - 0x0235, 0x422A, 0x0236, 0x422A, 0x0237, 0x422A, 0x0238, 0x422A, - 0x0239, 0x422A, 0x023A, 0x422A, 0x023B, 0x422A, 0x023C, 0x422A, - 0x023D, 0x422A, 0x023E, 0x422A, 0x023F, 0x422A, 0x0240, 0x422A, - 0x0241, 0x422A, 0x0242, 0x422A, 0x0243, 0x422A, 0x0244, 0x422A, - 0x0245, 0x422A, 0x0246, 0x422A, 0x0247, 0x422A, 0x0248, 0x422A, - 0x0249, 0x0146, 0x024A, 0x0146, 0x024B, 0x0146, 0xBE03, 0xC961, - 0x024D, 0x0146, 0x024E, 0x0146, 0x024F, 0x0146, 0x0250, 0x0146, - 0xBE64, 0x0226, 0x0252, 0x0146, 0x0253, 0x0146, 0x024C, 0x0146, - 0x0251, 0x0146, 0x0254, 0x0146, 0xBE62, 0x3FD0, 0x0084, 0x0320, - 0x0255, 0x0146, 0x0256, 0x0146, 0x0257, 0x0146, 0x0258, 0x0146, - 0x0259, 0x0146, 0x025A, 0x0146, 0x025B, 0x0146, 0x025C, 0x0146, - 0x025D, 0x0146, 0x025E, 0x0146, 0x025F, 0x0146, 0x0260, 0x0146, - 0x0261, 0xA23F, 0x0262, 0x0294, 0x0263, 0xA23F, 0x0264, 0x0294, - 0x0265, 0xA23F, 0x0266, 0x0294, 0x0267, 0xA23F, 0x0268, 0x0294, - 0x0269, 0xA23F, 0x026A, 0x0294, 0x026B, 0xA23F, 0x026C, 0x0294, - 0x026D, 0xA23F, 0x026E, 0x0294, 0x026F, 0xA23F, 0x0270, 0x0294, - 0x02F5, 0x0048, 0xBE09, 0x0E00, 0xBE1E, 0x0FA0, 0xBE14, 0x8448, - 0xBE15, 0x1007, 0xBE4A, 0xA284, 0xC454, 0x3F0B, 0xC474, 0x3F0B, - 0xBE48, 0x3672, 0xBE4B, 0x17A7, 0xBE4C, 0x0B15, 0xBE52, 0x0EDD, - 0xBE49, 0x8C00, 0xBE5B, 0x785C, 0xBE5C, 0x785C, 0xBE5D, 0x785C, - 0xBE61, 0x368A, 0xBE63, 0x9B84, 0xC456, 0xCC13, 0xC476, 0xCC13, - 0xBE65, 0x307D, 0xBE6D, 0x0005, 0xBE6E, 0xE120, 0xBE2E, 0x7BAF, +static const struct rtl8366rb_jam_tbl_entry rtl8366rb_init_jam_ver_0[] = { + {0x000B, 0x0001}, {0x03A6, 0x0100}, {0x03A7, 0x0001}, {0x02D1, 0x3FFF}, + {0x02D2, 0x3FFF}, {0x02D3, 0x3FFF}, {0x02D4, 0x3FFF}, {0x02D5, 0x3FFF}, + {0x02D6, 0x3FFF}, {0x02D7, 0x3FFF}, {0x02D8, 0x3FFF}, {0x022B, 0x0688}, + {0x022C, 0x0FAC}, {0x03D0, 0x4688}, {0x03D1, 0x01F5}, {0x0000, 0x0830}, + {0x02F9, 0x0200}, {0x02F7, 0x7FFF}, {0x02F8, 0x03FF}, {0x0080, 0x03E8}, + {0x0081, 0x00CE}, {0x0082, 0x00DA}, {0x0083, 0x0230}, {0xBE0F, 0x2000}, + {0x0231, 0x422A}, {0x0232, 0x422A}, {0x0233, 0x422A}, {0x0234, 0x422A}, + {0x0235, 0x422A}, {0x0236, 0x422A}, {0x0237, 0x422A}, {0x0238, 0x422A}, + {0x0239, 0x422A}, {0x023A, 0x422A}, {0x023B, 0x422A}, {0x023C, 0x422A}, + {0x023D, 0x422A}, {0x023E, 0x422A}, {0x023F, 0x422A}, {0x0240, 0x422A}, + {0x0241, 0x422A}, {0x0242, 0x422A}, {0x0243, 0x422A}, {0x0244, 0x422A}, + {0x0245, 0x422A}, {0x0246, 0x422A}, {0x0247, 0x422A}, {0x0248, 0x422A}, + {0x0249, 0x0146}, {0x024A, 0x0146}, {0x024B, 0x0146}, {0xBE03, 0xC961}, + {0x024D, 0x0146}, {0x024E, 0x0146}, {0x024F, 0x0146}, {0x0250, 0x0146}, + {0xBE64, 0x0226}, {0x0252, 0x0146}, {0x0253, 0x0146}, {0x024C, 0x0146}, + {0x0251, 0x0146}, {0x0254, 0x0146}, {0xBE62, 0x3FD0}, {0x0084, 0x0320}, + {0x0255, 0x0146}, {0x0256, 0x0146}, {0x0257, 0x0146}, {0x0258, 0x0146}, + {0x0259, 0x0146}, {0x025A, 0x0146}, {0x025B, 0x0146}, {0x025C, 0x0146}, + {0x025D, 0x0146}, {0x025E, 0x0146}, {0x025F, 0x0146}, {0x0260, 0x0146}, + {0x0261, 0xA23F}, {0x0262, 0x0294}, {0x0263, 0xA23F}, {0x0264, 0x0294}, + {0x0265, 0xA23F}, {0x0266, 0x0294}, {0x0267, 0xA23F}, {0x0268, 0x0294}, + {0x0269, 0xA23F}, {0x026A, 0x0294}, {0x026B, 0xA23F}, {0x026C, 0x0294}, + {0x026D, 0xA23F}, {0x026E, 0x0294}, {0x026F, 0xA23F}, {0x0270, 0x0294}, + {0x02F5, 0x0048}, {0xBE09, 0x0E00}, {0xBE1E, 0x0FA0}, {0xBE14, 0x8448}, + {0xBE15, 0x1007}, {0xBE4A, 0xA284}, {0xC454, 0x3F0B}, {0xC474, 0x3F0B}, + {0xBE48, 0x3672}, {0xBE4B, 0x17A7}, {0xBE4C, 0x0B15}, {0xBE52, 0x0EDD}, + {0xBE49, 0x8C00}, {0xBE5B, 0x785C}, {0xBE5C, 0x785C}, {0xBE5D, 0x785C}, + {0xBE61, 0x368A}, {0xBE63, 0x9B84}, {0xC456, 0xCC13}, {0xC476, 0xCC13}, + {0xBE65, 0x307D}, {0xBE6D, 0x0005}, {0xBE6E, 0xE120}, {0xBE2E, 0x7BAF}, }; /* This v1 init sequence is from Belkin F5D8235 U-Boot release */ -static const u16 rtl8366rb_init_jam_ver_1[] = { - 0x0000, 0x0830, 0x0001, 0x8000, 0x0400, 0x8130, 0xBE78, 0x3C3C, - 0x0431, 0x5432, 0xBE37, 0x0CE4, 0x02FA, 0xFFDF, 0x02FB, 0xFFE0, - 0xC44C, 0x1585, 0xC44C, 0x1185, 0xC44C, 0x1585, 0xC46C, 0x1585, - 0xC46C, 0x1185, 0xC46C, 0x1585, 0xC451, 0x2135, 0xC471, 0x2135, - 0xBE10, 0x8140, 0xBE15, 0x0007, 0xBE6E, 0xE120, 0xBE69, 0xD20F, - 0xBE6B, 0x0320, 0xBE24, 0xB000, 0xBE23, 0xFF51, 0xBE22, 0xDF20, - 0xBE21, 0x0140, 0xBE20, 0x00BB, 0xBE24, 0xB800, 0xBE24, 0x0000, - 0xBE24, 0x7000, 0xBE23, 0xFF51, 0xBE22, 0xDF60, 0xBE21, 0x0140, - 0xBE20, 0x0077, 0xBE24, 0x7800, 0xBE24, 0x0000, 0xBE2E, 0x7B7A, - 0xBE36, 0x0CE4, 0x02F5, 0x0048, 0xBE77, 0x2940, 0x000A, 0x83E0, - 0xBE79, 0x3C3C, 0xBE00, 0x1340, +static const struct rtl8366rb_jam_tbl_entry rtl8366rb_init_jam_ver_1[] = { + {0x0000, 0x0830}, {0x0001, 0x8000}, {0x0400, 0x8130}, {0xBE78, 0x3C3C}, + {0x0431, 0x5432}, {0xBE37, 0x0CE4}, {0x02FA, 0xFFDF}, {0x02FB, 0xFFE0}, + {0xC44C, 0x1585}, {0xC44C, 0x1185}, {0xC44C, 0x1585}, {0xC46C, 0x1585}, + {0xC46C, 0x1185}, {0xC46C, 0x1585}, {0xC451, 0x2135}, {0xC471, 0x2135}, + {0xBE10, 0x8140}, {0xBE15, 0x0007}, {0xBE6E, 0xE120}, {0xBE69, 0xD20F}, + {0xBE6B, 0x0320}, {0xBE24, 0xB000}, {0xBE23, 0xFF51}, {0xBE22, 0xDF20}, + {0xBE21, 0x0140}, {0xBE20, 0x00BB}, {0xBE24, 0xB800}, {0xBE24, 0x0000}, + {0xBE24, 0x7000}, {0xBE23, 0xFF51}, {0xBE22, 0xDF60}, {0xBE21, 0x0140}, + {0xBE20, 0x0077}, {0xBE24, 0x7800}, {0xBE24, 0x0000}, {0xBE2E, 0x7B7A}, + {0xBE36, 0x0CE4}, {0x02F5, 0x0048}, {0xBE77, 0x2940}, {0x000A, 0x83E0}, + {0xBE79, 0x3C3C}, {0xBE00, 0x1340}, }; /* This v2 init sequence is from Belkin F5D8235 U-Boot release */ -static const u16 rtl8366rb_init_jam_ver_2[] = { - 0x0450, 0x0000, 0x0400, 0x8130, 0x000A, 0x83ED, 0x0431, 0x5432, - 0xC44F, 0x6250, 0xC46F, 0x6250, 0xC456, 0x0C14, 0xC476, 0x0C14, - 0xC44C, 0x1C85, 0xC44C, 0x1885, 0xC44C, 0x1C85, 0xC46C, 0x1C85, - 0xC46C, 0x1885, 0xC46C, 0x1C85, 0xC44C, 0x0885, 0xC44C, 0x0881, - 0xC44C, 0x0885, 0xC46C, 0x0885, 0xC46C, 0x0881, 0xC46C, 0x0885, - 0xBE2E, 0x7BA7, 0xBE36, 0x1000, 0xBE37, 0x1000, 0x8000, 0x0001, - 0xBE69, 0xD50F, 0x8000, 0x0000, 0xBE69, 0xD50F, 0xBE6E, 0x0320, - 0xBE77, 0x2940, 0xBE78, 0x3C3C, 0xBE79, 0x3C3C, 0xBE6E, 0xE120, - 0x8000, 0x0001, 0xBE15, 0x1007, 0x8000, 0x0000, 0xBE15, 0x1007, - 0xBE14, 0x0448, 0xBE1E, 0x00A0, 0xBE10, 0x8160, 0xBE10, 0x8140, - 0xBE00, 0x1340, 0x0F51, 0x0010, +static const struct rtl8366rb_jam_tbl_entry rtl8366rb_init_jam_ver_2[] = { + {0x0450, 0x0000}, {0x0400, 0x8130}, {0x000A, 0x83ED}, {0x0431, 0x5432}, + {0xC44F, 0x6250}, {0xC46F, 0x6250}, {0xC456, 0x0C14}, {0xC476, 0x0C14}, + {0xC44C, 0x1C85}, {0xC44C, 0x1885}, {0xC44C, 0x1C85}, {0xC46C, 0x1C85}, + {0xC46C, 0x1885}, {0xC46C, 0x1C85}, {0xC44C, 0x0885}, {0xC44C, 0x0881}, + {0xC44C, 0x0885}, {0xC46C, 0x0885}, {0xC46C, 0x0881}, {0xC46C, 0x0885}, + {0xBE2E, 0x7BA7}, {0xBE36, 0x1000}, {0xBE37, 0x1000}, {0x8000, 0x0001}, + {0xBE69, 0xD50F}, {0x8000, 0x0000}, {0xBE69, 0xD50F}, {0xBE6E, 0x0320}, + {0xBE77, 0x2940}, {0xBE78, 0x3C3C}, {0xBE79, 0x3C3C}, {0xBE6E, 0xE120}, + {0x8000, 0x0001}, {0xBE15, 0x1007}, {0x8000, 0x0000}, {0xBE15, 0x1007}, + {0xBE14, 0x0448}, {0xBE1E, 0x00A0}, {0xBE10, 0x8160}, {0xBE10, 0x8140}, + {0xBE00, 0x1340}, {0x0F51, 0x0010}, }; /* Appears in a DDWRT code dump */ -static const u16 rtl8366rb_init_jam_ver_3[] = { - 0x0000, 0x0830, 0x0400, 0x8130, 0x000A, 0x83ED, 0x0431, 0x5432, - 0x0F51, 0x0017, 0x02F5, 0x0048, 0x02FA, 0xFFDF, 0x02FB, 0xFFE0, - 0xC456, 0x0C14, 0xC476, 0x0C14, 0xC454, 0x3F8B, 0xC474, 0x3F8B, - 0xC450, 0x2071, 0xC470, 0x2071, 0xC451, 0x226B, 0xC471, 0x226B, - 0xC452, 0xA293, 0xC472, 0xA293, 0xC44C, 0x1585, 0xC44C, 0x1185, - 0xC44C, 0x1585, 0xC46C, 0x1585, 0xC46C, 0x1185, 0xC46C, 0x1585, - 0xC44C, 0x0185, 0xC44C, 0x0181, 0xC44C, 0x0185, 0xC46C, 0x0185, - 0xC46C, 0x0181, 0xC46C, 0x0185, 0xBE24, 0xB000, 0xBE23, 0xFF51, - 0xBE22, 0xDF20, 0xBE21, 0x0140, 0xBE20, 0x00BB, 0xBE24, 0xB800, - 0xBE24, 0x0000, 0xBE24, 0x7000, 0xBE23, 0xFF51, 0xBE22, 0xDF60, - 0xBE21, 0x0140, 0xBE20, 0x0077, 0xBE24, 0x7800, 0xBE24, 0x0000, - 0xBE2E, 0x7BA7, 0xBE36, 0x1000, 0xBE37, 0x1000, 0x8000, 0x0001, - 0xBE69, 0xD50F, 0x8000, 0x0000, 0xBE69, 0xD50F, 0xBE6B, 0x0320, - 0xBE77, 0x2800, 0xBE78, 0x3C3C, 0xBE79, 0x3C3C, 0xBE6E, 0xE120, - 0x8000, 0x0001, 0xBE10, 0x8140, 0x8000, 0x0000, 0xBE10, 0x8140, - 0xBE15, 0x1007, 0xBE14, 0x0448, 0xBE1E, 0x00A0, 0xBE10, 0x8160, - 0xBE10, 0x8140, 0xBE00, 0x1340, 0x0450, 0x0000, 0x0401, 0x0000, +static const struct rtl8366rb_jam_tbl_entry rtl8366rb_init_jam_ver_3[] = { + {0x0000, 0x0830}, {0x0400, 0x8130}, {0x000A, 0x83ED}, {0x0431, 0x5432}, + {0x0F51, 0x0017}, {0x02F5, 0x0048}, {0x02FA, 0xFFDF}, {0x02FB, 0xFFE0}, + {0xC456, 0x0C14}, {0xC476, 0x0C14}, {0xC454, 0x3F8B}, {0xC474, 0x3F8B}, + {0xC450, 0x2071}, {0xC470, 0x2071}, {0xC451, 0x226B}, {0xC471, 0x226B}, + {0xC452, 0xA293}, {0xC472, 0xA293}, {0xC44C, 0x1585}, {0xC44C, 0x1185}, + {0xC44C, 0x1585}, {0xC46C, 0x1585}, {0xC46C, 0x1185}, {0xC46C, 0x1585}, + {0xC44C, 0x0185}, {0xC44C, 0x0181}, {0xC44C, 0x0185}, {0xC46C, 0x0185}, + {0xC46C, 0x0181}, {0xC46C, 0x0185}, {0xBE24, 0xB000}, {0xBE23, 0xFF51}, + {0xBE22, 0xDF20}, {0xBE21, 0x0140}, {0xBE20, 0x00BB}, {0xBE24, 0xB800}, + {0xBE24, 0x0000}, {0xBE24, 0x7000}, {0xBE23, 0xFF51}, {0xBE22, 0xDF60}, + {0xBE21, 0x0140}, {0xBE20, 0x0077}, {0xBE24, 0x7800}, {0xBE24, 0x0000}, + {0xBE2E, 0x7BA7}, {0xBE36, 0x1000}, {0xBE37, 0x1000}, {0x8000, 0x0001}, + {0xBE69, 0xD50F}, {0x8000, 0x0000}, {0xBE69, 0xD50F}, {0xBE6B, 0x0320}, + {0xBE77, 0x2800}, {0xBE78, 0x3C3C}, {0xBE79, 0x3C3C}, {0xBE6E, 0xE120}, + {0x8000, 0x0001}, {0xBE10, 0x8140}, {0x8000, 0x0000}, {0xBE10, 0x8140}, + {0xBE15, 0x1007}, {0xBE14, 0x0448}, {0xBE1E, 0x00A0}, {0xBE10, 0x8160}, + {0xBE10, 0x8140}, {0xBE00, 0x1340}, {0x0450, 0x0000}, {0x0401, 0x0000}, }; /* Belkin F5D8235 v1, "belkin,f5d8235-v1" */ -static const u16 rtl8366rb_init_jam_f5d8235[] = { - 0x0242, 0x02BF, 0x0245, 0x02BF, 0x0248, 0x02BF, 0x024B, 0x02BF, - 0x024E, 0x02BF, 0x0251, 0x02BF, 0x0254, 0x0A3F, 0x0256, 0x0A3F, - 0x0258, 0x0A3F, 0x025A, 0x0A3F, 0x025C, 0x0A3F, 0x025E, 0x0A3F, - 0x0263, 0x007C, 0x0100, 0x0004, 0xBE5B, 0x3500, 0x800E, 0x200F, - 0xBE1D, 0x0F00, 0x8001, 0x5011, 0x800A, 0xA2F4, 0x800B, 0x17A3, - 0xBE4B, 0x17A3, 0xBE41, 0x5011, 0xBE17, 0x2100, 0x8000, 0x8304, - 0xBE40, 0x8304, 0xBE4A, 0xA2F4, 0x800C, 0xA8D5, 0x8014, 0x5500, - 0x8015, 0x0004, 0xBE4C, 0xA8D5, 0xBE59, 0x0008, 0xBE09, 0x0E00, - 0xBE36, 0x1036, 0xBE37, 0x1036, 0x800D, 0x00FF, 0xBE4D, 0x00FF, +static const struct rtl8366rb_jam_tbl_entry rtl8366rb_init_jam_f5d8235[] = { + {0x0242, 0x02BF}, {0x0245, 0x02BF}, {0x0248, 0x02BF}, {0x024B, 0x02BF}, + {0x024E, 0x02BF}, {0x0251, 0x02BF}, {0x0254, 0x0A3F}, {0x0256, 0x0A3F}, + {0x0258, 0x0A3F}, {0x025A, 0x0A3F}, {0x025C, 0x0A3F}, {0x025E, 0x0A3F}, + {0x0263, 0x007C}, {0x0100, 0x0004}, {0xBE5B, 0x3500}, {0x800E, 0x200F}, + {0xBE1D, 0x0F00}, {0x8001, 0x5011}, {0x800A, 0xA2F4}, {0x800B, 0x17A3}, + {0xBE4B, 0x17A3}, {0xBE41, 0x5011}, {0xBE17, 0x2100}, {0x8000, 0x8304}, + {0xBE40, 0x8304}, {0xBE4A, 0xA2F4}, {0x800C, 0xA8D5}, {0x8014, 0x5500}, + {0x8015, 0x0004}, {0xBE4C, 0xA8D5}, {0xBE59, 0x0008}, {0xBE09, 0x0E00}, + {0xBE36, 0x1036}, {0xBE37, 0x1036}, {0x800D, 0x00FF}, {0xBE4D, 0x00FF}, }; /* DGN3500, "netgear,dgn3500", "netgear,dgn3500b" */ -static const u16 rtl8366rb_init_jam_dgn3500[] = { - 0x0000, 0x0830, 0x0400, 0x8130, 0x000A, 0x83ED, 0x0F51, 0x0017, - 0x02F5, 0x0048, 0x02FA, 0xFFDF, 0x02FB, 0xFFE0, 0x0450, 0x0000, - 0x0401, 0x0000, 0x0431, 0x0960, +static const struct rtl8366rb_jam_tbl_entry rtl8366rb_init_jam_dgn3500[] = { + {0x0000, 0x0830}, {0x0400, 0x8130}, {0x000A, 0x83ED}, {0x0F51, 0x0017}, + {0x02F5, 0x0048}, {0x02FA, 0xFFDF}, {0x02FB, 0xFFE0}, {0x0450, 0x0000}, + {0x0401, 0x0000}, {0x0431, 0x0960}, }; /* This jam table activates "green ethernet", which means low power mode @@ -710,16 +716,53 @@ static const u16 rtl8366rb_init_jam_dgn3500[] = { * necessary, and the ports should enter power saving mode 10 seconds after * a cable is disconnected. Seems to always be the same. */ -static const u16 rtl8366rb_green_jam[][2] = { +static const struct rtl8366rb_jam_tbl_entry rtl8366rb_green_jam[] = { {0xBE78, 0x323C}, {0xBE77, 0x5000}, {0xBE2E, 0x7BA7}, {0xBE59, 0x3459}, {0xBE5A, 0x745A}, {0xBE5B, 0x785C}, {0xBE5C, 0x785C}, {0xBE6E, 0xE120}, {0xBE79, 0x323C}, }; +/* Function that jams the tables in the proper registers */ +static int rtl8366rb_jam_table(const struct rtl8366rb_jam_tbl_entry *jam_table, + int jam_size, struct realtek_smi *smi, + bool write_dbg) +{ + u32 val; + int ret; + int i; + + for (i = 0; i < jam_size; i++) { + if ((jam_table[i].reg & 0xBE00) == 0xBE00) { + ret = regmap_read(smi->map, + RTL8366RB_PHY_ACCESS_BUSY_REG, + &val); + if (ret) + return ret; + if (!(val & RTL8366RB_PHY_INT_BUSY)) { + ret = regmap_write(smi->map, + RTL8366RB_PHY_ACCESS_CTRL_REG, + RTL8366RB_PHY_CTRL_WRITE); + if (ret) + return ret; + } + } + if (write_dbg) + dev_dbg(smi->dev, "jam %04x into register %04x\n", + jam_table[i].val, + jam_table[i].reg); + ret = regmap_write(smi->map, + jam_table[i].reg, + jam_table[i].val); + if (ret) + return ret; + } + return 0; +} + static int rtl8366rb_setup(struct dsa_switch *ds) { struct realtek_smi *smi = ds->priv; - const u16 *jam_table; + const struct rtl8366rb_jam_tbl_entry *jam_table; struct rtl8366rb *rb; u32 chip_ver = 0; u32 chip_id = 0; @@ -788,54 +831,16 @@ static int rtl8366rb_setup(struct dsa_switch *ds) jam_size = ARRAY_SIZE(rtl8366rb_init_jam_dgn3500); } - i = 0; - while (i < jam_size) { - if ((jam_table[i] & 0xBE00) == 0xBE00) { - ret = regmap_read(smi->map, - RTL8366RB_PHY_ACCESS_BUSY_REG, - &val); - if (ret) - return ret; - if (!(val & RTL8366RB_PHY_INT_BUSY)) { - ret = regmap_write(smi->map, - RTL8366RB_PHY_ACCESS_CTRL_REG, - RTL8366RB_PHY_CTRL_WRITE); - if (ret) - return ret; - } - } - dev_dbg(smi->dev, "jam %04x into register %04x\n", - jam_table[i + 1], - jam_table[i]); - ret = regmap_write(smi->map, - jam_table[i], - jam_table[i + 1]); - if (ret) - return ret; - i += 2; - } + ret = rtl8366rb_jam_table(jam_table, jam_size, smi, true); + if (ret) + return ret; /* Set up the "green ethernet" feature */ - i = 0; - while (i < ARRAY_SIZE(rtl8366rb_green_jam)) { - ret = regmap_read(smi->map, RTL8366RB_PHY_ACCESS_BUSY_REG, - &val); - if (ret) - return ret; - if (!(val & RTL8366RB_PHY_INT_BUSY)) { - ret = regmap_write(smi->map, - RTL8366RB_PHY_ACCESS_CTRL_REG, - RTL8366RB_PHY_CTRL_WRITE); - if (ret) - return ret; - ret = regmap_write(smi->map, - rtl8366rb_green_jam[i][0], - rtl8366rb_green_jam[i][1]); - if (ret) - return ret; - i++; - } - } + ret = rtl8366rb_jam_table(rtl8366rb_green_jam, + ARRAY_SIZE(rtl8366rb_green_jam), smi, false); + if (ret) + return ret; + ret = regmap_write(smi->map, RTL8366RB_GREEN_FEATURE_REG, (chip_ver == 1) ? 0x0007 : 0x0003); -- GitLab From de96c3943f591018727b862f51953c1b6c55bcc3 Mon Sep 17 00:00:00 2001 From: Qii Wang Date: Sat, 9 Jan 2021 16:29:50 +0800 Subject: [PATCH 1610/2012] i2c: mediatek: Move suspend and resume handling to NOIRQ phase Some i2c device driver indirectly uses I2C driver when it is now being suspended. The i2c devices driver is suspended during the NOIRQ phase and this cannot be changed due to other dependencies. Therefore, we also need to move the suspend handling for the I2C controller driver to the NOIRQ phase as well. Signed-off-by: Qii Wang Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-mt65xx.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/drivers/i2c/busses/i2c-mt65xx.c b/drivers/i2c/busses/i2c-mt65xx.c index 0818d3e507347..2ffd2f354d0ae 100644 --- a/drivers/i2c/busses/i2c-mt65xx.c +++ b/drivers/i2c/busses/i2c-mt65xx.c @@ -1275,7 +1275,8 @@ static int mtk_i2c_probe(struct platform_device *pdev) mtk_i2c_clock_disable(i2c); ret = devm_request_irq(&pdev->dev, irq, mtk_i2c_irq, - IRQF_TRIGGER_NONE, I2C_DRV_NAME, i2c); + IRQF_NO_SUSPEND | IRQF_TRIGGER_NONE, + I2C_DRV_NAME, i2c); if (ret < 0) { dev_err(&pdev->dev, "Request I2C IRQ %d fail\n", irq); @@ -1302,7 +1303,16 @@ static int mtk_i2c_remove(struct platform_device *pdev) } #ifdef CONFIG_PM_SLEEP -static int mtk_i2c_resume(struct device *dev) +static int mtk_i2c_suspend_noirq(struct device *dev) +{ + struct mtk_i2c *i2c = dev_get_drvdata(dev); + + i2c_mark_adapter_suspended(&i2c->adap); + + return 0; +} + +static int mtk_i2c_resume_noirq(struct device *dev) { int ret; struct mtk_i2c *i2c = dev_get_drvdata(dev); @@ -1317,12 +1327,15 @@ static int mtk_i2c_resume(struct device *dev) mtk_i2c_clock_disable(i2c); + i2c_mark_adapter_resumed(&i2c->adap); + return 0; } #endif static const struct dev_pm_ops mtk_i2c_pm = { - SET_SYSTEM_SLEEP_PM_OPS(NULL, mtk_i2c_resume) + SET_NOIRQ_SYSTEM_SLEEP_PM_OPS(mtk_i2c_suspend_noirq, + mtk_i2c_resume_noirq) }; static struct platform_driver mtk_i2c_driver = { -- GitLab From 31f190e0ccac8b75d33fdc95a797c526cf9b149e Mon Sep 17 00:00:00 2001 From: Dafna Hirschfeld Date: Thu, 21 Jan 2021 15:44:02 +0100 Subject: [PATCH 1611/2012] media: rkisp1: uapi: change hist_bins array type from __u16 to __u32 Each entry in the array is a 20 bits value composed of 16 bits unsigned integer and 4 bits fractional part. So the type should change to __u32. In addition add a documentation of how the measurements are done. Signed-off-by: Dafna Hirschfeld Acked-by: Helen Koike Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/rkisp1-config.h | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/include/uapi/linux/rkisp1-config.h b/include/uapi/linux/rkisp1-config.h index 6e449e7842605..24f05d6d258f9 100644 --- a/include/uapi/linux/rkisp1-config.h +++ b/include/uapi/linux/rkisp1-config.h @@ -844,13 +844,18 @@ struct rkisp1_cif_isp_af_stat { /** * struct rkisp1_cif_isp_hist_stat - statistics histogram data * - * @hist_bins: measured bin counters + * @hist_bins: measured bin counters. Each bin is a 20 bits unsigned fixed point + * type. Bits 0-4 are the fractional part and bits 5-19 are the + * integer part. * - * Measurement window divided into 25 sub-windows, set - * with ISP_HIST_XXX + * The window of the measurements area is divided to 5x5 sub-windows. The + * histogram is then computed for each sub-window independently and the final + * result is a weighted average of the histogram measurements on all + * sub-windows. The window of the measurements area and the weight of each + * sub-window are configurable using struct @rkisp1_cif_isp_hst_config. */ struct rkisp1_cif_isp_hist_stat { - __u16 hist_bins[RKISP1_CIF_ISP_HIST_BIN_N_MAX]; + __u32 hist_bins[RKISP1_CIF_ISP_HIST_BIN_N_MAX]; }; /** -- GitLab From a76f8dc8be471028540df24749e99a3ec0ac7c94 Mon Sep 17 00:00:00 2001 From: Dafna Hirschfeld Date: Thu, 21 Jan 2021 15:44:03 +0100 Subject: [PATCH 1612/2012] media: rkisp1: stats: remove a wrong cast to u8 hist_bins is an array of type __u32. Each entry represent a 20 bit fixed point value as documented inline. The cast to u8 when setting the values is wrong. Remove it. Signed-off-by: Dafna Hirschfeld Reviewed-by: Heiko Stuebner Acked-by: Helen Koike Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- drivers/media/platform/rockchip/rkisp1/rkisp1-stats.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/media/platform/rockchip/rkisp1/rkisp1-stats.c b/drivers/media/platform/rockchip/rkisp1/rkisp1-stats.c index 3ddab8fa8f2d3..4cdb180fa64d0 100644 --- a/drivers/media/platform/rockchip/rkisp1/rkisp1-stats.c +++ b/drivers/media/platform/rockchip/rkisp1/rkisp1-stats.c @@ -235,8 +235,7 @@ static void rkisp1_stats_get_hst_meas(struct rkisp1_stats *stats, pbuf->meas_type |= RKISP1_CIF_ISP_STAT_HIST; for (i = 0; i < RKISP1_CIF_ISP_HIST_BIN_N_MAX; i++) pbuf->params.hist.hist_bins[i] = - (u8)rkisp1_read(rkisp1, - RKISP1_CIF_ISP_HIST_BIN_0 + i * 4); + rkisp1_read(rkisp1, RKISP1_CIF_ISP_HIST_BIN_0 + i * 4); } static void rkisp1_stats_get_bls_meas(struct rkisp1_stats *stats, -- GitLab From a802a0430b863f03bc01aaea2d2bf6ff464f03e7 Mon Sep 17 00:00:00 2001 From: Dafna Hirschfeld Date: Thu, 21 Jan 2021 15:44:04 +0100 Subject: [PATCH 1613/2012] media: rkisp1: stats: mask the hist_bins values hist_bins is an array of type __u32. Each entry represents a 20 bit value. So mask out the unused bits. Signed-off-by: Dafna Hirschfeld Acked-by: Helen Koike Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- drivers/media/platform/rockchip/rkisp1/rkisp1-regs.h | 1 + drivers/media/platform/rockchip/rkisp1/rkisp1-stats.c | 8 +++++--- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/media/platform/rockchip/rkisp1/rkisp1-regs.h b/drivers/media/platform/rockchip/rkisp1/rkisp1-regs.h index 8a8d960a679c2..fa33080f51db5 100644 --- a/drivers/media/platform/rockchip/rkisp1/rkisp1-regs.h +++ b/drivers/media/platform/rockchip/rkisp1/rkisp1-regs.h @@ -365,6 +365,7 @@ #define RKISP1_CIF_ISP_MAX_HIST_PREDIVIDER 0x0000007F #define RKISP1_CIF_ISP_HIST_ROW_NUM 5 #define RKISP1_CIF_ISP_HIST_COLUMN_NUM 5 +#define RKISP1_CIF_ISP_HIST_GET_BIN(x) ((x) & 0x000FFFFF) /* AUTO FOCUS MEASUREMENT: ISP_AFM_CTRL */ #define RKISP1_ISP_AFM_CTRL_ENABLE BIT(0) diff --git a/drivers/media/platform/rockchip/rkisp1/rkisp1-stats.c b/drivers/media/platform/rockchip/rkisp1/rkisp1-stats.c index 4cdb180fa64d0..3b2783700abc2 100644 --- a/drivers/media/platform/rockchip/rkisp1/rkisp1-stats.c +++ b/drivers/media/platform/rockchip/rkisp1/rkisp1-stats.c @@ -233,9 +233,11 @@ static void rkisp1_stats_get_hst_meas(struct rkisp1_stats *stats, unsigned int i; pbuf->meas_type |= RKISP1_CIF_ISP_STAT_HIST; - for (i = 0; i < RKISP1_CIF_ISP_HIST_BIN_N_MAX; i++) - pbuf->params.hist.hist_bins[i] = - rkisp1_read(rkisp1, RKISP1_CIF_ISP_HIST_BIN_0 + i * 4); + for (i = 0; i < RKISP1_CIF_ISP_HIST_BIN_N_MAX; i++) { + u32 reg_val = rkisp1_read(rkisp1, RKISP1_CIF_ISP_HIST_BIN_0 + i * 4); + + pbuf->params.hist.hist_bins[i] = RKISP1_CIF_ISP_HIST_GET_BIN(reg_val); + } } static void rkisp1_stats_get_bls_meas(struct rkisp1_stats *stats, -- GitLab From 66d81de7ea9d2b0775e5bfd5e770483a1c24b9ca Mon Sep 17 00:00:00 2001 From: Heiko Stuebner Date: Thu, 21 Jan 2021 15:44:05 +0100 Subject: [PATCH 1614/2012] media: rockchip: rkisp1: reduce number of histogram grid elements in uapi The uapi right now specifies an array size of 28 but the actual number of elements is only 25 with the last 3 being unused. Reduce the array size to the correct number of elements and change the params code to iterate the array 25 times. Signed-off-by: Heiko Stuebner Signed-off-by: Dafna Hirschfeld Acked-by: Helen Koike Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- drivers/media/platform/rockchip/rkisp1/rkisp1-params.c | 3 ++- include/uapi/linux/rkisp1-config.h | 3 +-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/media/platform/rockchip/rkisp1/rkisp1-params.c b/drivers/media/platform/rockchip/rkisp1/rkisp1-params.c index 6af4d551ffb54..021939466b241 100644 --- a/drivers/media/platform/rockchip/rkisp1/rkisp1-params.c +++ b/drivers/media/platform/rockchip/rkisp1/rkisp1-params.c @@ -589,7 +589,6 @@ static void rkisp1_hst_config(struct rkisp1_params *params, RKISP1_CIF_ISP_HIST_WEIGHT_22TO03, RKISP1_CIF_ISP_HIST_WEIGHT_13TO43, RKISP1_CIF_ISP_HIST_WEIGHT_04TO34, - RKISP1_CIF_ISP_HIST_WEIGHT_44, }; const u8 *weight; unsigned int i; @@ -622,6 +621,8 @@ static void rkisp1_hst_config(struct rkisp1_params *params, weight[2], weight[3]), hist_weight_regs[i]); + + rkisp1_write(params->rkisp1, weight[0] & 0x1F, RKISP1_CIF_ISP_HIST_WEIGHT_44); } static void diff --git a/include/uapi/linux/rkisp1-config.h b/include/uapi/linux/rkisp1-config.h index 24f05d6d258f9..35aa82d5f6dd5 100644 --- a/include/uapi/linux/rkisp1-config.h +++ b/include/uapi/linux/rkisp1-config.h @@ -102,8 +102,7 @@ /* * Histogram calculation */ -/* Last 3 values unused. */ -#define RKISP1_CIF_ISP_HISTOGRAM_WEIGHT_GRIDS_SIZE 28 +#define RKISP1_CIF_ISP_HISTOGRAM_WEIGHT_GRIDS_SIZE 25 /* * Defect Pixel Cluster Correction -- GitLab From fc672d806bd77eff26117479e90ccdcfd2a8ecb4 Mon Sep 17 00:00:00 2001 From: Heiko Stuebner Date: Thu, 21 Jan 2021 15:44:06 +0100 Subject: [PATCH 1615/2012] media: rockchip: rkisp1: carry ip version information The IP block evolved from its rk3288/rk3399 base and the vendor designates them with a numerical version. rk3399 for example is designated V10 probably meaning V1.0. There doesn't seem to be an actual version register we could read that information from, so allow the match_data to carry that information for future differentiation. Also carry that information in the hw_revision field of the media- controller API, so that userspace also has access to that. The added versions are: - V10: at least rk3288 + rk3399 - V11: seemingly unused as of now, but probably appeared in some soc - V12: at least rk3326 + px30 - V13: at least rk1808 [fix checkpatch warning don't use multiple blank lines] Signed-off-by: Heiko Stuebner Signed-off-by: Dafna Hirschfeld Reviewed-by: Ezequiel Garcia Acked-by: Helen Koike Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- Documentation/admin-guide/media/rkisp1.rst | 16 ++++++++++++++ .../platform/rockchip/rkisp1/rkisp1-dev.c | 21 +++++++++++-------- include/uapi/linux/rkisp1-config.h | 15 +++++++++++++ 3 files changed, 43 insertions(+), 9 deletions(-) diff --git a/Documentation/admin-guide/media/rkisp1.rst b/Documentation/admin-guide/media/rkisp1.rst index 2267e4fb475e5..ccf418713623b 100644 --- a/Documentation/admin-guide/media/rkisp1.rst +++ b/Documentation/admin-guide/media/rkisp1.rst @@ -13,6 +13,22 @@ This file documents the driver for the Rockchip ISP1 that is part of RK3288 and RK3399 SoCs. The driver is located under drivers/staging/media/rkisp1 and uses the Media-Controller API. +Revisions +========= + +There exist multiple smaller revisions to this ISP that got introduced in +later SoCs. Revisions can be found in the enum :c:type:`rkisp1_cif_isp_version` +in the UAPI and the revision of the ISP inside the running SoC can be read +in the field hw_revision of struct media_device_info as returned by +ioctl MEDIA_IOC_DEVICE_INFO. + +Versions in use are: + +- RKISP1_V10: used at least in rk3288 and rk3399 +- RKISP1_V11: declared in the original vendor code, but not used +- RKISP1_V12: used at least in rk3326 and px30 +- RKISP1_V13: used at least in rk1808 + Topology ======== .. _rkisp1_topology_graph: diff --git a/drivers/media/platform/rockchip/rkisp1/rkisp1-dev.c b/drivers/media/platform/rockchip/rkisp1/rkisp1-dev.c index 68da1eed753dc..f7e9fd3055485 100644 --- a/drivers/media/platform/rockchip/rkisp1/rkisp1-dev.c +++ b/drivers/media/platform/rockchip/rkisp1/rkisp1-dev.c @@ -104,6 +104,7 @@ struct rkisp1_match_data { const char * const *clks; unsigned int size; + enum rkisp1_cif_isp_version isp_ver; }; /* ---------------------------------------------------------------------------- @@ -411,15 +412,16 @@ static const char * const rk3399_isp_clks[] = { "hclk", }; -static const struct rkisp1_match_data rk3399_isp_clk_data = { +static const struct rkisp1_match_data rk3399_isp_match_data = { .clks = rk3399_isp_clks, .size = ARRAY_SIZE(rk3399_isp_clks), + .isp_ver = RKISP1_V10, }; static const struct of_device_id rkisp1_of_match[] = { { .compatible = "rockchip,rk3399-cif-isp", - .data = &rk3399_isp_clk_data, + .data = &rk3399_isp_match_data, }, {}, }; @@ -457,15 +459,15 @@ static void rkisp1_debug_init(struct rkisp1_device *rkisp1) static int rkisp1_probe(struct platform_device *pdev) { - const struct rkisp1_match_data *clk_data; + const struct rkisp1_match_data *match_data; struct device *dev = &pdev->dev; struct rkisp1_device *rkisp1; struct v4l2_device *v4l2_dev; unsigned int i; int ret, irq; - clk_data = of_device_get_match_data(&pdev->dev); - if (!clk_data) + match_data = of_device_get_match_data(&pdev->dev); + if (!match_data) return -ENODEV; rkisp1 = devm_kzalloc(dev, sizeof(*rkisp1), GFP_KERNEL); @@ -494,15 +496,16 @@ static int rkisp1_probe(struct platform_device *pdev) rkisp1->irq = irq; - for (i = 0; i < clk_data->size; i++) - rkisp1->clks[i].id = clk_data->clks[i]; - ret = devm_clk_bulk_get(dev, clk_data->size, rkisp1->clks); + for (i = 0; i < match_data->size; i++) + rkisp1->clks[i].id = match_data->clks[i]; + ret = devm_clk_bulk_get(dev, match_data->size, rkisp1->clks); if (ret) return ret; - rkisp1->clk_size = clk_data->size; + rkisp1->clk_size = match_data->size; pm_runtime_enable(&pdev->dev); + rkisp1->media_dev.hw_revision = match_data->isp_ver; strscpy(rkisp1->media_dev.model, RKISP1_DRIVER_NAME, sizeof(rkisp1->media_dev.model)); rkisp1->media_dev.dev = &pdev->dev; diff --git a/include/uapi/linux/rkisp1-config.h b/include/uapi/linux/rkisp1-config.h index 35aa82d5f6dd5..bee4413fe0d35 100644 --- a/include/uapi/linux/rkisp1-config.h +++ b/include/uapi/linux/rkisp1-config.h @@ -123,6 +123,21 @@ #define RKISP1_CIF_ISP_STAT_AFM (1U << 2) #define RKISP1_CIF_ISP_STAT_HIST (1U << 3) +/** + * enum rkisp1_cif_isp_version - ISP variants + * + * @RKISP1_V10: used at least in rk3288 and rk3399 + * @RKISP1_V11: declared in the original vendor code, but not used + * @RKISP1_V12: used at least in rk3326 and px30 + * @RKISP1_V13: used at least in rk1808 + */ +enum rkisp1_cif_isp_version { + RKISP1_V10 = 10, + RKISP1_V11, + RKISP1_V12, + RKISP1_V13, +}; + enum rkisp1_cif_isp_histogram_mode { RKISP1_CIF_ISP_HISTOGRAM_MODE_DISABLE, RKISP1_CIF_ISP_HISTOGRAM_MODE_RGB_COMBINED, -- GitLab From ef357e02b6c420dc2d668ebf3165838c77358acd Mon Sep 17 00:00:00 2001 From: Heiko Stuebner Date: Thu, 21 Jan 2021 15:44:07 +0100 Subject: [PATCH 1616/2012] media: rockchip: rkisp1: extend uapi array sizes Later variants of the rkisp1 block use more entries in some arrays: RKISP1_CIF_ISP_AE_MEAN_MAX 25 -> 81 RKISP1_CIF_ISP_HIST_BIN_N_MAX 16 -> 32 RKISP1_CIF_ISP_GAMMA_OUT_MAX_SAMPLES 17 -> 34 RKISP1_CIF_ISP_HISTOGRAM_WEIGHT_GRIDS_SIZE 25 -> 81 and we can still extend the uapi during the 5.11-rc cycle, so do that now to be on the safe side. V10 and V11 only need the smaller sizes, while V12 and V13 needed the larger sizes. When adding the bigger sizes make sure, values filled from hardware values and transmitted to userspace don't leak kernel data by zeroing them beforehand. Signed-off-by: Heiko Stuebner Signed-off-by: Dafna Hirschfeld Reviewed-by: Ezequiel Garcia Acked-by: Helen Koike Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- .../platform/rockchip/rkisp1/rkisp1-params.c | 2 +- .../platform/rockchip/rkisp1/rkisp1-stats.c | 4 +- include/uapi/linux/rkisp1-config.h | 67 ++++++++++++++++--- 3 files changed, 60 insertions(+), 13 deletions(-) diff --git a/drivers/media/platform/rockchip/rkisp1/rkisp1-params.c b/drivers/media/platform/rockchip/rkisp1/rkisp1-params.c index 021939466b241..aa5f45749543b 100644 --- a/drivers/media/platform/rockchip/rkisp1/rkisp1-params.c +++ b/drivers/media/platform/rockchip/rkisp1/rkisp1-params.c @@ -391,7 +391,7 @@ static void rkisp1_goc_config(struct rkisp1_params *params, RKISP1_CIF_ISP_CTRL_ISP_GAMMA_OUT_ENA); rkisp1_write(params->rkisp1, arg->mode, RKISP1_CIF_ISP_GAMMA_OUT_MODE); - for (i = 0; i < RKISP1_CIF_ISP_GAMMA_OUT_MAX_SAMPLES; i++) + for (i = 0; i < RKISP1_CIF_ISP_GAMMA_OUT_MAX_SAMPLES_V10; i++) rkisp1_write(params->rkisp1, arg->gamma_y[i], RKISP1_CIF_ISP_GAMMA_OUT_Y_0 + i * 4); } diff --git a/drivers/media/platform/rockchip/rkisp1/rkisp1-stats.c b/drivers/media/platform/rockchip/rkisp1/rkisp1-stats.c index 3b2783700abc2..c1d07a2e8839f 100644 --- a/drivers/media/platform/rockchip/rkisp1/rkisp1-stats.c +++ b/drivers/media/platform/rockchip/rkisp1/rkisp1-stats.c @@ -203,7 +203,7 @@ static void rkisp1_stats_get_aec_meas(struct rkisp1_stats *stats, unsigned int i; pbuf->meas_type |= RKISP1_CIF_ISP_STAT_AUTOEXP; - for (i = 0; i < RKISP1_CIF_ISP_AE_MEAN_MAX; i++) + for (i = 0; i < RKISP1_CIF_ISP_AE_MEAN_MAX_V10; i++) pbuf->params.ae.exp_mean[i] = (u8)rkisp1_read(rkisp1, RKISP1_CIF_ISP_EXP_MEAN_00 + i * 4); @@ -233,7 +233,7 @@ static void rkisp1_stats_get_hst_meas(struct rkisp1_stats *stats, unsigned int i; pbuf->meas_type |= RKISP1_CIF_ISP_STAT_HIST; - for (i = 0; i < RKISP1_CIF_ISP_HIST_BIN_N_MAX; i++) { + for (i = 0; i < RKISP1_CIF_ISP_HIST_BIN_N_MAX_V10; i++) { u32 reg_val = rkisp1_read(rkisp1, RKISP1_CIF_ISP_HIST_BIN_0 + i * 4); pbuf->params.hist.hist_bins[i] = RKISP1_CIF_ISP_HIST_GET_BIN(reg_val); diff --git a/include/uapi/linux/rkisp1-config.h b/include/uapi/linux/rkisp1-config.h index bee4413fe0d35..36e3efb81b014 100644 --- a/include/uapi/linux/rkisp1-config.h +++ b/include/uapi/linux/rkisp1-config.h @@ -49,8 +49,14 @@ #define RKISP1_CIF_ISP_CTK_COEFF_MAX 0x100 #define RKISP1_CIF_ISP_CTK_OFFSET_MAX 0x800 -#define RKISP1_CIF_ISP_AE_MEAN_MAX 25 -#define RKISP1_CIF_ISP_HIST_BIN_N_MAX 16 +#define RKISP1_CIF_ISP_AE_MEAN_MAX_V10 25 +#define RKISP1_CIF_ISP_AE_MEAN_MAX_V12 81 +#define RKISP1_CIF_ISP_AE_MEAN_MAX RKISP1_CIF_ISP_AE_MEAN_MAX_V12 + +#define RKISP1_CIF_ISP_HIST_BIN_N_MAX_V10 16 +#define RKISP1_CIF_ISP_HIST_BIN_N_MAX_V12 32 +#define RKISP1_CIF_ISP_HIST_BIN_N_MAX RKISP1_CIF_ISP_HIST_BIN_N_MAX_V12 + #define RKISP1_CIF_ISP_AFM_MAX_WINDOWS 3 #define RKISP1_CIF_ISP_DEGAMMA_CURVE_SIZE 17 @@ -86,7 +92,9 @@ * Gamma out */ /* Maximum number of color samples supported */ -#define RKISP1_CIF_ISP_GAMMA_OUT_MAX_SAMPLES 17 +#define RKISP1_CIF_ISP_GAMMA_OUT_MAX_SAMPLES_V10 17 +#define RKISP1_CIF_ISP_GAMMA_OUT_MAX_SAMPLES_V12 34 +#define RKISP1_CIF_ISP_GAMMA_OUT_MAX_SAMPLES RKISP1_CIF_ISP_GAMMA_OUT_MAX_SAMPLES_V12 /* * Lens shade correction @@ -102,7 +110,9 @@ /* * Histogram calculation */ -#define RKISP1_CIF_ISP_HISTOGRAM_WEIGHT_GRIDS_SIZE 25 +#define RKISP1_CIF_ISP_HISTOGRAM_WEIGHT_GRIDS_SIZE_V10 25 +#define RKISP1_CIF_ISP_HISTOGRAM_WEIGHT_GRIDS_SIZE_V12 81 +#define RKISP1_CIF_ISP_HISTOGRAM_WEIGHT_GRIDS_SIZE RKISP1_CIF_ISP_HISTOGRAM_WEIGHT_GRIDS_SIZE_V12 /* * Defect Pixel Cluster Correction @@ -524,6 +534,15 @@ enum rkisp1_cif_isp_goc_mode { * * @mode: goc mode (from enum rkisp1_cif_isp_goc_mode) * @gamma_y: gamma out curve y-axis for all color components + * + * The number of entries of @gamma_y depends on the hardware revision + * as is reported by the hw_revision field of the struct media_device_info + * that is returned by ioctl MEDIA_IOC_DEVICE_INFO. + * + * Versions <= V11 have RKISP1_CIF_ISP_GAMMA_OUT_MAX_SAMPLES_V10 + * entries, versions >= V12 have RKISP1_CIF_ISP_GAMMA_OUT_MAX_SAMPLES_V12 + * entries. RKISP1_CIF_ISP_GAMMA_OUT_MAX_SAMPLES is equal to the maximum + * of the two. */ struct rkisp1_cif_isp_goc_config { __u32 mode; @@ -538,6 +557,15 @@ struct rkisp1_cif_isp_goc_config { * skipped * @meas_window: coordinates of the measure window * @hist_weight: weighting factor for sub-windows + * + * The number of entries of @hist_weight depends on the hardware revision + * as is reported by the hw_revision field of the struct media_device_info + * that is returned by ioctl MEDIA_IOC_DEVICE_INFO. + * + * Versions <= V11 have RKISP1_CIF_ISP_HISTOGRAM_WEIGHT_GRIDS_SIZE_V10 + * entries, versions >= V12 have RKISP1_CIF_ISP_HISTOGRAM_WEIGHT_GRIDS_SIZE_V12 + * entries. RKISP1_CIF_ISP_HISTOGRAM_WEIGHT_GRIDS_SIZE is equal to the maximum + * of the two. */ struct rkisp1_cif_isp_hst_config { __u32 mode; @@ -825,7 +853,15 @@ struct rkisp1_cif_isp_bls_meas_val { * @exp_mean: Mean luminance value of block xx * @bls_val: BLS measured values * - * Image is divided into 5x5 blocks. + * The number of entries of @exp_mean depends on the hardware revision + * as is reported by the hw_revision field of the struct media_device_info + * that is returned by ioctl MEDIA_IOC_DEVICE_INFO. + * + * Versions <= V11 have RKISP1_CIF_ISP_AE_MEAN_MAX_V10 entries, + * versions >= V12 have RKISP1_CIF_ISP_AE_MEAN_MAX_V12 entries. + * RKISP1_CIF_ISP_AE_MEAN_MAX is equal to the maximum of the two. + * + * Image is divided into 5x5 blocks on V10 and 9x9 blocks on V12. */ struct rkisp1_cif_isp_ae_stat { __u8 exp_mean[RKISP1_CIF_ISP_AE_MEAN_MAX]; @@ -862,11 +898,22 @@ struct rkisp1_cif_isp_af_stat { * type. Bits 0-4 are the fractional part and bits 5-19 are the * integer part. * - * The window of the measurements area is divided to 5x5 sub-windows. The - * histogram is then computed for each sub-window independently and the final - * result is a weighted average of the histogram measurements on all - * sub-windows. The window of the measurements area and the weight of each - * sub-window are configurable using struct @rkisp1_cif_isp_hst_config. + * The window of the measurements area is divided to 5x5 sub-windows for + * V10/V11 and to 9x9 sub-windows for V12. The histogram is then computed for + * each sub-window independently and the final result is a weighted average of + * the histogram measurements on all sub-windows. The window of the + * measurements area and the weight of each sub-window are configurable using + * struct @rkisp1_cif_isp_hst_config. + * + * The histogram contains 16 bins in V10/V11 and 32 bins in V12/V13. + * + * The number of entries of @hist_bins depends on the hardware revision + * as is reported by the hw_revision field of the struct media_device_info + * that is returned by ioctl MEDIA_IOC_DEVICE_INFO. + * + * Versions <= V11 have RKISP1_CIF_ISP_HIST_BIN_N_MAX_V10 entries, + * versions >= V12 have RKISP1_CIF_ISP_HIST_BIN_N_MAX_V12 entries. + * RKISP1_CIF_ISP_HIST_BIN_N_MAX is equal to the maximum of the two. */ struct rkisp1_cif_isp_hist_stat { __u32 hist_bins[RKISP1_CIF_ISP_HIST_BIN_N_MAX]; -- GitLab From a44092e326d403c7878018ba532369f84d31dbfa Mon Sep 17 00:00:00 2001 From: Suravee Suthikulpanit Date: Wed, 20 Jan 2021 07:50:02 -0600 Subject: [PATCH 1617/2012] iommu/amd: Use IVHD EFR for early initialization of IOMMU features IOMMU Extended Feature Register (EFR) is used to communicate the supported features for each IOMMU to the IOMMU driver. This is normally read from the PCI MMIO register offset 0x30, and used by the iommu_feature() helper function. However, there are certain scenarios where the information is needed prior to PCI initialization, and the iommu_feature() function is used prematurely w/o warning. This has caused incorrect initialization of IOMMU. This is the case for the commit 6d39bdee238f ("iommu/amd: Enforce 4k mapping for certain IOMMU data structures") Since, the EFR is also available in the IVHD header, and is available to the driver prior to PCI initialization. Therefore, default to using the IVHD EFR instead. Fixes: 6d39bdee238f ("iommu/amd: Enforce 4k mapping for certain IOMMU data structures") Signed-off-by: Suravee Suthikulpanit Tested-by: Brijesh Singh Reviewed-by: Robert Richter Link: https://lore.kernel.org/r/20210120135002.2682-1-suravee.suthikulpanit@amd.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/amd_iommu.h | 7 ++-- drivers/iommu/amd/amd_iommu_types.h | 4 +++ drivers/iommu/amd/init.c | 56 +++++++++++++++++++++++++++-- 3 files changed, 60 insertions(+), 7 deletions(-) diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h index 6b8cbdf717140..b4adab6985632 100644 --- a/drivers/iommu/amd/amd_iommu.h +++ b/drivers/iommu/amd/amd_iommu.h @@ -84,12 +84,9 @@ static inline bool is_rd890_iommu(struct pci_dev *pdev) (pdev->device == PCI_DEVICE_ID_RD890_IOMMU); } -static inline bool iommu_feature(struct amd_iommu *iommu, u64 f) +static inline bool iommu_feature(struct amd_iommu *iommu, u64 mask) { - if (!(iommu->cap & (1 << IOMMU_CAP_EFR))) - return false; - - return !!(iommu->features & f); + return !!(iommu->features & mask); } static inline u64 iommu_virt_to_phys(void *vaddr) diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h index 5535878277710..1a0495dd5fcbc 100644 --- a/drivers/iommu/amd/amd_iommu_types.h +++ b/drivers/iommu/amd/amd_iommu_types.h @@ -387,6 +387,10 @@ #define IOMMU_CAP_NPCACHE 26 #define IOMMU_CAP_EFR 27 +/* IOMMU IVINFO */ +#define IOMMU_IVINFO_OFFSET 36 +#define IOMMU_IVINFO_EFRSUP BIT(0) + /* IOMMU Feature Reporting Field (for IVHD type 10h */ #define IOMMU_FEAT_GASUP_SHIFT 6 diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index 6a1f7048dacc6..83d8ab2aed9f4 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -257,6 +257,8 @@ static void init_device_table_dma(void); static bool amd_iommu_pre_enabled = true; +static u32 amd_iommu_ivinfo __initdata; + bool translation_pre_enabled(struct amd_iommu *iommu) { return (iommu->flags & AMD_IOMMU_FLAG_TRANS_PRE_ENABLED); @@ -296,6 +298,18 @@ int amd_iommu_get_num_iommus(void) return amd_iommus_present; } +/* + * For IVHD type 0x11/0x40, EFR is also available via IVHD. + * Default to IVHD EFR since it is available sooner + * (i.e. before PCI init). + */ +static void __init early_iommu_features_init(struct amd_iommu *iommu, + struct ivhd_header *h) +{ + if (amd_iommu_ivinfo & IOMMU_IVINFO_EFRSUP) + iommu->features = h->efr_reg; +} + /* Access to l1 and l2 indexed register spaces */ static u32 iommu_read_l1(struct amd_iommu *iommu, u16 l1, u8 address) @@ -1577,6 +1591,9 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h) if (h->efr_reg & BIT(IOMMU_EFR_XTSUP_SHIFT)) amd_iommu_xt_mode = IRQ_REMAP_X2APIC_MODE; + + early_iommu_features_init(iommu, h); + break; default: return -EINVAL; @@ -1770,6 +1787,35 @@ static const struct attribute_group *amd_iommu_groups[] = { NULL, }; +/* + * Note: IVHD 0x11 and 0x40 also contains exact copy + * of the IOMMU Extended Feature Register [MMIO Offset 0030h]. + * Default to EFR in IVHD since it is available sooner (i.e. before PCI init). + */ +static void __init late_iommu_features_init(struct amd_iommu *iommu) +{ + u64 features; + + if (!(iommu->cap & (1 << IOMMU_CAP_EFR))) + return; + + /* read extended feature bits */ + features = readq(iommu->mmio_base + MMIO_EXT_FEATURES); + + if (!iommu->features) { + iommu->features = features; + return; + } + + /* + * Sanity check and warn if EFR values from + * IVHD and MMIO conflict. + */ + if (features != iommu->features) + pr_warn(FW_WARN "EFR mismatch. Use IVHD EFR (%#llx : %#llx\n).", + features, iommu->features); +} + static int __init iommu_init_pci(struct amd_iommu *iommu) { int cap_ptr = iommu->cap_ptr; @@ -1789,8 +1835,7 @@ static int __init iommu_init_pci(struct amd_iommu *iommu) if (!(iommu->cap & (1 << IOMMU_CAP_IOTLB))) amd_iommu_iotlb_sup = false; - /* read extended feature bits */ - iommu->features = readq(iommu->mmio_base + MMIO_EXT_FEATURES); + late_iommu_features_init(iommu); if (iommu_feature(iommu, FEATURE_GT)) { int glxval; @@ -2607,6 +2652,11 @@ static void __init free_dma_resources(void) free_unity_maps(); } +static void __init ivinfo_init(void *ivrs) +{ + amd_iommu_ivinfo = *((u32 *)(ivrs + IOMMU_IVINFO_OFFSET)); +} + /* * This is the hardware init function for AMD IOMMU in the system. * This function is called either from amd_iommu_init or from the interrupt @@ -2661,6 +2711,8 @@ static int __init early_amd_iommu_init(void) if (ret) goto out; + ivinfo_init(ivrs_base); + amd_iommu_target_ivhd_type = get_highest_supported_ivhd_type(ivrs_base); DUMP_printk("Using IVHD type %#x\n", amd_iommu_target_ivhd_type); -- GitLab From 494b3688bb11a21af12e92a344a1313486693d47 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Tue, 19 Jan 2021 12:35:00 +0800 Subject: [PATCH 1618/2012] iommu/vt-d: Correctly check addr alignment in qi_flush_dev_iotlb_pasid() An incorrect address mask is being used in the qi_flush_dev_iotlb_pasid() to check the address alignment. This leads to a lot of spurious kernel warnings: [ 485.837093] DMAR: Invalidate non-aligned address 7f76f47f9000, order 0 [ 485.837098] DMAR: Invalidate non-aligned address 7f76f47f9000, order 0 [ 492.494145] qi_flush_dev_iotlb_pasid: 5734 callbacks suppressed [ 492.494147] DMAR: Invalidate non-aligned address 7f7728800000, order 11 [ 492.508965] DMAR: Invalidate non-aligned address 7f7728800000, order 11 Fix it by checking the alignment in right way. Fixes: 288d08e780088 ("iommu/vt-d: Handle non-page aligned address") Reported-and-tested-by: Guo Kaijie Signed-off-by: Lu Baolu Cc: Liu Yi L Link: https://lore.kernel.org/r/20210119043500.1539596-1-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/dmar.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/intel/dmar.c b/drivers/iommu/intel/dmar.c index 004feaed3c72c..02e7c10a4224b 100644 --- a/drivers/iommu/intel/dmar.c +++ b/drivers/iommu/intel/dmar.c @@ -1496,7 +1496,7 @@ void qi_flush_dev_iotlb_pasid(struct intel_iommu *iommu, u16 sid, u16 pfsid, * Max Invs Pending (MIP) is set to 0 for now until we have DIT in * ECAP. */ - if (addr & GENMASK_ULL(size_order + VTD_PAGE_SHIFT, 0)) + if (!IS_ALIGNED(addr, VTD_PAGE_SIZE << size_order)) pr_warn_ratelimited("Invalidate non-aligned address %llx, order %d\n", addr, size_order); -- GitLab From 41c1a06d1d1544bed9692ba72a5692454eee1945 Mon Sep 17 00:00:00 2001 From: Yuxuan Shui Date: Sat, 23 Jan 2021 03:21:32 -0800 Subject: [PATCH 1619/2012] entry: Unbreak single step reporting behaviour The move of TIF_SYSCALL_EMU to SYSCALL_WORK_SYSCALL_EMU broke single step reporting. The original code reported the single step when TIF_SINGLESTEP was set and TIF_SYSCALL_EMU was not set. The SYSCALL_WORK conversion got the logic wrong and now the reporting only happens when both bits are set. Restore the original behaviour. [ tglx: Massaged changelog and dropped the pointless double negation ] Fixes: 64eb35f701f0 ("ptrace: Migrate TIF_SYSCALL_EMU to use SYSCALL_WORK flag") Signed-off-by: Yuxuan Shui Signed-off-by: Thomas Gleixner Reviewed-by: Gabriel Krisman Bertazi Link: https://lore.kernel.org/r/877do3gaq9.fsf@m5Zedd9JOGzJrf0 --- kernel/entry/common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/entry/common.c b/kernel/entry/common.c index 378341642f94c..6dd82be60df81 100644 --- a/kernel/entry/common.c +++ b/kernel/entry/common.c @@ -222,7 +222,7 @@ static inline bool report_single_step(unsigned long work) */ static inline bool report_single_step(unsigned long work) { - if (!(work & SYSCALL_WORK_SYSCALL_EMU)) + if (work & SYSCALL_WORK_SYSCALL_EMU) return false; return !!(current_thread_info()->flags & _TIF_SINGLESTEP); -- GitLab From 29b32839725f8c89a41cb6ee054c85f3116ea8b5 Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Wed, 27 Jan 2021 09:53:17 -0800 Subject: [PATCH 1620/2012] iommu/vt-d: Do not use flush-queue when caching-mode is on When an Intel IOMMU is virtualized, and a physical device is passed-through to the VM, changes of the virtual IOMMU need to be propagated to the physical IOMMU. The hypervisor therefore needs to monitor PTE mappings in the IOMMU page-tables. Intel specifications provide "caching-mode" capability that a virtual IOMMU uses to report that the IOMMU is virtualized and a TLB flush is needed after mapping to allow the hypervisor to propagate virtual IOMMU mappings to the physical IOMMU. To the best of my knowledge no real physical IOMMU reports "caching-mode" as turned on. Synchronizing the virtual and the physical IOMMU tables is expensive if the hypervisor is unaware which PTEs have changed, as the hypervisor is required to walk all the virtualized tables and look for changes. Consequently, domain flushes are much more expensive than page-specific flushes on virtualized IOMMUs with passthrough devices. The kernel therefore exploited the "caching-mode" indication to avoid domain flushing and use page-specific flushing in virtualized environments. See commit 78d5f0f500e6 ("intel-iommu: Avoid global flushes with caching mode.") This behavior changed after commit 13cf01744608 ("iommu/vt-d: Make use of iova deferred flushing"). Now, when batched TLB flushing is used (the default), full TLB domain flushes are performed frequently, requiring the hypervisor to perform expensive synchronization between the virtual TLB and the physical one. Getting batched TLB flushes to use page-specific invalidations again in such circumstances is not easy, since the TLB invalidation scheme assumes that "full" domain TLB flushes are performed for scalability. Disable batched TLB flushes when caching-mode is on, as the performance benefit from using batched TLB invalidations is likely to be much smaller than the overhead of the virtual-to-physical IOMMU page-tables synchronization. Fixes: 13cf01744608 ("iommu/vt-d: Make use of iova deferred flushing") Signed-off-by: Nadav Amit Cc: David Woodhouse Cc: Lu Baolu Cc: Joerg Roedel Cc: Will Deacon Cc: stable@vger.kernel.org Acked-by: Lu Baolu Link: https://lore.kernel.org/r/20210127175317.1600473-1-namit@vmware.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/iommu.c | 32 +++++++++++++++++++++++++++++++- 1 file changed, 31 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index f665322a09919..06b00b5363d86 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -5440,6 +5440,36 @@ intel_iommu_domain_set_attr(struct iommu_domain *domain, return ret; } +static bool domain_use_flush_queue(void) +{ + struct dmar_drhd_unit *drhd; + struct intel_iommu *iommu; + bool r = true; + + if (intel_iommu_strict) + return false; + + /* + * The flush queue implementation does not perform page-selective + * invalidations that are required for efficient TLB flushes in virtual + * environments. The benefit of batching is likely to be much lower than + * the overhead of synchronizing the virtual and physical IOMMU + * page-tables. + */ + rcu_read_lock(); + for_each_active_iommu(iommu, drhd) { + if (!cap_caching_mode(iommu->cap)) + continue; + + pr_warn_once("IOMMU batching is disabled due to virtualization"); + r = false; + break; + } + rcu_read_unlock(); + + return r; +} + static int intel_iommu_domain_get_attr(struct iommu_domain *domain, enum iommu_attr attr, void *data) @@ -5450,7 +5480,7 @@ intel_iommu_domain_get_attr(struct iommu_domain *domain, case IOMMU_DOMAIN_DMA: switch (attr) { case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE: - *(int *)data = !intel_iommu_strict; + *(int *)data = domain_use_flush_queue(); return 0; default: return -ENODEV; -- GitLab From 6c635caef410aa757befbd8857c1eadde5cc22ed Mon Sep 17 00:00:00 2001 From: Baolin Wang Date: Thu, 28 Jan 2021 13:58:15 +0800 Subject: [PATCH 1621/2012] blk-cgroup: Use cond_resched() when destroy blkgs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On !PREEMPT kernel, we can get below softlockup when doing stress testing with creating and destroying block cgroup repeatly. The reason is it may take a long time to acquire the queue's lock in the loop of blkcg_destroy_blkgs(), or the system can accumulate a huge number of blkgs in pathological cases. We can add a need_resched() check on each loop and release locks and do cond_resched() if true to avoid this issue, since the blkcg_destroy_blkgs() is not called from atomic contexts. [ 4757.010308] watchdog: BUG: soft lockup - CPU#11 stuck for 94s! [ 4757.010698] Call trace: [ 4757.010700]  blkcg_destroy_blkgs+0x68/0x150 [ 4757.010701]  cgwb_release_workfn+0x104/0x158 [ 4757.010702]  process_one_work+0x1bc/0x3f0 [ 4757.010704]  worker_thread+0x164/0x468 [ 4757.010705]  kthread+0x108/0x138 Suggested-by: Tejun Heo Signed-off-by: Baolin Wang Signed-off-by: Jens Axboe --- block/blk-cgroup.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 031114d454a60..4221a1539391c 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -1016,6 +1016,8 @@ static void blkcg_css_offline(struct cgroup_subsys_state *css) */ void blkcg_destroy_blkgs(struct blkcg *blkcg) { + might_sleep(); + spin_lock_irq(&blkcg->lock); while (!hlist_empty(&blkcg->blkg_list)) { @@ -1023,14 +1025,20 @@ void blkcg_destroy_blkgs(struct blkcg *blkcg) struct blkcg_gq, blkcg_node); struct request_queue *q = blkg->q; - if (spin_trylock(&q->queue_lock)) { - blkg_destroy(blkg); - spin_unlock(&q->queue_lock); - } else { + if (need_resched() || !spin_trylock(&q->queue_lock)) { + /* + * Given that the system can accumulate a huge number + * of blkgs in pathological cases, check to see if we + * need to rescheduling to avoid softlockup. + */ spin_unlock_irq(&blkcg->lock); - cpu_relax(); + cond_resched(); spin_lock_irq(&blkcg->lock); + continue; } + + blkg_destroy(blkg); + spin_unlock(&q->queue_lock); } spin_unlock_irq(&blkcg->lock); -- GitLab From 0fe37724f8e70fa4cb72948f60fca553702df768 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Thu, 28 Jan 2021 15:36:19 +0900 Subject: [PATCH 1622/2012] block: fix bd_size_lock use Some block device drivers, e.g. the skd driver, call set_capacity() with IRQ disabled. This results in lockdep ito complain about inconsistent lock states ("inconsistent {HARDIRQ-ON-W} -> {IN-HARDIRQ-W} usage") because set_capacity takes a block device bd_size_lock using the functions spin_lock() and spin_unlock(). Ensure a consistent locking state by replacing these calls with spin_lock_irqsave() and spin_lock_irqrestore(). The same applies to bdev_set_nr_sectors(). With this fix, all lockdep complaints are resolved. Signed-off-by: Damien Le Moal Signed-off-by: Jens Axboe --- block/genhd.c | 5 +++-- block/partitions/core.c | 6 ++++-- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/block/genhd.c b/block/genhd.c index 419548e92d82f..9e741a4f351be 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -45,10 +45,11 @@ static void disk_release_events(struct gendisk *disk); void set_capacity(struct gendisk *disk, sector_t sectors) { struct block_device *bdev = disk->part0; + unsigned long flags; - spin_lock(&bdev->bd_size_lock); + spin_lock_irqsave(&bdev->bd_size_lock, flags); i_size_write(bdev->bd_inode, (loff_t)sectors << SECTOR_SHIFT); - spin_unlock(&bdev->bd_size_lock); + spin_unlock_irqrestore(&bdev->bd_size_lock, flags); } EXPORT_SYMBOL(set_capacity); diff --git a/block/partitions/core.c b/block/partitions/core.c index 23460cee9de50..4601a845cd79e 100644 --- a/block/partitions/core.c +++ b/block/partitions/core.c @@ -88,9 +88,11 @@ static int (*check_part[])(struct parsed_partitions *) = { static void bdev_set_nr_sectors(struct block_device *bdev, sector_t sectors) { - spin_lock(&bdev->bd_size_lock); + unsigned long flags; + + spin_lock_irqsave(&bdev->bd_size_lock, flags); i_size_write(bdev->bd_inode, (loff_t)sectors << SECTOR_SHIFT); - spin_unlock(&bdev->bd_size_lock); + spin_unlock_irqrestore(&bdev->bd_size_lock, flags); } static struct parsed_partitions *allocate_partitions(struct gendisk *hd) -- GitLab From 0df28cad06eb41cc36bfea69d9c882fb567fd0d6 Mon Sep 17 00:00:00 2001 From: Coly Li Date: Thu, 28 Jan 2021 18:48:47 +0800 Subject: [PATCH 1623/2012] bcache: only check feature sets when sb->version >= BCACHE_SB_VERSION_CDEV_WITH_FEATURES For super block version < BCACHE_SB_VERSION_CDEV_WITH_FEATURES, it doesn't make sense to check the feature sets. This patch checks super block version in bch_has_feature_* routines, if the version doesn't have feature sets yet, returns 0 (false) to the caller. Fixes: 5342fd425502 ("bcache: set bcache device into read-only mode for BCH_FEATURE_INCOMPAT_OBSO_LARGE_BUCKET") Fixes: ffa470327572 ("bcache: add bucket_size_hi into struct cache_sb_disk for large bucket") Cc: stable@vger.kernel.org # 5.9+ Reported-and-tested-by: Bockholdt Arne Signed-off-by: Coly Li Signed-off-by: Jens Axboe --- drivers/md/bcache/features.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/md/bcache/features.h b/drivers/md/bcache/features.h index 84fc2c0f01015..d1c8fd3977fc6 100644 --- a/drivers/md/bcache/features.h +++ b/drivers/md/bcache/features.h @@ -33,6 +33,8 @@ #define BCH_FEATURE_COMPAT_FUNCS(name, flagname) \ static inline int bch_has_feature_##name(struct cache_sb *sb) \ { \ + if (sb->version < BCACHE_SB_VERSION_CDEV_WITH_FEATURES) \ + return 0; \ return (((sb)->feature_compat & \ BCH##_FEATURE_COMPAT_##flagname) != 0); \ } \ @@ -50,6 +52,8 @@ static inline void bch_clear_feature_##name(struct cache_sb *sb) \ #define BCH_FEATURE_RO_COMPAT_FUNCS(name, flagname) \ static inline int bch_has_feature_##name(struct cache_sb *sb) \ { \ + if (sb->version < BCACHE_SB_VERSION_CDEV_WITH_FEATURES) \ + return 0; \ return (((sb)->feature_ro_compat & \ BCH##_FEATURE_RO_COMPAT_##flagname) != 0); \ } \ @@ -67,6 +71,8 @@ static inline void bch_clear_feature_##name(struct cache_sb *sb) \ #define BCH_FEATURE_INCOMPAT_FUNCS(name, flagname) \ static inline int bch_has_feature_##name(struct cache_sb *sb) \ { \ + if (sb->version < BCACHE_SB_VERSION_CDEV_WITH_FEATURES) \ + return 0; \ return (((sb)->feature_incompat & \ BCH##_FEATURE_INCOMPAT_##flagname) != 0); \ } \ -- GitLab From 19d51588125fb7abe258e85b412710486a3d9219 Mon Sep 17 00:00:00 2001 From: Adam Harvey Date: Wed, 27 Jan 2021 13:44:35 -0800 Subject: [PATCH 1624/2012] cifs: ignore auto and noauto options if given In 24e0a1eff9e2, the noauto and auto options were missed when migrating to the new mount API. As a result, users with noauto in their fstab mount options are now unable to mount cifs filesystems, as they'll receive an "Unknown parameter" error. This restores the old behaviour of ignoring noauto and auto if they're given. Fixes: 24e0a1eff9e2 ("cifs: switch to new mount api") Signed-off-by: Adam Harvey Reviewed-by: Ronnie Sahlberg Signed-off-by: Steve French --- fs/cifs/fs_context.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/cifs/fs_context.c b/fs/cifs/fs_context.c index 076bcadc756a7..62818b142e2e8 100644 --- a/fs/cifs/fs_context.c +++ b/fs/cifs/fs_context.c @@ -175,6 +175,7 @@ const struct fs_parameter_spec smb3_fs_parameters[] = { fsparam_flag_no("exec", Opt_ignore), fsparam_flag_no("dev", Opt_ignore), fsparam_flag_no("mand", Opt_ignore), + fsparam_flag_no("auto", Opt_ignore), fsparam_string("cred", Opt_ignore), fsparam_string("credentials", Opt_ignore), {} -- GitLab From 757fed1d0898b893d7daa84183947c70f27632f3 Mon Sep 17 00:00:00 2001 From: Wang Hai Date: Thu, 28 Jan 2021 19:32:50 +0800 Subject: [PATCH 1625/2012] Revert "mm/slub: fix a memory leak in sysfs_slab_add()" This reverts commit dde3c6b72a16c2db826f54b2d49bdea26c3534a2. syzbot report a double-free bug. The following case can cause this bug. - mm/slab_common.c: create_cache(): if the __kmem_cache_create() fails, it does: out_free_cache: kmem_cache_free(kmem_cache, s); - but __kmem_cache_create() - at least for slub() - will have done sysfs_slab_add(s) -> sysfs_create_group() .. fails .. -> kobject_del(&s->kobj); .. which frees s ... We can't remove the kmem_cache_free() in create_cache(), because other error cases of __kmem_cache_create() do not free this. So, revert the commit dde3c6b72a16 ("mm/slub: fix a memory leak in sysfs_slab_add()") to fix this. Reported-by: syzbot+d0bd96b4696c1ef67991@syzkaller.appspotmail.com Fixes: dde3c6b72a16 ("mm/slub: fix a memory leak in sysfs_slab_add()") Acked-by: Vlastimil Babka Signed-off-by: Wang Hai Cc: Signed-off-by: Linus Torvalds --- mm/slub.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/mm/slub.c b/mm/slub.c index 69742ab9a21d3..7ecbbbe5bc0c1 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -5625,10 +5625,8 @@ static int sysfs_slab_add(struct kmem_cache *s) s->kobj.kset = kset; err = kobject_init_and_add(&s->kobj, &slab_ktype, NULL, "%s", name); - if (err) { - kobject_put(&s->kobj); + if (err) goto out; - } err = sysfs_create_group(&s->kobj, &slab_attr_group); if (err) -- GitLab From c9b8cd6a39c48b2827c0925b648b221b5f8ef25d Mon Sep 17 00:00:00 2001 From: Steve French Date: Thu, 28 Jan 2021 11:20:22 -0600 Subject: [PATCH 1626/2012] cifs: fix mounts to subdirectories of target The "prefixpath" mount option needs to be ignored which was missed in the recent conversion to the new mount API (prefixpath would be set by the mount helper if mounting a subdirectory of the root of a share e.g. //server/share/subdir) Fixes: 24e0a1eff9e2 ("cifs: switch to new mount api") Suggested-by: Ronnie Sahlberg Signed-off-by: Steve French Reviewed-by: Ronnie Sahlberg --- fs/cifs/fs_context.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/cifs/fs_context.c b/fs/cifs/fs_context.c index 62818b142e2e8..818c413db82d2 100644 --- a/fs/cifs/fs_context.c +++ b/fs/cifs/fs_context.c @@ -178,6 +178,7 @@ const struct fs_parameter_spec smb3_fs_parameters[] = { fsparam_flag_no("auto", Opt_ignore), fsparam_string("cred", Opt_ignore), fsparam_string("credentials", Opt_ignore), + fsparam_string("prefixpath", Opt_ignore), {} }; -- GitLab From 0391a45c800780f0aa33984542710451acd71b90 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 28 Jan 2021 18:35:25 +0100 Subject: [PATCH 1627/2012] nl80211: call cfg80211_dev_rename() under RTNL This is required, and we have an assertion, move the RTNL unlock down to cover cfg80211_dev_rename(). Fixes: a05829a7222e ("cfg80211: avoid holding the RTNL when calling the driver") Reported-by: syzbot+ed107c5fa3e21cdcd86e@syzkaller.appspotmail.com Link: https://lore.kernel.org/r/20210128183454.16a4ea036740.I2beeb391dc322d6f1df3ee2612e714e0a7817397@changeid Signed-off-by: Johannes Berg --- net/wireless/nl80211.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index e5e9d889f00f9..3b45a9593e71a 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -3220,7 +3220,6 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) wdev = netdev->ieee80211_ptr; wiphy_lock(&rdev->wiphy); - rtnl_unlock(); /* * end workaround code, by now the rdev is available @@ -3230,6 +3229,7 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) if (info->attrs[NL80211_ATTR_WIPHY_NAME]) result = cfg80211_dev_rename( rdev, nla_data(info->attrs[NL80211_ATTR_WIPHY_NAME])); + rtnl_unlock(); if (result) goto out; -- GitLab From bae173563cbf469533d0c38534b874d4a2df0c85 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 28 Jan 2021 18:35:26 +0100 Subject: [PATCH 1628/2012] wext: call cfg80211_change_iface() with wiphy lock held This is needed now that all the driver callbacks are protected by the wiphy lock rather than (just) the RTNL. Fixes: a05829a7222e ("cfg80211: avoid holding the RTNL when calling the driver") Reported-by: syzbot+d2d412349f88521938aa@syzkaller.appspotmail.com Link: https://lore.kernel.org/r/20210128183454.e81bc6789b4b.I5deb8b6bfdc8b4ea7696cb2447ee6c58c7ce9a4e@changeid Signed-off-by: Johannes Berg --- net/wireless/wext-compat.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c index 2e35cb78221ee..0c6ea62124963 100644 --- a/net/wireless/wext-compat.c +++ b/net/wireless/wext-compat.c @@ -39,6 +39,7 @@ int cfg80211_wext_siwmode(struct net_device *dev, struct iw_request_info *info, struct cfg80211_registered_device *rdev; struct vif_params vifparams; enum nl80211_iftype type; + int ret; rdev = wiphy_to_rdev(wdev->wiphy); @@ -61,7 +62,11 @@ int cfg80211_wext_siwmode(struct net_device *dev, struct iw_request_info *info, memset(&vifparams, 0, sizeof(vifparams)); - return cfg80211_change_iface(rdev, dev, type, &vifparams); + wiphy_lock(wdev->wiphy); + ret = cfg80211_change_iface(rdev, dev, type, &vifparams); + wiphy_unlock(wdev->wiphy); + + return ret; } EXPORT_WEXT_HANDLER(cfg80211_wext_siwmode); -- GitLab From c88f952026ab0b860451bdc88f43d73124a7302a Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 28 Jan 2021 18:35:27 +0100 Subject: [PATCH 1629/2012] wext: call cfg80211_set_encryption() with wiphy lock held Similar to the previous commit, we need to hold the wiphy lock here. There's a second instance that is correct already, fix this one as well. Fixes: a05829a7222e ("cfg80211: avoid holding the RTNL when calling the driver") Link: https://lore.kernel.org/r/20210128183454.ea2f086465ed.I891d3bb44f068e6d97c160005010f052f28ab6e5@changeid Signed-off-by: Johannes Berg --- net/wireless/wext-compat.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c index 0c6ea62124963..a8320dc59af7e 100644 --- a/net/wireless/wext-compat.c +++ b/net/wireless/wext-compat.c @@ -655,6 +655,7 @@ static int cfg80211_wext_siwencodeext(struct net_device *dev, bool remove = false; struct key_params params; u32 cipher; + int ret; if (wdev->iftype != NL80211_IFTYPE_STATION && wdev->iftype != NL80211_IFTYPE_ADHOC) @@ -726,12 +727,16 @@ static int cfg80211_wext_siwencodeext(struct net_device *dev, params.seq_len = 6; } - return cfg80211_set_encryption( + wiphy_lock(wdev->wiphy); + ret = cfg80211_set_encryption( rdev, dev, !(ext->ext_flags & IW_ENCODE_EXT_GROUP_KEY), addr, remove, ext->ext_flags & IW_ENCODE_EXT_SET_TX_KEY, idx, ¶ms); + wiphy_unlock(wdev->wiphy); + + return ret; } static int cfg80211_wext_giwencode(struct net_device *dev, -- GitLab From 776a39b8196dbca4afb69669db0d9926ffac29ab Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 28 Jan 2021 18:35:28 +0100 Subject: [PATCH 1630/2012] cfg80211: call cfg80211_destroy_ifaces() with wiphy lock held This is needed since it calls into the driver, which must have the same context as if we got to destroy an interface through nl80211. Fix this, and add a direct lockdep assertion so we don't see it pop up only when the driver calls back to cfg80211. Fixes: a05829a7222e ("cfg80211: avoid holding the RTNL when calling the driver") Reported-by: syzbot+4305e814f9b267131776@syzkaller.appspotmail.com Link: https://lore.kernel.org/r/20210128183454.d31df9cbd7ce.I1beb07c9492f0ade900e864a098c57041e7a7ebf@changeid Signed-off-by: Johannes Berg --- net/wireless/core.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/wireless/core.c b/net/wireless/core.c index 200cd9f5fd5f0..18f9a5c214b59 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -334,6 +334,7 @@ void cfg80211_destroy_ifaces(struct cfg80211_registered_device *rdev) struct wireless_dev *wdev, *tmp; ASSERT_RTNL(); + lockdep_assert_wiphy(&rdev->wiphy); list_for_each_entry_safe(wdev, tmp, &rdev->wiphy.wdev_list, list) { if (wdev->nl_owner_dead) @@ -349,7 +350,9 @@ static void cfg80211_destroy_iface_wk(struct work_struct *work) destroy_work); rtnl_lock(); + wiphy_lock(&rdev->wiphy); cfg80211_destroy_ifaces(rdev); + wiphy_unlock(&rdev->wiphy); rtnl_unlock(); } -- GitLab From 899199292b14b7c735808a37517de4dd2160c300 Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: Mon, 25 Jan 2021 21:19:16 -0800 Subject: [PATCH 1631/2012] nvme-pci: add the DISABLE_WRITE_ZEROES quirk for a SPCC device This adds a quirk for SPCC 256GB NVMe 1.3 drive which fixes timeouts and I/O errors due to the fact that the controller does not properly handle the Write Zeroes command: [ 2745.659527] CPU: 2 PID: 0 Comm: swapper/2 Tainted: G E 5.10.6-BET #1 [ 2745.659528] Hardware name: System manufacturer System Product Name/PRIME X570-P, BIOS 3001 12/04/2020 [ 2776.138874] nvme nvme1: I/O 414 QID 3 timeout, aborting [ 2776.138886] nvme nvme1: I/O 415 QID 3 timeout, aborting [ 2776.138891] nvme nvme1: I/O 416 QID 3 timeout, aborting [ 2776.138895] nvme nvme1: I/O 417 QID 3 timeout, aborting [ 2776.138912] nvme nvme1: Abort status: 0x0 [ 2776.138921] nvme nvme1: I/O 428 QID 3 timeout, aborting [ 2776.138922] nvme nvme1: Abort status: 0x0 [ 2776.138925] nvme nvme1: Abort status: 0x0 [ 2776.138974] nvme nvme1: Abort status: 0x0 [ 2776.138977] nvme nvme1: Abort status: 0x0 [ 2806.346792] nvme nvme1: I/O 414 QID 3 timeout, reset controller [ 2806.363566] nvme nvme1: 15/0/0 default/read/poll queues [ 2836.554298] nvme nvme1: I/O 415 QID 3 timeout, disable controller [ 2836.672064] blk_update_request: I/O error, dev nvme1n1, sector 16350 op 0x9:(WRITE_ZEROES) flags 0x0 phys_seg 0 prio class 0 [ 2836.672072] blk_update_request: I/O error, dev nvme1n1, sector 16093 op 0x9:(WRITE_ZEROES) flags 0x0 phys_seg 0 prio class 0 [ 2836.672074] blk_update_request: I/O error, dev nvme1n1, sector 15836 op 0x9:(WRITE_ZEROES) flags 0x0 phys_seg 0 prio class 0 [ 2836.672076] blk_update_request: I/O error, dev nvme1n1, sector 15579 op 0x9:(WRITE_ZEROES) flags 0x0 phys_seg 0 prio class 0 [ 2836.672078] blk_update_request: I/O error, dev nvme1n1, sector 15322 op 0x9:(WRITE_ZEROES) flags 0x0 phys_seg 0 prio class 0 [ 2836.672080] blk_update_request: I/O error, dev nvme1n1, sector 15065 op 0x9:(WRITE_ZEROES) flags 0x0 phys_seg 0 prio class 0 [ 2836.672082] blk_update_request: I/O error, dev nvme1n1, sector 14808 op 0x9:(WRITE_ZEROES) flags 0x0 phys_seg 0 prio class 0 [ 2836.672083] blk_update_request: I/O error, dev nvme1n1, sector 14551 op 0x9:(WRITE_ZEROES) flags 0x0 phys_seg 0 prio class 0 [ 2836.672085] blk_update_request: I/O error, dev nvme1n1, sector 14294 op 0x9:(WRITE_ZEROES) flags 0x0 phys_seg 0 prio class 0 [ 2836.672087] blk_update_request: I/O error, dev nvme1n1, sector 14037 op 0x9:(WRITE_ZEROES) flags 0x0 phys_seg 0 prio class 0 [ 2836.672121] nvme nvme1: failed to mark controller live state [ 2836.672123] nvme nvme1: Removing after probe failure status: -19 [ 2836.689016] Aborting journal on device dm-0-8. [ 2836.689024] Buffer I/O error on dev dm-0, logical block 25198592, lost sync page write [ 2836.689027] JBD2: Error -5 detected when updating journal superblock for dm-0-8. Reported-by: Bradley Chapman Signed-off-by: Chaitanya Kulkarni Tested-by: Bradley Chapman Reviewed-by: Keith Busch Signed-off-by: Christoph Hellwig --- drivers/nvme/host/pci.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 856aa31931c14..81e6389b20420 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -3257,6 +3257,8 @@ static const struct pci_device_id nvme_id_table[] = { .driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, }, { PCI_DEVICE(0x15b7, 0x2001), /* Sandisk Skyhawk */ .driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, }, + { PCI_DEVICE(0x1d97, 0x2263), /* SPCC */ + .driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, }, { PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2001), .driver_data = NVME_QUIRK_SINGLE_VECTOR }, { PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2003) }, -- GitLab From d1bcf006a9d3d63c1bcb65a993cb13756954cd9c Mon Sep 17 00:00:00 2001 From: Daniel Wagner Date: Wed, 27 Jan 2021 11:30:33 +0100 Subject: [PATCH 1632/2012] nvme-multipath: Early exit if no path is available nvme_round_robin_path() should test if the return ns pointer is valid. nvme_next_ns() will return a NULL pointer if there is no path left. Fixes: 75c10e732724 ("nvme-multipath: round-robin I/O policy") Signed-off-by: Daniel Wagner Reviewed-by: Hannes Reinecke Signed-off-by: Christoph Hellwig --- drivers/nvme/host/multipath.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index 9ac762b288112..282b7a4ea9a9a 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -221,7 +221,7 @@ static struct nvme_ns *nvme_round_robin_path(struct nvme_ns_head *head, } for (ns = nvme_next_ns(head, old); - ns != old; + ns && ns != old; ns = nvme_next_ns(head, ns)) { if (nvme_path_is_disabled(ns)) continue; -- GitLab From 772ea326a4a00b6b4b2c8f3606ad10c31f46c511 Mon Sep 17 00:00:00 2001 From: Chao Leng Date: Thu, 28 Jan 2021 11:33:51 +0800 Subject: [PATCH 1633/2012] nvme-core: use list_add_tail_rcu instead of list_add_tail for nvme_init_ns_head The "list" of nvme_ns_head is used as rcu list, now in nvme_init_ns_head list_add_tail is used to add ns->siblings to the rcu list. It is not safe. Should use list_add_tail_rcu instead of list_add_tail. Signed-off-by: Chao Leng Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 8caf9b34734dc..f13eb4ded95fa 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -3829,7 +3829,7 @@ static int nvme_init_ns_head(struct nvme_ns *ns, unsigned nsid, } } - list_add_tail(&ns->siblings, &head->list); + list_add_tail_rcu(&ns->siblings, &head->list); ns->head = head; mutex_unlock(&ctrl->subsys->lock); return 0; -- GitLab From a119f87b86bcdf14a18ce39a899e97a1e9160f7f Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 28 Jan 2021 13:28:59 -0500 Subject: [PATCH 1634/2012] Revert "drm/amdgpu/swsmu: drop set_fan_speed_percent (v2)" On some boards the rpm interface apparently does not work at all leading to the fan not spinning or spinning at strange speeds. Revert this for now to fix 5.10, 5.11. The follow on patch fixes this properly for 5.12. This reverts commit 8d6e65adc25e23fabbc5293b6cd320195c708dca. Bug: https://gitlab.freedesktop.org/drm/amd/-/issues/1408 Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/pm/inc/amdgpu_smu.h | 1 + drivers/gpu/drm/amd/pm/inc/smu_v11_0.h | 3 ++ drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 9 ++---- .../gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c | 1 + .../gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c | 1 + .../amd/pm/swsmu/smu11/sienna_cichlid_ppt.c | 1 + .../gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c | 31 ++++++++++++++++++- 7 files changed, 39 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/pm/inc/amdgpu_smu.h index 4bdbcce7092d1..0d797fa9f5cc6 100644 --- a/drivers/gpu/drm/amd/pm/inc/amdgpu_smu.h +++ b/drivers/gpu/drm/amd/pm/inc/amdgpu_smu.h @@ -553,6 +553,7 @@ struct pptable_funcs { *clock_req); uint32_t (*get_fan_control_mode)(struct smu_context *smu); int (*set_fan_control_mode)(struct smu_context *smu, uint32_t mode); + int (*set_fan_speed_percent)(struct smu_context *smu, uint32_t speed); int (*set_fan_speed_rpm)(struct smu_context *smu, uint32_t speed); int (*set_xgmi_pstate)(struct smu_context *smu, uint32_t pstate); int (*gfx_off_control)(struct smu_context *smu, bool enable); diff --git a/drivers/gpu/drm/amd/pm/inc/smu_v11_0.h b/drivers/gpu/drm/amd/pm/inc/smu_v11_0.h index 13de692a42136..5d0b29653ffa1 100644 --- a/drivers/gpu/drm/amd/pm/inc/smu_v11_0.h +++ b/drivers/gpu/drm/amd/pm/inc/smu_v11_0.h @@ -203,6 +203,9 @@ int smu_v11_0_set_fan_control_mode(struct smu_context *smu, uint32_t mode); +int +smu_v11_0_set_fan_speed_percent(struct smu_context *smu, uint32_t speed); + int smu_v11_0_set_fan_speed_rpm(struct smu_context *smu, uint32_t speed); diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c index 8b867a6d52b57..e84c737e39673 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c @@ -2151,19 +2151,14 @@ int smu_get_fan_speed_percent(struct smu_context *smu, uint32_t *speed) int smu_set_fan_speed_percent(struct smu_context *smu, uint32_t speed) { int ret = 0; - uint32_t rpm; if (!smu->pm_enabled || !smu->adev->pm.dpm_enabled) return -EOPNOTSUPP; mutex_lock(&smu->mutex); - if (smu->ppt_funcs->set_fan_speed_rpm) { - if (speed > 100) - speed = 100; - rpm = speed * smu->fan_max_rpm / 100; - ret = smu->ppt_funcs->set_fan_speed_rpm(smu, rpm); - } + if (smu->ppt_funcs->set_fan_speed_percent) + ret = smu->ppt_funcs->set_fan_speed_percent(smu, speed); mutex_unlock(&smu->mutex); diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c index cd7b411457ffa..16db0b506b0de 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c @@ -2326,6 +2326,7 @@ static const struct pptable_funcs arcturus_ppt_funcs = { .display_clock_voltage_request = smu_v11_0_display_clock_voltage_request, .get_fan_control_mode = smu_v11_0_get_fan_control_mode, .set_fan_control_mode = smu_v11_0_set_fan_control_mode, + .set_fan_speed_percent = smu_v11_0_set_fan_speed_percent, .set_fan_speed_rpm = smu_v11_0_set_fan_speed_rpm, .set_xgmi_pstate = smu_v11_0_set_xgmi_pstate, .gfx_off_control = smu_v11_0_gfx_off_control, diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c index 51e83123f72a1..cd7efa923195e 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c @@ -2456,6 +2456,7 @@ static const struct pptable_funcs navi10_ppt_funcs = { .display_clock_voltage_request = smu_v11_0_display_clock_voltage_request, .get_fan_control_mode = smu_v11_0_get_fan_control_mode, .set_fan_control_mode = smu_v11_0_set_fan_control_mode, + .set_fan_speed_percent = smu_v11_0_set_fan_speed_percent, .set_fan_speed_rpm = smu_v11_0_set_fan_speed_rpm, .set_xgmi_pstate = smu_v11_0_set_xgmi_pstate, .gfx_off_control = smu_v11_0_gfx_off_control, diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c index 12b36eb0ff6a5..d68d3dfee51d4 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c @@ -2802,6 +2802,7 @@ static const struct pptable_funcs sienna_cichlid_ppt_funcs = { .display_clock_voltage_request = smu_v11_0_display_clock_voltage_request, .get_fan_control_mode = smu_v11_0_get_fan_control_mode, .set_fan_control_mode = smu_v11_0_set_fan_control_mode, + .set_fan_speed_percent = smu_v11_0_set_fan_speed_percent, .set_fan_speed_rpm = smu_v11_0_set_fan_speed_rpm, .set_xgmi_pstate = smu_v11_0_set_xgmi_pstate, .gfx_off_control = smu_v11_0_gfx_off_control, diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c index b279dbbbce6ba..5aeb5f5a04478 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c @@ -1173,6 +1173,35 @@ smu_v11_0_set_fan_static_mode(struct smu_context *smu, uint32_t mode) return 0; } +int +smu_v11_0_set_fan_speed_percent(struct smu_context *smu, uint32_t speed) +{ + struct amdgpu_device *adev = smu->adev; + uint32_t duty100, duty; + uint64_t tmp64; + + if (speed > 100) + speed = 100; + + if (smu_v11_0_auto_fan_control(smu, 0)) + return -EINVAL; + + duty100 = REG_GET_FIELD(RREG32_SOC15(THM, 0, mmCG_FDO_CTRL1), + CG_FDO_CTRL1, FMAX_DUTY100); + if (!duty100) + return -EINVAL; + + tmp64 = (uint64_t)speed * duty100; + do_div(tmp64, 100); + duty = (uint32_t)tmp64; + + WREG32_SOC15(THM, 0, mmCG_FDO_CTRL0, + REG_SET_FIELD(RREG32_SOC15(THM, 0, mmCG_FDO_CTRL0), + CG_FDO_CTRL0, FDO_STATIC_DUTY, duty)); + + return smu_v11_0_set_fan_static_mode(smu, FDO_PWM_MODE_STATIC); +} + int smu_v11_0_set_fan_control_mode(struct smu_context *smu, uint32_t mode) @@ -1181,7 +1210,7 @@ smu_v11_0_set_fan_control_mode(struct smu_context *smu, switch (mode) { case AMD_FAN_CTRL_NONE: - ret = smu_v11_0_set_fan_speed_rpm(smu, smu->fan_max_rpm); + ret = smu_v11_0_set_fan_speed_percent(smu, 100); break; case AMD_FAN_CTRL_MANUAL: ret = smu_v11_0_auto_fan_control(smu, 0); -- GitLab From 00190bc087e795290502dc51c5d32de85cb2c2b8 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 25 Jan 2021 13:23:20 +0100 Subject: [PATCH 1635/2012] amdgpu: fix clang build warning clang warns about the -mhard-float command line arguments on architectures that do not support this: clang: error: argument unused during compilation: '-mhard-float' [-Werror,-Wunused-command-line-argument] Move this into the gcc-specific arguments. Fixes: e77165bf7b02 ("drm/amd/display: Add DCN3 blocks to Makefile") Signed-off-by: Arnd Bergmann Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn30/Makefile | 6 ++++-- drivers/gpu/drm/amd/display/dc/dcn301/Makefile | 3 ++- drivers/gpu/drm/amd/display/dc/dcn302/Makefile | 3 ++- 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/Makefile b/drivers/gpu/drm/amd/display/dc/dcn30/Makefile index c20331eb62e01..dfd77b3cc84d8 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dcn30/Makefile @@ -32,8 +32,8 @@ DCN30 = dcn30_init.o dcn30_hubbub.o dcn30_hubp.o dcn30_dpp.o dcn30_optc.o \ ifdef CONFIG_X86 -CFLAGS_$(AMDDALPATH)/dc/dcn30/dcn30_resource.o := -mhard-float -msse -CFLAGS_$(AMDDALPATH)/dc/dcn30/dcn30_optc.o := -mhard-float -msse +CFLAGS_$(AMDDALPATH)/dc/dcn30/dcn30_resource.o := -msse +CFLAGS_$(AMDDALPATH)/dc/dcn30/dcn30_optc.o := -msse endif ifdef CONFIG_PPC64 @@ -45,6 +45,8 @@ ifdef CONFIG_CC_IS_GCC ifeq ($(call cc-ifversion, -lt, 0701, y), y) IS_OLD_GCC = 1 endif +CFLAGS_$(AMDDALPATH)/dc/dcn30/dcn30_resource.o += -mhard-float +CFLAGS_$(AMDDALPATH)/dc/dcn30/dcn30_optc.o += -mhard-float endif ifdef CONFIG_X86 diff --git a/drivers/gpu/drm/amd/display/dc/dcn301/Makefile b/drivers/gpu/drm/amd/display/dc/dcn301/Makefile index 3ca7d911d25c4..09264716d1dc9 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn301/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dcn301/Makefile @@ -14,7 +14,7 @@ DCN301 = dcn301_init.o dcn301_resource.o dcn301_dccg.o \ dcn301_dio_link_encoder.o dcn301_hwseq.o dcn301_panel_cntl.o dcn301_hubbub.o ifdef CONFIG_X86 -CFLAGS_$(AMDDALPATH)/dc/dcn301/dcn301_resource.o := -mhard-float -msse +CFLAGS_$(AMDDALPATH)/dc/dcn301/dcn301_resource.o := -msse endif ifdef CONFIG_PPC64 @@ -25,6 +25,7 @@ ifdef CONFIG_CC_IS_GCC ifeq ($(call cc-ifversion, -lt, 0701, y), y) IS_OLD_GCC = 1 endif +CFLAGS_$(AMDDALPATH)/dc/dcn301/dcn301_resource.o += -mhard-float endif ifdef CONFIG_X86 diff --git a/drivers/gpu/drm/amd/display/dc/dcn302/Makefile b/drivers/gpu/drm/amd/display/dc/dcn302/Makefile index 8d4924b7dc221..101620a8867aa 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn302/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dcn302/Makefile @@ -13,7 +13,7 @@ DCN3_02 = dcn302_init.o dcn302_hwseq.o dcn302_resource.o ifdef CONFIG_X86 -CFLAGS_$(AMDDALPATH)/dc/dcn302/dcn302_resource.o := -mhard-float -msse +CFLAGS_$(AMDDALPATH)/dc/dcn302/dcn302_resource.o := -msse endif ifdef CONFIG_PPC64 @@ -24,6 +24,7 @@ ifdef CONFIG_CC_IS_GCC ifeq ($(call cc-ifversion, -lt, 0701, y), y) IS_OLD_GCC = 1 endif +CFLAGS_$(AMDDALPATH)/dc/dcn302/dcn302_resource.o += -mhard-float endif ifdef CONFIG_X86 -- GitLab From f609cbb8911e40e15f9055e8f945f926ac906924 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Thu, 28 Jan 2021 18:39:24 +0000 Subject: [PATCH 1636/2012] io_uring: fix list corruption for splice file_get kernel BUG at lib/list_debug.c:29! Call Trace: __list_add include/linux/list.h:67 [inline] list_add include/linux/list.h:86 [inline] io_file_get+0x8cc/0xdb0 fs/io_uring.c:6466 __io_splice_prep+0x1bc/0x530 fs/io_uring.c:3866 io_splice_prep fs/io_uring.c:3920 [inline] io_req_prep+0x3546/0x4e80 fs/io_uring.c:6081 io_queue_sqe+0x609/0x10d0 fs/io_uring.c:6628 io_submit_sqe fs/io_uring.c:6705 [inline] io_submit_sqes+0x1495/0x2720 fs/io_uring.c:6953 __do_sys_io_uring_enter+0x107d/0x1f30 fs/io_uring.c:9353 do_syscall_64+0x2d/0x70 arch/x86/entry/common.c:46 entry_SYSCALL_64_after_hwframe+0x44/0xa9 io_file_get() may be called from splice, and so REQ_F_INFLIGHT may already be set. Fixes: 02a13674fa0e8 ("io_uring: account io_uring internal files as REQ_F_INFLIGHT") Cc: stable@vger.kernel.org # 5.9+ Reported-by: syzbot+6879187cf57845801267@syzkaller.appspotmail.com Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io_uring.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index ae388cc528436..39ae1f821cef8 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -6460,7 +6460,8 @@ static struct file *io_file_get(struct io_submit_state *state, file = __io_file_get(state, fd); } - if (file && file->f_op == &io_uring_fops) { + if (file && file->f_op == &io_uring_fops && + !(req->flags & REQ_F_INFLIGHT)) { io_req_init_async(req); req->flags |= REQ_F_INFLIGHT; -- GitLab From 70b2c60d3797bffe182dddb9bb55975b9be5889a Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Thu, 28 Jan 2021 18:39:25 +0000 Subject: [PATCH 1637/2012] io_uring: fix sqo ownership false positive warning WARNING: CPU: 0 PID: 21359 at fs/io_uring.c:9042 io_uring_cancel_task_requests+0xe55/0x10c0 fs/io_uring.c:9042 Call Trace: io_uring_flush+0x47b/0x6e0 fs/io_uring.c:9227 filp_close+0xb4/0x170 fs/open.c:1295 close_files fs/file.c:403 [inline] put_files_struct fs/file.c:418 [inline] put_files_struct+0x1cc/0x350 fs/file.c:415 exit_files+0x7e/0xa0 fs/file.c:435 do_exit+0xc22/0x2ae0 kernel/exit.c:820 do_group_exit+0x125/0x310 kernel/exit.c:922 get_signal+0x427/0x20f0 kernel/signal.c:2773 arch_do_signal_or_restart+0x2a8/0x1eb0 arch/x86/kernel/signal.c:811 handle_signal_work kernel/entry/common.c:147 [inline] exit_to_user_mode_loop kernel/entry/common.c:171 [inline] exit_to_user_mode_prepare+0x148/0x250 kernel/entry/common.c:201 __syscall_exit_to_user_mode_work kernel/entry/common.c:291 [inline] syscall_exit_to_user_mode+0x19/0x50 kernel/entry/common.c:302 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Now io_uring_cancel_task_requests() can be called not through file notes but directly, remove a WARN_ONCE() there that give us false positives. That check is not very important and we catch it in other places. Fixes: 84965ff8a84f0 ("io_uring: if we see flush on exit, cancel related tasks") Cc: stable@vger.kernel.org # 5.9+ Reported-by: syzbot+3e3d9bd0c6ce9efbc3ef@syzkaller.appspotmail.com Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io_uring.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 39ae1f821cef8..12bf7180c0f10 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -8967,8 +8967,6 @@ static void io_uring_cancel_task_requests(struct io_ring_ctx *ctx, struct task_struct *task = current; if ((ctx->flags & IORING_SETUP_SQPOLL) && ctx->sq_data) { - /* for SQPOLL only sqo_task has task notes */ - WARN_ON_ONCE(ctx->sqo_task != current); io_disable_sqo_submit(ctx); task = ctx->sq_data->thread; atomic_inc(&task->io_uring->in_idle); -- GitLab From bbc20b70424aeb3c84f833860f6340adda5141fc Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 27 Jan 2021 07:27:31 -0800 Subject: [PATCH 1638/2012] net: reduce indentation level in sk_clone_lock() Rework initial test to jump over init code if memory allocation has failed. Signed-off-by: Eric Dumazet Link: https://lore.kernel.org/r/20210127152731.748663-1-eric.dumazet@gmail.com Signed-off-by: Jakub Kicinski --- net/core/sock.c | 189 ++++++++++++++++++++++++------------------------ 1 file changed, 93 insertions(+), 96 deletions(-) diff --git a/net/core/sock.c b/net/core/sock.c index bbcd4b97eddd1..4600e58828da6 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1876,123 +1876,120 @@ static void sk_init_common(struct sock *sk) struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) { struct proto *prot = READ_ONCE(sk->sk_prot); - struct sock *newsk; + struct sk_filter *filter; bool is_charged = true; + struct sock *newsk; newsk = sk_prot_alloc(prot, priority, sk->sk_family); - if (newsk != NULL) { - struct sk_filter *filter; + if (!newsk) + goto out; - sock_copy(newsk, sk); + sock_copy(newsk, sk); - newsk->sk_prot_creator = prot; + newsk->sk_prot_creator = prot; - /* SANITY */ - if (likely(newsk->sk_net_refcnt)) - get_net(sock_net(newsk)); - sk_node_init(&newsk->sk_node); - sock_lock_init(newsk); - bh_lock_sock(newsk); - newsk->sk_backlog.head = newsk->sk_backlog.tail = NULL; - newsk->sk_backlog.len = 0; + /* SANITY */ + if (likely(newsk->sk_net_refcnt)) + get_net(sock_net(newsk)); + sk_node_init(&newsk->sk_node); + sock_lock_init(newsk); + bh_lock_sock(newsk); + newsk->sk_backlog.head = newsk->sk_backlog.tail = NULL; + newsk->sk_backlog.len = 0; - atomic_set(&newsk->sk_rmem_alloc, 0); - /* - * sk_wmem_alloc set to one (see sk_free() and sock_wfree()) - */ - refcount_set(&newsk->sk_wmem_alloc, 1); - atomic_set(&newsk->sk_omem_alloc, 0); - sk_init_common(newsk); + atomic_set(&newsk->sk_rmem_alloc, 0); - newsk->sk_dst_cache = NULL; - newsk->sk_dst_pending_confirm = 0; - newsk->sk_wmem_queued = 0; - newsk->sk_forward_alloc = 0; - atomic_set(&newsk->sk_drops, 0); - newsk->sk_send_head = NULL; - newsk->sk_userlocks = sk->sk_userlocks & ~SOCK_BINDPORT_LOCK; - atomic_set(&newsk->sk_zckey, 0); + /* sk_wmem_alloc set to one (see sk_free() and sock_wfree()) */ + refcount_set(&newsk->sk_wmem_alloc, 1); - sock_reset_flag(newsk, SOCK_DONE); + atomic_set(&newsk->sk_omem_alloc, 0); + sk_init_common(newsk); - /* sk->sk_memcg will be populated at accept() time */ - newsk->sk_memcg = NULL; + newsk->sk_dst_cache = NULL; + newsk->sk_dst_pending_confirm = 0; + newsk->sk_wmem_queued = 0; + newsk->sk_forward_alloc = 0; + atomic_set(&newsk->sk_drops, 0); + newsk->sk_send_head = NULL; + newsk->sk_userlocks = sk->sk_userlocks & ~SOCK_BINDPORT_LOCK; + atomic_set(&newsk->sk_zckey, 0); - cgroup_sk_clone(&newsk->sk_cgrp_data); + sock_reset_flag(newsk, SOCK_DONE); - rcu_read_lock(); - filter = rcu_dereference(sk->sk_filter); - if (filter != NULL) - /* though it's an empty new sock, the charging may fail - * if sysctl_optmem_max was changed between creation of - * original socket and cloning - */ - is_charged = sk_filter_charge(newsk, filter); - RCU_INIT_POINTER(newsk->sk_filter, filter); - rcu_read_unlock(); + /* sk->sk_memcg will be populated at accept() time */ + newsk->sk_memcg = NULL; - if (unlikely(!is_charged || xfrm_sk_clone_policy(newsk, sk))) { - /* We need to make sure that we don't uncharge the new - * socket if we couldn't charge it in the first place - * as otherwise we uncharge the parent's filter. - */ - if (!is_charged) - RCU_INIT_POINTER(newsk->sk_filter, NULL); - sk_free_unlock_clone(newsk); - newsk = NULL; - goto out; - } - RCU_INIT_POINTER(newsk->sk_reuseport_cb, NULL); + cgroup_sk_clone(&newsk->sk_cgrp_data); - if (bpf_sk_storage_clone(sk, newsk)) { - sk_free_unlock_clone(newsk); - newsk = NULL; - goto out; - } + rcu_read_lock(); + filter = rcu_dereference(sk->sk_filter); + if (filter != NULL) + /* though it's an empty new sock, the charging may fail + * if sysctl_optmem_max was changed between creation of + * original socket and cloning + */ + is_charged = sk_filter_charge(newsk, filter); + RCU_INIT_POINTER(newsk->sk_filter, filter); + rcu_read_unlock(); - /* Clear sk_user_data if parent had the pointer tagged - * as not suitable for copying when cloning. + if (unlikely(!is_charged || xfrm_sk_clone_policy(newsk, sk))) { + /* We need to make sure that we don't uncharge the new + * socket if we couldn't charge it in the first place + * as otherwise we uncharge the parent's filter. */ - if (sk_user_data_is_nocopy(newsk)) - newsk->sk_user_data = NULL; + if (!is_charged) + RCU_INIT_POINTER(newsk->sk_filter, NULL); + sk_free_unlock_clone(newsk); + newsk = NULL; + goto out; + } + RCU_INIT_POINTER(newsk->sk_reuseport_cb, NULL); - newsk->sk_err = 0; - newsk->sk_err_soft = 0; - newsk->sk_priority = 0; - newsk->sk_incoming_cpu = raw_smp_processor_id(); - if (likely(newsk->sk_net_refcnt)) - sock_inuse_add(sock_net(newsk), 1); + if (bpf_sk_storage_clone(sk, newsk)) { + sk_free_unlock_clone(newsk); + newsk = NULL; + goto out; + } - /* - * Before updating sk_refcnt, we must commit prior changes to memory - * (Documentation/RCU/rculist_nulls.rst for details) - */ - smp_wmb(); - refcount_set(&newsk->sk_refcnt, 2); + /* Clear sk_user_data if parent had the pointer tagged + * as not suitable for copying when cloning. + */ + if (sk_user_data_is_nocopy(newsk)) + newsk->sk_user_data = NULL; - /* - * Increment the counter in the same struct proto as the master - * sock (sk_refcnt_debug_inc uses newsk->sk_prot->socks, that - * is the same as sk->sk_prot->socks, as this field was copied - * with memcpy). - * - * This _changes_ the previous behaviour, where - * tcp_create_openreq_child always was incrementing the - * equivalent to tcp_prot->socks (inet_sock_nr), so this have - * to be taken into account in all callers. -acme - */ - sk_refcnt_debug_inc(newsk); - sk_set_socket(newsk, NULL); - sk_tx_queue_clear(newsk); - RCU_INIT_POINTER(newsk->sk_wq, NULL); + newsk->sk_err = 0; + newsk->sk_err_soft = 0; + newsk->sk_priority = 0; + newsk->sk_incoming_cpu = raw_smp_processor_id(); + if (likely(newsk->sk_net_refcnt)) + sock_inuse_add(sock_net(newsk), 1); - if (newsk->sk_prot->sockets_allocated) - sk_sockets_allocated_inc(newsk); + /* Before updating sk_refcnt, we must commit prior changes to memory + * (Documentation/RCU/rculist_nulls.rst for details) + */ + smp_wmb(); + refcount_set(&newsk->sk_refcnt, 2); - if (sock_needs_netstamp(sk) && - newsk->sk_flags & SK_FLAGS_TIMESTAMP) - net_enable_timestamp(); - } + /* Increment the counter in the same struct proto as the master + * sock (sk_refcnt_debug_inc uses newsk->sk_prot->socks, that + * is the same as sk->sk_prot->socks, as this field was copied + * with memcpy). + * + * This _changes_ the previous behaviour, where + * tcp_create_openreq_child always was incrementing the + * equivalent to tcp_prot->socks (inet_sock_nr), so this have + * to be taken into account in all callers. -acme + */ + sk_refcnt_debug_inc(newsk); + sk_set_socket(newsk, NULL); + sk_tx_queue_clear(newsk); + RCU_INIT_POINTER(newsk->sk_wq, NULL); + + if (newsk->sk_prot->sockets_allocated) + sk_sockets_allocated_inc(newsk); + + if (sock_needs_netstamp(sk) && newsk->sk_flags & SK_FLAGS_TIMESTAMP) + net_enable_timestamp(); out: return newsk; } -- GitLab From dad3a72f5eec966dbd714c1a657894ffd2a6f471 Mon Sep 17 00:00:00 2001 From: Giacinto Cifelli Date: Tue, 26 Jan 2021 05:42:45 +0100 Subject: [PATCH 1639/2012] net: usb: cdc_ether: added support for Thales Cinterion PLSx3 modem family. lsusb -v for this device: Bus 003 Device 007: ID 1e2d:0069 Device Descriptor: bLength 18 bDescriptorType 1 bcdUSB 2.00 bDeviceClass 239 Miscellaneous Device bDeviceSubClass 2 ? bDeviceProtocol 1 Interface Association bMaxPacketSize0 64 idVendor 0x1e2d idProduct 0x0069 bcdDevice 0.00 iManufacturer 4 Cinterion Wireless Modules iProduct 3 PLSx3 iSerial 5 fa3c1419 bNumConfigurations 1 Configuration Descriptor: bLength 9 bDescriptorType 2 wTotalLength 352 bNumInterfaces 10 bConfigurationValue 1 iConfiguration 2 Cinterion Configuration bmAttributes 0xe0 Self Powered Remote Wakeup MaxPower 500mA Interface Association: bLength 8 bDescriptorType 11 bFirstInterface 0 bInterfaceCount 2 bFunctionClass 2 Communications bFunctionSubClass 2 Abstract (modem) bFunctionProtocol 1 AT-commands (v.25ter) iFunction 0 Interface Descriptor: bLength 9 bDescriptorType 4 bInterfaceNumber 0 bAlternateSetting 0 bNumEndpoints 1 bInterfaceClass 2 Communications bInterfaceSubClass 2 Abstract (modem) bInterfaceProtocol 1 AT-commands (v.25ter) iInterface 0 CDC Header: bcdCDC 1.10 CDC ACM: bmCapabilities 0x02 line coding and serial state CDC Call Management: bmCapabilities 0x03 call management use DataInterface bDataInterface 1 CDC Union: bMasterInterface 0 bSlaveInterface 1 Endpoint Descriptor: bLength 7 bDescriptorType 5 bEndpointAddress 0x81 EP 1 IN bmAttributes 3 Transfer Type Interrupt Synch Type None Usage Type Data wMaxPacketSize 0x0040 1x 64 bytes bInterval 5 Interface Descriptor: bLength 9 bDescriptorType 4 bInterfaceNumber 1 bAlternateSetting 0 bNumEndpoints 2 bInterfaceClass 10 CDC Data bInterfaceSubClass 0 Unused bInterfaceProtocol 0 iInterface 0 Endpoint Descriptor: bLength 7 bDescriptorType 5 bEndpointAddress 0x82 EP 2 IN bmAttributes 2 Transfer Type Bulk Synch Type None Usage Type Data wMaxPacketSize 0x0200 1x 512 bytes bInterval 0 Endpoint Descriptor: bLength 7 bDescriptorType 5 bEndpointAddress 0x01 EP 1 OUT bmAttributes 2 Transfer Type Bulk Synch Type None Usage Type Data wMaxPacketSize 0x0200 1x 512 bytes bInterval 0 Interface Association: bLength 8 bDescriptorType 11 bFirstInterface 2 bInterfaceCount 2 bFunctionClass 2 Communications bFunctionSubClass 2 Abstract (modem) bFunctionProtocol 1 AT-commands (v.25ter) iFunction 0 Interface Descriptor: bLength 9 bDescriptorType 4 bInterfaceNumber 2 bAlternateSetting 0 bNumEndpoints 1 bInterfaceClass 2 Communications bInterfaceSubClass 2 Abstract (modem) bInterfaceProtocol 1 AT-commands (v.25ter) iInterface 0 CDC Header: bcdCDC 1.10 CDC ACM: bmCapabilities 0x02 line coding and serial state CDC Call Management: bmCapabilities 0x03 call management use DataInterface bDataInterface 3 CDC Union: bMasterInterface 2 bSlaveInterface 3 Endpoint Descriptor: bLength 7 bDescriptorType 5 bEndpointAddress 0x83 EP 3 IN bmAttributes 3 Transfer Type Interrupt Synch Type None Usage Type Data wMaxPacketSize 0x0040 1x 64 bytes bInterval 5 Interface Descriptor: bLength 9 bDescriptorType 4 bInterfaceNumber 3 bAlternateSetting 0 bNumEndpoints 2 bInterfaceClass 10 CDC Data bInterfaceSubClass 0 Unused bInterfaceProtocol 0 iInterface 0 Endpoint Descriptor: bLength 7 bDescriptorType 5 bEndpointAddress 0x84 EP 4 IN bmAttributes 2 Transfer Type Bulk Synch Type None Usage Type Data wMaxPacketSize 0x0200 1x 512 bytes bInterval 0 Endpoint Descriptor: bLength 7 bDescriptorType 5 bEndpointAddress 0x02 EP 2 OUT bmAttributes 2 Transfer Type Bulk Synch Type None Usage Type Data wMaxPacketSize 0x0200 1x 512 bytes bInterval 0 Interface Association: bLength 8 bDescriptorType 11 bFirstInterface 4 bInterfaceCount 2 bFunctionClass 2 Communications bFunctionSubClass 2 Abstract (modem) bFunctionProtocol 1 AT-commands (v.25ter) iFunction 0 Interface Descriptor: bLength 9 bDescriptorType 4 bInterfaceNumber 4 bAlternateSetting 0 bNumEndpoints 1 bInterfaceClass 2 Communications bInterfaceSubClass 2 Abstract (modem) bInterfaceProtocol 1 AT-commands (v.25ter) iInterface 0 CDC Header: bcdCDC 1.10 CDC ACM: bmCapabilities 0x02 line coding and serial state CDC Call Management: bmCapabilities 0x03 call management use DataInterface bDataInterface 5 CDC Union: bMasterInterface 4 bSlaveInterface 5 Endpoint Descriptor: bLength 7 bDescriptorType 5 bEndpointAddress 0x85 EP 5 IN bmAttributes 3 Transfer Type Interrupt Synch Type None Usage Type Data wMaxPacketSize 0x0040 1x 64 bytes bInterval 5 Interface Descriptor: bLength 9 bDescriptorType 4 bInterfaceNumber 5 bAlternateSetting 0 bNumEndpoints 2 bInterfaceClass 10 CDC Data bInterfaceSubClass 0 Unused bInterfaceProtocol 0 iInterface 0 Endpoint Descriptor: bLength 7 bDescriptorType 5 bEndpointAddress 0x86 EP 6 IN bmAttributes 2 Transfer Type Bulk Synch Type None Usage Type Data wMaxPacketSize 0x0200 1x 512 bytes bInterval 0 Endpoint Descriptor: bLength 7 bDescriptorType 5 bEndpointAddress 0x03 EP 3 OUT bmAttributes 2 Transfer Type Bulk Synch Type None Usage Type Data wMaxPacketSize 0x0200 1x 512 bytes bInterval 0 Interface Association: bLength 8 bDescriptorType 11 bFirstInterface 6 bInterfaceCount 2 bFunctionClass 2 Communications bFunctionSubClass 2 Abstract (modem) bFunctionProtocol 1 AT-commands (v.25ter) iFunction 0 Interface Descriptor: bLength 9 bDescriptorType 4 bInterfaceNumber 6 bAlternateSetting 0 bNumEndpoints 1 bInterfaceClass 2 Communications bInterfaceSubClass 2 Abstract (modem) bInterfaceProtocol 1 AT-commands (v.25ter) iInterface 0 CDC Header: bcdCDC 1.10 CDC ACM: bmCapabilities 0x02 line coding and serial state CDC Call Management: bmCapabilities 0x03 call management use DataInterface bDataInterface 7 CDC Union: bMasterInterface 6 bSlaveInterface 7 Endpoint Descriptor: bLength 7 bDescriptorType 5 bEndpointAddress 0x87 EP 7 IN bmAttributes 3 Transfer Type Interrupt Synch Type None Usage Type Data wMaxPacketSize 0x0040 1x 64 bytes bInterval 5 Interface Descriptor: bLength 9 bDescriptorType 4 bInterfaceNumber 7 bAlternateSetting 0 bNumEndpoints 2 bInterfaceClass 10 CDC Data bInterfaceSubClass 0 Unused bInterfaceProtocol 0 iInterface 0 Endpoint Descriptor: bLength 7 bDescriptorType 5 bEndpointAddress 0x88 EP 8 IN bmAttributes 2 Transfer Type Bulk Synch Type None Usage Type Data wMaxPacketSize 0x0200 1x 512 bytes bInterval 0 Endpoint Descriptor: bLength 7 bDescriptorType 5 bEndpointAddress 0x04 EP 4 OUT bmAttributes 2 Transfer Type Bulk Synch Type None Usage Type Data wMaxPacketSize 0x0200 1x 512 bytes bInterval 0 Interface Association: bLength 8 bDescriptorType 11 bFirstInterface 8 bInterfaceCount 2 bFunctionClass 2 Communications bFunctionSubClass 0 bFunctionProtocol 0 iFunction 0 Interface Descriptor: bLength 9 bDescriptorType 4 bInterfaceNumber 8 bAlternateSetting 0 bNumEndpoints 1 bInterfaceClass 2 Communications bInterfaceSubClass 6 Ethernet Networking bInterfaceProtocol 0 iInterface 0 CDC Header: bcdCDC 1.10 CDC Ethernet: iMacAddress 1 00A0C6C14190 bmEthernetStatistics 0x00000000 wMaxSegmentSize 16384 wNumberMCFilters 0x0001 bNumberPowerFilters 0 CDC Union: bMasterInterface 8 bSlaveInterface 9 Endpoint Descriptor: bLength 7 bDescriptorType 5 bEndpointAddress 0x89 EP 9 IN bmAttributes 3 Transfer Type Interrupt Synch Type None Usage Type Data wMaxPacketSize 0x0040 1x 64 bytes bInterval 5 Interface Descriptor: bLength 9 bDescriptorType 4 bInterfaceNumber 9 bAlternateSetting 0 bNumEndpoints 0 bInterfaceClass 10 CDC Data bInterfaceSubClass 0 Unused bInterfaceProtocol 0 iInterface 0 Interface Descriptor: bLength 9 bDescriptorType 4 bInterfaceNumber 9 bAlternateSetting 1 bNumEndpoints 2 bInterfaceClass 10 CDC Data bInterfaceSubClass 0 Unused bInterfaceProtocol 0 iInterface 0 Endpoint Descriptor: bLength 7 bDescriptorType 5 bEndpointAddress 0x8a EP 10 IN bmAttributes 2 Transfer Type Bulk Synch Type None Usage Type Data wMaxPacketSize 0x0200 1x 512 bytes bInterval 0 Endpoint Descriptor: bLength 7 bDescriptorType 5 bEndpointAddress 0x05 EP 5 OUT bmAttributes 2 Transfer Type Bulk Synch Type None Usage Type Data wMaxPacketSize 0x0200 1x 512 bytes bInterval 0 Device Qualifier (for other device speed): bLength 10 bDescriptorType 6 bcdUSB 2.00 bDeviceClass 239 Miscellaneous Device bDeviceSubClass 2 ? bDeviceProtocol 1 Interface Association bMaxPacketSize0 64 bNumConfigurations 1 Device Status: 0x0000 (Bus Powered) Signed-off-by: Giacinto Cifelli Link: https://lore.kernel.org/r/20210126044245.8455-1-gciofono@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/usb/cdc_ether.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/usb/cdc_ether.c b/drivers/net/usb/cdc_ether.c index 6aaa0675c28a3..a9b5510286595 100644 --- a/drivers/net/usb/cdc_ether.c +++ b/drivers/net/usb/cdc_ether.c @@ -968,6 +968,12 @@ static const struct usb_device_id products[] = { USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), .driver_info = (unsigned long)&wwan_info, +}, { + /* Cinterion PLS83/PLS63 modem by GEMALTO/THALES */ + USB_DEVICE_AND_INTERFACE_INFO(0x1e2d, 0x0069, USB_CLASS_COMM, + USB_CDC_SUBCLASS_ETHERNET, + USB_CDC_PROTO_NONE), + .driver_info = (unsigned long)&wwan_info, }, { USB_INTERFACE_INFO(USB_CLASS_COMM, USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), -- GitLab From 88af9bd4efbd4d171eea537486493c9601c9a486 Mon Sep 17 00:00:00 2001 From: "Wong, Vee Khee" Date: Tue, 26 Jan 2021 16:58:32 +0800 Subject: [PATCH 1640/2012] stmmac: intel: Add ADL-S 1Gbps PCI IDs Added PCI IDs for both Ethernet TSN Controllers on the ADL-S. Also, skip SerDes programming sequences as these are being carried out at the BIOS level for ADL-S. Signed-off-by: Wong, Vee Khee Link: https://lore.kernel.org/r/20210126085832.3814-1-vee.khee.wong@intel.com Signed-off-by: Jakub Kicinski --- .../net/ethernet/stmicro/stmmac/dwmac-intel.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c index 9a6a519426a08..9c272a2411369 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c @@ -457,6 +457,21 @@ static struct stmmac_pci_info tgl_sgmii1g_info = { .setup = tgl_sgmii_data, }; +static int adls_sgmii_data(struct pci_dev *pdev, + struct plat_stmmacenet_data *plat) +{ + plat->bus_id = 1; + plat->phy_interface = PHY_INTERFACE_MODE_SGMII; + + /* SerDes power up and power down are done in BIOS for ADL */ + + return tgl_common_data(pdev, plat); +} + +static struct stmmac_pci_info adls_sgmii1g_info = { + .setup = adls_sgmii_data, +}; + static const struct stmmac_pci_func_data galileo_stmmac_func_data[] = { { .func = 6, @@ -724,6 +739,8 @@ static SIMPLE_DEV_PM_OPS(intel_eth_pm_ops, intel_eth_pci_suspend, #define PCI_DEVICE_ID_INTEL_TGLH_SGMII1G_0_ID 0x43ac #define PCI_DEVICE_ID_INTEL_TGLH_SGMII1G_1_ID 0x43a2 #define PCI_DEVICE_ID_INTEL_TGL_SGMII1G_ID 0xa0ac +#define PCI_DEVICE_ID_INTEL_ADLS_SGMII1G_0_ID 0x7aac +#define PCI_DEVICE_ID_INTEL_ADLS_SGMII1G_1_ID 0x7aad static const struct pci_device_id intel_eth_pci_id_table[] = { { PCI_DEVICE_DATA(INTEL, QUARK_ID, &quark_info) }, @@ -739,6 +756,8 @@ static const struct pci_device_id intel_eth_pci_id_table[] = { { PCI_DEVICE_DATA(INTEL, TGL_SGMII1G_ID, &tgl_sgmii1g_info) }, { PCI_DEVICE_DATA(INTEL, TGLH_SGMII1G_0_ID, &tgl_sgmii1g_info) }, { PCI_DEVICE_DATA(INTEL, TGLH_SGMII1G_1_ID, &tgl_sgmii1g_info) }, + { PCI_DEVICE_DATA(INTEL, ADLS_SGMII1G_0_ID, &adls_sgmii1g_info) }, + { PCI_DEVICE_DATA(INTEL, ADLS_SGMII1G_1_ID, &adls_sgmii1g_info) }, {} }; MODULE_DEVICE_TABLE(pci, intel_eth_pci_id_table); -- GitLab From 7cfc4486e7ea25bd405df162d9c131ee5d4c6c93 Mon Sep 17 00:00:00 2001 From: Voon Weifeng Date: Tue, 26 Jan 2021 18:08:44 +0800 Subject: [PATCH 1641/2012] stmmac: intel: Configure EHL PSE0 GbE and PSE1 GbE to 32 bits DMA addressing Fix an issue where dump stack is printed and Reset Adapter occurs when PSE0 GbE or/and PSE1 GbE is/are enabled. EHL PSE0 GbE and PSE1 GbE use 32 bits DMA addressing whereas EHL PCH GbE uses 64 bits DMA addressing. [ 25.535095] ------------[ cut here ]------------ [ 25.540276] NETDEV WATCHDOG: enp0s29f2 (intel-eth-pci): transmit queue 2 timed out [ 25.548749] WARNING: CPU: 2 PID: 0 at net/sched/sch_generic.c:443 dev_watchdog+0x259/0x260 [ 25.558004] Modules linked in: 8021q bnep bluetooth ecryptfs snd_hda_codec_hdmi intel_gpy marvell intel_ishtp_loader intel_ishtp_hid iTCO_wdt mei_hdcp iTCO_vendor_support x86_pkg_temp_thermal kvm_intel dwmac_intel stmmac kvm igb pcs_xpcs irqbypass phylink snd_hda_intel intel_rapl_msr pcspkr dca snd_hda_codec i915 i2c_i801 i2c_smbus libphy intel_ish_ipc snd_hda_core mei_me intel_ishtp mei spi_dw_pci 8250_lpss spi_dw thermal dw_dmac_core parport_pc tpm_crb tpm_tis parport tpm_tis_core tpm intel_pmc_core sch_fq_codel uhid fuse configfs snd_sof_pci snd_sof_intel_byt snd_sof_intel_ipc snd_sof_intel_hda_common snd_sof_xtensa_dsp snd_sof snd_soc_acpi_intel_match snd_soc_acpi snd_intel_dspcfg ledtrig_audio snd_soc_core snd_compress ac97_bus snd_pcm snd_timer snd soundcore [ 25.633795] CPU: 2 PID: 0 Comm: swapper/2 Tainted: G U 5.11.0-rc4-intel-lts-MISMAIL5+ #5 [ 25.644306] Hardware name: Intel Corporation Elkhart Lake Embedded Platform/ElkhartLake LPDDR4x T4 RVP1, BIOS EHLSFWI1.R00.2434.A00.2010231402 10/23/2020 [ 25.659674] RIP: 0010:dev_watchdog+0x259/0x260 [ 25.664650] Code: e8 3b 6b 60 ff eb 98 4c 89 ef c6 05 ec e7 bf 00 01 e8 fb e5 fa ff 89 d9 4c 89 ee 48 c7 c7 78 31 d2 9e 48 89 c2 e8 79 1b 18 00 <0f> 0b e9 77 ff ff ff 0f 1f 44 00 00 48 c7 47 08 00 00 00 00 48 c7 [ 25.685647] RSP: 0018:ffffb7ca80160eb8 EFLAGS: 00010286 [ 25.691498] RAX: 0000000000000000 RBX: 0000000000000002 RCX: 0000000000000103 [ 25.699483] RDX: 0000000080000103 RSI: 00000000000000f6 RDI: 00000000ffffffff [ 25.707465] RBP: ffff985709ce0440 R08: 0000000000000000 R09: c0000000ffffefff [ 25.715455] R10: ffffb7ca80160cf0 R11: ffffb7ca80160ce8 R12: ffff985709ce039c [ 25.723438] R13: ffff985709ce0000 R14: 0000000000000008 R15: ffff9857068af940 [ 25.731425] FS: 0000000000000000(0000) GS:ffff985864300000(0000) knlGS:0000000000000000 [ 25.740481] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 25.746913] CR2: 00005567f8bb76b8 CR3: 00000001f8e0a000 CR4: 0000000000350ee0 [ 25.754900] Call Trace: [ 25.757631] [ 25.759891] ? qdisc_put_unlocked+0x30/0x30 [ 25.764565] ? qdisc_put_unlocked+0x30/0x30 [ 25.769245] call_timer_fn+0x2e/0x140 [ 25.773346] run_timer_softirq+0x1f3/0x430 [ 25.777932] ? __hrtimer_run_queues+0x12c/0x2c0 [ 25.783005] ? ktime_get+0x3e/0xa0 [ 25.786812] __do_softirq+0xa6/0x2ef [ 25.790816] asm_call_irq_on_stack+0xf/0x20 [ 25.795501] [ 25.797852] do_softirq_own_stack+0x5d/0x80 [ 25.802538] irq_exit_rcu+0x94/0xb0 [ 25.806475] sysvec_apic_timer_interrupt+0x42/0xc0 [ 25.811836] asm_sysvec_apic_timer_interrupt+0x12/0x20 [ 25.817586] RIP: 0010:cpuidle_enter_state+0xd9/0x370 [ 25.823142] Code: 85 c0 0f 8f 0a 02 00 00 31 ff e8 22 d5 7e ff 45 84 ff 74 12 9c 58 f6 c4 02 0f 85 47 02 00 00 31 ff e8 7b a0 84 ff fb 45 85 f6 <0f> 88 ab 00 00 00 49 63 ce 48 2b 2c 24 48 89 c8 48 6b d1 68 48 c1 [ 25.844140] RSP: 0018:ffffb7ca800f7e80 EFLAGS: 00000206 [ 25.849996] RAX: ffff985864300000 RBX: 0000000000000003 RCX: 000000000000001f [ 25.857975] RDX: 00000005f2028ea8 RSI: ffffffff9ec5907f RDI: ffffffff9ec62a5d [ 25.865961] RBP: 00000005f2028ea8 R08: 0000000000000000 R09: 0000000000029d00 [ 25.873947] R10: 000000137b0e0508 R11: ffff9858643294e4 R12: ffff9858643336d0 [ 25.881935] R13: ffffffff9ef74b00 R14: 0000000000000003 R15: 0000000000000000 [ 25.889918] cpuidle_enter+0x29/0x40 [ 25.893922] do_idle+0x24a/0x290 [ 25.897536] cpu_startup_entry+0x19/0x20 [ 25.901930] start_secondary+0x128/0x160 [ 25.906326] secondary_startup_64_no_verify+0xb0/0xbb [ 25.911983] ---[ end trace b4c0c8195d0ba61f ]--- [ 25.917193] intel-eth-pci 0000:00:1d.2 enp0s29f2: Reset adapter. Fixes: 67c08ac4140a ("net: stmmac: add EHL PSE0 & PSE1 1Gbps PCI info and PCI ID") Signed-off-by: Voon Weifeng Co-developed-by: Mohammad Athari Bin Ismail Signed-off-by: Mohammad Athari Bin Ismail Link: https://lore.kernel.org/r/20210126100844.30326-1-mohammad.athari.ismail@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c index 9a6a519426a08..103d2448e9e0d 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c @@ -375,6 +375,7 @@ static int ehl_pse0_common_data(struct pci_dev *pdev, struct plat_stmmacenet_data *plat) { plat->bus_id = 2; + plat->addr64 = 32; return ehl_common_data(pdev, plat); } @@ -406,6 +407,7 @@ static int ehl_pse1_common_data(struct pci_dev *pdev, struct plat_stmmacenet_data *plat) { plat->bus_id = 3; + plat->addr64 = 32; return ehl_common_data(pdev, plat); } -- GitLab From 11df27f7fdf02cc2bb354358ad482e1fdd690589 Mon Sep 17 00:00:00 2001 From: Danielle Ratson Date: Thu, 28 Jan 2021 16:48:19 +0200 Subject: [PATCH 1642/2012] selftests: forwarding: Specify interface when invoking mausezahn Specify the interface through which packets should be transmitted so that the test will pass regardless of the libnet version against which mausezahn is linked. Fixes: cab14d1087d9 ("selftests: Add version of router_multipath.sh using nexthop objects") Fixes: 3d578d879517 ("selftests: forwarding: Test IPv4 weighted nexthops") Signed-off-by: Danielle Ratson Signed-off-by: Ido Schimmel Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/forwarding/router_mpath_nh.sh | 2 +- tools/testing/selftests/net/forwarding/router_multipath.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/net/forwarding/router_mpath_nh.sh b/tools/testing/selftests/net/forwarding/router_mpath_nh.sh index 388e4492b81be..76efb1f8375e3 100755 --- a/tools/testing/selftests/net/forwarding/router_mpath_nh.sh +++ b/tools/testing/selftests/net/forwarding/router_mpath_nh.sh @@ -203,7 +203,7 @@ multipath4_test() t0_rp12=$(link_stats_tx_packets_get $rp12) t0_rp13=$(link_stats_tx_packets_get $rp13) - ip vrf exec vrf-h1 $MZ -q -p 64 -A 192.0.2.2 -B 198.51.100.2 \ + ip vrf exec vrf-h1 $MZ $h1 -q -p 64 -A 192.0.2.2 -B 198.51.100.2 \ -d 1msec -t udp "sp=1024,dp=0-32768" t1_rp12=$(link_stats_tx_packets_get $rp12) diff --git a/tools/testing/selftests/net/forwarding/router_multipath.sh b/tools/testing/selftests/net/forwarding/router_multipath.sh index 79a2099279621..464821c587a5e 100755 --- a/tools/testing/selftests/net/forwarding/router_multipath.sh +++ b/tools/testing/selftests/net/forwarding/router_multipath.sh @@ -178,7 +178,7 @@ multipath4_test() t0_rp12=$(link_stats_tx_packets_get $rp12) t0_rp13=$(link_stats_tx_packets_get $rp13) - ip vrf exec vrf-h1 $MZ -q -p 64 -A 192.0.2.2 -B 198.51.100.2 \ + ip vrf exec vrf-h1 $MZ $h1 -q -p 64 -A 192.0.2.2 -B 198.51.100.2 \ -d 1msec -t udp "sp=1024,dp=0-32768" t1_rp12=$(link_stats_tx_packets_get $rp12) -- GitLab From b6f6881aaf2344bf35a4221810737abe5fd210af Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 28 Jan 2021 16:48:20 +0200 Subject: [PATCH 1643/2012] mlxsw: spectrum_span: Do not overwrite policer configuration The purpose of the delayed work in the SPAN module is to potentially update the destination port and various encapsulation parameters of SPAN agents that point to a VLAN device or a GRE tap. The destination port can change following the insertion of a new route, for example. SPAN agents that point to a physical port or the CPU port are static and never change throughout the lifetime of the SPAN agent. Therefore, skip over them in the delayed work. This fixes an issue where the delayed work overwrites the policer that was set on a SPAN agent pointing to the CPU. Modifying the delayed work to inherit the original policer configuration is error-prone, as the same will be needed for any new parameter. Fixes: 4039504e6a0c ("mlxsw: spectrum_span: Allow setting policer on a SPAN agent") Reviewed-by: Petr Machata Signed-off-by: Ido Schimmel Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c | 6 ++++++ drivers/net/ethernet/mellanox/mlxsw/spectrum_span.h | 1 + 2 files changed, 7 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c index c6c5826aba41e..1892cea05ee7c 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c @@ -157,6 +157,7 @@ mlxsw_sp1_span_entry_cpu_deconfigure(struct mlxsw_sp_span_entry *span_entry) static const struct mlxsw_sp_span_entry_ops mlxsw_sp1_span_entry_ops_cpu = { + .is_static = true, .can_handle = mlxsw_sp1_span_cpu_can_handle, .parms_set = mlxsw_sp1_span_entry_cpu_parms, .configure = mlxsw_sp1_span_entry_cpu_configure, @@ -214,6 +215,7 @@ mlxsw_sp_span_entry_phys_deconfigure(struct mlxsw_sp_span_entry *span_entry) static const struct mlxsw_sp_span_entry_ops mlxsw_sp_span_entry_ops_phys = { + .is_static = true, .can_handle = mlxsw_sp_port_dev_check, .parms_set = mlxsw_sp_span_entry_phys_parms, .configure = mlxsw_sp_span_entry_phys_configure, @@ -721,6 +723,7 @@ mlxsw_sp2_span_entry_cpu_deconfigure(struct mlxsw_sp_span_entry *span_entry) static const struct mlxsw_sp_span_entry_ops mlxsw_sp2_span_entry_ops_cpu = { + .is_static = true, .can_handle = mlxsw_sp2_span_cpu_can_handle, .parms_set = mlxsw_sp2_span_entry_cpu_parms, .configure = mlxsw_sp2_span_entry_cpu_configure, @@ -1036,6 +1039,9 @@ static void mlxsw_sp_span_respin_work(struct work_struct *work) if (!refcount_read(&curr->ref_count)) continue; + if (curr->ops->is_static) + continue; + err = curr->ops->parms_set(mlxsw_sp, curr->to_dev, &sparms); if (err) continue; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.h index d907718bc8c58..aa1cd409c0e2e 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.h @@ -60,6 +60,7 @@ struct mlxsw_sp_span_entry { }; struct mlxsw_sp_span_entry_ops { + bool is_static; bool (*can_handle)(const struct net_device *to_dev); int (*parms_set)(struct mlxsw_sp *mlxsw_sp, const struct net_device *to_dev, -- GitLab From b8323f7288abd71794cd7b11a4c0a38b8637c8b5 Mon Sep 17 00:00:00 2001 From: Takeshi Misawa Date: Thu, 28 Jan 2021 10:48:36 +0000 Subject: [PATCH 1644/2012] rxrpc: Fix memory leak in rxrpc_lookup_local Commit 9ebeddef58c4 ("rxrpc: rxrpc_peer needs to hold a ref on the rxrpc_local record") Then release ref in __rxrpc_put_peer and rxrpc_put_peer_locked. struct rxrpc_peer *rxrpc_alloc_peer(struct rxrpc_local *local, gfp_t gfp) - peer->local = local; + peer->local = rxrpc_get_local(local); rxrpc_discard_prealloc also need ref release in discarding. syzbot report: BUG: memory leak unreferenced object 0xffff8881080ddc00 (size 256): comm "syz-executor339", pid 8462, jiffies 4294942238 (age 12.350s) hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00 00 00 00 0a 00 00 00 00 c0 00 08 81 88 ff ff ................ backtrace: [<000000002b6e495f>] kmalloc include/linux/slab.h:552 [inline] [<000000002b6e495f>] kzalloc include/linux/slab.h:682 [inline] [<000000002b6e495f>] rxrpc_alloc_local net/rxrpc/local_object.c:79 [inline] [<000000002b6e495f>] rxrpc_lookup_local+0x1c1/0x760 net/rxrpc/local_object.c:244 [<000000006b43a77b>] rxrpc_bind+0x174/0x240 net/rxrpc/af_rxrpc.c:149 [<00000000fd447a55>] afs_open_socket+0xdb/0x200 fs/afs/rxrpc.c:64 [<000000007fd8867c>] afs_net_init+0x2b4/0x340 fs/afs/main.c:126 [<0000000063d80ec1>] ops_init+0x4e/0x190 net/core/net_namespace.c:152 [<00000000073c5efa>] setup_net+0xde/0x2d0 net/core/net_namespace.c:342 [<00000000a6744d5b>] copy_net_ns+0x19f/0x3e0 net/core/net_namespace.c:483 [<0000000017d3aec3>] create_new_namespaces+0x199/0x4f0 kernel/nsproxy.c:110 [<00000000186271ef>] unshare_nsproxy_namespaces+0x9b/0x120 kernel/nsproxy.c:226 [<000000002de7bac4>] ksys_unshare+0x2fe/0x5c0 kernel/fork.c:2957 [<00000000349b12ba>] __do_sys_unshare kernel/fork.c:3025 [inline] [<00000000349b12ba>] __se_sys_unshare kernel/fork.c:3023 [inline] [<00000000349b12ba>] __x64_sys_unshare+0x12/0x20 kernel/fork.c:3023 [<000000006d178ef7>] do_syscall_64+0x2d/0x70 arch/x86/entry/common.c:46 [<00000000637076d4>] entry_SYSCALL_64_after_hwframe+0x44/0xa9 Fixes: 9ebeddef58c4 ("rxrpc: rxrpc_peer needs to hold a ref on the rxrpc_local record") Signed-off-by: Takeshi Misawa Reported-and-tested-by: syzbot+305326672fed51b205f7@syzkaller.appspotmail.com Signed-off-by: David Howells Link: https://lore.kernel.org/r/161183091692.3506637.3206605651502458810.stgit@warthog.procyon.org.uk Signed-off-by: Jakub Kicinski --- net/rxrpc/call_accept.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c index 382add72c66f6..1ae90fb979362 100644 --- a/net/rxrpc/call_accept.c +++ b/net/rxrpc/call_accept.c @@ -197,6 +197,7 @@ void rxrpc_discard_prealloc(struct rxrpc_sock *rx) tail = b->peer_backlog_tail; while (CIRC_CNT(head, tail, size) > 0) { struct rxrpc_peer *peer = b->peer_backlog[tail]; + rxrpc_put_local(peer->local); kfree(peer); tail = (tail + 1) & (size - 1); } -- GitLab From 426c6cbc409cbda9ab1a9dbf15d3c2ef947eb8c1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pali=20Roh=C3=A1r?= Date: Mon, 25 Jan 2021 16:02:27 +0100 Subject: [PATCH 1645/2012] net: sfp: add workaround for Realtek RTL8672 and RTL9601C chips MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The workaround for VSOL V2801F brand based GPON SFP modules added in commit 0d035bed2a4a ("net: sfp: VSOL V2801F / CarlitoxxPro CPGOS03-0490 v2.0 workaround") works only for IDs added explicitly to the list. Since there are rebranded modules where OEM vendors put different strings into the vendor name field, we cannot base workaround on IDs only. Moreover the issue which the above mentioned commit tried to work around is generic not only to VSOL based modules, but rather to all GPON modules based on Realtek RTL8672 and RTL9601C chips. These include at least the following GPON modules: * V-SOL V2801F * C-Data FD511GX-RM0 * OPTON GP801R * BAUDCOM BD-1234-SFM * CPGOS03-0490 v2.0 * Ubiquiti U-Fiber Instant * EXOT EGS1 These Realtek chips have broken EEPROM emulator which for N-byte read operation returns just the first byte of EEPROM data, followed by N-1 zeros. Introduce a new function, sfp_id_needs_byte_io(), which detects SFP modules with broken EEPROM emulator based on N-1 zeros and switch to 1 byte EEPROM reading operation. Function sfp_i2c_read() now always uses single byte reading when it is required and when function sfp_hwmon_probe() detects single byte access, it disables registration of hwmon device, because in this case we cannot reliably and atomically read 2 bytes as is required by the standard for retrieving values from diagnostic area. (These Realtek chips are broken in a way that violates SFP standards for diagnostic interface. Kernel in this case simply cannot do anything less of skipping registration of the hwmon interface.) This patch fixes reading of EEPROM content from SFP modules based on Realtek RTL8672 and RTL9601C chips. Diagnostic interface of EEPROM stays broken and cannot be fixed. Fixes: 0d035bed2a4a ("net: sfp: VSOL V2801F / CarlitoxxPro CPGOS03-0490 v2.0 workaround") Co-developed-by: Russell King Signed-off-by: Russell King Signed-off-by: Pali Rohár Signed-off-by: Jakub Kicinski --- drivers/net/phy/sfp.c | 100 ++++++++++++++++++++++++++++-------------- 1 file changed, 67 insertions(+), 33 deletions(-) diff --git a/drivers/net/phy/sfp.c b/drivers/net/phy/sfp.c index b2a5ed6915fa3..ca68839c27ce2 100644 --- a/drivers/net/phy/sfp.c +++ b/drivers/net/phy/sfp.c @@ -340,19 +340,11 @@ static int sfp_i2c_read(struct sfp *sfp, bool a2, u8 dev_addr, void *buf, size_t len) { struct i2c_msg msgs[2]; - size_t block_size; + u8 bus_addr = a2 ? 0x51 : 0x50; + size_t block_size = sfp->i2c_block_size; size_t this_len; - u8 bus_addr; int ret; - if (a2) { - block_size = 16; - bus_addr = 0x51; - } else { - block_size = sfp->i2c_block_size; - bus_addr = 0x50; - } - msgs[0].addr = bus_addr; msgs[0].flags = 0; msgs[0].len = 1; @@ -1286,6 +1278,20 @@ static void sfp_hwmon_probe(struct work_struct *work) struct sfp *sfp = container_of(work, struct sfp, hwmon_probe.work); int err, i; + /* hwmon interface needs to access 16bit registers in atomic way to + * guarantee coherency of the diagnostic monitoring data. If it is not + * possible to guarantee coherency because EEPROM is broken in such way + * that does not support atomic 16bit read operation then we have to + * skip registration of hwmon device. + */ + if (sfp->i2c_block_size < 2) { + dev_info(sfp->dev, + "skipping hwmon device registration due to broken EEPROM\n"); + dev_info(sfp->dev, + "diagnostic EEPROM area cannot be read atomically to guarantee data coherency\n"); + return; + } + err = sfp_read(sfp, true, 0, &sfp->diag, sizeof(sfp->diag)); if (err < 0) { if (sfp->hwmon_tries--) { @@ -1702,26 +1708,30 @@ static int sfp_sm_mod_hpower(struct sfp *sfp, bool enable) return 0; } -/* Some modules (Nokia 3FE46541AA) lock up if byte 0x51 is read as a - * single read. Switch back to reading 16 byte blocks unless we have - * a CarlitoxxPro module (rebranded VSOL V2801F). Even more annoyingly, - * some VSOL V2801F have the vendor name changed to OEM. +/* GPON modules based on Realtek RTL8672 and RTL9601C chips (e.g. V-SOL + * V2801F, CarlitoxxPro CPGOS03-0490, Ubiquiti U-Fiber Instant, ...) do + * not support multibyte reads from the EEPROM. Each multi-byte read + * operation returns just one byte of EEPROM followed by zeros. There is + * no way to identify which modules are using Realtek RTL8672 and RTL9601C + * chips. Moreover every OEM of V-SOL V2801F module puts its own vendor + * name and vendor id into EEPROM, so there is even no way to detect if + * module is V-SOL V2801F. Therefore check for those zeros in the read + * data and then based on check switch to reading EEPROM to one byte + * at a time. */ -static int sfp_quirk_i2c_block_size(const struct sfp_eeprom_base *base) +static bool sfp_id_needs_byte_io(struct sfp *sfp, void *buf, size_t len) { - if (!memcmp(base->vendor_name, "VSOL ", 16)) - return 1; - if (!memcmp(base->vendor_name, "OEM ", 16) && - !memcmp(base->vendor_pn, "V2801F ", 16)) - return 1; + size_t i, block_size = sfp->i2c_block_size; - /* Some modules can't cope with long reads */ - return 16; -} + /* Already using byte IO */ + if (block_size == 1) + return false; -static void sfp_quirks_base(struct sfp *sfp, const struct sfp_eeprom_base *base) -{ - sfp->i2c_block_size = sfp_quirk_i2c_block_size(base); + for (i = 1; i < len; i += block_size) { + if (memchr_inv(buf + i, '\0', min(block_size - 1, len - i))) + return false; + } + return true; } static int sfp_cotsworks_fixup_check(struct sfp *sfp, struct sfp_eeprom_id *id) @@ -1765,11 +1775,11 @@ static int sfp_sm_mod_probe(struct sfp *sfp, bool report) u8 check; int ret; - /* Some modules (CarlitoxxPro CPGOS03-0490) do not support multibyte - * reads from the EEPROM, so start by reading the base identifying - * information one byte at a time. + /* Some SFP modules and also some Linux I2C drivers do not like reads + * longer than 16 bytes, so read the EEPROM in chunks of 16 bytes at + * a time. */ - sfp->i2c_block_size = 1; + sfp->i2c_block_size = 16; ret = sfp_read(sfp, false, 0, &id.base, sizeof(id.base)); if (ret < 0) { @@ -1783,6 +1793,33 @@ static int sfp_sm_mod_probe(struct sfp *sfp, bool report) return -EAGAIN; } + /* Some SFP modules (e.g. Nokia 3FE46541AA) lock up if read from + * address 0x51 is just one byte at a time. Also SFF-8472 requires + * that EEPROM supports atomic 16bit read operation for diagnostic + * fields, so do not switch to one byte reading at a time unless it + * is really required and we have no other option. + */ + if (sfp_id_needs_byte_io(sfp, &id.base, sizeof(id.base))) { + dev_info(sfp->dev, + "Detected broken RTL8672/RTL9601C emulated EEPROM\n"); + dev_info(sfp->dev, + "Switching to reading EEPROM to one byte at a time\n"); + sfp->i2c_block_size = 1; + + ret = sfp_read(sfp, false, 0, &id.base, sizeof(id.base)); + if (ret < 0) { + if (report) + dev_err(sfp->dev, "failed to read EEPROM: %d\n", + ret); + return -EAGAIN; + } + + if (ret != sizeof(id.base)) { + dev_err(sfp->dev, "EEPROM short read: %d\n", ret); + return -EAGAIN; + } + } + /* Cotsworks do not seem to update the checksums when they * do the final programming with the final module part number, * serial number and date code. @@ -1817,9 +1854,6 @@ static int sfp_sm_mod_probe(struct sfp *sfp, bool report) } } - /* Apply any early module-specific quirks */ - sfp_quirks_base(sfp, &id.base); - ret = sfp_read(sfp, false, SFP_CC_BASE + 1, &id.ext, sizeof(id.ext)); if (ret < 0) { if (report) -- GitLab From f0b4f847673299577c29b71d3f3acd3c313d81b7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pali=20Roh=C3=A1r?= Date: Mon, 25 Jan 2021 16:02:28 +0100 Subject: [PATCH 1646/2012] net: sfp: add mode quirk for GPON module Ubiquiti U-Fiber Instant MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Ubiquiti U-Fiber Instant SFP GPON module has nonsensical information stored in its EEPROM. It claims to support all transceiver types including 10G Ethernet. Clear all claimed modes and set only 1000baseX_Full, which is the only one supported. This module has also phys_id set to SFF, and the SFP subsystem currently does not allow to use SFP modules detected as SFFs. Add exception for this module so it can be detected as supported. This change finally allows to detect and use SFP GPON module Ubiquiti U-Fiber Instant on Linux system. EEPROM content of this SFP module is (where XX is serial number): 00: 02 04 0b ff ff ff ff ff ff ff ff 03 0c 00 14 c8 ???........??.?? 10: 00 00 00 00 55 42 4e 54 20 20 20 20 20 20 20 20 ....UBNT 20: 20 20 20 20 00 18 e8 29 55 46 2d 49 4e 53 54 41 .??)UF-INSTA 30: 4e 54 20 20 20 20 20 20 34 20 20 20 05 1e 00 36 NT 4 ??.6 40: 00 06 00 00 55 42 4e 54 XX XX XX XX XX XX XX XX .?..UBNTXXXXXXXX 50: 20 20 20 20 31 34 30 31 32 33 20 20 60 80 02 41 140123 `??A Signed-off-by: Pali Rohár Signed-off-by: Jakub Kicinski --- drivers/net/phy/sfp-bus.c | 15 +++++++++++++++ drivers/net/phy/sfp.c | 17 +++++++++++++++-- 2 files changed, 30 insertions(+), 2 deletions(-) diff --git a/drivers/net/phy/sfp-bus.c b/drivers/net/phy/sfp-bus.c index 3c67ad9951ab2..3cfd773ae5f44 100644 --- a/drivers/net/phy/sfp-bus.c +++ b/drivers/net/phy/sfp-bus.c @@ -44,6 +44,17 @@ static void sfp_quirk_2500basex(const struct sfp_eeprom_id *id, phylink_set(modes, 2500baseX_Full); } +static void sfp_quirk_ubnt_uf_instant(const struct sfp_eeprom_id *id, + unsigned long *modes) +{ + /* Ubiquiti U-Fiber Instant module claims that support all transceiver + * types including 10G Ethernet which is not truth. So clear all claimed + * modes and set only one mode which module supports: 1000baseX_Full. + */ + phylink_zero(modes); + phylink_set(modes, 1000baseX_Full); +} + static const struct sfp_quirk sfp_quirks[] = { { // Alcatel Lucent G-010S-P can operate at 2500base-X, but @@ -63,6 +74,10 @@ static const struct sfp_quirk sfp_quirks[] = { .vendor = "HUAWEI", .part = "MA5671A", .modes = sfp_quirk_2500basex, + }, { + .vendor = "UBNT", + .part = "UF-INSTANT", + .modes = sfp_quirk_ubnt_uf_instant, }, }; diff --git a/drivers/net/phy/sfp.c b/drivers/net/phy/sfp.c index ca68839c27ce2..7998acc689b74 100644 --- a/drivers/net/phy/sfp.c +++ b/drivers/net/phy/sfp.c @@ -277,8 +277,21 @@ static const struct sff_data sff_data = { static bool sfp_module_supported(const struct sfp_eeprom_id *id) { - return id->base.phys_id == SFF8024_ID_SFP && - id->base.phys_ext_id == SFP_PHYS_EXT_ID_SFP; + if (id->base.phys_id == SFF8024_ID_SFP && + id->base.phys_ext_id == SFP_PHYS_EXT_ID_SFP) + return true; + + /* SFP GPON module Ubiquiti U-Fiber Instant has in its EEPROM stored + * phys id SFF instead of SFP. Therefore mark this module explicitly + * as supported based on vendor name and pn match. + */ + if (id->base.phys_id == SFF8024_ID_SFF_8472 && + id->base.phys_ext_id == SFP_PHYS_EXT_ID_SFP && + !memcmp(id->base.vendor_name, "UBNT ", 16) && + !memcmp(id->base.vendor_pn, "UF-INSTANT ", 16)) + return true; + + return false; } static const struct sff_data sfp_data = { -- GitLab From bd2f0b43c1c864fa653342c5c074bfcd29f10934 Mon Sep 17 00:00:00 2001 From: Steve French Date: Thu, 28 Jan 2021 16:43:03 -0600 Subject: [PATCH 1647/2012] cifs: returning mount parm processing errors correctly During additional testing of the updated cifs.ko with the new mount API support, we found a few additional cases where we were logging errors, but not returning them to the user. For example: a) invalid security mechanisms b) invalid cache options c) unsupported rdma d) invalid smb dialect requested Fixes: 24e0a1eff9e2 ("cifs: switch to new mount api") Acked-by: Ronnie Sahlberg Signed-off-by: Steve French --- fs/cifs/fs_context.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/cifs/fs_context.c b/fs/cifs/fs_context.c index 818c413db82d2..27354417e9889 100644 --- a/fs/cifs/fs_context.c +++ b/fs/cifs/fs_context.c @@ -533,7 +533,7 @@ static int smb3_fs_context_validate(struct fs_context *fc) if (ctx->rdma && ctx->vals->protocol_id < SMB30_PROT_ID) { cifs_dbg(VFS, "SMB Direct requires Version >=3.0\n"); - return -1; + return -EOPNOTSUPP; } #ifndef CONFIG_KEYS @@ -556,7 +556,7 @@ static int smb3_fs_context_validate(struct fs_context *fc) /* make sure UNC has a share name */ if (strlen(ctx->UNC) < 3 || !strchr(ctx->UNC + 3, '\\')) { cifs_dbg(VFS, "Malformed UNC. Unable to find share name.\n"); - return -1; + return -ENOENT; } if (!ctx->got_ip) { @@ -570,7 +570,7 @@ static int smb3_fs_context_validate(struct fs_context *fc) if (!cifs_convert_address((struct sockaddr *)&ctx->dstaddr, &ctx->UNC[2], len)) { pr_err("Unable to determine destination address\n"); - return -1; + return -EHOSTUNREACH; } } @@ -1265,7 +1265,7 @@ static int smb3_fs_context_parse_param(struct fs_context *fc, return 0; cifs_parse_mount_err: - return 1; + return -EINVAL; } int smb3_init_fs_context(struct fs_context *fc) -- GitLab From 3a7efd1ad269ccaf9c1423364d97c9661ba6dafa Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Thu, 28 Jan 2021 23:23:42 +0000 Subject: [PATCH 1648/2012] io_uring: reinforce cancel on flush during exit What 84965ff8a84f0 ("io_uring: if we see flush on exit, cancel related tasks") really wants is to cancel all relevant REQ_F_INFLIGHT requests reliably. That can be achieved by io_uring_cancel_files(), but we'll miss it calling io_uring_cancel_task_requests(files=NULL) from io_uring_flush(), because it will go through __io_uring_cancel_task_requests(). Just always call io_uring_cancel_files() during cancel, it's good enough for now. Cc: stable@vger.kernel.org # 5.9+ Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io_uring.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 12bf7180c0f10..38c6cbe1ab387 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -8976,10 +8976,9 @@ static void io_uring_cancel_task_requests(struct io_ring_ctx *ctx, io_cancel_defer_files(ctx, task, files); io_cqring_overflow_flush(ctx, true, task, files); + io_uring_cancel_files(ctx, task, files); if (!files) __io_uring_cancel_task_requests(ctx, task); - else - io_uring_cancel_files(ctx, task, files); if ((ctx->flags & IORING_SETUP_SQPOLL) && ctx->sq_data) { atomic_dec(&task->io_uring->in_idle); -- GitLab From fd55b61ebd31449549e14c33574825d64de2b29b Mon Sep 17 00:00:00 2001 From: Bastian Beranek Date: Thu, 21 Jan 2021 15:27:36 +0100 Subject: [PATCH 1649/2012] drm/nouveau/dispnv50: Restore pushing of all data. Commit f844eb485eb056ad3b67e49f95cbc6c685a73db4 introduced a regression for NV50, which lead to visual artifacts, tearing and eventual crashes. In the changes of f844eb485eb056ad3b67e49f95cbc6c685a73db4 only the first line was correctly translated to the new NVIDIA header macros: - PUSH_NVSQ(push, NV827C, 0x0110, 0, - 0x0114, 0); + PUSH_MTHD(push, NV827C, SET_PROCESSING, + NVDEF(NV827C, SET_PROCESSING, USE_GAIN_OFS, DISABLE)); The lower part ("0x0114, 0") was probably omitted by accident. This patch restores the push of the missing data and fixes the regression. Signed-off-by: Bastian Beranek Fixes: f844eb485eb05 ("drm/nouveau/kms/nv50-: use NVIDIA's headers for wndw image_set()") Link: https://gitlab.freedesktop.org/drm/nouveau/-/issues/14 Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/dispnv50/base507c.c | 6 +++++- drivers/gpu/drm/nouveau/dispnv50/base827c.c | 6 +++++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/nouveau/dispnv50/base507c.c b/drivers/gpu/drm/nouveau/dispnv50/base507c.c index 302d4e6fc52f1..788db043a3429 100644 --- a/drivers/gpu/drm/nouveau/dispnv50/base507c.c +++ b/drivers/gpu/drm/nouveau/dispnv50/base507c.c @@ -88,7 +88,11 @@ base507c_image_set(struct nv50_wndw *wndw, struct nv50_wndw_atom *asyw) NVVAL(NV507C, SET_CONVERSION, OFS, 0x64)); } else { PUSH_MTHD(push, NV507C, SET_PROCESSING, - NVDEF(NV507C, SET_PROCESSING, USE_GAIN_OFS, DISABLE)); + NVDEF(NV507C, SET_PROCESSING, USE_GAIN_OFS, DISABLE), + + SET_CONVERSION, + NVVAL(NV507C, SET_CONVERSION, GAIN, 0) | + NVVAL(NV507C, SET_CONVERSION, OFS, 0)); } PUSH_MTHD(push, NV507C, SURFACE_SET_OFFSET(0, 0), asyw->image.offset[0] >> 8); diff --git a/drivers/gpu/drm/nouveau/dispnv50/base827c.c b/drivers/gpu/drm/nouveau/dispnv50/base827c.c index 18d34096f1258..093d4ba6910ec 100644 --- a/drivers/gpu/drm/nouveau/dispnv50/base827c.c +++ b/drivers/gpu/drm/nouveau/dispnv50/base827c.c @@ -49,7 +49,11 @@ base827c_image_set(struct nv50_wndw *wndw, struct nv50_wndw_atom *asyw) NVVAL(NV827C, SET_CONVERSION, OFS, 0x64)); } else { PUSH_MTHD(push, NV827C, SET_PROCESSING, - NVDEF(NV827C, SET_PROCESSING, USE_GAIN_OFS, DISABLE)); + NVDEF(NV827C, SET_PROCESSING, USE_GAIN_OFS, DISABLE), + + SET_CONVERSION, + NVVAL(NV827C, SET_CONVERSION, GAIN, 0) | + NVVAL(NV827C, SET_CONVERSION, OFS, 0)); } PUSH_MTHD(push, NV827C, SURFACE_SET_OFFSET(0, 0), asyw->image.offset[0] >> 8, -- GitLab From dcd602cc5fe2803bf532d407cde24ba0b7808ff3 Mon Sep 17 00:00:00 2001 From: Karol Herbst Date: Mon, 18 Jan 2021 18:16:06 +0100 Subject: [PATCH 1650/2012] drm/nouveau/svm: fail NOUVEAU_SVM_INIT ioctl on unsupported devices Fixes a crash when trying to create a channel on e.g. Turing GPUs when NOUVEAU_SVM_INIT was called before. Fixes: eeaf06ac1a558 ("drm/nouveau/svm: initial support for shared virtual memory") Signed-off-by: Karol Herbst Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nouveau_svm.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/nouveau/nouveau_svm.c b/drivers/gpu/drm/nouveau/nouveau_svm.c index 4f69e4c3dafde..1c3f890377d2c 100644 --- a/drivers/gpu/drm/nouveau/nouveau_svm.c +++ b/drivers/gpu/drm/nouveau/nouveau_svm.c @@ -315,6 +315,10 @@ nouveau_svmm_init(struct drm_device *dev, void *data, struct drm_nouveau_svm_init *args = data; int ret; + /* We need to fail if svm is disabled */ + if (!cli->drm->svm) + return -ENOSYS; + /* Allocate tracking for SVM-enabled VMM. */ if (!(svmm = kzalloc(sizeof(*svmm), GFP_KERNEL))) return -ENOMEM; -- GitLab From 7c6d659868c77da9b518f32348160340dcdfa008 Mon Sep 17 00:00:00 2001 From: Lyude Paul Date: Mon, 18 Jan 2021 20:54:12 -0500 Subject: [PATCH 1651/2012] drivers/nouveau/kms/nv50-: Reject format modifiers for cursor planes Nvidia hardware doesn't actually support using tiling formats with the cursor plane, only linear is allowed. In the future, we should write a testcase for this. Fixes: c586f30bf74c ("drm/nouveau/kms: Add format mod prop to base/ovly/nvdisp") Cc: James Jones Cc: Martin Peres Cc: Jeremy Cline Cc: Simon Ser Cc: # v5.8+ Signed-off-by: Lyude Paul Reviewed-by: Simon Ser Reviewed-by: James Jones Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/dispnv50/wndw.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/nouveau/dispnv50/wndw.c b/drivers/gpu/drm/nouveau/dispnv50/wndw.c index ce451242f79eb..271de3a63f21c 100644 --- a/drivers/gpu/drm/nouveau/dispnv50/wndw.c +++ b/drivers/gpu/drm/nouveau/dispnv50/wndw.c @@ -702,6 +702,11 @@ nv50_wndw_init(struct nv50_wndw *wndw) nvif_notify_get(&wndw->notify); } +static const u64 nv50_cursor_format_modifiers[] = { + DRM_FORMAT_MOD_LINEAR, + DRM_FORMAT_MOD_INVALID, +}; + int nv50_wndw_new_(const struct nv50_wndw_func *func, struct drm_device *dev, enum drm_plane_type type, const char *name, int index, @@ -713,6 +718,7 @@ nv50_wndw_new_(const struct nv50_wndw_func *func, struct drm_device *dev, struct nvif_mmu *mmu = &drm->client.mmu; struct nv50_disp *disp = nv50_disp(dev); struct nv50_wndw *wndw; + const u64 *format_modifiers; int nformat; int ret; @@ -728,10 +734,13 @@ nv50_wndw_new_(const struct nv50_wndw_func *func, struct drm_device *dev, for (nformat = 0; format[nformat]; nformat++); - ret = drm_universal_plane_init(dev, &wndw->plane, heads, &nv50_wndw, - format, nformat, - nouveau_display(dev)->format_modifiers, - type, "%s-%d", name, index); + if (type == DRM_PLANE_TYPE_CURSOR) + format_modifiers = nv50_cursor_format_modifiers; + else + format_modifiers = nouveau_display(dev)->format_modifiers; + + ret = drm_universal_plane_init(dev, &wndw->plane, heads, &nv50_wndw, format, nformat, + format_modifiers, type, "%s-%d", name, index); if (ret) { kfree(*pwndw); *pwndw = NULL; -- GitLab From d3b2f0f7921c75b5f0de50e618e4bd165fded3e1 Mon Sep 17 00:00:00 2001 From: Lyude Paul Date: Mon, 18 Jan 2021 20:54:13 -0500 Subject: [PATCH 1652/2012] drm/nouveau/kms/nv50-: Report max cursor size to userspace Cc: Martin Peres Cc: Jeremy Cline Cc: Simon Ser Signed-off-by: Lyude Paul Tested-by: Simon Ser Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/dispnv50/disp.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/gpu/drm/nouveau/dispnv50/disp.c b/drivers/gpu/drm/nouveau/dispnv50/disp.c index c6367035970eb..5f4f09a601d4c 100644 --- a/drivers/gpu/drm/nouveau/dispnv50/disp.c +++ b/drivers/gpu/drm/nouveau/dispnv50/disp.c @@ -2663,6 +2663,14 @@ nv50_display_create(struct drm_device *dev) else nouveau_display(dev)->format_modifiers = disp50xx_modifiers; + if (disp->disp->object.oclass >= GK104_DISP) { + dev->mode_config.cursor_width = 256; + dev->mode_config.cursor_height = 256; + } else { + dev->mode_config.cursor_width = 64; + dev->mode_config.cursor_height = 64; + } + /* create crtc objects to represent the hw heads */ if (disp->disp->object.oclass >= GV100_DISP) crtcs = nvif_rd32(&device->object, 0x610060) & 0xff; -- GitLab From ba839b7598440a5d78550a115bac21b08d57cc32 Mon Sep 17 00:00:00 2001 From: Lyude Paul Date: Mon, 18 Jan 2021 20:54:14 -0500 Subject: [PATCH 1653/2012] drm/nouveau/kms/gk104-gp1xx: Fix > 64x64 cursors While we do handle the additional cursor sizes introduced in NVE4, it looks like we accidentally broke this when converting over to use Nvidia's display headers. Since we now use NVVAL in dispnv50/head907d.c in order to format the value for the cursor layout and NVD9 only had one byte reserved vs. the 2 bytes reserved in later generations, we end up accidentally stripping the second bit in the cursor layout format parameter - causing us to set the wrong cursor size. This fixes that by adding our own curs_set hook for 917d which uses the NV917D headers. Cc: Martin Peres Cc: Jeremy Cline Cc: Simon Ser Cc: # v5.9+ Signed-off-by: Lyude Paul Fixes: ed0b86a90bf9 ("drm/nouveau/kms/nv50-: use NVIDIA's headers for core head_curs_set()") Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/dispnv50/head917d.c | 28 ++++++++++++++++++- .../drm/nouveau/include/nvhw/class/cl917d.h | 4 +++ 2 files changed, 31 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/dispnv50/head917d.c b/drivers/gpu/drm/nouveau/dispnv50/head917d.c index a5d8274036609..ea9f8667305ec 100644 --- a/drivers/gpu/drm/nouveau/dispnv50/head917d.c +++ b/drivers/gpu/drm/nouveau/dispnv50/head917d.c @@ -22,6 +22,7 @@ #include "head.h" #include "core.h" +#include "nvif/push.h" #include #include @@ -73,6 +74,31 @@ head917d_base(struct nv50_head *head, struct nv50_head_atom *asyh) return 0; } +static int +head917d_curs_set(struct nv50_head *head, struct nv50_head_atom *asyh) +{ + struct nvif_push *push = nv50_disp(head->base.base.dev)->core->chan.push; + const int i = head->base.index; + int ret; + + ret = PUSH_WAIT(push, 5); + if (ret) + return ret; + + PUSH_MTHD(push, NV917D, HEAD_SET_CONTROL_CURSOR(i), + NVDEF(NV917D, HEAD_SET_CONTROL_CURSOR, ENABLE, ENABLE) | + NVVAL(NV917D, HEAD_SET_CONTROL_CURSOR, FORMAT, asyh->curs.format) | + NVVAL(NV917D, HEAD_SET_CONTROL_CURSOR, SIZE, asyh->curs.layout) | + NVVAL(NV917D, HEAD_SET_CONTROL_CURSOR, HOT_SPOT_X, 0) | + NVVAL(NV917D, HEAD_SET_CONTROL_CURSOR, HOT_SPOT_Y, 0) | + NVDEF(NV917D, HEAD_SET_CONTROL_CURSOR, COMPOSITION, ALPHA_BLEND), + + HEAD_SET_OFFSET_CURSOR(i), asyh->curs.offset >> 8); + + PUSH_MTHD(push, NV917D, HEAD_SET_CONTEXT_DMA_CURSOR(i), asyh->curs.handle); + return 0; +} + int head917d_curs_layout(struct nv50_head *head, struct nv50_wndw_atom *asyw, struct nv50_head_atom *asyh) @@ -101,7 +127,7 @@ head917d = { .core_clr = head907d_core_clr, .curs_layout = head917d_curs_layout, .curs_format = head507d_curs_format, - .curs_set = head907d_curs_set, + .curs_set = head917d_curs_set, .curs_clr = head907d_curs_clr, .base = head917d_base, .ovly = head907d_ovly, diff --git a/drivers/gpu/drm/nouveau/include/nvhw/class/cl917d.h b/drivers/gpu/drm/nouveau/include/nvhw/class/cl917d.h index 2a2612d6e1e0e..fb223723a38ad 100644 --- a/drivers/gpu/drm/nouveau/include/nvhw/class/cl917d.h +++ b/drivers/gpu/drm/nouveau/include/nvhw/class/cl917d.h @@ -66,6 +66,10 @@ #define NV917D_HEAD_SET_CONTROL_CURSOR_COMPOSITION_ALPHA_BLEND (0x00000000) #define NV917D_HEAD_SET_CONTROL_CURSOR_COMPOSITION_PREMULT_ALPHA_BLEND (0x00000001) #define NV917D_HEAD_SET_CONTROL_CURSOR_COMPOSITION_XOR (0x00000002) +#define NV917D_HEAD_SET_OFFSET_CURSOR(a) (0x00000484 + (a)*0x00000300) +#define NV917D_HEAD_SET_OFFSET_CURSOR_ORIGIN 31:0 +#define NV917D_HEAD_SET_CONTEXT_DMA_CURSOR(a) (0x0000048C + (a)*0x00000300) +#define NV917D_HEAD_SET_CONTEXT_DMA_CURSOR_HANDLE 31:0 #define NV917D_HEAD_SET_DITHER_CONTROL(a) (0x000004A0 + (a)*0x00000300) #define NV917D_HEAD_SET_DITHER_CONTROL_ENABLE 0:0 #define NV917D_HEAD_SET_DITHER_CONTROL_ENABLE_DISABLE (0x00000000) -- GitLab From d7a177ea8fe6fc6fe146639d63a45090de196498 Mon Sep 17 00:00:00 2001 From: Tom Rix Date: Sun, 24 Jan 2021 11:08:04 -0800 Subject: [PATCH 1654/2012] ipvlan: remove h from printk format specifier This change fixes the checkpatch warning described in this commit commit cbacb5ab0aa0 ("docs: printk-formats: Stop encouraging use of unnecessary %h[xudi] and %hh[xudi]") Standard integer promotion is already done and %hx and %hhx is useless so do not encourage the use of %hh[xudi] or %h[xudi]. Cleanup output to use __func__ over explicit function strings. Signed-off-by: Tom Rix Link: https://lore.kernel.org/r/20210124190804.1964580-1-trix@redhat.com Signed-off-by: Jakub Kicinski --- drivers/net/ipvlan/ipvlan_core.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/net/ipvlan/ipvlan_core.c b/drivers/net/ipvlan/ipvlan_core.c index 8801d093135c3..6cd50106e6112 100644 --- a/drivers/net/ipvlan/ipvlan_core.c +++ b/drivers/net/ipvlan/ipvlan_core.c @@ -651,8 +651,7 @@ int ipvlan_queue_xmit(struct sk_buff *skb, struct net_device *dev) } /* Should not reach here */ - WARN_ONCE(true, "ipvlan_queue_xmit() called for mode = [%hx]\n", - port->mode); + WARN_ONCE(true, "%s called for mode = [%x]\n", __func__, port->mode); out: kfree_skb(skb); return NET_XMIT_DROP; @@ -749,8 +748,7 @@ rx_handler_result_t ipvlan_handle_frame(struct sk_buff **pskb) } /* Should not reach here */ - WARN_ONCE(true, "ipvlan_handle_frame() called for mode = [%hx]\n", - port->mode); + WARN_ONCE(true, "%s called for mode = [%x]\n", __func__, port->mode); kfree_skb(skb); return RX_HANDLER_CONSUMED; } -- GitLab From e594ad980ec26fb7351d02c84abaa77ecdb4e522 Mon Sep 17 00:00:00 2001 From: Daniele Palmas Date: Wed, 27 Jan 2021 16:34:32 +0100 Subject: [PATCH 1655/2012] net: usb: qmi_wwan: add qmap id sysfs file for qmimux interfaces MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add qmimux interface sysfs file qmap/mux_id to show qmap id set during the interface creation, in order to provide a method for userspace to associate QMI control channels to network interfaces. Signed-off-by: Daniele Palmas Acked-by: Bjørn Mork Acked-by: Aleksander Morgado Signed-off-by: Jakub Kicinski --- drivers/net/usb/qmi_wwan.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index b96a3dc775a4e..29568ea8a9f25 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -218,6 +218,28 @@ skip: return 1; } +static ssize_t mux_id_show(struct device *d, struct device_attribute *attr, char *buf) +{ + struct net_device *dev = to_net_dev(d); + struct qmimux_priv *priv; + + priv = netdev_priv(dev); + + return sysfs_emit(buf, "0x%02x\n", priv->mux_id); +} + +static DEVICE_ATTR_RO(mux_id); + +static struct attribute *qmi_wwan_sysfs_qmimux_attrs[] = { + &dev_attr_mux_id.attr, + NULL, +}; + +static struct attribute_group qmi_wwan_sysfs_qmimux_attr_group = { + .name = "qmap", + .attrs = qmi_wwan_sysfs_qmimux_attrs, +}; + static int qmimux_register_device(struct net_device *real_dev, u8 mux_id) { struct net_device *new_dev; @@ -240,6 +262,8 @@ static int qmimux_register_device(struct net_device *real_dev, u8 mux_id) goto out_free_newdev; } + new_dev->sysfs_groups[0] = &qmi_wwan_sysfs_qmimux_attr_group; + err = register_netdevice(new_dev); if (err < 0) goto out_free_newdev; -- GitLab From b4b91e24094ad54b3d176e3fd2997fae62a315dc Mon Sep 17 00:00:00 2001 From: Daniele Palmas Date: Wed, 27 Jan 2021 16:34:33 +0100 Subject: [PATCH 1656/2012] net: qmi_wwan: document qmap/mux_id sysfs file MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Document qmap/mux_id sysfs file showing qmimux interface id Signed-off-by: Daniele Palmas Acked-by: Bjørn Mork Signed-off-by: Jakub Kicinski --- Documentation/ABI/testing/sysfs-class-net-qmi | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/Documentation/ABI/testing/sysfs-class-net-qmi b/Documentation/ABI/testing/sysfs-class-net-qmi index c310db4ccbc2e..ed79f58934213 100644 --- a/Documentation/ABI/testing/sysfs-class-net-qmi +++ b/Documentation/ABI/testing/sysfs-class-net-qmi @@ -48,3 +48,13 @@ Description: Write a number ranging from 1 to 254 to delete a previously created qmap mux based network device. + +What: /sys/class/net//qmap/mux_id +Date: January 2021 +KernelVersion: 5.12 +Contact: Daniele Palmas +Description: + Unsigned integer + + Indicates the mux id associated to the qmimux network interface + during its creation. -- GitLab From 59e139cf0b32a7a08ef20453927ecd57db086d8e Mon Sep 17 00:00:00 2001 From: Subash Abhinov Kasiviswanathan Date: Mon, 25 Jan 2021 00:33:35 -0700 Subject: [PATCH 1657/2012] net: qmi_wwan: Add pass through mode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pass through mode is to allow packets in MAP format to be passed on to the stack. rmnet driver can be used to process and demultiplex these packets. Pass through mode can be enabled when the device is in raw ip mode only. Conversely, raw ip mode cannot be disabled when pass through mode is enabled. Userspace can use pass through mode in conjunction with rmnet driver through the following steps- 1. Enable raw ip mode on qmi_wwan device 2. Enable pass through mode on qmi_wwan device 3. Create a rmnet device with qmi_wwan device as real device using netlink Signed-off-by: Subash Abhinov Kasiviswanathan Acked-by: Bjørn Mork Link: https://lore.kernel.org/r/1611560015-20034-1-git-send-email-subashab@codeaurora.org Signed-off-by: Jakub Kicinski --- drivers/net/usb/qmi_wwan.c | 58 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index 29568ea8a9f25..c8b2b60d21834 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -57,6 +57,7 @@ struct qmi_wwan_state { enum qmi_wwan_flags { QMI_WWAN_FLAG_RAWIP = 1 << 0, QMI_WWAN_FLAG_MUX = 1 << 1, + QMI_WWAN_FLAG_PASS_THROUGH = 1 << 2, }; enum qmi_wwan_quirks { @@ -350,6 +351,13 @@ static ssize_t raw_ip_store(struct device *d, struct device_attribute *attr, co if (enable == (info->flags & QMI_WWAN_FLAG_RAWIP)) return len; + /* ip mode cannot be cleared when pass through mode is set */ + if (!enable && (info->flags & QMI_WWAN_FLAG_PASS_THROUGH)) { + netdev_err(dev->net, + "Cannot clear ip mode on pass through device\n"); + return -EINVAL; + } + if (!rtnl_trylock()) return restart_syscall(); @@ -480,14 +488,59 @@ err: return ret; } +static ssize_t pass_through_show(struct device *d, + struct device_attribute *attr, char *buf) +{ + struct usbnet *dev = netdev_priv(to_net_dev(d)); + struct qmi_wwan_state *info; + + info = (void *)&dev->data; + return sprintf(buf, "%c\n", + info->flags & QMI_WWAN_FLAG_PASS_THROUGH ? 'Y' : 'N'); +} + +static ssize_t pass_through_store(struct device *d, + struct device_attribute *attr, + const char *buf, size_t len) +{ + struct usbnet *dev = netdev_priv(to_net_dev(d)); + struct qmi_wwan_state *info; + bool enable; + + if (strtobool(buf, &enable)) + return -EINVAL; + + info = (void *)&dev->data; + + /* no change? */ + if (enable == (info->flags & QMI_WWAN_FLAG_PASS_THROUGH)) + return len; + + /* pass through mode can be set for raw ip devices only */ + if (!(info->flags & QMI_WWAN_FLAG_RAWIP)) { + netdev_err(dev->net, + "Cannot set pass through mode on non ip device\n"); + return -EINVAL; + } + + if (enable) + info->flags |= QMI_WWAN_FLAG_PASS_THROUGH; + else + info->flags &= ~QMI_WWAN_FLAG_PASS_THROUGH; + + return len; +} + static DEVICE_ATTR_RW(raw_ip); static DEVICE_ATTR_RW(add_mux); static DEVICE_ATTR_RW(del_mux); +static DEVICE_ATTR_RW(pass_through); static struct attribute *qmi_wwan_sysfs_attrs[] = { &dev_attr_raw_ip.attr, &dev_attr_add_mux.attr, &dev_attr_del_mux.attr, + &dev_attr_pass_through.attr, NULL, }; @@ -534,6 +587,11 @@ static int qmi_wwan_rx_fixup(struct usbnet *dev, struct sk_buff *skb) if (info->flags & QMI_WWAN_FLAG_MUX) return qmimux_rx_fixup(dev, skb); + if (info->flags & QMI_WWAN_FLAG_PASS_THROUGH) { + skb->protocol = htons(ETH_P_MAP); + return (netif_rx(skb) == NET_RX_SUCCESS); + } + switch (skb->data[0] & 0xf0) { case 0x40: proto = htons(ETH_P_IP); -- GitLab From 4140ff1ba06d3fc16afd518736940ab742886317 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Thu, 28 Jan 2021 22:07:36 +0100 Subject: [PATCH 1658/2012] e100: switch from 'pci_' to 'dma_' API The wrappers in include/linux/pci-dma-compat.h should go away. The patch has been generated with the coccinelle script below and has been hand modified to replace GFP_ with a correct flag. It has been compile tested. When memory is allocated in 'e100_alloc()', GFP_KERNEL can be used because it is only called from the probe function and no lock is acquired. @@ @@ - PCI_DMA_BIDIRECTIONAL + DMA_BIDIRECTIONAL @@ @@ - PCI_DMA_TODEVICE + DMA_TO_DEVICE @@ @@ - PCI_DMA_FROMDEVICE + DMA_FROM_DEVICE @@ @@ - PCI_DMA_NONE + DMA_NONE @@ expression e1, e2, e3; @@ - pci_alloc_consistent(e1, e2, e3) + dma_alloc_coherent(&e1->dev, e2, e3, GFP_) @@ expression e1, e2, e3; @@ - pci_zalloc_consistent(e1, e2, e3) + dma_alloc_coherent(&e1->dev, e2, e3, GFP_) @@ expression e1, e2, e3, e4; @@ - pci_free_consistent(e1, e2, e3, e4) + dma_free_coherent(&e1->dev, e2, e3, e4) @@ expression e1, e2, e3, e4; @@ - pci_map_single(e1, e2, e3, e4) + dma_map_single(&e1->dev, e2, e3, e4) @@ expression e1, e2, e3, e4; @@ - pci_unmap_single(e1, e2, e3, e4) + dma_unmap_single(&e1->dev, e2, e3, e4) @@ expression e1, e2, e3, e4, e5; @@ - pci_map_page(e1, e2, e3, e4, e5) + dma_map_page(&e1->dev, e2, e3, e4, e5) @@ expression e1, e2, e3, e4; @@ - pci_unmap_page(e1, e2, e3, e4) + dma_unmap_page(&e1->dev, e2, e3, e4) @@ expression e1, e2, e3, e4; @@ - pci_map_sg(e1, e2, e3, e4) + dma_map_sg(&e1->dev, e2, e3, e4) @@ expression e1, e2, e3, e4; @@ - pci_unmap_sg(e1, e2, e3, e4) + dma_unmap_sg(&e1->dev, e2, e3, e4) @@ expression e1, e2, e3, e4; @@ - pci_dma_sync_single_for_cpu(e1, e2, e3, e4) + dma_sync_single_for_cpu(&e1->dev, e2, e3, e4) @@ expression e1, e2, e3, e4; @@ - pci_dma_sync_single_for_device(e1, e2, e3, e4) + dma_sync_single_for_device(&e1->dev, e2, e3, e4) @@ expression e1, e2, e3, e4; @@ - pci_dma_sync_sg_for_cpu(e1, e2, e3, e4) + dma_sync_sg_for_cpu(&e1->dev, e2, e3, e4) @@ expression e1, e2, e3, e4; @@ - pci_dma_sync_sg_for_device(e1, e2, e3, e4) + dma_sync_sg_for_device(&e1->dev, e2, e3, e4) @@ expression e1, e2; @@ - pci_dma_mapping_error(e1, e2) + dma_mapping_error(&e1->dev, e2) @@ expression e1, e2; @@ - pci_set_dma_mask(e1, e2) + dma_set_mask(&e1->dev, e2) @@ expression e1, e2; @@ - pci_set_consistent_dma_mask(e1, e2) + dma_set_coherent_mask(&e1->dev, e2) Signed-off-by: Christophe JAILLET Tested-by: Aaron Brown Link: https://lore.kernel.org/r/20210128210736.749724-1-christophe.jaillet@wanadoo.fr Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/e100.c | 92 ++++++++++++++++--------------- 1 file changed, 49 insertions(+), 43 deletions(-) diff --git a/drivers/net/ethernet/intel/e100.c b/drivers/net/ethernet/intel/e100.c index 8cc651d37a7fd..f8d78af76d7de 100644 --- a/drivers/net/ethernet/intel/e100.c +++ b/drivers/net/ethernet/intel/e100.c @@ -1739,10 +1739,10 @@ static int e100_xmit_prepare(struct nic *nic, struct cb *cb, dma_addr_t dma_addr; cb->command = nic->tx_command; - dma_addr = pci_map_single(nic->pdev, - skb->data, skb->len, PCI_DMA_TODEVICE); + dma_addr = dma_map_single(&nic->pdev->dev, skb->data, skb->len, + DMA_TO_DEVICE); /* If we can't map the skb, have the upper layer try later */ - if (pci_dma_mapping_error(nic->pdev, dma_addr)) { + if (dma_mapping_error(&nic->pdev->dev, dma_addr)) { dev_kfree_skb_any(skb); skb = NULL; return -ENOMEM; @@ -1828,10 +1828,10 @@ static int e100_tx_clean(struct nic *nic) dev->stats.tx_packets++; dev->stats.tx_bytes += cb->skb->len; - pci_unmap_single(nic->pdev, - le32_to_cpu(cb->u.tcb.tbd.buf_addr), - le16_to_cpu(cb->u.tcb.tbd.size), - PCI_DMA_TODEVICE); + dma_unmap_single(&nic->pdev->dev, + le32_to_cpu(cb->u.tcb.tbd.buf_addr), + le16_to_cpu(cb->u.tcb.tbd.size), + DMA_TO_DEVICE); dev_kfree_skb_any(cb->skb); cb->skb = NULL; tx_cleaned = 1; @@ -1855,10 +1855,10 @@ static void e100_clean_cbs(struct nic *nic) while (nic->cbs_avail != nic->params.cbs.count) { struct cb *cb = nic->cb_to_clean; if (cb->skb) { - pci_unmap_single(nic->pdev, - le32_to_cpu(cb->u.tcb.tbd.buf_addr), - le16_to_cpu(cb->u.tcb.tbd.size), - PCI_DMA_TODEVICE); + dma_unmap_single(&nic->pdev->dev, + le32_to_cpu(cb->u.tcb.tbd.buf_addr), + le16_to_cpu(cb->u.tcb.tbd.size), + DMA_TO_DEVICE); dev_kfree_skb(cb->skb); } nic->cb_to_clean = nic->cb_to_clean->next; @@ -1925,10 +1925,10 @@ static int e100_rx_alloc_skb(struct nic *nic, struct rx *rx) /* Init, and map the RFD. */ skb_copy_to_linear_data(rx->skb, &nic->blank_rfd, sizeof(struct rfd)); - rx->dma_addr = pci_map_single(nic->pdev, rx->skb->data, - RFD_BUF_LEN, PCI_DMA_BIDIRECTIONAL); + rx->dma_addr = dma_map_single(&nic->pdev->dev, rx->skb->data, + RFD_BUF_LEN, DMA_BIDIRECTIONAL); - if (pci_dma_mapping_error(nic->pdev, rx->dma_addr)) { + if (dma_mapping_error(&nic->pdev->dev, rx->dma_addr)) { dev_kfree_skb_any(rx->skb); rx->skb = NULL; rx->dma_addr = 0; @@ -1941,8 +1941,10 @@ static int e100_rx_alloc_skb(struct nic *nic, struct rx *rx) if (rx->prev->skb) { struct rfd *prev_rfd = (struct rfd *)rx->prev->skb->data; put_unaligned_le32(rx->dma_addr, &prev_rfd->link); - pci_dma_sync_single_for_device(nic->pdev, rx->prev->dma_addr, - sizeof(struct rfd), PCI_DMA_BIDIRECTIONAL); + dma_sync_single_for_device(&nic->pdev->dev, + rx->prev->dma_addr, + sizeof(struct rfd), + DMA_BIDIRECTIONAL); } return 0; @@ -1961,8 +1963,8 @@ static int e100_rx_indicate(struct nic *nic, struct rx *rx, return -EAGAIN; /* Need to sync before taking a peek at cb_complete bit */ - pci_dma_sync_single_for_cpu(nic->pdev, rx->dma_addr, - sizeof(struct rfd), PCI_DMA_BIDIRECTIONAL); + dma_sync_single_for_cpu(&nic->pdev->dev, rx->dma_addr, + sizeof(struct rfd), DMA_BIDIRECTIONAL); rfd_status = le16_to_cpu(rfd->status); netif_printk(nic, rx_status, KERN_DEBUG, nic->netdev, @@ -1981,9 +1983,9 @@ static int e100_rx_indicate(struct nic *nic, struct rx *rx, if (ioread8(&nic->csr->scb.status) & rus_no_res) nic->ru_running = RU_SUSPENDED; - pci_dma_sync_single_for_device(nic->pdev, rx->dma_addr, - sizeof(struct rfd), - PCI_DMA_FROMDEVICE); + dma_sync_single_for_device(&nic->pdev->dev, rx->dma_addr, + sizeof(struct rfd), + DMA_FROM_DEVICE); return -ENODATA; } @@ -1995,8 +1997,8 @@ static int e100_rx_indicate(struct nic *nic, struct rx *rx, actual_size = RFD_BUF_LEN - sizeof(struct rfd); /* Get data */ - pci_unmap_single(nic->pdev, rx->dma_addr, - RFD_BUF_LEN, PCI_DMA_BIDIRECTIONAL); + dma_unmap_single(&nic->pdev->dev, rx->dma_addr, RFD_BUF_LEN, + DMA_BIDIRECTIONAL); /* If this buffer has the el bit, but we think the receiver * is still running, check to see if it really stopped while @@ -2097,22 +2099,25 @@ static void e100_rx_clean(struct nic *nic, unsigned int *work_done, (struct rfd *)new_before_last_rx->skb->data; new_before_last_rfd->size = 0; new_before_last_rfd->command |= cpu_to_le16(cb_el); - pci_dma_sync_single_for_device(nic->pdev, - new_before_last_rx->dma_addr, sizeof(struct rfd), - PCI_DMA_BIDIRECTIONAL); + dma_sync_single_for_device(&nic->pdev->dev, + new_before_last_rx->dma_addr, + sizeof(struct rfd), + DMA_BIDIRECTIONAL); /* Now that we have a new stopping point, we can clear the old * stopping point. We must sync twice to get the proper * ordering on the hardware side of things. */ old_before_last_rfd->command &= ~cpu_to_le16(cb_el); - pci_dma_sync_single_for_device(nic->pdev, - old_before_last_rx->dma_addr, sizeof(struct rfd), - PCI_DMA_BIDIRECTIONAL); + dma_sync_single_for_device(&nic->pdev->dev, + old_before_last_rx->dma_addr, + sizeof(struct rfd), + DMA_BIDIRECTIONAL); old_before_last_rfd->size = cpu_to_le16(VLAN_ETH_FRAME_LEN + ETH_FCS_LEN); - pci_dma_sync_single_for_device(nic->pdev, - old_before_last_rx->dma_addr, sizeof(struct rfd), - PCI_DMA_BIDIRECTIONAL); + dma_sync_single_for_device(&nic->pdev->dev, + old_before_last_rx->dma_addr, + sizeof(struct rfd), + DMA_BIDIRECTIONAL); } if (restart_required) { @@ -2134,8 +2139,9 @@ static void e100_rx_clean_list(struct nic *nic) if (nic->rxs) { for (rx = nic->rxs, i = 0; i < count; rx++, i++) { if (rx->skb) { - pci_unmap_single(nic->pdev, rx->dma_addr, - RFD_BUF_LEN, PCI_DMA_BIDIRECTIONAL); + dma_unmap_single(&nic->pdev->dev, + rx->dma_addr, RFD_BUF_LEN, + DMA_BIDIRECTIONAL); dev_kfree_skb(rx->skb); } } @@ -2177,8 +2183,8 @@ static int e100_rx_alloc_list(struct nic *nic) before_last = (struct rfd *)rx->skb->data; before_last->command |= cpu_to_le16(cb_el); before_last->size = 0; - pci_dma_sync_single_for_device(nic->pdev, rx->dma_addr, - sizeof(struct rfd), PCI_DMA_BIDIRECTIONAL); + dma_sync_single_for_device(&nic->pdev->dev, rx->dma_addr, + sizeof(struct rfd), DMA_BIDIRECTIONAL); nic->rx_to_use = nic->rx_to_clean = nic->rxs; nic->ru_running = RU_SUSPENDED; @@ -2377,8 +2383,8 @@ static int e100_loopback_test(struct nic *nic, enum loopback loopback_mode) msleep(10); - pci_dma_sync_single_for_cpu(nic->pdev, nic->rx_to_clean->dma_addr, - RFD_BUF_LEN, PCI_DMA_BIDIRECTIONAL); + dma_sync_single_for_cpu(&nic->pdev->dev, nic->rx_to_clean->dma_addr, + RFD_BUF_LEN, DMA_BIDIRECTIONAL); if (memcmp(nic->rx_to_clean->skb->data + sizeof(struct rfd), skb->data, ETH_DATA_LEN)) @@ -2751,16 +2757,16 @@ static int e100_do_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd) static int e100_alloc(struct nic *nic) { - nic->mem = pci_alloc_consistent(nic->pdev, sizeof(struct mem), - &nic->dma_addr); + nic->mem = dma_alloc_coherent(&nic->pdev->dev, sizeof(struct mem), + &nic->dma_addr, GFP_KERNEL); return nic->mem ? 0 : -ENOMEM; } static void e100_free(struct nic *nic) { if (nic->mem) { - pci_free_consistent(nic->pdev, sizeof(struct mem), - nic->mem, nic->dma_addr); + dma_free_coherent(&nic->pdev->dev, sizeof(struct mem), + nic->mem, nic->dma_addr); nic->mem = NULL; } } @@ -2853,7 +2859,7 @@ static int e100_probe(struct pci_dev *pdev, const struct pci_device_id *ent) goto err_out_disable_pdev; } - if ((err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32)))) { + if ((err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32)))) { netif_err(nic, probe, nic->netdev, "No usable DMA configuration, aborting\n"); goto err_out_free_res; } -- GitLab From 462512824f902a24de794290dd622e664587da1d Mon Sep 17 00:00:00 2001 From: Alexey Denisov Date: Thu, 28 Jan 2021 09:48:59 +0500 Subject: [PATCH 1659/2012] lan743x: fix endianness when accessing descriptors TX/RX descriptor ring fields are always little-endian, but conversion wasn't performed for big-endian CPUs, so the driver failed to work. This patch makes the driver work on big-endian CPUs. It was tested and confirmed to work on NXP P1010 processor (PowerPC). Signed-off-by: Alexey Denisov Link: https://lore.kernel.org/r/20210128044859.280219-1-rtgbnm@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/microchip/lan743x_main.c | 66 +++++++++---------- drivers/net/ethernet/microchip/lan743x_main.h | 20 +++--- 2 files changed, 43 insertions(+), 43 deletions(-) diff --git a/drivers/net/ethernet/microchip/lan743x_main.c b/drivers/net/ethernet/microchip/lan743x_main.c index 3804310c853a8..51359ce650bdd 100644 --- a/drivers/net/ethernet/microchip/lan743x_main.c +++ b/drivers/net/ethernet/microchip/lan743x_main.c @@ -1253,7 +1253,7 @@ static void lan743x_tx_release_desc(struct lan743x_tx *tx, if (!(buffer_info->flags & TX_BUFFER_INFO_FLAG_ACTIVE)) goto done; - descriptor_type = (descriptor->data0) & + descriptor_type = le32_to_cpu(descriptor->data0) & TX_DESC_DATA0_DTYPE_MASK_; if (descriptor_type == TX_DESC_DATA0_DTYPE_DATA_) goto clean_up_data_descriptor; @@ -1313,7 +1313,7 @@ static int lan743x_tx_next_index(struct lan743x_tx *tx, int index) static void lan743x_tx_release_completed_descriptors(struct lan743x_tx *tx) { - while ((*tx->head_cpu_ptr) != (tx->last_head)) { + while (le32_to_cpu(*tx->head_cpu_ptr) != (tx->last_head)) { lan743x_tx_release_desc(tx, tx->last_head, false); tx->last_head = lan743x_tx_next_index(tx, tx->last_head); } @@ -1399,10 +1399,10 @@ static int lan743x_tx_frame_start(struct lan743x_tx *tx, if (dma_mapping_error(dev, dma_ptr)) return -ENOMEM; - tx_descriptor->data1 = DMA_ADDR_LOW32(dma_ptr); - tx_descriptor->data2 = DMA_ADDR_HIGH32(dma_ptr); - tx_descriptor->data3 = (frame_length << 16) & - TX_DESC_DATA3_FRAME_LENGTH_MSS_MASK_; + tx_descriptor->data1 = cpu_to_le32(DMA_ADDR_LOW32(dma_ptr)); + tx_descriptor->data2 = cpu_to_le32(DMA_ADDR_HIGH32(dma_ptr)); + tx_descriptor->data3 = cpu_to_le32((frame_length << 16) & + TX_DESC_DATA3_FRAME_LENGTH_MSS_MASK_); buffer_info->skb = NULL; buffer_info->dma_ptr = dma_ptr; @@ -1443,7 +1443,7 @@ static void lan743x_tx_frame_add_lso(struct lan743x_tx *tx, tx->frame_data0 |= TX_DESC_DATA0_IOC_; } tx_descriptor = &tx->ring_cpu_ptr[tx->frame_tail]; - tx_descriptor->data0 = tx->frame_data0; + tx_descriptor->data0 = cpu_to_le32(tx->frame_data0); /* move to next descriptor */ tx->frame_tail = lan743x_tx_next_index(tx, tx->frame_tail); @@ -1487,7 +1487,7 @@ static int lan743x_tx_frame_add_fragment(struct lan743x_tx *tx, /* wrap up previous descriptor */ tx_descriptor = &tx->ring_cpu_ptr[tx->frame_tail]; - tx_descriptor->data0 = tx->frame_data0; + tx_descriptor->data0 = cpu_to_le32(tx->frame_data0); /* move to next descriptor */ tx->frame_tail = lan743x_tx_next_index(tx, tx->frame_tail); @@ -1513,10 +1513,10 @@ static int lan743x_tx_frame_add_fragment(struct lan743x_tx *tx, return -ENOMEM; } - tx_descriptor->data1 = DMA_ADDR_LOW32(dma_ptr); - tx_descriptor->data2 = DMA_ADDR_HIGH32(dma_ptr); - tx_descriptor->data3 = (frame_length << 16) & - TX_DESC_DATA3_FRAME_LENGTH_MSS_MASK_; + tx_descriptor->data1 = cpu_to_le32(DMA_ADDR_LOW32(dma_ptr)); + tx_descriptor->data2 = cpu_to_le32(DMA_ADDR_HIGH32(dma_ptr)); + tx_descriptor->data3 = cpu_to_le32((frame_length << 16) & + TX_DESC_DATA3_FRAME_LENGTH_MSS_MASK_); buffer_info->skb = NULL; buffer_info->dma_ptr = dma_ptr; @@ -1560,7 +1560,7 @@ static void lan743x_tx_frame_end(struct lan743x_tx *tx, if (ignore_sync) buffer_info->flags |= TX_BUFFER_INFO_FLAG_IGNORE_SYNC; - tx_descriptor->data0 = tx->frame_data0; + tx_descriptor->data0 = cpu_to_le32(tx->frame_data0); tx->frame_tail = lan743x_tx_next_index(tx, tx->frame_tail); tx->last_tail = tx->frame_tail; @@ -1967,11 +1967,11 @@ static int lan743x_rx_init_ring_element(struct lan743x_rx *rx, int index, } buffer_info->buffer_length = length; - descriptor->data1 = DMA_ADDR_LOW32(buffer_info->dma_ptr); - descriptor->data2 = DMA_ADDR_HIGH32(buffer_info->dma_ptr); + descriptor->data1 = cpu_to_le32(DMA_ADDR_LOW32(buffer_info->dma_ptr)); + descriptor->data2 = cpu_to_le32(DMA_ADDR_HIGH32(buffer_info->dma_ptr)); descriptor->data3 = 0; - descriptor->data0 = (RX_DESC_DATA0_OWN_ | - (length & RX_DESC_DATA0_BUF_LENGTH_MASK_)); + descriptor->data0 = cpu_to_le32((RX_DESC_DATA0_OWN_ | + (length & RX_DESC_DATA0_BUF_LENGTH_MASK_))); skb_reserve(buffer_info->skb, RX_HEAD_PADDING); lan743x_rx_update_tail(rx, index); @@ -1986,12 +1986,12 @@ static void lan743x_rx_reuse_ring_element(struct lan743x_rx *rx, int index) descriptor = &rx->ring_cpu_ptr[index]; buffer_info = &rx->buffer_info[index]; - descriptor->data1 = DMA_ADDR_LOW32(buffer_info->dma_ptr); - descriptor->data2 = DMA_ADDR_HIGH32(buffer_info->dma_ptr); + descriptor->data1 = cpu_to_le32(DMA_ADDR_LOW32(buffer_info->dma_ptr)); + descriptor->data2 = cpu_to_le32(DMA_ADDR_HIGH32(buffer_info->dma_ptr)); descriptor->data3 = 0; - descriptor->data0 = (RX_DESC_DATA0_OWN_ | + descriptor->data0 = cpu_to_le32((RX_DESC_DATA0_OWN_ | ((buffer_info->buffer_length) & - RX_DESC_DATA0_BUF_LENGTH_MASK_)); + RX_DESC_DATA0_BUF_LENGTH_MASK_))); lan743x_rx_update_tail(rx, index); } @@ -2025,7 +2025,7 @@ static int lan743x_rx_process_packet(struct lan743x_rx *rx) { struct skb_shared_hwtstamps *hwtstamps = NULL; int result = RX_PROCESS_RESULT_NOTHING_TO_DO; - int current_head_index = *rx->head_cpu_ptr; + int current_head_index = le32_to_cpu(*rx->head_cpu_ptr); struct lan743x_rx_buffer_info *buffer_info; struct lan743x_rx_descriptor *descriptor; int extension_index = -1; @@ -2040,14 +2040,14 @@ static int lan743x_rx_process_packet(struct lan743x_rx *rx) if (rx->last_head != current_head_index) { descriptor = &rx->ring_cpu_ptr[rx->last_head]; - if (descriptor->data0 & RX_DESC_DATA0_OWN_) + if (le32_to_cpu(descriptor->data0) & RX_DESC_DATA0_OWN_) goto done; - if (!(descriptor->data0 & RX_DESC_DATA0_FS_)) + if (!(le32_to_cpu(descriptor->data0) & RX_DESC_DATA0_FS_)) goto done; first_index = rx->last_head; - if (descriptor->data0 & RX_DESC_DATA0_LS_) { + if (le32_to_cpu(descriptor->data0) & RX_DESC_DATA0_LS_) { last_index = rx->last_head; } else { int index; @@ -2055,10 +2055,10 @@ static int lan743x_rx_process_packet(struct lan743x_rx *rx) index = lan743x_rx_next_index(rx, first_index); while (index != current_head_index) { descriptor = &rx->ring_cpu_ptr[index]; - if (descriptor->data0 & RX_DESC_DATA0_OWN_) + if (le32_to_cpu(descriptor->data0) & RX_DESC_DATA0_OWN_) goto done; - if (descriptor->data0 & RX_DESC_DATA0_LS_) { + if (le32_to_cpu(descriptor->data0) & RX_DESC_DATA0_LS_) { last_index = index; break; } @@ -2067,17 +2067,17 @@ static int lan743x_rx_process_packet(struct lan743x_rx *rx) } if (last_index >= 0) { descriptor = &rx->ring_cpu_ptr[last_index]; - if (descriptor->data0 & RX_DESC_DATA0_EXT_) { + if (le32_to_cpu(descriptor->data0) & RX_DESC_DATA0_EXT_) { /* extension is expected to follow */ int index = lan743x_rx_next_index(rx, last_index); if (index != current_head_index) { descriptor = &rx->ring_cpu_ptr[index]; - if (descriptor->data0 & + if (le32_to_cpu(descriptor->data0) & RX_DESC_DATA0_OWN_) { goto done; } - if (descriptor->data0 & + if (le32_to_cpu(descriptor->data0) & RX_DESC_DATA0_EXT_) { extension_index = index; } else { @@ -2129,7 +2129,7 @@ static int lan743x_rx_process_packet(struct lan743x_rx *rx) } buffer_info->skb = NULL; packet_length = RX_DESC_DATA0_FRAME_LENGTH_GET_ - (descriptor->data0); + (le32_to_cpu(descriptor->data0)); skb_put(skb, packet_length - 4); skb->protocol = eth_type_trans(skb, rx->adapter->netdev); @@ -2167,8 +2167,8 @@ process_extension: descriptor = &rx->ring_cpu_ptr[extension_index]; buffer_info = &rx->buffer_info[extension_index]; - ts_sec = descriptor->data1; - ts_nsec = (descriptor->data2 & + ts_sec = le32_to_cpu(descriptor->data1); + ts_nsec = (le32_to_cpu(descriptor->data2) & RX_DESC_DATA2_TS_NS_MASK_); lan743x_rx_reuse_ring_element(rx, extension_index); real_last_index = extension_index; diff --git a/drivers/net/ethernet/microchip/lan743x_main.h b/drivers/net/ethernet/microchip/lan743x_main.h index 404af3f4635ea..f3f778910fcc6 100644 --- a/drivers/net/ethernet/microchip/lan743x_main.h +++ b/drivers/net/ethernet/microchip/lan743x_main.h @@ -661,7 +661,7 @@ struct lan743x_tx { struct lan743x_tx_buffer_info *buffer_info; - u32 *head_cpu_ptr; + __le32 *head_cpu_ptr; dma_addr_t head_dma_ptr; int last_head; int last_tail; @@ -691,7 +691,7 @@ struct lan743x_rx { struct lan743x_rx_buffer_info *buffer_info; - u32 *head_cpu_ptr; + __le32 *head_cpu_ptr; dma_addr_t head_dma_ptr; u32 last_head; u32 last_tail; @@ -775,10 +775,10 @@ struct lan743x_adapter { #define TX_DESC_DATA3_FRAME_LENGTH_MSS_MASK_ (0x3FFF0000) struct lan743x_tx_descriptor { - u32 data0; - u32 data1; - u32 data2; - u32 data3; + __le32 data0; + __le32 data1; + __le32 data2; + __le32 data3; } __aligned(DEFAULT_DMA_DESCRIPTOR_SPACING); #define TX_BUFFER_INFO_FLAG_ACTIVE BIT(0) @@ -813,10 +813,10 @@ struct lan743x_tx_buffer_info { #define RX_HEAD_PADDING NET_IP_ALIGN struct lan743x_rx_descriptor { - u32 data0; - u32 data1; - u32 data2; - u32 data3; + __le32 data0; + __le32 data1; + __le32 data2; + __le32 data3; } __aligned(DEFAULT_DMA_DESCRIPTOR_SPACING); #define RX_BUFFER_INFO_FLAG_ACTIVE BIT(0) -- GitLab From 0d4873f9aa4ff8fc1d63a5755395b794d32ce046 Mon Sep 17 00:00:00 2001 From: Ronnie Sahlberg Date: Thu, 28 Jan 2021 21:35:10 -0600 Subject: [PATCH 1660/2012] cifs: fix dfs domain referrals The new mount API requires additional changes to how DFS is handled. Additional testing of DFS uncovered problems with domain based DFS referrals (a follow on patch addresses DFS links) which this patch addresses. Signed-off-by: Ronnie Sahlberg Signed-off-by: Paulo Alcantara (SUSE) Signed-off-by: Steve French --- fs/cifs/cifs_dfs_ref.c | 12 ++++++++---- fs/cifs/cifsfs.c | 2 +- fs/cifs/cifsproto.h | 6 ++++-- fs/cifs/connect.c | 32 ++++++++++++++++++++++++++------ fs/cifs/dfs_cache.c | 8 +++++--- fs/cifs/fs_context.c | 31 +++++++++++++++++++++++++++++++ 6 files changed, 75 insertions(+), 16 deletions(-) diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c index e4c6ae47a7961..6b1ce4efb591c 100644 --- a/fs/cifs/cifs_dfs_ref.c +++ b/fs/cifs/cifs_dfs_ref.c @@ -133,8 +133,9 @@ cifs_build_devname(char *nodename, const char *prepath) * Caller is responsible for freeing returned value if it is not error. */ char *cifs_compose_mount_options(const char *sb_mountdata, - const char *fullpath, - const struct dfs_info3_param *ref) + const char *fullpath, + const struct dfs_info3_param *ref, + char **devname) { int rc; char *name; @@ -231,7 +232,10 @@ char *cifs_compose_mount_options(const char *sb_mountdata, strcat(mountdata, "ip="); strcat(mountdata, srvIP); - kfree(name); + if (devname) + *devname = name; + else + kfree(name); /*cifs_dbg(FYI, "%s: parent mountdata: %s\n", __func__, sb_mountdata);*/ /*cifs_dbg(FYI, "%s: submount mountdata: %s\n", __func__, mountdata );*/ @@ -278,7 +282,7 @@ static struct vfsmount *cifs_dfs_do_mount(struct dentry *mntpt, /* strip first '\' from fullpath */ mountdata = cifs_compose_mount_options(cifs_sb->ctx->mount_options, - fullpath + 1, NULL); + fullpath + 1, NULL, NULL); if (IS_ERR(mountdata)) { kfree(devname); return (struct vfsmount *)mountdata; diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index ce0d0037fd0af..e46da536ed339 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -822,7 +822,7 @@ cifs_smb3_do_mount(struct file_system_type *fs_type, goto out; } - rc = cifs_setup_volume_info(cifs_sb->ctx); + rc = cifs_setup_volume_info(cifs_sb->ctx, NULL, old_ctx->UNC); if (rc) { root = ERR_PTR(rc); goto out; diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index 340ff81ee87bf..32f7a013402ee 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h @@ -78,7 +78,8 @@ extern char *cifs_build_path_to_root(struct smb3_fs_context *ctx, int add_treename); extern char *build_wildcard_path_from_dentry(struct dentry *direntry); extern char *cifs_compose_mount_options(const char *sb_mountdata, - const char *fullpath, const struct dfs_info3_param *ref); + const char *fullpath, const struct dfs_info3_param *ref, + char **devname); /* extern void renew_parental_timestamps(struct dentry *direntry);*/ extern struct mid_q_entry *AllocMidQEntry(const struct smb_hdr *smb_buffer, struct TCP_Server_Info *server); @@ -89,6 +90,7 @@ extern void cifs_wake_up_task(struct mid_q_entry *mid); extern int cifs_handle_standard(struct TCP_Server_Info *server, struct mid_q_entry *mid); extern int smb3_parse_devname(const char *devname, struct smb3_fs_context *ctx); +extern int smb3_parse_opt(const char *options, const char *key, char **val); extern bool cifs_match_ipaddr(struct sockaddr *srcaddr, struct sockaddr *rhs); extern int cifs_discard_remaining_data(struct TCP_Server_Info *server); extern int cifs_call_async(struct TCP_Server_Info *server, @@ -549,7 +551,7 @@ extern int SMBencrypt(unsigned char *passwd, const unsigned char *c8, unsigned char *p24); extern int -cifs_setup_volume_info(struct smb3_fs_context *ctx); +cifs_setup_volume_info(struct smb3_fs_context *ctx, const char *mntopts, const char *devname); extern struct TCP_Server_Info * cifs_find_tcp_session(struct smb3_fs_context *ctx); diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index c8ef24bac94fc..10fe6d6d2dee4 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -2972,17 +2972,20 @@ expand_dfs_referral(const unsigned int xid, struct cifs_ses *ses, rc = dfs_cache_find(xid, ses, cifs_sb->local_nls, cifs_remap(cifs_sb), ref_path, &referral, NULL); if (!rc) { + char *fake_devname = NULL; + mdata = cifs_compose_mount_options(cifs_sb->ctx->mount_options, - full_path + 1, &referral); + full_path + 1, &referral, + &fake_devname); free_dfs_info_param(&referral); if (IS_ERR(mdata)) { rc = PTR_ERR(mdata); mdata = NULL; } else { - smb3_cleanup_fs_context_contents(ctx); - rc = cifs_setup_volume_info(ctx); + rc = cifs_setup_volume_info(ctx, mdata, fake_devname); } + kfree(fake_devname); kfree(cifs_sb->ctx->mount_options); cifs_sb->ctx->mount_options = mdata; } @@ -3036,6 +3039,7 @@ static int setup_dfs_tgt_conn(const char *path, const char *full_path, struct dfs_info3_param ref = {0}; char *mdata = NULL; struct smb3_fs_context fake_ctx = {NULL}; + char *fake_devname = NULL; cifs_dbg(FYI, "%s: dfs path: %s\n", __func__, path); @@ -3044,16 +3048,18 @@ static int setup_dfs_tgt_conn(const char *path, const char *full_path, return rc; mdata = cifs_compose_mount_options(cifs_sb->ctx->mount_options, - full_path + 1, &ref); + full_path + 1, &ref, + &fake_devname); free_dfs_info_param(&ref); if (IS_ERR(mdata)) { rc = PTR_ERR(mdata); mdata = NULL; } else - rc = cifs_setup_volume_info(&fake_ctx); + rc = cifs_setup_volume_info(&fake_ctx, mdata, fake_devname); kfree(mdata); + kfree(fake_devname); if (!rc) { /* @@ -3122,10 +3128,24 @@ static int do_dfs_failover(const char *path, const char *full_path, struct cifs_ * we should pass a clone of the original context? */ int -cifs_setup_volume_info(struct smb3_fs_context *ctx) +cifs_setup_volume_info(struct smb3_fs_context *ctx, const char *mntopts, const char *devname) { int rc = 0; + smb3_parse_devname(devname, ctx); + + if (mntopts) { + char *ip; + + cifs_dbg(FYI, "%s: mntopts=%s\n", __func__, mntopts); + rc = smb3_parse_opt(mntopts, "ip", &ip); + if (!rc && !cifs_convert_address((struct sockaddr *)&ctx->dstaddr, ip, + strlen(ip))) { + cifs_dbg(VFS, "%s: failed to convert ip address\n", __func__); + return -EINVAL; + } + } + if (ctx->nullauth) { cifs_dbg(FYI, "Anonymous login\n"); kfree(ctx->username); diff --git a/fs/cifs/dfs_cache.c b/fs/cifs/dfs_cache.c index 0fdb0de7ff861..4950ab0486aee 100644 --- a/fs/cifs/dfs_cache.c +++ b/fs/cifs/dfs_cache.c @@ -1417,7 +1417,7 @@ static struct cifs_ses *find_root_ses(struct vol_info *vi, int rc; struct cache_entry *ce; struct dfs_info3_param ref = {0}; - char *mdata = NULL; + char *mdata = NULL, *devname = NULL; struct TCP_Server_Info *server; struct cifs_ses *ses; struct smb3_fs_context ctx = {NULL}; @@ -1444,7 +1444,8 @@ static struct cifs_ses *find_root_ses(struct vol_info *vi, up_read(&htable_rw_lock); - mdata = cifs_compose_mount_options(vi->mntdata, rpath, &ref); + mdata = cifs_compose_mount_options(vi->mntdata, rpath, &ref, + &devname); free_dfs_info_param(&ref); if (IS_ERR(mdata)) { @@ -1453,7 +1454,7 @@ static struct cifs_ses *find_root_ses(struct vol_info *vi, goto out; } - rc = cifs_setup_volume_info(&ctx); + rc = cifs_setup_volume_info(&ctx, NULL, devname); if (rc) { ses = ERR_PTR(rc); @@ -1472,6 +1473,7 @@ out: smb3_cleanup_fs_context_contents(&ctx); kfree(mdata); kfree(rpath); + kfree(devname); return ses; } diff --git a/fs/cifs/fs_context.c b/fs/cifs/fs_context.c index 27354417e9889..5111aadfdb6b6 100644 --- a/fs/cifs/fs_context.c +++ b/fs/cifs/fs_context.c @@ -401,6 +401,37 @@ cifs_parse_smb_version(char *value, struct smb3_fs_context *ctx, bool is_smb3) return 0; } +int smb3_parse_opt(const char *options, const char *key, char **val) +{ + int rc = -ENOENT; + char *opts, *orig, *p; + + orig = opts = kstrdup(options, GFP_KERNEL); + if (!opts) + return -ENOMEM; + + while ((p = strsep(&opts, ","))) { + char *nval; + + if (!*p) + continue; + if (strncasecmp(p, key, strlen(key))) + continue; + nval = strchr(p, '='); + if (nval) { + if (nval == p) + continue; + *nval++ = 0; + *val = kstrndup(nval, strlen(nval), GFP_KERNEL); + rc = !*val ? -ENOMEM : 0; + goto out; + } + } +out: + kfree(orig); + return rc; +} + /* * Parse a devname into substrings and populate the ctx->UNC and ctx->prepath * fields with the result. Returns 0 on success and an error otherwise -- GitLab From 28af22c6c8dff6a16163e5b6a56211d5b535c97b Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Tue, 26 Jan 2021 18:39:39 +0100 Subject: [PATCH 1661/2012] net: adjust net_device layout for cacheline usage The current layout of net_device is not optimal for cacheline usage. The member adj_list.lower linked list is split between cacheline 2 and 3. The ifindex is placed together with stats (struct net_device_stats), although most modern drivers don't update this stats member. The members netdev_ops, mtu and hard_header_len are placed on three different cachelines. These members are accessed for XDP redirect into devmap, which were noticeably with perf tool. When not using the map redirect variant (like TC-BPF does), then ifindex is also used, which is placed on a separate fourth cacheline. These members are also accessed during forwarding with regular network stack. The members priv_flags and flags are on fast-path for network stack transmit path in __dev_queue_xmit (currently located together with mtu cacheline). This patch creates a read mostly cacheline, with the purpose of keeping the above mentioned members on the same cacheline. Some netdev_features_t members also becomes part of this cacheline, which is on purpose, as function netif_skb_features() is on fast-path via validate_xmit_skb(). Signed-off-by: Jesper Dangaard Brouer Link: https://lore.kernel.org/r/161168277983.410784.12401225493601624417.stgit@firesoul Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 53 ++++++++++++++++++++------------------- 1 file changed, 27 insertions(+), 26 deletions(-) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 9e8572533d8e6..e9e7ada07ea14 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1858,7 +1858,6 @@ struct net_device { unsigned long mem_end; unsigned long mem_start; unsigned long base_addr; - int irq; /* * Some hardware also needs these fields (state,dev_list, @@ -1880,6 +1879,23 @@ struct net_device { struct list_head lower; } adj_list; + /* Read-mostly cache-line for fast-path access */ + unsigned int flags; + unsigned int priv_flags; + const struct net_device_ops *netdev_ops; + int ifindex; + unsigned short gflags; + unsigned short hard_header_len; + + /* Note : dev->mtu is often read without holding a lock. + * Writers usually hold RTNL. + * It is recommended to use READ_ONCE() to annotate the reads, + * and to use WRITE_ONCE() to annotate the writes. + */ + unsigned int mtu; + unsigned short needed_headroom; + unsigned short needed_tailroom; + netdev_features_t features; netdev_features_t hw_features; netdev_features_t wanted_features; @@ -1888,10 +1904,15 @@ struct net_device { netdev_features_t mpls_features; netdev_features_t gso_partial_features; - int ifindex; + unsigned int min_mtu; + unsigned int max_mtu; + unsigned short type; + unsigned char min_header_len; + unsigned char name_assign_type; + int group; - struct net_device_stats stats; + struct net_device_stats stats; /* not used by modern drivers */ atomic_long_t rx_dropped; atomic_long_t tx_dropped; @@ -1905,7 +1926,6 @@ struct net_device { const struct iw_handler_def *wireless_handlers; struct iw_public_data *wireless_data; #endif - const struct net_device_ops *netdev_ops; const struct ethtool_ops *ethtool_ops; #ifdef CONFIG_NET_L3_MASTER_DEV const struct l3mdev_ops *l3mdev_ops; @@ -1924,34 +1944,12 @@ struct net_device { const struct header_ops *header_ops; - unsigned int flags; - unsigned int priv_flags; - - unsigned short gflags; - unsigned short padded; - unsigned char operstate; unsigned char link_mode; unsigned char if_port; unsigned char dma; - /* Note : dev->mtu is often read without holding a lock. - * Writers usually hold RTNL. - * It is recommended to use READ_ONCE() to annotate the reads, - * and to use WRITE_ONCE() to annotate the writes. - */ - unsigned int mtu; - unsigned int min_mtu; - unsigned int max_mtu; - unsigned short type; - unsigned short hard_header_len; - unsigned char min_header_len; - unsigned char name_assign_type; - - unsigned short needed_headroom; - unsigned short needed_tailroom; - /* Interface address info. */ unsigned char perm_addr[MAX_ADDR_LEN]; unsigned char addr_assign_type; @@ -1962,7 +1960,10 @@ struct net_device { unsigned short neigh_priv_len; unsigned short dev_id; unsigned short dev_port; + unsigned short padded; + spinlock_t addr_list_lock; + int irq; struct netdev_hw_addr_list uc; struct netdev_hw_addr_list mc; -- GitLab From aa56e3e5cdb4f7e1aee6dd04f0934a7429d30246 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Tue, 26 Jan 2021 12:56:58 -0600 Subject: [PATCH 1662/2012] net: ipa: rename "tag status" symbols There is a set of functions and symbols related to performing "tag_process" immediate commands to clear the IPA pipeline. The name is related to one of the commands issued when doing this, but it doesn't really convey the overall purpose of taking this action. The purpose is to take some steps to "clear out" the hardware pipeline, and to wait until that process completes, to ensure the IPA hardware is in a well-defined state. Rename these symbols to use "pipeline_clear" in their names instead. Add some comments to explain a bit more about what's going on. Signed-off-by: Alex Elder Acked-by: Willem de Bruijn Signed-off-by: Jakub Kicinski --- drivers/net/ipa/ipa_cmd.c | 26 ++++++++++++++++++-------- drivers/net/ipa/ipa_cmd.h | 17 +++++++---------- drivers/net/ipa/ipa_endpoint.c | 6 +++--- 3 files changed, 28 insertions(+), 21 deletions(-) diff --git a/drivers/net/ipa/ipa_cmd.c b/drivers/net/ipa/ipa_cmd.c index 002e514485100..27630244512d8 100644 --- a/drivers/net/ipa/ipa_cmd.c +++ b/drivers/net/ipa/ipa_cmd.c @@ -567,33 +567,43 @@ static void ipa_cmd_transfer_add(struct gsi_trans *trans, u16 size) direction, opcode); } -void ipa_cmd_tag_process_add(struct gsi_trans *trans) +/* Add immediate commands to a transaction to clear the hardware pipeline */ +void ipa_cmd_pipeline_clear_add(struct gsi_trans *trans) { struct ipa *ipa = container_of(trans->gsi, struct ipa, gsi); struct ipa_endpoint *endpoint; - endpoint = ipa->name_map[IPA_ENDPOINT_AP_LAN_RX]; - + /* Issue a no-op register write command (mask 0 means no write) */ ipa_cmd_register_write_add(trans, 0, 0, 0, true); + + /* Send a data packet through the IPA pipeline. The packet_init + * command says to send the next packet directly to the exception + * endpoint without any other IPA processing. The tag_status + * command requests that status be generated on completion of + * that transfer, and that it will contain the given tag value. + * Finally, the transfer command sends a small packet of data + * (instead of a command) using the command endpoint. + */ + endpoint = ipa->name_map[IPA_ENDPOINT_AP_LAN_RX]; ipa_cmd_ip_packet_init_add(trans, endpoint->endpoint_id); ipa_cmd_ip_tag_status_add(trans, 0xcba987654321); ipa_cmd_transfer_add(trans, 4); } -/* Returns the number of commands required for the tag process */ -u32 ipa_cmd_tag_process_count(void) +/* Returns the number of commands required to clear the pipeline */ +u32 ipa_cmd_pipeline_clear_count(void) { return 4; } -void ipa_cmd_tag_process(struct ipa *ipa) +void ipa_cmd_pipeline_clear(struct ipa *ipa) { - u32 count = ipa_cmd_tag_process_count(); + u32 count = ipa_cmd_pipeline_clear_count(); struct gsi_trans *trans; trans = ipa_cmd_trans_alloc(ipa, count); if (trans) { - ipa_cmd_tag_process_add(trans); + ipa_cmd_pipeline_clear_add(trans); gsi_trans_commit_wait(trans); } else { dev_err(&ipa->pdev->dev, diff --git a/drivers/net/ipa/ipa_cmd.h b/drivers/net/ipa/ipa_cmd.h index 4ed09c486abc1..a41a58cc2c5ac 100644 --- a/drivers/net/ipa/ipa_cmd.h +++ b/drivers/net/ipa/ipa_cmd.h @@ -157,26 +157,23 @@ void ipa_cmd_dma_shared_mem_add(struct gsi_trans *trans, u32 offset, u16 size, dma_addr_t addr, bool toward_ipa); /** - * ipa_cmd_tag_process_add() - Add IPA tag process commands to a transaction + * ipa_cmd_pipeline_clear_add() - Add pipeline clear commands to a transaction * @trans: GSI transaction */ -void ipa_cmd_tag_process_add(struct gsi_trans *trans); +void ipa_cmd_pipeline_clear_add(struct gsi_trans *trans); /** - * ipa_cmd_tag_process_add_count() - Number of commands in a tag process + * ipa_cmd_pipeline_clear_count() - # commands required to clear pipeline * * Return: The number of elements to allocate in a transaction - * to hold tag process commands + * to hold commands to clear the pipeline */ -u32 ipa_cmd_tag_process_count(void); +u32 ipa_cmd_pipeline_clear_count(void); /** - * ipa_cmd_tag_process() - Perform a tag process - * - * @Return: The number of elements to allocate in a transaction - * to hold tag process commands + * ipa_cmd_pipeline_clear() - Clear the hardware pipeline */ -void ipa_cmd_tag_process(struct ipa *ipa); +void ipa_cmd_pipeline_clear(struct ipa *ipa); /** * ipa_cmd_trans_alloc() - Allocate a transaction for the command TX endpoint diff --git a/drivers/net/ipa/ipa_endpoint.c b/drivers/net/ipa/ipa_endpoint.c index 688a3dd40510a..39ae0dd4e0471 100644 --- a/drivers/net/ipa/ipa_endpoint.c +++ b/drivers/net/ipa/ipa_endpoint.c @@ -399,7 +399,7 @@ int ipa_endpoint_modem_exception_reset_all(struct ipa *ipa) * That won't happen, and we could be more precise, but this is fine * for now. We need to end the transaction with a "tag process." */ - count = hweight32(initialized) + ipa_cmd_tag_process_count(); + count = hweight32(initialized) + ipa_cmd_pipeline_clear_count(); trans = ipa_cmd_trans_alloc(ipa, count); if (!trans) { dev_err(&ipa->pdev->dev, @@ -428,7 +428,7 @@ int ipa_endpoint_modem_exception_reset_all(struct ipa *ipa) ipa_cmd_register_write_add(trans, offset, 0, ~0, false); } - ipa_cmd_tag_process_add(trans); + ipa_cmd_pipeline_clear_add(trans); /* XXX This should have a 1 second timeout */ gsi_trans_commit_wait(trans); @@ -1564,7 +1564,7 @@ void ipa_endpoint_suspend(struct ipa *ipa) if (ipa->modem_netdev) ipa_modem_suspend(ipa->modem_netdev); - ipa_cmd_tag_process(ipa); + ipa_cmd_pipeline_clear(ipa); ipa_endpoint_suspend_one(ipa->name_map[IPA_ENDPOINT_AP_LAN_RX]); ipa_endpoint_suspend_one(ipa->name_map[IPA_ENDPOINT_AP_COMMAND_TX]); -- GitLab From 162fbc6f4519f9b083fbd96b60d908c78a9c87f6 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Tue, 26 Jan 2021 12:56:59 -0600 Subject: [PATCH 1663/2012] net: ipa: minor update to handling of packet with status Rearrange some comments and assignments made when handling a packet that is received with status, aiming to improve understandability. Use DIV_ROUND_CLOSEST() to get a better per-packet true size estimate. Signed-off-by: Alex Elder Acked-by: Willem de Bruijn Signed-off-by: Jakub Kicinski --- drivers/net/ipa/ipa_endpoint.c | 30 +++++++++++++++++------------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/drivers/net/ipa/ipa_endpoint.c b/drivers/net/ipa/ipa_endpoint.c index 39ae0dd4e0471..c5524215054c8 100644 --- a/drivers/net/ipa/ipa_endpoint.c +++ b/drivers/net/ipa/ipa_endpoint.c @@ -1213,12 +1213,11 @@ static void ipa_endpoint_status_parse(struct ipa_endpoint *endpoint, continue; } - /* Compute the amount of buffer space consumed by the - * packet, including the status element. If the hardware - * is configured to pad packet data to an aligned boundary, - * account for that. And if checksum offload is is enabled - * a trailer containing computed checksum information will - * be appended. + /* Compute the amount of buffer space consumed by the packet, + * including the status element. If the hardware is configured + * to pad packet data to an aligned boundary, account for that. + * And if checksum offload is enabled a trailer containing + * computed checksum information will be appended. */ align = endpoint->data->rx.pad_align ? : 1; len = le16_to_cpu(status->pkt_len); @@ -1226,16 +1225,21 @@ static void ipa_endpoint_status_parse(struct ipa_endpoint *endpoint, if (endpoint->data->checksum) len += sizeof(struct rmnet_map_dl_csum_trailer); - /* Charge the new packet with a proportional fraction of - * the unused space in the original receive buffer. - * XXX Charge a proportion of the *whole* receive buffer? - */ if (!ipa_status_drop_packet(status)) { - u32 extra = unused * len / total_len; - void *data2 = data + sizeof(*status); - u32 len2 = le16_to_cpu(status->pkt_len); + void *data2; + u32 extra; + u32 len2; /* Client receives only packet data (no status) */ + data2 = data + sizeof(*status); + len2 = le16_to_cpu(status->pkt_len); + + /* Have the true size reflect the extra unused space in + * the original receive buffer. Distribute the "cost" + * proportionately across all aggregated packets in the + * buffer. + */ + extra = DIV_ROUND_CLOSEST(unused * len, total_len); ipa_endpoint_skb_copy(endpoint, data2, len2, extra); } -- GitLab From f6aba7b5199ad2a73a3deb6c042dee770650719c Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Tue, 26 Jan 2021 12:57:00 -0600 Subject: [PATCH 1664/2012] net: ipa: drop packet if status has valid tag Introduce ipa_endpoint_status_tag(), which returns true if received status indicates its tag field is valid. The endpoint parameter is not yet used. Call this from ipa_status_drop_packet(), and drop the packet if the status indicates the tag was valid. Pass the endpoint pointer to ipa_status_drop_packet(), and rename it ipa_endpoint_status_drop(). The endpoint will be used in the next patch. Signed-off-by: Alex Elder Acked-by: Willem de Bruijn Signed-off-by: Jakub Kicinski --- drivers/net/ipa/ipa_endpoint.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/drivers/net/ipa/ipa_endpoint.c b/drivers/net/ipa/ipa_endpoint.c index c5524215054c8..68970a3baa47a 100644 --- a/drivers/net/ipa/ipa_endpoint.c +++ b/drivers/net/ipa/ipa_endpoint.c @@ -69,8 +69,11 @@ struct ipa_status { }; /* Field masks for struct ipa_status structure fields */ +#define IPA_STATUS_MASK_TAG_VALID_FMASK GENMASK(4, 4) +#define IPA_STATUS_SRC_IDX_FMASK GENMASK(4, 0) #define IPA_STATUS_DST_IDX_FMASK GENMASK(4, 0) #define IPA_STATUS_FLAGS1_RT_RULE_ID_FMASK GENMASK(31, 22) +#define IPA_STATUS_FLAGS2_TAG_FMASK GENMASK_ULL(63, 16) #ifdef IPA_VALIDATE @@ -1172,11 +1175,22 @@ static bool ipa_endpoint_status_skip(struct ipa_endpoint *endpoint, return false; /* Don't skip this packet, process it */ } +static bool ipa_endpoint_status_tag(struct ipa_endpoint *endpoint, + const struct ipa_status *status) +{ + return !!le16_get_bits(status->mask, IPA_STATUS_MASK_TAG_VALID_FMASK); +} + /* Return whether the status indicates the packet should be dropped */ -static bool ipa_status_drop_packet(const struct ipa_status *status) +static bool ipa_endpoint_status_drop(struct ipa_endpoint *endpoint, + const struct ipa_status *status) { u32 val; + /* If the status indicates a tagged transfer, we'll drop the packet */ + if (ipa_endpoint_status_tag(endpoint, status)) + return true; + /* Deaggregation exceptions we drop; all other types we consume */ if (status->exception) return status->exception == IPA_STATUS_EXCEPTION_DEAGGR; @@ -1225,7 +1239,7 @@ static void ipa_endpoint_status_parse(struct ipa_endpoint *endpoint, if (endpoint->data->checksum) len += sizeof(struct rmnet_map_dl_csum_trailer); - if (!ipa_status_drop_packet(status)) { + if (!ipa_endpoint_status_drop(endpoint, status)) { void *data2; u32 extra; u32 len2; -- GitLab From 51c48ce264f85919b263a38e4defa50b80928877 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Tue, 26 Jan 2021 12:57:01 -0600 Subject: [PATCH 1665/2012] net: ipa: signal when tag transfer completes There are times, such as when the modem crashes, when we issue commands to clear the IPA hardware pipeline. These commands include a data transfer command that delivers a small packet directly to the default (AP<-LAN RX) endpoint. The places that do this wait for the transactions that contain these commands to complete, but the pipeline can't be assumed clear until the sent packet has been *received*. The small transfer will be delivered with a status structure, and that status will indicate its tag is valid. This is the only place we send a tagged packet, so we use the tag to determine when the pipeline clear packet has arrived. Add a completion to the IPA structure to to be used to signal the receipt of a pipeline clear packet. Create a new function ipa_cmd_pipeline_clear_wait() that will wait for that completion. Reinitialize the completion whenever pipeline clear commands are added to a transaction. Extend ipa_endpoint_status_tag() to check whether a packet whose status contains a valid tag was sent from the AP->command TX endpoint, and if so, signal the new IPA completion. Have all callers of ipa_cmd_pipeline_clear_add() wait for the pipeline clear indication after the transaction that clears the pipeline has completed. Signed-off-by: Alex Elder Acked-by: Willem de Bruijn Signed-off-by: Jakub Kicinski --- drivers/net/ipa/ipa.h | 2 ++ drivers/net/ipa/ipa_cmd.c | 9 +++++++++ drivers/net/ipa/ipa_cmd.h | 7 +++++++ drivers/net/ipa/ipa_endpoint.c | 27 ++++++++++++++++++++++++++- drivers/net/ipa/ipa_main.c | 1 + 5 files changed, 45 insertions(+), 1 deletion(-) diff --git a/drivers/net/ipa/ipa.h b/drivers/net/ipa/ipa.h index c6c6a7f6909c1..8020776313716 100644 --- a/drivers/net/ipa/ipa.h +++ b/drivers/net/ipa/ipa.h @@ -43,6 +43,7 @@ enum ipa_flag { * @flags: Boolean state flags * @version: IPA hardware version * @pdev: Platform device + * @completion: Used to signal pipeline clear transfer complete * @smp2p: SMP2P information * @clock: IPA clocking information * @table_addr: DMA address of filter/route table content @@ -82,6 +83,7 @@ struct ipa { DECLARE_BITMAP(flags, IPA_FLAG_COUNT); enum ipa_version version; struct platform_device *pdev; + struct completion completion; struct notifier_block nb; void *notifier; struct ipa_smp2p *smp2p; diff --git a/drivers/net/ipa/ipa_cmd.c b/drivers/net/ipa/ipa_cmd.c index 27630244512d8..7df0072bddcce 100644 --- a/drivers/net/ipa/ipa_cmd.c +++ b/drivers/net/ipa/ipa_cmd.c @@ -573,6 +573,9 @@ void ipa_cmd_pipeline_clear_add(struct gsi_trans *trans) struct ipa *ipa = container_of(trans->gsi, struct ipa, gsi); struct ipa_endpoint *endpoint; + /* This will complete when the transfer is received */ + reinit_completion(&ipa->completion); + /* Issue a no-op register write command (mask 0 means no write) */ ipa_cmd_register_write_add(trans, 0, 0, 0, true); @@ -596,6 +599,11 @@ u32 ipa_cmd_pipeline_clear_count(void) return 4; } +void ipa_cmd_pipeline_clear_wait(struct ipa *ipa) +{ + wait_for_completion(&ipa->completion); +} + void ipa_cmd_pipeline_clear(struct ipa *ipa) { u32 count = ipa_cmd_pipeline_clear_count(); @@ -605,6 +613,7 @@ void ipa_cmd_pipeline_clear(struct ipa *ipa) if (trans) { ipa_cmd_pipeline_clear_add(trans); gsi_trans_commit_wait(trans); + ipa_cmd_pipeline_clear_wait(ipa); } else { dev_err(&ipa->pdev->dev, "error allocating %u entry tag transaction\n", count); diff --git a/drivers/net/ipa/ipa_cmd.h b/drivers/net/ipa/ipa_cmd.h index a41a58cc2c5ac..6dd3d35cf315d 100644 --- a/drivers/net/ipa/ipa_cmd.h +++ b/drivers/net/ipa/ipa_cmd.h @@ -170,8 +170,15 @@ void ipa_cmd_pipeline_clear_add(struct gsi_trans *trans); */ u32 ipa_cmd_pipeline_clear_count(void); +/** + * ipa_cmd_pipeline_clear_wait() - Wait pipeline clear to complete + * @ipa: - IPA pointer + */ +void ipa_cmd_pipeline_clear_wait(struct ipa *ipa); + /** * ipa_cmd_pipeline_clear() - Clear the hardware pipeline + * @ipa: - IPA pointer */ void ipa_cmd_pipeline_clear(struct ipa *ipa); diff --git a/drivers/net/ipa/ipa_endpoint.c b/drivers/net/ipa/ipa_endpoint.c index 68970a3baa47a..8313220d41e70 100644 --- a/drivers/net/ipa/ipa_endpoint.c +++ b/drivers/net/ipa/ipa_endpoint.c @@ -436,6 +436,8 @@ int ipa_endpoint_modem_exception_reset_all(struct ipa *ipa) /* XXX This should have a 1 second timeout */ gsi_trans_commit_wait(trans); + ipa_cmd_pipeline_clear_wait(ipa); + return 0; } @@ -1178,7 +1180,30 @@ static bool ipa_endpoint_status_skip(struct ipa_endpoint *endpoint, static bool ipa_endpoint_status_tag(struct ipa_endpoint *endpoint, const struct ipa_status *status) { - return !!le16_get_bits(status->mask, IPA_STATUS_MASK_TAG_VALID_FMASK); + struct ipa_endpoint *command_endpoint; + struct ipa *ipa = endpoint->ipa; + u32 endpoint_id; + + if (!le16_get_bits(status->mask, IPA_STATUS_MASK_TAG_VALID_FMASK)) + return false; /* No valid tag */ + + /* The status contains a valid tag. We know the packet was sent to + * this endpoint (already verified by ipa_endpoint_status_skip()). + * If the packet came from the AP->command TX endpoint we know + * this packet was sent as part of the pipeline clear process. + */ + endpoint_id = u8_get_bits(status->endp_src_idx, + IPA_STATUS_SRC_IDX_FMASK); + command_endpoint = ipa->name_map[IPA_ENDPOINT_AP_COMMAND_TX]; + if (endpoint_id == command_endpoint->endpoint_id) { + complete(&ipa->completion); + } else { + dev_err(&ipa->pdev->dev, + "unexpected tagged packet from endpoint %u\n", + endpoint_id); + } + + return true; } /* Return whether the status indicates the packet should be dropped */ diff --git a/drivers/net/ipa/ipa_main.c b/drivers/net/ipa/ipa_main.c index ab0fd5cb49277..c10e7340b0318 100644 --- a/drivers/net/ipa/ipa_main.c +++ b/drivers/net/ipa/ipa_main.c @@ -831,6 +831,7 @@ static int ipa_probe(struct platform_device *pdev) dev_set_drvdata(dev, ipa); ipa->clock = clock; ipa->version = data->version; + init_completion(&ipa->completion); ret = ipa_reg_init(ipa); if (ret) -- GitLab From 792b75b14786bae709124615a58a474ded15aa7c Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Tue, 26 Jan 2021 12:57:02 -0600 Subject: [PATCH 1666/2012] net: ipa: don't pass tag value to ipa_cmd_ip_tag_status_add() We only send a tagged packet from the AP->command TX endpoint when we're clearing the hardware pipeline. And when we receive the tagged packet we don't care what the actual tag value is. Stop passing a tag value to ipa_cmd_ip_tag_status_add(), and just encode 0 as the tag sent. Fix the function that encodes the tag so it uses the proper byte ordering. Signed-off-by: Alex Elder Acked-by: Willem de Bruijn Signed-off-by: Jakub Kicinski --- drivers/net/ipa/ipa_cmd.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ipa/ipa_cmd.c b/drivers/net/ipa/ipa_cmd.c index 7df0072bddcce..eb50e7437359a 100644 --- a/drivers/net/ipa/ipa_cmd.c +++ b/drivers/net/ipa/ipa_cmd.c @@ -529,7 +529,7 @@ void ipa_cmd_dma_shared_mem_add(struct gsi_trans *trans, u32 offset, u16 size, direction, opcode); } -static void ipa_cmd_ip_tag_status_add(struct gsi_trans *trans, u64 tag) +static void ipa_cmd_ip_tag_status_add(struct gsi_trans *trans) { struct ipa *ipa = container_of(trans->gsi, struct ipa, gsi); enum ipa_cmd_opcode opcode = IPA_CMD_IP_PACKET_TAG_STATUS; @@ -543,7 +543,7 @@ static void ipa_cmd_ip_tag_status_add(struct gsi_trans *trans, u64 tag) cmd_payload = ipa_cmd_payload_alloc(ipa, &payload_addr); payload = &cmd_payload->ip_packet_tag_status; - payload->tag = u64_encode_bits(tag, IP_PACKET_TAG_STATUS_TAG_FMASK); + payload->tag = le64_encode_bits(0, IP_PACKET_TAG_STATUS_TAG_FMASK); gsi_trans_cmd_add(trans, payload, sizeof(*payload), payload_addr, direction, opcode); @@ -583,13 +583,13 @@ void ipa_cmd_pipeline_clear_add(struct gsi_trans *trans) * command says to send the next packet directly to the exception * endpoint without any other IPA processing. The tag_status * command requests that status be generated on completion of - * that transfer, and that it will contain the given tag value. + * that transfer, and that it will be tagged with a value. * Finally, the transfer command sends a small packet of data * (instead of a command) using the command endpoint. */ endpoint = ipa->name_map[IPA_ENDPOINT_AP_LAN_RX]; ipa_cmd_ip_packet_init_add(trans, endpoint->endpoint_id); - ipa_cmd_ip_tag_status_add(trans, 0xcba987654321); + ipa_cmd_ip_tag_status_add(trans); ipa_cmd_transfer_add(trans, 4); } -- GitLab From 070740d389aa6baff32f2fa3593034675bf56d16 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Tue, 26 Jan 2021 12:57:03 -0600 Subject: [PATCH 1667/2012] net: ipa: don't pass size to ipa_cmd_transfer_add() The only time we transfer data (rather than issuing a command) out of the AP->command TX endpoint is when we're clearing the hardware pipeline. All that's needed is a "small" data buffer, and its contents aren't even important. For convenience, we just transfer a command structure in this case (it's already mapped for DMA). The TRE is added to a transaction using ipa_cmd_ip_tag_status_add(), but we ignore the size value provided to that function. So just get rid of the size argument. Signed-off-by: Alex Elder Acked-by: Willem de Bruijn Signed-off-by: Jakub Kicinski --- drivers/net/ipa/ipa_cmd.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/net/ipa/ipa_cmd.c b/drivers/net/ipa/ipa_cmd.c index eb50e7437359a..97b50fee60089 100644 --- a/drivers/net/ipa/ipa_cmd.c +++ b/drivers/net/ipa/ipa_cmd.c @@ -550,7 +550,7 @@ static void ipa_cmd_ip_tag_status_add(struct gsi_trans *trans) } /* Issue a small command TX data transfer */ -static void ipa_cmd_transfer_add(struct gsi_trans *trans, u16 size) +static void ipa_cmd_transfer_add(struct gsi_trans *trans) { struct ipa *ipa = container_of(trans->gsi, struct ipa, gsi); enum dma_data_direction direction = DMA_TO_DEVICE; @@ -558,8 +558,6 @@ static void ipa_cmd_transfer_add(struct gsi_trans *trans, u16 size) union ipa_cmd_payload *payload; dma_addr_t payload_addr; - /* assert(size <= sizeof(*payload)); */ - /* Just transfer a zero-filled payload structure */ payload = ipa_cmd_payload_alloc(ipa, &payload_addr); @@ -590,7 +588,7 @@ void ipa_cmd_pipeline_clear_add(struct gsi_trans *trans) endpoint = ipa->name_map[IPA_ENDPOINT_AP_LAN_RX]; ipa_cmd_ip_packet_init_add(trans, endpoint->endpoint_id); ipa_cmd_ip_tag_status_add(trans); - ipa_cmd_transfer_add(trans, 4); + ipa_cmd_transfer_add(trans); } /* Returns the number of commands required to clear the pipeline */ -- GitLab From ea12f1b3c8289102620d3030de3547eedce6d9e8 Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Thu, 28 Jan 2021 12:25:47 +0100 Subject: [PATCH 1668/2012] s390/qeth: clean up load/remove code for disciplines We have two usage patterns: 1. get & ->setup() a new discipline, or 2. ->remove() & put the currently loaded one. Add corresponding helpers that hide the internals & error handling. Signed-off-by: Julian Wiedmann Reviewed-by: Alexandra Winter Acked-by: Willem de Bruijn Signed-off-by: Jakub Kicinski --- drivers/s390/net/qeth_core.h | 4 +-- drivers/s390/net/qeth_core_main.c | 45 ++++++++++++++++--------------- drivers/s390/net/qeth_core_sys.c | 10 ++----- 3 files changed, 28 insertions(+), 31 deletions(-) diff --git a/drivers/s390/net/qeth_core.h b/drivers/s390/net/qeth_core.h index 28f637042d444..331dcbbf3e251 100644 --- a/drivers/s390/net/qeth_core.h +++ b/drivers/s390/net/qeth_core.h @@ -1067,8 +1067,8 @@ extern const struct device_type qeth_generic_devtype; const char *qeth_get_cardname_short(struct qeth_card *); int qeth_resize_buffer_pool(struct qeth_card *card, unsigned int count); -int qeth_core_load_discipline(struct qeth_card *, enum qeth_discipline_id); -void qeth_core_free_discipline(struct qeth_card *); +int qeth_setup_discipline(struct qeth_card *card, enum qeth_discipline_id disc); +void qeth_remove_discipline(struct qeth_card *card); /* exports for qeth discipline device drivers */ extern struct kmem_cache *qeth_core_header_cache; diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index cf18d87da41e2..0a65213ab6065 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -6349,9 +6349,11 @@ static int qeth_register_dbf_views(void) static DEFINE_MUTEX(qeth_mod_mutex); /* for synchronized module loading */ -int qeth_core_load_discipline(struct qeth_card *card, - enum qeth_discipline_id discipline) +int qeth_setup_discipline(struct qeth_card *card, + enum qeth_discipline_id discipline) { + int rc; + mutex_lock(&qeth_mod_mutex); switch (discipline) { case QETH_DISCIPLINE_LAYER3: @@ -6373,12 +6375,25 @@ int qeth_core_load_discipline(struct qeth_card *card, return -EINVAL; } + rc = card->discipline->setup(card->gdev); + if (rc) { + if (discipline == QETH_DISCIPLINE_LAYER2) + symbol_put(qeth_l2_discipline); + else + symbol_put(qeth_l3_discipline); + card->discipline = NULL; + + return rc; + } + card->options.layer = discipline; return 0; } -void qeth_core_free_discipline(struct qeth_card *card) +void qeth_remove_discipline(struct qeth_card *card) { + card->discipline->remove(card->gdev); + if (IS_LAYER2(card)) symbol_put(qeth_l2_discipline); else @@ -6586,23 +6601,18 @@ static int qeth_core_probe_device(struct ccwgroup_device *gdev) default: card->info.layer_enforced = true; /* It's so early that we don't need the discipline_mutex yet. */ - rc = qeth_core_load_discipline(card, enforced_disc); + rc = qeth_setup_discipline(card, enforced_disc); if (rc) - goto err_load; + goto err_setup_disc; gdev->dev.type = IS_OSN(card) ? &qeth_osn_devtype : card->discipline->devtype; - rc = card->discipline->setup(card->gdev); - if (rc) - goto err_disc; break; } return 0; -err_disc: - qeth_core_free_discipline(card); -err_load: +err_setup_disc: err_chp_desc: free_netdev(card->dev); err_card: @@ -6619,10 +6629,8 @@ static void qeth_core_remove_device(struct ccwgroup_device *gdev) QETH_CARD_TEXT(card, 2, "removedv"); mutex_lock(&card->discipline_mutex); - if (card->discipline) { - card->discipline->remove(gdev); - qeth_core_free_discipline(card); - } + if (card->discipline) + qeth_remove_discipline(card); mutex_unlock(&card->discipline_mutex); qeth_free_qdio_queues(card); @@ -6642,14 +6650,9 @@ static int qeth_core_set_online(struct ccwgroup_device *gdev) if (!card->discipline) { def_discipline = IS_IQD(card) ? QETH_DISCIPLINE_LAYER3 : QETH_DISCIPLINE_LAYER2; - rc = qeth_core_load_discipline(card, def_discipline); + rc = qeth_setup_discipline(card, def_discipline); if (rc) goto err; - rc = card->discipline->setup(card->gdev); - if (rc) { - qeth_core_free_discipline(card); - goto err; - } } rc = qeth_set_online(card, card->discipline); diff --git a/drivers/s390/net/qeth_core_sys.c b/drivers/s390/net/qeth_core_sys.c index a0f777f76f66e..5815114da468c 100644 --- a/drivers/s390/net/qeth_core_sys.c +++ b/drivers/s390/net/qeth_core_sys.c @@ -384,19 +384,13 @@ static ssize_t qeth_dev_layer2_store(struct device *dev, goto out; } - card->discipline->remove(card->gdev); - qeth_core_free_discipline(card); + qeth_remove_discipline(card); free_netdev(card->dev); card->dev = ndev; } - rc = qeth_core_load_discipline(card, newdis); - if (rc) - goto out; + rc = qeth_setup_discipline(card, newdis); - rc = card->discipline->setup(card->gdev); - if (rc) - qeth_core_free_discipline(card); out: mutex_unlock(&card->discipline_mutex); return rc ? rc : count; -- GitLab From 17f3a8b5f5c9097c658d662df9beaff0932b0242 Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Thu, 28 Jan 2021 12:25:48 +0100 Subject: [PATCH 1669/2012] s390/qeth: remove qeth_get_ip_version() Replace our home-grown helper with the more robust vlan_get_protocol(). This is pretty much a 1:1 replacement, we just need to pass around a proper ETH_P_* everyhwere and convert the old value range. For readability also convert the protocol checks in qeth_l3_hard_start_xmit() to a switch statement. Signed-off-by: Julian Wiedmann Acked-by: Willem de Bruijn Signed-off-by: Jakub Kicinski --- drivers/s390/net/qeth_core.h | 40 ++++++++------------- drivers/s390/net/qeth_core_main.c | 18 +++++----- drivers/s390/net/qeth_l2_main.c | 6 ++-- drivers/s390/net/qeth_l3_main.c | 60 ++++++++++++++++++------------- 4 files changed, 62 insertions(+), 62 deletions(-) diff --git a/drivers/s390/net/qeth_core.h b/drivers/s390/net/qeth_core.h index 331dcbbf3e251..a1da83b0b0ef3 100644 --- a/drivers/s390/net/qeth_core.h +++ b/drivers/s390/net/qeth_core.h @@ -956,24 +956,6 @@ static inline int qeth_get_elements_for_range(addr_t start, addr_t end) return PFN_UP(end) - PFN_DOWN(start); } -static inline int qeth_get_ip_version(struct sk_buff *skb) -{ - struct vlan_ethhdr *veth = vlan_eth_hdr(skb); - __be16 prot = veth->h_vlan_proto; - - if (prot == htons(ETH_P_8021Q)) - prot = veth->h_vlan_encapsulated_proto; - - switch (prot) { - case htons(ETH_P_IPV6): - return 6; - case htons(ETH_P_IP): - return 4; - default: - return 0; - } -} - static inline int qeth_get_ether_cast_type(struct sk_buff *skb) { u8 *addr = eth_hdr(skb)->h_dest; @@ -984,14 +966,20 @@ static inline int qeth_get_ether_cast_type(struct sk_buff *skb) return RTN_UNICAST; } -static inline struct dst_entry *qeth_dst_check_rcu(struct sk_buff *skb, int ipv) +static inline struct dst_entry *qeth_dst_check_rcu(struct sk_buff *skb, + __be16 proto) { struct dst_entry *dst = skb_dst(skb); struct rt6_info *rt; rt = (struct rt6_info *) dst; - if (dst) - dst = dst_check(dst, (ipv == 6) ? rt6_get_cookie(rt) : 0); + if (dst) { + if (proto == htons(ETH_P_IPV6)) + dst = dst_check(dst, rt6_get_cookie(rt)); + else + dst = dst_check(dst, 0); + } + return dst; } @@ -1014,11 +1002,11 @@ static inline struct in6_addr *qeth_next_hop_v6_rcu(struct sk_buff *skb, return &ipv6_hdr(skb)->daddr; } -static inline void qeth_tx_csum(struct sk_buff *skb, u8 *flags, int ipv) +static inline void qeth_tx_csum(struct sk_buff *skb, u8 *flags, __be16 proto) { *flags |= QETH_HDR_EXT_CSUM_TRANSP_REQ; - if ((ipv == 4 && ip_hdr(skb)->protocol == IPPROTO_UDP) || - (ipv == 6 && ipv6_hdr(skb)->nexthdr == IPPROTO_UDP)) + if ((proto == htons(ETH_P_IP) && ip_hdr(skb)->protocol == IPPROTO_UDP) || + (proto == htons(ETH_P_IPV6) && ipv6_hdr(skb)->nexthdr == IPPROTO_UDP)) *flags |= QETH_HDR_EXT_UDP; } @@ -1145,10 +1133,10 @@ int qeth_stop(struct net_device *dev); int qeth_vm_request_mac(struct qeth_card *card); int qeth_xmit(struct qeth_card *card, struct sk_buff *skb, - struct qeth_qdio_out_q *queue, int ipv, + struct qeth_qdio_out_q *queue, __be16 proto, void (*fill_header)(struct qeth_qdio_out_q *queue, struct qeth_hdr *hdr, struct sk_buff *skb, - int ipv, unsigned int data_len)); + __be16 proto, unsigned int data_len)); /* exports for OSN */ int qeth_osn_assist(struct net_device *, void *, int); diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index 0a65213ab6065..de9d27e1c5290 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -825,7 +825,8 @@ static bool qeth_next_hop_is_local_v4(struct qeth_card *card, return false; rcu_read_lock(); - next_hop = qeth_next_hop_v4_rcu(skb, qeth_dst_check_rcu(skb, 4)); + next_hop = qeth_next_hop_v4_rcu(skb, + qeth_dst_check_rcu(skb, htons(ETH_P_IP))); key = ipv4_addr_hash(next_hop); hash_for_each_possible_rcu(card->local_addrs4, tmp, hnode, key) { @@ -851,7 +852,8 @@ static bool qeth_next_hop_is_local_v6(struct qeth_card *card, return false; rcu_read_lock(); - next_hop = qeth_next_hop_v6_rcu(skb, qeth_dst_check_rcu(skb, 6)); + next_hop = qeth_next_hop_v6_rcu(skb, + qeth_dst_check_rcu(skb, htons(ETH_P_IPV6))); key = ipv6_addr_hash(next_hop); hash_for_each_possible_rcu(card->local_addrs6, tmp, hnode, key) { @@ -3896,11 +3898,11 @@ int qeth_get_priority_queue(struct qeth_card *card, struct sk_buff *skb) switch (card->qdio.do_prio_queueing) { case QETH_PRIO_Q_ING_TOS: case QETH_PRIO_Q_ING_PREC: - switch (qeth_get_ip_version(skb)) { - case 4: + switch (vlan_get_protocol(skb)) { + case htons(ETH_P_IP): tos = ipv4_get_dsfield(ip_hdr(skb)); break; - case 6: + case htons(ETH_P_IPV6): tos = ipv6_get_dsfield(ipv6_hdr(skb)); break; default: @@ -4365,10 +4367,10 @@ static void qeth_fill_tso_ext(struct qeth_hdr_tso *hdr, } int qeth_xmit(struct qeth_card *card, struct sk_buff *skb, - struct qeth_qdio_out_q *queue, int ipv, + struct qeth_qdio_out_q *queue, __be16 proto, void (*fill_header)(struct qeth_qdio_out_q *queue, struct qeth_hdr *hdr, struct sk_buff *skb, - int ipv, unsigned int data_len)) + __be16 proto, unsigned int data_len)) { unsigned int proto_len, hw_hdr_len; unsigned int frame_len = skb->len; @@ -4401,7 +4403,7 @@ int qeth_xmit(struct qeth_card *card, struct sk_buff *skb, data_offset = push_len + proto_len; } memset(hdr, 0, hw_hdr_len); - fill_header(queue, hdr, skb, ipv, frame_len); + fill_header(queue, hdr, skb, proto, frame_len); if (is_tso) qeth_fill_tso_ext((struct qeth_hdr_tso *) hdr, frame_len - proto_len, skb, proto_len); diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c index 4254caf1d9b69..ca44421a6d6eb 100644 --- a/drivers/s390/net/qeth_l2_main.c +++ b/drivers/s390/net/qeth_l2_main.c @@ -157,7 +157,7 @@ static void qeth_l2_drain_rx_mode_cache(struct qeth_card *card) static void qeth_l2_fill_header(struct qeth_qdio_out_q *queue, struct qeth_hdr *hdr, struct sk_buff *skb, - int ipv, unsigned int data_len) + __be16 proto, unsigned int data_len) { int cast_type = qeth_get_ether_cast_type(skb); struct vlan_ethhdr *veth = vlan_eth_hdr(skb); @@ -169,7 +169,7 @@ static void qeth_l2_fill_header(struct qeth_qdio_out_q *queue, } else { hdr->hdr.l2.id = QETH_HEADER_TYPE_LAYER2; if (skb->ip_summed == CHECKSUM_PARTIAL) - qeth_tx_csum(skb, &hdr->hdr.l2.flags[1], ipv); + qeth_tx_csum(skb, &hdr->hdr.l2.flags[1], proto); } /* set byte byte 3 to casting flags */ @@ -551,7 +551,7 @@ static netdev_tx_t qeth_l2_hard_start_xmit(struct sk_buff *skb, if (IS_OSN(card)) rc = qeth_l2_xmit_osn(card, skb, queue); else - rc = qeth_xmit(card, skb, queue, qeth_get_ip_version(skb), + rc = qeth_xmit(card, skb, queue, vlan_get_protocol(skb), qeth_l2_fill_header); if (!rc) diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index 4c2cae7ae9a7f..7a7465376e292 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -1576,7 +1576,7 @@ static int qeth_l3_do_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) } static int qeth_l3_get_cast_type_rcu(struct sk_buff *skb, struct dst_entry *dst, - int ipv) + __be16 proto) { struct neighbour *n = NULL; @@ -1595,13 +1595,13 @@ static int qeth_l3_get_cast_type_rcu(struct sk_buff *skb, struct dst_entry *dst, } /* no neighbour (eg AF_PACKET), fall back to target's IP address ... */ - switch (ipv) { - case 4: + switch (proto) { + case htons(ETH_P_IP): if (ipv4_is_lbcast(ip_hdr(skb)->daddr)) return RTN_BROADCAST; return ipv4_is_multicast(ip_hdr(skb)->daddr) ? RTN_MULTICAST : RTN_UNICAST; - case 6: + case htons(ETH_P_IPV6): return ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr) ? RTN_MULTICAST : RTN_UNICAST; default: @@ -1612,13 +1612,13 @@ static int qeth_l3_get_cast_type_rcu(struct sk_buff *skb, struct dst_entry *dst, static int qeth_l3_get_cast_type(struct sk_buff *skb) { - int ipv = qeth_get_ip_version(skb); + __be16 proto = vlan_get_protocol(skb); struct dst_entry *dst; int cast_type; rcu_read_lock(); - dst = qeth_dst_check_rcu(skb, ipv); - cast_type = qeth_l3_get_cast_type_rcu(skb, dst, ipv); + dst = qeth_dst_check_rcu(skb, proto); + cast_type = qeth_l3_get_cast_type_rcu(skb, dst, proto); rcu_read_unlock(); return cast_type; @@ -1637,7 +1637,7 @@ static u8 qeth_l3_cast_type_to_flag(int cast_type) static void qeth_l3_fill_header(struct qeth_qdio_out_q *queue, struct qeth_hdr *hdr, struct sk_buff *skb, - int ipv, unsigned int data_len) + __be16 proto, unsigned int data_len) { struct qeth_hdr_layer3 *l3_hdr = &hdr->hdr.l3; struct vlan_ethhdr *veth = vlan_eth_hdr(skb); @@ -1652,7 +1652,7 @@ static void qeth_l3_fill_header(struct qeth_qdio_out_q *queue, } else { hdr->hdr.l3.id = QETH_HEADER_TYPE_LAYER3; - if (skb->protocol == htons(ETH_P_AF_IUCV)) { + if (proto == htons(ETH_P_AF_IUCV)) { l3_hdr->flags = QETH_HDR_IPV6 | QETH_CAST_UNICAST; l3_hdr->next_hop.addr.s6_addr16[0] = htons(0xfe80); memcpy(&l3_hdr->next_hop.addr.s6_addr32[2], @@ -1661,14 +1661,14 @@ static void qeth_l3_fill_header(struct qeth_qdio_out_q *queue, } if (skb->ip_summed == CHECKSUM_PARTIAL) { - qeth_tx_csum(skb, &hdr->hdr.l3.ext_flags, ipv); + qeth_tx_csum(skb, &hdr->hdr.l3.ext_flags, proto); /* some HW requires combined L3+L4 csum offload: */ - if (ipv == 4) + if (proto == htons(ETH_P_IP)) hdr->hdr.l3.ext_flags |= QETH_HDR_EXT_CSUM_HDR_REQ; } } - if (ipv == 4 || IS_IQD(card)) { + if (proto == htons(ETH_P_IP) || IS_IQD(card)) { /* NETIF_F_HW_VLAN_CTAG_TX */ if (skb_vlan_tag_present(skb)) { hdr->hdr.l3.ext_flags |= QETH_HDR_EXT_VLAN_FRAME; @@ -1680,18 +1680,18 @@ static void qeth_l3_fill_header(struct qeth_qdio_out_q *queue, } rcu_read_lock(); - dst = qeth_dst_check_rcu(skb, ipv); + dst = qeth_dst_check_rcu(skb, proto); if (IS_IQD(card) && skb_get_queue_mapping(skb) != QETH_IQD_MCAST_TXQ) cast_type = RTN_UNICAST; else - cast_type = qeth_l3_get_cast_type_rcu(skb, dst, ipv); + cast_type = qeth_l3_get_cast_type_rcu(skb, dst, proto); l3_hdr->flags |= qeth_l3_cast_type_to_flag(cast_type); - if (ipv == 4) { + if (proto == htons(ETH_P_IP)) { l3_hdr->next_hop.addr.s6_addr32[3] = qeth_next_hop_v4_rcu(skb, dst); - } else if (ipv == 6) { + } else if (proto == htons(ETH_P_IPV6)) { l3_hdr->next_hop.addr = *qeth_next_hop_v6_rcu(skb, dst); hdr->hdr.l3.flags |= QETH_HDR_IPV6; @@ -1719,7 +1719,7 @@ static void qeth_l3_fixup_headers(struct sk_buff *skb) } static int qeth_l3_xmit(struct qeth_card *card, struct sk_buff *skb, - struct qeth_qdio_out_q *queue, int ipv) + struct qeth_qdio_out_q *queue, __be16 proto) { unsigned int hw_hdr_len; int rc; @@ -1733,15 +1733,15 @@ static int qeth_l3_xmit(struct qeth_card *card, struct sk_buff *skb, skb_pull(skb, ETH_HLEN); qeth_l3_fixup_headers(skb); - return qeth_xmit(card, skb, queue, ipv, qeth_l3_fill_header); + return qeth_xmit(card, skb, queue, proto, qeth_l3_fill_header); } static netdev_tx_t qeth_l3_hard_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct qeth_card *card = dev->ml_priv; + __be16 proto = vlan_get_protocol(skb); u16 txq = skb_get_queue_mapping(skb); - int ipv = qeth_get_ip_version(skb); struct qeth_qdio_out_q *queue; int rc; @@ -1752,10 +1752,20 @@ static netdev_tx_t qeth_l3_hard_start_xmit(struct sk_buff *skb, if (card->options.sniffer) goto tx_drop; - if ((card->options.cq != QETH_CQ_ENABLED && !ipv) || - (card->options.cq == QETH_CQ_ENABLED && - skb->protocol != htons(ETH_P_AF_IUCV))) + + switch (proto) { + case htons(ETH_P_AF_IUCV): + if (card->options.cq != QETH_CQ_ENABLED) + goto tx_drop; + break; + case htons(ETH_P_IP): + case htons(ETH_P_IPV6): + if (card->options.cq == QETH_CQ_ENABLED) + goto tx_drop; + break; + default: goto tx_drop; + } } else { queue = card->qdio.out_qs[txq]; } @@ -1764,10 +1774,10 @@ static netdev_tx_t qeth_l3_hard_start_xmit(struct sk_buff *skb, qeth_l3_get_cast_type(skb) == RTN_BROADCAST) goto tx_drop; - if (ipv == 4 || IS_IQD(card)) - rc = qeth_l3_xmit(card, skb, queue, ipv); + if (proto == htons(ETH_P_IP) || IS_IQD(card)) + rc = qeth_l3_xmit(card, skb, queue, proto); else - rc = qeth_xmit(card, skb, queue, ipv, qeth_l3_fill_header); + rc = qeth_xmit(card, skb, queue, proto, qeth_l3_fill_header); if (!rc) return NETDEV_TX_OK; -- GitLab From c61dff3c1ef77f46e4aa605f71c68089b02b3d78 Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Thu, 28 Jan 2021 12:25:49 +0100 Subject: [PATCH 1670/2012] s390/qeth: pass proto to qeth_l3_get_cast_type() qeth_l3_hard_start_xmit() already determined the skb's proto. Avoid doing so a second time when it calls qeth_l3_get_cast_type(). Signed-off-by: Julian Wiedmann Acked-by: Willem de Bruijn Signed-off-by: Jakub Kicinski --- drivers/s390/net/qeth_l3_main.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index 7a7465376e292..4921afb51a1c3 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -1610,9 +1610,8 @@ static int qeth_l3_get_cast_type_rcu(struct sk_buff *skb, struct dst_entry *dst, } } -static int qeth_l3_get_cast_type(struct sk_buff *skb) +static int qeth_l3_get_cast_type(struct sk_buff *skb, __be16 proto) { - __be16 proto = vlan_get_protocol(skb); struct dst_entry *dst; int cast_type; @@ -1771,7 +1770,7 @@ static netdev_tx_t qeth_l3_hard_start_xmit(struct sk_buff *skb, } if (!(dev->flags & IFF_BROADCAST) && - qeth_l3_get_cast_type(skb) == RTN_BROADCAST) + qeth_l3_get_cast_type(skb, proto) == RTN_BROADCAST) goto tx_drop; if (proto == htons(ETH_P_IP) || IS_IQD(card)) @@ -1831,8 +1830,10 @@ static netdev_features_t qeth_l3_osa_features_check(struct sk_buff *skb, static u16 qeth_l3_iqd_select_queue(struct net_device *dev, struct sk_buff *skb, struct net_device *sb_dev) { - return qeth_iqd_select_queue(dev, skb, qeth_l3_get_cast_type(skb), - sb_dev); + __be16 proto = vlan_get_protocol(skb); + + return qeth_iqd_select_queue(dev, skb, + qeth_l3_get_cast_type(skb, proto), sb_dev); } static u16 qeth_l3_osa_select_queue(struct net_device *dev, struct sk_buff *skb, -- GitLab From a667fee181b2c44cbf162f6a24c3594904aa9da1 Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Thu, 28 Jan 2021 12:25:50 +0100 Subject: [PATCH 1671/2012] s390/qeth: make cast type selection for af_iucv skbs robust As part of the TX queue selection for af_iucv skbs, qeth_l3_get_cast_type_rcu() ends up calling qeth_get_ether_cast_type(). Which is rather fragile, since such skbs don't have a proper ETH header and we rely on it being zeroed out in the right places. Add a separate case for ETH_P_AF_IUCV instead that does the right thing. When later building the HW header for such skbs, don't hard-code the cast type but follow the same path as for other protocol types. Here the cast type should naturally come from the skb's queue mapping. Signed-off-by: Julian Wiedmann Acked-by: Willem de Bruijn Signed-off-by: Jakub Kicinski --- drivers/s390/net/qeth_l3_main.c | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index 4921afb51a1c3..dd441eaec66eb 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -1604,8 +1604,10 @@ static int qeth_l3_get_cast_type_rcu(struct sk_buff *skb, struct dst_entry *dst, case htons(ETH_P_IPV6): return ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr) ? RTN_MULTICAST : RTN_UNICAST; + case htons(ETH_P_AF_IUCV): + return RTN_UNICAST; default: - /* ... and MAC address */ + /* OSA only: ... and MAC address */ return qeth_get_ether_cast_type(skb); } } @@ -1651,14 +1653,6 @@ static void qeth_l3_fill_header(struct qeth_qdio_out_q *queue, } else { hdr->hdr.l3.id = QETH_HEADER_TYPE_LAYER3; - if (proto == htons(ETH_P_AF_IUCV)) { - l3_hdr->flags = QETH_HDR_IPV6 | QETH_CAST_UNICAST; - l3_hdr->next_hop.addr.s6_addr16[0] = htons(0xfe80); - memcpy(&l3_hdr->next_hop.addr.s6_addr32[2], - iucv_trans_hdr(skb)->destUserID, 8); - return; - } - if (skb->ip_summed == CHECKSUM_PARTIAL) { qeth_tx_csum(skb, &hdr->hdr.l3.ext_flags, proto); /* some HW requires combined L3+L4 csum offload: */ @@ -1687,16 +1681,25 @@ static void qeth_l3_fill_header(struct qeth_qdio_out_q *queue, cast_type = qeth_l3_get_cast_type_rcu(skb, dst, proto); l3_hdr->flags |= qeth_l3_cast_type_to_flag(cast_type); - if (proto == htons(ETH_P_IP)) { + switch (proto) { + case htons(ETH_P_IP): l3_hdr->next_hop.addr.s6_addr32[3] = qeth_next_hop_v4_rcu(skb, dst); - } else if (proto == htons(ETH_P_IPV6)) { + break; + case htons(ETH_P_IPV6): l3_hdr->next_hop.addr = *qeth_next_hop_v6_rcu(skb, dst); hdr->hdr.l3.flags |= QETH_HDR_IPV6; if (!IS_IQD(card)) hdr->hdr.l3.flags |= QETH_HDR_PASSTHRU; - } else { + break; + case htons(ETH_P_AF_IUCV): + l3_hdr->next_hop.addr.s6_addr16[0] = htons(0xfe80); + memcpy(&l3_hdr->next_hop.addr.s6_addr32[2], + iucv_trans_hdr(skb)->destUserID, 8); + l3_hdr->flags |= QETH_HDR_IPV6; + break; + default: /* OSA only: */ l3_hdr->flags |= QETH_HDR_PASSTHRU; } -- GitLab From d6e5150315173da3dbefcbfca1b3ebe321e8c8eb Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Thu, 28 Jan 2021 12:25:51 +0100 Subject: [PATCH 1672/2012] s390/qeth: don't fake a TX completion interrupt after TX error When do_qdio() returns with an unexpected error, qeth_flush_buffers() kicks off a recovery action. In such a case there's no point in starting TX completion processing, the device gets torn down anyway. So take a closer look at do_qdio()'s return value, and skip the TX completion processing accordingly. Signed-off-by: Julian Wiedmann Acked-by: Willem de Bruijn Signed-off-by: Jakub Kicinski --- drivers/s390/net/qeth_core_main.c | 34 ++++++++++++++++--------------- 1 file changed, 18 insertions(+), 16 deletions(-) diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index de9d27e1c5290..ea2e139cd592d 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -3692,24 +3692,27 @@ static void qeth_flush_buffers(struct qeth_qdio_out_q *queue, int index, rc = do_QDIO(CARD_DDEV(queue->card), qdio_flags, queue->queue_no, index, count); - /* Fake the TX completion interrupt: */ - if (IS_IQD(card)) { - unsigned int frames = READ_ONCE(queue->max_coalesced_frames); - unsigned int usecs = READ_ONCE(queue->coalesce_usecs); + switch (rc) { + case 0: + case -ENOBUFS: + /* ignore temporary SIGA errors without busy condition */ - if (frames && queue->coalesced_frames >= frames) { - napi_schedule(&queue->napi); - queue->coalesced_frames = 0; - QETH_TXQ_STAT_INC(queue, coal_frames); - } else if (usecs) { - qeth_tx_arm_timer(queue, usecs); + /* Fake the TX completion interrupt: */ + if (IS_IQD(card)) { + unsigned int frames = READ_ONCE(queue->max_coalesced_frames); + unsigned int usecs = READ_ONCE(queue->coalesce_usecs); + + if (frames && queue->coalesced_frames >= frames) { + napi_schedule(&queue->napi); + queue->coalesced_frames = 0; + QETH_TXQ_STAT_INC(queue, coal_frames); + } else if (usecs) { + qeth_tx_arm_timer(queue, usecs); + } } - } - if (rc) { - /* ignore temporary SIGA errors without busy condition */ - if (rc == -ENOBUFS) - return; + break; + default: QETH_CARD_TEXT(queue->card, 2, "flushbuf"); QETH_CARD_TEXT_(queue->card, 2, " q%d", queue->queue_no); QETH_CARD_TEXT_(queue->card, 2, " idx%d", index); @@ -3719,7 +3722,6 @@ static void qeth_flush_buffers(struct qeth_qdio_out_q *queue, int index, /* this must not happen under normal circumstances. if it * happens something is really wrong -> recover */ qeth_schedule_recovery(queue->card); - return; } } -- GitLab From 27e9c1de529919d8dd7d072415d3bcae77709300 Mon Sep 17 00:00:00 2001 From: Alexander Egorenkov Date: Thu, 28 Jan 2021 12:41:04 +0100 Subject: [PATCH 1673/2012] net/af_iucv: remove WARN_ONCE on malformed RX packets syzbot reported the following finding: AF_IUCV failed to receive skb, len=0 WARNING: CPU: 0 PID: 522 at net/iucv/af_iucv.c:2039 afiucv_hs_rcv+0x174/0x190 net/iucv/af_iucv.c:2039 CPU: 0 PID: 522 Comm: syz-executor091 Not tainted 5.10.0-rc1-syzkaller-07082-g55027a88ec9f #0 Hardware name: IBM 3906 M04 701 (KVM/Linux) Call Trace: [<00000000b87ea538>] afiucv_hs_rcv+0x178/0x190 net/iucv/af_iucv.c:2039 ([<00000000b87ea534>] afiucv_hs_rcv+0x174/0x190 net/iucv/af_iucv.c:2039) [<00000000b796533e>] __netif_receive_skb_one_core+0x13e/0x188 net/core/dev.c:5315 [<00000000b79653ce>] __netif_receive_skb+0x46/0x1c0 net/core/dev.c:5429 [<00000000b79655fe>] netif_receive_skb_internal+0xb6/0x220 net/core/dev.c:5534 [<00000000b796ac3a>] netif_receive_skb+0x42/0x318 net/core/dev.c:5593 [<00000000b6fd45f4>] tun_rx_batched.isra.0+0x6fc/0x860 drivers/net/tun.c:1485 [<00000000b6fddc4e>] tun_get_user+0x1c26/0x27f0 drivers/net/tun.c:1939 [<00000000b6fe0f00>] tun_chr_write_iter+0x158/0x248 drivers/net/tun.c:1968 [<00000000b4f22bfa>] call_write_iter include/linux/fs.h:1887 [inline] [<00000000b4f22bfa>] new_sync_write+0x442/0x648 fs/read_write.c:518 [<00000000b4f238fe>] vfs_write.part.0+0x36e/0x5d8 fs/read_write.c:605 [<00000000b4f2984e>] vfs_write+0x10e/0x148 fs/read_write.c:615 [<00000000b4f29d0e>] ksys_write+0x166/0x290 fs/read_write.c:658 [<00000000b8dc4ab4>] system_call+0xe0/0x28c arch/s390/kernel/entry.S:415 Last Breaking-Event-Address: [<00000000b8dc64d4>] __s390_indirect_jump_r14+0x0/0xc Malformed RX packets shouldn't generate any warnings because debugging info already flows to dropmon via the kfree_skb(). Signed-off-by: Alexander Egorenkov Reviewed-by: Julian Wiedmann Signed-off-by: Julian Wiedmann Acked-by: Willem de Bruijn Signed-off-by: Jakub Kicinski --- net/iucv/af_iucv.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index 882f028992c38..427a1abce0a8a 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -2036,7 +2036,6 @@ static int afiucv_hs_rcv(struct sk_buff *skb, struct net_device *dev, char nullstring[8]; if (!pskb_may_pull(skb, sizeof(*trans_hdr))) { - WARN_ONCE(1, "AF_IUCV failed to receive skb, len=%u", skb->len); kfree_skb(skb); return NET_RX_SUCCESS; } -- GitLab From c464444fa2ca41255817e2bdcfc47a658ec20645 Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Thu, 28 Jan 2021 12:41:05 +0100 Subject: [PATCH 1674/2012] net/af_iucv: don't lookup the socket on TX notification Whoever called iucv_sk(sk)->sk_txnotify() must already know that they're dealing with an af_iucv socket. Signed-off-by: Julian Wiedmann Acked-by: Willem de Bruijn Signed-off-by: Jakub Kicinski --- net/iucv/af_iucv.c | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index 427a1abce0a8a..8683b6939f459 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -2134,23 +2134,14 @@ static int afiucv_hs_rcv(struct sk_buff *skb, struct net_device *dev, static void afiucv_hs_callback_txnotify(struct sk_buff *skb, enum iucv_tx_notify n) { - struct sock *isk = skb->sk; - struct sock *sk = NULL; - struct iucv_sock *iucv = NULL; + struct iucv_sock *iucv = iucv_sk(skb->sk); + struct sock *sk = skb->sk; struct sk_buff_head *list; struct sk_buff *list_skb; struct sk_buff *nskb; unsigned long flags; - read_lock_irqsave(&iucv_sk_list.lock, flags); - sk_for_each(sk, &iucv_sk_list.head) - if (sk == isk) { - iucv = iucv_sk(sk); - break; - } - read_unlock_irqrestore(&iucv_sk_list.lock, flags); - - if (!iucv || sock_flag(sk, SOCK_ZAPPED)) + if (sock_flag(sk, SOCK_ZAPPED)) return; list = &iucv->send_skb_q; -- GitLab From ef6af7bdb9e6c14eae8dc5fe852aefe1e089c85c Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Thu, 28 Jan 2021 12:41:06 +0100 Subject: [PATCH 1675/2012] net/af_iucv: count packets in the xmit path The TX code keeps track of all skbs that are in-flight but haven't actually been sent out yet. For native IUCV sockets that's not a huge deal, but with TRANS_HIPER sockets it would be much better if we didn't need to maintain a list of skb clones. Note that we actually only care about the _count_ of skbs in this stage of the TX pipeline. So as prep work for removing the skb tracking on TRANS_HIPER sockets, keep track of the skb count in a separate variable and pair any list {enqueue, unlink} with a count {increment, decrement}. Then replace all occurences where we currently look at the skb list's fill level. Signed-off-by: Julian Wiedmann Acked-by: Willem de Bruijn Signed-off-by: Jakub Kicinski --- include/net/iucv/af_iucv.h | 1 + net/iucv/af_iucv.c | 30 ++++++++++++++++++++++++------ 2 files changed, 25 insertions(+), 6 deletions(-) diff --git a/include/net/iucv/af_iucv.h b/include/net/iucv/af_iucv.h index 9259ce2b22f3e..0816bcd44dd17 100644 --- a/include/net/iucv/af_iucv.h +++ b/include/net/iucv/af_iucv.h @@ -128,6 +128,7 @@ struct iucv_sock { u8 flags; u16 msglimit; u16 msglimit_peer; + atomic_t skbs_in_xmit; atomic_t msg_sent; atomic_t msg_recv; atomic_t pendings; diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index 8683b6939f459..e487f472027a9 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -182,7 +182,7 @@ static inline int iucv_below_msglim(struct sock *sk) if (sk->sk_state != IUCV_CONNECTED) return 1; if (iucv->transport == AF_IUCV_TRANS_IUCV) - return (skb_queue_len(&iucv->send_skb_q) < iucv->path->msglim); + return (atomic_read(&iucv->skbs_in_xmit) < iucv->path->msglim); else return ((atomic_read(&iucv->msg_sent) < iucv->msglimit_peer) && (atomic_read(&iucv->pendings) <= 0)); @@ -269,8 +269,10 @@ static int afiucv_hs_send(struct iucv_message *imsg, struct sock *sock, } skb_queue_tail(&iucv->send_skb_q, nskb); + atomic_inc(&iucv->skbs_in_xmit); err = dev_queue_xmit(skb); if (net_xmit_eval(err)) { + atomic_dec(&iucv->skbs_in_xmit); skb_unlink(nskb, &iucv->send_skb_q); kfree_skb(nskb); } else { @@ -424,7 +426,7 @@ static void iucv_sock_close(struct sock *sk) sk->sk_state = IUCV_CLOSING; sk->sk_state_change(sk); - if (!err && !skb_queue_empty(&iucv->send_skb_q)) { + if (!err && atomic_read(&iucv->skbs_in_xmit) > 0) { if (sock_flag(sk, SOCK_LINGER) && sk->sk_lingertime) timeo = sk->sk_lingertime; else @@ -491,6 +493,7 @@ static struct sock *iucv_sock_alloc(struct socket *sock, int proto, gfp_t prio, atomic_set(&iucv->pendings, 0); iucv->flags = 0; iucv->msglimit = 0; + atomic_set(&iucv->skbs_in_xmit, 0); atomic_set(&iucv->msg_sent, 0); atomic_set(&iucv->msg_recv, 0); iucv->path = NULL; @@ -1055,6 +1058,7 @@ static int iucv_sock_sendmsg(struct socket *sock, struct msghdr *msg, } } else { /* Classic VM IUCV transport */ skb_queue_tail(&iucv->send_skb_q, skb); + atomic_inc(&iucv->skbs_in_xmit); if (((iucv->path->flags & IUCV_IPRMDATA) & iucv->flags) && skb->len <= 7) { @@ -1063,6 +1067,7 @@ static int iucv_sock_sendmsg(struct socket *sock, struct msghdr *msg, /* on success: there is no message_complete callback */ /* for an IPRMDATA msg; remove skb from send queue */ if (err == 0) { + atomic_dec(&iucv->skbs_in_xmit); skb_unlink(skb, &iucv->send_skb_q); kfree_skb(skb); } @@ -1071,6 +1076,7 @@ static int iucv_sock_sendmsg(struct socket *sock, struct msghdr *msg, /* IUCV_IPRMDATA path flag is set... sever path */ if (err == 0x15) { pr_iucv->path_sever(iucv->path, NULL); + atomic_dec(&iucv->skbs_in_xmit); skb_unlink(skb, &iucv->send_skb_q); err = -EPIPE; goto fail; @@ -1109,6 +1115,8 @@ static int iucv_sock_sendmsg(struct socket *sock, struct msghdr *msg, } else { err = -EPIPE; } + + atomic_dec(&iucv->skbs_in_xmit); skb_unlink(skb, &iucv->send_skb_q); goto fail; } @@ -1748,10 +1756,14 @@ static void iucv_callback_txdone(struct iucv_path *path, { struct sock *sk = path->private; struct sk_buff *this = NULL; - struct sk_buff_head *list = &iucv_sk(sk)->send_skb_q; + struct sk_buff_head *list; struct sk_buff *list_skb; + struct iucv_sock *iucv; unsigned long flags; + iucv = iucv_sk(sk); + list = &iucv->send_skb_q; + bh_lock_sock(sk); spin_lock_irqsave(&list->lock, flags); @@ -1761,8 +1773,11 @@ static void iucv_callback_txdone(struct iucv_path *path, break; } } - if (this) + if (this) { + atomic_dec(&iucv->skbs_in_xmit); __skb_unlink(this, list); + } + spin_unlock_irqrestore(&list->lock, flags); if (this) { @@ -1772,7 +1787,7 @@ static void iucv_callback_txdone(struct iucv_path *path, } if (sk->sk_state == IUCV_CLOSING) { - if (skb_queue_empty(&iucv_sk(sk)->send_skb_q)) { + if (atomic_read(&iucv->skbs_in_xmit) == 0) { sk->sk_state = IUCV_CLOSED; sk->sk_state_change(sk); } @@ -2150,6 +2165,7 @@ static void afiucv_hs_callback_txnotify(struct sk_buff *skb, if (skb_shinfo(list_skb) == skb_shinfo(skb)) { switch (n) { case TX_NOTIFY_OK: + atomic_dec(&iucv->skbs_in_xmit); __skb_unlink(list_skb, list); kfree_skb(list_skb); iucv_sock_wake_msglim(sk); @@ -2158,6 +2174,7 @@ static void afiucv_hs_callback_txnotify(struct sk_buff *skb, atomic_inc(&iucv->pendings); break; case TX_NOTIFY_DELAYED_OK: + atomic_dec(&iucv->skbs_in_xmit); __skb_unlink(list_skb, list); atomic_dec(&iucv->pendings); if (atomic_read(&iucv->pendings) <= 0) @@ -2169,6 +2186,7 @@ static void afiucv_hs_callback_txnotify(struct sk_buff *skb, case TX_NOTIFY_TPQFULL: /* not yet used */ case TX_NOTIFY_GENERALERROR: case TX_NOTIFY_DELAYED_GENERALERROR: + atomic_dec(&iucv->skbs_in_xmit); __skb_unlink(list_skb, list); kfree_skb(list_skb); if (sk->sk_state == IUCV_CONNECTED) { @@ -2183,7 +2201,7 @@ static void afiucv_hs_callback_txnotify(struct sk_buff *skb, spin_unlock_irqrestore(&list->lock, flags); if (sk->sk_state == IUCV_CLOSING) { - if (skb_queue_empty(&iucv_sk(sk)->send_skb_q)) { + if (atomic_read(&iucv->skbs_in_xmit) == 0) { sk->sk_state = IUCV_CLOSED; sk->sk_state_change(sk); } -- GitLab From 80bc97aa0aaab974bbbfb99a78d7515414004616 Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Thu, 28 Jan 2021 12:41:07 +0100 Subject: [PATCH 1676/2012] net/af_iucv: don't track individual TX skbs for TRANS_HIPER sockets Stop maintaining the skb_send_q list for TRANS_HIPER sockets. Not only is it extra overhead, but keeping around a list of skb clones means that we later also have to match the ->sk_txnotify() calls against these clones and free them accordingly. The current matching logic (comparing the skbs' shinfo location) is frustratingly fragile, and breaks if the skb's head is mangled in any sort of way while passing from dev_queue_xmit() to the device's HW queue. Also adjust the interface for ->sk_txnotify(), to make clear that we don't actually care about any skb internals. Signed-off-by: Julian Wiedmann Acked-by: Willem de Bruijn Signed-off-by: Jakub Kicinski --- drivers/s390/net/qeth_core_main.c | 6 ++- include/net/iucv/af_iucv.h | 2 +- net/iucv/af_iucv.c | 80 ++++++++----------------------- 3 files changed, 26 insertions(+), 62 deletions(-) diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index ea2e139cd592d..89b223885b0c5 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -1409,10 +1409,12 @@ static void qeth_notify_skbs(struct qeth_qdio_out_q *q, struct sk_buff *skb; skb_queue_walk(&buf->skb_list, skb) { + struct sock *sk = skb->sk; + QETH_CARD_TEXT_(q->card, 5, "skbn%d", notification); QETH_CARD_TEXT_(q->card, 5, "%lx", (long) skb); - if (skb->sk && skb->sk->sk_family == PF_IUCV) - iucv_sk(skb->sk)->sk_txnotify(skb, notification); + if (sk && sk->sk_family == PF_IUCV) + iucv_sk(sk)->sk_txnotify(sk, notification); } } diff --git a/include/net/iucv/af_iucv.h b/include/net/iucv/af_iucv.h index 0816bcd44dd17..ff06246dbbb94 100644 --- a/include/net/iucv/af_iucv.h +++ b/include/net/iucv/af_iucv.h @@ -133,7 +133,7 @@ struct iucv_sock { atomic_t msg_recv; atomic_t pendings; int transport; - void (*sk_txnotify)(struct sk_buff *skb, + void (*sk_txnotify)(struct sock *sk, enum iucv_tx_notify n); }; diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index e487f472027a9..0e0656db4ae71 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -89,7 +89,7 @@ static struct sock *iucv_accept_dequeue(struct sock *parent, static void iucv_sock_kill(struct sock *sk); static void iucv_sock_close(struct sock *sk); -static void afiucv_hs_callback_txnotify(struct sk_buff *, enum iucv_tx_notify); +static void afiucv_hs_callback_txnotify(struct sock *sk, enum iucv_tx_notify); /* Call Back functions */ static void iucv_callback_rx(struct iucv_path *, struct iucv_message *); @@ -211,7 +211,6 @@ static int afiucv_hs_send(struct iucv_message *imsg, struct sock *sock, { struct iucv_sock *iucv = iucv_sk(sock); struct af_iucv_trans_hdr *phs_hdr; - struct sk_buff *nskb; int err, confirm_recv = 0; phs_hdr = skb_push(skb, sizeof(*phs_hdr)); @@ -261,20 +260,10 @@ static int afiucv_hs_send(struct iucv_message *imsg, struct sock *sock, } skb->protocol = cpu_to_be16(ETH_P_AF_IUCV); - __skb_header_release(skb); - nskb = skb_clone(skb, GFP_ATOMIC); - if (!nskb) { - err = -ENOMEM; - goto err_free; - } - - skb_queue_tail(&iucv->send_skb_q, nskb); atomic_inc(&iucv->skbs_in_xmit); err = dev_queue_xmit(skb); if (net_xmit_eval(err)) { atomic_dec(&iucv->skbs_in_xmit); - skb_unlink(nskb, &iucv->send_skb_q); - kfree_skb(nskb); } else { atomic_sub(confirm_recv, &iucv->msg_recv); WARN_ON(atomic_read(&iucv->msg_recv) < 0); @@ -2146,59 +2135,33 @@ static int afiucv_hs_rcv(struct sk_buff *skb, struct net_device *dev, * afiucv_hs_callback_txnotify() - handle send notifcations from HiperSockets * transport **/ -static void afiucv_hs_callback_txnotify(struct sk_buff *skb, - enum iucv_tx_notify n) +static void afiucv_hs_callback_txnotify(struct sock *sk, enum iucv_tx_notify n) { - struct iucv_sock *iucv = iucv_sk(skb->sk); - struct sock *sk = skb->sk; - struct sk_buff_head *list; - struct sk_buff *list_skb; - struct sk_buff *nskb; - unsigned long flags; + struct iucv_sock *iucv = iucv_sk(sk); if (sock_flag(sk, SOCK_ZAPPED)) return; - list = &iucv->send_skb_q; - spin_lock_irqsave(&list->lock, flags); - skb_queue_walk_safe(list, list_skb, nskb) { - if (skb_shinfo(list_skb) == skb_shinfo(skb)) { - switch (n) { - case TX_NOTIFY_OK: - atomic_dec(&iucv->skbs_in_xmit); - __skb_unlink(list_skb, list); - kfree_skb(list_skb); - iucv_sock_wake_msglim(sk); - break; - case TX_NOTIFY_PENDING: - atomic_inc(&iucv->pendings); - break; - case TX_NOTIFY_DELAYED_OK: - atomic_dec(&iucv->skbs_in_xmit); - __skb_unlink(list_skb, list); - atomic_dec(&iucv->pendings); - if (atomic_read(&iucv->pendings) <= 0) - iucv_sock_wake_msglim(sk); - kfree_skb(list_skb); - break; - case TX_NOTIFY_UNREACHABLE: - case TX_NOTIFY_DELAYED_UNREACHABLE: - case TX_NOTIFY_TPQFULL: /* not yet used */ - case TX_NOTIFY_GENERALERROR: - case TX_NOTIFY_DELAYED_GENERALERROR: - atomic_dec(&iucv->skbs_in_xmit); - __skb_unlink(list_skb, list); - kfree_skb(list_skb); - if (sk->sk_state == IUCV_CONNECTED) { - sk->sk_state = IUCV_DISCONN; - sk->sk_state_change(sk); - } - break; - } - break; + switch (n) { + case TX_NOTIFY_OK: + atomic_dec(&iucv->skbs_in_xmit); + iucv_sock_wake_msglim(sk); + break; + case TX_NOTIFY_PENDING: + atomic_inc(&iucv->pendings); + break; + case TX_NOTIFY_DELAYED_OK: + atomic_dec(&iucv->skbs_in_xmit); + if (atomic_dec_return(&iucv->pendings) <= 0) + iucv_sock_wake_msglim(sk); + break; + default: + atomic_dec(&iucv->skbs_in_xmit); + if (sk->sk_state == IUCV_CONNECTED) { + sk->sk_state = IUCV_DISCONN; + sk->sk_state_change(sk); } } - spin_unlock_irqrestore(&list->lock, flags); if (sk->sk_state == IUCV_CLOSING) { if (atomic_read(&iucv->skbs_in_xmit) == 0) { @@ -2206,7 +2169,6 @@ static void afiucv_hs_callback_txnotify(struct sk_buff *skb, sk->sk_state_change(sk); } } - } /* -- GitLab From 2c3b4456c812681f963ef67502c5b8e8f8e2933f Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Thu, 28 Jan 2021 12:41:08 +0100 Subject: [PATCH 1677/2012] net/af_iucv: build SG skbs for TRANS_HIPER sockets The TX path no longer falls apart when some of its SG skbs are later linearized by lower layers of the stack. So enable the use of SG skbs in iucv_sock_sendmsg() again. This effectively reverts commit dc5367bcc556 ("net/af_iucv: don't use paged skbs for TX on HiperSockets"). Signed-off-by: Julian Wiedmann Acked-by: Willem de Bruijn Signed-off-by: Jakub Kicinski --- net/iucv/af_iucv.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index 0e0656db4ae71..6092d5cb71687 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -256,7 +256,9 @@ static int afiucv_hs_send(struct iucv_message *imsg, struct sock *sock, err = -EMSGSIZE; goto err_free; } - skb_trim(skb, skb->dev->mtu); + err = pskb_trim(skb, skb->dev->mtu); + if (err) + goto err_free; } skb->protocol = cpu_to_be16(ETH_P_AF_IUCV); @@ -996,7 +998,7 @@ static int iucv_sock_sendmsg(struct socket *sock, struct msghdr *msg, if (iucv->transport == AF_IUCV_TRANS_HIPER) { headroom = sizeof(struct af_iucv_trans_hdr) + LL_RESERVED_SPACE(iucv->hs_dev); - linear = len; + linear = min(len, PAGE_SIZE - headroom); } else { if (len < PAGE_SIZE) { linear = len; -- GitLab From 5d1f0f09b5f0176494419a056db3a6647becd316 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Thu, 28 Jan 2021 13:49:13 +0100 Subject: [PATCH 1678/2012] nexthop: Rename nexthop_free_mpath nexthop_free_mpath really should be nexthop_free_group. Rename it. Signed-off-by: David Ahern Reviewed-by: Ido Schimmel Signed-off-by: Petr Machata Reviewed-by: David Ahern Signed-off-by: Jakub Kicinski --- net/ipv4/nexthop.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c index e6dfca4262424..1deb9e4df1de5 100644 --- a/net/ipv4/nexthop.c +++ b/net/ipv4/nexthop.c @@ -209,7 +209,7 @@ static void nexthop_devhash_add(struct net *net, struct nh_info *nhi) hlist_add_head(&nhi->dev_hash, head); } -static void nexthop_free_mpath(struct nexthop *nh) +static void nexthop_free_group(struct nexthop *nh) { struct nh_group *nhg; int i; @@ -249,7 +249,7 @@ void nexthop_free_rcu(struct rcu_head *head) struct nexthop *nh = container_of(head, struct nexthop, rcu); if (nh->is_group) - nexthop_free_mpath(nh); + nexthop_free_group(nh); else nexthop_free_single(nh); -- GitLab From 79bc55e3fee9f6169756d1a9d68c3ba9e774c3b1 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Thu, 28 Jan 2021 13:49:14 +0100 Subject: [PATCH 1679/2012] nexthop: Dispatch nexthop_select_path() by group type The logic for selecting path depends on the next-hop group type. Adapt the nexthop_select_path() to dispatch according to the group type. Signed-off-by: Petr Machata Reviewed-by: Ido Schimmel Reviewed-by: David Ahern Signed-off-by: Jakub Kicinski --- net/ipv4/nexthop.c | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c index 1deb9e4df1de5..43bb5f4513436 100644 --- a/net/ipv4/nexthop.c +++ b/net/ipv4/nexthop.c @@ -680,16 +680,11 @@ static bool ipv4_good_nh(const struct fib_nh *nh) return !!(state & NUD_VALID); } -struct nexthop *nexthop_select_path(struct nexthop *nh, int hash) +static struct nexthop *nexthop_select_path_mp(struct nh_group *nhg, int hash) { struct nexthop *rc = NULL; - struct nh_group *nhg; int i; - if (!nh->is_group) - return nh; - - nhg = rcu_dereference(nh->nh_grp); for (i = 0; i < nhg->num_nh; ++i) { struct nh_grp_entry *nhge = &nhg->nh_entries[i]; struct nh_info *nhi; @@ -721,6 +716,21 @@ struct nexthop *nexthop_select_path(struct nexthop *nh, int hash) return rc; } + +struct nexthop *nexthop_select_path(struct nexthop *nh, int hash) +{ + struct nh_group *nhg; + + if (!nh->is_group) + return nh; + + nhg = rcu_dereference(nh->nh_grp); + if (nhg->mpath) + return nexthop_select_path_mp(nhg, hash); + + /* Unreachable. */ + return NULL; +} EXPORT_SYMBOL_GPL(nexthop_select_path); int nexthop_for_each_fib6_nh(struct nexthop *nh, -- GitLab From b9bae61be46645aa3b8b5d79d5cdfabe55ae1507 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Thu, 28 Jan 2021 13:49:15 +0100 Subject: [PATCH 1680/2012] nexthop: Introduce to struct nh_grp_entry a per-type union The values that a next-hop group needs to keep track of depend on the group type. Introduce a union to separate fields specific to the mpath groups from fields specific to other group types. Signed-off-by: Petr Machata Reviewed-by: Ido Schimmel Reviewed-by: David Ahern Signed-off-by: Jakub Kicinski --- include/net/nexthop.h | 7 ++++++- net/ipv4/nexthop.c | 4 ++-- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/include/net/nexthop.h b/include/net/nexthop.h index 226930d66b637..d0e245b0635d9 100644 --- a/include/net/nexthop.h +++ b/include/net/nexthop.h @@ -66,7 +66,12 @@ struct nh_info { struct nh_grp_entry { struct nexthop *nh; u8 weight; - atomic_t upper_bound; + + union { + struct { + atomic_t upper_bound; + } mpath; + }; struct list_head nh_list; struct nexthop *nh_parent; /* nexthop of group with this entry */ diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c index 43bb5f4513436..7a30df5aea755 100644 --- a/net/ipv4/nexthop.c +++ b/net/ipv4/nexthop.c @@ -689,7 +689,7 @@ static struct nexthop *nexthop_select_path_mp(struct nh_group *nhg, int hash) struct nh_grp_entry *nhge = &nhg->nh_entries[i]; struct nh_info *nhi; - if (hash > atomic_read(&nhge->upper_bound)) + if (hash > atomic_read(&nhge->mpath.upper_bound)) continue; nhi = rcu_dereference(nhge->nh->nh_info); @@ -924,7 +924,7 @@ static void nh_group_rebalance(struct nh_group *nhg) w += nhge->weight; upper_bound = DIV_ROUND_CLOSEST_ULL((u64)w << 31, total) - 1; - atomic_set(&nhge->upper_bound, upper_bound); + atomic_set(&nhge->mpath.upper_bound, upper_bound); } } -- GitLab From 720ccd9a728506ca4721b18a22a2157a9d48ed60 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Thu, 28 Jan 2021 13:49:16 +0100 Subject: [PATCH 1681/2012] nexthop: Assert the invariant that a NH group is of only one type Most of the code that deals with nexthop groups relies on the fact that the group is of exactly one well-known type. Currently there is only one type, "mpath", but as more next-hop group types come, it becomes desirable to have a central place where the setting is validated. Introduce such place into nexthop_create_group(), such that the check is done before the code that relies on that invariant is invoked. Signed-off-by: Petr Machata Reviewed-by: Ido Schimmel Reviewed-by: David Ahern Signed-off-by: Jakub Kicinski --- net/ipv4/nexthop.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c index 7a30df5aea755..c09b8231f56ae 100644 --- a/net/ipv4/nexthop.c +++ b/net/ipv4/nexthop.c @@ -1466,10 +1466,13 @@ static struct nexthop *nexthop_create_group(struct net *net, nhg->nh_entries[i].nh_parent = nh; } - if (cfg->nh_grp_type == NEXTHOP_GRP_TYPE_MPATH) { + if (cfg->nh_grp_type == NEXTHOP_GRP_TYPE_MPATH) nhg->mpath = 1; + + WARN_ON_ONCE(nhg->mpath != 1); + + if (nhg->mpath) nh_group_rebalance(nhg); - } if (cfg->nh_fdb) nhg->fdb_nh = 1; -- GitLab From 09ad6becf5355fe0645f500f518fbbd531715722 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 28 Jan 2021 13:49:17 +0100 Subject: [PATCH 1682/2012] nexthop: Use enum to encode notification type Currently there are only two types of in-kernel nexthop notification. The two are distinguished by the 'is_grp' boolean field in 'struct nh_notifier_info'. As more notification types are introduced for more next-hop group types, a boolean is not an easily extensible interface. Instead, convert it to an enum. Signed-off-by: Ido Schimmel Reviewed-by: Petr Machata Signed-off-by: Petr Machata Reviewed-by: David Ahern Signed-off-by: Jakub Kicinski --- .../ethernet/mellanox/mlxsw/spectrum_router.c | 54 ++++++++++++++----- drivers/net/netdevsim/fib.c | 23 ++++---- include/net/nexthop.h | 7 ++- net/ipv4/nexthop.c | 14 ++--- 4 files changed, 69 insertions(+), 29 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 41424ee909a08..0ac7014703aab 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -4309,11 +4309,18 @@ static int mlxsw_sp_nexthop_obj_validate(struct mlxsw_sp *mlxsw_sp, if (event != NEXTHOP_EVENT_REPLACE) return 0; - if (!info->is_grp) + switch (info->type) { + case NH_NOTIFIER_INFO_TYPE_SINGLE: return mlxsw_sp_nexthop_obj_single_validate(mlxsw_sp, info->nh, info->extack); - return mlxsw_sp_nexthop_obj_group_validate(mlxsw_sp, info->nh_grp, - info->extack); + case NH_NOTIFIER_INFO_TYPE_GRP: + return mlxsw_sp_nexthop_obj_group_validate(mlxsw_sp, + info->nh_grp, + info->extack); + default: + NL_SET_ERR_MSG_MOD(info->extack, "Unsupported nexthop type"); + return -EOPNOTSUPP; + } } static bool mlxsw_sp_nexthop_obj_is_gateway(struct mlxsw_sp *mlxsw_sp, @@ -4321,13 +4328,17 @@ static bool mlxsw_sp_nexthop_obj_is_gateway(struct mlxsw_sp *mlxsw_sp, { const struct net_device *dev; - if (info->is_grp) + switch (info->type) { + case NH_NOTIFIER_INFO_TYPE_SINGLE: + dev = info->nh->dev; + return info->nh->gw_family || info->nh->is_reject || + mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, NULL); + case NH_NOTIFIER_INFO_TYPE_GRP: /* Already validated earlier. */ return true; - - dev = info->nh->dev; - return info->nh->gw_family || info->nh->is_reject || - mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, NULL); + default: + return false; + } } static void mlxsw_sp_nexthop_obj_blackhole_init(struct mlxsw_sp *mlxsw_sp, @@ -4410,11 +4421,22 @@ mlxsw_sp_nexthop_obj_group_info_init(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_nexthop_group *nh_grp, struct nh_notifier_info *info) { - unsigned int nhs = info->is_grp ? info->nh_grp->num_nh : 1; struct mlxsw_sp_nexthop_group_info *nhgi; struct mlxsw_sp_nexthop *nh; + unsigned int nhs; int err, i; + switch (info->type) { + case NH_NOTIFIER_INFO_TYPE_SINGLE: + nhs = 1; + break; + case NH_NOTIFIER_INFO_TYPE_GRP: + nhs = info->nh_grp->num_nh; + break; + default: + return -EINVAL; + } + nhgi = kzalloc(struct_size(nhgi, nexthops, nhs), GFP_KERNEL); if (!nhgi) return -ENOMEM; @@ -4427,12 +4449,18 @@ mlxsw_sp_nexthop_obj_group_info_init(struct mlxsw_sp *mlxsw_sp, int weight; nh = &nhgi->nexthops[i]; - if (info->is_grp) { - nh_obj = &info->nh_grp->nh_entries[i].nh; - weight = info->nh_grp->nh_entries[i].weight; - } else { + switch (info->type) { + case NH_NOTIFIER_INFO_TYPE_SINGLE: nh_obj = info->nh; weight = 1; + break; + case NH_NOTIFIER_INFO_TYPE_GRP: + nh_obj = &info->nh_grp->nh_entries[i].nh; + weight = info->nh_grp->nh_entries[i].weight; + break; + default: + err = -EINVAL; + goto err_nexthop_obj_init; } err = mlxsw_sp_nexthop_obj_init(mlxsw_sp, nh_grp, nh, nh_obj, weight); diff --git a/drivers/net/netdevsim/fib.c b/drivers/net/netdevsim/fib.c index 45d8a7790bd5b..f140bbca98c5d 100644 --- a/drivers/net/netdevsim/fib.c +++ b/drivers/net/netdevsim/fib.c @@ -860,7 +860,7 @@ static struct nsim_nexthop *nsim_nexthop_create(struct nsim_fib_data *data, nexthop = kzalloc(sizeof(*nexthop), GFP_KERNEL); if (!nexthop) - return NULL; + return ERR_PTR(-ENOMEM); nexthop->id = info->id; @@ -868,15 +868,20 @@ static struct nsim_nexthop *nsim_nexthop_create(struct nsim_fib_data *data, * occupy. */ - if (!info->is_grp) { + switch (info->type) { + case NH_NOTIFIER_INFO_TYPE_SINGLE: occ = 1; - goto out; + break; + case NH_NOTIFIER_INFO_TYPE_GRP: + for (i = 0; i < info->nh_grp->num_nh; i++) + occ += info->nh_grp->nh_entries[i].weight; + break; + default: + NL_SET_ERR_MSG_MOD(info->extack, "Unsupported nexthop type"); + kfree(nexthop); + return ERR_PTR(-EOPNOTSUPP); } - for (i = 0; i < info->nh_grp->num_nh; i++) - occ += info->nh_grp->nh_entries[i].weight; - -out: nexthop->occ = occ; return nexthop; } @@ -972,8 +977,8 @@ static int nsim_nexthop_insert(struct nsim_fib_data *data, int err; nexthop = nsim_nexthop_create(data, info); - if (!nexthop) - return -ENOMEM; + if (IS_ERR(nexthop)) + return PTR_ERR(nexthop); nexthop_old = rhashtable_lookup_fast(&data->nexthop_ht, &info->id, nsim_nexthop_ht_params); diff --git a/include/net/nexthop.h b/include/net/nexthop.h index d0e245b0635d9..7bc057aee40b1 100644 --- a/include/net/nexthop.h +++ b/include/net/nexthop.h @@ -114,6 +114,11 @@ enum nexthop_event_type { NEXTHOP_EVENT_REPLACE, }; +enum nh_notifier_info_type { + NH_NOTIFIER_INFO_TYPE_SINGLE, + NH_NOTIFIER_INFO_TYPE_GRP, +}; + struct nh_notifier_single_info { struct net_device *dev; u8 gw_family; @@ -142,7 +147,7 @@ struct nh_notifier_info { struct net *net; struct netlink_ext_ack *extack; u32 id; - bool is_grp; + enum nh_notifier_info_type type; union { struct nh_notifier_single_info *nh; struct nh_notifier_grp_info *nh_grp; diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c index c09b8231f56ae..12f812b9538da 100644 --- a/net/ipv4/nexthop.c +++ b/net/ipv4/nexthop.c @@ -71,6 +71,7 @@ __nh_notifier_single_info_init(struct nh_notifier_single_info *nh_info, static int nh_notifier_single_info_init(struct nh_notifier_info *info, const struct nexthop *nh) { + info->type = NH_NOTIFIER_INFO_TYPE_SINGLE; info->nh = kzalloc(sizeof(*info->nh), GFP_KERNEL); if (!info->nh) return -ENOMEM; @@ -92,6 +93,7 @@ static int nh_notifier_grp_info_init(struct nh_notifier_info *info, u16 num_nh = nhg->num_nh; int i; + info->type = NH_NOTIFIER_INFO_TYPE_GRP; info->nh_grp = kzalloc(struct_size(info->nh_grp, nh_entries, num_nh), GFP_KERNEL); if (!info->nh_grp) @@ -121,17 +123,17 @@ static int nh_notifier_info_init(struct nh_notifier_info *info, const struct nexthop *nh) { info->id = nh->id; - info->is_grp = nh->is_group; - if (info->is_grp) + if (nh->is_group) return nh_notifier_grp_info_init(info, nh); else return nh_notifier_single_info_init(info, nh); } -static void nh_notifier_info_fini(struct nh_notifier_info *info) +static void nh_notifier_info_fini(struct nh_notifier_info *info, + const struct nexthop *nh) { - if (info->is_grp) + if (nh->is_group) nh_notifier_grp_info_fini(info); else nh_notifier_single_info_fini(info); @@ -161,7 +163,7 @@ static int call_nexthop_notifiers(struct net *net, err = blocking_notifier_call_chain(&net->nexthop.notifier_chain, event_type, &info); - nh_notifier_info_fini(&info); + nh_notifier_info_fini(&info, nh); return notifier_to_errno(err); } @@ -182,7 +184,7 @@ static int call_nexthop_notifier(struct notifier_block *nb, struct net *net, return err; err = nb->notifier_call(nb, event_type, &info); - nh_notifier_info_fini(&info); + nh_notifier_info_fini(&info, nh); return notifier_to_errno(err); } -- GitLab From da230501f2c95a39eaa0856afd0122d35bda9e5d Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Thu, 28 Jan 2021 13:49:18 +0100 Subject: [PATCH 1683/2012] nexthop: Dispatch notifier init()/fini() by group type After there are several next-hop group types, initialization and finalization of notifier type needs to reflect the actual type. Transform nh_notifier_grp_info_init() and _fini() to make extending them easier. Signed-off-by: Petr Machata Reviewed-by: Ido Schimmel Reviewed-by: David Ahern Signed-off-by: Jakub Kicinski --- net/ipv4/nexthop.c | 25 +++++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c index 12f812b9538da..7149b12c4703c 100644 --- a/net/ipv4/nexthop.c +++ b/net/ipv4/nexthop.c @@ -86,10 +86,9 @@ static void nh_notifier_single_info_fini(struct nh_notifier_info *info) kfree(info->nh); } -static int nh_notifier_grp_info_init(struct nh_notifier_info *info, - const struct nexthop *nh) +static int nh_notifier_mp_info_init(struct nh_notifier_info *info, + struct nh_group *nhg) { - struct nh_group *nhg = rtnl_dereference(nh->nh_grp); u16 num_nh = nhg->num_nh; int i; @@ -114,9 +113,23 @@ static int nh_notifier_grp_info_init(struct nh_notifier_info *info, return 0; } -static void nh_notifier_grp_info_fini(struct nh_notifier_info *info) +static int nh_notifier_grp_info_init(struct nh_notifier_info *info, + const struct nexthop *nh) { - kfree(info->nh_grp); + struct nh_group *nhg = rtnl_dereference(nh->nh_grp); + + if (nhg->mpath) + return nh_notifier_mp_info_init(info, nhg); + return -EINVAL; +} + +static void nh_notifier_grp_info_fini(struct nh_notifier_info *info, + const struct nexthop *nh) +{ + struct nh_group *nhg = rtnl_dereference(nh->nh_grp); + + if (nhg->mpath) + kfree(info->nh_grp); } static int nh_notifier_info_init(struct nh_notifier_info *info, @@ -134,7 +147,7 @@ static void nh_notifier_info_fini(struct nh_notifier_info *info, const struct nexthop *nh) { if (nh->is_group) - nh_notifier_grp_info_fini(info); + nh_notifier_grp_info_fini(info, nh); else nh_notifier_single_info_fini(info); } -- GitLab From 56450ec6b7fc2824d6402fc3a60bff1a6fe32c04 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Thu, 28 Jan 2021 13:49:19 +0100 Subject: [PATCH 1684/2012] nexthop: Extract dump filtering parameters into a single structure Requests to dump nexthops have many attributes in common with those that requests to dump buckets of resilient NH groups will have. In order to make reuse of this code simpler, convert the code to use a single structure with filtering configuration instead of passing around the parameters one by one. Signed-off-by: Petr Machata Reviewed-by: Ido Schimmel Reviewed-by: David Ahern Signed-off-by: Jakub Kicinski --- net/ipv4/nexthop.c | 44 ++++++++++++++++++++++++-------------------- 1 file changed, 24 insertions(+), 20 deletions(-) diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c index 7149b12c4703c..ad48e5d71bf99 100644 --- a/net/ipv4/nexthop.c +++ b/net/ipv4/nexthop.c @@ -1971,16 +1971,23 @@ errout_free: goto out; } -static bool nh_dump_filtered(struct nexthop *nh, int dev_idx, int master_idx, - bool group_filter, u8 family) +struct nh_dump_filter { + int dev_idx; + int master_idx; + bool group_filter; + bool fdb_filter; +}; + +static bool nh_dump_filtered(struct nexthop *nh, + struct nh_dump_filter *filter, u8 family) { const struct net_device *dev; const struct nh_info *nhi; - if (group_filter && !nh->is_group) + if (filter->group_filter && !nh->is_group) return true; - if (!dev_idx && !master_idx && !family) + if (!filter->dev_idx && !filter->master_idx && !family) return false; if (nh->is_group) @@ -1991,26 +1998,26 @@ static bool nh_dump_filtered(struct nexthop *nh, int dev_idx, int master_idx, return true; dev = nhi->fib_nhc.nhc_dev; - if (dev_idx && (!dev || dev->ifindex != dev_idx)) + if (filter->dev_idx && (!dev || dev->ifindex != filter->dev_idx)) return true; - if (master_idx) { + if (filter->master_idx) { struct net_device *master; if (!dev) return true; master = netdev_master_upper_dev_get((struct net_device *)dev); - if (!master || master->ifindex != master_idx) + if (!master || master->ifindex != filter->master_idx) return true; } return false; } -static int nh_valid_dump_req(const struct nlmsghdr *nlh, int *dev_idx, - int *master_idx, bool *group_filter, - bool *fdb_filter, struct netlink_callback *cb) +static int nh_valid_dump_req(const struct nlmsghdr *nlh, + struct nh_dump_filter *filter, + struct netlink_callback *cb) { struct netlink_ext_ack *extack = cb->extack; struct nlattr *tb[ARRAY_SIZE(rtm_nh_policy_dump)]; @@ -2030,7 +2037,7 @@ static int nh_valid_dump_req(const struct nlmsghdr *nlh, int *dev_idx, NL_SET_ERR_MSG(extack, "Invalid device index"); return -EINVAL; } - *dev_idx = idx; + filter->dev_idx = idx; } if (tb[NHA_MASTER]) { idx = nla_get_u32(tb[NHA_MASTER]); @@ -2038,10 +2045,10 @@ static int nh_valid_dump_req(const struct nlmsghdr *nlh, int *dev_idx, NL_SET_ERR_MSG(extack, "Invalid master device index"); return -EINVAL; } - *master_idx = idx; + filter->master_idx = idx; } - *group_filter = nla_get_flag(tb[NHA_GROUPS]); - *fdb_filter = nla_get_flag(tb[NHA_FDB]); + filter->group_filter = nla_get_flag(tb[NHA_GROUPS]); + filter->fdb_filter = nla_get_flag(tb[NHA_FDB]); nhm = nlmsg_data(nlh); if (nhm->nh_protocol || nhm->resvd || nhm->nh_scope || nhm->nh_flags) { @@ -2055,17 +2062,15 @@ static int nh_valid_dump_req(const struct nlmsghdr *nlh, int *dev_idx, /* rtnl */ static int rtm_dump_nexthop(struct sk_buff *skb, struct netlink_callback *cb) { - bool group_filter = false, fdb_filter = false; struct nhmsg *nhm = nlmsg_data(cb->nlh); - int dev_filter_idx = 0, master_idx = 0; struct net *net = sock_net(skb->sk); struct rb_root *root = &net->nexthop.rb_root; + struct nh_dump_filter filter = {}; struct rb_node *node; int idx = 0, s_idx; int err; - err = nh_valid_dump_req(cb->nlh, &dev_filter_idx, &master_idx, - &group_filter, &fdb_filter, cb); + err = nh_valid_dump_req(cb->nlh, &filter, cb); if (err < 0) return err; @@ -2077,8 +2082,7 @@ static int rtm_dump_nexthop(struct sk_buff *skb, struct netlink_callback *cb) goto cont; nh = rb_entry(node, struct nexthop, rb_node); - if (nh_dump_filtered(nh, dev_filter_idx, master_idx, - group_filter, nhm->nh_family)) + if (nh_dump_filtered(nh, &filter, nhm->nh_family)) goto cont; err = nh_fill_node(skb, nh, RTM_NEWNEXTHOP, -- GitLab From b9ebea127661e8982c03065d99422dbf0f73e4d1 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Thu, 28 Jan 2021 13:49:20 +0100 Subject: [PATCH 1685/2012] nexthop: Extract a common helper for parsing dump attributes Requests to dump nexthops have many attributes in common with those that requests to dump buckets of resilient NH groups will have. However, they have different policies. To allow reuse of this code, extract a policy-agnostic wrapper out of nh_valid_dump_req(), and convert this function into a thin wrapper around it. Signed-off-by: Petr Machata Reviewed-by: Ido Schimmel Reviewed-by: David Ahern Signed-off-by: Jakub Kicinski --- net/ipv4/nexthop.c | 31 +++++++++++++++++++------------ 1 file changed, 19 insertions(+), 12 deletions(-) diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c index ad48e5d71bf99..1c4f10fe3b4eb 100644 --- a/net/ipv4/nexthop.c +++ b/net/ipv4/nexthop.c @@ -2015,22 +2015,13 @@ static bool nh_dump_filtered(struct nexthop *nh, return false; } -static int nh_valid_dump_req(const struct nlmsghdr *nlh, - struct nh_dump_filter *filter, - struct netlink_callback *cb) +static int __nh_valid_dump_req(const struct nlmsghdr *nlh, struct nlattr **tb, + struct nh_dump_filter *filter, + struct netlink_ext_ack *extack) { - struct netlink_ext_ack *extack = cb->extack; - struct nlattr *tb[ARRAY_SIZE(rtm_nh_policy_dump)]; struct nhmsg *nhm; - int err; u32 idx; - err = nlmsg_parse(nlh, sizeof(*nhm), tb, - ARRAY_SIZE(rtm_nh_policy_dump) - 1, - rtm_nh_policy_dump, NULL); - if (err < 0) - return err; - if (tb[NHA_OIF]) { idx = nla_get_u32(tb[NHA_OIF]); if (idx > INT_MAX) { @@ -2059,6 +2050,22 @@ static int nh_valid_dump_req(const struct nlmsghdr *nlh, return 0; } +static int nh_valid_dump_req(const struct nlmsghdr *nlh, + struct nh_dump_filter *filter, + struct netlink_callback *cb) +{ + struct nlattr *tb[ARRAY_SIZE(rtm_nh_policy_dump)]; + int err; + + err = nlmsg_parse(nlh, sizeof(struct nhmsg), tb, + ARRAY_SIZE(rtm_nh_policy_dump) - 1, + rtm_nh_policy_dump, cb->extack); + if (err < 0) + return err; + + return __nh_valid_dump_req(nlh, tb, filter, cb->extack); +} + /* rtnl */ static int rtm_dump_nexthop(struct sk_buff *skb, struct netlink_callback *cb) { -- GitLab From a6fbbaa64c3b0e744e7e421a13658a7441f5a9f3 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Thu, 28 Jan 2021 13:49:21 +0100 Subject: [PATCH 1686/2012] nexthop: Strongly-type context of rtm_dump_nexthop() The dump operations need to keep state from one invocation to another. A scratch area is dedicated for this purpose in the passed-in argument, cb, namely via two aliased arrays, struct netlink_callback.args and .ctx. Dumping of buckets will end up having to iterate over next hops as well, and it would be nice to be able to reuse the iteration logic with the NH dumper. The fact that the logic currently relies on fixed index to the .args array, and the indices would have to be coordinated between the two dumpers, makes this somewhat awkward. To make the access patters clearer, introduce a helper struct with a NH index, and instead of using the .args array directly, use it through this structure. Signed-off-by: Petr Machata Reviewed-by: Ido Schimmel Reviewed-by: David Ahern Signed-off-by: Jakub Kicinski --- net/ipv4/nexthop.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c index 1c4f10fe3b4eb..7ae197efa5a96 100644 --- a/net/ipv4/nexthop.c +++ b/net/ipv4/nexthop.c @@ -2066,9 +2066,23 @@ static int nh_valid_dump_req(const struct nlmsghdr *nlh, return __nh_valid_dump_req(nlh, tb, filter, cb->extack); } +struct rtm_dump_nh_ctx { + u32 idx; +}; + +static struct rtm_dump_nh_ctx * +rtm_dump_nh_ctx(struct netlink_callback *cb) +{ + struct rtm_dump_nh_ctx *ctx = (void *)cb->ctx; + + BUILD_BUG_ON(sizeof(*ctx) > sizeof(cb->ctx)); + return ctx; +} + /* rtnl */ static int rtm_dump_nexthop(struct sk_buff *skb, struct netlink_callback *cb) { + struct rtm_dump_nh_ctx *ctx = rtm_dump_nh_ctx(cb); struct nhmsg *nhm = nlmsg_data(cb->nlh); struct net *net = sock_net(skb->sk); struct rb_root *root = &net->nexthop.rb_root; @@ -2081,7 +2095,7 @@ static int rtm_dump_nexthop(struct sk_buff *skb, struct netlink_callback *cb) if (err < 0) return err; - s_idx = cb->args[0]; + s_idx = ctx->idx; for (node = rb_first(root); node; node = rb_next(node)) { struct nexthop *nh; @@ -2108,7 +2122,7 @@ cont: out: err = skb->len; out_err: - cb->args[0] = idx; + ctx->idx = idx; cb->seq = net->nexthop.seq; nl_dump_check_consistent(cb, nlmsg_hdr(skb)); -- GitLab From cbee18071e72b68d63b055655ac96ae97126776e Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Thu, 28 Jan 2021 13:49:22 +0100 Subject: [PATCH 1687/2012] nexthop: Extract a helper for walking the next-hop tree Extract from rtm_dump_nexthop() a helper to walk the next hop tree. A separate function for this will be reusable from the bucket dumper. Signed-off-by: Petr Machata Reviewed-by: Ido Schimmel Reviewed-by: David Ahern Signed-off-by: Jakub Kicinski --- net/ipv4/nexthop.c | 52 +++++++++++++++++++++++++++++----------------- 1 file changed, 33 insertions(+), 19 deletions(-) diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c index 7ae197efa5a96..e5175f531ffb0 100644 --- a/net/ipv4/nexthop.c +++ b/net/ipv4/nexthop.c @@ -2079,22 +2079,17 @@ rtm_dump_nh_ctx(struct netlink_callback *cb) return ctx; } -/* rtnl */ -static int rtm_dump_nexthop(struct sk_buff *skb, struct netlink_callback *cb) +static int rtm_dump_walk_nexthops(struct sk_buff *skb, + struct netlink_callback *cb, + struct rb_root *root, + struct rtm_dump_nh_ctx *ctx, + struct nh_dump_filter *filter) { - struct rtm_dump_nh_ctx *ctx = rtm_dump_nh_ctx(cb); struct nhmsg *nhm = nlmsg_data(cb->nlh); - struct net *net = sock_net(skb->sk); - struct rb_root *root = &net->nexthop.rb_root; - struct nh_dump_filter filter = {}; struct rb_node *node; int idx = 0, s_idx; int err; - err = nh_valid_dump_req(cb->nlh, &filter, cb); - if (err < 0) - return err; - s_idx = ctx->idx; for (node = rb_first(root); node; node = rb_next(node)) { struct nexthop *nh; @@ -2103,29 +2098,48 @@ static int rtm_dump_nexthop(struct sk_buff *skb, struct netlink_callback *cb) goto cont; nh = rb_entry(node, struct nexthop, rb_node); - if (nh_dump_filtered(nh, &filter, nhm->nh_family)) + if (nh_dump_filtered(nh, filter, nhm->nh_family)) goto cont; + ctx->idx = idx; err = nh_fill_node(skb, nh, RTM_NEWNEXTHOP, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI); - if (err < 0) { - if (likely(skb->len)) - goto out; - - goto out_err; - } + if (err < 0) + return err; cont: idx++; } + ctx->idx = idx; + return 0; +} + +/* rtnl */ +static int rtm_dump_nexthop(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct rtm_dump_nh_ctx *ctx = rtm_dump_nh_ctx(cb); + struct net *net = sock_net(skb->sk); + struct rb_root *root = &net->nexthop.rb_root; + struct nh_dump_filter filter = {}; + int err; + + err = nh_valid_dump_req(cb->nlh, &filter, cb); + if (err < 0) + return err; + + err = rtm_dump_walk_nexthops(skb, cb, root, ctx, &filter); + if (err < 0) { + if (likely(skb->len)) + goto out; + goto out_err; + } + out: err = skb->len; out_err: - ctx->idx = idx; cb->seq = net->nexthop.seq; nl_dump_check_consistent(cb, nlmsg_hdr(skb)); - return err; } -- GitLab From e948217d258f35b248578f7b400d6df23ac562da Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Thu, 28 Jan 2021 13:49:23 +0100 Subject: [PATCH 1688/2012] nexthop: Add a callback parameter to rtm_dump_walk_nexthops() In order to allow different handling for next-hop tree dumper and for bucket dumper, parameterize the next-hop tree walker with a callback. Add rtm_dump_nexthop_cb() with just the bits relevant for next-hop tree dumping. Signed-off-by: Petr Machata Reviewed-by: Ido Schimmel Reviewed-by: David Ahern Signed-off-by: Jakub Kicinski --- net/ipv4/nexthop.c | 32 ++++++++++++++++++++++---------- 1 file changed, 22 insertions(+), 10 deletions(-) diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c index e5175f531ffb0..9536cf2f6aca0 100644 --- a/net/ipv4/nexthop.c +++ b/net/ipv4/nexthop.c @@ -2083,9 +2083,11 @@ static int rtm_dump_walk_nexthops(struct sk_buff *skb, struct netlink_callback *cb, struct rb_root *root, struct rtm_dump_nh_ctx *ctx, - struct nh_dump_filter *filter) + int (*nh_cb)(struct sk_buff *skb, + struct netlink_callback *cb, + struct nexthop *nh, void *data), + void *data) { - struct nhmsg *nhm = nlmsg_data(cb->nlh); struct rb_node *node; int idx = 0, s_idx; int err; @@ -2098,14 +2100,9 @@ static int rtm_dump_walk_nexthops(struct sk_buff *skb, goto cont; nh = rb_entry(node, struct nexthop, rb_node); - if (nh_dump_filtered(nh, filter, nhm->nh_family)) - goto cont; - ctx->idx = idx; - err = nh_fill_node(skb, nh, RTM_NEWNEXTHOP, - NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, NLM_F_MULTI); - if (err < 0) + err = nh_cb(skb, cb, nh, data); + if (err) return err; cont: idx++; @@ -2115,6 +2112,20 @@ cont: return 0; } +static int rtm_dump_nexthop_cb(struct sk_buff *skb, struct netlink_callback *cb, + struct nexthop *nh, void *data) +{ + struct nhmsg *nhm = nlmsg_data(cb->nlh); + struct nh_dump_filter *filter = data; + + if (nh_dump_filtered(nh, filter, nhm->nh_family)) + return 0; + + return nh_fill_node(skb, nh, RTM_NEWNEXTHOP, + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, NLM_F_MULTI); +} + /* rtnl */ static int rtm_dump_nexthop(struct sk_buff *skb, struct netlink_callback *cb) { @@ -2128,7 +2139,8 @@ static int rtm_dump_nexthop(struct sk_buff *skb, struct netlink_callback *cb) if (err < 0) return err; - err = rtm_dump_walk_nexthops(skb, cb, root, ctx, &filter); + err = rtm_dump_walk_nexthops(skb, cb, root, ctx, + &rtm_dump_nexthop_cb, &filter); if (err < 0) { if (likely(skb->len)) goto out; -- GitLab From 0bccf8ed8aa6ecdd12cd2b3b0a73ff2c4c88d62b Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Thu, 28 Jan 2021 13:49:24 +0100 Subject: [PATCH 1689/2012] nexthop: Extract a helper for validation of get/del RTNL requests Validation of messages for get / del of a next hop is the same as will be validation of messages for get of a resilient next hop group bucket. The difference is that policy for resilient next hop group buckets is a superset of that used for next-hop get. It is therefore possible to reuse the code that validates the nhmsg fields, extracts the next-hop ID, and validates that. To that end, extract from nh_valid_get_del_req() a helper __nh_valid_get_del_req() that does just that. Make the nlh argument const so that the function can be called from the dump context, which only has a const nlh. Propagate the constness to nh_valid_get_del_req(). Signed-off-by: Petr Machata Reviewed-by: Ido Schimmel Reviewed-by: David Ahern Signed-off-by: Jakub Kicinski --- net/ipv4/nexthop.c | 43 +++++++++++++++++++++++++------------------ 1 file changed, 25 insertions(+), 18 deletions(-) diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c index 9536cf2f6aca0..f1c6cbdb9e436 100644 --- a/net/ipv4/nexthop.c +++ b/net/ipv4/nexthop.c @@ -1872,37 +1872,44 @@ static int rtm_new_nexthop(struct sk_buff *skb, struct nlmsghdr *nlh, return err; } -static int nh_valid_get_del_req(struct nlmsghdr *nlh, u32 *id, - struct netlink_ext_ack *extack) +static int __nh_valid_get_del_req(const struct nlmsghdr *nlh, + struct nlattr **tb, u32 *id, + struct netlink_ext_ack *extack) { struct nhmsg *nhm = nlmsg_data(nlh); - struct nlattr *tb[ARRAY_SIZE(rtm_nh_policy_get)]; - int err; - - err = nlmsg_parse(nlh, sizeof(*nhm), tb, - ARRAY_SIZE(rtm_nh_policy_get) - 1, - rtm_nh_policy_get, extack); - if (err < 0) - return err; - err = -EINVAL; if (nhm->nh_protocol || nhm->resvd || nhm->nh_scope || nhm->nh_flags) { NL_SET_ERR_MSG(extack, "Invalid values in header"); - goto out; + return -EINVAL; } if (!tb[NHA_ID]) { NL_SET_ERR_MSG(extack, "Nexthop id is missing"); - goto out; + return -EINVAL; } *id = nla_get_u32(tb[NHA_ID]); - if (!(*id)) + if (!(*id)) { NL_SET_ERR_MSG(extack, "Invalid nexthop id"); - else - err = 0; -out: - return err; + return -EINVAL; + } + + return 0; +} + +static int nh_valid_get_del_req(const struct nlmsghdr *nlh, u32 *id, + struct netlink_ext_ack *extack) +{ + struct nlattr *tb[ARRAY_SIZE(rtm_nh_policy_get)]; + int err; + + err = nlmsg_parse(nlh, sizeof(struct nhmsg), tb, + ARRAY_SIZE(rtm_nh_policy_get) - 1, + rtm_nh_policy_get, extack); + if (err < 0) + return err; + + return __nh_valid_get_del_req(nlh, tb, id, extack); } /* rtnl */ -- GitLab From 1d3f9bb1be8556bce237934ae82b3cdeda3242fd Mon Sep 17 00:00:00 2001 From: dingsenjie Date: Wed, 27 Jan 2021 10:28:01 +0800 Subject: [PATCH 1690/2012] linux/qed: fix spelling typo in qed_chain.h allocted -> allocated Signed-off-by: dingsenjie Link: https://lore.kernel.org/r/20210127022801.8028-1-dingsenjie@163.com Signed-off-by: Jakub Kicinski --- include/linux/qed/qed_chain.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/qed/qed_chain.h b/include/linux/qed/qed_chain.h index 4d58dc8943f09..e339b48de32dd 100644 --- a/include/linux/qed/qed_chain.h +++ b/include/linux/qed/qed_chain.h @@ -470,7 +470,7 @@ static inline void *qed_chain_consume(struct qed_chain *p_chain) /** * @brief qed_chain_reset - Resets the chain to its start state * - * @param p_chain pointer to a previously allocted chain + * @param p_chain pointer to a previously allocated chain */ static inline void qed_chain_reset(struct qed_chain *p_chain) { -- GitLab From 46eb3c108fe1744d0a6abfda69ef8c1d4f0e92d4 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Wed, 27 Jan 2021 12:13:59 -0600 Subject: [PATCH 1691/2012] octeontx2-af: Fix 'physical' typos Fix misspellings of "physical". Signed-off-by: Bjorn Helgaas Acked-by: Willem de Bruijn Link: https://lore.kernel.org/r/20210127181359.3008316-1-helgaas@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/marvell/octeontx2/af/rvu.c | 2 +- drivers/net/ethernet/marvell/octeontx2/nic/otx2_flows.c | 2 +- drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c index 0b6bf9f0c6f0f..4ef7fc8bbb197 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c @@ -646,7 +646,7 @@ setup_vfmsix: } /* HW interprets RVU_AF_MSIXTR_BASE address as an IOVA, hence - * create a IOMMU mapping for the physcial address configured by + * create an IOMMU mapping for the physical address configured by * firmware and reconfig RVU_AF_MSIXTR_BASE with IOVA. */ cfg = rvu_read64(rvu, BLKADDR_RVUM, RVU_PRIV_CONST); diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_flows.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_flows.c index d6b5bf247e31b..0dbbf38e05975 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_flows.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_flows.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/* Marvell OcteonTx2 RVU Physcial Function ethernet driver +/* Marvell OcteonTx2 RVU Physical Function ethernet driver * * Copyright (C) 2020 Marvell. */ diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c index 634d60655a74a..07ec85aebcca9 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/* Marvell OcteonTx2 RVU Physcial Function ethernet driver +/* Marvell OcteonTx2 RVU Physical Function ethernet driver * * Copyright (C) 2020 Marvell International Ltd. * -- GitLab From 28eb119c042e8d3420b577b5b3ea851a111e7b2d Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Thu, 15 Oct 2020 23:06:31 +0200 Subject: [PATCH 1692/2012] can: mcp251xfd: mcp251xfd_probe(): fix errata reference This patch fixes the reference to the errata for both the mcp2517fd and the mcp2518fd. Fixes: f5b84dedf7eb ("can: mcp25xxfd: mcp25xxfd_probe(): add SPI clk limit related errata information") Link: https://lore.kernel.org/r/20210128104644.2982125-2-mkl@pengutronix.de Signed-off-by: Marc Kleine-Budde --- drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c index 3638b474d86b9..0152274223729 100644 --- a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c +++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c @@ -2932,7 +2932,7 @@ static int mcp251xfd_probe(struct spi_device *spi) spi_get_device_id(spi)->driver_data; /* Errata Reference: - * mcp2517fd: DS80000789B, mcp2518fd: DS80000792C 4. + * mcp2517fd: DS80000792C 5., mcp2518fd: DS80000789C 4. * * The SPI can write corrupted data to the RAM at fast SPI * speeds: -- GitLab From b98e68e91cded8f0f905010e15f701b8e2539a5f Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Thu, 15 Oct 2020 23:06:31 +0200 Subject: [PATCH 1693/2012] can: mcp251xfd: mcp251xfd_probe(): sort errata table alphabetically, fix indention This patch sorts the errata table alphabetically and fixes the indention. Link: https://lore.kernel.org/r/20210128104644.2982125-3-mkl@pengutronix.de Signed-off-by: Marc Kleine-Budde --- drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c index 0152274223729..c6ccd00541523 100644 --- a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c +++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c @@ -2953,10 +2953,10 @@ static int mcp251xfd_probe(struct spi_device *spi) * 2518 20 MHz allwinner,sun8i-h3 allwinner,sun8i-h3-spi 9375000 Hz 93.75% 600000000 Hz bad assigned-clocks = <&ccu CLK_SPIx> * 2518 40 MHz allwinner,sun8i-h3 allwinner,sun8i-h3-spi 16666667 Hz 83.33% 600000000 Hz good assigned-clocks = <&ccu CLK_SPIx> * 2518 40 MHz allwinner,sun8i-h3 allwinner,sun8i-h3-spi 18750000 Hz 93.75% 600000000 Hz bad assigned-clocks = <&ccu CLK_SPIx> + * 2517 40 MHz atmel,sama5d27 atmel,at91rm9200-spi 16400000 Hz 82.00% 82000000 Hz good default + * 2518 40 MHz atmel,sama5d27 atmel,at91rm9200-spi 16400000 Hz 82.00% 82000000 Hz good default * 2517 20 MHz fsl,imx8mm fsl,imx51-ecspi 8333333 Hz 83.33% 16666667 Hz good assigned-clocks = <&clk IMX8MM_CLK_ECSPIx_ROOT> * 2517 20 MHz fsl,imx8mm fsl,imx51-ecspi 9523809 Hz 95.34% 28571429 Hz bad assigned-clocks = <&clk IMX8MM_CLK_ECSPIx_ROOT> - * 2517 40 MHz atmel,sama5d27 atmel,at91rm9200-spi 16400000 Hz 82.00% 82000000 Hz good default - * 2518 40 MHz atmel,sama5d27 atmel,at91rm9200-spi 16400000 Hz 82.00% 82000000 Hz good default * */ priv->spi_max_speed_hz_orig = spi->max_speed_hz; -- GitLab From 01b2a0e5a0414eaa805e0ed550b1f735ccf309ca Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Thu, 15 Oct 2020 23:06:31 +0200 Subject: [PATCH 1694/2012] can: mcp251xfd: mcp251xfd_probe(): remove known bad combinations from errata tabe The published errata specify the maximum allowed SPI frequency to be max 85% of (FSYSCLK/2). So there's no need to track known bad clock settings in the driver. As the setup of known good values is a bit tricky, keep them. Link: https://lore.kernel.org/r/20210128104644.2982125-4-mkl@pengutronix.de Signed-off-by: Marc Kleine-Budde --- drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c index c6ccd00541523..d51d11fd7064d 100644 --- a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c +++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c @@ -2945,18 +2945,15 @@ static int mcp251xfd_probe(struct spi_device *spi) * Ensure that FSCK is less than or equal to 0.85 * * (FSYSCLK/2). * - * Known good and bad combinations are: + * Known good combinations are: * - * MCP ext-clk SoC SPI SPI-clk max-clk parent-clk Status config + * MCP ext-clk SoC SPI SPI-clk max-clk parent-clk config * - * 2518 20 MHz allwinner,sun8i-h3 allwinner,sun8i-h3-spi 8333333 Hz 83.33% 600000000 Hz good assigned-clocks = <&ccu CLK_SPIx> - * 2518 20 MHz allwinner,sun8i-h3 allwinner,sun8i-h3-spi 9375000 Hz 93.75% 600000000 Hz bad assigned-clocks = <&ccu CLK_SPIx> - * 2518 40 MHz allwinner,sun8i-h3 allwinner,sun8i-h3-spi 16666667 Hz 83.33% 600000000 Hz good assigned-clocks = <&ccu CLK_SPIx> - * 2518 40 MHz allwinner,sun8i-h3 allwinner,sun8i-h3-spi 18750000 Hz 93.75% 600000000 Hz bad assigned-clocks = <&ccu CLK_SPIx> - * 2517 40 MHz atmel,sama5d27 atmel,at91rm9200-spi 16400000 Hz 82.00% 82000000 Hz good default - * 2518 40 MHz atmel,sama5d27 atmel,at91rm9200-spi 16400000 Hz 82.00% 82000000 Hz good default - * 2517 20 MHz fsl,imx8mm fsl,imx51-ecspi 8333333 Hz 83.33% 16666667 Hz good assigned-clocks = <&clk IMX8MM_CLK_ECSPIx_ROOT> - * 2517 20 MHz fsl,imx8mm fsl,imx51-ecspi 9523809 Hz 95.34% 28571429 Hz bad assigned-clocks = <&clk IMX8MM_CLK_ECSPIx_ROOT> + * 2518 20 MHz allwinner,sun8i-h3 allwinner,sun8i-h3-spi 8333333 Hz 83.33% 600000000 Hz assigned-clocks = <&ccu CLK_SPIx> + * 2518 40 MHz allwinner,sun8i-h3 allwinner,sun8i-h3-spi 16666667 Hz 83.33% 600000000 Hz assigned-clocks = <&ccu CLK_SPIx> + * 2517 40 MHz atmel,sama5d27 atmel,at91rm9200-spi 16400000 Hz 82.00% 82000000 Hz default + * 2518 40 MHz atmel,sama5d27 atmel,at91rm9200-spi 16400000 Hz 82.00% 82000000 Hz default + * 2517 20 MHz fsl,imx8mm fsl,imx51-ecspi 8333333 Hz 83.33% 16666667 Hz assigned-clocks = <&clk IMX8MM_CLK_ECSPIx_ROOT> * */ priv->spi_max_speed_hz_orig = spi->max_speed_hz; -- GitLab From 9f1fbc1c9c7c52d05a62aba092e7e3f274dbb931 Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Thu, 15 Oct 2020 23:06:31 +0200 Subject: [PATCH 1695/2012] can: mcp251xfd: mcp251xfd_probe(): add imx6 to errata table This patch adds an imx6 as known good to the errata table. Link: https://lore.kernel.org/r/20210128104644.2982125-5-mkl@pengutronix.de Signed-off-by: Marc Kleine-Budde --- drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c index d51d11fd7064d..f76cc62cbac40 100644 --- a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c +++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c @@ -2953,6 +2953,7 @@ static int mcp251xfd_probe(struct spi_device *spi) * 2518 40 MHz allwinner,sun8i-h3 allwinner,sun8i-h3-spi 16666667 Hz 83.33% 600000000 Hz assigned-clocks = <&ccu CLK_SPIx> * 2517 40 MHz atmel,sama5d27 atmel,at91rm9200-spi 16400000 Hz 82.00% 82000000 Hz default * 2518 40 MHz atmel,sama5d27 atmel,at91rm9200-spi 16400000 Hz 82.00% 82000000 Hz default + * 2518 40 MHz fsl,imx6dl fsl,imx51-ecspi 15000000 Hz 75.00% 30000000 Hz default * 2517 20 MHz fsl,imx8mm fsl,imx51-ecspi 8333333 Hz 83.33% 16666667 Hz assigned-clocks = <&clk IMX8MM_CLK_ECSPIx_ROOT> * */ -- GitLab From f93486a79aa651fb10bd3079019525943cc37f69 Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Thu, 22 Oct 2020 23:14:54 +0200 Subject: [PATCH 1696/2012] can: mcp251xfd: unify error messages and commets This patch unifies the error messages: - have a "." and the end of each message - write controller with a small "c", if not the first word of an error message. Link: https://lore.kernel.org/r/20210128104644.2982125-6-mkl@pengutronix.de Signed-off-by: Marc Kleine-Budde --- drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c index f76cc62cbac40..5c7fdf31dce80 100644 --- a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c +++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c @@ -651,7 +651,7 @@ static int mcp251xfd_chip_softreset_check(const struct mcp251xfd_priv *priv) if (osc != osc_reference) { netdev_info(priv->ndev, - "Controller failed to reset. osc=0x%08x, reference value=0x%08x\n", + "Controller failed to reset. osc=0x%08x, reference value=0x%08x.\n", osc, osc_reference); return -ETIMEDOUT; } @@ -666,7 +666,7 @@ static int mcp251xfd_chip_softreset(const struct mcp251xfd_priv *priv) for (i = 0; i < MCP251XFD_SOFTRESET_RETRIES_MAX; i++) { if (i) netdev_info(priv->ndev, - "Retrying to reset Controller.\n"); + "Retrying to reset controller.\n"); err = mcp251xfd_chip_softreset_do(priv); if (err == -ETIMEDOUT) @@ -1239,7 +1239,7 @@ mcp251xfd_handle_tefif_recover(const struct mcp251xfd_priv *priv, const u32 seq) } netdev_info(priv->ndev, - "Transmit Event FIFO buffer %s. (seq=0x%08x, tef_tail=0x%08x, tef_head=0x%08x, tx_head=0x%08x)\n", + "Transmit Event FIFO buffer %s. (seq=0x%08x, tef_tail=0x%08x, tef_head=0x%08x, tx_head=0x%08x).\n", tef_sta & MCP251XFD_REG_TEFSTA_TEFFIF ? "full" : tef_sta & MCP251XFD_REG_TEFSTA_TEFNEIF ? "not empty" : "empty", @@ -1891,7 +1891,7 @@ mcp251xfd_handle_modif(const struct mcp251xfd_priv *priv, bool *set_normal_mode) "Controller changed into %s Mode (%u).\n", mcp251xfd_get_mode_str(mode), mode); - /* After the application requests Normal mode, the Controller + /* After the application requests Normal mode, the controller * will automatically attempt to retransmit the message that * caused the TX MAB underflow. * -- GitLab From 49ffacbc4cd974bf2765af8640500ed7f1f93bff Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Tue, 20 Oct 2020 17:10:14 +0200 Subject: [PATCH 1697/2012] can: mcp251xfd: add missing _MASK postfix to MCP251XFD_OBJ_FLAGS_DLC As MCP251XFD_OBJ_FLAGS_DLC is a mask, add the missing _MASK postfix, that all other masks in the driver have. Link: https://lore.kernel.org/r/20210128104644.2982125-7-mkl@pengutronix.de Signed-off-by: Marc Kleine-Budde --- drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c | 4 ++-- drivers/net/can/spi/mcp251xfd/mcp251xfd.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c index 5c7fdf31dce80..04870a741792d 100644 --- a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c +++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c @@ -1465,7 +1465,7 @@ mcp251xfd_hw_rx_obj_to_skb(const struct mcp251xfd_priv *priv, hw_rx_obj->id); } - dlc = FIELD_GET(MCP251XFD_OBJ_FLAGS_DLC, hw_rx_obj->flags); + dlc = FIELD_GET(MCP251XFD_OBJ_FLAGS_DLC_MASK, hw_rx_obj->flags); /* CANFD */ if (hw_rx_obj->flags & MCP251XFD_OBJ_FLAGS_FDF) { @@ -2359,7 +2359,7 @@ mcp251xfd_tx_obj_from_skb(const struct mcp251xfd_priv *priv, priv->can.ctrlmode); } - flags |= FIELD_PREP(MCP251XFD_OBJ_FLAGS_DLC, dlc); + flags |= FIELD_PREP(MCP251XFD_OBJ_FLAGS_DLC_MASK, dlc); load_buf = &tx_obj->buf; if (priv->devtype_data.quirks & MCP251XFD_QUIRK_CRC_TX) diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd.h b/drivers/net/can/spi/mcp251xfd/mcp251xfd.h index cb6398c2a5606..480bd4480bdf5 100644 --- a/drivers/net/can/spi/mcp251xfd/mcp251xfd.h +++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd.h @@ -305,7 +305,7 @@ #define MCP251XFD_OBJ_FLAGS_BRS BIT(6) #define MCP251XFD_OBJ_FLAGS_RTR BIT(5) #define MCP251XFD_OBJ_FLAGS_IDE BIT(4) -#define MCP251XFD_OBJ_FLAGS_DLC GENMASK(3, 0) +#define MCP251XFD_OBJ_FLAGS_DLC_MASK GENMASK(3, 0) #define MCP251XFD_REG_FRAME_EFF_SID_MASK GENMASK(28, 18) #define MCP251XFD_REG_FRAME_EFF_EID_MASK GENMASK(17, 0) -- GitLab From dfe99ba29e6283d0533ca9bf5135c5a6c0f7a952 Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Fri, 16 Oct 2020 20:49:30 +0200 Subject: [PATCH 1698/2012] can: mcp251xfd: mcp251xfd_chip_clock_enable(): simplify return This patch simplifies the return of the mcp251xfd_chip_clock_enable() function by direct returning the error. Link: https://lore.kernel.org/r/20210128104644.2982125-8-mkl@pengutronix.de Signed-off-by: Marc Kleine-Budde --- drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c index 04870a741792d..da5e8e4164403 100644 --- a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c +++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c @@ -596,11 +596,9 @@ static int mcp251xfd_chip_clock_enable(const struct mcp251xfd_priv *priv) "Timeout waiting for Oscillator Ready (osc=0x%08x, osc_reference=0x%08x)\n", osc, osc_reference); return -ETIMEDOUT; - } else if (err) { - return err; } - return 0; + return err; } static int mcp251xfd_chip_softreset_do(const struct mcp251xfd_priv *priv) -- GitLab From cf8ee6de2543a0fa6d9471ddbb7216464a9681a1 Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Fri, 16 Oct 2020 18:03:59 +0200 Subject: [PATCH 1699/2012] can: mcp251xfd: mcp251xfd_probe(): use dev_err_probe() to simplify error handling dev_err_probe() can reduce code size, uniform error handling and record the defer probe reason etc., use it to simplify the code. Link: https://lore.kernel.org/r/20210128104644.2982125-9-mkl@pengutronix.de Signed-off-by: Marc Kleine-Budde --- .../net/can/spi/mcp251xfd/mcp251xfd-core.c | 28 ++++++++----------- 1 file changed, 12 insertions(+), 16 deletions(-) diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c index da5e8e4164403..3c5b92911d469 100644 --- a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c +++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c @@ -2849,32 +2849,28 @@ static int mcp251xfd_probe(struct spi_device *spi) rx_int = devm_gpiod_get_optional(&spi->dev, "microchip,rx-int", GPIOD_IN); - if (PTR_ERR(rx_int) == -EPROBE_DEFER) - return -EPROBE_DEFER; - else if (IS_ERR(rx_int)) - return PTR_ERR(rx_int); + if (IS_ERR(rx_int)) + return dev_err_probe(&spi->dev, PTR_ERR(rx_int), + "Failed to get RX-INT!\n"); reg_vdd = devm_regulator_get_optional(&spi->dev, "vdd"); - if (PTR_ERR(reg_vdd) == -EPROBE_DEFER) - return -EPROBE_DEFER; - else if (PTR_ERR(reg_vdd) == -ENODEV) + if (PTR_ERR(reg_vdd) == -ENODEV) reg_vdd = NULL; else if (IS_ERR(reg_vdd)) - return PTR_ERR(reg_vdd); + return dev_err_probe(&spi->dev, PTR_ERR(reg_vdd), + "Failed to get VDD regulator!\n"); reg_xceiver = devm_regulator_get_optional(&spi->dev, "xceiver"); - if (PTR_ERR(reg_xceiver) == -EPROBE_DEFER) - return -EPROBE_DEFER; - else if (PTR_ERR(reg_xceiver) == -ENODEV) + if (PTR_ERR(reg_xceiver) == -ENODEV) reg_xceiver = NULL; else if (IS_ERR(reg_xceiver)) - return PTR_ERR(reg_xceiver); + return dev_err_probe(&spi->dev, PTR_ERR(reg_xceiver), + "Failed to get Transceiver regulator!\n"); clk = devm_clk_get(&spi->dev, NULL); - if (IS_ERR(clk)) { - dev_err(&spi->dev, "No Oscillator (clock) defined.\n"); - return PTR_ERR(clk); - } + if (IS_ERR(clk)) + dev_err_probe(&spi->dev, PTR_ERR(clk), + "Failed to get Oscillator (clock)!\n"); freq = clk_get_rate(clk); /* Sanity check */ -- GitLab From 1074f8ec288f537f3b8462d09997a69b40f87e38 Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Fri, 29 Jan 2021 15:00:23 +0100 Subject: [PATCH 1700/2012] clang-format: Update with the latest for_each macro list Re-run the shell fragment that generated the original list. Signed-off-by: Miguel Ojeda --- .clang-format | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/.clang-format b/.clang-format index 10dc5a9a61b3e..01a341ceec6c0 100644 --- a/.clang-format +++ b/.clang-format @@ -122,6 +122,7 @@ ForEachMacros: - 'drm_for_each_bridge_in_chain' - 'drm_for_each_connector_iter' - 'drm_for_each_crtc' + - 'drm_for_each_crtc_reverse' - 'drm_for_each_encoder' - 'drm_for_each_encoder_mask' - 'drm_for_each_fb' @@ -203,14 +204,13 @@ ForEachMacros: - 'for_each_matching_node' - 'for_each_matching_node_and_match' - 'for_each_member' - - 'for_each_mem_region' - - 'for_each_memblock_type' - 'for_each_memcg_cache_index' - 'for_each_mem_pfn_range' - '__for_each_mem_range' - 'for_each_mem_range' - '__for_each_mem_range_rev' - 'for_each_mem_range_rev' + - 'for_each_mem_region' - 'for_each_migratetype_order' - 'for_each_msi_entry' - 'for_each_msi_entry_safe' @@ -276,10 +276,8 @@ ForEachMacros: - 'for_each_reserved_mem_range' - 'for_each_reserved_mem_region' - 'for_each_rtd_codec_dais' - - 'for_each_rtd_codec_dais_rollback' - 'for_each_rtd_components' - 'for_each_rtd_cpu_dais' - - 'for_each_rtd_cpu_dais_rollback' - 'for_each_rtd_dais' - 'for_each_set_bit' - 'for_each_set_bit_from' @@ -298,6 +296,7 @@ ForEachMacros: - '__for_each_thread' - 'for_each_thread' - 'for_each_unicast_dest_pgid' + - 'for_each_vsi' - 'for_each_wakeup_source' - 'for_each_zone' - 'for_each_zone_zonelist' @@ -330,6 +329,7 @@ ForEachMacros: - 'hlist_for_each_entry_rcu_bh' - 'hlist_for_each_entry_rcu_notrace' - 'hlist_for_each_entry_safe' + - 'hlist_for_each_entry_srcu' - '__hlist_for_each_rcu' - 'hlist_for_each_safe' - 'hlist_nulls_for_each_entry' @@ -378,6 +378,7 @@ ForEachMacros: - 'list_for_each_entry_safe_continue' - 'list_for_each_entry_safe_from' - 'list_for_each_entry_safe_reverse' + - 'list_for_each_entry_srcu' - 'list_for_each_prev' - 'list_for_each_prev_safe' - 'list_for_each_safe' @@ -411,6 +412,8 @@ ForEachMacros: - 'of_property_for_each_string' - 'of_property_for_each_u32' - 'pci_bus_for_each_resource' + - 'pcl_for_each_chunk' + - 'pcl_for_each_segment' - 'pcm_for_each_format' - 'ping_portaddr_for_each_entry' - 'plist_for_each' -- GitLab From cd92cdb9c8bcfc27a8f28bcbf7c414a0ea79e5ec Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Fri, 29 Jan 2021 23:47:25 +0900 Subject: [PATCH 1701/2012] null_blk: cleanup zoned mode initialization To avoid potential compilation problems, replaced the badly written MB_TO_SECTS() macro (missing parenthesis around the argument use) with the inline function mb_to_sects(). And while at it, simplify the calculation of the total number of zones of the device using the round_up() macro. Signed-off-by: Damien Le Moal Signed-off-by: Jens Axboe --- drivers/block/null_blk/zoned.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/drivers/block/null_blk/zoned.c b/drivers/block/null_blk/zoned.c index 148b871f263ba..fce0a54df0e5f 100644 --- a/drivers/block/null_blk/zoned.c +++ b/drivers/block/null_blk/zoned.c @@ -6,7 +6,10 @@ #define CREATE_TRACE_POINTS #include "trace.h" -#define MB_TO_SECTS(mb) (((sector_t)mb * SZ_1M) >> SECTOR_SHIFT) +static inline sector_t mb_to_sects(unsigned long mb) +{ + return ((sector_t)mb * SZ_1M) >> SECTOR_SHIFT; +} static inline unsigned int null_zone_no(struct nullb_device *dev, sector_t sect) { @@ -77,12 +80,11 @@ int null_init_zoned_dev(struct nullb_device *dev, struct request_queue *q) return -EINVAL; } - zone_capacity_sects = MB_TO_SECTS(dev->zone_capacity); - dev_capacity_sects = MB_TO_SECTS(dev->size); - dev->zone_size_sects = MB_TO_SECTS(dev->zone_size); - dev->nr_zones = dev_capacity_sects >> ilog2(dev->zone_size_sects); - if (dev_capacity_sects & (dev->zone_size_sects - 1)) - dev->nr_zones++; + zone_capacity_sects = mb_to_sects(dev->zone_capacity); + dev_capacity_sects = mb_to_sects(dev->size); + dev->zone_size_sects = mb_to_sects(dev->zone_size); + dev->nr_zones = round_up(dev_capacity_sects, dev->zone_size_sects) + >> ilog2(dev->zone_size_sects); dev->zones = kvmalloc_array(dev->nr_zones, sizeof(struct nullb_zone), GFP_KERNEL | __GFP_ZERO); -- GitLab From a9cbbb80e3e7dd38ceac166e0698f161862a18ae Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 29 Jan 2021 12:28:20 -0800 Subject: [PATCH 1702/2012] tty: avoid using vfs_iocb_iter_write() for redirected console writes It turns out that the vfs_iocb_iter_{read,write}() functions are entirely broken, and don't actually use the passed-in file pointer for IO - only for the preparatory work (permission checking and for the write_iter function lookup). That worked fine for overlayfs, which always builds the new iocb with the same file pointer that it passes in, but in the general case it ends up doing nonsensical things (and could cause an iterator call that doesn't even match the passed-in file pointer). This subtly broke the tty conversion to write_iter in commit 9bb48c82aced ("tty: implement write_iter"), because the console redirection didn't actually end up redirecting anything, since the passed-in file pointer was basically ignored, and the actual write was done with the original non-redirected console tty after all. The main visible effect of this is that the console messages were no longer logged to /var/log/boot.log during graphical boot. Fix the issue by simply not using the vfs write "helper" function at all, and just redirecting the write entirely internally to the tty layer. Do the target writability permission checks when actually registering the target tty with TIOCCONS instead of at write time. Fixes: 9bb48c82aced ("tty: implement write_iter") Reported-and-tested-by: Hans de Goede Cc: Greg Kroah-Hartman Cc: stable@kernel.org Signed-off-by: Linus Torvalds --- drivers/tty/tty_io.c | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c index 48de20916ca78..816e709afa561 100644 --- a/drivers/tty/tty_io.c +++ b/drivers/tty/tty_io.c @@ -1026,9 +1026,8 @@ void tty_write_message(struct tty_struct *tty, char *msg) * write method will not be invoked in parallel for each device. */ -static ssize_t tty_write(struct kiocb *iocb, struct iov_iter *from) +static ssize_t file_tty_write(struct file *file, struct kiocb *iocb, struct iov_iter *from) { - struct file *file = iocb->ki_filp; struct tty_struct *tty = file_tty(file); struct tty_ldisc *ld; ssize_t ret; @@ -1051,6 +1050,11 @@ static ssize_t tty_write(struct kiocb *iocb, struct iov_iter *from) return ret; } +static ssize_t tty_write(struct kiocb *iocb, struct iov_iter *from) +{ + return file_tty_write(iocb->ki_filp, iocb, from); +} + ssize_t redirected_tty_write(struct kiocb *iocb, struct iov_iter *iter) { struct file *p = NULL; @@ -1060,9 +1064,13 @@ ssize_t redirected_tty_write(struct kiocb *iocb, struct iov_iter *iter) p = get_file(redirect); spin_unlock(&redirect_lock); + /* + * We know the redirected tty is just another tty, we can can + * call file_tty_write() directly with that file pointer. + */ if (p) { ssize_t res; - res = vfs_iocb_iter_write(p, iocb, iter); + res = file_tty_write(p, iocb, iter); fput(p); return res; } @@ -2308,6 +2316,12 @@ static int tioccons(struct file *file) fput(f); return 0; } + if (file->f_op->write_iter != tty_write) + return -ENOTTY; + if (!(file->f_mode & FMODE_WRITE)) + return -EBADF; + if (!(file->f_mode & FMODE_CAN_WRITE)) + return -EINVAL; spin_lock(&redirect_lock); if (redirect) { spin_unlock(&redirect_lock); -- GitLab From 1bea2a937dadd188de70198b0cf3915e05a506e4 Mon Sep 17 00:00:00 2001 From: David Gow Date: Tue, 26 Jan 2021 19:36:04 -0800 Subject: [PATCH 1703/2012] soc: litex: Properly depend on HAS_IOMEM The LiteX SOC controller driver makes use of IOMEM functions like devm_platform_ioremap_resource(), which are only available if CONFIG_HAS_IOMEM is defined. This causes the driver to be enable under make ARCH=um allyesconfig, even though it won't build. By adding a dependency on HAS_IOMEM, the driver will not be enabled on architectures which don't support it. Fixes: 22447a99c97e ("drivers/soc/litex: add LiteX SoC Controller driver") Signed-off-by: David Gow [shorne@gmail.com: Fix typo in commit message pointed out in review] Signed-off-by: Stafford Horne --- drivers/soc/litex/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/soc/litex/Kconfig b/drivers/soc/litex/Kconfig index 7c6b009b6f6cf..7a7c38282e114 100644 --- a/drivers/soc/litex/Kconfig +++ b/drivers/soc/litex/Kconfig @@ -8,6 +8,7 @@ config LITEX config LITEX_SOC_CONTROLLER tristate "Enable LiteX SoC Controller driver" depends on OF || COMPILE_TEST + depends on HAS_IOMEM select LITEX help This option enables the SoC Controller Driver which verifies -- GitLab From 32ada6b0980d86133d080d62371a5787ea2ec5ed Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Thu, 28 Jan 2021 13:45:15 -0600 Subject: [PATCH 1704/2012] dt-bindings: Cleanup standard unit properties Properties with standard unit suffixes already have a type and don't need type definitions. They also default to a single entry, so 'maxItems: 1' can be dropped. adi,ad5758 is an oddball which defined an enum of arrays. While a valid schema, it is simpler as a whole to only define scalar constraints. Cc: Jean Delvare Cc: Lars-Peter Clausen Cc: Dmitry Torokhov Cc: Ulf Hansson Cc: "David S. Miller" Cc: Jakub Kicinski Cc: Serge Semin Cc: linux-hwmon@vger.kernel.org Cc: linux-i2c@vger.kernel.org Cc: linux-iio@vger.kernel.org Cc: linux-arm-kernel@lists.infradead.org Cc: linux-input@vger.kernel.org Cc: linux-mmc@vger.kernel.org Cc: netdev@vger.kernel.org Cc: linux-pm@vger.kernel.org Cc: linux-rtc@vger.kernel.org Cc: linux-serial@vger.kernel.org Cc: alsa-devel@alsa-project.org Cc: linux-watchdog@vger.kernel.org Signed-off-by: Rob Herring Acked-by: Guenter Roeck Acked-by: Mark Brown Acked-by: Wolfram Sang # for I2C Acked-by: Alexandre Belloni Acked-by: Greg Kroah-Hartman Acked-by: Sebastian Reichel # for power-supply Acked-by: Jonathan Cameron #for-iio Acked-by: Alexandre TORGUE Link: https://lore.kernel.org/r/20210128194515.743252-1-robh@kernel.org --- .../devicetree/bindings/arm/cpus.yaml | 1 - .../bindings/extcon/wlf,arizona.yaml | 1 - .../bindings/hwmon/adi,ltc2947.yaml | 1 - .../bindings/hwmon/baikal,bt1-pvt.yaml | 8 ++-- .../devicetree/bindings/hwmon/ti,tmp513.yaml | 1 - .../devicetree/bindings/i2c/i2c-gpio.yaml | 2 - .../bindings/i2c/snps,designware-i2c.yaml | 3 -- .../bindings/iio/adc/maxim,max9611.yaml | 1 - .../bindings/iio/adc/st,stm32-adc.yaml | 1 - .../bindings/iio/adc/ti,palmas-gpadc.yaml | 2 - .../bindings/iio/dac/adi,ad5758.yaml | 41 ++++++++++++------- .../bindings/iio/health/maxim,max30100.yaml | 1 - .../input/touchscreen/touchscreen.yaml | 2 - .../bindings/mmc/mmc-controller.yaml | 1 - .../bindings/mmc/mmc-pwrseq-simple.yaml | 2 - .../bindings/net/ethernet-controller.yaml | 2 - .../devicetree/bindings/net/snps,dwmac.yaml | 1 - .../bindings/power/supply/battery.yaml | 3 -- .../bindings/power/supply/bq2515x.yaml | 1 - .../bindings/regulator/dlg,da9121.yaml | 1 - .../bindings/regulator/fixed-regulator.yaml | 2 - .../devicetree/bindings/rtc/rtc.yaml | 2 - .../devicetree/bindings/serial/pl011.yaml | 2 - .../devicetree/bindings/sound/sgtl5000.yaml | 2 - .../bindings/watchdog/watchdog.yaml | 1 - 25 files changed, 29 insertions(+), 56 deletions(-) diff --git a/Documentation/devicetree/bindings/arm/cpus.yaml b/Documentation/devicetree/bindings/arm/cpus.yaml index 14cd727d3c4b7..f02fd10de604b 100644 --- a/Documentation/devicetree/bindings/arm/cpus.yaml +++ b/Documentation/devicetree/bindings/arm/cpus.yaml @@ -232,7 +232,6 @@ properties: by this cpu (see ./idle-states.yaml). capacity-dmips-mhz: - $ref: '/schemas/types.yaml#/definitions/uint32' description: u32 value representing CPU capacity (see ./cpu-capacity.txt) in DMIPS/MHz, relative to highest capacity-dmips-mhz diff --git a/Documentation/devicetree/bindings/extcon/wlf,arizona.yaml b/Documentation/devicetree/bindings/extcon/wlf,arizona.yaml index 5fe784f487c5e..efdf59abb2e18 100644 --- a/Documentation/devicetree/bindings/extcon/wlf,arizona.yaml +++ b/Documentation/devicetree/bindings/extcon/wlf,arizona.yaml @@ -85,7 +85,6 @@ properties: wlf,micd-timeout-ms: description: Timeout for microphone detection, specified in milliseconds. - $ref: "/schemas/types.yaml#/definitions/uint32" wlf,micd-force-micbias: description: diff --git a/Documentation/devicetree/bindings/hwmon/adi,ltc2947.yaml b/Documentation/devicetree/bindings/hwmon/adi,ltc2947.yaml index eef614962b103..bf04151b63d2b 100644 --- a/Documentation/devicetree/bindings/hwmon/adi,ltc2947.yaml +++ b/Documentation/devicetree/bindings/hwmon/adi,ltc2947.yaml @@ -49,7 +49,6 @@ properties: description: This property controls the Accumulation Dead band which allows to set the level of current below which no accumulation takes place. - $ref: /schemas/types.yaml#/definitions/uint32 maximum: 255 default: 0 diff --git a/Documentation/devicetree/bindings/hwmon/baikal,bt1-pvt.yaml b/Documentation/devicetree/bindings/hwmon/baikal,bt1-pvt.yaml index 00a6511354e6c..5d3ce641fcdeb 100644 --- a/Documentation/devicetree/bindings/hwmon/baikal,bt1-pvt.yaml +++ b/Documentation/devicetree/bindings/hwmon/baikal,bt1-pvt.yaml @@ -73,11 +73,9 @@ properties: description: | Temperature sensor trimming factor. It can be used to manually adjust the temperature measurements within 7.130 degrees Celsius. - maxItems: 1 - items: - default: 0 - minimum: 0 - maximum: 7130 + default: 0 + minimum: 0 + maximum: 7130 additionalProperties: false diff --git a/Documentation/devicetree/bindings/hwmon/ti,tmp513.yaml b/Documentation/devicetree/bindings/hwmon/ti,tmp513.yaml index 8020d739a078e..1502b22c77ccb 100644 --- a/Documentation/devicetree/bindings/hwmon/ti,tmp513.yaml +++ b/Documentation/devicetree/bindings/hwmon/ti,tmp513.yaml @@ -52,7 +52,6 @@ properties: ti,bus-range-microvolt: description: | This is the operating range of the bus voltage in microvolt - $ref: /schemas/types.yaml#/definitions/uint32 enum: [16000000, 32000000] default: 32000000 diff --git a/Documentation/devicetree/bindings/i2c/i2c-gpio.yaml b/Documentation/devicetree/bindings/i2c/i2c-gpio.yaml index cc3aa2a5e70bf..ff99344788ab8 100644 --- a/Documentation/devicetree/bindings/i2c/i2c-gpio.yaml +++ b/Documentation/devicetree/bindings/i2c/i2c-gpio.yaml @@ -39,11 +39,9 @@ properties: i2c-gpio,delay-us: description: delay between GPIO operations (may depend on each platform) - $ref: /schemas/types.yaml#/definitions/uint32 i2c-gpio,timeout-ms: description: timeout to get data - $ref: /schemas/types.yaml#/definitions/uint32 # Deprecated properties, do not use in new device tree sources: gpios: diff --git a/Documentation/devicetree/bindings/i2c/snps,designware-i2c.yaml b/Documentation/devicetree/bindings/i2c/snps,designware-i2c.yaml index c22b66b6219ea..d9293c57f573c 100644 --- a/Documentation/devicetree/bindings/i2c/snps,designware-i2c.yaml +++ b/Documentation/devicetree/bindings/i2c/snps,designware-i2c.yaml @@ -66,21 +66,18 @@ properties: default: 400000 i2c-sda-hold-time-ns: - maxItems: 1 description: | The property should contain the SDA hold time in nanoseconds. This option is only supported in hardware blocks version 1.11a or newer or on Microsemi SoCs. i2c-scl-falling-time-ns: - maxItems: 1 description: | The property should contain the SCL falling time in nanoseconds. This value is used to compute the tLOW period. default: 300 i2c-sda-falling-time-ns: - maxItems: 1 description: | The property should contain the SDA falling time in nanoseconds. This value is used to compute the tHIGH period. diff --git a/Documentation/devicetree/bindings/iio/adc/maxim,max9611.yaml b/Documentation/devicetree/bindings/iio/adc/maxim,max9611.yaml index 9475a9e6e9207..95774a55629df 100644 --- a/Documentation/devicetree/bindings/iio/adc/maxim,max9611.yaml +++ b/Documentation/devicetree/bindings/iio/adc/maxim,max9611.yaml @@ -23,7 +23,6 @@ properties: maxItems: 1 shunt-resistor-micro-ohms: - $ref: /schemas/types.yaml#/definitions/uint32 description: | Value in micro Ohms of the shunt resistor connected between the RS+ and RS- inputs, across which the current is measured. Value needed to compute diff --git a/Documentation/devicetree/bindings/iio/adc/st,stm32-adc.yaml b/Documentation/devicetree/bindings/iio/adc/st,stm32-adc.yaml index 28417b31b5589..517e32976c304 100644 --- a/Documentation/devicetree/bindings/iio/adc/st,stm32-adc.yaml +++ b/Documentation/devicetree/bindings/iio/adc/st,stm32-adc.yaml @@ -246,7 +246,6 @@ patternProperties: Resolution (bits) to use for conversions: - can be 6, 8, 10 or 12 on stm32f4 - can be 8, 10, 12, 14 or 16 on stm32h7 and stm32mp1 - $ref: /schemas/types.yaml#/definitions/uint32 st,adc-channels: description: | diff --git a/Documentation/devicetree/bindings/iio/adc/ti,palmas-gpadc.yaml b/Documentation/devicetree/bindings/iio/adc/ti,palmas-gpadc.yaml index 692dacd0fee5f..7b895784e0085 100644 --- a/Documentation/devicetree/bindings/iio/adc/ti,palmas-gpadc.yaml +++ b/Documentation/devicetree/bindings/iio/adc/ti,palmas-gpadc.yaml @@ -42,7 +42,6 @@ properties: const: 1 ti,channel0-current-microamp: - $ref: /schemas/types.yaml#/definitions/uint32 description: Channel 0 current in uA. enum: - 0 @@ -51,7 +50,6 @@ properties: - 20 ti,channel3-current-microamp: - $ref: /schemas/types.yaml#/definitions/uint32 description: Channel 3 current in uA. enum: - 0 diff --git a/Documentation/devicetree/bindings/iio/dac/adi,ad5758.yaml b/Documentation/devicetree/bindings/iio/dac/adi,ad5758.yaml index 626ccb6fe21e1..fd4edca34a285 100644 --- a/Documentation/devicetree/bindings/iio/dac/adi,ad5758.yaml +++ b/Documentation/devicetree/bindings/iio/dac/adi,ad5758.yaml @@ -46,31 +46,42 @@ properties: two properties must be present: adi,range-microvolt: - $ref: /schemas/types.yaml#/definitions/int32-array description: | Voltage output range specified as - enum: - - [[0, 5000000]] - - [[0, 10000000]] - - [[-5000000, 5000000]] - - [[-10000000, 10000000]] + oneOf: + - items: + - const: 0 + - enum: [5000000, 10000000] + - items: + - const: -5000000 + - const: 5000000 + - items: + - const: -10000000 + - const: 10000000 adi,range-microamp: - $ref: /schemas/types.yaml#/definitions/int32-array description: | Current output range specified as - enum: - - [[0, 20000]] - - [[0, 24000]] - - [[4, 24000]] - - [[-20000, 20000]] - - [[-24000, 24000]] - - [[-1000, 22000]] + oneOf: + - items: + - const: 0 + - enum: [20000, 24000] + - items: + - const: 4 + - const: 24000 + - items: + - const: -20000 + - const: 20000 + - items: + - const: -24000 + - const: 24000 + - items: + - const: -1000 + - const: 22000 reset-gpios: true adi,dc-dc-ilim-microamp: - $ref: /schemas/types.yaml#/definitions/uint32 enum: [150000, 200000, 250000, 300000, 350000, 400000] description: | The dc-to-dc converter current limit. diff --git a/Documentation/devicetree/bindings/iio/health/maxim,max30100.yaml b/Documentation/devicetree/bindings/iio/health/maxim,max30100.yaml index 64b8626370393..967778fb0ce89 100644 --- a/Documentation/devicetree/bindings/iio/health/maxim,max30100.yaml +++ b/Documentation/devicetree/bindings/iio/health/maxim,max30100.yaml @@ -21,7 +21,6 @@ properties: description: Connected to ADC_RDY pin. maxim,led-current-microamp: - $ref: /schemas/types.yaml#/definitions/uint32-array minItems: 2 maxItems: 2 description: | diff --git a/Documentation/devicetree/bindings/input/touchscreen/touchscreen.yaml b/Documentation/devicetree/bindings/input/touchscreen/touchscreen.yaml index a771a15f053fa..046ace461cc9d 100644 --- a/Documentation/devicetree/bindings/input/touchscreen/touchscreen.yaml +++ b/Documentation/devicetree/bindings/input/touchscreen/touchscreen.yaml @@ -70,11 +70,9 @@ properties: touchscreen-x-mm: description: horizontal length in mm of the touchscreen - $ref: /schemas/types.yaml#/definitions/uint32 touchscreen-y-mm: description: vertical length in mm of the touchscreen - $ref: /schemas/types.yaml#/definitions/uint32 dependencies: touchscreen-size-x: [ touchscreen-size-y ] diff --git a/Documentation/devicetree/bindings/mmc/mmc-controller.yaml b/Documentation/devicetree/bindings/mmc/mmc-controller.yaml index 186f04ba93579..e674bba52ee97 100644 --- a/Documentation/devicetree/bindings/mmc/mmc-controller.yaml +++ b/Documentation/devicetree/bindings/mmc/mmc-controller.yaml @@ -259,7 +259,6 @@ properties: waiting for I/O signalling and card power supply to be stable, regardless of whether pwrseq-simple is used. Default to 10ms if no available. - $ref: /schemas/types.yaml#/definitions/uint32 default: 10 supports-cqe: diff --git a/Documentation/devicetree/bindings/mmc/mmc-pwrseq-simple.yaml b/Documentation/devicetree/bindings/mmc/mmc-pwrseq-simple.yaml index 6cd57863c1dbd..226fb191913d2 100644 --- a/Documentation/devicetree/bindings/mmc/mmc-pwrseq-simple.yaml +++ b/Documentation/devicetree/bindings/mmc/mmc-pwrseq-simple.yaml @@ -41,13 +41,11 @@ properties: description: Delay in ms after powering the card and de-asserting the reset-gpios (if any). - $ref: /schemas/types.yaml#/definitions/uint32 power-off-delay-us: description: Delay in us after asserting the reset-gpios (if any) during power off of the card. - $ref: /schemas/types.yaml#/definitions/uint32 required: - compatible diff --git a/Documentation/devicetree/bindings/net/ethernet-controller.yaml b/Documentation/devicetree/bindings/net/ethernet-controller.yaml index 0965f6515f9ec..dac4aadb6e2e7 100644 --- a/Documentation/devicetree/bindings/net/ethernet-controller.yaml +++ b/Documentation/devicetree/bindings/net/ethernet-controller.yaml @@ -122,7 +122,6 @@ properties: such as flow control thresholds. rx-internal-delay-ps: - $ref: /schemas/types.yaml#/definitions/uint32 description: | RGMII Receive Clock Delay defined in pico seconds. This is used for controllers that have configurable RX internal delays. @@ -140,7 +139,6 @@ properties: is used for components that can have configurable fifo sizes. tx-internal-delay-ps: - $ref: /schemas/types.yaml#/definitions/uint32 description: | RGMII Transmit Clock Delay defined in pico seconds. This is used for controllers that have configurable TX internal delays. diff --git a/Documentation/devicetree/bindings/net/snps,dwmac.yaml b/Documentation/devicetree/bindings/net/snps,dwmac.yaml index b2f6083f556af..9ac77b8cb7670 100644 --- a/Documentation/devicetree/bindings/net/snps,dwmac.yaml +++ b/Documentation/devicetree/bindings/net/snps,dwmac.yaml @@ -208,7 +208,6 @@ properties: Triplet of delays. The 1st cell is reset pre-delay in micro seconds. The 2nd cell is reset pulse in micro seconds. The 3rd cell is reset post-delay in micro seconds. - $ref: /schemas/types.yaml#/definitions/uint32-array minItems: 3 maxItems: 3 diff --git a/Documentation/devicetree/bindings/power/supply/battery.yaml b/Documentation/devicetree/bindings/power/supply/battery.yaml index 0c7e2e44793ba..c3b4b75435918 100644 --- a/Documentation/devicetree/bindings/power/supply/battery.yaml +++ b/Documentation/devicetree/bindings/power/supply/battery.yaml @@ -83,21 +83,18 @@ properties: for each of the battery capacity lookup table. operating-range-celsius: - $ref: /schemas/types.yaml#/definitions/uint32-array description: operating temperature range of a battery items: - description: minimum temperature at which battery can operate - description: maximum temperature at which battery can operate ambient-celsius: - $ref: /schemas/types.yaml#/definitions/uint32-array description: safe range of ambient temperature items: - description: alert when ambient temperature is lower than this value - description: alert when ambient temperature is higher than this value alert-celsius: - $ref: /schemas/types.yaml#/definitions/uint32-array description: safe range of battery temperature items: - description: alert when battery temperature is lower than this value diff --git a/Documentation/devicetree/bindings/power/supply/bq2515x.yaml b/Documentation/devicetree/bindings/power/supply/bq2515x.yaml index 75a56773be4a0..813d6afde6069 100644 --- a/Documentation/devicetree/bindings/power/supply/bq2515x.yaml +++ b/Documentation/devicetree/bindings/power/supply/bq2515x.yaml @@ -50,7 +50,6 @@ properties: maxItems: 1 input-current-limit-microamp: - $ref: /schemas/types.yaml#/definitions/uint32 description: Maximum input current in micro Amps. minimum: 50000 maximum: 500000 diff --git a/Documentation/devicetree/bindings/regulator/dlg,da9121.yaml b/Documentation/devicetree/bindings/regulator/dlg,da9121.yaml index 6f2164f7bc577..228018c87bea8 100644 --- a/Documentation/devicetree/bindings/regulator/dlg,da9121.yaml +++ b/Documentation/devicetree/bindings/regulator/dlg,da9121.yaml @@ -62,7 +62,6 @@ properties: description: IRQ line information. dlg,irq-polling-delay-passive-ms: - $ref: "/schemas/types.yaml#/definitions/uint32" minimum: 1000 maximum: 10000 description: | diff --git a/Documentation/devicetree/bindings/regulator/fixed-regulator.yaml b/Documentation/devicetree/bindings/regulator/fixed-regulator.yaml index d3d0dc13dd8b8..8850c01bd4706 100644 --- a/Documentation/devicetree/bindings/regulator/fixed-regulator.yaml +++ b/Documentation/devicetree/bindings/regulator/fixed-regulator.yaml @@ -72,11 +72,9 @@ properties: startup-delay-us: description: startup time in microseconds - $ref: /schemas/types.yaml#/definitions/uint32 off-on-delay-us: description: off delay time in microseconds - $ref: /schemas/types.yaml#/definitions/uint32 enable-active-high: description: diff --git a/Documentation/devicetree/bindings/rtc/rtc.yaml b/Documentation/devicetree/bindings/rtc/rtc.yaml index d30dc045aac64..0ec3551f12dd3 100644 --- a/Documentation/devicetree/bindings/rtc/rtc.yaml +++ b/Documentation/devicetree/bindings/rtc/rtc.yaml @@ -27,7 +27,6 @@ properties: 1: chargeable quartz-load-femtofarads: - $ref: /schemas/types.yaml#/definitions/uint32 description: The capacitive load of the quartz(x-tal), expressed in femto Farad (fF). The default value shall be listed (if optional), @@ -47,7 +46,6 @@ properties: deprecated: true trickle-resistor-ohms: - $ref: /schemas/types.yaml#/definitions/uint32 description: Selected resistor for trickle charger. Should be given if trickle charger should be enabled. diff --git a/Documentation/devicetree/bindings/serial/pl011.yaml b/Documentation/devicetree/bindings/serial/pl011.yaml index c23c93b400f06..07fa6d26f2b43 100644 --- a/Documentation/devicetree/bindings/serial/pl011.yaml +++ b/Documentation/devicetree/bindings/serial/pl011.yaml @@ -88,14 +88,12 @@ properties: description: Rate at which poll occurs when auto-poll is set. default 100ms. - $ref: /schemas/types.yaml#/definitions/uint32 default: 100 poll-timeout-ms: description: Poll timeout when auto-poll is set, default 3000ms. - $ref: /schemas/types.yaml#/definitions/uint32 default: 3000 required: diff --git a/Documentation/devicetree/bindings/sound/sgtl5000.yaml b/Documentation/devicetree/bindings/sound/sgtl5000.yaml index d116c174b545f..70b4a88310732 100644 --- a/Documentation/devicetree/bindings/sound/sgtl5000.yaml +++ b/Documentation/devicetree/bindings/sound/sgtl5000.yaml @@ -41,14 +41,12 @@ properties: values of 2k, 4k or 8k. If set to 0 it will be off. If this node is not mentioned or if the value is unknown, then micbias resistor is set to 4k. - $ref: "/schemas/types.yaml#/definitions/uint32" enum: [ 0, 2, 4, 8 ] micbias-voltage-m-volts: description: The bias voltage to be used in mVolts. The voltage can take values from 1.25V to 3V by 250mV steps. If this node is not mentioned or the value is unknown, then the value is set to 1.25V. - $ref: "/schemas/types.yaml#/definitions/uint32" enum: [ 1250, 1500, 1750, 2000, 2250, 2500, 2750, 3000 ] lrclk-strength: diff --git a/Documentation/devicetree/bindings/watchdog/watchdog.yaml b/Documentation/devicetree/bindings/watchdog/watchdog.yaml index 4e2c26cd981d9..e3dfb02f0ca52 100644 --- a/Documentation/devicetree/bindings/watchdog/watchdog.yaml +++ b/Documentation/devicetree/bindings/watchdog/watchdog.yaml @@ -19,7 +19,6 @@ properties: pattern: "^watchdog(@.*|-[0-9a-f])?$" timeout-sec: - $ref: /schemas/types.yaml#/definitions/uint32 description: Contains the watchdog timeout in seconds. -- GitLab From 0ba35fe91ce34f2d0feff626efd0062dac41781c Mon Sep 17 00:00:00 2001 From: "Andrea Parri (Microsoft)" Date: Tue, 26 Jan 2021 17:29:07 +0100 Subject: [PATCH 1705/2012] hv_netvsc: Copy packets sent by Hyper-V out of the receive buffer Pointers to receive-buffer packets sent by Hyper-V are used within the guest VM. Hyper-V can send packets with erroneous values or modify packet fields after they are processed by the guest. To defend against these scenarios, copy (sections of) the incoming packet after validating their length and offset fields in netvsc_filter_receive(). In this way, the packet can no longer be modified by the host. Reported-by: Juan Vazquez Signed-off-by: Andrea Parri (Microsoft) Link: https://lore.kernel.org/r/20210126162907.21056-1-parri.andrea@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/hyperv/hyperv_net.h | 93 +++++++++++++++-------------- drivers/net/hyperv/netvsc.c | 20 +++++++ drivers/net/hyperv/netvsc_drv.c | 24 ++++---- drivers/net/hyperv/rndis_filter.c | 99 +++++++++++++++++++++---------- 4 files changed, 150 insertions(+), 86 deletions(-) diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h index 2a87cfa27ac02..e1a497d3c9ba4 100644 --- a/drivers/net/hyperv/hyperv_net.h +++ b/drivers/net/hyperv/hyperv_net.h @@ -105,9 +105,43 @@ struct ndis_recv_scale_param { /* NDIS_RECEIVE_SCALE_PARAMETERS */ u32 processor_masks_entry_size; }; -/* Fwd declaration */ -struct ndis_tcp_ip_checksum_info; -struct ndis_pkt_8021q_info; +struct ndis_tcp_ip_checksum_info { + union { + struct { + u32 is_ipv4:1; + u32 is_ipv6:1; + u32 tcp_checksum:1; + u32 udp_checksum:1; + u32 ip_header_checksum:1; + u32 reserved:11; + u32 tcp_header_offset:10; + } transmit; + struct { + u32 tcp_checksum_failed:1; + u32 udp_checksum_failed:1; + u32 ip_checksum_failed:1; + u32 tcp_checksum_succeeded:1; + u32 udp_checksum_succeeded:1; + u32 ip_checksum_succeeded:1; + u32 loopback:1; + u32 tcp_checksum_value_invalid:1; + u32 ip_checksum_value_invalid:1; + } receive; + u32 value; + }; +}; + +struct ndis_pkt_8021q_info { + union { + struct { + u32 pri:3; /* User Priority */ + u32 cfi:1; /* Canonical Format ID */ + u32 vlanid:12; /* VLAN ID */ + u32 reserved:16; + }; + u32 value; + }; +}; /* * Represent netvsc packet which contains 1 RNDIS and 1 ethernet frame @@ -194,7 +228,8 @@ int netvsc_send(struct net_device *net, struct sk_buff *skb, bool xdp_tx); void netvsc_linkstatus_callback(struct net_device *net, - struct rndis_message *resp); + struct rndis_message *resp, + void *data); int netvsc_recv_callback(struct net_device *net, struct netvsc_device *nvdev, struct netvsc_channel *nvchan); @@ -884,9 +919,10 @@ struct multi_recv_comp { #define NVSP_RSC_MAX 562 /* Max #RSC frags in a vmbus xfer page pkt */ struct nvsc_rsc { - const struct ndis_pkt_8021q_info *vlan; - const struct ndis_tcp_ip_checksum_info *csum_info; - const u32 *hash_info; + struct ndis_pkt_8021q_info vlan; + struct ndis_tcp_ip_checksum_info csum_info; + u32 hash_info; + u8 ppi_flags; /* valid/present bits for the above PPIs */ u8 is_last; /* last RNDIS msg in a vmtransfer_page */ u32 cnt; /* #fragments in an RSC packet */ u32 pktlen; /* Full packet length */ @@ -894,6 +930,10 @@ struct nvsc_rsc { u32 len[NVSP_RSC_MAX]; }; +#define NVSC_RSC_VLAN BIT(0) /* valid/present bit for 'vlan' */ +#define NVSC_RSC_CSUM_INFO BIT(1) /* valid/present bit for 'csum_info' */ +#define NVSC_RSC_HASH_INFO BIT(2) /* valid/present bit for 'hash_info' */ + struct netvsc_stats { u64 packets; u64 bytes; @@ -1002,6 +1042,7 @@ struct net_device_context { struct netvsc_channel { struct vmbus_channel *channel; struct netvsc_device *net_device; + void *recv_buf; /* buffer to copy packets out from the receive buffer */ const struct vmpacket_descriptor *desc; struct napi_struct napi; struct multi_send_data msd; @@ -1234,18 +1275,6 @@ struct rndis_pktinfo_id { u16 pkt_id; }; -struct ndis_pkt_8021q_info { - union { - struct { - u32 pri:3; /* User Priority */ - u32 cfi:1; /* Canonical Format ID */ - u32 vlanid:12; /* VLAN ID */ - u32 reserved:16; - }; - u32 value; - }; -}; - struct ndis_object_header { u8 type; u8 revision; @@ -1436,32 +1465,6 @@ struct ndis_offload_params { }; }; -struct ndis_tcp_ip_checksum_info { - union { - struct { - u32 is_ipv4:1; - u32 is_ipv6:1; - u32 tcp_checksum:1; - u32 udp_checksum:1; - u32 ip_header_checksum:1; - u32 reserved:11; - u32 tcp_header_offset:10; - } transmit; - struct { - u32 tcp_checksum_failed:1; - u32 udp_checksum_failed:1; - u32 ip_checksum_failed:1; - u32 tcp_checksum_succeeded:1; - u32 udp_checksum_succeeded:1; - u32 ip_checksum_succeeded:1; - u32 loopback:1; - u32 tcp_checksum_value_invalid:1; - u32 ip_checksum_value_invalid:1; - } receive; - u32 value; - }; -}; - struct ndis_tcp_lso_info { union { struct { diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index 6184e99c7f31f..0fba8257fc119 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -131,6 +131,7 @@ static void free_netvsc_device(struct rcu_head *head) for (i = 0; i < VRSS_CHANNEL_MAX; i++) { xdp_rxq_info_unreg(&nvdev->chan_table[i].xdp_rxq); + kfree(nvdev->chan_table[i].recv_buf); vfree(nvdev->chan_table[i].mrc.slots); } @@ -1284,6 +1285,19 @@ static int netvsc_receive(struct net_device *ndev, continue; } + /* We're going to copy (sections of) the packet into nvchan->recv_buf; + * make sure that nvchan->recv_buf is large enough to hold the packet. + */ + if (unlikely(buflen > net_device->recv_section_size)) { + nvchan->rsc.cnt = 0; + status = NVSP_STAT_FAIL; + netif_err(net_device_ctx, rx_err, ndev, + "Packet too big: buflen=%u recv_section_size=%u\n", + buflen, net_device->recv_section_size); + + continue; + } + data = recv_buf + offset; nvchan->rsc.is_last = (i == count - 1); @@ -1535,6 +1549,12 @@ struct netvsc_device *netvsc_device_add(struct hv_device *device, for (i = 0; i < VRSS_CHANNEL_MAX; i++) { struct netvsc_channel *nvchan = &net_device->chan_table[i]; + nvchan->recv_buf = kzalloc(device_info->recv_section_size, GFP_KERNEL); + if (nvchan->recv_buf == NULL) { + ret = -ENOMEM; + goto cleanup2; + } + nvchan->channel = device->channel; nvchan->net_device = net_device; u64_stats_init(&nvchan->tx_stats.syncp); diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index ac20c432d4d8f..8176fa0c8b168 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -743,7 +743,8 @@ static netdev_tx_t netvsc_start_xmit(struct sk_buff *skb, * netvsc_linkstatus_callback - Link up/down notification */ void netvsc_linkstatus_callback(struct net_device *net, - struct rndis_message *resp) + struct rndis_message *resp, + void *data) { struct rndis_indicate_status *indicate = &resp->msg.indicate_status; struct net_device_context *ndev_ctx = netdev_priv(net); @@ -757,6 +758,9 @@ void netvsc_linkstatus_callback(struct net_device *net, return; } + /* Copy the RNDIS indicate status into nvchan->recv_buf */ + memcpy(indicate, data + RNDIS_HEADER_SIZE, sizeof(*indicate)); + /* Update the physical link speed when changing to another vSwitch */ if (indicate->status == RNDIS_STATUS_LINK_SPEED_CHANGE) { u32 speed; @@ -771,8 +775,7 @@ void netvsc_linkstatus_callback(struct net_device *net, return; } - speed = *(u32 *)((void *)indicate - + indicate->status_buf_offset) / 10000; + speed = *(u32 *)(data + RNDIS_HEADER_SIZE + indicate->status_buf_offset) / 10000; ndev_ctx->speed = speed; return; } @@ -827,10 +830,11 @@ static struct sk_buff *netvsc_alloc_recv_skb(struct net_device *net, struct xdp_buff *xdp) { struct napi_struct *napi = &nvchan->napi; - const struct ndis_pkt_8021q_info *vlan = nvchan->rsc.vlan; + const struct ndis_pkt_8021q_info *vlan = &nvchan->rsc.vlan; const struct ndis_tcp_ip_checksum_info *csum_info = - nvchan->rsc.csum_info; - const u32 *hash_info = nvchan->rsc.hash_info; + &nvchan->rsc.csum_info; + const u32 *hash_info = &nvchan->rsc.hash_info; + u8 ppi_flags = nvchan->rsc.ppi_flags; struct sk_buff *skb; void *xbuf = xdp->data_hard_start; int i; @@ -874,7 +878,7 @@ static struct sk_buff *netvsc_alloc_recv_skb(struct net_device *net, * We compute it here if the flags are set, because on Linux, the IP * checksum is always checked. */ - if (csum_info && csum_info->receive.ip_checksum_value_invalid && + if ((ppi_flags & NVSC_RSC_CSUM_INFO) && csum_info->receive.ip_checksum_value_invalid && csum_info->receive.ip_checksum_succeeded && skb->protocol == htons(ETH_P_IP)) { /* Check that there is enough space to hold the IP header. */ @@ -886,16 +890,16 @@ static struct sk_buff *netvsc_alloc_recv_skb(struct net_device *net, } /* Do L4 checksum offload if enabled and present. */ - if (csum_info && (net->features & NETIF_F_RXCSUM)) { + if ((ppi_flags & NVSC_RSC_CSUM_INFO) && (net->features & NETIF_F_RXCSUM)) { if (csum_info->receive.tcp_checksum_succeeded || csum_info->receive.udp_checksum_succeeded) skb->ip_summed = CHECKSUM_UNNECESSARY; } - if (hash_info && (net->features & NETIF_F_RXHASH)) + if ((ppi_flags & NVSC_RSC_HASH_INFO) && (net->features & NETIF_F_RXHASH)) skb_set_hash(skb, *hash_info, PKT_HASH_TYPE_L4); - if (vlan) { + if (ppi_flags & NVSC_RSC_VLAN) { u16 vlan_tci = vlan->vlanid | (vlan->pri << VLAN_PRIO_SHIFT) | (vlan->cfi ? VLAN_CFI_MASK : 0); diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c index c8534b6619b8d..6c48a4d627368 100644 --- a/drivers/net/hyperv/rndis_filter.c +++ b/drivers/net/hyperv/rndis_filter.c @@ -127,12 +127,13 @@ static void put_rndis_request(struct rndis_device *dev, } static void dump_rndis_message(struct net_device *netdev, - const struct rndis_message *rndis_msg) + const struct rndis_message *rndis_msg, + const void *data) { switch (rndis_msg->ndis_msg_type) { case RNDIS_MSG_PACKET: if (rndis_msg->msg_len - RNDIS_HEADER_SIZE >= sizeof(struct rndis_packet)) { - const struct rndis_packet *pkt = &rndis_msg->msg.pkt; + const struct rndis_packet *pkt = data + RNDIS_HEADER_SIZE; netdev_dbg(netdev, "RNDIS_MSG_PACKET (len %u, " "data offset %u data len %u, # oob %u, " "oob offset %u, oob len %u, pkt offset %u, " @@ -152,7 +153,7 @@ static void dump_rndis_message(struct net_device *netdev, if (rndis_msg->msg_len - RNDIS_HEADER_SIZE >= sizeof(struct rndis_initialize_complete)) { const struct rndis_initialize_complete *init_complete = - &rndis_msg->msg.init_complete; + data + RNDIS_HEADER_SIZE; netdev_dbg(netdev, "RNDIS_MSG_INIT_C " "(len %u, id 0x%x, status 0x%x, major %d, minor %d, " "device flags %d, max xfer size 0x%x, max pkts %u, " @@ -173,7 +174,7 @@ static void dump_rndis_message(struct net_device *netdev, if (rndis_msg->msg_len - RNDIS_HEADER_SIZE >= sizeof(struct rndis_query_complete)) { const struct rndis_query_complete *query_complete = - &rndis_msg->msg.query_complete; + data + RNDIS_HEADER_SIZE; netdev_dbg(netdev, "RNDIS_MSG_QUERY_C " "(len %u, id 0x%x, status 0x%x, buf len %u, " "buf offset %u)\n", @@ -188,7 +189,7 @@ static void dump_rndis_message(struct net_device *netdev, case RNDIS_MSG_SET_C: if (rndis_msg->msg_len - RNDIS_HEADER_SIZE + sizeof(struct rndis_set_complete)) { const struct rndis_set_complete *set_complete = - &rndis_msg->msg.set_complete; + data + RNDIS_HEADER_SIZE; netdev_dbg(netdev, "RNDIS_MSG_SET_C (len %u, id 0x%x, status 0x%x)\n", rndis_msg->msg_len, @@ -201,7 +202,7 @@ static void dump_rndis_message(struct net_device *netdev, if (rndis_msg->msg_len - RNDIS_HEADER_SIZE >= sizeof(struct rndis_indicate_status)) { const struct rndis_indicate_status *indicate_status = - &rndis_msg->msg.indicate_status; + data + RNDIS_HEADER_SIZE; netdev_dbg(netdev, "RNDIS_MSG_INDICATE " "(len %u, status 0x%x, buf len %u, buf offset %u)\n", rndis_msg->msg_len, @@ -286,8 +287,10 @@ static void rndis_set_link_state(struct rndis_device *rdev, static void rndis_filter_receive_response(struct net_device *ndev, struct netvsc_device *nvdev, - const struct rndis_message *resp) + struct rndis_message *resp, + void *data) { + u32 *req_id = &resp->msg.init_complete.req_id; struct rndis_device *dev = nvdev->extension; struct rndis_request *request = NULL; bool found = false; @@ -312,14 +315,16 @@ static void rndis_filter_receive_response(struct net_device *ndev, return; } + /* Copy the request ID into nvchan->recv_buf */ + *req_id = *(u32 *)(data + RNDIS_HEADER_SIZE); + spin_lock_irqsave(&dev->request_lock, flags); list_for_each_entry(request, &dev->req_list, list_ent) { /* * All request/response message contains RequestId as the 1st * field */ - if (request->request_msg.msg.init_req.req_id - == resp->msg.init_complete.req_id) { + if (request->request_msg.msg.init_req.req_id == *req_id) { found = true; break; } @@ -329,8 +334,10 @@ static void rndis_filter_receive_response(struct net_device *ndev, if (found) { if (resp->msg_len <= sizeof(struct rndis_message) + RNDIS_EXT_LEN) { - memcpy(&request->response_msg, resp, - resp->msg_len); + memcpy(&request->response_msg, resp, RNDIS_HEADER_SIZE + sizeof(*req_id)); + memcpy((void *)&request->response_msg + RNDIS_HEADER_SIZE + sizeof(*req_id), + data + RNDIS_HEADER_SIZE + sizeof(*req_id), + resp->msg_len - RNDIS_HEADER_SIZE - sizeof(*req_id)); if (request->request_msg.ndis_msg_type == RNDIS_MSG_QUERY && request->request_msg.msg. query_req.oid == RNDIS_OID_GEN_MEDIA_CONNECT_STATUS) @@ -359,7 +366,7 @@ static void rndis_filter_receive_response(struct net_device *ndev, netdev_err(ndev, "no rndis request found for this response " "(id 0x%x res type 0x%x)\n", - resp->msg.init_complete.req_id, + *req_id, resp->ndis_msg_type); } } @@ -371,7 +378,7 @@ static void rndis_filter_receive_response(struct net_device *ndev, static inline void *rndis_get_ppi(struct net_device *ndev, struct rndis_packet *rpkt, u32 rpkt_len, u32 type, u8 internal, - u32 ppi_size) + u32 ppi_size, void *data) { struct rndis_per_packet_info *ppi; int len; @@ -396,6 +403,8 @@ static inline void *rndis_get_ppi(struct net_device *ndev, ppi = (struct rndis_per_packet_info *)((ulong)rpkt + rpkt->per_pkt_info_offset); + /* Copy the PPIs into nvchan->recv_buf */ + memcpy(ppi, data + RNDIS_HEADER_SIZE + rpkt->per_pkt_info_offset, rpkt->per_pkt_info_len); len = rpkt->per_pkt_info_len; while (len > 0) { @@ -438,10 +447,29 @@ void rsc_add_data(struct netvsc_channel *nvchan, if (cnt) { nvchan->rsc.pktlen += len; } else { - nvchan->rsc.vlan = vlan; - nvchan->rsc.csum_info = csum_info; + /* The data/values pointed by vlan, csum_info and hash_info are shared + * across the different 'fragments' of the RSC packet; store them into + * the packet itself. + */ + if (vlan != NULL) { + memcpy(&nvchan->rsc.vlan, vlan, sizeof(*vlan)); + nvchan->rsc.ppi_flags |= NVSC_RSC_VLAN; + } else { + nvchan->rsc.ppi_flags &= ~NVSC_RSC_VLAN; + } + if (csum_info != NULL) { + memcpy(&nvchan->rsc.csum_info, csum_info, sizeof(*csum_info)); + nvchan->rsc.ppi_flags |= NVSC_RSC_CSUM_INFO; + } else { + nvchan->rsc.ppi_flags &= ~NVSC_RSC_CSUM_INFO; + } nvchan->rsc.pktlen = len; - nvchan->rsc.hash_info = hash_info; + if (hash_info != NULL) { + nvchan->rsc.csum_info = *csum_info; + nvchan->rsc.ppi_flags |= NVSC_RSC_HASH_INFO; + } else { + nvchan->rsc.ppi_flags &= ~NVSC_RSC_HASH_INFO; + } } nvchan->rsc.data[cnt] = data; @@ -453,7 +481,7 @@ static int rndis_filter_receive_data(struct net_device *ndev, struct netvsc_device *nvdev, struct netvsc_channel *nvchan, struct rndis_message *msg, - u32 data_buflen) + void *data, u32 data_buflen) { struct rndis_packet *rndis_pkt = &msg->msg.pkt; const struct ndis_tcp_ip_checksum_info *csum_info; @@ -461,7 +489,6 @@ static int rndis_filter_receive_data(struct net_device *ndev, const struct rndis_pktinfo_id *pktinfo_id; const u32 *hash_info; u32 data_offset, rpkt_len; - void *data; bool rsc_more = false; int ret; @@ -472,6 +499,9 @@ static int rndis_filter_receive_data(struct net_device *ndev, return NVSP_STAT_FAIL; } + /* Copy the RNDIS packet into nvchan->recv_buf */ + memcpy(rndis_pkt, data + RNDIS_HEADER_SIZE, sizeof(*rndis_pkt)); + /* Validate rndis_pkt offset */ if (rndis_pkt->data_offset >= data_buflen - RNDIS_HEADER_SIZE) { netdev_err(ndev, "invalid rndis packet offset: %u\n", @@ -497,18 +527,17 @@ static int rndis_filter_receive_data(struct net_device *ndev, return NVSP_STAT_FAIL; } - vlan = rndis_get_ppi(ndev, rndis_pkt, rpkt_len, IEEE_8021Q_INFO, 0, sizeof(*vlan)); + vlan = rndis_get_ppi(ndev, rndis_pkt, rpkt_len, IEEE_8021Q_INFO, 0, sizeof(*vlan), + data); csum_info = rndis_get_ppi(ndev, rndis_pkt, rpkt_len, TCPIP_CHKSUM_PKTINFO, 0, - sizeof(*csum_info)); + sizeof(*csum_info), data); hash_info = rndis_get_ppi(ndev, rndis_pkt, rpkt_len, NBL_HASH_VALUE, 0, - sizeof(*hash_info)); + sizeof(*hash_info), data); pktinfo_id = rndis_get_ppi(ndev, rndis_pkt, rpkt_len, RNDIS_PKTINFO_ID, 1, - sizeof(*pktinfo_id)); - - data = (void *)msg + data_offset; + sizeof(*pktinfo_id), data); /* Identify RSC frags, drop erroneous packets */ if (pktinfo_id && (pktinfo_id->flag & RNDIS_PKTINFO_SUBALLOC)) { @@ -537,7 +566,7 @@ static int rndis_filter_receive_data(struct net_device *ndev, * the data packet to the stack, without the rndis trailer padding */ rsc_add_data(nvchan, vlan, csum_info, hash_info, - data, rndis_pkt->data_len); + data + data_offset, rndis_pkt->data_len); if (rsc_more) return NVSP_STAT_SUCCESS; @@ -559,10 +588,18 @@ int rndis_filter_receive(struct net_device *ndev, void *data, u32 buflen) { struct net_device_context *net_device_ctx = netdev_priv(ndev); - struct rndis_message *rndis_msg = data; + struct rndis_message *rndis_msg = nvchan->recv_buf; + + if (buflen < RNDIS_HEADER_SIZE) { + netdev_err(ndev, "Invalid rndis_msg (buflen: %u)\n", buflen); + return NVSP_STAT_FAIL; + } + + /* Copy the RNDIS msg header into nvchan->recv_buf */ + memcpy(rndis_msg, data, RNDIS_HEADER_SIZE); /* Validate incoming rndis_message packet */ - if (buflen < RNDIS_HEADER_SIZE || rndis_msg->msg_len < RNDIS_HEADER_SIZE || + if (rndis_msg->msg_len < RNDIS_HEADER_SIZE || buflen < rndis_msg->msg_len) { netdev_err(ndev, "Invalid rndis_msg (buflen: %u, msg_len: %u)\n", buflen, rndis_msg->msg_len); @@ -570,22 +607,22 @@ int rndis_filter_receive(struct net_device *ndev, } if (netif_msg_rx_status(net_device_ctx)) - dump_rndis_message(ndev, rndis_msg); + dump_rndis_message(ndev, rndis_msg, data); switch (rndis_msg->ndis_msg_type) { case RNDIS_MSG_PACKET: return rndis_filter_receive_data(ndev, net_dev, nvchan, - rndis_msg, buflen); + rndis_msg, data, buflen); case RNDIS_MSG_INIT_C: case RNDIS_MSG_QUERY_C: case RNDIS_MSG_SET_C: /* completion msgs */ - rndis_filter_receive_response(ndev, net_dev, rndis_msg); + rndis_filter_receive_response(ndev, net_dev, rndis_msg, data); break; case RNDIS_MSG_INDICATE: /* notification msgs */ - netvsc_linkstatus_callback(ndev, rndis_msg); + netvsc_linkstatus_callback(ndev, rndis_msg, data); break; default: netdev_err(ndev, -- GitLab From 8c22475148a8d3222be712bd02a74d7279d50daf Mon Sep 17 00:00:00 2001 From: Menglong Dong Date: Wed, 27 Jan 2021 04:33:02 -0800 Subject: [PATCH 1706/2012] net: packet: make pkt_sk() inline It's better make 'pkt_sk()' inline here, as non-inline function shouldn't occur in headers. Besides, this function is simple enough to be inline. Signed-off-by: Menglong Dong Link: https://lore.kernel.org/r/20210127123302.29842-1-dong.menglong@zte.com.cn Signed-off-by: Jakub Kicinski --- net/packet/internal.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/packet/internal.h b/net/packet/internal.h index baafc3f3fa252..5f61e59ebbffa 100644 --- a/net/packet/internal.h +++ b/net/packet/internal.h @@ -139,7 +139,7 @@ struct packet_sock { atomic_t tp_drops ____cacheline_aligned_in_smp; }; -static struct packet_sock *pkt_sk(struct sock *sk) +static inline struct packet_sock *pkt_sk(struct sock *sk) { return (struct packet_sock *)sk; } -- GitLab From e624e6c3e777fb3dfed036b9da4d433aee3608a5 Mon Sep 17 00:00:00 2001 From: Bongsu Jeon Date: Wed, 27 Jan 2021 22:08:28 +0900 Subject: [PATCH 1707/2012] nfc: Add a virtual nci device driver NCI virtual device simulates a NCI device to the user. It can be used to validate the NCI module and applications. This driver supports communication between the virtual NCI device and NCI module. Signed-off-by: Bongsu Jeon Signed-off-by: Jakub Kicinski --- drivers/nfc/Kconfig | 11 ++ drivers/nfc/Makefile | 1 + drivers/nfc/virtual_ncidev.c | 215 +++++++++++++++++++++++++++++++++++ 3 files changed, 227 insertions(+) create mode 100644 drivers/nfc/virtual_ncidev.c diff --git a/drivers/nfc/Kconfig b/drivers/nfc/Kconfig index 75c65d339018b..288c6f1c69792 100644 --- a/drivers/nfc/Kconfig +++ b/drivers/nfc/Kconfig @@ -49,6 +49,17 @@ config NFC_PORT100 If unsure, say N. +config NFC_VIRTUAL_NCI + tristate "NCI device simulator driver" + depends on NFC_NCI + help + NCI virtual device simulates a NCI device to the user. + It can be used to validate the NCI module and applications. + This driver supports communication between the virtual NCI device and + module. + + If unsure, say N. + source "drivers/nfc/fdp/Kconfig" source "drivers/nfc/pn544/Kconfig" source "drivers/nfc/pn533/Kconfig" diff --git a/drivers/nfc/Makefile b/drivers/nfc/Makefile index 5393ba59b17d9..7b1bfde1d971a 100644 --- a/drivers/nfc/Makefile +++ b/drivers/nfc/Makefile @@ -17,3 +17,4 @@ obj-$(CONFIG_NFC_ST_NCI) += st-nci/ obj-$(CONFIG_NFC_NXP_NCI) += nxp-nci/ obj-$(CONFIG_NFC_S3FWRN5) += s3fwrn5/ obj-$(CONFIG_NFC_ST95HF) += st95hf/ +obj-$(CONFIG_NFC_VIRTUAL_NCI) += virtual_ncidev.o diff --git a/drivers/nfc/virtual_ncidev.c b/drivers/nfc/virtual_ncidev.c new file mode 100644 index 0000000000000..f73ee0bf35939 --- /dev/null +++ b/drivers/nfc/virtual_ncidev.c @@ -0,0 +1,215 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Virtual NCI device simulation driver + * + * Copyright (C) 2020 Samsung Electrnoics + * Bongsu Jeon + */ + +#include +#include +#include +#include +#include + +enum virtual_ncidev_mode { + virtual_ncidev_enabled, + virtual_ncidev_disabled, + virtual_ncidev_disabling, +}; + +#define IOCTL_GET_NCIDEV_IDX 0 +#define VIRTUAL_NFC_PROTOCOLS (NFC_PROTO_JEWEL_MASK | \ + NFC_PROTO_MIFARE_MASK | \ + NFC_PROTO_FELICA_MASK | \ + NFC_PROTO_ISO14443_MASK | \ + NFC_PROTO_ISO14443_B_MASK | \ + NFC_PROTO_ISO15693_MASK) + +static enum virtual_ncidev_mode state; +static struct miscdevice miscdev; +static struct sk_buff *send_buff; +static struct nci_dev *ndev; +static DEFINE_MUTEX(nci_mutex); + +static int virtual_nci_open(struct nci_dev *ndev) +{ + return 0; +} + +static int virtual_nci_close(struct nci_dev *ndev) +{ + mutex_lock(&nci_mutex); + kfree_skb(send_buff); + send_buff = NULL; + mutex_unlock(&nci_mutex); + + return 0; +} + +static int virtual_nci_send(struct nci_dev *ndev, struct sk_buff *skb) +{ + mutex_lock(&nci_mutex); + if (state != virtual_ncidev_enabled) { + mutex_unlock(&nci_mutex); + return 0; + } + + if (send_buff) { + mutex_unlock(&nci_mutex); + return -1; + } + send_buff = skb_copy(skb, GFP_KERNEL); + mutex_unlock(&nci_mutex); + + return 0; +} + +static struct nci_ops virtual_nci_ops = { + .open = virtual_nci_open, + .close = virtual_nci_close, + .send = virtual_nci_send +}; + +static ssize_t virtual_ncidev_read(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + size_t actual_len; + + mutex_lock(&nci_mutex); + if (!send_buff) { + mutex_unlock(&nci_mutex); + return 0; + } + + actual_len = min_t(size_t, count, send_buff->len); + + if (copy_to_user(buf, send_buff->data, actual_len)) { + mutex_unlock(&nci_mutex); + return -EFAULT; + } + + skb_pull(send_buff, actual_len); + if (send_buff->len == 0) { + consume_skb(send_buff); + send_buff = NULL; + } + mutex_unlock(&nci_mutex); + + return actual_len; +} + +static ssize_t virtual_ncidev_write(struct file *file, + const char __user *buf, + size_t count, loff_t *ppos) +{ + struct sk_buff *skb; + + skb = alloc_skb(count, GFP_KERNEL); + if (!skb) + return -ENOMEM; + + if (copy_from_user(skb_put(skb, count), buf, count)) { + kfree_skb(skb); + return -EFAULT; + } + + nci_recv_frame(ndev, skb); + return count; +} + +static int virtual_ncidev_open(struct inode *inode, struct file *file) +{ + int ret = 0; + + mutex_lock(&nci_mutex); + if (state != virtual_ncidev_disabled) { + mutex_unlock(&nci_mutex); + return -EBUSY; + } + + ndev = nci_allocate_device(&virtual_nci_ops, VIRTUAL_NFC_PROTOCOLS, + 0, 0); + if (!ndev) { + mutex_unlock(&nci_mutex); + return -ENOMEM; + } + + ret = nci_register_device(ndev); + if (ret < 0) { + nci_free_device(ndev); + mutex_unlock(&nci_mutex); + return ret; + } + state = virtual_ncidev_enabled; + mutex_unlock(&nci_mutex); + + return 0; +} + +static int virtual_ncidev_close(struct inode *inode, struct file *file) +{ + mutex_lock(&nci_mutex); + + if (state == virtual_ncidev_enabled) { + state = virtual_ncidev_disabling; + mutex_unlock(&nci_mutex); + + nci_unregister_device(ndev); + nci_free_device(ndev); + + mutex_lock(&nci_mutex); + } + + state = virtual_ncidev_disabled; + mutex_unlock(&nci_mutex); + + return 0; +} + +static long virtual_ncidev_ioctl(struct file *flip, unsigned int cmd, + unsigned long arg) +{ + struct nfc_dev *nfc_dev = ndev->nfc_dev; + void __user *p = (void __user *)arg; + + if (cmd != IOCTL_GET_NCIDEV_IDX) + return -ENOTTY; + + if (copy_to_user(p, &nfc_dev->idx, sizeof(nfc_dev->idx))) + return -EFAULT; + + return 0; +} + +static const struct file_operations virtual_ncidev_fops = { + .owner = THIS_MODULE, + .read = virtual_ncidev_read, + .write = virtual_ncidev_write, + .open = virtual_ncidev_open, + .release = virtual_ncidev_close, + .unlocked_ioctl = virtual_ncidev_ioctl +}; + +static int __init virtual_ncidev_init(void) +{ + state = virtual_ncidev_disabled; + miscdev.minor = MISC_DYNAMIC_MINOR; + miscdev.name = "virtual_nci"; + miscdev.fops = &virtual_ncidev_fops; + miscdev.mode = S_IALLUGO; + + return misc_register(&miscdev); +} + +static void __exit virtual_ncidev_exit(void) +{ + misc_deregister(&miscdev); +} + +module_init(virtual_ncidev_init); +module_exit(virtual_ncidev_exit); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Virtual NCI device simulation driver"); +MODULE_AUTHOR("Bongsu Jeon "); -- GitLab From f595cf1242f3d64d78f9c96fa56bb5e22146d0ca Mon Sep 17 00:00:00 2001 From: Bongsu Jeon Date: Wed, 27 Jan 2021 22:08:29 +0900 Subject: [PATCH 1708/2012] selftests: Add nci suite This is the NCI test suite. It tests the NFC/NCI module using virtual NCI device. Test cases consist of making the virtual NCI device on/off and controlling the device's polling for NCI1.0 and NCI2.0 version. Signed-off-by: Bongsu Jeon Signed-off-by: Jakub Kicinski --- MAINTAINERS | 8 + tools/testing/selftests/Makefile | 1 + tools/testing/selftests/nci/Makefile | 6 + tools/testing/selftests/nci/config | 3 + tools/testing/selftests/nci/nci_dev.c | 599 ++++++++++++++++++++++++++ 5 files changed, 617 insertions(+) create mode 100644 tools/testing/selftests/nci/Makefile create mode 100644 tools/testing/selftests/nci/config create mode 100644 tools/testing/selftests/nci/nci_dev.c diff --git a/MAINTAINERS b/MAINTAINERS index bed957ba73fb1..627e90a9e2961 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -12540,6 +12540,14 @@ F: include/net/nfc/ F: include/uapi/linux/nfc.h F: net/nfc/ +NFC VIRTUAL NCI DEVICE DRIVER +M: Bongsu Jeon +L: netdev@vger.kernel.org +L: linux-nfc@lists.01.org (moderated for non-subscribers) +S: Supported +F: drivers/nfc/virtual_ncidev.c +F: tools/testing/selftests/nci/ + NFS, SUNRPC, AND LOCKD CLIENTS M: Trond Myklebust M: Anna Schumaker diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index 8a917cb4426a0..c42aacec50386 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -34,6 +34,7 @@ TARGETS += memory-hotplug TARGETS += mincore TARGETS += mount TARGETS += mqueue +TARGETS += nci TARGETS += net TARGETS += net/forwarding TARGETS += net/mptcp diff --git a/tools/testing/selftests/nci/Makefile b/tools/testing/selftests/nci/Makefile new file mode 100644 index 0000000000000..47669a1d6a598 --- /dev/null +++ b/tools/testing/selftests/nci/Makefile @@ -0,0 +1,6 @@ +# SPDX-License-Identifier: GPL-2.0 +CFLAGS += -Wl,-no-as-needed -Wall +LDFLAGS += -lpthread + +TEST_GEN_PROGS := nci_dev +include ../lib.mk diff --git a/tools/testing/selftests/nci/config b/tools/testing/selftests/nci/config new file mode 100644 index 0000000000000..b084e78276be4 --- /dev/null +++ b/tools/testing/selftests/nci/config @@ -0,0 +1,3 @@ +CONFIG_NFC=y +CONFIG_NFC_NCI=y +CONFIG_NFC_VIRTUAL_NCI=y diff --git a/tools/testing/selftests/nci/nci_dev.c b/tools/testing/selftests/nci/nci_dev.c new file mode 100644 index 0000000000000..57b505cb15618 --- /dev/null +++ b/tools/testing/selftests/nci/nci_dev.c @@ -0,0 +1,599 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2021 Samsung Electrnoics + * Bongsu Jeon + * + * Test code for nci + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../kselftest_harness.h" + +#define GENLMSG_DATA(glh) ((void *)(NLMSG_DATA(glh) + GENL_HDRLEN)) +#define GENLMSG_PAYLOAD(glh) (NLMSG_PAYLOAD(glh, 0) - GENL_HDRLEN) +#define NLA_DATA(na) ((void *)((char *)(na) + NLA_HDRLEN)) +#define NLA_PAYLOAD(len) ((len) - NLA_HDRLEN) + +#define MAX_MSG_SIZE 1024 + +#define IOCTL_GET_NCIDEV_IDX 0 +#define VIRTUAL_NFC_PROTOCOLS (NFC_PROTO_JEWEL_MASK | \ + NFC_PROTO_MIFARE_MASK | \ + NFC_PROTO_FELICA_MASK | \ + NFC_PROTO_ISO14443_MASK | \ + NFC_PROTO_ISO14443_B_MASK | \ + NFC_PROTO_ISO15693_MASK) + +const __u8 nci_reset_cmd[] = {0x20, 0x00, 0x01, 0x01}; +const __u8 nci_init_cmd[] = {0x20, 0x01, 0x00}; +const __u8 nci_rf_discovery_cmd[] = {0x21, 0x03, 0x09, 0x04, 0x00, 0x01, + 0x01, 0x01, 0x02, 0x01, 0x06, 0x01}; +const __u8 nci_init_cmd_v2[] = {0x20, 0x01, 0x02, 0x00, 0x00}; +const __u8 nci_rf_disc_map_cmd[] = {0x21, 0x00, 0x07, 0x02, 0x04, 0x03, + 0x02, 0x05, 0x03, 0x03}; +const __u8 nci_rf_deact_cmd[] = {0x21, 0x06, 0x01, 0x00}; +const __u8 nci_reset_rsp[] = {0x40, 0x00, 0x03, 0x00, 0x10, 0x01}; +const __u8 nci_reset_rsp_v2[] = {0x40, 0x00, 0x01, 0x00}; +const __u8 nci_reset_ntf[] = {0x60, 0x00, 0x09, 0x02, 0x01, 0x20, 0x0e, + 0x04, 0x61, 0x00, 0x04, 0x02}; +const __u8 nci_init_rsp[] = {0x40, 0x01, 0x14, 0x00, 0x02, 0x0e, 0x02, + 0x00, 0x03, 0x01, 0x02, 0x03, 0x02, 0xc8, + 0x00, 0xff, 0x10, 0x00, 0x0e, 0x12, 0x00, + 0x00, 0x04}; +const __u8 nci_init_rsp_v2[] = {0x40, 0x01, 0x1c, 0x00, 0x1a, 0x7e, 0x06, + 0x00, 0x02, 0x92, 0x04, 0xff, 0xff, 0x01, + 0x00, 0x40, 0x06, 0x00, 0x00, 0x01, 0x01, + 0x00, 0x02, 0x00, 0x03, 0x01, 0x01, 0x06, + 0x00, 0x80, 0x00}; +const __u8 nci_rf_disc_map_rsp[] = {0x41, 0x00, 0x01, 0x00}; +const __u8 nci_rf_disc_rsp[] = {0x41, 0x03, 0x01, 0x00}; +const __u8 nci_rf_deact_rsp[] = {0x41, 0x06, 0x01, 0x00}; + +struct msgtemplate { + struct nlmsghdr n; + struct genlmsghdr g; + char buf[MAX_MSG_SIZE]; +}; + +static int create_nl_socket(void) +{ + int fd; + struct sockaddr_nl local; + + fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC); + if (fd < 0) + return -1; + + memset(&local, 0, sizeof(local)); + local.nl_family = AF_NETLINK; + + if (bind(fd, (struct sockaddr *)&local, sizeof(local)) < 0) + goto error; + + return fd; +error: + close(fd); + return -1; +} + +static int send_cmd_mt_nla(int sd, __u16 nlmsg_type, __u32 nlmsg_pid, + __u8 genl_cmd, int nla_num, __u16 nla_type[], + void *nla_data[], int nla_len[]) +{ + struct sockaddr_nl nladdr; + struct msgtemplate msg; + struct nlattr *na; + int cnt, prv_len; + int r, buflen; + char *buf; + + msg.n.nlmsg_len = NLMSG_LENGTH(GENL_HDRLEN); + msg.n.nlmsg_type = nlmsg_type; + msg.n.nlmsg_flags = NLM_F_REQUEST; + msg.n.nlmsg_seq = 0; + msg.n.nlmsg_pid = nlmsg_pid; + msg.g.cmd = genl_cmd; + msg.g.version = 0x1; + + prv_len = 0; + for (cnt = 0; cnt < nla_num; cnt++) { + na = (struct nlattr *)(GENLMSG_DATA(&msg) + prv_len); + na->nla_type = nla_type[cnt]; + na->nla_len = nla_len[cnt] + NLA_HDRLEN; + + if (nla_len > 0) + memcpy(NLA_DATA(na), nla_data[cnt], nla_len[cnt]); + + msg.n.nlmsg_len += NLMSG_ALIGN(na->nla_len); + prv_len = na->nla_len; + } + + buf = (char *)&msg; + buflen = msg.n.nlmsg_len; + memset(&nladdr, 0, sizeof(nladdr)); + nladdr.nl_family = AF_NETLINK; + + while ((r = sendto(sd, buf, buflen, 0, (struct sockaddr *)&nladdr, + sizeof(nladdr))) < buflen) { + if (r > 0) { + buf += r; + buflen -= r; + } else if (errno != EAGAIN) { + return -1; + } + } + return 0; +} + +static int send_get_nfc_family(int sd, __u32 pid) +{ + __u16 nla_get_family_type = CTRL_ATTR_FAMILY_NAME; + void *nla_get_family_data; + int nla_get_family_len; + char family_name[100]; + + nla_get_family_len = strlen(NFC_GENL_NAME) + 1; + strcpy(family_name, NFC_GENL_NAME); + nla_get_family_data = family_name; + + return send_cmd_mt_nla(sd, GENL_ID_CTRL, pid, CTRL_CMD_GETFAMILY, + 1, &nla_get_family_type, + &nla_get_family_data, &nla_get_family_len); +} + +static int get_family_id(int sd, __u32 pid) +{ + struct { + struct nlmsghdr n; + struct genlmsghdr g; + char buf[512]; + } ans; + struct nlattr *na; + int rep_len; + __u16 id; + int rc; + + rc = send_get_nfc_family(sd, pid); + + if (rc < 0) + return 0; + + rep_len = recv(sd, &ans, sizeof(ans), 0); + + if (ans.n.nlmsg_type == NLMSG_ERROR || rep_len < 0 || + !NLMSG_OK(&ans.n, rep_len)) + return 0; + + na = (struct nlattr *)GENLMSG_DATA(&ans); + na = (struct nlattr *)((char *)na + NLA_ALIGN(na->nla_len)); + if (na->nla_type == CTRL_ATTR_FAMILY_ID) + id = *(__u16 *)NLA_DATA(na); + + return id; +} + +static int send_cmd_with_idx(int sd, __u16 nlmsg_type, __u32 nlmsg_pid, + __u8 genl_cmd, int dev_id) +{ + __u16 nla_type = NFC_ATTR_DEVICE_INDEX; + void *nla_data = &dev_id; + int nla_len = 4; + + return send_cmd_mt_nla(sd, nlmsg_type, nlmsg_pid, genl_cmd, 1, + &nla_type, &nla_data, &nla_len); +} + +static int get_nci_devid(int sd, __u16 fid, __u32 pid, int dev_id, struct msgtemplate *msg) +{ + int rc, rep_len; + + rc = send_cmd_with_idx(sd, fid, pid, NFC_CMD_GET_DEVICE, dev_id); + if (rc < 0) { + rc = -1; + goto error; + } + + rep_len = recv(sd, msg, sizeof(*msg), 0); + if (rep_len < 0) { + rc = -2; + goto error; + } + + if (msg->n.nlmsg_type == NLMSG_ERROR || + !NLMSG_OK(&msg->n, rep_len)) { + rc = -3; + goto error; + } + + return 0; +error: + return rc; +} + +static __u8 get_dev_enable_state(struct msgtemplate *msg) +{ + struct nlattr *na; + int rep_len; + int len; + + rep_len = GENLMSG_PAYLOAD(&msg->n); + na = (struct nlattr *)GENLMSG_DATA(msg); + len = 0; + + while (len < rep_len) { + len += NLA_ALIGN(na->nla_len); + if (na->nla_type == NFC_ATTR_DEVICE_POWERED) + return *(char *)NLA_DATA(na); + na = (struct nlattr *)(GENLMSG_DATA(msg) + len); + } + + return rep_len; +} + +FIXTURE(NCI) { + int virtual_nci_fd; + bool open_state; + int dev_idex; + bool isNCI2; + int proto; + __u32 pid; + __u16 fid; + int sd; +}; + +FIXTURE_VARIANT(NCI) { + bool isNCI2; +}; + +FIXTURE_VARIANT_ADD(NCI, NCI1_0) { + .isNCI2 = false, +}; + +FIXTURE_VARIANT_ADD(NCI, NCI2_0) { + .isNCI2 = true, +}; + +static void *virtual_dev_open(void *data) +{ + char buf[258]; + int dev_fd; + int len; + + dev_fd = *(int *)data; + + while ((len = read(dev_fd, buf, 258)) == 0) + ; + if (len <= 0) + goto error; + if (len != sizeof(nci_reset_cmd)) + goto error; + if (memcmp(nci_reset_cmd, buf, len)) + goto error; + write(dev_fd, nci_reset_rsp, sizeof(nci_reset_rsp)); + + while ((len = read(dev_fd, buf, 258)) == 0) + ; + if (len <= 0) + goto error; + if (len != sizeof(nci_init_cmd)) + goto error; + if (memcmp(nci_init_cmd, buf, len)) + goto error; + write(dev_fd, nci_init_rsp, sizeof(nci_init_rsp)); + + while ((len = read(dev_fd, buf, 258)) == 0) + ; + if (len <= 0) + goto error; + if (len != sizeof(nci_rf_disc_map_cmd)) + goto error; + if (memcmp(nci_rf_disc_map_cmd, buf, len)) + goto error; + write(dev_fd, nci_rf_disc_map_rsp, sizeof(nci_rf_disc_map_rsp)); + + return (void *)0; +error: + return (void *)-1; +} + +static void *virtual_dev_open_v2(void *data) +{ + char buf[258]; + int dev_fd; + int len; + + dev_fd = *(int *)data; + + while ((len = read(dev_fd, buf, 258)) == 0) + ; + if (len <= 0) + goto error; + if (len != sizeof(nci_reset_cmd)) + goto error; + if (memcmp(nci_reset_cmd, buf, len)) + goto error; + write(dev_fd, nci_reset_rsp_v2, sizeof(nci_reset_rsp_v2)); + write(dev_fd, nci_reset_ntf, sizeof(nci_reset_ntf)); + + while ((len = read(dev_fd, buf, 258)) == 0) + ; + if (len <= 0) + goto error; + if (len != sizeof(nci_init_cmd_v2)) + goto error; + if (memcmp(nci_init_cmd_v2, buf, len)) + goto error; + write(dev_fd, nci_init_rsp_v2, sizeof(nci_init_rsp_v2)); + + while ((len = read(dev_fd, buf, 258)) == 0) + ; + if (len <= 0) + goto error; + if (len != sizeof(nci_rf_disc_map_cmd)) + goto error; + if (memcmp(nci_rf_disc_map_cmd, buf, len)) + goto error; + write(dev_fd, nci_rf_disc_map_rsp, sizeof(nci_rf_disc_map_rsp)); + + return (void *)0; +error: + return (void *)-1; +} + +FIXTURE_SETUP(NCI) +{ + struct msgtemplate msg; + pthread_t thread_t; + int status; + int rc; + + self->open_state = false; + self->proto = VIRTUAL_NFC_PROTOCOLS; + self->isNCI2 = variant->isNCI2; + + self->sd = create_nl_socket(); + ASSERT_NE(self->sd, -1); + + self->pid = getpid(); + self->fid = get_family_id(self->sd, self->pid); + ASSERT_NE(self->fid, -1); + + self->virtual_nci_fd = open("/dev/virtual_nci", O_RDWR); + ASSERT_GT(self->virtual_nci_fd, -1); + + rc = ioctl(self->virtual_nci_fd, IOCTL_GET_NCIDEV_IDX, &self->dev_idex); + ASSERT_EQ(rc, 0); + + rc = get_nci_devid(self->sd, self->fid, self->pid, self->dev_idex, &msg); + ASSERT_EQ(rc, 0); + EXPECT_EQ(get_dev_enable_state(&msg), 0); + + if (self->isNCI2) + rc = pthread_create(&thread_t, NULL, virtual_dev_open_v2, + (void *)&self->virtual_nci_fd); + else + rc = pthread_create(&thread_t, NULL, virtual_dev_open, + (void *)&self->virtual_nci_fd); + ASSERT_GT(rc, -1); + + rc = send_cmd_with_idx(self->sd, self->fid, self->pid, + NFC_CMD_DEV_UP, self->dev_idex); + EXPECT_EQ(rc, 0); + + pthread_join(thread_t, (void **)&status); + ASSERT_EQ(status, 0); + self->open_state = true; +} + +static void *virtual_deinit(void *data) +{ + char buf[258]; + int dev_fd; + int len; + + dev_fd = *(int *)data; + + while ((len = read(dev_fd, buf, 258)) == 0) + ; + if (len <= 0) + goto error; + if (len != sizeof(nci_reset_cmd)) + goto error; + if (memcmp(nci_reset_cmd, buf, len)) + goto error; + write(dev_fd, nci_reset_rsp, sizeof(nci_reset_rsp)); + + return (void *)0; +error: + return (void *)-1; +} + +static void *virtual_deinit_v2(void *data) +{ + char buf[258]; + int dev_fd; + int len; + + dev_fd = *(int *)data; + + while ((len = read(dev_fd, buf, 258)) == 0) + ; + if (len <= 0) + goto error; + if (len != sizeof(nci_reset_cmd)) + goto error; + if (memcmp(nci_reset_cmd, buf, len)) + goto error; + write(dev_fd, nci_reset_rsp_v2, sizeof(nci_reset_rsp_v2)); + write(dev_fd, nci_reset_ntf, sizeof(nci_reset_ntf)); + + return (void *)0; +error: + return (void *)-1; +} + +FIXTURE_TEARDOWN(NCI) +{ + pthread_t thread_t; + int status; + int rc; + + if (self->open_state) { + if (self->isNCI2) + rc = pthread_create(&thread_t, NULL, + virtual_deinit_v2, + (void *)&self->virtual_nci_fd); + else + rc = pthread_create(&thread_t, NULL, virtual_deinit, + (void *)&self->virtual_nci_fd); + + ASSERT_GT(rc, -1); + rc = send_cmd_with_idx(self->sd, self->fid, self->pid, + NFC_CMD_DEV_DOWN, self->dev_idex); + EXPECT_EQ(rc, 0); + + pthread_join(thread_t, (void **)&status); + ASSERT_EQ(status, 0); + } + + close(self->sd); + close(self->virtual_nci_fd); + self->open_state = false; +} + +TEST_F(NCI, init) +{ + struct msgtemplate msg; + int rc; + + rc = get_nci_devid(self->sd, self->fid, self->pid, self->dev_idex, + &msg); + ASSERT_EQ(rc, 0); + EXPECT_EQ(get_dev_enable_state(&msg), 1); +} + +static void *virtual_poll_start(void *data) +{ + char buf[258]; + int dev_fd; + int len; + + dev_fd = *(int *)data; + + while ((len = read(dev_fd, buf, 258)) == 0) + ; + if (len <= 0) + goto error; + if (len != sizeof(nci_rf_discovery_cmd)) + goto error; + if (memcmp(nci_rf_discovery_cmd, buf, len)) + goto error; + write(dev_fd, nci_rf_disc_rsp, sizeof(nci_rf_disc_rsp)) + ; + + return (void *)0; +error: + return (void *)-1; +} + +static void *virtual_poll_stop(void *data) +{ + char buf[258]; + int dev_fd; + int len; + + dev_fd = *(int *)data; + + while ((len = read(dev_fd, buf, 258)) == 0) + ; + if (len <= 0) + goto error; + if (len != sizeof(nci_rf_deact_cmd)) + goto error; + if (memcmp(nci_rf_deact_cmd, buf, len)) + goto error; + write(dev_fd, nci_rf_deact_rsp, sizeof(nci_rf_deact_rsp)); + + return (void *)0; +error: + return (void *)-1; +} + +TEST_F(NCI, start_poll) +{ + __u16 nla_start_poll_type[2] = {NFC_ATTR_DEVICE_INDEX, + NFC_ATTR_PROTOCOLS}; + void *nla_start_poll_data[2] = {&self->dev_idex, &self->proto}; + int nla_start_poll_len[2] = {4, 4}; + pthread_t thread_t; + int status; + int rc; + + rc = pthread_create(&thread_t, NULL, virtual_poll_start, + (void *)&self->virtual_nci_fd); + ASSERT_GT(rc, -1); + + rc = send_cmd_mt_nla(self->sd, self->fid, self->pid, + NFC_CMD_START_POLL, 2, nla_start_poll_type, + nla_start_poll_data, nla_start_poll_len); + EXPECT_EQ(rc, 0); + + pthread_join(thread_t, (void **)&status); + ASSERT_EQ(status, 0); + + rc = pthread_create(&thread_t, NULL, virtual_poll_stop, + (void *)&self->virtual_nci_fd); + ASSERT_GT(rc, -1); + + rc = send_cmd_with_idx(self->sd, self->fid, self->pid, + NFC_CMD_STOP_POLL, self->dev_idex); + EXPECT_EQ(rc, 0); + + pthread_join(thread_t, (void **)&status); + ASSERT_EQ(status, 0); +} + +TEST_F(NCI, deinit) +{ + struct msgtemplate msg; + pthread_t thread_t; + int status; + int rc; + + rc = get_nci_devid(self->sd, self->fid, self->pid, self->dev_idex, + &msg); + ASSERT_EQ(rc, 0); + EXPECT_EQ(get_dev_enable_state(&msg), 1); + + if (self->isNCI2) + rc = pthread_create(&thread_t, NULL, virtual_deinit_v2, + (void *)&self->virtual_nci_fd); + else + rc = pthread_create(&thread_t, NULL, virtual_deinit, + (void *)&self->virtual_nci_fd); + ASSERT_GT(rc, -1); + + rc = send_cmd_with_idx(self->sd, self->fid, self->pid, + NFC_CMD_DEV_DOWN, self->dev_idex); + EXPECT_EQ(rc, 0); + + pthread_join(thread_t, (void **)&status); + self->open_state = 0; + ASSERT_EQ(status, 0); + + rc = get_nci_devid(self->sd, self->fid, self->pid, self->dev_idex, + &msg); + ASSERT_EQ(rc, 0); + EXPECT_EQ(get_dev_enable_state(&msg), 0); +} + +TEST_HARNESS_MAIN -- GitLab From 8c85d18ce647ac2517a1a1bb01b02648e23700e6 Mon Sep 17 00:00:00 2001 From: Paul Blakey Date: Wed, 27 Jan 2021 16:32:45 +0200 Subject: [PATCH 1709/2012] net/sched: cls_flower: Add match on the ct_state reply flag Add match on the ct_state reply flag. Example: $ tc filter add dev ens1f0_0 ingress prio 1 chain 1 proto ip flower \ ct_state +trk+est+rpl \ action mirred egress redirect dev ens1f0_1 $ tc filter add dev ens1f0_1 ingress prio 1 chain 1 proto ip flower \ ct_state +trk+est-rpl \ action mirred egress redirect dev ens1f0_0 Signed-off-by: Paul Blakey Reviewed-by: Jiri Pirko Signed-off-by: Jakub Kicinski --- include/uapi/linux/pkt_cls.h | 1 + net/sched/cls_flower.c | 6 ++++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h index 709668e264b06..afe6836e44b15 100644 --- a/include/uapi/linux/pkt_cls.h +++ b/include/uapi/linux/pkt_cls.h @@ -592,6 +592,7 @@ enum { TCA_FLOWER_KEY_CT_FLAGS_RELATED = 1 << 2, /* Related to an established connection. */ TCA_FLOWER_KEY_CT_FLAGS_TRACKED = 1 << 3, /* Conntrack has occurred. */ TCA_FLOWER_KEY_CT_FLAGS_INVALID = 1 << 4, /* Conntrack is invalid. */ + TCA_FLOWER_KEY_CT_FLAGS_REPLY = 1 << 5, /* Packet is in the reply direction. */ }; enum { diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index 4a9297a89c770..caf7643e9c836 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -291,9 +291,11 @@ static u16 fl_ct_info_to_flower_map[] = { [IP_CT_RELATED] = TCA_FLOWER_KEY_CT_FLAGS_TRACKED | TCA_FLOWER_KEY_CT_FLAGS_RELATED, [IP_CT_ESTABLISHED_REPLY] = TCA_FLOWER_KEY_CT_FLAGS_TRACKED | - TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED, + TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED | + TCA_FLOWER_KEY_CT_FLAGS_REPLY, [IP_CT_RELATED_REPLY] = TCA_FLOWER_KEY_CT_FLAGS_TRACKED | - TCA_FLOWER_KEY_CT_FLAGS_RELATED, + TCA_FLOWER_KEY_CT_FLAGS_RELATED | + TCA_FLOWER_KEY_CT_FLAGS_REPLY, [IP_CT_NEW] = TCA_FLOWER_KEY_CT_FLAGS_TRACKED | TCA_FLOWER_KEY_CT_FLAGS_NEW, }; -- GitLab From 941eff5aea5d4371fb8a496a66e29aa8fc7a0c23 Mon Sep 17 00:00:00 2001 From: Paul Blakey Date: Wed, 27 Jan 2021 16:32:46 +0200 Subject: [PATCH 1710/2012] net: flow_offload: Add original direction flag to ct_metadata Give offloading drivers the direction of the offloaded ct flow, this will be used for matches on direction (ct_state +/-rpl). Signed-off-by: Paul Blakey Reviewed-by: Jiri Pirko Signed-off-by: Jakub Kicinski --- include/net/flow_offload.h | 1 + net/sched/act_ct.c | 1 + 2 files changed, 2 insertions(+) diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h index 123b1e9ea304a..e6bd8ebf9ac33 100644 --- a/include/net/flow_offload.h +++ b/include/net/flow_offload.h @@ -245,6 +245,7 @@ struct flow_action_entry { unsigned long cookie; u32 mark; u32 labels[4]; + bool orig_dir; } ct_metadata; struct { /* FLOW_ACTION_MPLS_PUSH */ u32 label; diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c index b3442078aabcd..f0a0aa125b00a 100644 --- a/net/sched/act_ct.c +++ b/net/sched/act_ct.c @@ -183,6 +183,7 @@ static void tcf_ct_flow_table_add_action_meta(struct nf_conn *ct, IP_CT_ESTABLISHED_REPLY; /* aligns with the CT reference on the SKB nf_ct_set */ entry->ct_metadata.cookie = (unsigned long)ct | ctinfo; + entry->ct_metadata.orig_dir = dir == IP_CT_DIR_ORIGINAL; act_ct_labels = entry->ct_metadata.labels; ct_labels = nf_ct_labels_find(ct); -- GitLab From 6895cb3a95c9988b9556f179dccc1ef693a981f7 Mon Sep 17 00:00:00 2001 From: Paul Blakey Date: Wed, 27 Jan 2021 16:32:47 +0200 Subject: [PATCH 1711/2012] net/mlx5: CT: Add support for matching on ct_state reply flag Add support for matching on ct_state reply flag. Example: $ tc filter add dev ens1f0_0 ingress prio 1 chain 1 proto ip flower \ ct_state +trk+est+rpl \ action mirred egress redirect dev ens1f0_1 $ tc filter add dev ens1f0_1 ingress prio 1 chain 1 proto ip flower \ ct_state +trk+est-rpl \ action mirred egress redirect dev ens1f0_0 Signed-off-by: Paul Blakey Acked-by: Saeed Mahameed Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c index a359c3c731062..e417904ae17f2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c @@ -27,6 +27,7 @@ #define MLX5_CT_STATE_ESTABLISHED_BIT BIT(1) #define MLX5_CT_STATE_TRK_BIT BIT(2) #define MLX5_CT_STATE_NAT_BIT BIT(3) +#define MLX5_CT_STATE_REPLY_BIT BIT(4) #define MLX5_FTE_ID_BITS (mlx5e_tc_attr_to_reg_mappings[FTEID_TO_REG].mlen * 8) #define MLX5_FTE_ID_MAX GENMASK(MLX5_FTE_ID_BITS - 1, 0) @@ -641,6 +642,7 @@ mlx5_tc_ct_entry_create_mod_hdr(struct mlx5_tc_ct_priv *ct_priv, } ct_state |= MLX5_CT_STATE_ESTABLISHED_BIT | MLX5_CT_STATE_TRK_BIT; + ct_state |= meta->ct_metadata.orig_dir ? 0 : MLX5_CT_STATE_REPLY_BIT; err = mlx5_tc_ct_entry_set_registers(ct_priv, &mod_acts, ct_state, meta->ct_metadata.mark, @@ -1086,8 +1088,8 @@ mlx5_tc_ct_match_add(struct mlx5_tc_ct_priv *priv, struct netlink_ext_ack *extack) { struct flow_rule *rule = flow_cls_offload_flow_rule(f); + bool trk, est, untrk, unest, new, rpl, unrpl; struct flow_dissector_key_ct *mask, *key; - bool trk, est, untrk, unest, new; u32 ctstate = 0, ctstate_mask = 0; u16 ct_state_on, ct_state_off; u16 ct_state, ct_state_mask; @@ -1113,9 +1115,10 @@ mlx5_tc_ct_match_add(struct mlx5_tc_ct_priv *priv, if (ct_state_mask & ~(TCA_FLOWER_KEY_CT_FLAGS_TRACKED | TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED | - TCA_FLOWER_KEY_CT_FLAGS_NEW)) { + TCA_FLOWER_KEY_CT_FLAGS_NEW | + TCA_FLOWER_KEY_CT_FLAGS_REPLY)) { NL_SET_ERR_MSG_MOD(extack, - "only ct_state trk, est and new are supported for offload"); + "only ct_state trk, est, new and rpl are supported for offload"); return -EOPNOTSUPP; } @@ -1124,13 +1127,17 @@ mlx5_tc_ct_match_add(struct mlx5_tc_ct_priv *priv, trk = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_TRACKED; new = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_NEW; est = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED; + rpl = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_REPLY; untrk = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_TRACKED; unest = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED; + unrpl = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_REPLY; ctstate |= trk ? MLX5_CT_STATE_TRK_BIT : 0; ctstate |= est ? MLX5_CT_STATE_ESTABLISHED_BIT : 0; + ctstate |= rpl ? MLX5_CT_STATE_REPLY_BIT : 0; ctstate_mask |= (untrk || trk) ? MLX5_CT_STATE_TRK_BIT : 0; ctstate_mask |= (unest || est) ? MLX5_CT_STATE_ESTABLISHED_BIT : 0; + ctstate_mask |= (unrpl || rpl) ? MLX5_CT_STATE_REPLY_BIT : 0; if (new) { NL_SET_ERR_MSG_MOD(extack, -- GitLab From bdbc13c204ee3e742289730618002ff9f21109bf Mon Sep 17 00:00:00 2001 From: Yevgeny Kliteynik Date: Wed, 20 Jan 2021 02:53:28 +0200 Subject: [PATCH 1712/2012] net/mlx5: DR, Fix potential shift wrapping of 32-bit value Fix 32-bit variable shift wrapping in dr_ste_v0_get_miss_addr. Fixes: 6b93b400aa88 ("net/mlx5: DR, Move STEv0 setters and getters") Reported-by: Dan Carpenter Signed-off-by: Yevgeny Kliteynik Reviewed-by: Alex Vesker Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v0.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v0.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v0.c index b76fdff088907..9ec079247c4b2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v0.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v0.c @@ -248,8 +248,8 @@ static void dr_ste_v0_set_miss_addr(u8 *hw_ste_p, u64 miss_addr) static u64 dr_ste_v0_get_miss_addr(u8 *hw_ste_p) { u64 index = - (MLX5_GET(ste_rx_steering_mult, hw_ste_p, miss_address_31_6) | - MLX5_GET(ste_rx_steering_mult, hw_ste_p, miss_address_39_32) << 26); + ((u64)MLX5_GET(ste_rx_steering_mult, hw_ste_p, miss_address_31_6) | + ((u64)MLX5_GET(ste_rx_steering_mult, hw_ste_p, miss_address_39_32)) << 26); return index << 6; } -- GitLab From 3a77c238909b354b25c8d58ea541c44e14030ba8 Mon Sep 17 00:00:00 2001 From: Yevgeny Kliteynik Date: Tue, 1 Dec 2020 15:30:59 +0200 Subject: [PATCH 1713/2012] net/mlx5: DR, Add match STEv1 structs to ifc Add mlx5_ifc_dr_ste_v1.h - a new header with HW specific STE structs for version 1. Signed-off-by: Alex Vesker Signed-off-by: Yevgeny Kliteynik Signed-off-by: Saeed Mahameed --- .../mlx5/core/steering/mlx5_ifc_dr_ste_v1.h | 273 ++++++++++++++++++ 1 file changed, 273 insertions(+) create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5_ifc_dr_ste_v1.h diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5_ifc_dr_ste_v1.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5_ifc_dr_ste_v1.h new file mode 100644 index 0000000000000..6db7b8493fd98 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5_ifc_dr_ste_v1.h @@ -0,0 +1,273 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2020 NVIDIA CORPORATION. All rights reserved. */ + +#ifndef MLX5_IFC_DR_STE_V1_H +#define MLX5_IFC_DR_STE_V1_H + +struct mlx5_ifc_ste_eth_l2_src_v1_bits { + u8 reserved_at_0[0x1]; + u8 sx_sniffer[0x1]; + u8 functional_loopback[0x1]; + u8 ip_fragmented[0x1]; + u8 qp_type[0x2]; + u8 encapsulation_type[0x2]; + u8 port[0x2]; + u8 l3_type[0x2]; + u8 l4_type[0x2]; + u8 first_vlan_qualifier[0x2]; + u8 first_priority[0x3]; + u8 first_cfi[0x1]; + u8 first_vlan_id[0xc]; + + u8 smac_47_16[0x20]; + + u8 smac_15_0[0x10]; + u8 l3_ethertype[0x10]; + + u8 reserved_at_60[0x6]; + u8 tcp_syn[0x1]; + u8 reserved_at_67[0x3]; + u8 force_loopback[0x1]; + u8 l2_ok[0x1]; + u8 l3_ok[0x1]; + u8 l4_ok[0x1]; + u8 second_vlan_qualifier[0x2]; + + u8 second_priority[0x3]; + u8 second_cfi[0x1]; + u8 second_vlan_id[0xc]; +}; + +struct mlx5_ifc_ste_eth_l2_dst_v1_bits { + u8 reserved_at_0[0x1]; + u8 sx_sniffer[0x1]; + u8 functional_lb[0x1]; + u8 ip_fragmented[0x1]; + u8 qp_type[0x2]; + u8 encapsulation_type[0x2]; + u8 port[0x2]; + u8 l3_type[0x2]; + u8 l4_type[0x2]; + u8 first_vlan_qualifier[0x2]; + u8 first_priority[0x3]; + u8 first_cfi[0x1]; + u8 first_vlan_id[0xc]; + + u8 dmac_47_16[0x20]; + + u8 dmac_15_0[0x10]; + u8 l3_ethertype[0x10]; + + u8 reserved_at_60[0x6]; + u8 tcp_syn[0x1]; + u8 reserved_at_67[0x3]; + u8 force_lb[0x1]; + u8 l2_ok[0x1]; + u8 l3_ok[0x1]; + u8 l4_ok[0x1]; + u8 second_vlan_qualifier[0x2]; + u8 second_priority[0x3]; + u8 second_cfi[0x1]; + u8 second_vlan_id[0xc]; +}; + +struct mlx5_ifc_ste_eth_l2_src_dst_v1_bits { + u8 dmac_47_16[0x20]; + + u8 smac_47_16[0x20]; + + u8 dmac_15_0[0x10]; + u8 reserved_at_50[0x2]; + u8 functional_lb[0x1]; + u8 reserved_at_53[0x5]; + u8 port[0x2]; + u8 l3_type[0x2]; + u8 reserved_at_5c[0x2]; + u8 first_vlan_qualifier[0x2]; + + u8 first_priority[0x3]; + u8 first_cfi[0x1]; + u8 first_vlan_id[0xc]; + u8 smac_15_0[0x10]; +}; + +struct mlx5_ifc_ste_eth_l3_ipv4_5_tuple_v1_bits { + u8 source_address[0x20]; + + u8 destination_address[0x20]; + + u8 source_port[0x10]; + u8 destination_port[0x10]; + + u8 reserved_at_60[0x4]; + u8 l4_ok[0x1]; + u8 l3_ok[0x1]; + u8 fragmented[0x1]; + u8 tcp_ns[0x1]; + u8 tcp_cwr[0x1]; + u8 tcp_ece[0x1]; + u8 tcp_urg[0x1]; + u8 tcp_ack[0x1]; + u8 tcp_psh[0x1]; + u8 tcp_rst[0x1]; + u8 tcp_syn[0x1]; + u8 tcp_fin[0x1]; + u8 dscp[0x6]; + u8 ecn[0x2]; + u8 protocol[0x8]; +}; + +struct mlx5_ifc_ste_eth_l2_tnl_v1_bits { + u8 l2_tunneling_network_id[0x20]; + + u8 dmac_47_16[0x20]; + + u8 dmac_15_0[0x10]; + u8 l3_ethertype[0x10]; + + u8 reserved_at_60[0x3]; + u8 ip_fragmented[0x1]; + u8 reserved_at_64[0x2]; + u8 encp_type[0x2]; + u8 reserved_at_68[0x2]; + u8 l3_type[0x2]; + u8 l4_type[0x2]; + u8 first_vlan_qualifier[0x2]; + u8 first_priority[0x3]; + u8 first_cfi[0x1]; + u8 first_vlan_id[0xc]; +}; + +struct mlx5_ifc_ste_eth_l3_ipv4_misc_v1_bits { + u8 identification[0x10]; + u8 flags[0x3]; + u8 fragment_offset[0xd]; + + u8 total_length[0x10]; + u8 checksum[0x10]; + + u8 version[0x4]; + u8 ihl[0x4]; + u8 time_to_live[0x8]; + u8 reserved_at_50[0x10]; + + u8 reserved_at_60[0x1c]; + u8 voq_internal_prio[0x4]; +}; + +struct mlx5_ifc_ste_eth_l4_v1_bits { + u8 ipv6_version[0x4]; + u8 reserved_at_4[0x4]; + u8 dscp[0x6]; + u8 ecn[0x2]; + u8 ipv6_hop_limit[0x8]; + u8 protocol[0x8]; + + u8 src_port[0x10]; + u8 dst_port[0x10]; + + u8 first_fragment[0x1]; + u8 reserved_at_41[0xb]; + u8 flow_label[0x14]; + + u8 tcp_data_offset[0x4]; + u8 l4_ok[0x1]; + u8 l3_ok[0x1]; + u8 fragmented[0x1]; + u8 tcp_ns[0x1]; + u8 tcp_cwr[0x1]; + u8 tcp_ece[0x1]; + u8 tcp_urg[0x1]; + u8 tcp_ack[0x1]; + u8 tcp_psh[0x1]; + u8 tcp_rst[0x1]; + u8 tcp_syn[0x1]; + u8 tcp_fin[0x1]; + u8 ipv6_paylen[0x10]; +}; + +struct mlx5_ifc_ste_eth_l4_misc_v1_bits { + u8 window_size[0x10]; + u8 urgent_pointer[0x10]; + + u8 ack_num[0x20]; + + u8 seq_num[0x20]; + + u8 length[0x10]; + u8 checksum[0x10]; +}; + +struct mlx5_ifc_ste_mpls_v1_bits { + u8 reserved_at_0[0x15]; + u8 mpls_ok[0x1]; + u8 mpls4_s_bit[0x1]; + u8 mpls4_qualifier[0x1]; + u8 mpls3_s_bit[0x1]; + u8 mpls3_qualifier[0x1]; + u8 mpls2_s_bit[0x1]; + u8 mpls2_qualifier[0x1]; + u8 mpls1_s_bit[0x1]; + u8 mpls1_qualifier[0x1]; + u8 mpls0_s_bit[0x1]; + u8 mpls0_qualifier[0x1]; + + u8 mpls0_label[0x14]; + u8 mpls0_exp[0x3]; + u8 mpls0_s_bos[0x1]; + u8 mpls0_ttl[0x8]; + + u8 mpls1_label[0x20]; + + u8 mpls2_label[0x20]; +}; + +struct mlx5_ifc_ste_gre_v1_bits { + u8 gre_c_present[0x1]; + u8 reserved_at_1[0x1]; + u8 gre_k_present[0x1]; + u8 gre_s_present[0x1]; + u8 strict_src_route[0x1]; + u8 recur[0x3]; + u8 flags[0x5]; + u8 version[0x3]; + u8 gre_protocol[0x10]; + + u8 reserved_at_20[0x20]; + + u8 gre_key_h[0x18]; + u8 gre_key_l[0x8]; + + u8 reserved_at_60[0x20]; +}; + +struct mlx5_ifc_ste_src_gvmi_qp_v1_bits { + u8 loopback_synd[0x8]; + u8 reserved_at_8[0x7]; + u8 functional_lb[0x1]; + u8 source_gvmi[0x10]; + + u8 force_lb[0x1]; + u8 reserved_at_21[0x1]; + u8 source_is_requestor[0x1]; + u8 reserved_at_23[0x5]; + u8 source_qp[0x18]; + + u8 reserved_at_40[0x20]; + + u8 reserved_at_60[0x20]; +}; + +struct mlx5_ifc_ste_icmp_v1_bits { + u8 icmp_payload_data[0x20]; + + u8 icmp_header_data[0x20]; + + u8 icmp_type[0x8]; + u8 icmp_code[0x8]; + u8 reserved_at_50[0x10]; + + u8 reserved_at_60[0x20]; +}; + +#endif /* MLX5_IFC_DR_STE_V1_H */ -- GitLab From 10b69418641062b4dc7fabe3c6f2e12432ec6987 Mon Sep 17 00:00:00 2001 From: Yevgeny Kliteynik Date: Tue, 1 Dec 2020 15:37:57 +0200 Subject: [PATCH 1714/2012] net/mlx5: DR, Add HW STEv1 match logic Add STEv1 match logic to a new file. This file will be used for HW specific STEv1. Signed-off-by: Alex Vesker Signed-off-by: Yevgeny Kliteynik Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/Makefile | 2 +- .../mellanox/mlx5/core/steering/dr_ste.c | 2 +- .../mellanox/mlx5/core/steering/dr_ste.h | 1 + .../mellanox/mlx5/core/steering/dr_ste_v1.c | 908 ++++++++++++++++++ 4 files changed, 911 insertions(+), 2 deletions(-) create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index f61ee7110243c..8809dd4de57e2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -85,7 +85,7 @@ mlx5_core-$(CONFIG_MLX5_SW_STEERING) += steering/dr_domain.o steering/dr_table.o steering/dr_matcher.o steering/dr_rule.o \ steering/dr_icm_pool.o steering/dr_buddy.o \ steering/dr_ste.o steering/dr_send.o \ - steering/dr_ste_v0.o \ + steering/dr_ste_v0.o steering/dr_ste_v1.o \ steering/dr_cmd.o steering/dr_fw.o \ steering/dr_action.o steering/fs_dr.o # diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c index 1614481fdf8d6..0f318f409c91d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c @@ -1127,7 +1127,7 @@ void mlx5dr_ste_build_src_gvmi_qpn(struct mlx5dr_ste_ctx *ste_ctx, static struct mlx5dr_ste_ctx *mlx5dr_ste_ctx_arr[] = { [MLX5_STEERING_FORMAT_CONNECTX_5] = &ste_ctx_v0, - [MLX5_STEERING_FORMAT_CONNECTX_6DX] = NULL, + [MLX5_STEERING_FORMAT_CONNECTX_6DX] = &ste_ctx_v1, }; struct mlx5dr_ste_ctx *mlx5dr_ste_get_ctx(u8 version) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.h index 4a3d6a8499912..0dc08fe1a9dba 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.h @@ -163,5 +163,6 @@ struct mlx5dr_ste_ctx { }; extern struct mlx5dr_ste_ctx ste_ctx_v0; +extern struct mlx5dr_ste_ctx ste_ctx_v1; #endif /* _DR_STE_ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c new file mode 100644 index 0000000000000..bf3cb189f3bc9 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c @@ -0,0 +1,908 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2020 NVIDIA CORPORATION. All rights reserved. */ + +#include +#include "mlx5_ifc_dr_ste_v1.h" +#include "dr_ste.h" + +#define DR_STE_CALC_DFNR_TYPE(lookup_type, inner) \ + ((inner) ? DR_STE_V1_LU_TYPE_##lookup_type##_I : \ + DR_STE_V1_LU_TYPE_##lookup_type##_O) + +enum dr_ste_v1_entry_format { + DR_STE_V1_TYPE_BWC_BYTE = 0x0, + DR_STE_V1_TYPE_BWC_DW = 0x1, + DR_STE_V1_TYPE_MATCH = 0x2, +}; + +/* Lookup type is built from 2B: [ Definer mode 1B ][ Definer index 1B ] */ +enum { + DR_STE_V1_LU_TYPE_NOP = 0x0000, + DR_STE_V1_LU_TYPE_ETHL2_TNL = 0x0002, + DR_STE_V1_LU_TYPE_IBL3_EXT = 0x0102, + DR_STE_V1_LU_TYPE_ETHL2_O = 0x0003, + DR_STE_V1_LU_TYPE_IBL4 = 0x0103, + DR_STE_V1_LU_TYPE_ETHL2_I = 0x0004, + DR_STE_V1_LU_TYPE_SRC_QP_GVMI = 0x0104, + DR_STE_V1_LU_TYPE_ETHL2_SRC_O = 0x0005, + DR_STE_V1_LU_TYPE_ETHL2_HEADERS_O = 0x0105, + DR_STE_V1_LU_TYPE_ETHL2_SRC_I = 0x0006, + DR_STE_V1_LU_TYPE_ETHL2_HEADERS_I = 0x0106, + DR_STE_V1_LU_TYPE_ETHL3_IPV4_5_TUPLE_O = 0x0007, + DR_STE_V1_LU_TYPE_IPV6_DES_O = 0x0107, + DR_STE_V1_LU_TYPE_ETHL3_IPV4_5_TUPLE_I = 0x0008, + DR_STE_V1_LU_TYPE_IPV6_DES_I = 0x0108, + DR_STE_V1_LU_TYPE_ETHL4_O = 0x0009, + DR_STE_V1_LU_TYPE_IPV6_SRC_O = 0x0109, + DR_STE_V1_LU_TYPE_ETHL4_I = 0x000a, + DR_STE_V1_LU_TYPE_IPV6_SRC_I = 0x010a, + DR_STE_V1_LU_TYPE_ETHL2_SRC_DST_O = 0x000b, + DR_STE_V1_LU_TYPE_MPLS_O = 0x010b, + DR_STE_V1_LU_TYPE_ETHL2_SRC_DST_I = 0x000c, + DR_STE_V1_LU_TYPE_MPLS_I = 0x010c, + DR_STE_V1_LU_TYPE_ETHL3_IPV4_MISC_O = 0x000d, + DR_STE_V1_LU_TYPE_GRE = 0x010d, + DR_STE_V1_LU_TYPE_FLEX_PARSER_TNL_HEADER = 0x000e, + DR_STE_V1_LU_TYPE_GENERAL_PURPOSE = 0x010e, + DR_STE_V1_LU_TYPE_ETHL3_IPV4_MISC_I = 0x000f, + DR_STE_V1_LU_TYPE_STEERING_REGISTERS_0 = 0x010f, + DR_STE_V1_LU_TYPE_STEERING_REGISTERS_1 = 0x0110, + DR_STE_V1_LU_TYPE_FLEX_PARSER_0 = 0x0111, + DR_STE_V1_LU_TYPE_FLEX_PARSER_1 = 0x0112, + DR_STE_V1_LU_TYPE_ETHL4_MISC_O = 0x0113, + DR_STE_V1_LU_TYPE_ETHL4_MISC_I = 0x0114, + DR_STE_V1_LU_TYPE_INVALID = 0x00ff, + DR_STE_V1_LU_TYPE_DONT_CARE = MLX5DR_STE_LU_TYPE_DONT_CARE, +}; + +static void dr_ste_v1_build_eth_l2_src_dst_bit_mask(struct mlx5dr_match_param *value, + bool inner, u8 *bit_mask) +{ + struct mlx5dr_match_spec *mask = inner ? &value->inner : &value->outer; + + DR_STE_SET_TAG(eth_l2_src_dst_v1, bit_mask, dmac_47_16, mask, dmac_47_16); + DR_STE_SET_TAG(eth_l2_src_dst_v1, bit_mask, dmac_15_0, mask, dmac_15_0); + + DR_STE_SET_TAG(eth_l2_src_dst_v1, bit_mask, smac_47_16, mask, smac_47_16); + DR_STE_SET_TAG(eth_l2_src_dst_v1, bit_mask, smac_15_0, mask, smac_15_0); + + DR_STE_SET_TAG(eth_l2_src_dst_v1, bit_mask, first_vlan_id, mask, first_vid); + DR_STE_SET_TAG(eth_l2_src_dst_v1, bit_mask, first_cfi, mask, first_cfi); + DR_STE_SET_TAG(eth_l2_src_dst_v1, bit_mask, first_priority, mask, first_prio); + DR_STE_SET_ONES(eth_l2_src_dst_v1, bit_mask, l3_type, mask, ip_version); + + if (mask->cvlan_tag) { + MLX5_SET(ste_eth_l2_src_dst_v1, bit_mask, first_vlan_qualifier, -1); + mask->cvlan_tag = 0; + } else if (mask->svlan_tag) { + MLX5_SET(ste_eth_l2_src_dst_v1, bit_mask, first_vlan_qualifier, -1); + mask->svlan_tag = 0; + } +} + +static int dr_ste_v1_build_eth_l2_src_dst_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *tag) +{ + struct mlx5dr_match_spec *spec = sb->inner ? &value->inner : &value->outer; + + DR_STE_SET_TAG(eth_l2_src_dst_v1, tag, dmac_47_16, spec, dmac_47_16); + DR_STE_SET_TAG(eth_l2_src_dst_v1, tag, dmac_15_0, spec, dmac_15_0); + + DR_STE_SET_TAG(eth_l2_src_dst_v1, tag, smac_47_16, spec, smac_47_16); + DR_STE_SET_TAG(eth_l2_src_dst_v1, tag, smac_15_0, spec, smac_15_0); + + if (spec->ip_version == IP_VERSION_IPV4) { + MLX5_SET(ste_eth_l2_src_dst_v1, tag, l3_type, STE_IPV4); + spec->ip_version = 0; + } else if (spec->ip_version == IP_VERSION_IPV6) { + MLX5_SET(ste_eth_l2_src_dst_v1, tag, l3_type, STE_IPV6); + spec->ip_version = 0; + } else if (spec->ip_version) { + return -EINVAL; + } + + DR_STE_SET_TAG(eth_l2_src_dst_v1, tag, first_vlan_id, spec, first_vid); + DR_STE_SET_TAG(eth_l2_src_dst_v1, tag, first_cfi, spec, first_cfi); + DR_STE_SET_TAG(eth_l2_src_dst_v1, tag, first_priority, spec, first_prio); + + if (spec->cvlan_tag) { + MLX5_SET(ste_eth_l2_src_dst_v1, tag, first_vlan_qualifier, DR_STE_CVLAN); + spec->cvlan_tag = 0; + } else if (spec->svlan_tag) { + MLX5_SET(ste_eth_l2_src_dst_v1, tag, first_vlan_qualifier, DR_STE_SVLAN); + spec->svlan_tag = 0; + } + return 0; +} + +static void dr_ste_v1_build_eth_l2_src_dst_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + dr_ste_v1_build_eth_l2_src_dst_bit_mask(mask, sb->inner, sb->bit_mask); + + sb->lu_type = DR_STE_CALC_DFNR_TYPE(ETHL2_SRC_DST, sb->inner); + sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v1_build_eth_l2_src_dst_tag; +} + +static int dr_ste_v1_build_eth_l3_ipv6_dst_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *tag) +{ + struct mlx5dr_match_spec *spec = sb->inner ? &value->inner : &value->outer; + + DR_STE_SET_TAG(eth_l3_ipv6_dst, tag, dst_ip_127_96, spec, dst_ip_127_96); + DR_STE_SET_TAG(eth_l3_ipv6_dst, tag, dst_ip_95_64, spec, dst_ip_95_64); + DR_STE_SET_TAG(eth_l3_ipv6_dst, tag, dst_ip_63_32, spec, dst_ip_63_32); + DR_STE_SET_TAG(eth_l3_ipv6_dst, tag, dst_ip_31_0, spec, dst_ip_31_0); + + return 0; +} + +static void dr_ste_v1_build_eth_l3_ipv6_dst_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + dr_ste_v1_build_eth_l3_ipv6_dst_tag(mask, sb, sb->bit_mask); + + sb->lu_type = DR_STE_CALC_DFNR_TYPE(IPV6_DES, sb->inner); + sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v1_build_eth_l3_ipv6_dst_tag; +} + +static int dr_ste_v1_build_eth_l3_ipv6_src_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *tag) +{ + struct mlx5dr_match_spec *spec = sb->inner ? &value->inner : &value->outer; + + DR_STE_SET_TAG(eth_l3_ipv6_src, tag, src_ip_127_96, spec, src_ip_127_96); + DR_STE_SET_TAG(eth_l3_ipv6_src, tag, src_ip_95_64, spec, src_ip_95_64); + DR_STE_SET_TAG(eth_l3_ipv6_src, tag, src_ip_63_32, spec, src_ip_63_32); + DR_STE_SET_TAG(eth_l3_ipv6_src, tag, src_ip_31_0, spec, src_ip_31_0); + + return 0; +} + +static void dr_ste_v1_build_eth_l3_ipv6_src_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + dr_ste_v1_build_eth_l3_ipv6_src_tag(mask, sb, sb->bit_mask); + + sb->lu_type = DR_STE_CALC_DFNR_TYPE(IPV6_SRC, sb->inner); + sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v1_build_eth_l3_ipv6_src_tag; +} + +static int dr_ste_v1_build_eth_l3_ipv4_5_tuple_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *tag) +{ + struct mlx5dr_match_spec *spec = sb->inner ? &value->inner : &value->outer; + + DR_STE_SET_TAG(eth_l3_ipv4_5_tuple_v1, tag, destination_address, spec, dst_ip_31_0); + DR_STE_SET_TAG(eth_l3_ipv4_5_tuple_v1, tag, source_address, spec, src_ip_31_0); + DR_STE_SET_TAG(eth_l3_ipv4_5_tuple_v1, tag, destination_port, spec, tcp_dport); + DR_STE_SET_TAG(eth_l3_ipv4_5_tuple_v1, tag, destination_port, spec, udp_dport); + DR_STE_SET_TAG(eth_l3_ipv4_5_tuple_v1, tag, source_port, spec, tcp_sport); + DR_STE_SET_TAG(eth_l3_ipv4_5_tuple_v1, tag, source_port, spec, udp_sport); + DR_STE_SET_TAG(eth_l3_ipv4_5_tuple_v1, tag, protocol, spec, ip_protocol); + DR_STE_SET_TAG(eth_l3_ipv4_5_tuple_v1, tag, fragmented, spec, frag); + DR_STE_SET_TAG(eth_l3_ipv4_5_tuple_v1, tag, dscp, spec, ip_dscp); + DR_STE_SET_TAG(eth_l3_ipv4_5_tuple_v1, tag, ecn, spec, ip_ecn); + + if (spec->tcp_flags) { + DR_STE_SET_TCP_FLAGS(eth_l3_ipv4_5_tuple_v1, tag, spec); + spec->tcp_flags = 0; + } + + return 0; +} + +static void dr_ste_v1_build_eth_l3_ipv4_5_tuple_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + dr_ste_v1_build_eth_l3_ipv4_5_tuple_tag(mask, sb, sb->bit_mask); + + sb->lu_type = DR_STE_CALC_DFNR_TYPE(ETHL3_IPV4_5_TUPLE, sb->inner); + sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v1_build_eth_l3_ipv4_5_tuple_tag; +} + +static void dr_ste_v1_build_eth_l2_src_or_dst_bit_mask(struct mlx5dr_match_param *value, + bool inner, u8 *bit_mask) +{ + struct mlx5dr_match_spec *mask = inner ? &value->inner : &value->outer; + struct mlx5dr_match_misc *misc_mask = &value->misc; + + DR_STE_SET_TAG(eth_l2_src_v1, bit_mask, first_vlan_id, mask, first_vid); + DR_STE_SET_TAG(eth_l2_src_v1, bit_mask, first_cfi, mask, first_cfi); + DR_STE_SET_TAG(eth_l2_src_v1, bit_mask, first_priority, mask, first_prio); + DR_STE_SET_TAG(eth_l2_src_v1, bit_mask, ip_fragmented, mask, frag); // ? + DR_STE_SET_TAG(eth_l2_src_v1, bit_mask, l3_ethertype, mask, ethertype); // ? + DR_STE_SET_ONES(eth_l2_src_v1, bit_mask, l3_type, mask, ip_version); + + if (mask->svlan_tag || mask->cvlan_tag) { + MLX5_SET(ste_eth_l2_src_v1, bit_mask, first_vlan_qualifier, -1); + mask->cvlan_tag = 0; + mask->svlan_tag = 0; + } + + if (inner) { + if (misc_mask->inner_second_cvlan_tag || + misc_mask->inner_second_svlan_tag) { + MLX5_SET(ste_eth_l2_src_v1, bit_mask, second_vlan_qualifier, -1); + misc_mask->inner_second_cvlan_tag = 0; + misc_mask->inner_second_svlan_tag = 0; + } + + DR_STE_SET_TAG(eth_l2_src_v1, bit_mask, + second_vlan_id, misc_mask, inner_second_vid); + DR_STE_SET_TAG(eth_l2_src_v1, bit_mask, + second_cfi, misc_mask, inner_second_cfi); + DR_STE_SET_TAG(eth_l2_src_v1, bit_mask, + second_priority, misc_mask, inner_second_prio); + } else { + if (misc_mask->outer_second_cvlan_tag || + misc_mask->outer_second_svlan_tag) { + MLX5_SET(ste_eth_l2_src_v1, bit_mask, second_vlan_qualifier, -1); + misc_mask->outer_second_cvlan_tag = 0; + misc_mask->outer_second_svlan_tag = 0; + } + + DR_STE_SET_TAG(eth_l2_src_v1, bit_mask, + second_vlan_id, misc_mask, outer_second_vid); + DR_STE_SET_TAG(eth_l2_src_v1, bit_mask, + second_cfi, misc_mask, outer_second_cfi); + DR_STE_SET_TAG(eth_l2_src_v1, bit_mask, + second_priority, misc_mask, outer_second_prio); + } +} + +static int dr_ste_v1_build_eth_l2_src_or_dst_tag(struct mlx5dr_match_param *value, + bool inner, u8 *tag) +{ + struct mlx5dr_match_spec *spec = inner ? &value->inner : &value->outer; + struct mlx5dr_match_misc *misc_spec = &value->misc; + + DR_STE_SET_TAG(eth_l2_src_v1, tag, first_vlan_id, spec, first_vid); + DR_STE_SET_TAG(eth_l2_src_v1, tag, first_cfi, spec, first_cfi); + DR_STE_SET_TAG(eth_l2_src_v1, tag, first_priority, spec, first_prio); + DR_STE_SET_TAG(eth_l2_src_v1, tag, ip_fragmented, spec, frag); + DR_STE_SET_TAG(eth_l2_src_v1, tag, l3_ethertype, spec, ethertype); + + if (spec->ip_version == IP_VERSION_IPV4) { + MLX5_SET(ste_eth_l2_src_v1, tag, l3_type, STE_IPV4); + spec->ip_version = 0; + } else if (spec->ip_version == IP_VERSION_IPV6) { + MLX5_SET(ste_eth_l2_src_v1, tag, l3_type, STE_IPV6); + spec->ip_version = 0; + } else if (spec->ip_version) { + return -EINVAL; + } + + if (spec->cvlan_tag) { + MLX5_SET(ste_eth_l2_src_v1, tag, first_vlan_qualifier, DR_STE_CVLAN); + spec->cvlan_tag = 0; + } else if (spec->svlan_tag) { + MLX5_SET(ste_eth_l2_src_v1, tag, first_vlan_qualifier, DR_STE_SVLAN); + spec->svlan_tag = 0; + } + + if (inner) { + if (misc_spec->inner_second_cvlan_tag) { + MLX5_SET(ste_eth_l2_src_v1, tag, second_vlan_qualifier, DR_STE_CVLAN); + misc_spec->inner_second_cvlan_tag = 0; + } else if (misc_spec->inner_second_svlan_tag) { + MLX5_SET(ste_eth_l2_src_v1, tag, second_vlan_qualifier, DR_STE_SVLAN); + misc_spec->inner_second_svlan_tag = 0; + } + + DR_STE_SET_TAG(eth_l2_src_v1, tag, second_vlan_id, misc_spec, inner_second_vid); + DR_STE_SET_TAG(eth_l2_src_v1, tag, second_cfi, misc_spec, inner_second_cfi); + DR_STE_SET_TAG(eth_l2_src_v1, tag, second_priority, misc_spec, inner_second_prio); + } else { + if (misc_spec->outer_second_cvlan_tag) { + MLX5_SET(ste_eth_l2_src_v1, tag, second_vlan_qualifier, DR_STE_CVLAN); + misc_spec->outer_second_cvlan_tag = 0; + } else if (misc_spec->outer_second_svlan_tag) { + MLX5_SET(ste_eth_l2_src_v1, tag, second_vlan_qualifier, DR_STE_SVLAN); + misc_spec->outer_second_svlan_tag = 0; + } + DR_STE_SET_TAG(eth_l2_src_v1, tag, second_vlan_id, misc_spec, outer_second_vid); + DR_STE_SET_TAG(eth_l2_src_v1, tag, second_cfi, misc_spec, outer_second_cfi); + DR_STE_SET_TAG(eth_l2_src_v1, tag, second_priority, misc_spec, outer_second_prio); + } + + return 0; +} + +static void dr_ste_v1_build_eth_l2_src_bit_mask(struct mlx5dr_match_param *value, + bool inner, u8 *bit_mask) +{ + struct mlx5dr_match_spec *mask = inner ? &value->inner : &value->outer; + + DR_STE_SET_TAG(eth_l2_src_v1, bit_mask, smac_47_16, mask, smac_47_16); + DR_STE_SET_TAG(eth_l2_src_v1, bit_mask, smac_15_0, mask, smac_15_0); + + dr_ste_v1_build_eth_l2_src_or_dst_bit_mask(value, inner, bit_mask); +} + +static int dr_ste_v1_build_eth_l2_src_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *tag) +{ + struct mlx5dr_match_spec *spec = sb->inner ? &value->inner : &value->outer; + + DR_STE_SET_TAG(eth_l2_src_v1, tag, smac_47_16, spec, smac_47_16); + DR_STE_SET_TAG(eth_l2_src_v1, tag, smac_15_0, spec, smac_15_0); + + return dr_ste_v1_build_eth_l2_src_or_dst_tag(value, sb->inner, tag); +} + +static void dr_ste_v1_build_eth_l2_src_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + dr_ste_v1_build_eth_l2_src_bit_mask(mask, sb->inner, sb->bit_mask); + + sb->lu_type = DR_STE_CALC_DFNR_TYPE(ETHL2_SRC, sb->inner); + sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v1_build_eth_l2_src_tag; +} + +static void dr_ste_v1_build_eth_l2_dst_bit_mask(struct mlx5dr_match_param *value, + bool inner, u8 *bit_mask) +{ + struct mlx5dr_match_spec *mask = inner ? &value->inner : &value->outer; + + DR_STE_SET_TAG(eth_l2_dst_v1, bit_mask, dmac_47_16, mask, dmac_47_16); + DR_STE_SET_TAG(eth_l2_dst_v1, bit_mask, dmac_15_0, mask, dmac_15_0); + + dr_ste_v1_build_eth_l2_src_or_dst_bit_mask(value, inner, bit_mask); +} + +static int dr_ste_v1_build_eth_l2_dst_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *tag) +{ + struct mlx5dr_match_spec *spec = sb->inner ? &value->inner : &value->outer; + + DR_STE_SET_TAG(eth_l2_dst_v1, tag, dmac_47_16, spec, dmac_47_16); + DR_STE_SET_TAG(eth_l2_dst_v1, tag, dmac_15_0, spec, dmac_15_0); + + return dr_ste_v1_build_eth_l2_src_or_dst_tag(value, sb->inner, tag); +} + +static void dr_ste_v1_build_eth_l2_dst_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + dr_ste_v1_build_eth_l2_dst_bit_mask(mask, sb->inner, sb->bit_mask); + + sb->lu_type = DR_STE_CALC_DFNR_TYPE(ETHL2, sb->inner); + sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v1_build_eth_l2_dst_tag; +} + +static void dr_ste_v1_build_eth_l2_tnl_bit_mask(struct mlx5dr_match_param *value, + bool inner, u8 *bit_mask) +{ + struct mlx5dr_match_spec *mask = inner ? &value->inner : &value->outer; + struct mlx5dr_match_misc *misc = &value->misc; + + DR_STE_SET_TAG(eth_l2_tnl_v1, bit_mask, dmac_47_16, mask, dmac_47_16); + DR_STE_SET_TAG(eth_l2_tnl_v1, bit_mask, dmac_15_0, mask, dmac_15_0); + DR_STE_SET_TAG(eth_l2_tnl_v1, bit_mask, first_vlan_id, mask, first_vid); + DR_STE_SET_TAG(eth_l2_tnl_v1, bit_mask, first_cfi, mask, first_cfi); + DR_STE_SET_TAG(eth_l2_tnl_v1, bit_mask, first_priority, mask, first_prio); + DR_STE_SET_TAG(eth_l2_tnl_v1, bit_mask, ip_fragmented, mask, frag); + DR_STE_SET_TAG(eth_l2_tnl_v1, bit_mask, l3_ethertype, mask, ethertype); + DR_STE_SET_ONES(eth_l2_tnl_v1, bit_mask, l3_type, mask, ip_version); + + if (misc->vxlan_vni) { + MLX5_SET(ste_eth_l2_tnl_v1, bit_mask, + l2_tunneling_network_id, (misc->vxlan_vni << 8)); + misc->vxlan_vni = 0; + } + + if (mask->svlan_tag || mask->cvlan_tag) { + MLX5_SET(ste_eth_l2_tnl_v1, bit_mask, first_vlan_qualifier, -1); + mask->cvlan_tag = 0; + mask->svlan_tag = 0; + } +} + +static int dr_ste_v1_build_eth_l2_tnl_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *tag) +{ + struct mlx5dr_match_spec *spec = sb->inner ? &value->inner : &value->outer; + struct mlx5dr_match_misc *misc = &value->misc; + + DR_STE_SET_TAG(eth_l2_tnl_v1, tag, dmac_47_16, spec, dmac_47_16); + DR_STE_SET_TAG(eth_l2_tnl_v1, tag, dmac_15_0, spec, dmac_15_0); + DR_STE_SET_TAG(eth_l2_tnl_v1, tag, first_vlan_id, spec, first_vid); + DR_STE_SET_TAG(eth_l2_tnl_v1, tag, first_cfi, spec, first_cfi); + DR_STE_SET_TAG(eth_l2_tnl_v1, tag, ip_fragmented, spec, frag); + DR_STE_SET_TAG(eth_l2_tnl_v1, tag, first_priority, spec, first_prio); + DR_STE_SET_TAG(eth_l2_tnl_v1, tag, l3_ethertype, spec, ethertype); + + if (misc->vxlan_vni) { + MLX5_SET(ste_eth_l2_tnl_v1, tag, l2_tunneling_network_id, + (misc->vxlan_vni << 8)); + misc->vxlan_vni = 0; + } + + if (spec->cvlan_tag) { + MLX5_SET(ste_eth_l2_tnl_v1, tag, first_vlan_qualifier, DR_STE_CVLAN); + spec->cvlan_tag = 0; + } else if (spec->svlan_tag) { + MLX5_SET(ste_eth_l2_tnl_v1, tag, first_vlan_qualifier, DR_STE_SVLAN); + spec->svlan_tag = 0; + } + + if (spec->ip_version == IP_VERSION_IPV4) { + MLX5_SET(ste_eth_l2_tnl_v1, tag, l3_type, STE_IPV4); + spec->ip_version = 0; + } else if (spec->ip_version == IP_VERSION_IPV6) { + MLX5_SET(ste_eth_l2_tnl_v1, tag, l3_type, STE_IPV6); + spec->ip_version = 0; + } else if (spec->ip_version) { + return -EINVAL; + } + + return 0; +} + +static void dr_ste_v1_build_eth_l2_tnl_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + dr_ste_v1_build_eth_l2_tnl_bit_mask(mask, sb->inner, sb->bit_mask); + + sb->lu_type = DR_STE_V1_LU_TYPE_ETHL2_TNL; + sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v1_build_eth_l2_tnl_tag; +} + +static int dr_ste_v1_build_eth_l3_ipv4_misc_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *tag) +{ + struct mlx5dr_match_spec *spec = sb->inner ? &value->inner : &value->outer; + + DR_STE_SET_TAG(eth_l3_ipv4_misc_v1, tag, time_to_live, spec, ttl_hoplimit); + + return 0; +} + +static void dr_ste_v1_build_eth_l3_ipv4_misc_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + dr_ste_v1_build_eth_l3_ipv4_misc_tag(mask, sb, sb->bit_mask); + + sb->lu_type = DR_STE_CALC_DFNR_TYPE(ETHL3_IPV4_MISC, sb->inner); + sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v1_build_eth_l3_ipv4_misc_tag; +} + +static int dr_ste_v1_build_eth_ipv6_l3_l4_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *tag) +{ + struct mlx5dr_match_spec *spec = sb->inner ? &value->inner : &value->outer; + struct mlx5dr_match_misc *misc = &value->misc; + + DR_STE_SET_TAG(eth_l4_v1, tag, dst_port, spec, tcp_dport); + DR_STE_SET_TAG(eth_l4_v1, tag, src_port, spec, tcp_sport); + DR_STE_SET_TAG(eth_l4_v1, tag, dst_port, spec, udp_dport); + DR_STE_SET_TAG(eth_l4_v1, tag, src_port, spec, udp_sport); + DR_STE_SET_TAG(eth_l4_v1, tag, protocol, spec, ip_protocol); + DR_STE_SET_TAG(eth_l4_v1, tag, fragmented, spec, frag); + DR_STE_SET_TAG(eth_l4_v1, tag, dscp, spec, ip_dscp); + DR_STE_SET_TAG(eth_l4_v1, tag, ecn, spec, ip_ecn); + DR_STE_SET_TAG(eth_l4_v1, tag, ipv6_hop_limit, spec, ttl_hoplimit); + + if (sb->inner) + DR_STE_SET_TAG(eth_l4_v1, tag, flow_label, misc, inner_ipv6_flow_label); + else + DR_STE_SET_TAG(eth_l4_v1, tag, flow_label, misc, outer_ipv6_flow_label); + + if (spec->tcp_flags) { + DR_STE_SET_TCP_FLAGS(eth_l4_v1, tag, spec); + spec->tcp_flags = 0; + } + + return 0; +} + +static void dr_ste_v1_build_eth_ipv6_l3_l4_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + dr_ste_v1_build_eth_ipv6_l3_l4_tag(mask, sb, sb->bit_mask); + + sb->lu_type = DR_STE_CALC_DFNR_TYPE(ETHL4, sb->inner); + sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v1_build_eth_ipv6_l3_l4_tag; +} + +static int dr_ste_v1_build_mpls_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *tag) +{ + struct mlx5dr_match_misc2 *misc2 = &value->misc2; + + if (sb->inner) + DR_STE_SET_MPLS(mpls_v1, misc2, inner, tag); + else + DR_STE_SET_MPLS(mpls_v1, misc2, outer, tag); + + return 0; +} + +static void dr_ste_v1_build_mpls_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + dr_ste_v1_build_mpls_tag(mask, sb, sb->bit_mask); + + sb->lu_type = DR_STE_CALC_DFNR_TYPE(MPLS, sb->inner); + sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v1_build_mpls_tag; +} + +static int dr_ste_v1_build_tnl_gre_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *tag) +{ + struct mlx5dr_match_misc *misc = &value->misc; + + DR_STE_SET_TAG(gre_v1, tag, gre_protocol, misc, gre_protocol); + DR_STE_SET_TAG(gre_v1, tag, gre_k_present, misc, gre_k_present); + DR_STE_SET_TAG(gre_v1, tag, gre_key_h, misc, gre_key_h); + DR_STE_SET_TAG(gre_v1, tag, gre_key_l, misc, gre_key_l); + + DR_STE_SET_TAG(gre_v1, tag, gre_c_present, misc, gre_c_present); + DR_STE_SET_TAG(gre_v1, tag, gre_s_present, misc, gre_s_present); + + return 0; +} + +static void dr_ste_v1_build_tnl_gre_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + dr_ste_v1_build_tnl_gre_tag(mask, sb, sb->bit_mask); + + sb->lu_type = DR_STE_V1_LU_TYPE_GRE; + sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v1_build_tnl_gre_tag; +} + +static int dr_ste_v1_build_tnl_mpls_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *tag) +{ + struct mlx5dr_match_misc2 *misc2 = &value->misc2; + + if (DR_STE_IS_OUTER_MPLS_OVER_GRE_SET(misc2)) { + DR_STE_SET_TAG(mpls_v1, tag, mpls0_label, + misc2, outer_first_mpls_over_gre_label); + + DR_STE_SET_TAG(mpls_v1, tag, mpls0_exp, + misc2, outer_first_mpls_over_gre_exp); + + DR_STE_SET_TAG(mpls_v1, tag, mpls0_s_bos, + misc2, outer_first_mpls_over_gre_s_bos); + + DR_STE_SET_TAG(mpls_v1, tag, mpls0_ttl, + misc2, outer_first_mpls_over_gre_ttl); + } else { + DR_STE_SET_TAG(mpls_v1, tag, mpls0_label, + misc2, outer_first_mpls_over_udp_label); + + DR_STE_SET_TAG(mpls_v1, tag, mpls0_exp, + misc2, outer_first_mpls_over_udp_exp); + + DR_STE_SET_TAG(mpls_v1, tag, mpls0_s_bos, + misc2, outer_first_mpls_over_udp_s_bos); + + DR_STE_SET_TAG(mpls_v1, tag, mpls0_ttl, + misc2, outer_first_mpls_over_udp_ttl); + } + + return 0; +} + +static void dr_ste_v1_build_tnl_mpls_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + dr_ste_v1_build_tnl_mpls_tag(mask, sb, sb->bit_mask); + + sb->lu_type = DR_STE_V1_LU_TYPE_MPLS_I; + sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v1_build_tnl_mpls_tag; +} + +static int dr_ste_v1_build_icmp_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *tag) +{ + struct mlx5dr_match_misc3 *misc3 = &value->misc3; + bool is_ipv4 = DR_MASK_IS_ICMPV4_SET(misc3); + u32 *icmp_header_data; + u8 *icmp_type; + u8 *icmp_code; + + if (is_ipv4) { + icmp_header_data = &misc3->icmpv4_header_data; + icmp_type = &misc3->icmpv4_type; + icmp_code = &misc3->icmpv4_code; + } else { + icmp_header_data = &misc3->icmpv6_header_data; + icmp_type = &misc3->icmpv6_type; + icmp_code = &misc3->icmpv6_code; + } + + MLX5_SET(ste_icmp_v1, tag, icmp_header_data, *icmp_header_data); + MLX5_SET(ste_icmp_v1, tag, icmp_type, *icmp_type); + MLX5_SET(ste_icmp_v1, tag, icmp_code, *icmp_code); + + *icmp_header_data = 0; + *icmp_type = 0; + *icmp_code = 0; + + return 0; +} + +static int dr_ste_v1_build_icmp_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + dr_ste_v1_build_icmp_tag(mask, sb, sb->bit_mask); + + sb->lu_type = DR_STE_V1_LU_TYPE_ETHL4_MISC_O; + sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v1_build_icmp_tag; + + return 0; +} + +static int dr_ste_v1_build_general_purpose_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *tag) +{ + struct mlx5dr_match_misc2 *misc2 = &value->misc2; + + DR_STE_SET_TAG(general_purpose, tag, general_purpose_lookup_field, + misc2, metadata_reg_a); + + return 0; +} + +static void dr_ste_v1_build_general_purpose_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + dr_ste_v1_build_general_purpose_tag(mask, sb, sb->bit_mask); + + sb->lu_type = DR_STE_V1_LU_TYPE_GENERAL_PURPOSE; + sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v1_build_general_purpose_tag; +} + +static int dr_ste_v1_build_eth_l4_misc_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *tag) +{ + struct mlx5dr_match_misc3 *misc3 = &value->misc3; + + if (sb->inner) { + DR_STE_SET_TAG(eth_l4_misc_v1, tag, seq_num, misc3, inner_tcp_seq_num); + DR_STE_SET_TAG(eth_l4_misc_v1, tag, ack_num, misc3, inner_tcp_ack_num); + } else { + DR_STE_SET_TAG(eth_l4_misc_v1, tag, seq_num, misc3, outer_tcp_seq_num); + DR_STE_SET_TAG(eth_l4_misc_v1, tag, ack_num, misc3, outer_tcp_ack_num); + } + + return 0; +} + +static void dr_ste_v1_build_eth_l4_misc_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + dr_ste_v1_build_eth_l4_misc_tag(mask, sb, sb->bit_mask); + + sb->lu_type = DR_STE_V1_LU_TYPE_ETHL4_MISC_O; + sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v1_build_eth_l4_misc_tag; +} + +static int +dr_ste_v1_build_flex_parser_tnl_vxlan_gpe_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *tag) +{ + struct mlx5dr_match_misc3 *misc3 = &value->misc3; + + DR_STE_SET_TAG(flex_parser_tnl_vxlan_gpe, tag, + outer_vxlan_gpe_flags, misc3, + outer_vxlan_gpe_flags); + DR_STE_SET_TAG(flex_parser_tnl_vxlan_gpe, tag, + outer_vxlan_gpe_next_protocol, misc3, + outer_vxlan_gpe_next_protocol); + DR_STE_SET_TAG(flex_parser_tnl_vxlan_gpe, tag, + outer_vxlan_gpe_vni, misc3, + outer_vxlan_gpe_vni); + + return 0; +} + +static void +dr_ste_v1_build_flex_parser_tnl_vxlan_gpe_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + dr_ste_v1_build_flex_parser_tnl_vxlan_gpe_tag(mask, sb, sb->bit_mask); + + sb->lu_type = DR_STE_V1_LU_TYPE_FLEX_PARSER_TNL_HEADER; + sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v1_build_flex_parser_tnl_vxlan_gpe_tag; +} + +static int +dr_ste_v1_build_flex_parser_tnl_geneve_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *tag) +{ + struct mlx5dr_match_misc *misc = &value->misc; + + DR_STE_SET_TAG(flex_parser_tnl_geneve, tag, + geneve_protocol_type, misc, geneve_protocol_type); + DR_STE_SET_TAG(flex_parser_tnl_geneve, tag, + geneve_oam, misc, geneve_oam); + DR_STE_SET_TAG(flex_parser_tnl_geneve, tag, + geneve_opt_len, misc, geneve_opt_len); + DR_STE_SET_TAG(flex_parser_tnl_geneve, tag, + geneve_vni, misc, geneve_vni); + + return 0; +} + +static void +dr_ste_v1_build_flex_parser_tnl_geneve_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + dr_ste_v1_build_flex_parser_tnl_geneve_tag(mask, sb, sb->bit_mask); + + sb->lu_type = DR_STE_V1_LU_TYPE_FLEX_PARSER_TNL_HEADER; + sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v1_build_flex_parser_tnl_geneve_tag; +} + +static int dr_ste_v1_build_register_0_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *tag) +{ + struct mlx5dr_match_misc2 *misc2 = &value->misc2; + + DR_STE_SET_TAG(register_0, tag, register_0_h, misc2, metadata_reg_c_0); + DR_STE_SET_TAG(register_0, tag, register_0_l, misc2, metadata_reg_c_1); + DR_STE_SET_TAG(register_0, tag, register_1_h, misc2, metadata_reg_c_2); + DR_STE_SET_TAG(register_0, tag, register_1_l, misc2, metadata_reg_c_3); + + return 0; +} + +static void dr_ste_v1_build_register_0_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + dr_ste_v1_build_register_0_tag(mask, sb, sb->bit_mask); + + sb->lu_type = DR_STE_V1_LU_TYPE_STEERING_REGISTERS_0; + sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v1_build_register_0_tag; +} + +static int dr_ste_v1_build_register_1_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *tag) +{ + struct mlx5dr_match_misc2 *misc2 = &value->misc2; + + DR_STE_SET_TAG(register_1, tag, register_2_h, misc2, metadata_reg_c_4); + DR_STE_SET_TAG(register_1, tag, register_2_l, misc2, metadata_reg_c_5); + DR_STE_SET_TAG(register_1, tag, register_3_h, misc2, metadata_reg_c_6); + DR_STE_SET_TAG(register_1, tag, register_3_l, misc2, metadata_reg_c_7); + + return 0; +} + +static void dr_ste_v1_build_register_1_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + dr_ste_v1_build_register_1_tag(mask, sb, sb->bit_mask); + + sb->lu_type = DR_STE_V1_LU_TYPE_STEERING_REGISTERS_1; + sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v1_build_register_1_tag; +} + +static void dr_ste_v1_build_src_gvmi_qpn_bit_mask(struct mlx5dr_match_param *value, + u8 *bit_mask) +{ + struct mlx5dr_match_misc *misc_mask = &value->misc; + + DR_STE_SET_ONES(src_gvmi_qp_v1, bit_mask, source_gvmi, misc_mask, source_port); + DR_STE_SET_ONES(src_gvmi_qp_v1, bit_mask, source_qp, misc_mask, source_sqn); +} + +static int dr_ste_v1_build_src_gvmi_qpn_tag(struct mlx5dr_match_param *value, + struct mlx5dr_ste_build *sb, + u8 *tag) +{ + struct mlx5dr_match_misc *misc = &value->misc; + struct mlx5dr_cmd_vport_cap *vport_cap; + struct mlx5dr_domain *dmn = sb->dmn; + struct mlx5dr_cmd_caps *caps; + u8 *bit_mask = sb->bit_mask; + + DR_STE_SET_TAG(src_gvmi_qp_v1, tag, source_qp, misc, source_sqn); + + if (sb->vhca_id_valid) { + /* Find port GVMI based on the eswitch_owner_vhca_id */ + if (misc->source_eswitch_owner_vhca_id == dmn->info.caps.gvmi) + caps = &dmn->info.caps; + else if (dmn->peer_dmn && (misc->source_eswitch_owner_vhca_id == + dmn->peer_dmn->info.caps.gvmi)) + caps = &dmn->peer_dmn->info.caps; + else + return -EINVAL; + + misc->source_eswitch_owner_vhca_id = 0; + } else { + caps = &dmn->info.caps; + } + + if (!MLX5_GET(ste_src_gvmi_qp_v1, bit_mask, source_gvmi)) + return 0; + + vport_cap = mlx5dr_get_vport_cap(caps, misc->source_port); + if (!vport_cap) { + mlx5dr_err(dmn, "Vport 0x%x is disabled or invalid\n", + misc->source_port); + return -EINVAL; + } + + if (vport_cap->vport_gvmi) + MLX5_SET(ste_src_gvmi_qp_v1, tag, source_gvmi, vport_cap->vport_gvmi); + + misc->source_port = 0; + return 0; +} + +static void dr_ste_v1_build_src_gvmi_qpn_init(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask) +{ + dr_ste_v1_build_src_gvmi_qpn_bit_mask(mask, sb->bit_mask); + + sb->lu_type = DR_STE_V1_LU_TYPE_SRC_QP_GVMI; + sb->byte_mask = mlx5dr_ste_conv_bit_to_byte_mask(sb->bit_mask); + sb->ste_build_tag_func = &dr_ste_v1_build_src_gvmi_qpn_tag; +} + +struct mlx5dr_ste_ctx ste_ctx_v1 = { + .build_eth_l2_src_dst_init = &dr_ste_v1_build_eth_l2_src_dst_init, + .build_eth_l3_ipv6_src_init = &dr_ste_v1_build_eth_l3_ipv6_src_init, + .build_eth_l3_ipv6_dst_init = &dr_ste_v1_build_eth_l3_ipv6_dst_init, + .build_eth_l3_ipv4_5_tuple_init = &dr_ste_v1_build_eth_l3_ipv4_5_tuple_init, + .build_eth_l2_src_init = &dr_ste_v1_build_eth_l2_src_init, + .build_eth_l2_dst_init = &dr_ste_v1_build_eth_l2_dst_init, + .build_eth_l2_tnl_init = &dr_ste_v1_build_eth_l2_tnl_init, + .build_eth_l3_ipv4_misc_init = &dr_ste_v1_build_eth_l3_ipv4_misc_init, + .build_eth_ipv6_l3_l4_init = &dr_ste_v1_build_eth_ipv6_l3_l4_init, + .build_mpls_init = &dr_ste_v1_build_mpls_init, + .build_tnl_gre_init = &dr_ste_v1_build_tnl_gre_init, + .build_tnl_mpls_init = &dr_ste_v1_build_tnl_mpls_init, + .build_icmp_init = &dr_ste_v1_build_icmp_init, + .build_general_purpose_init = &dr_ste_v1_build_general_purpose_init, + .build_eth_l4_misc_init = &dr_ste_v1_build_eth_l4_misc_init, + .build_tnl_vxlan_gpe_init = &dr_ste_v1_build_flex_parser_tnl_vxlan_gpe_init, + .build_tnl_geneve_init = &dr_ste_v1_build_flex_parser_tnl_geneve_init, + .build_register_0_init = &dr_ste_v1_build_register_0_init, + .build_register_1_init = &dr_ste_v1_build_register_1_init, + .build_src_gvmi_qpn_init = &dr_ste_v1_build_src_gvmi_qpn_init, +}; -- GitLab From 9f125ced1750ecf299dc52771410d823a36fbbfd Mon Sep 17 00:00:00 2001 From: Yevgeny Kliteynik Date: Tue, 22 Sep 2020 04:23:25 +0300 Subject: [PATCH 1715/2012] net/mlx5: DR, Allow native protocol support for HW STEv1 Some flex parser protocols are native as part of STEv1. The check for supported protocols was modified to allow this. Signed-off-by: Alex Vesker Signed-off-by: Yevgeny Kliteynik Signed-off-by: Saeed Mahameed --- .../mellanox/mlx5/core/steering/dr_matcher.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c index e3a002983c262..15673cd10039b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c @@ -113,7 +113,8 @@ dr_mask_is_vxlan_gpe_set(struct mlx5dr_match_misc3 *misc3) static bool dr_matcher_supp_vxlan_gpe(struct mlx5dr_cmd_caps *caps) { - return caps->flex_protocols & MLX5_FLEX_PARSER_VXLAN_GPE_ENABLED; + return (caps->sw_format_ver == MLX5_STEERING_FORMAT_CONNECTX_6DX) || + (caps->flex_protocols & MLX5_FLEX_PARSER_VXLAN_GPE_ENABLED); } static bool @@ -135,7 +136,8 @@ static bool dr_mask_is_tnl_geneve_set(struct mlx5dr_match_misc *misc) static bool dr_matcher_supp_tnl_geneve(struct mlx5dr_cmd_caps *caps) { - return caps->flex_protocols & MLX5_FLEX_PARSER_GENEVE_ENABLED; + return (caps->sw_format_ver == MLX5_STEERING_FORMAT_CONNECTX_6DX) || + (caps->flex_protocols & MLX5_FLEX_PARSER_GENEVE_ENABLED); } static bool @@ -148,12 +150,14 @@ dr_mask_is_tnl_geneve(struct mlx5dr_match_param *mask, static int dr_matcher_supp_icmp_v4(struct mlx5dr_cmd_caps *caps) { - return caps->flex_protocols & MLX5_FLEX_PARSER_ICMP_V4_ENABLED; + return (caps->sw_format_ver == MLX5_STEERING_FORMAT_CONNECTX_6DX) || + (caps->flex_protocols & MLX5_FLEX_PARSER_ICMP_V4_ENABLED); } static int dr_matcher_supp_icmp_v6(struct mlx5dr_cmd_caps *caps) { - return caps->flex_protocols & MLX5_FLEX_PARSER_ICMP_V6_ENABLED; + return (caps->sw_format_ver == MLX5_STEERING_FORMAT_CONNECTX_6DX) || + (caps->flex_protocols & MLX5_FLEX_PARSER_ICMP_V6_ENABLED); } static bool dr_mask_is_icmpv6_set(struct mlx5dr_match_misc3 *misc3) -- GitLab From a6098129c781a7b0fe0d518281bce99b60fe7203 Mon Sep 17 00:00:00 2001 From: Yevgeny Kliteynik Date: Tue, 22 Sep 2020 04:23:31 +0300 Subject: [PATCH 1716/2012] net/mlx5: DR, Add STEv1 setters and getters Add HW specific setter and getters to STEv1 file. Since STEv0 and STEv1 format are different, each version should implemented different setters and getters. Signed-off-by: Alex Vesker Signed-off-by: Yevgeny Kliteynik Signed-off-by: Saeed Mahameed --- .../mellanox/mlx5/core/steering/dr_ste_v1.c | 76 +++++++++++++++++++ .../mlx5/core/steering/mlx5_ifc_dr_ste_v1.h | 57 ++++++++++++++ 2 files changed, 133 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c index bf3cb189f3bc9..522909f1cab68 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c @@ -55,6 +55,72 @@ enum { DR_STE_V1_LU_TYPE_DONT_CARE = MLX5DR_STE_LU_TYPE_DONT_CARE, }; +static void dr_ste_v1_set_miss_addr(u8 *hw_ste_p, u64 miss_addr) +{ + u64 index = miss_addr >> 6; + + MLX5_SET(ste_match_bwc_v1, hw_ste_p, miss_address_39_32, index >> 26); + MLX5_SET(ste_match_bwc_v1, hw_ste_p, miss_address_31_6, index); +} + +static u64 dr_ste_v1_get_miss_addr(u8 *hw_ste_p) +{ + u64 index = + (MLX5_GET(ste_match_bwc_v1, hw_ste_p, miss_address_31_6) | + MLX5_GET(ste_match_bwc_v1, hw_ste_p, miss_address_39_32) << 26); + + return index << 6; +} + +static void dr_ste_v1_set_byte_mask(u8 *hw_ste_p, u16 byte_mask) +{ + MLX5_SET(ste_match_bwc_v1, hw_ste_p, byte_mask, byte_mask); +} + +static u16 dr_ste_v1_get_byte_mask(u8 *hw_ste_p) +{ + return MLX5_GET(ste_match_bwc_v1, hw_ste_p, byte_mask); +} + +static void dr_ste_v1_set_lu_type(u8 *hw_ste_p, u16 lu_type) +{ + MLX5_SET(ste_match_bwc_v1, hw_ste_p, entry_format, lu_type >> 8); + MLX5_SET(ste_match_bwc_v1, hw_ste_p, match_definer_ctx_idx, lu_type & 0xFF); +} + +static void dr_ste_v1_set_next_lu_type(u8 *hw_ste_p, u16 lu_type) +{ + MLX5_SET(ste_match_bwc_v1, hw_ste_p, next_entry_format, lu_type >> 8); + MLX5_SET(ste_match_bwc_v1, hw_ste_p, hash_definer_ctx_idx, lu_type & 0xFF); +} + +static u16 dr_ste_v1_get_next_lu_type(u8 *hw_ste_p) +{ + u8 mode = MLX5_GET(ste_match_bwc_v1, hw_ste_p, next_entry_format); + u8 index = MLX5_GET(ste_match_bwc_v1, hw_ste_p, hash_definer_ctx_idx); + + return (mode << 8 | index); +} + +static void dr_ste_v1_set_hit_addr(u8 *hw_ste_p, u64 icm_addr, u32 ht_size) +{ + u64 index = (icm_addr >> 5) | ht_size; + + MLX5_SET(ste_match_bwc_v1, hw_ste_p, next_table_base_39_32_size, index >> 27); + MLX5_SET(ste_match_bwc_v1, hw_ste_p, next_table_base_31_5_size, index); +} + +static void dr_ste_v1_init(u8 *hw_ste_p, u16 lu_type, + u8 entry_type, u16 gvmi) +{ + dr_ste_v1_set_lu_type(hw_ste_p, lu_type); + dr_ste_v1_set_next_lu_type(hw_ste_p, MLX5DR_STE_LU_TYPE_DONT_CARE); + + MLX5_SET(ste_match_bwc_v1, hw_ste_p, gvmi, gvmi); + MLX5_SET(ste_match_bwc_v1, hw_ste_p, next_table_base_63_48, gvmi); + MLX5_SET(ste_match_bwc_v1, hw_ste_p, miss_address_63_48, gvmi); +} + static void dr_ste_v1_build_eth_l2_src_dst_bit_mask(struct mlx5dr_match_param *value, bool inner, u8 *bit_mask) { @@ -885,6 +951,7 @@ static void dr_ste_v1_build_src_gvmi_qpn_init(struct mlx5dr_ste_build *sb, } struct mlx5dr_ste_ctx ste_ctx_v1 = { + /* Builders */ .build_eth_l2_src_dst_init = &dr_ste_v1_build_eth_l2_src_dst_init, .build_eth_l3_ipv6_src_init = &dr_ste_v1_build_eth_l3_ipv6_src_init, .build_eth_l3_ipv6_dst_init = &dr_ste_v1_build_eth_l3_ipv6_dst_init, @@ -905,4 +972,13 @@ struct mlx5dr_ste_ctx ste_ctx_v1 = { .build_register_0_init = &dr_ste_v1_build_register_0_init, .build_register_1_init = &dr_ste_v1_build_register_1_init, .build_src_gvmi_qpn_init = &dr_ste_v1_build_src_gvmi_qpn_init, + /* Getters and Setters */ + .ste_init = &dr_ste_v1_init, + .set_next_lu_type = &dr_ste_v1_set_next_lu_type, + .get_next_lu_type = &dr_ste_v1_get_next_lu_type, + .set_miss_addr = &dr_ste_v1_set_miss_addr, + .get_miss_addr = &dr_ste_v1_get_miss_addr, + .set_hit_addr = &dr_ste_v1_set_hit_addr, + .set_byte_mask = &dr_ste_v1_set_byte_mask, + .get_byte_mask = &dr_ste_v1_get_byte_mask, }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5_ifc_dr_ste_v1.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5_ifc_dr_ste_v1.h index 6db7b8493fd98..678b048e70223 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5_ifc_dr_ste_v1.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5_ifc_dr_ste_v1.h @@ -4,6 +4,63 @@ #ifndef MLX5_IFC_DR_STE_V1_H #define MLX5_IFC_DR_STE_V1_H +struct mlx5_ifc_ste_match_bwc_v1_bits { + u8 entry_format[0x8]; + u8 counter_id[0x18]; + + u8 miss_address_63_48[0x10]; + u8 match_definer_ctx_idx[0x8]; + u8 miss_address_39_32[0x8]; + + u8 miss_address_31_6[0x1a]; + u8 reserved_at_5a[0x1]; + u8 match_polarity[0x1]; + u8 reparse[0x1]; + u8 reserved_at_5d[0x3]; + + u8 next_table_base_63_48[0x10]; + u8 hash_definer_ctx_idx[0x8]; + u8 next_table_base_39_32_size[0x8]; + + u8 next_table_base_31_5_size[0x1b]; + u8 hash_type[0x2]; + u8 hash_after_actions[0x1]; + u8 reserved_at_9e[0x2]; + + u8 byte_mask[0x10]; + u8 next_entry_format[0x1]; + u8 mask_mode[0x1]; + u8 gvmi[0xe]; + + u8 action[0x40]; +}; + +struct mlx5_ifc_ste_mask_and_match_v1_bits { + u8 entry_format[0x8]; + u8 counter_id[0x18]; + + u8 miss_address_63_48[0x10]; + u8 match_definer_ctx_idx[0x8]; + u8 miss_address_39_32[0x8]; + + u8 miss_address_31_6[0x1a]; + u8 reserved_at_5a[0x1]; + u8 match_polarity[0x1]; + u8 reparse[0x1]; + u8 reserved_at_5d[0x3]; + + u8 next_table_base_63_48[0x10]; + u8 hash_definer_ctx_idx[0x8]; + u8 next_table_base_39_32_size[0x8]; + + u8 next_table_base_31_5_size[0x1b]; + u8 hash_type[0x2]; + u8 hash_after_actions[0x1]; + u8 reserved_at_9e[0x2]; + + u8 action[0x60]; +}; + struct mlx5_ifc_ste_eth_l2_src_v1_bits { u8 reserved_at_0[0x1]; u8 sx_sniffer[0x1]; -- GitLab From 4e856c5db9b4d6601337dd5e3ea72a6931e2469b Mon Sep 17 00:00:00 2001 From: Yevgeny Kliteynik Date: Tue, 22 Sep 2020 04:23:42 +0300 Subject: [PATCH 1717/2012] net/mlx5: DR, Add STEv1 action apply logic Add HW specific action apply logic to STEv1. Since STEv0 and STEv1 actions format is different, each version has its implementation. Signed-off-by: Alex Vesker Signed-off-by: Yevgeny Kliteynik Signed-off-by: Saeed Mahameed --- .../mellanox/mlx5/core/steering/dr_ste_v1.c | 358 ++++++++++++++++++ .../mlx5/core/steering/mlx5_ifc_dr_ste_v1.h | 100 +++++ 2 files changed, 458 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c index 522909f1cab68..02bb3a28cfa1e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c @@ -55,6 +55,51 @@ enum { DR_STE_V1_LU_TYPE_DONT_CARE = MLX5DR_STE_LU_TYPE_DONT_CARE, }; +enum dr_ste_v1_header_anchors { + DR_STE_HEADER_ANCHOR_START_OUTER = 0x00, + DR_STE_HEADER_ANCHOR_1ST_VLAN = 0x02, + DR_STE_HEADER_ANCHOR_IPV6_IPV4 = 0x07, + DR_STE_HEADER_ANCHOR_INNER_MAC = 0x13, + DR_STE_HEADER_ANCHOR_INNER_IPV6_IPV4 = 0x19, +}; + +enum dr_ste_v1_action_size { + DR_STE_ACTION_SINGLE_SZ = 4, + DR_STE_ACTION_DOUBLE_SZ = 8, + DR_STE_ACTION_TRIPLE_SZ = 12, +}; + +enum dr_ste_v1_action_insert_ptr_attr { + DR_STE_V1_ACTION_INSERT_PTR_ATTR_NONE = 0, /* Regular push header (e.g. push vlan) */ + DR_STE_V1_ACTION_INSERT_PTR_ATTR_ENCAP = 1, /* Encapsulation / Tunneling */ + DR_STE_V1_ACTION_INSERT_PTR_ATTR_ESP = 2, /* IPsec */ +}; + +enum dr_ste_v1_action_id { + DR_STE_V1_ACTION_ID_NOP = 0x00, + DR_STE_V1_ACTION_ID_COPY = 0x05, + DR_STE_V1_ACTION_ID_SET = 0x06, + DR_STE_V1_ACTION_ID_ADD = 0x07, + DR_STE_V1_ACTION_ID_REMOVE_BY_SIZE = 0x08, + DR_STE_V1_ACTION_ID_REMOVE_HEADER_TO_HEADER = 0x09, + DR_STE_V1_ACTION_ID_INSERT_INLINE = 0x0a, + DR_STE_V1_ACTION_ID_INSERT_POINTER = 0x0b, + DR_STE_V1_ACTION_ID_FLOW_TAG = 0x0c, + DR_STE_V1_ACTION_ID_QUEUE_ID_SEL = 0x0d, + DR_STE_V1_ACTION_ID_ACCELERATED_LIST = 0x0e, + DR_STE_V1_ACTION_ID_MODIFY_LIST = 0x0f, + DR_STE_V1_ACTION_ID_TRAILER = 0x13, + DR_STE_V1_ACTION_ID_COUNTER_ID = 0x14, + DR_STE_V1_ACTION_ID_MAX = 0x21, + /* use for special cases */ + DR_STE_V1_ACTION_ID_SPECIAL_ENCAP_L3 = 0x22, +}; + +static void dr_ste_v1_set_entry_type(u8 *hw_ste_p, u8 entry_type) +{ + MLX5_SET(ste_match_bwc_v1, hw_ste_p, entry_format, entry_type); +} + static void dr_ste_v1_set_miss_addr(u8 *hw_ste_p, u64 miss_addr) { u64 index = miss_addr >> 6; @@ -102,6 +147,11 @@ static u16 dr_ste_v1_get_next_lu_type(u8 *hw_ste_p) return (mode << 8 | index); } +static void dr_ste_v1_set_hit_gvmi(u8 *hw_ste_p, u16 gvmi) +{ + MLX5_SET(ste_match_bwc_v1, hw_ste_p, next_table_base_63_48, gvmi); +} + static void dr_ste_v1_set_hit_addr(u8 *hw_ste_p, u64 icm_addr, u32 ht_size) { u64 index = (icm_addr >> 5) | ht_size; @@ -121,6 +171,311 @@ static void dr_ste_v1_init(u8 *hw_ste_p, u16 lu_type, MLX5_SET(ste_match_bwc_v1, hw_ste_p, miss_address_63_48, gvmi); } +static void dr_ste_v1_set_rx_flow_tag(u8 *s_action, u32 flow_tag) +{ + MLX5_SET(ste_single_action_flow_tag_v1, s_action, action_id, + DR_STE_V1_ACTION_ID_FLOW_TAG); + MLX5_SET(ste_single_action_flow_tag_v1, s_action, flow_tag, flow_tag); +} + +static void dr_ste_v1_set_counter_id(u8 *hw_ste_p, u32 ctr_id) +{ + MLX5_SET(ste_match_bwc_v1, hw_ste_p, counter_id, ctr_id); +} + +static void dr_ste_v1_set_reparse(u8 *hw_ste_p) +{ + MLX5_SET(ste_match_bwc_v1, hw_ste_p, reparse, 1); +} + +static void dr_ste_v1_set_tx_encap(u8 *hw_ste_p, u8 *d_action, + u32 reformat_id, int size) +{ + MLX5_SET(ste_double_action_insert_with_ptr_v1, d_action, action_id, + DR_STE_V1_ACTION_ID_INSERT_POINTER); + /* The hardware expects here size in words (2 byte) */ + MLX5_SET(ste_double_action_insert_with_ptr_v1, d_action, size, size / 2); + MLX5_SET(ste_double_action_insert_with_ptr_v1, d_action, pointer, reformat_id); + MLX5_SET(ste_double_action_insert_with_ptr_v1, d_action, attributes, + DR_STE_V1_ACTION_INSERT_PTR_ATTR_ENCAP); + dr_ste_v1_set_reparse(hw_ste_p); +} + +static void dr_ste_v1_set_tx_push_vlan(u8 *hw_ste_p, u8 *d_action, + u32 vlan_hdr) +{ + MLX5_SET(ste_double_action_insert_with_inline_v1, d_action, + action_id, DR_STE_V1_ACTION_ID_INSERT_INLINE); + /* The hardware expects offset to vlan header in words (2 byte) */ + MLX5_SET(ste_double_action_insert_with_inline_v1, d_action, + start_offset, HDR_LEN_L2_MACS >> 1); + MLX5_SET(ste_double_action_insert_with_inline_v1, d_action, + inline_data, vlan_hdr); + + dr_ste_v1_set_reparse(hw_ste_p); +} + +static void dr_ste_v1_set_rx_pop_vlan(u8 *hw_ste_p, u8 *s_action, u8 vlans_num) +{ + MLX5_SET(ste_single_action_remove_header_size_v1, s_action, + action_id, DR_STE_V1_ACTION_ID_REMOVE_BY_SIZE); + MLX5_SET(ste_single_action_remove_header_size_v1, s_action, + start_anchor, DR_STE_HEADER_ANCHOR_1ST_VLAN); + /* The hardware expects here size in words (2 byte) */ + MLX5_SET(ste_single_action_remove_header_size_v1, s_action, + remove_size, (HDR_LEN_L2_VLAN >> 1) * vlans_num); + + dr_ste_v1_set_reparse(hw_ste_p); +} + +static void dr_ste_v1_set_tx_encap_l3(u8 *hw_ste_p, + u8 *frst_s_action, + u8 *scnd_d_action, + u32 reformat_id, + int size) +{ + /* Remove L2 headers */ + MLX5_SET(ste_single_action_remove_header_v1, frst_s_action, action_id, + DR_STE_V1_ACTION_ID_REMOVE_HEADER_TO_HEADER); + MLX5_SET(ste_single_action_remove_header_v1, frst_s_action, end_anchor, + DR_STE_HEADER_ANCHOR_IPV6_IPV4); + + /* Encapsulate with given reformat ID */ + MLX5_SET(ste_double_action_insert_with_ptr_v1, scnd_d_action, action_id, + DR_STE_V1_ACTION_ID_INSERT_POINTER); + /* The hardware expects here size in words (2 byte) */ + MLX5_SET(ste_double_action_insert_with_ptr_v1, scnd_d_action, size, size / 2); + MLX5_SET(ste_double_action_insert_with_ptr_v1, scnd_d_action, pointer, reformat_id); + MLX5_SET(ste_double_action_insert_with_ptr_v1, scnd_d_action, attributes, + DR_STE_V1_ACTION_INSERT_PTR_ATTR_ENCAP); + + dr_ste_v1_set_reparse(hw_ste_p); +} + +static void dr_ste_v1_set_rx_decap(u8 *hw_ste_p, u8 *s_action) +{ + MLX5_SET(ste_single_action_remove_header_v1, s_action, action_id, + DR_STE_V1_ACTION_ID_REMOVE_HEADER_TO_HEADER); + MLX5_SET(ste_single_action_remove_header_v1, s_action, decap, 1); + MLX5_SET(ste_single_action_remove_header_v1, s_action, vni_to_cqe, 1); + MLX5_SET(ste_single_action_remove_header_v1, s_action, end_anchor, + DR_STE_HEADER_ANCHOR_INNER_MAC); + + dr_ste_v1_set_reparse(hw_ste_p); +} + +static void dr_ste_v1_set_rx_decap_l3(u8 *hw_ste_p, + u8 *s_action, + u16 decap_actions, + u32 decap_index) +{ + MLX5_SET(ste_single_action_modify_list_v1, s_action, action_id, + DR_STE_V1_ACTION_ID_MODIFY_LIST); + MLX5_SET(ste_single_action_modify_list_v1, s_action, num_of_modify_actions, + decap_actions); + MLX5_SET(ste_single_action_modify_list_v1, s_action, modify_actions_ptr, + decap_index); + + dr_ste_v1_set_reparse(hw_ste_p); +} + +static void dr_ste_v1_set_rewrite_actions(u8 *hw_ste_p, + u8 *s_action, + u16 num_of_actions, + u32 re_write_index) +{ + MLX5_SET(ste_single_action_modify_list_v1, s_action, action_id, + DR_STE_V1_ACTION_ID_MODIFY_LIST); + MLX5_SET(ste_single_action_modify_list_v1, s_action, num_of_modify_actions, + num_of_actions); + MLX5_SET(ste_single_action_modify_list_v1, s_action, modify_actions_ptr, + re_write_index); + + dr_ste_v1_set_reparse(hw_ste_p); +} + +static void dr_ste_v1_arr_init_next_match(u8 **last_ste, + u32 *added_stes, + u16 gvmi) +{ + u8 *action; + + (*added_stes)++; + *last_ste += DR_STE_SIZE; + dr_ste_v1_init(*last_ste, MLX5DR_STE_LU_TYPE_DONT_CARE, 0, gvmi); + dr_ste_v1_set_entry_type(*last_ste, DR_STE_V1_TYPE_MATCH); + + action = MLX5_ADDR_OF(ste_mask_and_match_v1, *last_ste, action); + memset(action, 0, MLX5_FLD_SZ_BYTES(ste_mask_and_match_v1, action)); +} + +static void dr_ste_v1_set_actions_tx(struct mlx5dr_domain *dmn, + u8 *action_type_set, + u8 *last_ste, + struct mlx5dr_ste_actions_attr *attr, + u32 *added_stes) +{ + u8 *action = MLX5_ADDR_OF(ste_match_bwc_v1, last_ste, action); + u8 action_sz = DR_STE_ACTION_DOUBLE_SZ; + bool allow_encap = true; + + if (action_type_set[DR_ACTION_TYP_CTR]) + dr_ste_v1_set_counter_id(last_ste, attr->ctr_id); + + if (action_type_set[DR_ACTION_TYP_MODIFY_HDR]) { + if (action_sz < DR_STE_ACTION_DOUBLE_SZ) { + dr_ste_v1_arr_init_next_match(&last_ste, added_stes, + attr->gvmi); + action = MLX5_ADDR_OF(ste_mask_and_match_v1, + last_ste, action); + action_sz = DR_STE_ACTION_TRIPLE_SZ; + } + dr_ste_v1_set_rewrite_actions(last_ste, action, + attr->modify_actions, + attr->modify_index); + action_sz -= DR_STE_ACTION_DOUBLE_SZ; + action += DR_STE_ACTION_DOUBLE_SZ; + allow_encap = false; + } + + if (action_type_set[DR_ACTION_TYP_PUSH_VLAN]) { + int i; + + for (i = 0; i < attr->vlans.count; i++) { + if (action_sz < DR_STE_ACTION_DOUBLE_SZ || !allow_encap) { + dr_ste_v1_arr_init_next_match(&last_ste, added_stes, attr->gvmi); + action = MLX5_ADDR_OF(ste_mask_and_match_v1, last_ste, action); + action_sz = DR_STE_ACTION_TRIPLE_SZ; + allow_encap = true; + } + dr_ste_v1_set_tx_push_vlan(last_ste, action, attr->vlans.headers[i]); + action_sz -= DR_STE_ACTION_DOUBLE_SZ; + action += DR_STE_ACTION_DOUBLE_SZ; + } + } + + if (action_type_set[DR_ACTION_TYP_L2_TO_TNL_L2]) { + if (!allow_encap || action_sz < DR_STE_ACTION_DOUBLE_SZ) { + dr_ste_v1_arr_init_next_match(&last_ste, added_stes, attr->gvmi); + action = MLX5_ADDR_OF(ste_mask_and_match_v1, last_ste, action); + action_sz = DR_STE_ACTION_TRIPLE_SZ; + allow_encap = true; + } + dr_ste_v1_set_tx_encap(last_ste, action, + attr->reformat_id, + attr->reformat_size); + action_sz -= DR_STE_ACTION_DOUBLE_SZ; + action += DR_STE_ACTION_DOUBLE_SZ; + } else if (action_type_set[DR_ACTION_TYP_L2_TO_TNL_L3]) { + u8 *d_action; + + dr_ste_v1_arr_init_next_match(&last_ste, added_stes, attr->gvmi); + action = MLX5_ADDR_OF(ste_mask_and_match_v1, last_ste, action); + action_sz = DR_STE_ACTION_TRIPLE_SZ; + d_action = action + DR_STE_ACTION_SINGLE_SZ; + + dr_ste_v1_set_tx_encap_l3(last_ste, + action, d_action, + attr->reformat_id, + attr->reformat_size); + action_sz -= DR_STE_ACTION_TRIPLE_SZ; + action += DR_STE_ACTION_TRIPLE_SZ; + } + + dr_ste_v1_set_hit_gvmi(last_ste, attr->hit_gvmi); + dr_ste_v1_set_hit_addr(last_ste, attr->final_icm_addr, 1); +} + +static void dr_ste_v1_set_actions_rx(struct mlx5dr_domain *dmn, + u8 *action_type_set, + u8 *last_ste, + struct mlx5dr_ste_actions_attr *attr, + u32 *added_stes) +{ + u8 *action = MLX5_ADDR_OF(ste_match_bwc_v1, last_ste, action); + u8 action_sz = DR_STE_ACTION_DOUBLE_SZ; + bool allow_modify_hdr = true; + bool allow_ctr = true; + + if (action_type_set[DR_ACTION_TYP_TNL_L3_TO_L2]) { + dr_ste_v1_set_rx_decap_l3(last_ste, action, + attr->decap_actions, + attr->decap_index); + dr_ste_v1_set_rewrite_actions(last_ste, action, + attr->decap_actions, + attr->decap_index); + action_sz -= DR_STE_ACTION_DOUBLE_SZ; + action += DR_STE_ACTION_DOUBLE_SZ; + allow_modify_hdr = false; + allow_ctr = false; + } else if (action_type_set[DR_ACTION_TYP_TNL_L2_TO_L2]) { + dr_ste_v1_set_rx_decap(last_ste, action); + action_sz -= DR_STE_ACTION_SINGLE_SZ; + action += DR_STE_ACTION_SINGLE_SZ; + allow_modify_hdr = false; + allow_ctr = false; + } + + if (action_type_set[DR_ACTION_TYP_TAG]) { + if (action_sz < DR_STE_ACTION_SINGLE_SZ) { + dr_ste_v1_arr_init_next_match(&last_ste, added_stes, attr->gvmi); + action = MLX5_ADDR_OF(ste_mask_and_match_v1, last_ste, action); + action_sz = DR_STE_ACTION_TRIPLE_SZ; + allow_modify_hdr = true; + allow_ctr = true; + } + dr_ste_v1_set_rx_flow_tag(action, attr->flow_tag); + action_sz -= DR_STE_ACTION_SINGLE_SZ; + action += DR_STE_ACTION_SINGLE_SZ; + } + + if (action_type_set[DR_ACTION_TYP_POP_VLAN]) { + if (action_sz < DR_STE_ACTION_SINGLE_SZ || + !allow_modify_hdr) { + dr_ste_v1_arr_init_next_match(&last_ste, added_stes, attr->gvmi); + action = MLX5_ADDR_OF(ste_mask_and_match_v1, last_ste, action); + action_sz = DR_STE_ACTION_TRIPLE_SZ; + allow_modify_hdr = false; + allow_ctr = false; + } + + dr_ste_v1_set_rx_pop_vlan(last_ste, action, attr->vlans.count); + action_sz -= DR_STE_ACTION_SINGLE_SZ; + action += DR_STE_ACTION_SINGLE_SZ; + } + + if (action_type_set[DR_ACTION_TYP_MODIFY_HDR]) { + /* Modify header and decapsulation must use different STEs */ + if (!allow_modify_hdr || action_sz < DR_STE_ACTION_DOUBLE_SZ) { + dr_ste_v1_arr_init_next_match(&last_ste, added_stes, attr->gvmi); + action = MLX5_ADDR_OF(ste_mask_and_match_v1, last_ste, action); + action_sz = DR_STE_ACTION_TRIPLE_SZ; + allow_modify_hdr = true; + allow_ctr = true; + } + dr_ste_v1_set_rewrite_actions(last_ste, action, + attr->modify_actions, + attr->modify_index); + action_sz -= DR_STE_ACTION_DOUBLE_SZ; + action += DR_STE_ACTION_DOUBLE_SZ; + } + + if (action_type_set[DR_ACTION_TYP_CTR]) { + /* Counter action set after decap to exclude decaped header */ + if (!allow_ctr) { + dr_ste_v1_arr_init_next_match(&last_ste, added_stes, attr->gvmi); + action = MLX5_ADDR_OF(ste_mask_and_match_v1, last_ste, action); + action_sz = DR_STE_ACTION_TRIPLE_SZ; + allow_modify_hdr = true; + allow_ctr = false; + } + dr_ste_v1_set_counter_id(last_ste, attr->ctr_id); + } + + dr_ste_v1_set_hit_gvmi(last_ste, attr->hit_gvmi); + dr_ste_v1_set_hit_addr(last_ste, attr->final_icm_addr, 1); +} + static void dr_ste_v1_build_eth_l2_src_dst_bit_mask(struct mlx5dr_match_param *value, bool inner, u8 *bit_mask) { @@ -981,4 +1336,7 @@ struct mlx5dr_ste_ctx ste_ctx_v1 = { .set_hit_addr = &dr_ste_v1_set_hit_addr, .set_byte_mask = &dr_ste_v1_set_byte_mask, .get_byte_mask = &dr_ste_v1_get_byte_mask, + /* Actions */ + .set_actions_rx = &dr_ste_v1_set_actions_rx, + .set_actions_tx = &dr_ste_v1_set_actions_tx, }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5_ifc_dr_ste_v1.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5_ifc_dr_ste_v1.h index 678b048e70223..31ecdb673625a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5_ifc_dr_ste_v1.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5_ifc_dr_ste_v1.h @@ -4,6 +4,106 @@ #ifndef MLX5_IFC_DR_STE_V1_H #define MLX5_IFC_DR_STE_V1_H +struct mlx5_ifc_ste_single_action_flow_tag_v1_bits { + u8 action_id[0x8]; + u8 flow_tag[0x18]; +}; + +struct mlx5_ifc_ste_single_action_modify_list_v1_bits { + u8 action_id[0x8]; + u8 num_of_modify_actions[0x8]; + u8 modify_actions_ptr[0x10]; +}; + +struct mlx5_ifc_ste_single_action_remove_header_v1_bits { + u8 action_id[0x8]; + u8 reserved_at_8[0x2]; + u8 start_anchor[0x6]; + u8 reserved_at_10[0x2]; + u8 end_anchor[0x6]; + u8 reserved_at_18[0x4]; + u8 decap[0x1]; + u8 vni_to_cqe[0x1]; + u8 qos_profile[0x2]; +}; + +struct mlx5_ifc_ste_single_action_remove_header_size_v1_bits { + u8 action_id[0x8]; + u8 reserved_at_8[0x2]; + u8 start_anchor[0x6]; + u8 outer_l4_remove[0x1]; + u8 reserved_at_11[0x1]; + u8 start_offset[0x7]; + u8 reserved_at_18[0x1]; + u8 remove_size[0x6]; +}; + +struct mlx5_ifc_ste_double_action_copy_v1_bits { + u8 action_id[0x8]; + u8 destination_dw_offset[0x8]; + u8 reserved_at_10[0x2]; + u8 destination_left_shifter[0x6]; + u8 reserved_at_17[0x2]; + u8 destination_length[0x6]; + + u8 reserved_at_20[0x8]; + u8 source_dw_offset[0x8]; + u8 reserved_at_30[0x2]; + u8 source_right_shifter[0x6]; + u8 reserved_at_38[0x8]; +}; + +struct mlx5_ifc_ste_double_action_set_v1_bits { + u8 action_id[0x8]; + u8 destination_dw_offset[0x8]; + u8 reserved_at_10[0x2]; + u8 destination_left_shifter[0x6]; + u8 reserved_at_18[0x2]; + u8 destination_length[0x6]; + + u8 inline_data[0x20]; +}; + +struct mlx5_ifc_ste_double_action_add_v1_bits { + u8 action_id[0x8]; + u8 destination_dw_offset[0x8]; + u8 reserved_at_10[0x2]; + u8 destination_left_shifter[0x6]; + u8 reserved_at_18[0x2]; + u8 destination_length[0x6]; + + u8 add_value[0x20]; +}; + +struct mlx5_ifc_ste_double_action_insert_with_inline_v1_bits { + u8 action_id[0x8]; + u8 reserved_at_8[0x2]; + u8 start_anchor[0x6]; + u8 start_offset[0x7]; + u8 reserved_at_17[0x9]; + + u8 inline_data[0x20]; +}; + +struct mlx5_ifc_ste_double_action_insert_with_ptr_v1_bits { + u8 action_id[0x8]; + u8 reserved_at_8[0x2]; + u8 start_anchor[0x6]; + u8 start_offset[0x7]; + u8 size[0x6]; + u8 attributes[0x3]; + + u8 pointer[0x20]; +}; + +struct mlx5_ifc_ste_double_action_modify_action_list_v1_bits { + u8 action_id[0x8]; + u8 modify_actions_pattern_pointer[0x18]; + + u8 number_of_modify_actions[0x8]; + u8 modify_actions_argument_pointer[0x18]; +}; + struct mlx5_ifc_ste_match_bwc_v1_bits { u8 entry_format[0x8]; u8 counter_id[0x18]; -- GitLab From c349b4137cfd9482f30dcd726748d0c4da1427f3 Mon Sep 17 00:00:00 2001 From: Yevgeny Kliteynik Date: Tue, 22 Sep 2020 04:23:53 +0300 Subject: [PATCH 1718/2012] net/mlx5: DR, Add STEv1 modify header logic Add HW specific modify header fields and logic to STEv1 file. Since STEv0 and STEv1 modify actions values are different, each version has its own implementation. Signed-off-by: Alex Vesker Signed-off-by: Yevgeny Kliteynik Signed-off-by: Saeed Mahameed --- .../mellanox/mlx5/core/steering/dr_ste_v1.c | 269 ++++++++++++++++++ .../mlx5/core/steering/mlx5_ifc_dr_ste_v1.h | 4 + 2 files changed, 273 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c index 02bb3a28cfa1e..42b853fa34654 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c @@ -95,6 +95,159 @@ enum dr_ste_v1_action_id { DR_STE_V1_ACTION_ID_SPECIAL_ENCAP_L3 = 0x22, }; +enum { + DR_STE_V1_ACTION_MDFY_FLD_L2_OUT_0 = 0x00, + DR_STE_V1_ACTION_MDFY_FLD_L2_OUT_1 = 0x01, + DR_STE_V1_ACTION_MDFY_FLD_L2_OUT_2 = 0x02, + DR_STE_V1_ACTION_MDFY_FLD_SRC_L2_OUT_0 = 0x08, + DR_STE_V1_ACTION_MDFY_FLD_SRC_L2_OUT_1 = 0x09, + DR_STE_V1_ACTION_MDFY_FLD_L3_OUT_0 = 0x0e, + DR_STE_V1_ACTION_MDFY_FLD_L4_OUT_0 = 0x18, + DR_STE_V1_ACTION_MDFY_FLD_L4_OUT_1 = 0x19, + DR_STE_V1_ACTION_MDFY_FLD_IPV4_OUT_0 = 0x40, + DR_STE_V1_ACTION_MDFY_FLD_IPV4_OUT_1 = 0x41, + DR_STE_V1_ACTION_MDFY_FLD_IPV6_DST_OUT_0 = 0x44, + DR_STE_V1_ACTION_MDFY_FLD_IPV6_DST_OUT_1 = 0x45, + DR_STE_V1_ACTION_MDFY_FLD_IPV6_DST_OUT_2 = 0x46, + DR_STE_V1_ACTION_MDFY_FLD_IPV6_DST_OUT_3 = 0x47, + DR_STE_V1_ACTION_MDFY_FLD_IPV6_SRC_OUT_0 = 0x4c, + DR_STE_V1_ACTION_MDFY_FLD_IPV6_SRC_OUT_1 = 0x4d, + DR_STE_V1_ACTION_MDFY_FLD_IPV6_SRC_OUT_2 = 0x4e, + DR_STE_V1_ACTION_MDFY_FLD_IPV6_SRC_OUT_3 = 0x4f, + DR_STE_V1_ACTION_MDFY_FLD_TCP_MISC_0 = 0x5e, + DR_STE_V1_ACTION_MDFY_FLD_TCP_MISC_1 = 0x5f, + DR_STE_V1_ACTION_MDFY_FLD_METADATA_2_CQE = 0x7b, + DR_STE_V1_ACTION_MDFY_FLD_GNRL_PURPOSE = 0x7c, + DR_STE_V1_ACTION_MDFY_FLD_REGISTER_2 = 0x8c, + DR_STE_V1_ACTION_MDFY_FLD_REGISTER_3 = 0x8d, + DR_STE_V1_ACTION_MDFY_FLD_REGISTER_4 = 0x8e, + DR_STE_V1_ACTION_MDFY_FLD_REGISTER_5 = 0x8f, + DR_STE_V1_ACTION_MDFY_FLD_REGISTER_6 = 0x90, + DR_STE_V1_ACTION_MDFY_FLD_REGISTER_7 = 0x91, +}; + +static const struct mlx5dr_ste_action_modify_field dr_ste_v1_action_modify_field_arr[] = { + [MLX5_ACTION_IN_FIELD_OUT_SMAC_47_16] = { + .hw_field = DR_STE_V1_ACTION_MDFY_FLD_SRC_L2_OUT_0, .start = 0, .end = 31, + }, + [MLX5_ACTION_IN_FIELD_OUT_SMAC_15_0] = { + .hw_field = DR_STE_V1_ACTION_MDFY_FLD_SRC_L2_OUT_1, .start = 16, .end = 31, + }, + [MLX5_ACTION_IN_FIELD_OUT_ETHERTYPE] = { + .hw_field = DR_STE_V1_ACTION_MDFY_FLD_L2_OUT_1, .start = 0, .end = 15, + }, + [MLX5_ACTION_IN_FIELD_OUT_DMAC_47_16] = { + .hw_field = DR_STE_V1_ACTION_MDFY_FLD_L2_OUT_0, .start = 0, .end = 31, + }, + [MLX5_ACTION_IN_FIELD_OUT_DMAC_15_0] = { + .hw_field = DR_STE_V1_ACTION_MDFY_FLD_L2_OUT_1, .start = 16, .end = 31, + }, + [MLX5_ACTION_IN_FIELD_OUT_IP_DSCP] = { + .hw_field = DR_STE_V1_ACTION_MDFY_FLD_L3_OUT_0, .start = 18, .end = 23, + }, + [MLX5_ACTION_IN_FIELD_OUT_TCP_FLAGS] = { + .hw_field = DR_STE_V1_ACTION_MDFY_FLD_L4_OUT_1, .start = 16, .end = 24, + .l4_type = DR_STE_ACTION_MDFY_TYPE_L4_TCP, + }, + [MLX5_ACTION_IN_FIELD_OUT_TCP_SPORT] = { + .hw_field = DR_STE_V1_ACTION_MDFY_FLD_L4_OUT_0, .start = 16, .end = 31, + .l4_type = DR_STE_ACTION_MDFY_TYPE_L4_TCP, + }, + [MLX5_ACTION_IN_FIELD_OUT_TCP_DPORT] = { + .hw_field = DR_STE_V1_ACTION_MDFY_FLD_L4_OUT_0, .start = 0, .end = 15, + .l4_type = DR_STE_ACTION_MDFY_TYPE_L4_TCP, + }, + [MLX5_ACTION_IN_FIELD_OUT_IP_TTL] = { + .hw_field = DR_STE_V1_ACTION_MDFY_FLD_L3_OUT_0, .start = 8, .end = 15, + .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV4, + }, + [MLX5_ACTION_IN_FIELD_OUT_IPV6_HOPLIMIT] = { + .hw_field = DR_STE_V1_ACTION_MDFY_FLD_L3_OUT_0, .start = 8, .end = 15, + .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6, + }, + [MLX5_ACTION_IN_FIELD_OUT_UDP_SPORT] = { + .hw_field = DR_STE_V1_ACTION_MDFY_FLD_L4_OUT_0, .start = 16, .end = 31, + .l4_type = DR_STE_ACTION_MDFY_TYPE_L4_UDP, + }, + [MLX5_ACTION_IN_FIELD_OUT_UDP_DPORT] = { + .hw_field = DR_STE_V1_ACTION_MDFY_FLD_L4_OUT_0, .start = 0, .end = 15, + .l4_type = DR_STE_ACTION_MDFY_TYPE_L4_UDP, + }, + [MLX5_ACTION_IN_FIELD_OUT_SIPV6_127_96] = { + .hw_field = DR_STE_V1_ACTION_MDFY_FLD_IPV6_SRC_OUT_0, .start = 0, .end = 31, + .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6, + }, + [MLX5_ACTION_IN_FIELD_OUT_SIPV6_95_64] = { + .hw_field = DR_STE_V1_ACTION_MDFY_FLD_IPV6_SRC_OUT_1, .start = 0, .end = 31, + .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6, + }, + [MLX5_ACTION_IN_FIELD_OUT_SIPV6_63_32] = { + .hw_field = DR_STE_V1_ACTION_MDFY_FLD_IPV6_SRC_OUT_2, .start = 0, .end = 31, + .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6, + }, + [MLX5_ACTION_IN_FIELD_OUT_SIPV6_31_0] = { + .hw_field = DR_STE_V1_ACTION_MDFY_FLD_IPV6_SRC_OUT_3, .start = 0, .end = 31, + .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6, + }, + [MLX5_ACTION_IN_FIELD_OUT_DIPV6_127_96] = { + .hw_field = DR_STE_V1_ACTION_MDFY_FLD_IPV6_DST_OUT_0, .start = 0, .end = 31, + .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6, + }, + [MLX5_ACTION_IN_FIELD_OUT_DIPV6_95_64] = { + .hw_field = DR_STE_V1_ACTION_MDFY_FLD_IPV6_DST_OUT_1, .start = 0, .end = 31, + .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6, + }, + [MLX5_ACTION_IN_FIELD_OUT_DIPV6_63_32] = { + .hw_field = DR_STE_V1_ACTION_MDFY_FLD_IPV6_DST_OUT_2, .start = 0, .end = 31, + .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6, + }, + [MLX5_ACTION_IN_FIELD_OUT_DIPV6_31_0] = { + .hw_field = DR_STE_V1_ACTION_MDFY_FLD_IPV6_DST_OUT_3, .start = 0, .end = 31, + .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6, + }, + [MLX5_ACTION_IN_FIELD_OUT_SIPV4] = { + .hw_field = DR_STE_V1_ACTION_MDFY_FLD_IPV4_OUT_0, .start = 0, .end = 31, + .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV4, + }, + [MLX5_ACTION_IN_FIELD_OUT_DIPV4] = { + .hw_field = DR_STE_V1_ACTION_MDFY_FLD_IPV4_OUT_1, .start = 0, .end = 31, + .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV4, + }, + [MLX5_ACTION_IN_FIELD_METADATA_REG_A] = { + .hw_field = DR_STE_V1_ACTION_MDFY_FLD_GNRL_PURPOSE, .start = 0, .end = 31, + }, + [MLX5_ACTION_IN_FIELD_METADATA_REG_B] = { + .hw_field = DR_STE_V1_ACTION_MDFY_FLD_METADATA_2_CQE, .start = 0, .end = 31, + }, + [MLX5_ACTION_IN_FIELD_METADATA_REG_C_0] = { + .hw_field = DR_STE_V1_ACTION_MDFY_FLD_REGISTER_6, .start = 0, .end = 31, + }, + [MLX5_ACTION_IN_FIELD_METADATA_REG_C_1] = { + .hw_field = DR_STE_V1_ACTION_MDFY_FLD_REGISTER_7, .start = 0, .end = 31, + }, + [MLX5_ACTION_IN_FIELD_METADATA_REG_C_2] = { + .hw_field = DR_STE_V1_ACTION_MDFY_FLD_REGISTER_4, .start = 0, .end = 31, + }, + [MLX5_ACTION_IN_FIELD_METADATA_REG_C_3] = { + .hw_field = DR_STE_V1_ACTION_MDFY_FLD_REGISTER_5, .start = 0, .end = 31, + }, + [MLX5_ACTION_IN_FIELD_METADATA_REG_C_4] = { + .hw_field = DR_STE_V1_ACTION_MDFY_FLD_REGISTER_2, .start = 0, .end = 31, + }, + [MLX5_ACTION_IN_FIELD_METADATA_REG_C_5] = { + .hw_field = DR_STE_V1_ACTION_MDFY_FLD_REGISTER_3, .start = 0, .end = 31, + }, + [MLX5_ACTION_IN_FIELD_OUT_TCP_SEQ_NUM] = { + .hw_field = DR_STE_V1_ACTION_MDFY_FLD_TCP_MISC_0, .start = 0, .end = 31, + }, + [MLX5_ACTION_IN_FIELD_OUT_TCP_ACK_NUM] = { + .hw_field = DR_STE_V1_ACTION_MDFY_FLD_TCP_MISC_1, .start = 0, .end = 31, + }, + [MLX5_ACTION_IN_FIELD_OUT_FIRST_VID] = { + .hw_field = DR_STE_V1_ACTION_MDFY_FLD_L2_OUT_2, .start = 0, .end = 15, + }, +}; + static void dr_ste_v1_set_entry_type(u8 *hw_ste_p, u8 entry_type) { MLX5_SET(ste_match_bwc_v1, hw_ste_p, entry_format, entry_type); @@ -476,6 +629,116 @@ static void dr_ste_v1_set_actions_rx(struct mlx5dr_domain *dmn, dr_ste_v1_set_hit_addr(last_ste, attr->final_icm_addr, 1); } +static void dr_ste_v1_set_action_set(u8 *d_action, + u8 hw_field, + u8 shifter, + u8 length, + u32 data) +{ + shifter += MLX5_MODIFY_HEADER_V1_QW_OFFSET; + MLX5_SET(ste_double_action_set_v1, d_action, action_id, DR_STE_V1_ACTION_ID_SET); + MLX5_SET(ste_double_action_set_v1, d_action, destination_dw_offset, hw_field); + MLX5_SET(ste_double_action_set_v1, d_action, destination_left_shifter, shifter); + MLX5_SET(ste_double_action_set_v1, d_action, destination_length, length); + MLX5_SET(ste_double_action_set_v1, d_action, inline_data, data); +} + +static void dr_ste_v1_set_action_add(u8 *d_action, + u8 hw_field, + u8 shifter, + u8 length, + u32 data) +{ + shifter += MLX5_MODIFY_HEADER_V1_QW_OFFSET; + MLX5_SET(ste_double_action_add_v1, d_action, action_id, DR_STE_V1_ACTION_ID_ADD); + MLX5_SET(ste_double_action_add_v1, d_action, destination_dw_offset, hw_field); + MLX5_SET(ste_double_action_add_v1, d_action, destination_left_shifter, shifter); + MLX5_SET(ste_double_action_add_v1, d_action, destination_length, length); + MLX5_SET(ste_double_action_add_v1, d_action, add_value, data); +} + +static void dr_ste_v1_set_action_copy(u8 *d_action, + u8 dst_hw_field, + u8 dst_shifter, + u8 dst_len, + u8 src_hw_field, + u8 src_shifter) +{ + dst_shifter += MLX5_MODIFY_HEADER_V1_QW_OFFSET; + src_shifter += MLX5_MODIFY_HEADER_V1_QW_OFFSET; + MLX5_SET(ste_double_action_copy_v1, d_action, action_id, DR_STE_V1_ACTION_ID_COPY); + MLX5_SET(ste_double_action_copy_v1, d_action, destination_dw_offset, dst_hw_field); + MLX5_SET(ste_double_action_copy_v1, d_action, destination_left_shifter, dst_shifter); + MLX5_SET(ste_double_action_copy_v1, d_action, destination_length, dst_len); + MLX5_SET(ste_double_action_copy_v1, d_action, source_dw_offset, src_hw_field); + MLX5_SET(ste_double_action_copy_v1, d_action, source_right_shifter, src_shifter); +} + +#define DR_STE_DECAP_L3_ACTION_NUM 8 +#define DR_STE_L2_HDR_MAX_SZ 20 + +static int dr_ste_v1_set_action_decap_l3_list(void *data, + u32 data_sz, + u8 *hw_action, + u32 hw_action_sz, + u16 *used_hw_action_num) +{ + u8 padded_data[DR_STE_L2_HDR_MAX_SZ] = {}; + void *data_ptr = padded_data; + u16 used_actions = 0; + u32 inline_data_sz; + u32 i; + + if (hw_action_sz / DR_STE_ACTION_DOUBLE_SZ < DR_STE_DECAP_L3_ACTION_NUM) + return -EINVAL; + + memcpy(padded_data, data, data_sz); + + /* Remove L2L3 outer headers */ + MLX5_SET(ste_single_action_remove_header_v1, hw_action, action_id, + DR_STE_V1_ACTION_ID_REMOVE_HEADER_TO_HEADER); + MLX5_SET(ste_single_action_remove_header_v1, hw_action, decap, 1); + MLX5_SET(ste_single_action_remove_header_v1, hw_action, vni_to_cqe, 1); + MLX5_SET(ste_single_action_remove_header_v1, hw_action, end_anchor, + DR_STE_HEADER_ANCHOR_INNER_IPV6_IPV4); + hw_action += DR_STE_ACTION_DOUBLE_SZ; + used_actions++; /* Remove and NOP are a single double action */ + + inline_data_sz = + MLX5_FLD_SZ_BYTES(ste_double_action_insert_with_inline_v1, inline_data); + + /* Add the new header inline + 2 extra bytes */ + for (i = 0; i < data_sz / inline_data_sz + 1; i++) { + void *addr_inline; + + MLX5_SET(ste_double_action_insert_with_inline_v1, hw_action, action_id, + DR_STE_V1_ACTION_ID_INSERT_INLINE); + /* The hardware expects here offset to words (2 bytes) */ + MLX5_SET(ste_double_action_insert_with_inline_v1, hw_action, start_offset, + i * 2); + + /* Copy bytes one by one to avoid endianness problem */ + addr_inline = MLX5_ADDR_OF(ste_double_action_insert_with_inline_v1, + hw_action, inline_data); + memcpy(addr_inline, data_ptr, inline_data_sz); + hw_action += DR_STE_ACTION_DOUBLE_SZ; + data_ptr += inline_data_sz; + used_actions++; + } + + /* Remove 2 extra bytes */ + MLX5_SET(ste_single_action_remove_header_size_v1, hw_action, action_id, + DR_STE_V1_ACTION_ID_REMOVE_BY_SIZE); + MLX5_SET(ste_single_action_remove_header_size_v1, hw_action, start_offset, data_sz / 2); + /* The hardware expects here size in words (2 bytes) */ + MLX5_SET(ste_single_action_remove_header_size_v1, hw_action, remove_size, 1); + used_actions++; + + *used_hw_action_num = used_actions; + + return 0; +} + static void dr_ste_v1_build_eth_l2_src_dst_bit_mask(struct mlx5dr_match_param *value, bool inner, u8 *bit_mask) { @@ -1339,4 +1602,10 @@ struct mlx5dr_ste_ctx ste_ctx_v1 = { /* Actions */ .set_actions_rx = &dr_ste_v1_set_actions_rx, .set_actions_tx = &dr_ste_v1_set_actions_tx, + .modify_field_arr_sz = ARRAY_SIZE(dr_ste_v1_action_modify_field_arr), + .modify_field_arr = dr_ste_v1_action_modify_field_arr, + .set_action_set = &dr_ste_v1_set_action_set, + .set_action_add = &dr_ste_v1_set_action_add, + .set_action_copy = &dr_ste_v1_set_action_copy, + .set_action_decap_l3_list = &dr_ste_v1_set_action_decap_l3_list, }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5_ifc_dr_ste_v1.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5_ifc_dr_ste_v1.h index 31ecdb673625a..34c2bd17a8b4a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5_ifc_dr_ste_v1.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5_ifc_dr_ste_v1.h @@ -4,6 +4,10 @@ #ifndef MLX5_IFC_DR_STE_V1_H #define MLX5_IFC_DR_STE_V1_H +enum mlx5_ifc_ste_v1_modify_hdr_offset { + MLX5_MODIFY_HEADER_V1_QW_OFFSET = 0x20, +}; + struct mlx5_ifc_ste_single_action_flow_tag_v1_bits { u8 action_id[0x8]; u8 flow_tag[0x18]; -- GitLab From f06d496985f49189dde4506d0ac15494d1a74607 Mon Sep 17 00:00:00 2001 From: Yevgeny Kliteynik Date: Tue, 22 Sep 2020 04:23:58 +0300 Subject: [PATCH 1719/2012] net/mlx5: DR, Use the right size when writing partial STE into HW In these cases we need to update only the ctrl area of the STE. So it is better to write only the control 32B and avoid copying the unneeded reduced 48B (control 32B + tag 16B). Signed-off-by: Erez Shitrit Signed-off-by: Alex Vesker Signed-off-by: Yevgeny Kliteynik Signed-off-by: Saeed Mahameed --- .../ethernet/mellanox/mlx5/core/steering/dr_rule.c | 12 ++++++++---- .../ethernet/mellanox/mlx5/core/steering/dr_ste.c | 2 +- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c index ddcb7017e1216..fcea2a21abe9c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c @@ -30,7 +30,7 @@ static int dr_rule_append_to_miss_list(struct mlx5dr_ste_ctx *ste_ctx, mlx5dr_ste_get_icm_addr(new_last_ste)); list_add_tail(&new_last_ste->miss_list_node, miss_list); - mlx5dr_send_fill_and_append_ste_send_info(last_ste, DR_STE_SIZE_REDUCED, + mlx5dr_send_fill_and_append_ste_send_info(last_ste, DR_STE_SIZE_CTRL, 0, last_ste->hw_ste, ste_info_last, send_list, true); @@ -110,10 +110,14 @@ dr_rule_handle_one_ste_in_update_list(struct mlx5dr_ste_send_info *ste_info, ste_info->size, ste_info->offset); if (ret) goto out; - /* Copy data to ste, only reduced size, the last 16B (mask) + + /* Copy data to ste, only reduced size or control, the last 16B (mask) * is already written to the hw. */ - memcpy(ste_info->ste->hw_ste, ste_info->data, DR_STE_SIZE_REDUCED); + if (ste_info->size == DR_STE_SIZE_CTRL) + memcpy(ste_info->ste->hw_ste, ste_info->data, DR_STE_SIZE_CTRL); + else + memcpy(ste_info->ste->hw_ste, ste_info->data, DR_STE_SIZE_REDUCED); out: kfree(ste_info); @@ -456,7 +460,7 @@ dr_rule_rehash_htbl(struct mlx5dr_rule *rule, ste_to_update = cur_htbl->pointing_ste; } - mlx5dr_send_fill_and_append_ste_send_info(ste_to_update, DR_STE_SIZE_REDUCED, + mlx5dr_send_fill_and_append_ste_send_info(ste_to_update, DR_STE_SIZE_CTRL, 0, ste_to_update->hw_ste, ste_info, update_list, false); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c index 0f318f409c91d..e21b61030e352 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c @@ -264,7 +264,7 @@ static void dr_ste_remove_middle_ste(struct mlx5dr_ste_ctx *ste_ctx, miss_addr = ste_ctx->get_miss_addr(ste->hw_ste); ste_ctx->set_miss_addr(prev_ste->hw_ste, miss_addr); - mlx5dr_send_fill_and_append_ste_send_info(prev_ste, DR_STE_SIZE_REDUCED, 0, + mlx5dr_send_fill_and_append_ste_send_info(prev_ste, DR_STE_SIZE_CTRL, 0, prev_ste->hw_ste, ste_info, send_ste_list, true /* Copy data*/); -- GitLab From 4fe45e1d31efb07bbf0c80a59c211109e389b8e3 Mon Sep 17 00:00:00 2001 From: Yevgeny Kliteynik Date: Sun, 6 Dec 2020 18:13:01 +0200 Subject: [PATCH 1720/2012] net/mlx5: DR, Use HW specific logic API when writing STE STEv0 format and STEv1 HW format are different, each has a different order: STEv0: CTRL 32B, TAG 16B, BITMASK 16B STEv1: CTRL 32B, BITMASK 16B, TAG 16B To make this transparent to upper layers we introduce a new ste_ctx function to format the STE prior to writing it. Signed-off-by: Erez Shitrit Signed-off-by: Alex Vesker Signed-off-by: Yevgeny Kliteynik Signed-off-by: Saeed Mahameed --- .../mellanox/mlx5/core/steering/dr_rule.c | 9 +++--- .../mellanox/mlx5/core/steering/dr_send.c | 29 +++++++++++++------ .../mellanox/mlx5/core/steering/dr_ste.c | 7 +++++ .../mellanox/mlx5/core/steering/dr_ste.h | 3 ++ .../mellanox/mlx5/core/steering/dr_ste_v1.c | 22 ++++++++++++++ .../mellanox/mlx5/core/steering/dr_types.h | 3 ++ 6 files changed, 60 insertions(+), 13 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c index fcea2a21abe9c..b337d6626bffc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c @@ -106,10 +106,6 @@ dr_rule_handle_one_ste_in_update_list(struct mlx5dr_ste_send_info *ste_info, int ret; list_del(&ste_info->send_list); - ret = mlx5dr_send_postsend_ste(dmn, ste_info->ste, ste_info->data, - ste_info->size, ste_info->offset); - if (ret) - goto out; /* Copy data to ste, only reduced size or control, the last 16B (mask) * is already written to the hw. @@ -119,6 +115,11 @@ dr_rule_handle_one_ste_in_update_list(struct mlx5dr_ste_send_info *ste_info, else memcpy(ste_info->ste->hw_ste, ste_info->data, DR_STE_SIZE_REDUCED); + ret = mlx5dr_send_postsend_ste(dmn, ste_info->ste, ste_info->data, + ste_info->size, ste_info->offset); + if (ret) + goto out; + out: kfree(ste_info); return ret; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c index 24dede1b0a209..83c4c877d558c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c @@ -431,6 +431,8 @@ int mlx5dr_send_postsend_ste(struct mlx5dr_domain *dmn, struct mlx5dr_ste *ste, { struct postsend_info send_info = {}; + mlx5dr_ste_prepare_for_postsend(dmn->ste_ctx, data, size); + send_info.write.addr = (uintptr_t)data; send_info.write.length = size; send_info.write.lkey = 0; @@ -457,6 +459,8 @@ int mlx5dr_send_postsend_htbl(struct mlx5dr_domain *dmn, if (ret) return ret; + mlx5dr_ste_prepare_for_postsend(dmn->ste_ctx, formatted_ste, DR_STE_SIZE); + /* Send the data iteration times */ for (i = 0; i < iterations; i++) { u32 ste_index = i * (byte_size / DR_STE_SIZE); @@ -480,6 +484,10 @@ int mlx5dr_send_postsend_htbl(struct mlx5dr_domain *dmn, /* Copy bit_mask */ memcpy(data + ste_off + DR_STE_SIZE_REDUCED, mask, DR_STE_SIZE_MASK); + /* Only when we have mask we need to re-arrange the STE */ + mlx5dr_ste_prepare_for_postsend(dmn->ste_ctx, + data + (j * DR_STE_SIZE), + DR_STE_SIZE); } } @@ -509,6 +517,7 @@ int mlx5dr_send_postsend_formatted_htbl(struct mlx5dr_domain *dmn, u32 byte_size = htbl->chunk->byte_size; int iterations; int num_stes; + u8 *copy_dst; u8 *data; int ret; int i; @@ -518,20 +527,22 @@ int mlx5dr_send_postsend_formatted_htbl(struct mlx5dr_domain *dmn, if (ret) return ret; - for (i = 0; i < num_stes; i++) { - u8 *copy_dst; - - /* Copy the same ste on the data buffer */ - copy_dst = data + i * DR_STE_SIZE; - memcpy(copy_dst, ste_init_data, DR_STE_SIZE); - - if (update_hw_ste) { - /* Copy the reduced ste to hash table ste_arr */ + if (update_hw_ste) { + /* Copy the reduced STE to hash table ste_arr */ + for (i = 0; i < num_stes; i++) { copy_dst = htbl->hw_ste_arr + i * DR_STE_SIZE_REDUCED; memcpy(copy_dst, ste_init_data, DR_STE_SIZE_REDUCED); } } + mlx5dr_ste_prepare_for_postsend(dmn->ste_ctx, ste_init_data, DR_STE_SIZE); + + /* Copy the same STE on the data buffer */ + for (i = 0; i < num_stes; i++) { + copy_dst = data + i * DR_STE_SIZE; + memcpy(copy_dst, ste_init_data, DR_STE_SIZE); + } + /* Send the data iteration times */ for (i = 0; i < iterations; i++) { u8 ste_index = i * (byte_size / DR_STE_SIZE); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c index e21b61030e352..8ac3ccdda84c6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c @@ -356,6 +356,13 @@ void mlx5dr_ste_set_hit_addr_by_next_htbl(struct mlx5dr_ste_ctx *ste_ctx, ste_ctx->set_hit_addr(hw_ste, chunk->icm_addr, chunk->num_of_entries); } +void mlx5dr_ste_prepare_for_postsend(struct mlx5dr_ste_ctx *ste_ctx, + u8 *hw_ste_p, u32 ste_size) +{ + if (ste_ctx->prepare_for_postsend) + ste_ctx->prepare_for_postsend(hw_ste_p, ste_size); +} + /* Init one ste as a pattern for ste data array */ void mlx5dr_ste_set_formatted_ste(struct mlx5dr_ste_ctx *ste_ctx, u16 gvmi, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.h index 0dc08fe1a9dba..06bcb0ee8f965 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.h @@ -160,6 +160,9 @@ struct mlx5dr_ste_ctx { u8 *hw_action, u32 hw_action_sz, u16 *used_hw_action_num); + + /* Send */ + void (*prepare_for_postsend)(u8 *hw_ste_p, u32 ste_size); }; extern struct mlx5dr_ste_ctx ste_ctx_v0; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c index 42b853fa34654..4088d6e515082 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c @@ -324,6 +324,26 @@ static void dr_ste_v1_init(u8 *hw_ste_p, u16 lu_type, MLX5_SET(ste_match_bwc_v1, hw_ste_p, miss_address_63_48, gvmi); } +static void dr_ste_v1_prepare_for_postsend(u8 *hw_ste_p, + u32 ste_size) +{ + u8 *tag = hw_ste_p + DR_STE_SIZE_CTRL; + u8 *mask = tag + DR_STE_SIZE_TAG; + u8 tmp_tag[DR_STE_SIZE_TAG] = {}; + + if (ste_size == DR_STE_SIZE_CTRL) + return; + + WARN_ON(ste_size != DR_STE_SIZE); + + /* Backup tag */ + memcpy(tmp_tag, tag, DR_STE_SIZE_TAG); + + /* Swap mask and tag both are the same size */ + memcpy(tag, mask, DR_STE_SIZE_MASK); + memcpy(mask, tmp_tag, DR_STE_SIZE_TAG); +} + static void dr_ste_v1_set_rx_flow_tag(u8 *s_action, u32 flow_tag) { MLX5_SET(ste_single_action_flow_tag_v1, s_action, action_id, @@ -1608,4 +1628,6 @@ struct mlx5dr_ste_ctx ste_ctx_v1 = { .set_action_add = &dr_ste_v1_set_action_add, .set_action_copy = &dr_ste_v1_set_action_copy, .set_action_decap_l3_list = &dr_ste_v1_set_action_decap_l3_list, + /* Send */ + .prepare_for_postsend = &dr_ste_v1_prepare_for_postsend, }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h index 8d2c3b6e27552..3b76142218d15 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h @@ -1072,6 +1072,9 @@ struct mlx5dr_icm_chunk * mlx5dr_icm_alloc_chunk(struct mlx5dr_icm_pool *pool, enum mlx5dr_icm_chunk_size chunk_size); void mlx5dr_icm_free_chunk(struct mlx5dr_icm_chunk *chunk); + +void mlx5dr_ste_prepare_for_postsend(struct mlx5dr_ste_ctx *ste_ctx, + u8 *hw_ste_p, u32 ste_size); int mlx5dr_ste_htbl_init_and_postsend(struct mlx5dr_domain *dmn, struct mlx5dr_domain_rx_tx *nic_dmn, struct mlx5dr_ste_htbl *htbl, -- GitLab From 8fdac12acf32fa327c2da9ded8a460e606cb74ac Mon Sep 17 00:00:00 2001 From: Yevgeny Kliteynik Date: Tue, 22 Sep 2020 04:24:09 +0300 Subject: [PATCH 1721/2012] net/mlx5: DR, Copy all 64B whenever replacing STE in the head of miss-list Till now the code assumed that need to copy reduced size of the ste because the rest is the mask part which shouldn't be changed. This is not true for all types of HW (like STEv1). Take all 64B from the new STE and write them in the replaced STE place. This change will make it easier to handle all STE HW types because we have all the data that is about to be written into HW. Signed-off-by: Erez Shitrit Signed-off-by: Alex Vesker Signed-off-by: Yevgeny Kliteynik Signed-off-by: Saeed Mahameed --- .../mellanox/mlx5/core/steering/dr_ste.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c index 8ac3ccdda84c6..9cd5c50c5d42d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c @@ -211,13 +211,17 @@ dr_ste_remove_head_ste(struct mlx5dr_ste_ctx *ste_ctx, * |_ste_| --> |_next_ste_| -->|__| -->|__| -->/0 */ static void -dr_ste_replace_head_ste(struct mlx5dr_ste *ste, struct mlx5dr_ste *next_ste, +dr_ste_replace_head_ste(struct mlx5dr_matcher_rx_tx *nic_matcher, + struct mlx5dr_ste *ste, + struct mlx5dr_ste *next_ste, struct mlx5dr_ste_send_info *ste_info_head, struct list_head *send_ste_list, struct mlx5dr_ste_htbl *stats_tbl) { struct mlx5dr_ste_htbl *next_miss_htbl; + u8 hw_ste[DR_STE_SIZE] = {}; + int sb_idx; next_miss_htbl = next_ste->htbl; @@ -230,13 +234,19 @@ dr_ste_replace_head_ste(struct mlx5dr_ste *ste, struct mlx5dr_ste *next_ste, /* Move data from next into ste */ dr_ste_replace(ste, next_ste); + /* Copy all 64 hw_ste bytes */ + memcpy(hw_ste, ste->hw_ste, DR_STE_SIZE_REDUCED); + sb_idx = ste->ste_chain_location - 1; + mlx5dr_ste_set_bit_mask(hw_ste, + nic_matcher->ste_builder[sb_idx].bit_mask); + /* Del the htbl that contains the next_ste. * The origin htbl stay with the same number of entries. */ mlx5dr_htbl_put(next_miss_htbl); - mlx5dr_send_fill_and_append_ste_send_info(ste, DR_STE_SIZE_REDUCED, - 0, ste->hw_ste, + mlx5dr_send_fill_and_append_ste_send_info(ste, DR_STE_SIZE, + 0, hw_ste, ste_info_head, send_ste_list, true /* Copy data */); @@ -316,7 +326,8 @@ void mlx5dr_ste_free(struct mlx5dr_ste *ste, stats_tbl); } else { /* First but not only entry in the list */ - dr_ste_replace_head_ste(ste, next_ste, &ste_info_head, + dr_ste_replace_head_ste(nic_matcher, ste, + next_ste, &ste_info_head, &send_ste_list, stats_tbl); put_on_origin_table = false; } -- GitLab From 64f45c0fc4c71f577506c5a7a7956ae3bc3388ea Mon Sep 17 00:00:00 2001 From: Yevgeny Kliteynik Date: Mon, 25 Jan 2021 02:26:45 +0200 Subject: [PATCH 1722/2012] net/mlx5: DR, Allow SW steering for sw_owner_v2 devices Allow sw_owner_v2 based on sw_format_version. Signed-off-by: Alex Vesker Signed-off-by: Yevgeny Kliteynik Signed-off-by: Saeed Mahameed --- .../mellanox/mlx5/core/steering/dr_cmd.c | 17 +++++++++++------ .../mellanox/mlx5/core/steering/dr_domain.c | 17 +++++++++-------- .../mellanox/mlx5/core/steering/dr_types.h | 6 +++++- .../mellanox/mlx5/core/steering/mlx5dr.h | 5 ++++- 4 files changed, 29 insertions(+), 16 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c index ba65ec406cfab..30b0136b5bc7c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c @@ -78,9 +78,9 @@ int mlx5dr_cmd_query_esw_caps(struct mlx5_core_dev *mdev, caps->uplink_icm_address_tx = MLX5_CAP64_ESW_FLOWTABLE(mdev, sw_steering_uplink_icm_address_tx); - caps->sw_owner = - MLX5_CAP_ESW_FLOWTABLE_FDB(mdev, - sw_owner); + caps->sw_owner_v2 = MLX5_CAP_ESW_FLOWTABLE_FDB(mdev, sw_owner_v2); + if (!caps->sw_owner_v2) + caps->sw_owner = MLX5_CAP_ESW_FLOWTABLE_FDB(mdev, sw_owner); return 0; } @@ -113,10 +113,15 @@ int mlx5dr_cmd_query_device(struct mlx5_core_dev *mdev, caps->nic_tx_allow_address = MLX5_CAP64_FLOWTABLE(mdev, sw_steering_nic_tx_action_allow_icm_address); - caps->rx_sw_owner = MLX5_CAP_FLOWTABLE_NIC_RX(mdev, sw_owner); - caps->max_ft_level = MLX5_CAP_FLOWTABLE_NIC_RX(mdev, max_ft_level); + caps->rx_sw_owner_v2 = MLX5_CAP_FLOWTABLE_NIC_RX(mdev, sw_owner_v2); + caps->tx_sw_owner_v2 = MLX5_CAP_FLOWTABLE_NIC_TX(mdev, sw_owner_v2); + + if (!caps->rx_sw_owner_v2) + caps->rx_sw_owner = MLX5_CAP_FLOWTABLE_NIC_RX(mdev, sw_owner); + if (!caps->tx_sw_owner_v2) + caps->tx_sw_owner = MLX5_CAP_FLOWTABLE_NIC_TX(mdev, sw_owner); - caps->tx_sw_owner = MLX5_CAP_FLOWTABLE_NIC_TX(mdev, sw_owner); + caps->max_ft_level = MLX5_CAP_FLOWTABLE_NIC_RX(mdev, max_ft_level); caps->log_icm_size = MLX5_CAP_DEV_MEM(mdev, log_steering_sw_icm_size); caps->hdr_modify_icm_addr = diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c index 47ec88964bf36..7091b1be84ef1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c @@ -4,6 +4,11 @@ #include #include "dr_types.h" +#define DR_DOMAIN_SW_STEERING_SUPPORTED(dmn, dmn_type) \ + ((dmn)->info.caps.dmn_type##_sw_owner || \ + ((dmn)->info.caps.dmn_type##_sw_owner_v2 && \ + (dmn)->info.caps.sw_format_ver <= MLX5_STEERING_FORMAT_CONNECTX_6DX)) + static int dr_domain_init_cache(struct mlx5dr_domain *dmn) { /* Per vport cached FW FT for checksum recalculation, this @@ -187,6 +192,7 @@ static int dr_domain_query_fdb_caps(struct mlx5_core_dev *mdev, return ret; dmn->info.caps.fdb_sw_owner = dmn->info.caps.esw_caps.sw_owner; + dmn->info.caps.fdb_sw_owner_v2 = dmn->info.caps.esw_caps.sw_owner_v2; dmn->info.caps.esw_rx_drop_address = dmn->info.caps.esw_caps.drop_icm_address_rx; dmn->info.caps.esw_tx_drop_address = dmn->info.caps.esw_caps.drop_icm_address_tx; @@ -229,18 +235,13 @@ static int dr_domain_caps_init(struct mlx5_core_dev *mdev, if (ret) return ret; - if (dmn->info.caps.sw_format_ver != MLX5_STEERING_FORMAT_CONNECTX_5) { - mlx5dr_err(dmn, "SW steering is not supported on this device\n"); - return -EOPNOTSUPP; - } - ret = dr_domain_query_fdb_caps(mdev, dmn); if (ret) return ret; switch (dmn->type) { case MLX5DR_DOMAIN_TYPE_NIC_RX: - if (!dmn->info.caps.rx_sw_owner) + if (!DR_DOMAIN_SW_STEERING_SUPPORTED(dmn, rx)) return -ENOTSUPP; dmn->info.supp_sw_steering = true; @@ -249,7 +250,7 @@ static int dr_domain_caps_init(struct mlx5_core_dev *mdev, dmn->info.rx.drop_icm_addr = dmn->info.caps.nic_rx_drop_address; break; case MLX5DR_DOMAIN_TYPE_NIC_TX: - if (!dmn->info.caps.tx_sw_owner) + if (!DR_DOMAIN_SW_STEERING_SUPPORTED(dmn, tx)) return -ENOTSUPP; dmn->info.supp_sw_steering = true; @@ -261,7 +262,7 @@ static int dr_domain_caps_init(struct mlx5_core_dev *mdev, if (!dmn->info.caps.eswitch_manager) return -ENOTSUPP; - if (!dmn->info.caps.fdb_sw_owner) + if (!DR_DOMAIN_SW_STEERING_SUPPORTED(dmn, fdb)) return -ENOTSUPP; dmn->info.rx.ste_type = MLX5DR_STE_TYPE_RX; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h index 3b76142218d15..a8b497cbb844e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h @@ -666,7 +666,8 @@ struct mlx5dr_esw_caps { u64 drop_icm_address_tx; u64 uplink_icm_address_rx; u64 uplink_icm_address_tx; - bool sw_owner; + u8 sw_owner:1; + u8 sw_owner_v2:1; }; struct mlx5dr_cmd_vport_cap { @@ -699,6 +700,9 @@ struct mlx5dr_cmd_caps { bool rx_sw_owner; bool tx_sw_owner; bool fdb_sw_owner; + u8 rx_sw_owner_v2:1; + u8 tx_sw_owner_v2:1; + u8 fdb_sw_owner_v2:1; u32 num_vports; struct mlx5dr_esw_caps esw_caps; struct mlx5dr_cmd_vport_cap *vports_caps; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h index 4177786b8eaf2..612b0ac31db23 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h @@ -124,7 +124,10 @@ int mlx5dr_action_destroy(struct mlx5dr_action *action); static inline bool mlx5dr_is_supported(struct mlx5_core_dev *dev) { - return MLX5_CAP_ESW_FLOWTABLE_FDB(dev, sw_owner); + return MLX5_CAP_ESW_FLOWTABLE_FDB(dev, sw_owner) || + (MLX5_CAP_ESW_FLOWTABLE_FDB(dev, sw_owner_v2) && + (MLX5_CAP_GEN(dev, steering_format_version) <= + MLX5_STEERING_FORMAT_CONNECTX_6DX)); } /* buddy functions & structure */ -- GitLab From a5b88632fc967906a86e16513bae9cc49070934c Mon Sep 17 00:00:00 2001 From: Emil Renner Berthing Date: Wed, 27 Jan 2021 18:32:55 +0100 Subject: [PATCH 1723/2012] net: atm: pppoatm: use tasklet_init to initialize wakeup tasklet Previously a temporary tasklet structure was initialized on the stack using DECLARE_TASKLET_OLD() and then copied over and modified. Nothing else in the kernel seems to use this pattern, so let's just call tasklet_init() like everyone else. Signed-off-by: Emil Renner Berthing Link: https://lore.kernel.org/r/20210127173256.13954-1-kernel@esmil.dk Signed-off-by: Jakub Kicinski --- net/atm/pppoatm.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/net/atm/pppoatm.c b/net/atm/pppoatm.c index 579b66da1d95d..5f06af0983905 100644 --- a/net/atm/pppoatm.c +++ b/net/atm/pppoatm.c @@ -389,11 +389,7 @@ static int pppoatm_assign_vcc(struct atm_vcc *atmvcc, void __user *arg) struct atm_backend_ppp be; struct pppoatm_vcc *pvcc; int err; - /* - * Each PPPoATM instance has its own tasklet - this is just a - * prototypical one used to initialize them - */ - static const DECLARE_TASKLET_OLD(tasklet_proto, pppoatm_wakeup_sender); + if (copy_from_user(&be, arg, sizeof be)) return -EFAULT; if (be.encaps != PPPOATM_ENCAPS_AUTODETECT && @@ -415,8 +411,8 @@ static int pppoatm_assign_vcc(struct atm_vcc *atmvcc, void __user *arg) pvcc->chan.ops = &pppoatm_ops; pvcc->chan.mtu = atmvcc->qos.txtp.max_sdu - PPP_HDRLEN - (be.encaps == e_vc ? 0 : LLC_LEN); - pvcc->wakeup_tasklet = tasklet_proto; - pvcc->wakeup_tasklet.data = (unsigned long) &pvcc->chan; + tasklet_init(&pvcc->wakeup_tasklet, pppoatm_wakeup_sender, + (unsigned long)&pvcc->chan); err = ppp_register_channel(&pvcc->chan); if (err != 0) { kfree(pvcc); -- GitLab From a58745979cdd2ad70cd43ce9f1de8b076ae98e21 Mon Sep 17 00:00:00 2001 From: Emil Renner Berthing Date: Wed, 27 Jan 2021 18:32:56 +0100 Subject: [PATCH 1724/2012] net: atm: pppoatm: use new API for wakeup tasklet This converts the driver to use the new tasklet API introduced in commit 12cc923f1ccc ("tasklet: Introduce new initialization API") Signed-off-by: Emil Renner Berthing Link: https://lore.kernel.org/r/20210127173256.13954-2-kernel@esmil.dk Signed-off-by: Jakub Kicinski --- net/atm/pppoatm.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/net/atm/pppoatm.c b/net/atm/pppoatm.c index 5f06af0983905..3e4f17d335feb 100644 --- a/net/atm/pppoatm.c +++ b/net/atm/pppoatm.c @@ -101,9 +101,11 @@ static inline struct pppoatm_vcc *chan_to_pvcc(const struct ppp_channel *chan) * doesn't want to be called in interrupt context, so we do it from * a tasklet */ -static void pppoatm_wakeup_sender(unsigned long arg) +static void pppoatm_wakeup_sender(struct tasklet_struct *t) { - ppp_output_wakeup((struct ppp_channel *) arg); + struct pppoatm_vcc *pvcc = from_tasklet(pvcc, t, wakeup_tasklet); + + ppp_output_wakeup(&pvcc->chan); } static void pppoatm_release_cb(struct atm_vcc *atmvcc) @@ -411,8 +413,7 @@ static int pppoatm_assign_vcc(struct atm_vcc *atmvcc, void __user *arg) pvcc->chan.ops = &pppoatm_ops; pvcc->chan.mtu = atmvcc->qos.txtp.max_sdu - PPP_HDRLEN - (be.encaps == e_vc ? 0 : LLC_LEN); - tasklet_init(&pvcc->wakeup_tasklet, pppoatm_wakeup_sender, - (unsigned long)&pvcc->chan); + tasklet_setup(&pvcc->wakeup_tasklet, pppoatm_wakeup_sender); err = ppp_register_channel(&pvcc->chan); if (err != 0) { kfree(pvcc); -- GitLab From afa4f675aa62467f706f06b67d4c7955b362f949 Mon Sep 17 00:00:00 2001 From: dingsenjie Date: Thu, 28 Jan 2021 11:53:30 +0800 Subject: [PATCH 1725/2012] net/ethernet: convert to use module_platform_driver in octeon_mgmt.c Simplify the code by using module_platform_driver macro for octeon_mgmt. Signed-off-by: dingsenjie Link: https://lore.kernel.org/r/20210128035330.17676-1-dingsenjie@163.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/cavium/octeon/octeon_mgmt.c | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c b/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c index 5e50bb19bf26c..ecffebd513be3 100644 --- a/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c +++ b/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c @@ -1556,18 +1556,7 @@ static struct platform_driver octeon_mgmt_driver = { .remove = octeon_mgmt_remove, }; -static int __init octeon_mgmt_mod_init(void) -{ - return platform_driver_register(&octeon_mgmt_driver); -} - -static void __exit octeon_mgmt_mod_exit(void) -{ - platform_driver_unregister(&octeon_mgmt_driver); -} - -module_init(octeon_mgmt_mod_init); -module_exit(octeon_mgmt_mod_exit); +module_platform_driver(octeon_mgmt_driver); MODULE_SOFTDEP("pre: mdio-cavium"); MODULE_DESCRIPTION(DRV_DESCRIPTION); -- GitLab From 5daf83846cdb67a3ef0c17f9826738567598ed19 Mon Sep 17 00:00:00 2001 From: Jan Luebbe Date: Thu, 28 Jan 2021 12:19:30 +0100 Subject: [PATCH 1726/2012] docs: networking: timestamping: fix section title markup This section was missed during the conversion to ReST, so convert it in the same style as the surrounding section titles. Signed-off-by: Jan Luebbe Link: https://lore.kernel.org/r/20210128111930.29473-1-jlu@pengutronix.de Signed-off-by: Jakub Kicinski --- Documentation/networking/timestamping.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Documentation/networking/timestamping.rst b/Documentation/networking/timestamping.rst index 03f7beade470b..f682e88fa87e8 100644 --- a/Documentation/networking/timestamping.rst +++ b/Documentation/networking/timestamping.rst @@ -55,7 +55,8 @@ struct __kernel_sock_timeval format. SO_TIMESTAMP_OLD returns incorrect timestamps after the year 2038 on 32 bit machines. -1.2 SO_TIMESTAMPNS (also SO_TIMESTAMPNS_OLD and SO_TIMESTAMPNS_NEW): +1.2 SO_TIMESTAMPNS (also SO_TIMESTAMPNS_OLD and SO_TIMESTAMPNS_NEW) +------------------------------------------------------------------- This option is identical to SO_TIMESTAMP except for the returned data type. Its struct timespec allows for higher resolution (ns) timestamps than the -- GitLab From e6ec3ccd4eb2b3b9cf0a4127cf00fae602570a52 Mon Sep 17 00:00:00 2001 From: Loic Poulain Date: Thu, 28 Jan 2021 14:47:22 +0100 Subject: [PATCH 1727/2012] net: mhi: Get RX queue size from MHI core The RX queue size can be determined at runtime by retrieving the number of available transfer descriptors. Signed-off-by: Loic Poulain Signed-off-by: Jakub Kicinski --- drivers/net/mhi_net.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/mhi_net.c b/drivers/net/mhi_net.c index a5a214d298490..abdd2baa312a4 100644 --- a/drivers/net/mhi_net.c +++ b/drivers/net/mhi_net.c @@ -273,9 +273,6 @@ static int mhi_net_probe(struct mhi_device *mhi_dev, SET_NETDEV_DEV(ndev, &mhi_dev->dev); SET_NETDEV_DEVTYPE(ndev, &wwan_type); - /* All MHI net channels have 128 ring elements (at least for now) */ - mhi_netdev->rx_queue_sz = 128; - INIT_DELAYED_WORK(&mhi_netdev->rx_refill, mhi_net_rx_refill_work); u64_stats_init(&mhi_netdev->stats.rx_syncp); u64_stats_init(&mhi_netdev->stats.tx_syncp); @@ -285,6 +282,9 @@ static int mhi_net_probe(struct mhi_device *mhi_dev, if (err) goto out_err; + /* Number of transfer descriptors determines size of the queue */ + mhi_netdev->rx_queue_sz = mhi_get_free_desc_count(mhi_dev, DMA_FROM_DEVICE); + err = register_netdev(ndev); if (err) goto out_err; -- GitLab From 6e10785ee148655577885b65605836210a741bee Mon Sep 17 00:00:00 2001 From: Loic Poulain Date: Mon, 11 Jan 2021 19:07:42 +0100 Subject: [PATCH 1728/2012] net: mhi: Get rid of local rx queue count Use the new mhi_get_free_desc_count helper to track queue usage instead of relying on the locally maintained rx_queued count. Signed-off-by: Loic Poulain Signed-off-by: Jakub Kicinski --- drivers/net/mhi_net.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/drivers/net/mhi_net.c b/drivers/net/mhi_net.c index abdd2baa312a4..4f512531b7d0e 100644 --- a/drivers/net/mhi_net.c +++ b/drivers/net/mhi_net.c @@ -25,7 +25,6 @@ struct mhi_net_stats { u64_stats_t tx_bytes; u64_stats_t tx_errors; u64_stats_t tx_dropped; - atomic_t rx_queued; struct u64_stats_sync tx_syncp; struct u64_stats_sync rx_syncp; }; @@ -138,9 +137,9 @@ static void mhi_net_dl_callback(struct mhi_device *mhi_dev, { struct mhi_net_dev *mhi_netdev = dev_get_drvdata(&mhi_dev->dev); struct sk_buff *skb = mhi_res->buf_addr; - int remaining; + int free_desc_count; - remaining = atomic_dec_return(&mhi_netdev->stats.rx_queued); + free_desc_count = mhi_get_free_desc_count(mhi_dev, DMA_FROM_DEVICE); if (unlikely(mhi_res->transaction_status)) { dev_kfree_skb_any(skb); @@ -175,7 +174,7 @@ static void mhi_net_dl_callback(struct mhi_device *mhi_dev, } /* Refill if RX buffers queue becomes low */ - if (remaining <= mhi_netdev->rx_queue_sz / 2) + if (free_desc_count >= mhi_netdev->rx_queue_sz / 2) schedule_delayed_work(&mhi_netdev->rx_refill, 0); } @@ -222,7 +221,7 @@ static void mhi_net_rx_refill_work(struct work_struct *work) struct sk_buff *skb; int err; - while (atomic_read(&mhi_netdev->stats.rx_queued) < mhi_netdev->rx_queue_sz) { + while (!mhi_queue_is_full(mdev, DMA_FROM_DEVICE)) { skb = netdev_alloc_skb(ndev, size); if (unlikely(!skb)) break; @@ -235,8 +234,6 @@ static void mhi_net_rx_refill_work(struct work_struct *work) break; } - atomic_inc(&mhi_netdev->stats.rx_queued); - /* Do not hog the CPU if rx buffers are consumed faster than * queued (unlikely). */ @@ -244,7 +241,7 @@ static void mhi_net_rx_refill_work(struct work_struct *work) } /* If we're still starved of rx buffers, reschedule later */ - if (unlikely(!atomic_read(&mhi_netdev->stats.rx_queued))) + if (mhi_get_free_desc_count(mdev, DMA_FROM_DEVICE) == mhi_netdev->rx_queue_sz) schedule_delayed_work(&mhi_netdev->rx_refill, HZ / 2); } -- GitLab From 62fafcd63139920eb25b3fbf154177ce3e6f3232 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Thu, 28 Jan 2021 17:18:31 +0800 Subject: [PATCH 1729/2012] net: support ip generic csum processing in skb_csum_hwoffload_help NETIF_F_IP|IPV6_CSUM feature flag indicates UDP and TCP csum offload while NETIF_F_HW_CSUM feature flag indicates ip generic csum offload for HW, which includes not only for TCP/UDP csum, but also for other protocols' csum like GRE's. However, in skb_csum_hwoffload_help() it only checks features against NETIF_F_CSUM_MASK(NETIF_F_HW|IP|IPV6_CSUM). So if it's a non TCP/UDP packet and the features doesn't support NETIF_F_HW_CSUM, but supports NETIF_F_IP|IPV6_CSUM only, it would still return 0 and leave the HW to do csum. This patch is to support ip generic csum processing by checking NETIF_F_HW_CSUM for all protocols, and check (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM) only for TCP and UDP. Note that we're using skb->csum_offset to check if it's a TCP/UDP proctol, this might be fragile. However, as Alex said, for now we only have a few L4 protocols that are requesting Tx csum offload, we'd better fix this until a new protocol comes with a same csum offset. v1->v2: - not extend skb->csum_not_inet, but use skb->csum_offset to tell if it's an UDP/TCP csum packet. v2->v3: - add a note in the changelog, as Willem suggested. Suggested-by: Alexander Duyck Signed-off-by: Xin Long Signed-off-by: Jakub Kicinski --- net/core/dev.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/net/core/dev.c b/net/core/dev.c index 6df3f1bcdc686..aae116d059da7 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3621,7 +3621,18 @@ int skb_csum_hwoffload_help(struct sk_buff *skb, return !!(features & NETIF_F_SCTP_CRC) ? 0 : skb_crc32c_csum_help(skb); - return !!(features & NETIF_F_CSUM_MASK) ? 0 : skb_checksum_help(skb); + if (features & NETIF_F_HW_CSUM) + return 0; + + if (features & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM)) { + switch (skb->csum_offset) { + case offsetof(struct tcphdr, check): + case offsetof(struct udphdr, check): + return 0; + } + } + + return skb_checksum_help(skb); } EXPORT_SYMBOL(skb_csum_hwoffload_help); -- GitLab From efa1a65c7e1946ff174c56d36bf015ff9e11c4a1 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Thu, 28 Jan 2021 17:18:32 +0800 Subject: [PATCH 1730/2012] ip_gre: add csum offload support for gre header This patch is to add csum offload support for gre header: On the TX path in gre_build_header(), when CHECKSUM_PARTIAL's set for inner proto, it will calculate the csum for outer proto, and inner csum will be offloaded later. Otherwise, CHECKSUM_PARTIAL and csum_start/offset will be set for outer proto, and the outer csum will be offloaded later. On the GSO path in gre_gso_segment(), when CHECKSUM_PARTIAL is not set for inner proto and the hardware supports csum offload, CHECKSUM_PARTIAL and csum_start/offset will be set for outer proto, and outer csum will be offloaded later. Otherwise, it will do csum for outer proto by calling gso_make_checksum(). Note that SCTP has to do the csum by itself for non GSO path in sctp_packet_pack(), as gre_build_header() can't handle the csum with CHECKSUM_PARTIAL set for SCTP CRC csum offload. v1->v2: - remove the SCTP part, as GRE dev doesn't support SCTP CRC CSUM and it will always do checksum for SCTP in sctp_packet_pack() when it's not a GSO packet. Signed-off-by: Xin Long Signed-off-by: Jakub Kicinski --- include/net/gre.h | 19 +++++++------------ net/ipv4/gre_offload.c | 15 +++++++++++++-- 2 files changed, 20 insertions(+), 14 deletions(-) diff --git a/include/net/gre.h b/include/net/gre.h index b60f212c16c65..4e209708b7545 100644 --- a/include/net/gre.h +++ b/include/net/gre.h @@ -106,17 +106,6 @@ static inline __be16 gre_tnl_flags_to_gre_flags(__be16 tflags) return flags; } -static inline __sum16 gre_checksum(struct sk_buff *skb) -{ - __wsum csum; - - if (skb->ip_summed == CHECKSUM_PARTIAL) - csum = lco_csum(skb); - else - csum = skb_checksum(skb, 0, skb->len, 0); - return csum_fold(csum); -} - static inline void gre_build_header(struct sk_buff *skb, int hdr_len, __be16 flags, __be16 proto, __be32 key, __be32 seq) @@ -146,7 +135,13 @@ static inline void gre_build_header(struct sk_buff *skb, int hdr_len, !(skb_shinfo(skb)->gso_type & (SKB_GSO_GRE | SKB_GSO_GRE_CSUM))) { *ptr = 0; - *(__sum16 *)ptr = gre_checksum(skb); + if (skb->ip_summed == CHECKSUM_PARTIAL) { + *(__sum16 *)ptr = csum_fold(lco_csum(skb)); + } else { + skb->ip_summed = CHECKSUM_PARTIAL; + skb->csum_start = skb_transport_header(skb) - skb->head; + skb->csum_offset = sizeof(*greh); + } } } } diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c index 10bc49bde9a11..1121a9d5fed92 100644 --- a/net/ipv4/gre_offload.c +++ b/net/ipv4/gre_offload.c @@ -15,10 +15,10 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb, netdev_features_t features) { int tnl_hlen = skb_inner_mac_header(skb) - skb_transport_header(skb); + bool need_csum, offload_csum, gso_partial, need_ipsec; struct sk_buff *segs = ERR_PTR(-EINVAL); u16 mac_offset = skb->mac_header; __be16 protocol = skb->protocol; - bool need_csum, gso_partial; u16 mac_len = skb->mac_len; int gre_offset, outer_hlen; @@ -47,6 +47,11 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb, if (need_csum) features &= ~NETIF_F_SCTP_CRC; + need_ipsec = skb_dst(skb) && dst_xfrm(skb_dst(skb)); + /* Try to offload checksum if possible */ + offload_csum = !!(need_csum && !need_ipsec && + (skb->dev->features & NETIF_F_HW_CSUM)); + /* segment inner packet. */ segs = skb_mac_gso_segment(skb, features); if (IS_ERR_OR_NULL(segs)) { @@ -100,7 +105,13 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb, } *(pcsum + 1) = 0; - *pcsum = gso_make_checksum(skb, 0); + if (skb->encapsulation || !offload_csum) { + *pcsum = gso_make_checksum(skb, 0); + } else { + skb->ip_summed = CHECKSUM_PARTIAL; + skb->csum_start = skb_transport_header(skb) - skb->head; + skb->csum_offset = sizeof(*greh); + } } while ((skb = skb->next)); out: return segs; -- GitLab From 2bbad0aa40e172e7ed7aba6f6ad4d9977dbd0be3 Mon Sep 17 00:00:00 2001 From: Guangbin Huang Date: Thu, 28 Jan 2021 19:51:35 +0800 Subject: [PATCH 1731/2012] net: hns3: add interfaces to query information of tm priority/qset Add some interfaces to get information of tm priority and qset, then they can be used by debugfs. Signed-off-by: Guangbin Huang Signed-off-by: Huazhong Tan Signed-off-by: Jakub Kicinski --- .../hisilicon/hns3/hns3pf/hclge_cmd.h | 1 + .../ethernet/hisilicon/hns3/hns3pf/hclge_tm.c | 186 ++++++++++++++++++ .../ethernet/hisilicon/hns3/hns3pf/hclge_tm.h | 48 ++++- 3 files changed, 234 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h index edfadb5cb1c34..f861bdbe46c2d 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h @@ -160,6 +160,7 @@ enum hclge_opcode_type { HCLGE_OPC_TM_PRI_SCH_MODE_CFG = 0x0813, HCLGE_OPC_TM_QS_SCH_MODE_CFG = 0x0814, HCLGE_OPC_TM_BP_TO_QSET_MAPPING = 0x0815, + HCLGE_OPC_TM_NODES = 0x0816, HCLGE_OPC_ETS_TC_WEIGHT = 0x0843, HCLGE_OPC_QSET_DFX_STS = 0x0844, HCLGE_OPC_PRI_DFX_STS = 0x0845, diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c index 82742a64f3b70..216ab1e927239 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c @@ -1616,3 +1616,189 @@ int hclge_tm_vport_map_update(struct hclge_dev *hdev) return hclge_tm_bp_setup(hdev); } + +int hclge_tm_get_qset_num(struct hclge_dev *hdev, u16 *qset_num) +{ + struct hclge_tm_nodes_cmd *nodes; + struct hclge_desc desc; + int ret; + + if (hdev->ae_dev->dev_version <= HNAE3_DEVICE_VERSION_V2) { + /* Each PF has 8 qsets and each VF has 1 qset */ + *qset_num = HCLGE_TM_PF_MAX_QSET_NUM + pci_num_vf(hdev->pdev); + return 0; + } + + hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_TM_NODES, true); + ret = hclge_cmd_send(&hdev->hw, &desc, 1); + if (ret) { + dev_err(&hdev->pdev->dev, + "failed to get qset num, ret = %d\n", ret); + return ret; + } + + nodes = (struct hclge_tm_nodes_cmd *)desc.data; + *qset_num = le16_to_cpu(nodes->qset_num); + return 0; +} + +int hclge_tm_get_pri_num(struct hclge_dev *hdev, u8 *pri_num) +{ + struct hclge_tm_nodes_cmd *nodes; + struct hclge_desc desc; + int ret; + + if (hdev->ae_dev->dev_version <= HNAE3_DEVICE_VERSION_V2) { + *pri_num = HCLGE_TM_PF_MAX_PRI_NUM; + return 0; + } + + hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_TM_NODES, true); + ret = hclge_cmd_send(&hdev->hw, &desc, 1); + if (ret) { + dev_err(&hdev->pdev->dev, + "failed to get pri num, ret = %d\n", ret); + return ret; + } + + nodes = (struct hclge_tm_nodes_cmd *)desc.data; + *pri_num = nodes->pri_num; + return 0; +} + +int hclge_tm_get_qset_map_pri(struct hclge_dev *hdev, u16 qset_id, u8 *priority, + u8 *link_vld) +{ + struct hclge_qs_to_pri_link_cmd *map; + struct hclge_desc desc; + int ret; + + hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_TM_QS_TO_PRI_LINK, true); + map = (struct hclge_qs_to_pri_link_cmd *)desc.data; + map->qs_id = cpu_to_le16(qset_id); + ret = hclge_cmd_send(&hdev->hw, &desc, 1); + if (ret) { + dev_err(&hdev->pdev->dev, + "failed to get qset map priority, ret = %d\n", ret); + return ret; + } + + *priority = map->priority; + *link_vld = map->link_vld; + return 0; +} + +int hclge_tm_get_qset_sch_mode(struct hclge_dev *hdev, u16 qset_id, u8 *mode) +{ + struct hclge_qs_sch_mode_cfg_cmd *qs_sch_mode; + struct hclge_desc desc; + int ret; + + hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_TM_QS_SCH_MODE_CFG, true); + qs_sch_mode = (struct hclge_qs_sch_mode_cfg_cmd *)desc.data; + qs_sch_mode->qs_id = cpu_to_le16(qset_id); + ret = hclge_cmd_send(&hdev->hw, &desc, 1); + if (ret) { + dev_err(&hdev->pdev->dev, + "failed to get qset sch mode, ret = %d\n", ret); + return ret; + } + + *mode = qs_sch_mode->sch_mode; + return 0; +} + +int hclge_tm_get_qset_weight(struct hclge_dev *hdev, u16 qset_id, u8 *weight) +{ + struct hclge_qs_weight_cmd *qs_weight; + struct hclge_desc desc; + int ret; + + hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_TM_QS_WEIGHT, true); + qs_weight = (struct hclge_qs_weight_cmd *)desc.data; + qs_weight->qs_id = cpu_to_le16(qset_id); + ret = hclge_cmd_send(&hdev->hw, &desc, 1); + if (ret) { + dev_err(&hdev->pdev->dev, + "failed to get qset weight, ret = %d\n", ret); + return ret; + } + + *weight = qs_weight->dwrr; + return 0; +} + +int hclge_tm_get_pri_sch_mode(struct hclge_dev *hdev, u8 pri_id, u8 *mode) +{ + struct hclge_pri_sch_mode_cfg_cmd *pri_sch_mode; + struct hclge_desc desc; + int ret; + + hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_TM_PRI_SCH_MODE_CFG, true); + pri_sch_mode = (struct hclge_pri_sch_mode_cfg_cmd *)desc.data; + pri_sch_mode->pri_id = pri_id; + ret = hclge_cmd_send(&hdev->hw, &desc, 1); + if (ret) { + dev_err(&hdev->pdev->dev, + "failed to get priority sch mode, ret = %d\n", ret); + return ret; + } + + *mode = pri_sch_mode->sch_mode; + return 0; +} + +int hclge_tm_get_pri_weight(struct hclge_dev *hdev, u8 pri_id, u8 *weight) +{ + struct hclge_priority_weight_cmd *priority_weight; + struct hclge_desc desc; + int ret; + + hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_TM_PRI_WEIGHT, true); + priority_weight = (struct hclge_priority_weight_cmd *)desc.data; + priority_weight->pri_id = pri_id; + ret = hclge_cmd_send(&hdev->hw, &desc, 1); + if (ret) { + dev_err(&hdev->pdev->dev, + "failed to get priority weight, ret = %d\n", ret); + return ret; + } + + *weight = priority_weight->dwrr; + return 0; +} + +int hclge_tm_get_pri_shaper(struct hclge_dev *hdev, u8 pri_id, + enum hclge_opcode_type cmd, + struct hclge_pri_shaper_para *para) +{ + struct hclge_pri_shapping_cmd *shap_cfg_cmd; + struct hclge_desc desc; + u32 shapping_para; + int ret; + + if (cmd != HCLGE_OPC_TM_PRI_C_SHAPPING && + cmd != HCLGE_OPC_TM_PRI_P_SHAPPING) + return -EINVAL; + + hclge_cmd_setup_basic_desc(&desc, cmd, true); + shap_cfg_cmd = (struct hclge_pri_shapping_cmd *)desc.data; + shap_cfg_cmd->pri_id = pri_id; + ret = hclge_cmd_send(&hdev->hw, &desc, 1); + if (ret) { + dev_err(&hdev->pdev->dev, + "failed to get priority shaper(%#x), ret = %d\n", + cmd, ret); + return ret; + } + + shapping_para = le32_to_cpu(shap_cfg_cmd->pri_shapping_para); + para->ir_b = hclge_tm_get_field(shapping_para, IR_B); + para->ir_u = hclge_tm_get_field(shapping_para, IR_U); + para->ir_s = hclge_tm_get_field(shapping_para, IR_S); + para->bs_b = hclge_tm_get_field(shapping_para, BS_B); + para->bs_s = hclge_tm_get_field(shapping_para, BS_S); + para->flag = shap_cfg_cmd->flag; + para->rate = le32_to_cpu(shap_cfg_cmd->pri_rate); + return 0; +} diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h index 5498d73ed34b8..d33cb04acbef4 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h @@ -21,6 +21,9 @@ #define HCLGE_ETHER_MAX_RATE 100000 +#define HCLGE_TM_PF_MAX_PRI_NUM 8 +#define HCLGE_TM_PF_MAX_QSET_NUM 8 + struct hclge_pg_to_pri_link_cmd { u8 pg_id; u8 rsvd1[3]; @@ -65,6 +68,18 @@ struct hclge_priority_weight_cmd { u8 dwrr; }; +struct hclge_pri_sch_mode_cfg_cmd { + u8 pri_id; + u8 rsvd[3]; + u8 sch_mode; +}; + +struct hclge_qs_sch_mode_cfg_cmd { + __le16 qs_id; + u8 rsvd[2]; + u8 sch_mode; +}; + struct hclge_qs_weight_cmd { __le16 qs_id; u8 dwrr; @@ -173,6 +188,27 @@ struct hclge_shaper_ir_para { u8 ir_s; /* IR_S parameter of IR shaper */ }; +struct hclge_tm_nodes_cmd { + u8 pg_base_id; + u8 pri_base_id; + __le16 qset_base_id; + __le16 queue_base_id; + u8 pg_num; + u8 pri_num; + __le16 qset_num; + __le16 queue_num; +}; + +struct hclge_pri_shaper_para { + u8 ir_b; + u8 ir_u; + u8 ir_s; + u8 bs_b; + u8 bs_s; + u8 flag; + u32 rate; +}; + #define hclge_tm_set_field(dest, string, val) \ hnae3_set_field((dest), \ (HCLGE_TM_SHAP_##string##_MSK), \ @@ -195,5 +231,15 @@ int hclge_pause_addr_cfg(struct hclge_dev *hdev, const u8 *mac_addr); int hclge_pfc_rx_stats_get(struct hclge_dev *hdev, u64 *stats); int hclge_pfc_tx_stats_get(struct hclge_dev *hdev, u64 *stats); int hclge_tm_qs_shaper_cfg(struct hclge_vport *vport, int max_tx_rate); - +int hclge_tm_get_qset_num(struct hclge_dev *hdev, u16 *qset_num); +int hclge_tm_get_pri_num(struct hclge_dev *hdev, u8 *pri_num); +int hclge_tm_get_qset_map_pri(struct hclge_dev *hdev, u16 qset_id, u8 *priority, + u8 *link_vld); +int hclge_tm_get_qset_sch_mode(struct hclge_dev *hdev, u16 qset_id, u8 *mode); +int hclge_tm_get_qset_weight(struct hclge_dev *hdev, u16 qset_id, u8 *weight); +int hclge_tm_get_pri_sch_mode(struct hclge_dev *hdev, u8 pri_id, u8 *mode); +int hclge_tm_get_pri_weight(struct hclge_dev *hdev, u8 pri_id, u8 *weight); +int hclge_tm_get_pri_shaper(struct hclge_dev *hdev, u8 pri_id, + enum hclge_opcode_type cmd, + struct hclge_pri_shaper_para *para); #endif -- GitLab From 04987ca1b9b6841cfa5f9b459c5a270b75c89345 Mon Sep 17 00:00:00 2001 From: Guangbin Huang Date: Thu, 28 Jan 2021 19:51:36 +0800 Subject: [PATCH 1732/2012] net: hns3: add debugfs support for tm nodes, priority and qset info In order to query tm info of nodes, priority and qset for debugging, adds three debugfs files tm_nodes, tm_priority and tm_qset in newly created tm directory. Unlike previous debugfs commands, these three files just support read ops, so they only support to use cat command to dump their info. The new tm file style is acccording to suggestion from Jakub Kicinski's opinion as link https://lkml.org/lkml/2020/9/29/2101. Signed-off-by: Guangbin Huang Signed-off-by: Huazhong Tan Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/hisilicon/hns3/hnae3.h | 8 + .../ethernet/hisilicon/hns3/hns3_debugfs.c | 55 ++++++- .../hisilicon/hns3/hns3pf/hclge_debugfs.c | 153 ++++++++++++++++++ .../hisilicon/hns3/hns3pf/hclge_main.c | 1 + .../hisilicon/hns3/hns3pf/hclge_main.h | 2 + 5 files changed, 218 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h index a7daf6d4511ee..fe09cf6c15f30 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h +++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h @@ -465,6 +465,8 @@ struct hnae3_ae_dev { * Delete clsflower rule * cls_flower_active * Check if any cls flower rule exist + * dbg_read_cmd + * Execute debugfs read command. */ struct hnae3_ae_ops { int (*init_ae_dev)(struct hnae3_ae_dev *ae_dev); @@ -620,6 +622,8 @@ struct hnae3_ae_ops { int (*add_arfs_entry)(struct hnae3_handle *handle, u16 queue_id, u16 flow_id, struct flow_keys *fkeys); int (*dbg_run_cmd)(struct hnae3_handle *handle, const char *cmd_buf); + int (*dbg_read_cmd)(struct hnae3_handle *handle, const char *cmd_buf, + char *buf, int len); pci_ers_result_t (*handle_hw_ras_error)(struct hnae3_ae_dev *ae_dev); bool (*get_hw_reset_stat)(struct hnae3_handle *handle); bool (*ae_dev_resetting)(struct hnae3_handle *handle); @@ -777,6 +781,10 @@ struct hnae3_handle { #define hnae3_get_bit(origin, shift) \ hnae3_get_field((origin), (0x1 << (shift)), (shift)) +#define HNAE3_DBG_TM_NODES "tm_nodes" +#define HNAE3_DBG_TM_PRI "tm_priority" +#define HNAE3_DBG_TM_QSET "tm_qset" + int hnae3_register_ae_dev(struct hnae3_ae_dev *ae_dev); void hnae3_unregister_ae_dev(struct hnae3_ae_dev *ae_dev); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c index 9d4e9c053a8fe..6978304f1ac53 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c @@ -7,7 +7,7 @@ #include "hnae3.h" #include "hns3_enet.h" -#define HNS3_DBG_READ_LEN 256 +#define HNS3_DBG_READ_LEN 65536 #define HNS3_DBG_WRITE_LEN 1024 static struct dentry *hns3_dbgfs_root; @@ -484,6 +484,42 @@ static ssize_t hns3_dbg_cmd_write(struct file *filp, const char __user *buffer, return count; } +static ssize_t hns3_dbg_read(struct file *filp, char __user *buffer, + size_t count, loff_t *ppos) +{ + struct hnae3_handle *handle = filp->private_data; + const struct hnae3_ae_ops *ops = handle->ae_algo->ops; + struct hns3_nic_priv *priv = handle->priv; + char *cmd_buf, *read_buf; + ssize_t size = 0; + int ret = 0; + + if (!filp->f_path.dentry->d_iname) + return -EINVAL; + + read_buf = kzalloc(HNS3_DBG_READ_LEN, GFP_KERNEL); + if (!read_buf) + return -ENOMEM; + + cmd_buf = filp->f_path.dentry->d_iname; + + if (ops->dbg_read_cmd) + ret = ops->dbg_read_cmd(handle, cmd_buf, read_buf, + HNS3_DBG_READ_LEN); + + if (ret) { + dev_info(priv->dev, "unknown command\n"); + goto out; + } + + size = simple_read_from_buffer(buffer, count, ppos, read_buf, + strlen(read_buf)); + +out: + kfree(read_buf); + return size; +} + static const struct file_operations hns3_dbg_cmd_fops = { .owner = THIS_MODULE, .open = simple_open, @@ -491,14 +527,31 @@ static const struct file_operations hns3_dbg_cmd_fops = { .write = hns3_dbg_cmd_write, }; +static const struct file_operations hns3_dbg_fops = { + .owner = THIS_MODULE, + .open = simple_open, + .read = hns3_dbg_read, +}; + void hns3_dbg_init(struct hnae3_handle *handle) { + struct hnae3_ae_dev *ae_dev = pci_get_drvdata(handle->pdev); const char *name = pci_name(handle->pdev); + struct dentry *entry_dir; handle->hnae3_dbgfs = debugfs_create_dir(name, hns3_dbgfs_root); debugfs_create_file("cmd", 0600, handle->hnae3_dbgfs, handle, &hns3_dbg_cmd_fops); + + entry_dir = debugfs_create_dir("tm", handle->hnae3_dbgfs); + if (ae_dev->dev_version > HNAE3_DEVICE_VERSION_V2) + debugfs_create_file(HNAE3_DBG_TM_NODES, 0600, entry_dir, handle, + &hns3_dbg_fops); + debugfs_create_file(HNAE3_DBG_TM_PRI, 0600, entry_dir, handle, + &hns3_dbg_fops); + debugfs_create_file(HNAE3_DBG_TM_QSET, 0600, entry_dir, handle, + &hns3_dbg_fops); } void hns3_dbg_uninit(struct hnae3_handle *handle) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c index 8f6dea5198cf5..8f3fefe507196 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c @@ -800,6 +800,140 @@ err_tm_map_cmd_send: cmd, ret); } +static int hclge_dbg_dump_tm_nodes(struct hclge_dev *hdev, char *buf, int len) +{ + struct hclge_tm_nodes_cmd *nodes; + struct hclge_desc desc; + int pos = 0; + int ret; + + hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_TM_NODES, true); + ret = hclge_cmd_send(&hdev->hw, &desc, 1); + if (ret) { + dev_err(&hdev->pdev->dev, + "failed to dump tm nodes, ret = %d\n", ret); + return ret; + } + + nodes = (struct hclge_tm_nodes_cmd *)desc.data; + + pos += scnprintf(buf + pos, len - pos, " BASE_ID MAX_NUM\n"); + pos += scnprintf(buf + pos, len - pos, "PG %4u %4u\n", + nodes->pg_base_id, nodes->pg_num); + pos += scnprintf(buf + pos, len - pos, "PRI %4u %4u\n", + nodes->pri_base_id, nodes->pri_num); + pos += scnprintf(buf + pos, len - pos, "QSET %4u %4u\n", + le16_to_cpu(nodes->qset_base_id), + le16_to_cpu(nodes->qset_num)); + pos += scnprintf(buf + pos, len - pos, "QUEUE %4u %4u\n", + le16_to_cpu(nodes->queue_base_id), + le16_to_cpu(nodes->queue_num)); + + return 0; +} + +static int hclge_dbg_dump_tm_pri(struct hclge_dev *hdev, char *buf, int len) +{ + struct hclge_pri_shaper_para c_shaper_para; + struct hclge_pri_shaper_para p_shaper_para; + u8 pri_num, sch_mode, weight; + char *sch_mode_str; + int pos = 0; + int ret; + u8 i; + + ret = hclge_tm_get_pri_num(hdev, &pri_num); + if (ret) + return ret; + + pos += scnprintf(buf + pos, len - pos, + "ID MODE DWRR C_IR_B C_IR_U C_IR_S C_BS_B "); + pos += scnprintf(buf + pos, len - pos, + "C_BS_S C_FLAG C_RATE(Mbps) P_IR_B P_IR_U "); + pos += scnprintf(buf + pos, len - pos, + "P_IR_S P_BS_B P_BS_S P_FLAG P_RATE(Mbps)\n"); + + for (i = 0; i < pri_num; i++) { + ret = hclge_tm_get_pri_sch_mode(hdev, i, &sch_mode); + if (ret) + return ret; + + ret = hclge_tm_get_pri_weight(hdev, i, &weight); + if (ret) + return ret; + + ret = hclge_tm_get_pri_shaper(hdev, i, + HCLGE_OPC_TM_PRI_C_SHAPPING, + &c_shaper_para); + if (ret) + return ret; + + ret = hclge_tm_get_pri_shaper(hdev, i, + HCLGE_OPC_TM_PRI_P_SHAPPING, + &p_shaper_para); + if (ret) + return ret; + + sch_mode_str = sch_mode & HCLGE_TM_TX_SCHD_DWRR_MSK ? "dwrr" : + "sp"; + + pos += scnprintf(buf + pos, len - pos, + "%04u %4s %3u %3u %3u %3u ", + i, sch_mode_str, weight, c_shaper_para.ir_b, + c_shaper_para.ir_u, c_shaper_para.ir_s); + pos += scnprintf(buf + pos, len - pos, + "%3u %3u %1u %6u ", + c_shaper_para.bs_b, c_shaper_para.bs_s, + c_shaper_para.flag, c_shaper_para.rate); + pos += scnprintf(buf + pos, len - pos, + "%3u %3u %3u %3u %3u ", + p_shaper_para.ir_b, p_shaper_para.ir_u, + p_shaper_para.ir_s, p_shaper_para.bs_b, + p_shaper_para.bs_s); + pos += scnprintf(buf + pos, len - pos, "%1u %6u\n", + p_shaper_para.flag, p_shaper_para.rate); + } + + return 0; +} + +static int hclge_dbg_dump_tm_qset(struct hclge_dev *hdev, char *buf, int len) +{ + u8 priority, link_vld, sch_mode, weight; + char *sch_mode_str; + int ret, pos; + u16 qset_num; + u16 i; + + ret = hclge_tm_get_qset_num(hdev, &qset_num); + if (ret) + return ret; + + pos = scnprintf(buf, len, "ID MAP_PRI LINK_VLD MODE DWRR\n"); + + for (i = 0; i < qset_num; i++) { + ret = hclge_tm_get_qset_map_pri(hdev, i, &priority, &link_vld); + if (ret) + return ret; + + ret = hclge_tm_get_qset_sch_mode(hdev, i, &sch_mode); + if (ret) + return ret; + + ret = hclge_tm_get_qset_weight(hdev, i, &weight); + if (ret) + return ret; + + sch_mode_str = sch_mode & HCLGE_TM_TX_SCHD_DWRR_MSK ? "dwrr" : + "sp"; + pos += scnprintf(buf + pos, len - pos, + "%04u %4u %1u %4s %3u\n", + i, priority, link_vld, sch_mode_str, weight); + } + + return 0; +} + static void hclge_dbg_dump_qos_pause_cfg(struct hclge_dev *hdev) { struct hclge_cfg_pause_param_cmd *pause_param; @@ -1591,3 +1725,22 @@ int hclge_dbg_run_cmd(struct hnae3_handle *handle, const char *cmd_buf) return 0; } + +int hclge_dbg_read_cmd(struct hnae3_handle *handle, const char *cmd_buf, + char *buf, int len) +{ + struct hclge_vport *vport = hclge_get_vport(handle); + struct hclge_dev *hdev = vport->back; + + if (strncmp(cmd_buf, HNAE3_DBG_TM_NODES, + strlen(HNAE3_DBG_TM_NODES)) == 0) + return hclge_dbg_dump_tm_nodes(hdev, buf, len); + else if (strncmp(cmd_buf, HNAE3_DBG_TM_PRI, + strlen(HNAE3_DBG_TM_PRI)) == 0) + return hclge_dbg_dump_tm_pri(hdev, buf, len); + else if (strncmp(cmd_buf, HNAE3_DBG_TM_QSET, + strlen(HNAE3_DBG_TM_QSET)) == 0) + return hclge_dbg_dump_tm_qset(hdev, buf, len); + + return -EINVAL; +} diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index c242883fea5db..16ccb1abe3d91 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -11850,6 +11850,7 @@ static const struct hnae3_ae_ops hclge_ops = { .enable_fd = hclge_enable_fd, .add_arfs_entry = hclge_add_fd_entry_by_arfs, .dbg_run_cmd = hclge_dbg_run_cmd, + .dbg_read_cmd = hclge_dbg_read_cmd, .handle_hw_ras_error = hclge_handle_hw_ras_error, .get_hw_reset_stat = hclge_get_hw_reset_stat, .ae_dev_resetting = hclge_ae_dev_resetting, diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h index ca46bc9110d7d..32e5f82ef615b 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h @@ -1006,6 +1006,8 @@ int hclge_vport_start(struct hclge_vport *vport); void hclge_vport_stop(struct hclge_vport *vport); int hclge_set_vport_mtu(struct hclge_vport *vport, int new_mtu); int hclge_dbg_run_cmd(struct hnae3_handle *handle, const char *cmd_buf); +int hclge_dbg_read_cmd(struct hnae3_handle *handle, const char *cmd_buf, + char *buf, int len); u16 hclge_covert_handle_qid_global(struct hnae3_handle *handle, u16 queue_id); int hclge_notify_client(struct hclge_dev *hdev, enum hnae3_reset_notify_type type); -- GitLab From 01365633bd1c836240f9bbf86bbeee749795480a Mon Sep 17 00:00:00 2001 From: "Ahmed S. Darwish" Date: Thu, 28 Jan 2021 20:48:02 +0100 Subject: [PATCH 1733/2012] net: arcnet: Fix RESET flag handling The main arcnet interrupt handler calls arcnet_close() then arcnet_open(), if the RESET status flag is encountered. This is invalid: 1) In general, interrupt handlers should never call ->ndo_stop() and ->ndo_open() functions. They are usually full of blocking calls and other methods that are expected to be called only from drivers init and exit code paths. 2) arcnet_close() contains a del_timer_sync(). If the irq handler interrupts the to-be-deleted timer, del_timer_sync() will just loop forever. 3) arcnet_close() also calls tasklet_kill(), which has a warning if called from irq context. 4) For device reset, the sequence "arcnet_close(); arcnet_open();" is not complete. Some children arcnet drivers have special init/exit code sequences, which then embed a call to arcnet_open() and arcnet_close() accordingly. Check drivers/net/arcnet/com20020.c. Run the device RESET sequence from a scheduled workqueue instead. Signed-off-by: Ahmed S. Darwish Signed-off-by: Sebastian Andrzej Siewior Link: https://lore.kernel.org/r/20210128194802.727770-1-a.darwish@linutronix.de Signed-off-by: Jakub Kicinski --- drivers/net/arcnet/arc-rimi.c | 4 +- drivers/net/arcnet/arcdevice.h | 6 +++ drivers/net/arcnet/arcnet.c | 66 +++++++++++++++++++++++++++++-- drivers/net/arcnet/com20020-isa.c | 4 +- drivers/net/arcnet/com20020-pci.c | 2 +- drivers/net/arcnet/com20020_cs.c | 2 +- drivers/net/arcnet/com90io.c | 4 +- drivers/net/arcnet/com90xx.c | 4 +- 8 files changed, 78 insertions(+), 14 deletions(-) diff --git a/drivers/net/arcnet/arc-rimi.c b/drivers/net/arcnet/arc-rimi.c index 98df38fe553ce..12d085405bd05 100644 --- a/drivers/net/arcnet/arc-rimi.c +++ b/drivers/net/arcnet/arc-rimi.c @@ -332,7 +332,7 @@ static int __init arc_rimi_init(void) dev->irq = 9; if (arcrimi_probe(dev)) { - free_netdev(dev); + free_arcdev(dev); return -EIO; } @@ -349,7 +349,7 @@ static void __exit arc_rimi_exit(void) iounmap(lp->mem_start); release_mem_region(dev->mem_start, dev->mem_end - dev->mem_start + 1); free_irq(dev->irq, dev); - free_netdev(dev); + free_arcdev(dev); } #ifndef MODULE diff --git a/drivers/net/arcnet/arcdevice.h b/drivers/net/arcnet/arcdevice.h index 22a49c6d7ae6e..5d4a4c7efbbff 100644 --- a/drivers/net/arcnet/arcdevice.h +++ b/drivers/net/arcnet/arcdevice.h @@ -298,6 +298,10 @@ struct arcnet_local { int excnak_pending; /* We just got an excesive nak interrupt */ + /* RESET flag handling */ + int reset_in_progress; + struct work_struct reset_work; + struct { uint16_t sequence; /* sequence number (incs with each packet) */ __be16 aborted_seq; @@ -350,7 +354,9 @@ void arcnet_dump_skb(struct net_device *dev, struct sk_buff *skb, char *desc) void arcnet_unregister_proto(struct ArcProto *proto); irqreturn_t arcnet_interrupt(int irq, void *dev_id); + struct net_device *alloc_arcdev(const char *name); +void free_arcdev(struct net_device *dev); int arcnet_open(struct net_device *dev); int arcnet_close(struct net_device *dev); diff --git a/drivers/net/arcnet/arcnet.c b/drivers/net/arcnet/arcnet.c index e04efc0a5c977..d76dd7d14299e 100644 --- a/drivers/net/arcnet/arcnet.c +++ b/drivers/net/arcnet/arcnet.c @@ -387,10 +387,44 @@ static void arcnet_timer(struct timer_list *t) struct arcnet_local *lp = from_timer(lp, t, timer); struct net_device *dev = lp->dev; - if (!netif_carrier_ok(dev)) { + spin_lock_irq(&lp->lock); + + if (!lp->reset_in_progress && !netif_carrier_ok(dev)) { netif_carrier_on(dev); netdev_info(dev, "link up\n"); } + + spin_unlock_irq(&lp->lock); +} + +static void reset_device_work(struct work_struct *work) +{ + struct arcnet_local *lp; + struct net_device *dev; + + lp = container_of(work, struct arcnet_local, reset_work); + dev = lp->dev; + + /* Do not bring the network interface back up if an ifdown + * was already done. + */ + if (!netif_running(dev) || !lp->reset_in_progress) + return; + + rtnl_lock(); + + /* Do another check, in case of an ifdown that was triggered in + * the small race window between the exit condition above and + * acquiring RTNL. + */ + if (!netif_running(dev) || !lp->reset_in_progress) + goto out; + + dev_close(dev); + dev_open(dev, NULL); + +out: + rtnl_unlock(); } static void arcnet_reply_tasklet(unsigned long data) @@ -452,12 +486,25 @@ struct net_device *alloc_arcdev(const char *name) lp->dev = dev; spin_lock_init(&lp->lock); timer_setup(&lp->timer, arcnet_timer, 0); + INIT_WORK(&lp->reset_work, reset_device_work); } return dev; } EXPORT_SYMBOL(alloc_arcdev); +void free_arcdev(struct net_device *dev) +{ + struct arcnet_local *lp = netdev_priv(dev); + + /* Do not cancel this at ->ndo_close(), as the workqueue itself + * indirectly calls the ifdown path through dev_close(). + */ + cancel_work_sync(&lp->reset_work); + free_netdev(dev); +} +EXPORT_SYMBOL(free_arcdev); + /* Open/initialize the board. This is called sometime after booting when * the 'ifconfig' program is run. * @@ -587,6 +634,10 @@ int arcnet_close(struct net_device *dev) /* shut down the card */ lp->hw.close(dev); + + /* reset counters */ + lp->reset_in_progress = 0; + module_put(lp->hw.owner); return 0; } @@ -820,6 +871,9 @@ irqreturn_t arcnet_interrupt(int irq, void *dev_id) spin_lock_irqsave(&lp->lock, flags); + if (lp->reset_in_progress) + goto out; + /* RESET flag was enabled - if device is not running, we must * clear it right away (but nothing else). */ @@ -852,11 +906,14 @@ irqreturn_t arcnet_interrupt(int irq, void *dev_id) if (status & RESETflag) { arc_printk(D_NORMAL, dev, "spurious reset (status=%Xh)\n", status); - arcnet_close(dev); - arcnet_open(dev); + + lp->reset_in_progress = 1; + netif_stop_queue(dev); + netif_carrier_off(dev); + schedule_work(&lp->reset_work); /* get out of the interrupt handler! */ - break; + goto out; } /* RX is inhibited - we must have received something. * Prepare to receive into the next buffer. @@ -1052,6 +1109,7 @@ irqreturn_t arcnet_interrupt(int irq, void *dev_id) udelay(1); lp->hw.intmask(dev, lp->intmask); +out: spin_unlock_irqrestore(&lp->lock, flags); return retval; } diff --git a/drivers/net/arcnet/com20020-isa.c b/drivers/net/arcnet/com20020-isa.c index f983c4ce6b07f..be618e4b9ed5e 100644 --- a/drivers/net/arcnet/com20020-isa.c +++ b/drivers/net/arcnet/com20020-isa.c @@ -169,7 +169,7 @@ static int __init com20020_init(void) dev->irq = 9; if (com20020isa_probe(dev)) { - free_netdev(dev); + free_arcdev(dev); return -EIO; } @@ -182,7 +182,7 @@ static void __exit com20020_exit(void) unregister_netdev(my_dev); free_irq(my_dev->irq, my_dev); release_region(my_dev->base_addr, ARCNET_TOTAL_SIZE); - free_netdev(my_dev); + free_arcdev(my_dev); } #ifndef MODULE diff --git a/drivers/net/arcnet/com20020-pci.c b/drivers/net/arcnet/com20020-pci.c index eb7f76753c9c0..8bdc44b7e09a1 100644 --- a/drivers/net/arcnet/com20020-pci.c +++ b/drivers/net/arcnet/com20020-pci.c @@ -291,7 +291,7 @@ static void com20020pci_remove(struct pci_dev *pdev) unregister_netdev(dev); free_irq(dev->irq, dev); - free_netdev(dev); + free_arcdev(dev); } } diff --git a/drivers/net/arcnet/com20020_cs.c b/drivers/net/arcnet/com20020_cs.c index cf607ffcf358e..9cc5eb6a8e905 100644 --- a/drivers/net/arcnet/com20020_cs.c +++ b/drivers/net/arcnet/com20020_cs.c @@ -177,7 +177,7 @@ static void com20020_detach(struct pcmcia_device *link) dev = info->dev; if (dev) { dev_dbg(&link->dev, "kfree...\n"); - free_netdev(dev); + free_arcdev(dev); } dev_dbg(&link->dev, "kfree2...\n"); kfree(info); diff --git a/drivers/net/arcnet/com90io.c b/drivers/net/arcnet/com90io.c index cf214b7306715..3856b447d38ed 100644 --- a/drivers/net/arcnet/com90io.c +++ b/drivers/net/arcnet/com90io.c @@ -396,7 +396,7 @@ static int __init com90io_init(void) err = com90io_probe(dev); if (err) { - free_netdev(dev); + free_arcdev(dev); return err; } @@ -419,7 +419,7 @@ static void __exit com90io_exit(void) free_irq(dev->irq, dev); release_region(dev->base_addr, ARCNET_TOTAL_SIZE); - free_netdev(dev); + free_arcdev(dev); } module_init(com90io_init) diff --git a/drivers/net/arcnet/com90xx.c b/drivers/net/arcnet/com90xx.c index 3dc3d533cb19a..d8dfb9ea0de89 100644 --- a/drivers/net/arcnet/com90xx.c +++ b/drivers/net/arcnet/com90xx.c @@ -554,7 +554,7 @@ err_free_irq: err_release_mem: release_mem_region(dev->mem_start, dev->mem_end - dev->mem_start + 1); err_free_dev: - free_netdev(dev); + free_arcdev(dev); return -EIO; } @@ -672,7 +672,7 @@ static void __exit com90xx_exit(void) release_region(dev->base_addr, ARCNET_TOTAL_SIZE); release_mem_region(dev->mem_start, dev->mem_end - dev->mem_start + 1); - free_netdev(dev); + free_arcdev(dev); } } -- GitLab From df610cd9163b90adc3b5c23868089a0349580551 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Fri, 29 Jan 2021 00:02:17 +0900 Subject: [PATCH 1734/2012] net: Remove redundant calls of sk_tx_queue_clear(). The commit 41b14fb8724d ("net: Do not clear the sock TX queue in sk_set_socket()") removes sk_tx_queue_clear() from sk_set_socket() and adds it instead in sk_alloc() and sk_clone_lock() to fix an issue introduced in the commit e022f0b4a03f ("net: Introduce sk_tx_queue_mapping"). On the other hand, the original commit had already put sk_tx_queue_clear() in sk_prot_alloc(): the callee of sk_alloc() and sk_clone_lock(). Thus sk_tx_queue_clear() is called twice in each path. If we remove sk_tx_queue_clear() in sk_alloc() and sk_clone_lock(), it currently works well because (i) sk_tx_queue_mapping is defined between sk_dontcopy_begin and sk_dontcopy_end, and (ii) sock_copy() called after sk_prot_alloc() in sk_clone_lock() does not overwrite sk_tx_queue_mapping. However, if we move sk_tx_queue_mapping out of the no copy area, it introduces a bug unintentionally. Therefore, this patch adds a compile-time check to take care of the order of sock_copy() and sk_tx_queue_clear() and removes sk_tx_queue_clear() from sk_prot_alloc() so that it does the only allocation and its callers initialize fields. CC: Boris Pismenny Signed-off-by: Kuniyuki Iwashima Acked-by: Tariq Toukan Link: https://lore.kernel.org/r/20210128150217.6060-1-kuniyu@amazon.co.jp Signed-off-by: Jakub Kicinski --- net/core/sock.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/net/core/sock.c b/net/core/sock.c index 4600e58828da6..648a5cb46209a 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1657,6 +1657,16 @@ static void sock_copy(struct sock *nsk, const struct sock *osk) #ifdef CONFIG_SECURITY_NETWORK void *sptr = nsk->sk_security; #endif + + /* If we move sk_tx_queue_mapping out of the private section, + * we must check if sk_tx_queue_clear() is called after + * sock_copy() in sk_clone_lock(). + */ + BUILD_BUG_ON(offsetof(struct sock, sk_tx_queue_mapping) < + offsetof(struct sock, sk_dontcopy_begin) || + offsetof(struct sock, sk_tx_queue_mapping) >= + offsetof(struct sock, sk_dontcopy_end)); + memcpy(nsk, osk, offsetof(struct sock, sk_dontcopy_begin)); memcpy(&nsk->sk_dontcopy_end, &osk->sk_dontcopy_end, @@ -1690,7 +1700,6 @@ static struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority, if (!try_module_get(prot->owner)) goto out_free_sec; - sk_tx_queue_clear(sk); } return sk; -- GitLab From 0d6cd689f9ba47deffffe9dfd204843ce8f1a51e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 28 Jan 2021 08:21:45 -0800 Subject: [PATCH 1735/2012] net: proc: speedup /proc/net/netstat Use cache friendly helpers to better use cpu caches while reading /proc/net/netstat Tested on a platform with 256 threads (AMD Rome) Before: 305 usec spent in netstat_seq_show() After: 130 usec spent in netstat_seq_show() Signed-off-by: Eric Dumazet Link: https://lore.kernel.org/r/20210128162145.1703601-1-eric.dumazet@gmail.com Signed-off-by: Jakub Kicinski --- net/ipv4/proc.c | 50 +++++++++++++++++++++++++++++++++++-------------- 1 file changed, 36 insertions(+), 14 deletions(-) diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 63cd370ea29db..6d46297a99f8d 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -464,30 +464,52 @@ static int snmp_seq_show(struct seq_file *seq, void *v) */ static int netstat_seq_show(struct seq_file *seq, void *v) { - int i; + const int ip_cnt = ARRAY_SIZE(snmp4_ipextstats_list) - 1; + const int tcp_cnt = ARRAY_SIZE(snmp4_net_list) - 1; struct net *net = seq->private; + unsigned long *buff; + int i; seq_puts(seq, "TcpExt:"); - for (i = 0; snmp4_net_list[i].name; i++) + for (i = 0; i < tcp_cnt; i++) seq_printf(seq, " %s", snmp4_net_list[i].name); seq_puts(seq, "\nTcpExt:"); - for (i = 0; snmp4_net_list[i].name; i++) - seq_printf(seq, " %lu", - snmp_fold_field(net->mib.net_statistics, - snmp4_net_list[i].entry)); - + buff = kzalloc(max(tcp_cnt * sizeof(long), ip_cnt * sizeof(u64)), + GFP_KERNEL); + if (buff) { + snmp_get_cpu_field_batch(buff, snmp4_net_list, + net->mib.net_statistics); + for (i = 0; i < tcp_cnt; i++) + seq_printf(seq, " %lu", buff[i]); + } else { + for (i = 0; i < tcp_cnt; i++) + seq_printf(seq, " %lu", + snmp_fold_field(net->mib.net_statistics, + snmp4_net_list[i].entry)); + } seq_puts(seq, "\nIpExt:"); - for (i = 0; snmp4_ipextstats_list[i].name; i++) + for (i = 0; i < ip_cnt; i++) seq_printf(seq, " %s", snmp4_ipextstats_list[i].name); seq_puts(seq, "\nIpExt:"); - for (i = 0; snmp4_ipextstats_list[i].name; i++) - seq_printf(seq, " %llu", - snmp_fold_field64(net->mib.ip_statistics, - snmp4_ipextstats_list[i].entry, - offsetof(struct ipstats_mib, syncp))); - + if (buff) { + u64 *buff64 = (u64 *)buff; + + memset(buff64, 0, ip_cnt * sizeof(u64)); + snmp_get_cpu_field64_batch(buff64, snmp4_ipextstats_list, + net->mib.ip_statistics, + offsetof(struct ipstats_mib, syncp)); + for (i = 0; i < ip_cnt; i++) + seq_printf(seq, " %llu", buff64[i]); + } else { + for (i = 0; i < ip_cnt; i++) + seq_printf(seq, " %llu", + snmp_fold_field64(net->mib.ip_statistics, + snmp4_ipextstats_list[i].entry, + offsetof(struct ipstats_mib, syncp))); + } + kfree(buff); seq_putc(seq, '\n'); mptcp_seq_show(seq); return 0; -- GitLab From 6c13d75beee5313a38adab7e14365724e301886f Mon Sep 17 00:00:00 2001 From: Kurt Kanzenbach Date: Thu, 28 Jan 2021 17:33:38 +0100 Subject: [PATCH 1736/2012] net: dsa: hellcreek: Add missing TAPRIO dependency Add missing dependency to TAPRIO to avoid build failures such as: |ERROR: modpost: "taprio_offload_get" [drivers/net/dsa/hirschmann/hellcreek_sw.ko] undefined! |ERROR: modpost: "taprio_offload_free" [drivers/net/dsa/hirschmann/hellcreek_sw.ko] undefined! Fixes: 24dfc6eb39b2 ("net: dsa: hellcreek: Add TAPRIO offloading support") Reported-by: Randy Dunlap Signed-off-by: Kurt Kanzenbach Acked-by: Arnd Bergmann Acked-by: Randy Dunlap # build-tested Link: https://lore.kernel.org/r/20210128163338.22665-1-kurt@linutronix.de Signed-off-by: Jakub Kicinski --- drivers/net/dsa/hirschmann/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/dsa/hirschmann/Kconfig b/drivers/net/dsa/hirschmann/Kconfig index e01191107a4ba..9ea2c643f8f80 100644 --- a/drivers/net/dsa/hirschmann/Kconfig +++ b/drivers/net/dsa/hirschmann/Kconfig @@ -5,6 +5,7 @@ config NET_DSA_HIRSCHMANN_HELLCREEK depends on NET_DSA depends on PTP_1588_CLOCK depends on LEDS_CLASS + depends on NET_SCH_TAPRIO select NET_DSA_TAG_HELLCREEK help This driver adds support for Hirschmann Hellcreek TSN switches. -- GitLab From de1da8bcf40564a2adada2d5d5426e05355f66e8 Mon Sep 17 00:00:00 2001 From: Ronak Doshi Date: Wed, 27 Jan 2021 18:08:16 -0800 Subject: [PATCH 1737/2012] vmxnet3: Remove buf_info from device accessible structures buf_info structures in RX & TX queues are private driver data that do not need to be visible to the device. Although there is physical address and length in the queue descriptor that points to these structures, their layout is not standardized, and device never looks at them. So lets allocate these structures in non-DMA-able memory, and fill physical address as all-ones and length as zero in the queue descriptor. That should alleviate worries brought by Martin Radev in https://lists.osuosl.org/pipermail/intel-wired-lan/Week-of-Mon-20210104/022829.html that malicious vmxnet3 device could subvert SVM/TDX guarantees. Signed-off-by: Petr Vandrovec Signed-off-by: Ronak Doshi Signed-off-by: Jakub Kicinski --- drivers/net/vmxnet3/vmxnet3_drv.c | 46 ++++++++++--------------------- drivers/net/vmxnet3/vmxnet3_int.h | 2 -- 2 files changed, 15 insertions(+), 33 deletions(-) diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c index 336504b7531d9..6e87f1fc4874a 100644 --- a/drivers/net/vmxnet3/vmxnet3_drv.c +++ b/drivers/net/vmxnet3/vmxnet3_drv.c @@ -451,12 +451,8 @@ vmxnet3_tq_destroy(struct vmxnet3_tx_queue *tq, tq->comp_ring.base, tq->comp_ring.basePA); tq->comp_ring.base = NULL; } - if (tq->buf_info) { - dma_free_coherent(&adapter->pdev->dev, - tq->tx_ring.size * sizeof(tq->buf_info[0]), - tq->buf_info, tq->buf_info_pa); - tq->buf_info = NULL; - } + kfree(tq->buf_info); + tq->buf_info = NULL; } @@ -505,8 +501,6 @@ static int vmxnet3_tq_create(struct vmxnet3_tx_queue *tq, struct vmxnet3_adapter *adapter) { - size_t sz; - BUG_ON(tq->tx_ring.base || tq->data_ring.base || tq->comp_ring.base || tq->buf_info); @@ -534,9 +528,9 @@ vmxnet3_tq_create(struct vmxnet3_tx_queue *tq, goto err; } - sz = tq->tx_ring.size * sizeof(tq->buf_info[0]); - tq->buf_info = dma_alloc_coherent(&adapter->pdev->dev, sz, - &tq->buf_info_pa, GFP_KERNEL); + tq->buf_info = kcalloc_node(tq->tx_ring.size, sizeof(tq->buf_info[0]), + GFP_KERNEL, + dev_to_node(&adapter->pdev->dev)); if (!tq->buf_info) goto err; @@ -1737,13 +1731,9 @@ static void vmxnet3_rq_destroy(struct vmxnet3_rx_queue *rq, rq->comp_ring.base = NULL; } - if (rq->buf_info[0]) { - size_t sz = sizeof(struct vmxnet3_rx_buf_info) * - (rq->rx_ring[0].size + rq->rx_ring[1].size); - dma_free_coherent(&adapter->pdev->dev, sz, rq->buf_info[0], - rq->buf_info_pa); - rq->buf_info[0] = rq->buf_info[1] = NULL; - } + kfree(rq->buf_info[0]); + rq->buf_info[0] = NULL; + rq->buf_info[1] = NULL; } static void @@ -1883,10 +1873,9 @@ vmxnet3_rq_create(struct vmxnet3_rx_queue *rq, struct vmxnet3_adapter *adapter) goto err; } - sz = sizeof(struct vmxnet3_rx_buf_info) * (rq->rx_ring[0].size + - rq->rx_ring[1].size); - bi = dma_alloc_coherent(&adapter->pdev->dev, sz, &rq->buf_info_pa, - GFP_KERNEL); + bi = kcalloc_node(rq->rx_ring[0].size + rq->rx_ring[1].size, + sizeof(rq->buf_info[0][0]), GFP_KERNEL, + dev_to_node(&adapter->pdev->dev)); if (!bi) goto err; @@ -2522,14 +2511,12 @@ vmxnet3_setup_driver_shared(struct vmxnet3_adapter *adapter) tqc->txRingBasePA = cpu_to_le64(tq->tx_ring.basePA); tqc->dataRingBasePA = cpu_to_le64(tq->data_ring.basePA); tqc->compRingBasePA = cpu_to_le64(tq->comp_ring.basePA); - tqc->ddPA = cpu_to_le64(tq->buf_info_pa); + tqc->ddPA = cpu_to_le64(~0ULL); tqc->txRingSize = cpu_to_le32(tq->tx_ring.size); tqc->dataRingSize = cpu_to_le32(tq->data_ring.size); tqc->txDataRingDescSize = cpu_to_le32(tq->txdata_desc_size); tqc->compRingSize = cpu_to_le32(tq->comp_ring.size); - tqc->ddLen = cpu_to_le32( - sizeof(struct vmxnet3_tx_buf_info) * - tqc->txRingSize); + tqc->ddLen = cpu_to_le32(0); tqc->intrIdx = tq->comp_ring.intr_idx; } @@ -2541,14 +2528,11 @@ vmxnet3_setup_driver_shared(struct vmxnet3_adapter *adapter) rqc->rxRingBasePA[0] = cpu_to_le64(rq->rx_ring[0].basePA); rqc->rxRingBasePA[1] = cpu_to_le64(rq->rx_ring[1].basePA); rqc->compRingBasePA = cpu_to_le64(rq->comp_ring.basePA); - rqc->ddPA = cpu_to_le64(rq->buf_info_pa); + rqc->ddPA = cpu_to_le64(~0ULL); rqc->rxRingSize[0] = cpu_to_le32(rq->rx_ring[0].size); rqc->rxRingSize[1] = cpu_to_le32(rq->rx_ring[1].size); rqc->compRingSize = cpu_to_le32(rq->comp_ring.size); - rqc->ddLen = cpu_to_le32( - sizeof(struct vmxnet3_rx_buf_info) * - (rqc->rxRingSize[0] + - rqc->rxRingSize[1])); + rqc->ddLen = cpu_to_le32(0); rqc->intrIdx = rq->comp_ring.intr_idx; if (VMXNET3_VERSION_GE_3(adapter)) { rqc->rxDataRingBasePA = diff --git a/drivers/net/vmxnet3/vmxnet3_int.h b/drivers/net/vmxnet3/vmxnet3_int.h index d958b92c94299..e910596b79cf5 100644 --- a/drivers/net/vmxnet3/vmxnet3_int.h +++ b/drivers/net/vmxnet3/vmxnet3_int.h @@ -240,7 +240,6 @@ struct vmxnet3_tx_queue { spinlock_t tx_lock; struct vmxnet3_cmd_ring tx_ring; struct vmxnet3_tx_buf_info *buf_info; - dma_addr_t buf_info_pa; struct vmxnet3_tx_data_ring data_ring; struct vmxnet3_comp_ring comp_ring; struct Vmxnet3_TxQueueCtrl *shared; @@ -298,7 +297,6 @@ struct vmxnet3_rx_queue { u32 qid2; /* rqID in RCD for buffer from 2nd ring */ u32 dataRingQid; /* rqID in RCD for buffer from data ring */ struct vmxnet3_rx_buf_info *buf_info[2]; - dma_addr_t buf_info_pa; struct Vmxnet3_RxQueueCtrl *shared; struct vmxnet3_rq_driver_stats stats; } __attribute__((__aligned__(SMP_CACHE_BYTES))); -- GitLab From 8d520b4de3edca4f4fb242b5ddc659b6a9b9e65e Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Thu, 28 Jan 2021 23:01:54 +0100 Subject: [PATCH 1738/2012] r8169: work around RTL8125 UDP hw bug It was reported that on RTL8125 network breaks under heavy UDP load, e.g. torrent traffic ([0], from comment 27). Realtek confirmed a hw bug and provided me with a test version of the r8125 driver including a workaround. Tests confirmed that the workaround fixes the issue. I modified the original version of the workaround to meet mainline code style. [0] https://bugzilla.kernel.org/show_bug.cgi?id=209839 v2: - rebased to net v3: - make rtl_skb_is_udp() more robust and use skb_header_pointer() to access the ip(v6) header v4: - remove dependency on ptp_classify.h - replace magic number with offsetof(struct udphdr, len) Fixes: f1bce4ad2f1c ("r8169: add support for RTL8125") Tested-by: xplo Signed-off-by: Heiner Kallweit Acked-by: Willem de Bruijn Link: https://lore.kernel.org/r/6e453d49-1801-e6de-d5f7-d7e6c7526c8f@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/realtek/r8169_main.c | 71 +++++++++++++++++++++-- 1 file changed, 65 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index a569abe7f5ef2..f2269c9f5f055 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -4046,17 +4046,72 @@ err_out: return -EIO; } -static bool rtl_test_hw_pad_bug(struct rtl8169_private *tp) +static bool rtl_skb_is_udp(struct sk_buff *skb) +{ + int no = skb_network_offset(skb); + struct ipv6hdr *i6h, _i6h; + struct iphdr *ih, _ih; + + switch (vlan_get_protocol(skb)) { + case htons(ETH_P_IP): + ih = skb_header_pointer(skb, no, sizeof(_ih), &_ih); + return ih && ih->protocol == IPPROTO_UDP; + case htons(ETH_P_IPV6): + i6h = skb_header_pointer(skb, no, sizeof(_i6h), &_i6h); + return i6h && i6h->nexthdr == IPPROTO_UDP; + default: + return false; + } +} + +#define RTL_MIN_PATCH_LEN 47 + +/* see rtl8125_get_patch_pad_len() in r8125 vendor driver */ +static unsigned int rtl8125_quirk_udp_padto(struct rtl8169_private *tp, + struct sk_buff *skb) { + unsigned int padto = 0, len = skb->len; + + if (rtl_is_8125(tp) && len < 128 + RTL_MIN_PATCH_LEN && + rtl_skb_is_udp(skb) && skb_transport_header_was_set(skb)) { + unsigned int trans_data_len = skb_tail_pointer(skb) - + skb_transport_header(skb); + + if (trans_data_len >= offsetof(struct udphdr, len) && + trans_data_len < RTL_MIN_PATCH_LEN) { + u16 dest = ntohs(udp_hdr(skb)->dest); + + /* dest is a standard PTP port */ + if (dest == 319 || dest == 320) + padto = len + RTL_MIN_PATCH_LEN - trans_data_len; + } + + if (trans_data_len < sizeof(struct udphdr)) + padto = max_t(unsigned int, padto, + len + sizeof(struct udphdr) - trans_data_len); + } + + return padto; +} + +static unsigned int rtl_quirk_packet_padto(struct rtl8169_private *tp, + struct sk_buff *skb) +{ + unsigned int padto; + + padto = rtl8125_quirk_udp_padto(tp, skb); + switch (tp->mac_version) { case RTL_GIGA_MAC_VER_34: case RTL_GIGA_MAC_VER_60: case RTL_GIGA_MAC_VER_61: case RTL_GIGA_MAC_VER_63: - return true; + padto = max_t(unsigned int, padto, ETH_ZLEN); default: - return false; + break; } + + return padto; } static void rtl8169_tso_csum_v1(struct sk_buff *skb, u32 *opts) @@ -4128,9 +4183,10 @@ static bool rtl8169_tso_csum_v2(struct rtl8169_private *tp, opts[1] |= transport_offset << TCPHO_SHIFT; } else { - if (unlikely(skb->len < ETH_ZLEN && rtl_test_hw_pad_bug(tp))) - /* eth_skb_pad would free the skb on error */ - return !__skb_put_padto(skb, ETH_ZLEN, false); + unsigned int padto = rtl_quirk_packet_padto(tp, skb); + + /* skb_padto would free the skb on error */ + return !__skb_put_padto(skb, padto, false); } return true; @@ -4307,6 +4363,9 @@ static netdev_features_t rtl8169_features_check(struct sk_buff *skb, if (skb->len < ETH_ZLEN) features &= ~NETIF_F_CSUM_MASK; + if (rtl_quirk_packet_padto(tp, skb)) + features &= ~NETIF_F_CSUM_MASK; + if (transport_offset > TCPHO_MAX && rtl_chip_supports_csum_v2(tp)) features &= ~NETIF_F_CSUM_MASK; -- GitLab From 9c7caf28068421c9e0d1faea437e35e6b8983ac6 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Fri, 29 Jan 2021 02:59:59 +0200 Subject: [PATCH 1739/2012] net: dsa: tag_8021q: add helpers to deduce whether a VLAN ID is RX or TX VLAN The sja1105 implementation can be blind about this, but the felix driver doesn't do exactly what it's being told, so it needs to know whether it is a TX or an RX VLAN, so it can install the appropriate type of TCAM rule. Signed-off-by: Vladimir Oltean Reviewed-by: Florian Fainelli Signed-off-by: Jakub Kicinski --- include/linux/dsa/8021q.h | 14 ++++++++++++++ net/dsa/tag_8021q.c | 15 +++++++++++++-- 2 files changed, 27 insertions(+), 2 deletions(-) diff --git a/include/linux/dsa/8021q.h b/include/linux/dsa/8021q.h index 88cd72dfa4e0e..b12b05f1c8b4a 100644 --- a/include/linux/dsa/8021q.h +++ b/include/linux/dsa/8021q.h @@ -64,6 +64,10 @@ int dsa_8021q_rx_source_port(u16 vid); u16 dsa_8021q_rx_subvlan(u16 vid); +bool vid_is_dsa_8021q_rxvlan(u16 vid); + +bool vid_is_dsa_8021q_txvlan(u16 vid); + bool vid_is_dsa_8021q(u16 vid); #else @@ -123,6 +127,16 @@ u16 dsa_8021q_rx_subvlan(u16 vid) return 0; } +bool vid_is_dsa_8021q_rxvlan(u16 vid) +{ + return false; +} + +bool vid_is_dsa_8021q_txvlan(u16 vid) +{ + return false; +} + bool vid_is_dsa_8021q(u16 vid) { return false; diff --git a/net/dsa/tag_8021q.c b/net/dsa/tag_8021q.c index 8e3e8a5b85593..008c1ec6e20c1 100644 --- a/net/dsa/tag_8021q.c +++ b/net/dsa/tag_8021q.c @@ -133,10 +133,21 @@ u16 dsa_8021q_rx_subvlan(u16 vid) } EXPORT_SYMBOL_GPL(dsa_8021q_rx_subvlan); +bool vid_is_dsa_8021q_rxvlan(u16 vid) +{ + return (vid & DSA_8021Q_DIR_MASK) == DSA_8021Q_DIR_RX; +} +EXPORT_SYMBOL_GPL(vid_is_dsa_8021q_rxvlan); + +bool vid_is_dsa_8021q_txvlan(u16 vid) +{ + return (vid & DSA_8021Q_DIR_MASK) == DSA_8021Q_DIR_TX; +} +EXPORT_SYMBOL_GPL(vid_is_dsa_8021q_txvlan); + bool vid_is_dsa_8021q(u16 vid) { - return ((vid & DSA_8021Q_DIR_MASK) == DSA_8021Q_DIR_RX || - (vid & DSA_8021Q_DIR_MASK) == DSA_8021Q_DIR_TX); + return vid_is_dsa_8021q_rxvlan(vid) || vid_is_dsa_8021q_txvlan(vid); } EXPORT_SYMBOL_GPL(vid_is_dsa_8021q); -- GitLab From 0e9bb4e9d93f2711897b8e2a613899f7b8a15a3b Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Fri, 29 Jan 2021 03:00:00 +0200 Subject: [PATCH 1740/2012] net: mscc: ocelot: export VCAP structures to include/soc/mscc The Felix driver will need to preinstall some VCAP filters for its tag_8021q implementation (outside of the tc-flower offload logic), so these need to be exported to the common includes. Signed-off-by: Vladimir Oltean Reviewed-by: Florian Fainelli Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mscc/ocelot_net.c | 1 + drivers/net/ethernet/mscc/ocelot_vcap.c | 2 + drivers/net/ethernet/mscc/ocelot_vcap.h | 293 +----------------------- include/soc/mscc/ocelot_vcap.h | 289 +++++++++++++++++++++++ 4 files changed, 294 insertions(+), 291 deletions(-) diff --git a/drivers/net/ethernet/mscc/ocelot_net.c b/drivers/net/ethernet/mscc/ocelot_net.c index 9553eb3e441c9..05142803a4638 100644 --- a/drivers/net/ethernet/mscc/ocelot_net.c +++ b/drivers/net/ethernet/mscc/ocelot_net.c @@ -9,6 +9,7 @@ */ #include +#include #include "ocelot.h" #include "ocelot_vcap.h" diff --git a/drivers/net/ethernet/mscc/ocelot_vcap.c b/drivers/net/ethernet/mscc/ocelot_vcap.c index d8c778ee6f1b8..489bf16362a7e 100644 --- a/drivers/net/ethernet/mscc/ocelot_vcap.c +++ b/drivers/net/ethernet/mscc/ocelot_vcap.c @@ -1150,6 +1150,7 @@ int ocelot_vcap_filter_add(struct ocelot *ocelot, vcap_entry_set(ocelot, index, filter); return 0; } +EXPORT_SYMBOL(ocelot_vcap_filter_add); static void ocelot_vcap_block_remove_filter(struct ocelot *ocelot, struct ocelot_vcap_block *block, @@ -1204,6 +1205,7 @@ int ocelot_vcap_filter_del(struct ocelot *ocelot, return 0; } +EXPORT_SYMBOL(ocelot_vcap_filter_del); int ocelot_vcap_filter_stats_update(struct ocelot *ocelot, struct ocelot_vcap_filter *filter) diff --git a/drivers/net/ethernet/mscc/ocelot_vcap.h b/drivers/net/ethernet/mscc/ocelot_vcap.h index 82fd10581a149..cfc8b976d1deb 100644 --- a/drivers/net/ethernet/mscc/ocelot_vcap.h +++ b/drivers/net/ethernet/mscc/ocelot_vcap.h @@ -7,300 +7,11 @@ #define _MSCC_OCELOT_VCAP_H_ #include "ocelot.h" -#include "ocelot_police.h" -#include -#include +#include +#include #define OCELOT_POLICER_DISCARD 0x17f -struct ocelot_ipv4 { - u8 addr[4]; -}; - -enum ocelot_vcap_bit { - OCELOT_VCAP_BIT_ANY, - OCELOT_VCAP_BIT_0, - OCELOT_VCAP_BIT_1 -}; - -struct ocelot_vcap_u8 { - u8 value[1]; - u8 mask[1]; -}; - -struct ocelot_vcap_u16 { - u8 value[2]; - u8 mask[2]; -}; - -struct ocelot_vcap_u24 { - u8 value[3]; - u8 mask[3]; -}; - -struct ocelot_vcap_u32 { - u8 value[4]; - u8 mask[4]; -}; - -struct ocelot_vcap_u40 { - u8 value[5]; - u8 mask[5]; -}; - -struct ocelot_vcap_u48 { - u8 value[6]; - u8 mask[6]; -}; - -struct ocelot_vcap_u64 { - u8 value[8]; - u8 mask[8]; -}; - -struct ocelot_vcap_u128 { - u8 value[16]; - u8 mask[16]; -}; - -struct ocelot_vcap_vid { - u16 value; - u16 mask; -}; - -struct ocelot_vcap_ipv4 { - struct ocelot_ipv4 value; - struct ocelot_ipv4 mask; -}; - -struct ocelot_vcap_udp_tcp { - u16 value; - u16 mask; -}; - -struct ocelot_vcap_port { - u8 value; - u8 mask; -}; - -enum ocelot_vcap_key_type { - OCELOT_VCAP_KEY_ANY, - OCELOT_VCAP_KEY_ETYPE, - OCELOT_VCAP_KEY_LLC, - OCELOT_VCAP_KEY_SNAP, - OCELOT_VCAP_KEY_ARP, - OCELOT_VCAP_KEY_IPV4, - OCELOT_VCAP_KEY_IPV6 -}; - -struct ocelot_vcap_key_vlan { - struct ocelot_vcap_vid vid; /* VLAN ID (12 bit) */ - struct ocelot_vcap_u8 pcp; /* PCP (3 bit) */ - enum ocelot_vcap_bit dei; /* DEI */ - enum ocelot_vcap_bit tagged; /* Tagged/untagged frame */ -}; - -struct ocelot_vcap_key_etype { - struct ocelot_vcap_u48 dmac; - struct ocelot_vcap_u48 smac; - struct ocelot_vcap_u16 etype; - struct ocelot_vcap_u16 data; /* MAC data */ -}; - -struct ocelot_vcap_key_llc { - struct ocelot_vcap_u48 dmac; - struct ocelot_vcap_u48 smac; - - /* LLC header: DSAP at byte 0, SSAP at byte 1, Control at byte 2 */ - struct ocelot_vcap_u32 llc; -}; - -struct ocelot_vcap_key_snap { - struct ocelot_vcap_u48 dmac; - struct ocelot_vcap_u48 smac; - - /* SNAP header: Organization Code at byte 0, Type at byte 3 */ - struct ocelot_vcap_u40 snap; -}; - -struct ocelot_vcap_key_arp { - struct ocelot_vcap_u48 smac; - enum ocelot_vcap_bit arp; /* Opcode ARP/RARP */ - enum ocelot_vcap_bit req; /* Opcode request/reply */ - enum ocelot_vcap_bit unknown; /* Opcode unknown */ - enum ocelot_vcap_bit smac_match; /* Sender MAC matches SMAC */ - enum ocelot_vcap_bit dmac_match; /* Target MAC matches DMAC */ - - /**< Protocol addr. length 4, hardware length 6 */ - enum ocelot_vcap_bit length; - - enum ocelot_vcap_bit ip; /* Protocol address type IP */ - enum ocelot_vcap_bit ethernet; /* Hardware address type Ethernet */ - struct ocelot_vcap_ipv4 sip; /* Sender IP address */ - struct ocelot_vcap_ipv4 dip; /* Target IP address */ -}; - -struct ocelot_vcap_key_ipv4 { - enum ocelot_vcap_bit ttl; /* TTL zero */ - enum ocelot_vcap_bit fragment; /* Fragment */ - enum ocelot_vcap_bit options; /* Header options */ - struct ocelot_vcap_u8 ds; - struct ocelot_vcap_u8 proto; /* Protocol */ - struct ocelot_vcap_ipv4 sip; /* Source IP address */ - struct ocelot_vcap_ipv4 dip; /* Destination IP address */ - struct ocelot_vcap_u48 data; /* Not UDP/TCP: IP data */ - struct ocelot_vcap_udp_tcp sport; /* UDP/TCP: Source port */ - struct ocelot_vcap_udp_tcp dport; /* UDP/TCP: Destination port */ - enum ocelot_vcap_bit tcp_fin; - enum ocelot_vcap_bit tcp_syn; - enum ocelot_vcap_bit tcp_rst; - enum ocelot_vcap_bit tcp_psh; - enum ocelot_vcap_bit tcp_ack; - enum ocelot_vcap_bit tcp_urg; - enum ocelot_vcap_bit sip_eq_dip; /* SIP equals DIP */ - enum ocelot_vcap_bit sport_eq_dport; /* SPORT equals DPORT */ - enum ocelot_vcap_bit seq_zero; /* TCP sequence number is zero */ -}; - -struct ocelot_vcap_key_ipv6 { - struct ocelot_vcap_u8 proto; /* IPv6 protocol */ - struct ocelot_vcap_u128 sip; /* IPv6 source (byte 0-7 ignored) */ - struct ocelot_vcap_u128 dip; /* IPv6 destination (byte 0-7 ignored) */ - enum ocelot_vcap_bit ttl; /* TTL zero */ - struct ocelot_vcap_u8 ds; - struct ocelot_vcap_u48 data; /* Not UDP/TCP: IP data */ - struct ocelot_vcap_udp_tcp sport; - struct ocelot_vcap_udp_tcp dport; - enum ocelot_vcap_bit tcp_fin; - enum ocelot_vcap_bit tcp_syn; - enum ocelot_vcap_bit tcp_rst; - enum ocelot_vcap_bit tcp_psh; - enum ocelot_vcap_bit tcp_ack; - enum ocelot_vcap_bit tcp_urg; - enum ocelot_vcap_bit sip_eq_dip; /* SIP equals DIP */ - enum ocelot_vcap_bit sport_eq_dport; /* SPORT equals DPORT */ - enum ocelot_vcap_bit seq_zero; /* TCP sequence number is zero */ -}; - -enum ocelot_mask_mode { - OCELOT_MASK_MODE_NONE, - OCELOT_MASK_MODE_PERMIT_DENY, - OCELOT_MASK_MODE_POLICY, - OCELOT_MASK_MODE_REDIRECT, -}; - -enum ocelot_es0_tag { - OCELOT_NO_ES0_TAG, - OCELOT_ES0_TAG, - OCELOT_FORCE_PORT_TAG, - OCELOT_FORCE_UNTAG, -}; - -enum ocelot_tag_tpid_sel { - OCELOT_TAG_TPID_SEL_8021Q, - OCELOT_TAG_TPID_SEL_8021AD, -}; - -struct ocelot_vcap_action { - union { - /* VCAP ES0 */ - struct { - enum ocelot_es0_tag push_outer_tag; - enum ocelot_es0_tag push_inner_tag; - enum ocelot_tag_tpid_sel tag_a_tpid_sel; - int tag_a_vid_sel; - int tag_a_pcp_sel; - u16 vid_a_val; - u8 pcp_a_val; - u8 dei_a_val; - enum ocelot_tag_tpid_sel tag_b_tpid_sel; - int tag_b_vid_sel; - int tag_b_pcp_sel; - u16 vid_b_val; - u8 pcp_b_val; - u8 dei_b_val; - }; - - /* VCAP IS1 */ - struct { - bool vid_replace_ena; - u16 vid; - bool vlan_pop_cnt_ena; - int vlan_pop_cnt; - bool pcp_dei_ena; - u8 pcp; - u8 dei; - bool qos_ena; - u8 qos_val; - u8 pag_override_mask; - u8 pag_val; - }; - - /* VCAP IS2 */ - struct { - bool cpu_copy_ena; - u8 cpu_qu_num; - enum ocelot_mask_mode mask_mode; - unsigned long port_mask; - bool police_ena; - struct ocelot_policer pol; - u32 pol_ix; - }; - }; -}; - -struct ocelot_vcap_stats { - u64 bytes; - u64 pkts; - u64 used; -}; - -enum ocelot_vcap_filter_type { - OCELOT_VCAP_FILTER_DUMMY, - OCELOT_VCAP_FILTER_PAG, - OCELOT_VCAP_FILTER_OFFLOAD, -}; - -struct ocelot_vcap_filter { - struct list_head list; - - enum ocelot_vcap_filter_type type; - int block_id; - int goto_target; - int lookup; - u8 pag; - u16 prio; - u32 id; - - struct ocelot_vcap_action action; - struct ocelot_vcap_stats stats; - /* For VCAP IS1 and IS2 */ - unsigned long ingress_port_mask; - /* For VCAP ES0 */ - struct ocelot_vcap_port ingress_port; - struct ocelot_vcap_port egress_port; - - enum ocelot_vcap_bit dmac_mc; - enum ocelot_vcap_bit dmac_bc; - struct ocelot_vcap_key_vlan vlan; - - enum ocelot_vcap_key_type key_type; - union { - /* OCELOT_VCAP_KEY_ANY: No specific fields */ - struct ocelot_vcap_key_etype etype; - struct ocelot_vcap_key_llc llc; - struct ocelot_vcap_key_snap snap; - struct ocelot_vcap_key_arp arp; - struct ocelot_vcap_key_ipv4 ipv4; - struct ocelot_vcap_key_ipv6 ipv6; - } key; -}; - -int ocelot_vcap_filter_add(struct ocelot *ocelot, - struct ocelot_vcap_filter *rule, - struct netlink_ext_ack *extack); -int ocelot_vcap_filter_del(struct ocelot *ocelot, - struct ocelot_vcap_filter *rule); int ocelot_vcap_filter_stats_update(struct ocelot *ocelot, struct ocelot_vcap_filter *rule); struct ocelot_vcap_filter * diff --git a/include/soc/mscc/ocelot_vcap.h b/include/soc/mscc/ocelot_vcap.h index 96300adf36481..7f1b82fba63c7 100644 --- a/include/soc/mscc/ocelot_vcap.h +++ b/include/soc/mscc/ocelot_vcap.h @@ -400,4 +400,293 @@ enum vcap_es0_action_field { VCAP_ES0_ACT_HIT_STICKY, }; +struct ocelot_ipv4 { + u8 addr[4]; +}; + +enum ocelot_vcap_bit { + OCELOT_VCAP_BIT_ANY, + OCELOT_VCAP_BIT_0, + OCELOT_VCAP_BIT_1 +}; + +struct ocelot_vcap_u8 { + u8 value[1]; + u8 mask[1]; +}; + +struct ocelot_vcap_u16 { + u8 value[2]; + u8 mask[2]; +}; + +struct ocelot_vcap_u24 { + u8 value[3]; + u8 mask[3]; +}; + +struct ocelot_vcap_u32 { + u8 value[4]; + u8 mask[4]; +}; + +struct ocelot_vcap_u40 { + u8 value[5]; + u8 mask[5]; +}; + +struct ocelot_vcap_u48 { + u8 value[6]; + u8 mask[6]; +}; + +struct ocelot_vcap_u64 { + u8 value[8]; + u8 mask[8]; +}; + +struct ocelot_vcap_u128 { + u8 value[16]; + u8 mask[16]; +}; + +struct ocelot_vcap_vid { + u16 value; + u16 mask; +}; + +struct ocelot_vcap_ipv4 { + struct ocelot_ipv4 value; + struct ocelot_ipv4 mask; +}; + +struct ocelot_vcap_udp_tcp { + u16 value; + u16 mask; +}; + +struct ocelot_vcap_port { + u8 value; + u8 mask; +}; + +enum ocelot_vcap_key_type { + OCELOT_VCAP_KEY_ANY, + OCELOT_VCAP_KEY_ETYPE, + OCELOT_VCAP_KEY_LLC, + OCELOT_VCAP_KEY_SNAP, + OCELOT_VCAP_KEY_ARP, + OCELOT_VCAP_KEY_IPV4, + OCELOT_VCAP_KEY_IPV6 +}; + +struct ocelot_vcap_key_vlan { + struct ocelot_vcap_vid vid; /* VLAN ID (12 bit) */ + struct ocelot_vcap_u8 pcp; /* PCP (3 bit) */ + enum ocelot_vcap_bit dei; /* DEI */ + enum ocelot_vcap_bit tagged; /* Tagged/untagged frame */ +}; + +struct ocelot_vcap_key_etype { + struct ocelot_vcap_u48 dmac; + struct ocelot_vcap_u48 smac; + struct ocelot_vcap_u16 etype; + struct ocelot_vcap_u16 data; /* MAC data */ +}; + +struct ocelot_vcap_key_llc { + struct ocelot_vcap_u48 dmac; + struct ocelot_vcap_u48 smac; + + /* LLC header: DSAP at byte 0, SSAP at byte 1, Control at byte 2 */ + struct ocelot_vcap_u32 llc; +}; + +struct ocelot_vcap_key_snap { + struct ocelot_vcap_u48 dmac; + struct ocelot_vcap_u48 smac; + + /* SNAP header: Organization Code at byte 0, Type at byte 3 */ + struct ocelot_vcap_u40 snap; +}; + +struct ocelot_vcap_key_arp { + struct ocelot_vcap_u48 smac; + enum ocelot_vcap_bit arp; /* Opcode ARP/RARP */ + enum ocelot_vcap_bit req; /* Opcode request/reply */ + enum ocelot_vcap_bit unknown; /* Opcode unknown */ + enum ocelot_vcap_bit smac_match; /* Sender MAC matches SMAC */ + enum ocelot_vcap_bit dmac_match; /* Target MAC matches DMAC */ + + /**< Protocol addr. length 4, hardware length 6 */ + enum ocelot_vcap_bit length; + + enum ocelot_vcap_bit ip; /* Protocol address type IP */ + enum ocelot_vcap_bit ethernet; /* Hardware address type Ethernet */ + struct ocelot_vcap_ipv4 sip; /* Sender IP address */ + struct ocelot_vcap_ipv4 dip; /* Target IP address */ +}; + +struct ocelot_vcap_key_ipv4 { + enum ocelot_vcap_bit ttl; /* TTL zero */ + enum ocelot_vcap_bit fragment; /* Fragment */ + enum ocelot_vcap_bit options; /* Header options */ + struct ocelot_vcap_u8 ds; + struct ocelot_vcap_u8 proto; /* Protocol */ + struct ocelot_vcap_ipv4 sip; /* Source IP address */ + struct ocelot_vcap_ipv4 dip; /* Destination IP address */ + struct ocelot_vcap_u48 data; /* Not UDP/TCP: IP data */ + struct ocelot_vcap_udp_tcp sport; /* UDP/TCP: Source port */ + struct ocelot_vcap_udp_tcp dport; /* UDP/TCP: Destination port */ + enum ocelot_vcap_bit tcp_fin; + enum ocelot_vcap_bit tcp_syn; + enum ocelot_vcap_bit tcp_rst; + enum ocelot_vcap_bit tcp_psh; + enum ocelot_vcap_bit tcp_ack; + enum ocelot_vcap_bit tcp_urg; + enum ocelot_vcap_bit sip_eq_dip; /* SIP equals DIP */ + enum ocelot_vcap_bit sport_eq_dport; /* SPORT equals DPORT */ + enum ocelot_vcap_bit seq_zero; /* TCP sequence number is zero */ +}; + +struct ocelot_vcap_key_ipv6 { + struct ocelot_vcap_u8 proto; /* IPv6 protocol */ + struct ocelot_vcap_u128 sip; /* IPv6 source (byte 0-7 ignored) */ + struct ocelot_vcap_u128 dip; /* IPv6 destination (byte 0-7 ignored) */ + enum ocelot_vcap_bit ttl; /* TTL zero */ + struct ocelot_vcap_u8 ds; + struct ocelot_vcap_u48 data; /* Not UDP/TCP: IP data */ + struct ocelot_vcap_udp_tcp sport; + struct ocelot_vcap_udp_tcp dport; + enum ocelot_vcap_bit tcp_fin; + enum ocelot_vcap_bit tcp_syn; + enum ocelot_vcap_bit tcp_rst; + enum ocelot_vcap_bit tcp_psh; + enum ocelot_vcap_bit tcp_ack; + enum ocelot_vcap_bit tcp_urg; + enum ocelot_vcap_bit sip_eq_dip; /* SIP equals DIP */ + enum ocelot_vcap_bit sport_eq_dport; /* SPORT equals DPORT */ + enum ocelot_vcap_bit seq_zero; /* TCP sequence number is zero */ +}; + +enum ocelot_mask_mode { + OCELOT_MASK_MODE_NONE, + OCELOT_MASK_MODE_PERMIT_DENY, + OCELOT_MASK_MODE_POLICY, + OCELOT_MASK_MODE_REDIRECT, +}; + +enum ocelot_es0_tag { + OCELOT_NO_ES0_TAG, + OCELOT_ES0_TAG, + OCELOT_FORCE_PORT_TAG, + OCELOT_FORCE_UNTAG, +}; + +enum ocelot_tag_tpid_sel { + OCELOT_TAG_TPID_SEL_8021Q, + OCELOT_TAG_TPID_SEL_8021AD, +}; + +struct ocelot_vcap_action { + union { + /* VCAP ES0 */ + struct { + enum ocelot_es0_tag push_outer_tag; + enum ocelot_es0_tag push_inner_tag; + enum ocelot_tag_tpid_sel tag_a_tpid_sel; + int tag_a_vid_sel; + int tag_a_pcp_sel; + u16 vid_a_val; + u8 pcp_a_val; + u8 dei_a_val; + enum ocelot_tag_tpid_sel tag_b_tpid_sel; + int tag_b_vid_sel; + int tag_b_pcp_sel; + u16 vid_b_val; + u8 pcp_b_val; + u8 dei_b_val; + }; + + /* VCAP IS1 */ + struct { + bool vid_replace_ena; + u16 vid; + bool vlan_pop_cnt_ena; + int vlan_pop_cnt; + bool pcp_dei_ena; + u8 pcp; + u8 dei; + bool qos_ena; + u8 qos_val; + u8 pag_override_mask; + u8 pag_val; + }; + + /* VCAP IS2 */ + struct { + bool cpu_copy_ena; + u8 cpu_qu_num; + enum ocelot_mask_mode mask_mode; + unsigned long port_mask; + bool police_ena; + struct ocelot_policer pol; + u32 pol_ix; + }; + }; +}; + +struct ocelot_vcap_stats { + u64 bytes; + u64 pkts; + u64 used; +}; + +enum ocelot_vcap_filter_type { + OCELOT_VCAP_FILTER_DUMMY, + OCELOT_VCAP_FILTER_PAG, + OCELOT_VCAP_FILTER_OFFLOAD, +}; + +struct ocelot_vcap_filter { + struct list_head list; + + enum ocelot_vcap_filter_type type; + int block_id; + int goto_target; + int lookup; + u8 pag; + u16 prio; + u32 id; + + struct ocelot_vcap_action action; + struct ocelot_vcap_stats stats; + /* For VCAP IS1 and IS2 */ + unsigned long ingress_port_mask; + /* For VCAP ES0 */ + struct ocelot_vcap_port ingress_port; + struct ocelot_vcap_port egress_port; + + enum ocelot_vcap_bit dmac_mc; + enum ocelot_vcap_bit dmac_bc; + struct ocelot_vcap_key_vlan vlan; + + enum ocelot_vcap_key_type key_type; + union { + /* OCELOT_VCAP_KEY_ANY: No specific fields */ + struct ocelot_vcap_key_etype etype; + struct ocelot_vcap_key_llc llc; + struct ocelot_vcap_key_snap snap; + struct ocelot_vcap_key_arp arp; + struct ocelot_vcap_key_ipv4 ipv4; + struct ocelot_vcap_key_ipv6 ipv6; + } key; +}; + +int ocelot_vcap_filter_add(struct ocelot *ocelot, + struct ocelot_vcap_filter *rule, + struct netlink_ext_ack *extack); +int ocelot_vcap_filter_del(struct ocelot *ocelot, + struct ocelot_vcap_filter *rule); + #endif /* _OCELOT_VCAP_H_ */ -- GitLab From 50c6cc5b9283efdbf72162dee467bd68c7167807 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Fri, 29 Jan 2021 03:00:01 +0200 Subject: [PATCH 1741/2012] net: mscc: ocelot: store a namespaced VCAP filter ID We will be adding some private VCAP filters that should not interfere in any way with the filters added using tc-flower. So we need to allocate some IDs which will not be used by tc. Currently ocelot uses an u32 id derived from the flow cookie, which in itself is an unsigned long. This is a problem in itself, since on 64 bit systems, sizeof(unsigned long)=8, so the driver is already truncating these. Create a struct ocelot_vcap_id which contains the full unsigned long cookie from tc, as well as a boolean that is supposed to namespace the filters added by tc with the ones that aren't. Signed-off-by: Vladimir Oltean Reviewed-by: Florian Fainelli Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mscc/ocelot_flower.c | 7 ++++--- drivers/net/ethernet/mscc/ocelot_vcap.c | 16 ++++++++++++---- drivers/net/ethernet/mscc/ocelot_vcap.h | 3 ++- include/soc/mscc/ocelot_vcap.h | 7 ++++++- 4 files changed, 24 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/mscc/ocelot_flower.c b/drivers/net/ethernet/mscc/ocelot_flower.c index 729495a1a77ee..c3ac026f6aea2 100644 --- a/drivers/net/ethernet/mscc/ocelot_flower.c +++ b/drivers/net/ethernet/mscc/ocelot_flower.c @@ -622,7 +622,8 @@ static int ocelot_flower_parse(struct ocelot *ocelot, int port, bool ingress, int ret; filter->prio = f->common.prio; - filter->id = f->cookie; + filter->id.cookie = f->cookie; + filter->id.tc_offload = true; ret = ocelot_flower_parse_action(ocelot, port, ingress, f, filter); if (ret) @@ -717,7 +718,7 @@ int ocelot_cls_flower_destroy(struct ocelot *ocelot, int port, block = &ocelot->block[block_id]; - filter = ocelot_vcap_block_find_filter_by_id(block, f->cookie); + filter = ocelot_vcap_block_find_filter_by_id(block, f->cookie, true); if (!filter) return 0; @@ -741,7 +742,7 @@ int ocelot_cls_flower_stats(struct ocelot *ocelot, int port, block = &ocelot->block[block_id]; - filter = ocelot_vcap_block_find_filter_by_id(block, f->cookie); + filter = ocelot_vcap_block_find_filter_by_id(block, f->cookie, true); if (!filter || filter->type == OCELOT_VCAP_FILTER_DUMMY) return 0; diff --git a/drivers/net/ethernet/mscc/ocelot_vcap.c b/drivers/net/ethernet/mscc/ocelot_vcap.c index 489bf16362a7e..b82fd4103a685 100644 --- a/drivers/net/ethernet/mscc/ocelot_vcap.c +++ b/drivers/net/ethernet/mscc/ocelot_vcap.c @@ -959,6 +959,12 @@ static void ocelot_vcap_filter_add_to_block(struct ocelot *ocelot, list_add(&filter->list, pos->prev); } +static bool ocelot_vcap_filter_equal(const struct ocelot_vcap_filter *a, + const struct ocelot_vcap_filter *b) +{ + return !memcmp(&a->id, &b->id, sizeof(struct ocelot_vcap_id)); +} + static int ocelot_vcap_block_get_filter_index(struct ocelot_vcap_block *block, struct ocelot_vcap_filter *filter) { @@ -966,7 +972,7 @@ static int ocelot_vcap_block_get_filter_index(struct ocelot_vcap_block *block, int index = 0; list_for_each_entry(tmp, &block->rules, list) { - if (filter->id == tmp->id) + if (ocelot_vcap_filter_equal(filter, tmp)) return index; index++; } @@ -991,12 +997,14 @@ ocelot_vcap_block_find_filter_by_index(struct ocelot_vcap_block *block, } struct ocelot_vcap_filter * -ocelot_vcap_block_find_filter_by_id(struct ocelot_vcap_block *block, int id) +ocelot_vcap_block_find_filter_by_id(struct ocelot_vcap_block *block, int cookie, + bool tc_offload) { struct ocelot_vcap_filter *filter; list_for_each_entry(filter, &block->rules, list) - if (filter->id == id) + if (filter->id.tc_offload == tc_offload && + filter->id.cookie == cookie) return filter; return NULL; @@ -1161,7 +1169,7 @@ static void ocelot_vcap_block_remove_filter(struct ocelot *ocelot, list_for_each_safe(pos, q, &block->rules) { tmp = list_entry(pos, struct ocelot_vcap_filter, list); - if (tmp->id == filter->id) { + if (ocelot_vcap_filter_equal(filter, tmp)) { if (tmp->block_id == VCAP_IS2 && tmp->action.police_ena) ocelot_vcap_policer_del(ocelot, block, diff --git a/drivers/net/ethernet/mscc/ocelot_vcap.h b/drivers/net/ethernet/mscc/ocelot_vcap.h index cfc8b976d1deb..3b0c7916056e9 100644 --- a/drivers/net/ethernet/mscc/ocelot_vcap.h +++ b/drivers/net/ethernet/mscc/ocelot_vcap.h @@ -15,7 +15,8 @@ int ocelot_vcap_filter_stats_update(struct ocelot *ocelot, struct ocelot_vcap_filter *rule); struct ocelot_vcap_filter * -ocelot_vcap_block_find_filter_by_id(struct ocelot_vcap_block *block, int id); +ocelot_vcap_block_find_filter_by_id(struct ocelot_vcap_block *block, int id, + bool tc_offload); void ocelot_detect_vcap_constants(struct ocelot *ocelot); int ocelot_vcap_init(struct ocelot *ocelot); diff --git a/include/soc/mscc/ocelot_vcap.h b/include/soc/mscc/ocelot_vcap.h index 7f1b82fba63c7..76e01c927e17a 100644 --- a/include/soc/mscc/ocelot_vcap.h +++ b/include/soc/mscc/ocelot_vcap.h @@ -648,6 +648,11 @@ enum ocelot_vcap_filter_type { OCELOT_VCAP_FILTER_OFFLOAD, }; +struct ocelot_vcap_id { + unsigned long cookie; + bool tc_offload; +}; + struct ocelot_vcap_filter { struct list_head list; @@ -657,7 +662,7 @@ struct ocelot_vcap_filter { int lookup; u8 pag; u16 prio; - u32 id; + struct ocelot_vcap_id id; struct ocelot_vcap_action action; struct ocelot_vcap_stats stats; -- GitLab From 9b521250bff4dd04592651bb8fab07ecfcd2fb64 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Fri, 29 Jan 2021 03:00:02 +0200 Subject: [PATCH 1742/2012] net: mscc: ocelot: reapply bridge forwarding mask on bonding join/leave Applying the bridge forwarding mask currently is done only on the STP state changes for any port. But it depends on both STP state changes, and bonding interface state changes. Export the bit that recalculates the forwarding mask so that it could be reused, and call it when a port starts and stops offloading a bonding interface. Now that the logic is split into a separate function, we can rename "p" into "port", since the "port" variable was already taken in ocelot_bridge_stp_state_set. Also, we can rename "i" into "lag", to make it more clear what is it that we're iterating through. Signed-off-by: Vladimir Oltean Reviewed-by: Alexandre Belloni Reviewed-by: Florian Fainelli Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mscc/ocelot.c | 63 +++++++++++++++++------------- 1 file changed, 36 insertions(+), 27 deletions(-) diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c index 5b2c0cea49eaf..7352f58f9bc21 100644 --- a/drivers/net/ethernet/mscc/ocelot.c +++ b/drivers/net/ethernet/mscc/ocelot.c @@ -889,10 +889,42 @@ int ocelot_get_ts_info(struct ocelot *ocelot, int port, } EXPORT_SYMBOL(ocelot_get_ts_info); +static void ocelot_apply_bridge_fwd_mask(struct ocelot *ocelot) +{ + int port; + + /* Apply FWD mask. The loop is needed to add/remove the current port as + * a source for the other ports. + */ + for (port = 0; port < ocelot->num_phys_ports; port++) { + if (ocelot->bridge_fwd_mask & BIT(port)) { + unsigned long mask = ocelot->bridge_fwd_mask & ~BIT(port); + int lag; + + for (lag = 0; lag < ocelot->num_phys_ports; lag++) { + unsigned long bond_mask = ocelot->lags[lag]; + + if (!bond_mask) + continue; + + if (bond_mask & BIT(port)) { + mask &= ~bond_mask; + break; + } + } + + ocelot_write_rix(ocelot, mask, + ANA_PGID_PGID, PGID_SRC + port); + } else { + ocelot_write_rix(ocelot, 0, + ANA_PGID_PGID, PGID_SRC + port); + } + } +} + void ocelot_bridge_stp_state_set(struct ocelot *ocelot, int port, u8 state) { u32 port_cfg; - int p, i; if (!(BIT(port) & ocelot->bridge_mask)) return; @@ -915,32 +947,7 @@ void ocelot_bridge_stp_state_set(struct ocelot *ocelot, int port, u8 state) ocelot_write_gix(ocelot, port_cfg, ANA_PORT_PORT_CFG, port); - /* Apply FWD mask. The loop is needed to add/remove the current port as - * a source for the other ports. - */ - for (p = 0; p < ocelot->num_phys_ports; p++) { - if (ocelot->bridge_fwd_mask & BIT(p)) { - unsigned long mask = ocelot->bridge_fwd_mask & ~BIT(p); - - for (i = 0; i < ocelot->num_phys_ports; i++) { - unsigned long bond_mask = ocelot->lags[i]; - - if (!bond_mask) - continue; - - if (bond_mask & BIT(p)) { - mask &= ~bond_mask; - break; - } - } - - ocelot_write_rix(ocelot, mask, - ANA_PGID_PGID, PGID_SRC + p); - } else { - ocelot_write_rix(ocelot, 0, - ANA_PGID_PGID, PGID_SRC + p); - } - } + ocelot_apply_bridge_fwd_mask(ocelot); } EXPORT_SYMBOL(ocelot_bridge_stp_state_set); @@ -1297,6 +1304,7 @@ int ocelot_port_lag_join(struct ocelot *ocelot, int port, } ocelot_setup_lag(ocelot, lag); + ocelot_apply_bridge_fwd_mask(ocelot); ocelot_set_aggr_pgids(ocelot); return 0; @@ -1330,6 +1338,7 @@ void ocelot_port_lag_leave(struct ocelot *ocelot, int port, ocelot_write_gix(ocelot, port_cfg | ANA_PORT_PORT_CFG_PORTID_VAL(port), ANA_PORT_PORT_CFG, port); + ocelot_apply_bridge_fwd_mask(ocelot); ocelot_set_aggr_pgids(ocelot); } EXPORT_SYMBOL(ocelot_port_lag_leave); -- GitLab From cacea62fcdda5656cb5b8104e73a00e043b61730 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Fri, 29 Jan 2021 03:00:03 +0200 Subject: [PATCH 1743/2012] net: mscc: ocelot: don't use NPI tag prefix for the CPU port module Context: Ocelot switches put the injection/extraction frame header in front of the Ethernet header. When used in NPI mode, a DSA master would see junk instead of the destination MAC address, and it would most likely drop the packets. So the Ocelot frame header can have an optional prefix, which is just "ff:ff:ff:ff:ff:fe > ff:ff:ff:ff:ff:ff" padding put before the actual tag (still before the real Ethernet header) such that the DSA master thinks it's looking at a broadcast frame with a strange EtherType. Unfortunately, a lesson learned in commit 69df578c5f4b ("net: mscc: ocelot: eliminate confusion between CPU and NPI port") seems to have been forgotten in the meanwhile. The CPU port module and the NPI port have independent settings for the length of the tag prefix. However, the driver is using the same variable to program both of them. There is no reason really to use any tag prefix with the CPU port module, since that is not connected to any Ethernet port. So this patch makes the inj_prefix and xtr_prefix variables apply only to the NPI port (which the switchdev ocelot_vsc7514 driver does not use). Signed-off-by: Vladimir Oltean Reviewed-by: Florian Fainelli Signed-off-by: Jakub Kicinski --- drivers/net/dsa/ocelot/felix.c | 8 ++++---- drivers/net/ethernet/mscc/ocelot.c | 12 ++++++------ drivers/net/ethernet/mscc/ocelot_vsc7514.c | 2 -- include/soc/mscc/ocelot.h | 4 ++-- 4 files changed, 12 insertions(+), 14 deletions(-) diff --git a/drivers/net/dsa/ocelot/felix.c b/drivers/net/dsa/ocelot/felix.c index 767cbdccdb3e1..054e57dd43839 100644 --- a/drivers/net/dsa/ocelot/felix.c +++ b/drivers/net/dsa/ocelot/felix.c @@ -425,8 +425,8 @@ static int felix_init_structs(struct felix *felix, int num_phys_ports) ocelot->num_mact_rows = felix->info->num_mact_rows; ocelot->vcap = felix->info->vcap; ocelot->ops = felix->info->ops; - ocelot->inj_prefix = OCELOT_TAG_PREFIX_SHORT; - ocelot->xtr_prefix = OCELOT_TAG_PREFIX_SHORT; + ocelot->npi_inj_prefix = OCELOT_TAG_PREFIX_SHORT; + ocelot->npi_xtr_prefix = OCELOT_TAG_PREFIX_SHORT; ocelot->devlink = felix->ds->devlink; port_phy_modes = kcalloc(num_phys_ports, sizeof(phy_interface_t), @@ -541,9 +541,9 @@ static void felix_npi_port_init(struct ocelot *ocelot, int port) /* NPI port Injection/Extraction configuration */ ocelot_fields_write(ocelot, port, SYS_PORT_MODE_INCL_XTR_HDR, - ocelot->xtr_prefix); + ocelot->npi_xtr_prefix); ocelot_fields_write(ocelot, port, SYS_PORT_MODE_INCL_INJ_HDR, - ocelot->inj_prefix); + ocelot->npi_inj_prefix); /* Disable transmission of pause frames */ ocelot_fields_write(ocelot, port, SYS_PAUSE_CFG_PAUSE_ENA, 0); diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c index 7352f58f9bc21..714165c2f85ab 100644 --- a/drivers/net/ethernet/mscc/ocelot.c +++ b/drivers/net/ethernet/mscc/ocelot.c @@ -1359,9 +1359,9 @@ void ocelot_port_set_maxlen(struct ocelot *ocelot, int port, size_t sdu) if (port == ocelot->npi) { maxlen += OCELOT_TAG_LEN; - if (ocelot->inj_prefix == OCELOT_TAG_PREFIX_SHORT) + if (ocelot->npi_inj_prefix == OCELOT_TAG_PREFIX_SHORT) maxlen += OCELOT_SHORT_PREFIX_LEN; - else if (ocelot->inj_prefix == OCELOT_TAG_PREFIX_LONG) + else if (ocelot->npi_inj_prefix == OCELOT_TAG_PREFIX_LONG) maxlen += OCELOT_LONG_PREFIX_LEN; } @@ -1391,9 +1391,9 @@ int ocelot_get_max_mtu(struct ocelot *ocelot, int port) if (port == ocelot->npi) { max_mtu -= OCELOT_TAG_LEN; - if (ocelot->inj_prefix == OCELOT_TAG_PREFIX_SHORT) + if (ocelot->npi_inj_prefix == OCELOT_TAG_PREFIX_SHORT) max_mtu -= OCELOT_SHORT_PREFIX_LEN; - else if (ocelot->inj_prefix == OCELOT_TAG_PREFIX_LONG) + else if (ocelot->npi_inj_prefix == OCELOT_TAG_PREFIX_LONG) max_mtu -= OCELOT_LONG_PREFIX_LEN; } @@ -1478,9 +1478,9 @@ static void ocelot_cpu_port_init(struct ocelot *ocelot) ocelot_fields_write(ocelot, cpu, QSYS_SWITCH_PORT_MODE_PORT_ENA, 1); /* CPU port Injection/Extraction configuration */ ocelot_fields_write(ocelot, cpu, SYS_PORT_MODE_INCL_XTR_HDR, - ocelot->xtr_prefix); + OCELOT_TAG_PREFIX_NONE); ocelot_fields_write(ocelot, cpu, SYS_PORT_MODE_INCL_INJ_HDR, - ocelot->inj_prefix); + OCELOT_TAG_PREFIX_NONE); /* Configure the CPU port to be VLAN aware */ ocelot_write_gix(ocelot, ANA_PORT_VLAN_CFG_VLAN_VID(0) | diff --git a/drivers/net/ethernet/mscc/ocelot_vsc7514.c b/drivers/net/ethernet/mscc/ocelot_vsc7514.c index 30a38df08a219..407244fe5b17b 100644 --- a/drivers/net/ethernet/mscc/ocelot_vsc7514.c +++ b/drivers/net/ethernet/mscc/ocelot_vsc7514.c @@ -1347,8 +1347,6 @@ static int mscc_ocelot_probe(struct platform_device *pdev) ocelot->num_flooding_pgids = 1; ocelot->vcap = vsc7514_vcap_props; - ocelot->inj_prefix = OCELOT_TAG_PREFIX_NONE; - ocelot->xtr_prefix = OCELOT_TAG_PREFIX_NONE; ocelot->npi = -1; err = ocelot_init(ocelot); diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h index cdc33fa05660b..93c22627dedd3 100644 --- a/include/soc/mscc/ocelot.h +++ b/include/soc/mscc/ocelot.h @@ -651,8 +651,8 @@ struct ocelot { int npi; - enum ocelot_tag_prefix inj_prefix; - enum ocelot_tag_prefix xtr_prefix; + enum ocelot_tag_prefix npi_inj_prefix; + enum ocelot_tag_prefix npi_xtr_prefix; u32 *lags; -- GitLab From 886f8e26f5397827f031bce48f3138040d88ffb3 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Fri, 29 Jan 2021 03:00:04 +0200 Subject: [PATCH 1744/2012] net: dsa: document the existing switch tree notifiers and add a new one The existence of dsa_broadcast has generated some confusion in the past: https://www.mail-archive.com/netdev@vger.kernel.org/msg365042.html So let's document the existing dsa_port_notify and dsa_broadcast functions and explain when each of them should be used. Also, in fact, the in-between function has always been there but was lacking a name, and is the main reason for this patch: dsa_tree_notify. Refactor dsa_broadcast to use it. This patch also moves dsa_broadcast (a top-level function) to dsa2.c, where it really belonged in the first place, but had no companion so it stood with dsa_port_notify. Signed-off-by: Vladimir Oltean Reviewed-by: Florian Fainelli Signed-off-by: Jakub Kicinski --- net/dsa/dsa2.c | 43 +++++++++++++++++++++++++++++++++++++++++++ net/dsa/dsa_priv.h | 2 ++ net/dsa/port.c | 36 +++++++++++++----------------------- 3 files changed, 58 insertions(+), 23 deletions(-) diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index cc13549120e5b..2953d0c1c7bc4 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -21,6 +21,49 @@ static DEFINE_MUTEX(dsa2_mutex); LIST_HEAD(dsa_tree_list); +/** + * dsa_tree_notify - Execute code for all switches in a DSA switch tree. + * @dst: collection of struct dsa_switch devices to notify. + * @e: event, must be of type DSA_NOTIFIER_* + * @v: event-specific value. + * + * Given a struct dsa_switch_tree, this can be used to run a function once for + * each member DSA switch. The other alternative of traversing the tree is only + * through its ports list, which does not uniquely list the switches. + */ +int dsa_tree_notify(struct dsa_switch_tree *dst, unsigned long e, void *v) +{ + struct raw_notifier_head *nh = &dst->nh; + int err; + + err = raw_notifier_call_chain(nh, e, v); + + return notifier_to_errno(err); +} + +/** + * dsa_broadcast - Notify all DSA trees in the system. + * @e: event, must be of type DSA_NOTIFIER_* + * @v: event-specific value. + * + * Can be used to notify the switching fabric of events such as cross-chip + * bridging between disjoint trees (such as islands of tagger-compatible + * switches bridged by an incompatible middle switch). + */ +int dsa_broadcast(unsigned long e, void *v) +{ + struct dsa_switch_tree *dst; + int err = 0; + + list_for_each_entry(dst, &dsa_tree_list, list) { + err = dsa_tree_notify(dst, e, v); + if (err) + break; + } + + return err; +} + /** * dsa_lag_map() - Map LAG netdev to a linear LAG ID * @dst: Tree in which to record the mapping. diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h index 2ce46bb877033..3cc1e6d76e3ac 100644 --- a/net/dsa/dsa_priv.h +++ b/net/dsa/dsa_priv.h @@ -283,6 +283,8 @@ void dsa_switch_unregister_notifier(struct dsa_switch *ds); /* dsa2.c */ void dsa_lag_map(struct dsa_switch_tree *dst, struct net_device *lag); void dsa_lag_unmap(struct dsa_switch_tree *dst, struct net_device *lag); +int dsa_tree_notify(struct dsa_switch_tree *dst, unsigned long e, void *v); +int dsa_broadcast(unsigned long e, void *v); extern struct list_head dsa_tree_list; diff --git a/net/dsa/port.c b/net/dsa/port.c index f5b0f72ee7cd2..a8886cf401602 100644 --- a/net/dsa/port.c +++ b/net/dsa/port.c @@ -13,31 +13,21 @@ #include "dsa_priv.h" -static int dsa_broadcast(unsigned long e, void *v) -{ - struct dsa_switch_tree *dst; - int err = 0; - - list_for_each_entry(dst, &dsa_tree_list, list) { - struct raw_notifier_head *nh = &dst->nh; - - err = raw_notifier_call_chain(nh, e, v); - err = notifier_to_errno(err); - if (err) - break; - } - - return err; -} - +/** + * dsa_port_notify - Notify the switching fabric of changes to a port + * @dp: port on which change occurred + * @e: event, must be of type DSA_NOTIFIER_* + * @v: event-specific value. + * + * Notify all switches in the DSA tree that this port's switch belongs to, + * including this switch itself, of an event. Allows the other switches to + * reconfigure themselves for cross-chip operations. Can also be used to + * reconfigure ports without net_devices (CPU ports, DSA links) whenever + * a user port's state changes. + */ static int dsa_port_notify(const struct dsa_port *dp, unsigned long e, void *v) { - struct raw_notifier_head *nh = &dp->ds->dst->nh; - int err; - - err = raw_notifier_call_chain(nh, e, v); - - return notifier_to_errno(err); + return dsa_tree_notify(dp->ds->dst, e, v); } int dsa_port_set_state(struct dsa_port *dp, u8 state) -- GitLab From 357f203bb3b529fa7471494c7ad6a7a54d070353 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Fri, 29 Jan 2021 03:00:05 +0200 Subject: [PATCH 1745/2012] net: dsa: keep a copy of the tagging protocol in the DSA switch tree Cascading DSA switches can be done multiple ways. There is the brute force approach / tag stacking, where one upstream switch, located between leaf switches and the host Ethernet controller, will just happily transport the DSA header of those leaf switches as payload. For this kind of setups, DSA works without any special kind of treatment compared to a single switch - they just aren't aware of each other. Then there's the approach where the upstream switch understands the tags it transports from its leaves below, as it doesn't push a tag of its own, but it routes based on the source port & switch id information present in that tag (as opposed to DMAC & VID) and it strips the tag when egressing a front-facing port. Currently only Marvell implements the latter, and Marvell DSA trees contain only Marvell switches. So it is safe to say that DSA trees already have a single tag protocol shared by all switches, and in fact this is what makes the switches able to understand each other. This fact is also implied by the fact that currently, the tagging protocol is reported as part of a sysfs installed on the DSA master and not per port, so it must be the same for all the ports connected to that DSA master regardless of the switch that they belong to. It's time to make this official and enforce it (yes, this also means we won't have any "switch understands tag to some extent but is not able to speak it" hardware oddities that we'll support in the future). This is needed due to the imminent introduction of the dsa_switch_ops:: change_tag_protocol driver API. When that is introduced, we'll have to notify switches of the tagging protocol that they're configured to use. Currently the tag_ops structure pointer is held only for CPU ports. But there are switches which don't have CPU ports and nonetheless still need to be configured. These would be Marvell leaf switches whose upstream port is just a DSA link. How do we inform these of their tagging protocol setup/deletion? One answer to the above would be: iterate through the DSA switch tree's ports once, list the CPU ports, get their tag_ops, then iterate again now that we have it, and notify everybody of that tag_ops. But what to do if conflicts appear between one cpu_dp->tag_ops and another? There's no escaping the fact that conflict resolution needs to be done, so we can be upfront about it. Ease our work and just keep the master copy of the tag_ops inside the struct dsa_switch_tree. Reference counting is now moved to be per-tree too, instead of per-CPU port. There are many places in the data path that access master->dsa_ptr->tag_ops and we would introduce unnecessary performance penalty going through yet another indirection, so keep those right where they are. Signed-off-by: Vladimir Oltean Signed-off-by: Jakub Kicinski --- include/net/dsa.h | 7 ++++++- net/dsa/dsa2.c | 36 ++++++++++++++++++++++++------------ 2 files changed, 30 insertions(+), 13 deletions(-) diff --git a/include/net/dsa.h b/include/net/dsa.h index 2f5435d3d1db5..b8af1d6c879a7 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -140,6 +140,9 @@ struct dsa_switch_tree { /* Has this tree been applied to the hardware? */ bool setup; + /* Tagging protocol operations */ + const struct dsa_device_ops *tag_ops; + /* * Configuration data for the platform device that owns * this dsa switch tree instance. @@ -225,7 +228,9 @@ struct dsa_port { struct net_device *slave; }; - /* CPU port tagging operations used by master or slave devices */ + /* Copy of the tagging protocol operations, for quicker access + * in the data path. Valid only for the CPU ports. + */ const struct dsa_device_ops *tag_ops; /* Copies for faster access in master receive hot path */ diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index 2953d0c1c7bc4..63035a898ca8b 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -179,6 +179,8 @@ static struct dsa_switch_tree *dsa_tree_alloc(int index) static void dsa_tree_free(struct dsa_switch_tree *dst) { + if (dst->tag_ops) + dsa_tag_driver_put(dst->tag_ops); list_del(&dst->list); kfree(dst); } @@ -467,7 +469,6 @@ static void dsa_port_teardown(struct dsa_port *dp) break; case DSA_PORT_TYPE_CPU: dsa_port_disable(dp); - dsa_tag_driver_put(dp->tag_ops); dsa_port_link_unregister_of(dp); break; case DSA_PORT_TYPE_DSA: @@ -1011,24 +1012,35 @@ static int dsa_port_parse_cpu(struct dsa_port *dp, struct net_device *master) { struct dsa_switch *ds = dp->ds; struct dsa_switch_tree *dst = ds->dst; - const struct dsa_device_ops *tag_ops; enum dsa_tag_protocol tag_protocol; tag_protocol = dsa_get_tag_protocol(dp, master); - tag_ops = dsa_tag_driver_get(tag_protocol); - if (IS_ERR(tag_ops)) { - if (PTR_ERR(tag_ops) == -ENOPROTOOPT) - return -EPROBE_DEFER; - dev_warn(ds->dev, "No tagger for this switch\n"); - dp->master = NULL; - return PTR_ERR(tag_ops); + if (dst->tag_ops) { + if (dst->tag_ops->proto != tag_protocol) { + dev_err(ds->dev, + "A DSA switch tree can have only one tagging protocol\n"); + return -EINVAL; + } + /* In the case of multiple CPU ports per switch, the tagging + * protocol is still reference-counted only per switch tree, so + * nothing to do here. + */ + } else { + dst->tag_ops = dsa_tag_driver_get(tag_protocol); + if (IS_ERR(dst->tag_ops)) { + if (PTR_ERR(dst->tag_ops) == -ENOPROTOOPT) + return -EPROBE_DEFER; + dev_warn(ds->dev, "No tagger for this switch\n"); + dp->master = NULL; + return PTR_ERR(dst->tag_ops); + } } dp->master = master; dp->type = DSA_PORT_TYPE_CPU; - dp->filter = tag_ops->filter; - dp->rcv = tag_ops->rcv; - dp->tag_ops = tag_ops; + dp->filter = dst->tag_ops->filter; + dp->rcv = dst->tag_ops->rcv; + dp->tag_ops = dst->tag_ops; dp->dst = dst; return 0; -- GitLab From 53da0ebaad102626f56495e0967a614f89a2acc8 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Fri, 29 Jan 2021 03:00:06 +0200 Subject: [PATCH 1746/2012] net: dsa: allow changing the tag protocol via the "tagging" device attribute Currently DSA exposes the following sysfs: $ cat /sys/class/net/eno2/dsa/tagging ocelot which is a read-only device attribute, introduced in the kernel as commit 98cdb4807123 ("net: dsa: Expose tagging protocol to user-space"), and used by libpcap since its commit 993db3800d7d ("Add support for DSA link-layer types"). It would be nice if we could extend this device attribute by making it writable: $ echo ocelot-8021q > /sys/class/net/eno2/dsa/tagging This is useful with DSA switches that can make use of more than one tagging protocol. It may be useful in dsa_loop in the future too, to perform offline testing of various taggers, or for changing between dsa and edsa on Marvell switches, if that is desirable. In terms of implementation, drivers can support this feature by implementing .change_tag_protocol, which should always leave the switch in a consistent state: either with the new protocol if things went well, or with the old one if something failed. Teardown of the old protocol, if necessary, must be handled by the driver. Some things remain as before: - The .get_tag_protocol is currently only called at probe time, to load the initial tagging protocol driver. Nonetheless, new drivers should report the tagging protocol in current use now. - The driver should manage by itself the initial setup of tagging protocol, no later than the .setup() method, as well as destroying resources used by the last tagger in use, no earlier than the .teardown() method. For multi-switch DSA trees, error handling is a bit more complicated, since e.g. the 5th out of 7 switches may fail to change the tag protocol. When that happens, a revert to the original tag protocol is attempted, but that may fail too, leaving the tree in an inconsistent state despite each individual switch implementing .change_tag_protocol transactionally. Since the intersection between drivers that implement .change_tag_protocol and drivers that support D in DSA is currently the empty set, the possibility for this error to happen is ignored for now. Testing: $ insmod mscc_felix.ko [ 79.549784] mscc_felix 0000:00:00.5: Adding to iommu group 14 [ 79.565712] mscc_felix 0000:00:00.5: Failed to register DSA switch: -517 $ insmod tag_ocelot.ko $ rmmod mscc_felix.ko $ insmod mscc_felix.ko [ 97.261724] libphy: VSC9959 internal MDIO bus: probed [ 97.267363] mscc_felix 0000:00:00.5: Found PCS at internal MDIO address 0 [ 97.274998] mscc_felix 0000:00:00.5: Found PCS at internal MDIO address 1 [ 97.282561] mscc_felix 0000:00:00.5: Found PCS at internal MDIO address 2 [ 97.289700] mscc_felix 0000:00:00.5: Found PCS at internal MDIO address 3 [ 97.599163] mscc_felix 0000:00:00.5 swp0 (uninitialized): PHY [0000:00:00.3:10] driver [Microsemi GE VSC8514 SyncE] (irq=POLL) [ 97.862034] mscc_felix 0000:00:00.5 swp1 (uninitialized): PHY [0000:00:00.3:11] driver [Microsemi GE VSC8514 SyncE] (irq=POLL) [ 97.950731] mscc_felix 0000:00:00.5 swp0: configuring for inband/qsgmii link mode [ 97.964278] 8021q: adding VLAN 0 to HW filter on device swp0 [ 98.146161] mscc_felix 0000:00:00.5 swp2 (uninitialized): PHY [0000:00:00.3:12] driver [Microsemi GE VSC8514 SyncE] (irq=POLL) [ 98.238649] mscc_felix 0000:00:00.5 swp1: configuring for inband/qsgmii link mode [ 98.251845] 8021q: adding VLAN 0 to HW filter on device swp1 [ 98.433916] mscc_felix 0000:00:00.5 swp3 (uninitialized): PHY [0000:00:00.3:13] driver [Microsemi GE VSC8514 SyncE] (irq=POLL) [ 98.485542] mscc_felix 0000:00:00.5: configuring for fixed/internal link mode [ 98.503584] mscc_felix 0000:00:00.5: Link is Up - 2.5Gbps/Full - flow control rx/tx [ 98.527948] device eno2 entered promiscuous mode [ 98.544755] DSA: tree 0 setup $ ping 10.0.0.1 PING 10.0.0.1 (10.0.0.1): 56 data bytes 64 bytes from 10.0.0.1: seq=0 ttl=64 time=2.337 ms 64 bytes from 10.0.0.1: seq=1 ttl=64 time=0.754 ms ^C - 10.0.0.1 ping statistics - 2 packets transmitted, 2 packets received, 0% packet loss round-trip min/avg/max = 0.754/1.545/2.337 ms $ cat /sys/class/net/eno2/dsa/tagging ocelot $ cat ./test_ocelot_8021q.sh #!/bin/bash ip link set swp0 down ip link set swp1 down ip link set swp2 down ip link set swp3 down ip link set swp5 down ip link set eno2 down echo ocelot-8021q > /sys/class/net/eno2/dsa/tagging ip link set eno2 up ip link set swp0 up ip link set swp1 up ip link set swp2 up ip link set swp3 up ip link set swp5 up $ ./test_ocelot_8021q.sh ./test_ocelot_8021q.sh: line 9: echo: write error: Protocol not available $ rmmod tag_ocelot.ko rmmod: can't unload module 'tag_ocelot': Resource temporarily unavailable $ insmod tag_ocelot_8021q.ko $ ./test_ocelot_8021q.sh $ cat /sys/class/net/eno2/dsa/tagging ocelot-8021q $ rmmod tag_ocelot.ko $ rmmod tag_ocelot_8021q.ko rmmod: can't unload module 'tag_ocelot_8021q': Resource temporarily unavailable $ ping 10.0.0.1 PING 10.0.0.1 (10.0.0.1): 56 data bytes 64 bytes from 10.0.0.1: seq=0 ttl=64 time=0.953 ms 64 bytes from 10.0.0.1: seq=1 ttl=64 time=0.787 ms 64 bytes from 10.0.0.1: seq=2 ttl=64 time=0.771 ms $ rmmod mscc_felix.ko [ 645.544426] mscc_felix 0000:00:00.5: Link is Down [ 645.838608] DSA: tree 0 torn down $ rmmod tag_ocelot_8021q.ko Signed-off-by: Vladimir Oltean Signed-off-by: Jakub Kicinski --- Documentation/ABI/testing/sysfs-class-net-dsa | 11 +++- include/net/dsa.h | 9 +++ net/dsa/dsa.c | 26 +++++++++ net/dsa/dsa2.c | 55 ++++++++++++++++++- net/dsa/dsa_priv.h | 15 +++++ net/dsa/master.c | 39 ++++++++++++- net/dsa/port.c | 8 +++ net/dsa/slave.c | 35 ++++++++---- net/dsa/switch.c | 55 +++++++++++++++++++ 9 files changed, 235 insertions(+), 18 deletions(-) diff --git a/Documentation/ABI/testing/sysfs-class-net-dsa b/Documentation/ABI/testing/sysfs-class-net-dsa index 985d84c585c66..e2da26b44dd0d 100644 --- a/Documentation/ABI/testing/sysfs-class-net-dsa +++ b/Documentation/ABI/testing/sysfs-class-net-dsa @@ -3,5 +3,12 @@ Date: August 2018 KernelVersion: 4.20 Contact: netdev@vger.kernel.org Description: - String indicating the type of tagging protocol used by the - DSA slave network device. + On read, this file returns a string indicating the type of + tagging protocol used by the DSA network devices that are + attached to this master interface. + On write, this file changes the tagging protocol of the + attached DSA switches, if this operation is supported by the + driver. Changing the tagging protocol must be done with the DSA + interfaces and the master interface all administratively down. + See the "name" field of each registered struct dsa_device_ops + for a list of valid values. diff --git a/include/net/dsa.h b/include/net/dsa.h index b8af1d6c879a7..7ea3918b2e618 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -485,9 +485,18 @@ static inline bool dsa_port_is_vlan_filtering(const struct dsa_port *dp) typedef int dsa_fdb_dump_cb_t(const unsigned char *addr, u16 vid, bool is_static, void *data); struct dsa_switch_ops { + /* + * Tagging protocol helpers called for the CPU ports and DSA links. + * @get_tag_protocol retrieves the initial tagging protocol and is + * mandatory. Switches which can operate using multiple tagging + * protocols should implement @change_tag_protocol and report in + * @get_tag_protocol the tagger in current use. + */ enum dsa_tag_protocol (*get_tag_protocol)(struct dsa_switch *ds, int port, enum dsa_tag_protocol mprot); + int (*change_tag_protocol)(struct dsa_switch *ds, int port, + enum dsa_tag_protocol proto); int (*setup)(struct dsa_switch *ds); void (*teardown)(struct dsa_switch *ds); diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index f4ce3c5826a01..84cad1be9ce48 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -84,6 +84,32 @@ const char *dsa_tag_protocol_to_str(const struct dsa_device_ops *ops) return ops->name; }; +/* Function takes a reference on the module owning the tagger, + * so dsa_tag_driver_put must be called afterwards. + */ +const struct dsa_device_ops *dsa_find_tagger_by_name(const char *buf) +{ + const struct dsa_device_ops *ops = ERR_PTR(-ENOPROTOOPT); + struct dsa_tag_driver *dsa_tag_driver; + + mutex_lock(&dsa_tag_drivers_lock); + list_for_each_entry(dsa_tag_driver, &dsa_tag_drivers_list, list) { + const struct dsa_device_ops *tmp = dsa_tag_driver->ops; + + if (!sysfs_streq(buf, tmp->name)) + continue; + + if (!try_module_get(dsa_tag_driver->owner)) + break; + + ops = tmp; + break; + } + mutex_unlock(&dsa_tag_drivers_lock); + + return ops; +} + const struct dsa_device_ops *dsa_tag_driver_get(int tag_protocol) { struct dsa_tag_driver *dsa_tag_driver; diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index 63035a898ca8b..96249c4ad5f27 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -942,6 +942,57 @@ static void dsa_tree_teardown(struct dsa_switch_tree *dst) dst->setup = false; } +/* Since the dsa/tagging sysfs device attribute is per master, the assumption + * is that all DSA switches within a tree share the same tagger, otherwise + * they would have formed disjoint trees (different "dsa,member" values). + */ +int dsa_tree_change_tag_proto(struct dsa_switch_tree *dst, + struct net_device *master, + const struct dsa_device_ops *tag_ops, + const struct dsa_device_ops *old_tag_ops) +{ + struct dsa_notifier_tag_proto_info info; + struct dsa_port *dp; + int err = -EBUSY; + + if (!rtnl_trylock()) + return restart_syscall(); + + /* At the moment we don't allow changing the tag protocol under + * traffic. The rtnl_mutex also happens to serialize concurrent + * attempts to change the tagging protocol. If we ever lift the IFF_UP + * restriction, there needs to be another mutex which serializes this. + */ + if (master->flags & IFF_UP) + goto out_unlock; + + list_for_each_entry(dp, &dst->ports, list) { + if (!dsa_is_user_port(dp->ds, dp->index)) + continue; + + if (dp->slave->flags & IFF_UP) + goto out_unlock; + } + + info.tag_ops = tag_ops; + err = dsa_tree_notify(dst, DSA_NOTIFIER_TAG_PROTO, &info); + if (err) + goto out_unwind_tagger; + + dst->tag_ops = tag_ops; + + rtnl_unlock(); + + return 0; + +out_unwind_tagger: + info.tag_ops = old_tag_ops; + dsa_tree_notify(dst, DSA_NOTIFIER_TAG_PROTO, &info); +out_unlock: + rtnl_unlock(); + return err; +} + static struct dsa_port *dsa_port_touch(struct dsa_switch *ds, int index) { struct dsa_switch_tree *dst = ds->dst; @@ -1038,9 +1089,7 @@ static int dsa_port_parse_cpu(struct dsa_port *dp, struct net_device *master) dp->master = master; dp->type = DSA_PORT_TYPE_CPU; - dp->filter = dst->tag_ops->filter; - dp->rcv = dst->tag_ops->rcv; - dp->tag_ops = dst->tag_ops; + dsa_port_set_tag_protocol(dp, dst->tag_ops); dp->dst = dst; return 0; diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h index 3cc1e6d76e3ac..edca57b558ad6 100644 --- a/net/dsa/dsa_priv.h +++ b/net/dsa/dsa_priv.h @@ -28,6 +28,7 @@ enum { DSA_NOTIFIER_VLAN_ADD, DSA_NOTIFIER_VLAN_DEL, DSA_NOTIFIER_MTU, + DSA_NOTIFIER_TAG_PROTO, }; /* DSA_NOTIFIER_AGEING_TIME */ @@ -82,6 +83,11 @@ struct dsa_notifier_mtu_info { int mtu; }; +/* DSA_NOTIFIER_TAG_PROTO_* */ +struct dsa_notifier_tag_proto_info { + const struct dsa_device_ops *tag_ops; +}; + struct dsa_switchdev_event_work { struct dsa_switch *ds; int port; @@ -115,6 +121,7 @@ struct dsa_slave_priv { /* dsa.c */ const struct dsa_device_ops *dsa_tag_driver_get(int tag_protocol); void dsa_tag_driver_put(const struct dsa_device_ops *ops); +const struct dsa_device_ops *dsa_find_tagger_by_name(const char *buf); bool dsa_schedule_work(struct work_struct *work); const char *dsa_tag_protocol_to_str(const struct dsa_device_ops *ops); @@ -139,6 +146,8 @@ static inline struct net_device *dsa_master_find_slave(struct net_device *dev, } /* port.c */ +void dsa_port_set_tag_protocol(struct dsa_port *cpu_dp, + const struct dsa_device_ops *tag_ops); int dsa_port_set_state(struct dsa_port *dp, u8 state); int dsa_port_enable_rt(struct dsa_port *dp, struct phy_device *phy); int dsa_port_enable(struct dsa_port *dp, struct phy_device *phy); @@ -201,6 +210,8 @@ int dsa_slave_suspend(struct net_device *slave_dev); int dsa_slave_resume(struct net_device *slave_dev); int dsa_slave_register_notifier(void); void dsa_slave_unregister_notifier(void); +void dsa_slave_setup_tagger(struct net_device *slave); +int dsa_slave_change_mtu(struct net_device *dev, int new_mtu); static inline struct dsa_port *dsa_slave_to_port(const struct net_device *dev) { @@ -285,6 +296,10 @@ void dsa_lag_map(struct dsa_switch_tree *dst, struct net_device *lag); void dsa_lag_unmap(struct dsa_switch_tree *dst, struct net_device *lag); int dsa_tree_notify(struct dsa_switch_tree *dst, unsigned long e, void *v); int dsa_broadcast(unsigned long e, void *v); +int dsa_tree_change_tag_proto(struct dsa_switch_tree *dst, + struct net_device *master, + const struct dsa_device_ops *tag_ops, + const struct dsa_device_ops *old_tag_ops); extern struct list_head dsa_tree_list; diff --git a/net/dsa/master.c b/net/dsa/master.c index cb3a5cf99b258..052a977914a6d 100644 --- a/net/dsa/master.c +++ b/net/dsa/master.c @@ -280,7 +280,44 @@ static ssize_t tagging_show(struct device *d, struct device_attribute *attr, return sprintf(buf, "%s\n", dsa_tag_protocol_to_str(cpu_dp->tag_ops)); } -static DEVICE_ATTR_RO(tagging); + +static ssize_t tagging_store(struct device *d, struct device_attribute *attr, + const char *buf, size_t count) +{ + const struct dsa_device_ops *new_tag_ops, *old_tag_ops; + struct net_device *dev = to_net_dev(d); + struct dsa_port *cpu_dp = dev->dsa_ptr; + int err; + + old_tag_ops = cpu_dp->tag_ops; + new_tag_ops = dsa_find_tagger_by_name(buf); + /* Bad tagger name, or module is not loaded? */ + if (IS_ERR(new_tag_ops)) + return PTR_ERR(new_tag_ops); + + if (new_tag_ops == old_tag_ops) + /* Drop the temporarily held duplicate reference, since + * the DSA switch tree uses this tagger. + */ + goto out; + + err = dsa_tree_change_tag_proto(cpu_dp->ds->dst, dev, new_tag_ops, + old_tag_ops); + if (err) { + /* On failure the old tagger is restored, so we don't need the + * driver for the new one. + */ + dsa_tag_driver_put(new_tag_ops); + return err; + } + + /* On success we no longer need the module for the old tagging protocol + */ +out: + dsa_tag_driver_put(old_tag_ops); + return count; +} +static DEVICE_ATTR_RW(tagging); static struct attribute *dsa_slave_attrs[] = { &dev_attr_tagging.attr, diff --git a/net/dsa/port.c b/net/dsa/port.c index a8886cf401602..5e079a61528e0 100644 --- a/net/dsa/port.c +++ b/net/dsa/port.c @@ -526,6 +526,14 @@ int dsa_port_vlan_del(struct dsa_port *dp, return dsa_port_notify(dp, DSA_NOTIFIER_VLAN_DEL, &info); } +void dsa_port_set_tag_protocol(struct dsa_port *cpu_dp, + const struct dsa_device_ops *tag_ops) +{ + cpu_dp->filter = tag_ops->filter; + cpu_dp->rcv = tag_ops->rcv; + cpu_dp->tag_ops = tag_ops; +} + static struct phy_device *dsa_port_get_phy_device(struct dsa_port *dp) { struct device_node *phy_dn; diff --git a/net/dsa/slave.c b/net/dsa/slave.c index f2fb433f38284..b0571ab4e5a75 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -1430,7 +1430,7 @@ out: dsa_hw_port_list_free(&hw_port_list); } -static int dsa_slave_change_mtu(struct net_device *dev, int new_mtu) +int dsa_slave_change_mtu(struct net_device *dev, int new_mtu) { struct net_device *master = dsa_slave_to_master(dev); struct dsa_port *dp = dsa_slave_to_port(dev); @@ -1708,6 +1708,27 @@ static int dsa_slave_phy_setup(struct net_device *slave_dev) return ret; } +void dsa_slave_setup_tagger(struct net_device *slave) +{ + struct dsa_port *dp = dsa_slave_to_port(slave); + struct dsa_slave_priv *p = netdev_priv(slave); + const struct dsa_port *cpu_dp = dp->cpu_dp; + struct net_device *master = cpu_dp->master; + + if (cpu_dp->tag_ops->tail_tag) + slave->needed_tailroom = cpu_dp->tag_ops->overhead; + else + slave->needed_headroom = cpu_dp->tag_ops->overhead; + /* Try to save one extra realloc later in the TX path (in the master) + * by also inheriting the master's needed headroom and tailroom. + * The 8021q driver also does this. + */ + slave->needed_headroom += master->needed_headroom; + slave->needed_tailroom += master->needed_tailroom; + + p->xmit = cpu_dp->tag_ops->xmit; +} + static struct lock_class_key dsa_slave_netdev_xmit_lock_key; static void dsa_slave_set_lockdep_class_one(struct net_device *dev, struct netdev_queue *txq, @@ -1782,16 +1803,6 @@ int dsa_slave_create(struct dsa_port *port) slave_dev->netdev_ops = &dsa_slave_netdev_ops; if (ds->ops->port_max_mtu) slave_dev->max_mtu = ds->ops->port_max_mtu(ds, port->index); - if (cpu_dp->tag_ops->tail_tag) - slave_dev->needed_tailroom = cpu_dp->tag_ops->overhead; - else - slave_dev->needed_headroom = cpu_dp->tag_ops->overhead; - /* Try to save one extra realloc later in the TX path (in the master) - * by also inheriting the master's needed headroom and tailroom. - * The 8021q driver also does this. - */ - slave_dev->needed_headroom += master->needed_headroom; - slave_dev->needed_tailroom += master->needed_tailroom; SET_NETDEV_DEVTYPE(slave_dev, &dsa_type); netdev_for_each_tx_queue(slave_dev, dsa_slave_set_lockdep_class_one, @@ -1814,8 +1825,8 @@ int dsa_slave_create(struct dsa_port *port) p->dp = port; INIT_LIST_HEAD(&p->mall_tc_list); - p->xmit = cpu_dp->tag_ops->xmit; port->slave = slave_dev; + dsa_slave_setup_tagger(slave_dev); rtnl_lock(); ret = dsa_slave_change_mtu(slave_dev, ETH_DATA_LEN); diff --git a/net/dsa/switch.c b/net/dsa/switch.c index cc0b25f3adea9..5026e4143663f 100644 --- a/net/dsa/switch.c +++ b/net/dsa/switch.c @@ -297,6 +297,58 @@ static int dsa_switch_vlan_del(struct dsa_switch *ds, return 0; } +static bool dsa_switch_tag_proto_match(struct dsa_switch *ds, int port, + struct dsa_notifier_tag_proto_info *info) +{ + if (dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port)) + return true; + + return false; +} + +static int dsa_switch_change_tag_proto(struct dsa_switch *ds, + struct dsa_notifier_tag_proto_info *info) +{ + const struct dsa_device_ops *tag_ops = info->tag_ops; + int port, err; + + if (!ds->ops->change_tag_protocol) + return -EOPNOTSUPP; + + ASSERT_RTNL(); + + for (port = 0; port < ds->num_ports; port++) { + if (dsa_switch_tag_proto_match(ds, port, info)) { + err = ds->ops->change_tag_protocol(ds, port, + tag_ops->proto); + if (err) + return err; + + if (dsa_is_cpu_port(ds, port)) + dsa_port_set_tag_protocol(dsa_to_port(ds, port), + tag_ops); + } + } + + /* Now that changing the tag protocol can no longer fail, let's update + * the remaining bits which are "duplicated for faster access", and the + * bits that depend on the tagger, such as the MTU. + */ + for (port = 0; port < ds->num_ports; port++) { + if (dsa_is_user_port(ds, port)) { + struct net_device *slave; + + slave = dsa_to_port(ds, port)->slave; + dsa_slave_setup_tagger(slave); + + /* rtnl_mutex is held in dsa_tree_change_tag_proto */ + dsa_slave_change_mtu(slave, slave->mtu); + } + } + + return 0; +} + static int dsa_switch_event(struct notifier_block *nb, unsigned long event, void *info) { @@ -343,6 +395,9 @@ static int dsa_switch_event(struct notifier_block *nb, case DSA_NOTIFIER_MTU: err = dsa_switch_mtu(ds, info); break; + case DSA_NOTIFIER_TAG_PROTO: + err = dsa_switch_change_tag_proto(ds, info); + break; default: err = -EOPNOTSUPP; break; -- GitLab From adb3dccf090bc53ce177cd30bbe5b985336a6f66 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Fri, 29 Jan 2021 03:00:07 +0200 Subject: [PATCH 1747/2012] net: dsa: felix: convert to the new .change_tag_protocol DSA API In expectation of the new tag_ocelot_8021q tagger implementation, we need to be able to do runtime switchover between one tagger and another. So we must structure the existing code for the current NPI-based tagger in a certain way. We move the felix_npi_port_init function in expectation of the future driver configuration necessary for tag_ocelot_8021q: we would like to not have the NPI-related bits interspersed with the tag_8021q bits. The conversion from this: ocelot_write_rix(ocelot, ANA_PGID_PGID_PGID(GENMASK(ocelot->num_phys_ports, 0)), ANA_PGID_PGID, PGID_UC); to this: cpu_flood = ANA_PGID_PGID_PGID(BIT(ocelot->num_phys_ports)); ocelot_rmw_rix(ocelot, cpu_flood, cpu_flood, ANA_PGID_PGID, PGID_UC); is perhaps non-trivial, but is nonetheless non-functional. The PGID_UC (replicator for unknown unicast) is already configured out of hardware reset to flood to all ports except ocelot->num_phys_ports (the CPU port module). All we change is that we use a read-modify-write to only add the CPU port module to the unknown unicast replicator, as opposed to doing a full write to the register. Signed-off-by: Vladimir Oltean Signed-off-by: Jakub Kicinski --- drivers/net/dsa/ocelot/felix.c | 186 ++++++++++++++++++----- drivers/net/dsa/ocelot/felix.h | 1 + drivers/net/dsa/ocelot/felix_vsc9959.c | 1 + drivers/net/dsa/ocelot/seville_vsc9953.c | 1 + 4 files changed, 154 insertions(+), 35 deletions(-) diff --git a/drivers/net/dsa/ocelot/felix.c b/drivers/net/dsa/ocelot/felix.c index 054e57dd43839..af3cee8762ce0 100644 --- a/drivers/net/dsa/ocelot/felix.c +++ b/drivers/net/dsa/ocelot/felix.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/* Copyright 2019 NXP Semiconductors +/* Copyright 2019-2021 NXP Semiconductors * * This is an umbrella module for all network switches that are * register-compatible with Ocelot and that perform I/O to their host CPU @@ -24,11 +24,140 @@ #include #include "felix.h" +/* The CPU port module is connected to the Node Processor Interface (NPI). This + * is the mode through which frames can be injected from and extracted to an + * external CPU, over Ethernet. In NXP SoCs, the "external CPU" is the ARM CPU + * running Linux, and this forms a DSA setup together with the enetc or fman + * DSA master. + */ +static void felix_npi_port_init(struct ocelot *ocelot, int port) +{ + ocelot->npi = port; + + ocelot_write(ocelot, QSYS_EXT_CPU_CFG_EXT_CPUQ_MSK_M | + QSYS_EXT_CPU_CFG_EXT_CPU_PORT(port), + QSYS_EXT_CPU_CFG); + + /* NPI port Injection/Extraction configuration */ + ocelot_fields_write(ocelot, port, SYS_PORT_MODE_INCL_XTR_HDR, + ocelot->npi_xtr_prefix); + ocelot_fields_write(ocelot, port, SYS_PORT_MODE_INCL_INJ_HDR, + ocelot->npi_inj_prefix); + + /* Disable transmission of pause frames */ + ocelot_fields_write(ocelot, port, SYS_PAUSE_CFG_PAUSE_ENA, 0); +} + +static void felix_npi_port_deinit(struct ocelot *ocelot, int port) +{ + /* Restore hardware defaults */ + int unused_port = ocelot->num_phys_ports + 2; + + ocelot->npi = -1; + + ocelot_write(ocelot, QSYS_EXT_CPU_CFG_EXT_CPU_PORT(unused_port), + QSYS_EXT_CPU_CFG); + + ocelot_fields_write(ocelot, port, SYS_PORT_MODE_INCL_XTR_HDR, + OCELOT_TAG_PREFIX_DISABLED); + ocelot_fields_write(ocelot, port, SYS_PORT_MODE_INCL_INJ_HDR, + OCELOT_TAG_PREFIX_DISABLED); + + /* Enable transmission of pause frames */ + ocelot_fields_write(ocelot, port, SYS_PAUSE_CFG_PAUSE_ENA, 1); +} + +static int felix_setup_tag_npi(struct dsa_switch *ds, int cpu) +{ + struct ocelot *ocelot = ds->priv; + unsigned long cpu_flood; + + felix_npi_port_init(ocelot, cpu); + + /* Include the CPU port module (and indirectly, the NPI port) + * in the forwarding mask for unknown unicast - the hardware + * default value for ANA_FLOODING_FLD_UNICAST excludes + * BIT(ocelot->num_phys_ports), and so does ocelot_init, + * since Ocelot relies on whitelisting MAC addresses towards + * PGID_CPU. + * We do this because DSA does not yet perform RX filtering, + * and the NPI port does not perform source address learning, + * so traffic sent to Linux is effectively unknown from the + * switch's perspective. + */ + cpu_flood = ANA_PGID_PGID_PGID(BIT(ocelot->num_phys_ports)); + ocelot_rmw_rix(ocelot, cpu_flood, cpu_flood, ANA_PGID_PGID, PGID_UC); + + return 0; +} + +static void felix_teardown_tag_npi(struct dsa_switch *ds, int cpu) +{ + struct ocelot *ocelot = ds->priv; + + felix_npi_port_deinit(ocelot, cpu); +} + +static int felix_set_tag_protocol(struct dsa_switch *ds, int cpu, + enum dsa_tag_protocol proto) +{ + int err; + + switch (proto) { + case DSA_TAG_PROTO_OCELOT: + err = felix_setup_tag_npi(ds, cpu); + break; + default: + err = -EPROTONOSUPPORT; + } + + return err; +} + +static void felix_del_tag_protocol(struct dsa_switch *ds, int cpu, + enum dsa_tag_protocol proto) +{ + switch (proto) { + case DSA_TAG_PROTO_OCELOT: + felix_teardown_tag_npi(ds, cpu); + break; + default: + break; + } +} + +static int felix_change_tag_protocol(struct dsa_switch *ds, int cpu, + enum dsa_tag_protocol proto) +{ + struct ocelot *ocelot = ds->priv; + struct felix *felix = ocelot_to_felix(ocelot); + enum dsa_tag_protocol old_proto = felix->tag_proto; + int err; + + if (proto != DSA_TAG_PROTO_OCELOT) + return -EPROTONOSUPPORT; + + felix_del_tag_protocol(ds, cpu, old_proto); + + err = felix_set_tag_protocol(ds, cpu, proto); + if (err) { + felix_set_tag_protocol(ds, cpu, old_proto); + return err; + } + + felix->tag_proto = proto; + + return 0; +} + static enum dsa_tag_protocol felix_get_tag_protocol(struct dsa_switch *ds, int port, enum dsa_tag_protocol mp) { - return DSA_TAG_PROTO_OCELOT; + struct ocelot *ocelot = ds->priv; + struct felix *felix = ocelot_to_felix(ocelot); + + return felix->tag_proto; } static int felix_set_ageing_time(struct dsa_switch *ds, @@ -527,28 +656,6 @@ static int felix_init_structs(struct felix *felix, int num_phys_ports) return 0; } -/* The CPU port module is connected to the Node Processor Interface (NPI). This - * is the mode through which frames can be injected from and extracted to an - * external CPU, over Ethernet. - */ -static void felix_npi_port_init(struct ocelot *ocelot, int port) -{ - ocelot->npi = port; - - ocelot_write(ocelot, QSYS_EXT_CPU_CFG_EXT_CPUQ_MSK_M | - QSYS_EXT_CPU_CFG_EXT_CPU_PORT(port), - QSYS_EXT_CPU_CFG); - - /* NPI port Injection/Extraction configuration */ - ocelot_fields_write(ocelot, port, SYS_PORT_MODE_INCL_XTR_HDR, - ocelot->npi_xtr_prefix); - ocelot_fields_write(ocelot, port, SYS_PORT_MODE_INCL_INJ_HDR, - ocelot->npi_inj_prefix); - - /* Disable transmission of pause frames */ - ocelot_fields_write(ocelot, port, SYS_PAUSE_CFG_PAUSE_ENA, 0); -} - /* Hardware initialization done here so that we can allocate structures with * devm without fear of dsa_register_switch returning -EPROBE_DEFER and causing * us to allocate structures twice (leak memory) and map PCI memory twice @@ -578,10 +685,10 @@ static int felix_setup(struct dsa_switch *ds) } for (port = 0; port < ds->num_ports; port++) { - ocelot_init_port(ocelot, port); + if (dsa_is_unused_port(ds, port)) + continue; - if (dsa_is_cpu_port(ds, port)) - felix_npi_port_init(ocelot, port); + ocelot_init_port(ocelot, port); /* Set the default QoS Classification based on PCP and DEI * bits of vlan tag. @@ -593,14 +700,15 @@ static int felix_setup(struct dsa_switch *ds) if (err) return err; - /* Include the CPU port module in the forwarding mask for unknown - * unicast - the hardware default value for ANA_FLOODING_FLD_UNICAST - * excludes BIT(ocelot->num_phys_ports), and so does ocelot_init, since - * Ocelot relies on whitelisting MAC addresses towards PGID_CPU. - */ - ocelot_write_rix(ocelot, - ANA_PGID_PGID_PGID(GENMASK(ocelot->num_phys_ports, 0)), - ANA_PGID_PGID, PGID_UC); + for (port = 0; port < ds->num_ports; port++) { + if (!dsa_is_cpu_port(ds, port)) + continue; + + /* The initial tag protocol is NPI which always returns 0, so + * there's no real point in checking for errors. + */ + felix_set_tag_protocol(ds, port, felix->tag_proto); + } ds->mtu_enforcement_ingress = true; ds->assisted_learning_on_cpu_port = true; @@ -614,6 +722,13 @@ static void felix_teardown(struct dsa_switch *ds) struct felix *felix = ocelot_to_felix(ocelot); int port; + for (port = 0; port < ds->num_ports; port++) { + if (!dsa_is_cpu_port(ds, port)) + continue; + + felix_del_tag_protocol(ds, port, felix->tag_proto); + } + ocelot_devlink_sb_unregister(ocelot); ocelot_deinit_timestamp(ocelot); ocelot_deinit(ocelot); @@ -860,6 +975,7 @@ static int felix_sb_occ_tc_port_bind_get(struct dsa_switch *ds, int port, const struct dsa_switch_ops felix_switch_ops = { .get_tag_protocol = felix_get_tag_protocol, + .change_tag_protocol = felix_change_tag_protocol, .setup = felix_setup, .teardown = felix_teardown, .set_ageing_time = felix_set_ageing_time, diff --git a/drivers/net/dsa/ocelot/felix.h b/drivers/net/dsa/ocelot/felix.h index 994835cb9307f..264b3bbdc4d1f 100644 --- a/drivers/net/dsa/ocelot/felix.h +++ b/drivers/net/dsa/ocelot/felix.h @@ -48,6 +48,7 @@ struct felix { struct lynx_pcs **pcs; resource_size_t switch_base; resource_size_t imdio_base; + enum dsa_tag_protocol tag_proto; }; struct net_device *felix_port_to_netdev(struct ocelot *ocelot, int port); diff --git a/drivers/net/dsa/ocelot/felix_vsc9959.c b/drivers/net/dsa/ocelot/felix_vsc9959.c index f9711e69b8d54..e944868cc120b 100644 --- a/drivers/net/dsa/ocelot/felix_vsc9959.c +++ b/drivers/net/dsa/ocelot/felix_vsc9959.c @@ -1467,6 +1467,7 @@ static int felix_pci_probe(struct pci_dev *pdev, ds->ops = &felix_switch_ops; ds->priv = ocelot; felix->ds = ds; + felix->tag_proto = DSA_TAG_PROTO_OCELOT; err = dsa_register_switch(ds); if (err) { diff --git a/drivers/net/dsa/ocelot/seville_vsc9953.c b/drivers/net/dsa/ocelot/seville_vsc9953.c index 5e9bfdea50be5..512f677a6c1c5 100644 --- a/drivers/net/dsa/ocelot/seville_vsc9953.c +++ b/drivers/net/dsa/ocelot/seville_vsc9953.c @@ -1246,6 +1246,7 @@ static int seville_probe(struct platform_device *pdev) ds->ops = &felix_switch_ops; ds->priv = ocelot; felix->ds = ds; + felix->tag_proto = DSA_TAG_PROTO_OCELOT; err = dsa_register_switch(ds); if (err) { -- GitLab From 7c83a7c539abe9f980996063ac20532a7a7f6eb1 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Fri, 29 Jan 2021 03:00:08 +0200 Subject: [PATCH 1748/2012] net: dsa: add a second tagger for Ocelot switches based on tag_8021q There are use cases for which the existing tagger, based on the NPI (Node Processor Interface) functionality, is insufficient. Namely: - Frames injected through the NPI port bypass the frame analyzer, so no source address learning is performed, no TSN stream classification, etc. - Flow control is not functional over an NPI port (PAUSE frames are encapsulated in the same Extraction Frame Header as all other frames) - There can be at most one NPI port configured for an Ocelot switch. But in NXP LS1028A and T1040 there are two Ethernet CPU ports. The non-NPI port is currently either disabled, or operated as a plain user port (albeit an internally-facing one). Having the ability to configure the two CPU ports symmetrically could pave the way for e.g. creating a LAG between them, to increase bandwidth seamlessly for the system. So there is a desire to have an alternative to the NPI mode. This change keeps the default tagger for the Seville and Felix switches as "ocelot", but it can be changed via the following device attribute: echo ocelot-8021q > /sys/class//dsa/tagging Signed-off-by: Vladimir Oltean Reviewed-by: Florian Fainelli Signed-off-by: Jakub Kicinski --- MAINTAINERS | 1 + drivers/net/dsa/ocelot/Kconfig | 2 + include/net/dsa.h | 2 + net/dsa/Kconfig | 21 +++++++++-- net/dsa/Makefile | 1 + net/dsa/tag_ocelot_8021q.c | 68 ++++++++++++++++++++++++++++++++++ 6 files changed, 92 insertions(+), 3 deletions(-) create mode 100644 net/dsa/tag_ocelot_8021q.c diff --git a/MAINTAINERS b/MAINTAINERS index 627e90a9e2961..d1b0057a97970 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -12859,6 +12859,7 @@ F: drivers/net/dsa/ocelot/* F: drivers/net/ethernet/mscc/ F: include/soc/mscc/ocelot* F: net/dsa/tag_ocelot.c +F: net/dsa/tag_ocelot_8021q.c F: tools/testing/selftests/drivers/net/ocelot/* OCXL (Open Coherent Accelerator Processor Interface OpenCAPI) DRIVER diff --git a/drivers/net/dsa/ocelot/Kconfig b/drivers/net/dsa/ocelot/Kconfig index c110e82a79737..932b6b6fe817f 100644 --- a/drivers/net/dsa/ocelot/Kconfig +++ b/drivers/net/dsa/ocelot/Kconfig @@ -6,6 +6,7 @@ config NET_DSA_MSCC_FELIX depends on NET_VENDOR_FREESCALE depends on HAS_IOMEM select MSCC_OCELOT_SWITCH_LIB + select NET_DSA_TAG_OCELOT_8021Q select NET_DSA_TAG_OCELOT select FSL_ENETC_MDIO select PCS_LYNX @@ -19,6 +20,7 @@ config NET_DSA_MSCC_SEVILLE depends on NET_VENDOR_MICROSEMI depends on HAS_IOMEM select MSCC_OCELOT_SWITCH_LIB + select NET_DSA_TAG_OCELOT_8021Q select NET_DSA_TAG_OCELOT select PCS_LYNX help diff --git a/include/net/dsa.h b/include/net/dsa.h index 7ea3918b2e618..60acb9fca124e 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -47,6 +47,7 @@ struct phylink_link_state; #define DSA_TAG_PROTO_RTL4_A_VALUE 17 #define DSA_TAG_PROTO_HELLCREEK_VALUE 18 #define DSA_TAG_PROTO_XRS700X_VALUE 19 +#define DSA_TAG_PROTO_OCELOT_8021Q_VALUE 20 enum dsa_tag_protocol { DSA_TAG_PROTO_NONE = DSA_TAG_PROTO_NONE_VALUE, @@ -69,6 +70,7 @@ enum dsa_tag_protocol { DSA_TAG_PROTO_RTL4_A = DSA_TAG_PROTO_RTL4_A_VALUE, DSA_TAG_PROTO_HELLCREEK = DSA_TAG_PROTO_HELLCREEK_VALUE, DSA_TAG_PROTO_XRS700X = DSA_TAG_PROTO_XRS700X_VALUE, + DSA_TAG_PROTO_OCELOT_8021Q = DSA_TAG_PROTO_OCELOT_8021Q_VALUE, }; struct packet_type; diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig index 2d226a5c085fb..a45572cfb71a7 100644 --- a/net/dsa/Kconfig +++ b/net/dsa/Kconfig @@ -105,11 +105,26 @@ config NET_DSA_TAG_RTL4_A the Realtek RTL8366RB. config NET_DSA_TAG_OCELOT - tristate "Tag driver for Ocelot family of switches" + tristate "Tag driver for Ocelot family of switches, using NPI port" select PACKING help - Say Y or M if you want to enable support for tagging frames for the - Ocelot switches (VSC7511, VSC7512, VSC7513, VSC7514, VSC9959). + Say Y or M if you want to enable NPI tagging for the Ocelot switches + (VSC7511, VSC7512, VSC7513, VSC7514, VSC9953, VSC9959). In this mode, + the frames over the Ethernet CPU port are prepended with a + hardware-defined injection/extraction frame header. Flow control + (PAUSE frames) over the CPU port is not supported when operating in + this mode. + +config NET_DSA_TAG_OCELOT_8021Q + tristate "Tag driver for Ocelot family of switches, using VLAN" + select NET_DSA_TAG_8021Q + help + Say Y or M if you want to enable support for tagging frames with a + custom VLAN-based header. Frames that require timestamping, such as + PTP, are not delivered over Ethernet but over register-based MMIO. + Flow control over the CPU port is functional in this mode. When using + this mode, less TCAM resources (VCAP IS1, IS2, ES0) are available for + use with tc-flower. config NET_DSA_TAG_QCA tristate "Tag driver for Qualcomm Atheros QCA8K switches" diff --git a/net/dsa/Makefile b/net/dsa/Makefile index 92cea21322415..44bc79952b8b8 100644 --- a/net/dsa/Makefile +++ b/net/dsa/Makefile @@ -15,6 +15,7 @@ obj-$(CONFIG_NET_DSA_TAG_RTL4_A) += tag_rtl4_a.o obj-$(CONFIG_NET_DSA_TAG_LAN9303) += tag_lan9303.o obj-$(CONFIG_NET_DSA_TAG_MTK) += tag_mtk.o obj-$(CONFIG_NET_DSA_TAG_OCELOT) += tag_ocelot.o +obj-$(CONFIG_NET_DSA_TAG_OCELOT_8021Q) += tag_ocelot_8021q.o obj-$(CONFIG_NET_DSA_TAG_QCA) += tag_qca.o obj-$(CONFIG_NET_DSA_TAG_SJA1105) += tag_sja1105.o obj-$(CONFIG_NET_DSA_TAG_TRAILER) += tag_trailer.o diff --git a/net/dsa/tag_ocelot_8021q.c b/net/dsa/tag_ocelot_8021q.c new file mode 100644 index 0000000000000..8991ebf098a31 --- /dev/null +++ b/net/dsa/tag_ocelot_8021q.c @@ -0,0 +1,68 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright 2020-2021 NXP Semiconductors + * + * An implementation of the software-defined tag_8021q.c tagger format, which + * also preserves full functionality under a vlan_filtering bridge. It does + * this by using the TCAM engines for: + * - pushing the RX VLAN as a second, outer tag, on egress towards the CPU port + * - redirecting towards the correct front port based on TX VLAN and popping + * that on egress + */ +#include +#include "dsa_priv.h" + +static struct sk_buff *ocelot_xmit(struct sk_buff *skb, + struct net_device *netdev) +{ + struct dsa_port *dp = dsa_slave_to_port(netdev); + u16 tx_vid = dsa_8021q_tx_vid(dp->ds, dp->index); + u16 queue_mapping = skb_get_queue_mapping(skb); + u8 pcp = netdev_txq_to_tc(netdev, queue_mapping); + + return dsa_8021q_xmit(skb, netdev, ETH_P_8021Q, + ((pcp << VLAN_PRIO_SHIFT) | tx_vid)); +} + +static struct sk_buff *ocelot_rcv(struct sk_buff *skb, + struct net_device *netdev, + struct packet_type *pt) +{ + int src_port, switch_id, qos_class; + u16 vid, tci; + + skb_push_rcsum(skb, ETH_HLEN); + if (skb_vlan_tag_present(skb)) { + tci = skb_vlan_tag_get(skb); + __vlan_hwaccel_clear_tag(skb); + } else { + __skb_vlan_pop(skb, &tci); + } + skb_pull_rcsum(skb, ETH_HLEN); + + vid = tci & VLAN_VID_MASK; + src_port = dsa_8021q_rx_source_port(vid); + switch_id = dsa_8021q_rx_switch_id(vid); + qos_class = (tci & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT; + + skb->dev = dsa_master_find_slave(netdev, switch_id, src_port); + if (!skb->dev) + return NULL; + + skb->offload_fwd_mark = 1; + skb->priority = qos_class; + + return skb; +} + +static const struct dsa_device_ops ocelot_8021q_netdev_ops = { + .name = "ocelot-8021q", + .proto = DSA_TAG_PROTO_OCELOT_8021Q, + .xmit = ocelot_xmit, + .rcv = ocelot_rcv, + .overhead = VLAN_HLEN, +}; + +MODULE_LICENSE("GPL v2"); +MODULE_ALIAS_DSA_TAG_DRIVER(DSA_TAG_PROTO_OCELOT_8021Q); + +module_dsa_tag_driver(ocelot_8021q_netdev_ops); -- GitLab From e21268efbe26d9ab3f7468577d691b992d76e06a Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Fri, 29 Jan 2021 03:00:09 +0200 Subject: [PATCH 1749/2012] net: dsa: felix: perform switch setup for tag_8021q Unlike sja1105, the only other user of the software-defined tag_8021q.c tagger format, the implementation we choose for the Felix DSA switch driver preserves full functionality under a vlan_filtering bridge (i.e. IP termination works through the DSA user ports under all circumstances). The tag_8021q protocol just wants: - Identifying the ingress switch port based on the RX VLAN ID, as seen by the CPU. We achieve this by using the TCAM engines (which are also used for tc-flower offload) to push the RX VLAN as a second, outer tag, on egress towards the CPU port. - Steering traffic injected into the switch from the network stack towards the correct front port based on the TX VLAN, and consuming (popping) that header on the switch's egress. A tc-flower pseudocode of the static configuration done by the driver would look like this: $ tc qdisc add dev clsact $ for eth in swp0 swp1 swp2 swp3; do \ tc filter add dev egress flower indev ${eth} \ action vlan push id protocol 802.1ad; \ tc filter add dev ingress protocol 802.1Q flower vlan_id action vlan pop \ action mirred egress redirect dev ${eth}; \ done but of course since DSA does not register network interfaces for the CPU port, this configuration would be impossible for the user to do. Also, due to the same reason, it is impossible for the user to inadvertently delete these rules using tc. These rules do not collide in any way with tc-flower, they just consume some TCAM space, which is something we can live with. Signed-off-by: Vladimir Oltean Reviewed-by: Florian Fainelli Signed-off-by: Jakub Kicinski --- drivers/net/dsa/ocelot/felix.c | 337 +++++++++++++++++++++++- drivers/net/dsa/ocelot/felix.h | 1 + drivers/net/ethernet/mscc/ocelot.c | 61 ++++- drivers/net/ethernet/mscc/ocelot_vcap.c | 1 + drivers/net/ethernet/mscc/ocelot_vcap.h | 3 - include/soc/mscc/ocelot.h | 2 + include/soc/mscc/ocelot_vcap.h | 3 + 7 files changed, 396 insertions(+), 12 deletions(-) diff --git a/drivers/net/dsa/ocelot/felix.c b/drivers/net/dsa/ocelot/felix.c index af3cee8762ce0..167463010b556 100644 --- a/drivers/net/dsa/ocelot/felix.c +++ b/drivers/net/dsa/ocelot/felix.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -24,6 +25,329 @@ #include #include "felix.h" +static int felix_tag_8021q_rxvlan_add(struct felix *felix, int port, u16 vid, + bool pvid, bool untagged) +{ + struct ocelot_vcap_filter *outer_tagging_rule; + struct ocelot *ocelot = &felix->ocelot; + struct dsa_switch *ds = felix->ds; + int key_length, upstream, err; + + /* We don't need to install the rxvlan into the other ports' filtering + * tables, because we're just pushing the rxvlan when sending towards + * the CPU + */ + if (!pvid) + return 0; + + key_length = ocelot->vcap[VCAP_ES0].keys[VCAP_ES0_IGR_PORT].length; + upstream = dsa_upstream_port(ds, port); + + outer_tagging_rule = kzalloc(sizeof(struct ocelot_vcap_filter), + GFP_KERNEL); + if (!outer_tagging_rule) + return -ENOMEM; + + outer_tagging_rule->key_type = OCELOT_VCAP_KEY_ANY; + outer_tagging_rule->prio = 1; + outer_tagging_rule->id.cookie = port; + outer_tagging_rule->id.tc_offload = false; + outer_tagging_rule->block_id = VCAP_ES0; + outer_tagging_rule->type = OCELOT_VCAP_FILTER_OFFLOAD; + outer_tagging_rule->lookup = 0; + outer_tagging_rule->ingress_port.value = port; + outer_tagging_rule->ingress_port.mask = GENMASK(key_length - 1, 0); + outer_tagging_rule->egress_port.value = upstream; + outer_tagging_rule->egress_port.mask = GENMASK(key_length - 1, 0); + outer_tagging_rule->action.push_outer_tag = OCELOT_ES0_TAG; + outer_tagging_rule->action.tag_a_tpid_sel = OCELOT_TAG_TPID_SEL_8021AD; + outer_tagging_rule->action.tag_a_vid_sel = 1; + outer_tagging_rule->action.vid_a_val = vid; + + err = ocelot_vcap_filter_add(ocelot, outer_tagging_rule, NULL); + if (err) + kfree(outer_tagging_rule); + + return err; +} + +static int felix_tag_8021q_txvlan_add(struct felix *felix, int port, u16 vid, + bool pvid, bool untagged) +{ + struct ocelot_vcap_filter *untagging_rule, *redirect_rule; + struct ocelot *ocelot = &felix->ocelot; + struct dsa_switch *ds = felix->ds; + int upstream, err; + + /* tag_8021q.c assumes we are implementing this via port VLAN + * membership, which we aren't. So we don't need to add any VCAP filter + * for the CPU port. + */ + if (ocelot->ports[port]->is_dsa_8021q_cpu) + return 0; + + untagging_rule = kzalloc(sizeof(struct ocelot_vcap_filter), GFP_KERNEL); + if (!untagging_rule) + return -ENOMEM; + + redirect_rule = kzalloc(sizeof(struct ocelot_vcap_filter), GFP_KERNEL); + if (!redirect_rule) { + kfree(untagging_rule); + return -ENOMEM; + } + + upstream = dsa_upstream_port(ds, port); + + untagging_rule->key_type = OCELOT_VCAP_KEY_ANY; + untagging_rule->ingress_port_mask = BIT(upstream); + untagging_rule->vlan.vid.value = vid; + untagging_rule->vlan.vid.mask = VLAN_VID_MASK; + untagging_rule->prio = 1; + untagging_rule->id.cookie = port; + untagging_rule->id.tc_offload = false; + untagging_rule->block_id = VCAP_IS1; + untagging_rule->type = OCELOT_VCAP_FILTER_OFFLOAD; + untagging_rule->lookup = 0; + untagging_rule->action.vlan_pop_cnt_ena = true; + untagging_rule->action.vlan_pop_cnt = 1; + untagging_rule->action.pag_override_mask = 0xff; + untagging_rule->action.pag_val = port; + + err = ocelot_vcap_filter_add(ocelot, untagging_rule, NULL); + if (err) { + kfree(untagging_rule); + kfree(redirect_rule); + return err; + } + + redirect_rule->key_type = OCELOT_VCAP_KEY_ANY; + redirect_rule->ingress_port_mask = BIT(upstream); + redirect_rule->pag = port; + redirect_rule->prio = 1; + redirect_rule->id.cookie = port; + redirect_rule->id.tc_offload = false; + redirect_rule->block_id = VCAP_IS2; + redirect_rule->type = OCELOT_VCAP_FILTER_OFFLOAD; + redirect_rule->lookup = 0; + redirect_rule->action.mask_mode = OCELOT_MASK_MODE_REDIRECT; + redirect_rule->action.port_mask = BIT(port); + + err = ocelot_vcap_filter_add(ocelot, redirect_rule, NULL); + if (err) { + ocelot_vcap_filter_del(ocelot, untagging_rule); + kfree(redirect_rule); + return err; + } + + return 0; +} + +static int felix_tag_8021q_vlan_add(struct dsa_switch *ds, int port, u16 vid, + u16 flags) +{ + bool untagged = flags & BRIDGE_VLAN_INFO_UNTAGGED; + bool pvid = flags & BRIDGE_VLAN_INFO_PVID; + struct ocelot *ocelot = ds->priv; + + if (vid_is_dsa_8021q_rxvlan(vid)) + return felix_tag_8021q_rxvlan_add(ocelot_to_felix(ocelot), + port, vid, pvid, untagged); + + if (vid_is_dsa_8021q_txvlan(vid)) + return felix_tag_8021q_txvlan_add(ocelot_to_felix(ocelot), + port, vid, pvid, untagged); + + return 0; +} + +static int felix_tag_8021q_rxvlan_del(struct felix *felix, int port, u16 vid) +{ + struct ocelot_vcap_filter *outer_tagging_rule; + struct ocelot_vcap_block *block_vcap_es0; + struct ocelot *ocelot = &felix->ocelot; + + block_vcap_es0 = &ocelot->block[VCAP_ES0]; + + outer_tagging_rule = ocelot_vcap_block_find_filter_by_id(block_vcap_es0, + port, false); + /* In rxvlan_add, we had the "if (!pvid) return 0" logic to avoid + * installing outer tagging ES0 rules where they weren't needed. + * But in rxvlan_del, the API doesn't give us the "flags" anymore, + * so that forces us to be slightly sloppy here, and just assume that + * if we didn't find an outer_tagging_rule it means that there was + * none in the first place, i.e. rxvlan_del is called on a non-pvid + * port. This is most probably true though. + */ + if (!outer_tagging_rule) + return 0; + + return ocelot_vcap_filter_del(ocelot, outer_tagging_rule); +} + +static int felix_tag_8021q_txvlan_del(struct felix *felix, int port, u16 vid) +{ + struct ocelot_vcap_filter *untagging_rule, *redirect_rule; + struct ocelot_vcap_block *block_vcap_is1; + struct ocelot_vcap_block *block_vcap_is2; + struct ocelot *ocelot = &felix->ocelot; + int err; + + if (ocelot->ports[port]->is_dsa_8021q_cpu) + return 0; + + block_vcap_is1 = &ocelot->block[VCAP_IS1]; + block_vcap_is2 = &ocelot->block[VCAP_IS2]; + + untagging_rule = ocelot_vcap_block_find_filter_by_id(block_vcap_is1, + port, false); + if (!untagging_rule) + return 0; + + err = ocelot_vcap_filter_del(ocelot, untagging_rule); + if (err) + return err; + + redirect_rule = ocelot_vcap_block_find_filter_by_id(block_vcap_is2, + port, false); + if (!redirect_rule) + return 0; + + return ocelot_vcap_filter_del(ocelot, redirect_rule); +} + +static int felix_tag_8021q_vlan_del(struct dsa_switch *ds, int port, u16 vid) +{ + struct ocelot *ocelot = ds->priv; + + if (vid_is_dsa_8021q_rxvlan(vid)) + return felix_tag_8021q_rxvlan_del(ocelot_to_felix(ocelot), + port, vid); + + if (vid_is_dsa_8021q_txvlan(vid)) + return felix_tag_8021q_txvlan_del(ocelot_to_felix(ocelot), + port, vid); + + return 0; +} + +static const struct dsa_8021q_ops felix_tag_8021q_ops = { + .vlan_add = felix_tag_8021q_vlan_add, + .vlan_del = felix_tag_8021q_vlan_del, +}; + +/* Alternatively to using the NPI functionality, that same hardware MAC + * connected internally to the enetc or fman DSA master can be configured to + * use the software-defined tag_8021q frame format. As far as the hardware is + * concerned, it thinks it is a "dumb switch" - the queues of the CPU port + * module are now disconnected from it, but can still be accessed through + * register-based MMIO. + */ +static void felix_8021q_cpu_port_init(struct ocelot *ocelot, int port) +{ + ocelot->ports[port]->is_dsa_8021q_cpu = true; + ocelot->npi = -1; + + /* Overwrite PGID_CPU with the non-tagging port */ + ocelot_write_rix(ocelot, BIT(port), ANA_PGID_PGID, PGID_CPU); + + ocelot_apply_bridge_fwd_mask(ocelot); +} + +static void felix_8021q_cpu_port_deinit(struct ocelot *ocelot, int port) +{ + ocelot->ports[port]->is_dsa_8021q_cpu = false; + + /* Restore PGID_CPU */ + ocelot_write_rix(ocelot, BIT(ocelot->num_phys_ports), ANA_PGID_PGID, + PGID_CPU); + + ocelot_apply_bridge_fwd_mask(ocelot); +} + +static int felix_setup_tag_8021q(struct dsa_switch *ds, int cpu) +{ + struct ocelot *ocelot = ds->priv; + struct felix *felix = ocelot_to_felix(ocelot); + unsigned long cpu_flood; + int port, err; + + felix_8021q_cpu_port_init(ocelot, cpu); + + for (port = 0; port < ds->num_ports; port++) { + if (dsa_is_unused_port(ds, port)) + continue; + + /* This overwrites ocelot_init(): + * Do not forward BPDU frames to the CPU port module, + * for 2 reasons: + * - When these packets are injected from the tag_8021q + * CPU port, we want them to go out, not loop back + * into the system. + * - STP traffic ingressing on a user port should go to + * the tag_8021q CPU port, not to the hardware CPU + * port module. + */ + ocelot_write_gix(ocelot, + ANA_PORT_CPU_FWD_BPDU_CFG_BPDU_REDIR_ENA(0), + ANA_PORT_CPU_FWD_BPDU_CFG, port); + } + + /* In tag_8021q mode, the CPU port module is unused. So we + * want to disable flooding of any kind to the CPU port module, + * since packets going there will end in a black hole. + */ + cpu_flood = ANA_PGID_PGID_PGID(BIT(ocelot->num_phys_ports)); + ocelot_rmw_rix(ocelot, 0, cpu_flood, ANA_PGID_PGID, PGID_UC); + ocelot_rmw_rix(ocelot, 0, cpu_flood, ANA_PGID_PGID, PGID_MC); + + felix->dsa_8021q_ctx = kzalloc(sizeof(*felix->dsa_8021q_ctx), + GFP_KERNEL); + if (!felix->dsa_8021q_ctx) + return -ENOMEM; + + felix->dsa_8021q_ctx->ops = &felix_tag_8021q_ops; + felix->dsa_8021q_ctx->proto = htons(ETH_P_8021AD); + felix->dsa_8021q_ctx->ds = ds; + + err = dsa_8021q_setup(felix->dsa_8021q_ctx, true); + if (err) + goto out_free_dsa_8021_ctx; + + return 0; + +out_free_dsa_8021_ctx: + kfree(felix->dsa_8021q_ctx); + return err; +} + +static void felix_teardown_tag_8021q(struct dsa_switch *ds, int cpu) +{ + struct ocelot *ocelot = ds->priv; + struct felix *felix = ocelot_to_felix(ocelot); + int err, port; + + err = dsa_8021q_setup(felix->dsa_8021q_ctx, false); + if (err) + dev_err(ds->dev, "dsa_8021q_setup returned %d", err); + + kfree(felix->dsa_8021q_ctx); + + for (port = 0; port < ds->num_ports; port++) { + if (dsa_is_unused_port(ds, port)) + continue; + + /* Restore the logic from ocelot_init: + * do not forward BPDU frames to the front ports. + */ + ocelot_write_gix(ocelot, + ANA_PORT_CPU_FWD_BPDU_CFG_BPDU_REDIR_ENA(0xffff), + ANA_PORT_CPU_FWD_BPDU_CFG, + port); + } + + felix_8021q_cpu_port_deinit(ocelot, cpu); +} + /* The CPU port module is connected to the Node Processor Interface (NPI). This * is the mode through which frames can be injected from and extracted to an * external CPU, over Ethernet. In NXP SoCs, the "external CPU" is the ARM CPU @@ -107,6 +431,9 @@ static int felix_set_tag_protocol(struct dsa_switch *ds, int cpu, case DSA_TAG_PROTO_OCELOT: err = felix_setup_tag_npi(ds, cpu); break; + case DSA_TAG_PROTO_OCELOT_8021Q: + err = felix_setup_tag_8021q(ds, cpu); + break; default: err = -EPROTONOSUPPORT; } @@ -121,11 +448,18 @@ static void felix_del_tag_protocol(struct dsa_switch *ds, int cpu, case DSA_TAG_PROTO_OCELOT: felix_teardown_tag_npi(ds, cpu); break; + case DSA_TAG_PROTO_OCELOT_8021Q: + felix_teardown_tag_8021q(ds, cpu); + break; default: break; } } +/* This always leaves the switch in a consistent state, because although the + * tag_8021q setup can fail, the NPI setup can't. So either the change is made, + * or the restoration is guaranteed to work. + */ static int felix_change_tag_protocol(struct dsa_switch *ds, int cpu, enum dsa_tag_protocol proto) { @@ -134,7 +468,8 @@ static int felix_change_tag_protocol(struct dsa_switch *ds, int cpu, enum dsa_tag_protocol old_proto = felix->tag_proto; int err; - if (proto != DSA_TAG_PROTO_OCELOT) + if (proto != DSA_TAG_PROTO_OCELOT && + proto != DSA_TAG_PROTO_OCELOT_8021Q) return -EPROTONOSUPPORT; felix_del_tag_protocol(ds, cpu, old_proto); diff --git a/drivers/net/dsa/ocelot/felix.h b/drivers/net/dsa/ocelot/felix.h index 264b3bbdc4d1f..9d4459f2fffb8 100644 --- a/drivers/net/dsa/ocelot/felix.h +++ b/drivers/net/dsa/ocelot/felix.h @@ -48,6 +48,7 @@ struct felix { struct lynx_pcs **pcs; resource_size_t switch_base; resource_size_t imdio_base; + struct dsa_8021q_context *dsa_8021q_ctx; enum dsa_tag_protocol tag_proto; }; diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c index 714165c2f85ab..5f21799ad85bc 100644 --- a/drivers/net/ethernet/mscc/ocelot.c +++ b/drivers/net/ethernet/mscc/ocelot.c @@ -889,18 +889,60 @@ int ocelot_get_ts_info(struct ocelot *ocelot, int port, } EXPORT_SYMBOL(ocelot_get_ts_info); -static void ocelot_apply_bridge_fwd_mask(struct ocelot *ocelot) +static u32 ocelot_get_dsa_8021q_cpu_mask(struct ocelot *ocelot) { + u32 mask = 0; int port; + for (port = 0; port < ocelot->num_phys_ports; port++) { + struct ocelot_port *ocelot_port = ocelot->ports[port]; + + if (!ocelot_port) + continue; + + if (ocelot_port->is_dsa_8021q_cpu) + mask |= BIT(port); + } + + return mask; +} + +void ocelot_apply_bridge_fwd_mask(struct ocelot *ocelot) +{ + unsigned long cpu_fwd_mask; + int port; + + /* If a DSA tag_8021q CPU exists, it needs to be included in the + * regular forwarding path of the front ports regardless of whether + * those are bridged or standalone. + * If DSA tag_8021q is not used, this returns 0, which is fine because + * the hardware-based CPU port module can be a destination for packets + * even if it isn't part of PGID_SRC. + */ + cpu_fwd_mask = ocelot_get_dsa_8021q_cpu_mask(ocelot); + /* Apply FWD mask. The loop is needed to add/remove the current port as * a source for the other ports. */ for (port = 0; port < ocelot->num_phys_ports; port++) { - if (ocelot->bridge_fwd_mask & BIT(port)) { - unsigned long mask = ocelot->bridge_fwd_mask & ~BIT(port); + struct ocelot_port *ocelot_port = ocelot->ports[port]; + unsigned long mask; + + if (!ocelot_port) { + /* Unused ports can't send anywhere */ + mask = 0; + } else if (ocelot_port->is_dsa_8021q_cpu) { + /* The DSA tag_8021q CPU ports need to be able to + * forward packets to all other ports except for + * themselves + */ + mask = GENMASK(ocelot->num_phys_ports - 1, 0); + mask &= ~cpu_fwd_mask; + } else if (ocelot->bridge_fwd_mask & BIT(port)) { int lag; + mask = ocelot->bridge_fwd_mask & ~BIT(port); + for (lag = 0; lag < ocelot->num_phys_ports; lag++) { unsigned long bond_mask = ocelot->lags[lag]; @@ -912,15 +954,18 @@ static void ocelot_apply_bridge_fwd_mask(struct ocelot *ocelot) break; } } - - ocelot_write_rix(ocelot, mask, - ANA_PGID_PGID, PGID_SRC + port); } else { - ocelot_write_rix(ocelot, 0, - ANA_PGID_PGID, PGID_SRC + port); + /* Standalone ports forward only to DSA tag_8021q CPU + * ports (if those exist), or to the hardware CPU port + * module otherwise. + */ + mask = cpu_fwd_mask; } + + ocelot_write_rix(ocelot, mask, ANA_PGID_PGID, PGID_SRC + port); } } +EXPORT_SYMBOL(ocelot_apply_bridge_fwd_mask); void ocelot_bridge_stp_state_set(struct ocelot *ocelot, int port, u8 state) { diff --git a/drivers/net/ethernet/mscc/ocelot_vcap.c b/drivers/net/ethernet/mscc/ocelot_vcap.c index b82fd4103a685..37a232911395d 100644 --- a/drivers/net/ethernet/mscc/ocelot_vcap.c +++ b/drivers/net/ethernet/mscc/ocelot_vcap.c @@ -1009,6 +1009,7 @@ ocelot_vcap_block_find_filter_by_id(struct ocelot_vcap_block *block, int cookie, return NULL; } +EXPORT_SYMBOL(ocelot_vcap_block_find_filter_by_id); /* If @on=false, then SNAP, ARP, IP and OAM frames will not match on keys based * on destination and source MAC addresses, but only on higher-level protocol diff --git a/drivers/net/ethernet/mscc/ocelot_vcap.h b/drivers/net/ethernet/mscc/ocelot_vcap.h index 3b0c7916056e9..523611ccc48fd 100644 --- a/drivers/net/ethernet/mscc/ocelot_vcap.h +++ b/drivers/net/ethernet/mscc/ocelot_vcap.h @@ -14,9 +14,6 @@ int ocelot_vcap_filter_stats_update(struct ocelot *ocelot, struct ocelot_vcap_filter *rule); -struct ocelot_vcap_filter * -ocelot_vcap_block_find_filter_by_id(struct ocelot_vcap_block *block, int id, - bool tc_offload); void ocelot_detect_vcap_constants(struct ocelot *ocelot); int ocelot_vcap_init(struct ocelot *ocelot); diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h index 93c22627dedd3..6a61c499a30d1 100644 --- a/include/soc/mscc/ocelot.h +++ b/include/soc/mscc/ocelot.h @@ -610,6 +610,7 @@ struct ocelot_port { phy_interface_t phy_mode; u8 *xmit_template; + bool is_dsa_8021q_cpu; }; struct ocelot { @@ -760,6 +761,7 @@ void ocelot_adjust_link(struct ocelot *ocelot, int port, struct phy_device *phydev); int ocelot_port_vlan_filtering(struct ocelot *ocelot, int port, bool enabled); void ocelot_bridge_stp_state_set(struct ocelot *ocelot, int port, u8 state); +void ocelot_apply_bridge_fwd_mask(struct ocelot *ocelot); int ocelot_port_bridge_join(struct ocelot *ocelot, int port, struct net_device *bridge); int ocelot_port_bridge_leave(struct ocelot *ocelot, int port, diff --git a/include/soc/mscc/ocelot_vcap.h b/include/soc/mscc/ocelot_vcap.h index 76e01c927e17a..25fd525aaf928 100644 --- a/include/soc/mscc/ocelot_vcap.h +++ b/include/soc/mscc/ocelot_vcap.h @@ -693,5 +693,8 @@ int ocelot_vcap_filter_add(struct ocelot *ocelot, struct netlink_ext_ack *extack); int ocelot_vcap_filter_del(struct ocelot *ocelot, struct ocelot_vcap_filter *rule); +struct ocelot_vcap_filter * +ocelot_vcap_block_find_filter_by_id(struct ocelot_vcap_block *block, int id, + bool tc_offload); #endif /* _OCELOT_VCAP_H_ */ -- GitLab From 7d0888d52faa8cc8cde05643d7ed267adc74f03f Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Fri, 29 Jan 2021 13:51:41 +0200 Subject: [PATCH 1750/2012] net: bridge: mcast: drop hosts limit sysfs support We decided to stop adding new sysfs bridge options and continue with netlink only, so remove hosts limit sysfs support. Signed-off-by: Nikolay Aleksandrov Signed-off-by: Jakub Kicinski --- net/bridge/br_sysfs_if.c | 26 -------------------------- 1 file changed, 26 deletions(-) diff --git a/net/bridge/br_sysfs_if.c b/net/bridge/br_sysfs_if.c index b66305fae26b0..7a59cdddd3ce3 100644 --- a/net/bridge/br_sysfs_if.c +++ b/net/bridge/br_sysfs_if.c @@ -16,7 +16,6 @@ #include #include "br_private.h" -#include "br_private_mcast_eht.h" struct brport_attribute { struct attribute attr; @@ -246,29 +245,6 @@ static int store_multicast_router(struct net_bridge_port *p, static BRPORT_ATTR(multicast_router, 0644, show_multicast_router, store_multicast_router); -static ssize_t show_multicast_eht_hosts_limit(struct net_bridge_port *p, - char *buf) -{ - return sprintf(buf, "%u\n", p->multicast_eht_hosts_limit); -} - -static int store_multicast_eht_hosts_limit(struct net_bridge_port *p, - unsigned long v) -{ - return br_multicast_eht_set_hosts_limit(p, v); -} -static BRPORT_ATTR(multicast_eht_hosts_limit, 0644, - show_multicast_eht_hosts_limit, - store_multicast_eht_hosts_limit); - -static ssize_t show_multicast_eht_hosts_cnt(struct net_bridge_port *p, - char *buf) -{ - return sprintf(buf, "%u\n", p->multicast_eht_hosts_cnt); -} -static BRPORT_ATTR(multicast_eht_hosts_cnt, 0444, show_multicast_eht_hosts_cnt, - NULL); - BRPORT_ATTR_FLAG(multicast_fast_leave, BR_MULTICAST_FAST_LEAVE); BRPORT_ATTR_FLAG(multicast_to_unicast, BR_MULTICAST_TO_UNICAST); #endif @@ -298,8 +274,6 @@ static const struct brport_attribute *brport_attrs[] = { &brport_attr_multicast_router, &brport_attr_multicast_fast_leave, &brport_attr_multicast_to_unicast, - &brport_attr_multicast_eht_hosts_limit, - &brport_attr_multicast_eht_hosts_cnt, #endif &brport_attr_proxyarp, &brport_attr_proxyarp_wifi, -- GitLab From 1e16f382ae0ba0a244ebeea5783153f5c4f7e6c1 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Fri, 29 Jan 2021 13:51:42 +0200 Subject: [PATCH 1751/2012] net: bridge: add warning comments to avoid extending sysfs We're moving to netlink-only options, so add comments in the bridge's sysfs files to warn against adding any new sysfs entries. Signed-off-by: Nikolay Aleksandrov Signed-off-by: Jakub Kicinski --- net/bridge/br_sysfs_br.c | 4 ++++ net/bridge/br_sysfs_if.c | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c index 7db06e3f642a0..71f0f671c4ef4 100644 --- a/net/bridge/br_sysfs_br.c +++ b/net/bridge/br_sysfs_br.c @@ -19,6 +19,10 @@ #include "br_private.h" +/* IMPORTANT: new bridge options must be added with netlink support only + * please do not add new sysfs entries + */ + #define to_bridge(cd) ((struct net_bridge *)netdev_priv(to_net_dev(cd))) /* diff --git a/net/bridge/br_sysfs_if.c b/net/bridge/br_sysfs_if.c index 7a59cdddd3ce3..96ff63cde1beb 100644 --- a/net/bridge/br_sysfs_if.c +++ b/net/bridge/br_sysfs_if.c @@ -17,6 +17,10 @@ #include "br_private.h" +/* IMPORTANT: new bridge port options must be added with netlink support only + * please do not add new sysfs entries + */ + struct brport_attribute { struct attribute attr; ssize_t (*show)(struct net_bridge_port *, char *); -- GitLab From 5399d52233c47905bbf97dcbaa2d7a9cc31670ba Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 29 Jan 2021 23:53:50 +0000 Subject: [PATCH 1752/2012] rxrpc: Fix deadlock around release of dst cached on udp tunnel AF_RXRPC sockets use UDP ports in encap mode. This causes socket and dst from an incoming packet to get stolen and attached to the UDP socket from whence it is leaked when that socket is closed. When a network namespace is removed, the wait for dst records to be cleaned up happens before the cleanup of the rxrpc and UDP socket, meaning that the wait never finishes. Fix this by moving the rxrpc (and, by dependence, the afs) private per-network namespace registrations to the device group rather than subsys group. This allows cached rxrpc local endpoints to be cleared and their UDP sockets closed before we try waiting for the dst records. The symptom is that lines looking like the following: unregister_netdevice: waiting for lo to become free get emitted at regular intervals after running something like the referenced syzbot test. Thanks to Vadim for tracking this down and work out the fix. Reported-by: syzbot+df400f2f24a1677cd7e0@syzkaller.appspotmail.com Reported-by: Vadim Fedorenko Fixes: 5271953cad31 ("rxrpc: Use the UDP encap_rcv hook") Signed-off-by: David Howells Acked-by: Vadim Fedorenko Link: https://lore.kernel.org/r/161196443016.3868642.5577440140646403533.stgit@warthog.procyon.org.uk Signed-off-by: Jakub Kicinski --- fs/afs/main.c | 6 +++--- net/rxrpc/af_rxrpc.c | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/fs/afs/main.c b/fs/afs/main.c index accdd8970e7c0..b2975256dadbd 100644 --- a/fs/afs/main.c +++ b/fs/afs/main.c @@ -193,7 +193,7 @@ static int __init afs_init(void) goto error_cache; #endif - ret = register_pernet_subsys(&afs_net_ops); + ret = register_pernet_device(&afs_net_ops); if (ret < 0) goto error_net; @@ -213,7 +213,7 @@ static int __init afs_init(void) error_proc: afs_fs_exit(); error_fs: - unregister_pernet_subsys(&afs_net_ops); + unregister_pernet_device(&afs_net_ops); error_net: #ifdef CONFIG_AFS_FSCACHE fscache_unregister_netfs(&afs_cache_netfs); @@ -244,7 +244,7 @@ static void __exit afs_exit(void) proc_remove(afs_proc_symlink); afs_fs_exit(); - unregister_pernet_subsys(&afs_net_ops); + unregister_pernet_device(&afs_net_ops); #ifdef CONFIG_AFS_FSCACHE fscache_unregister_netfs(&afs_cache_netfs); #endif diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index 0a2f4817ec6cf..41671af6b33f9 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -990,7 +990,7 @@ static int __init af_rxrpc_init(void) goto error_security; } - ret = register_pernet_subsys(&rxrpc_net_ops); + ret = register_pernet_device(&rxrpc_net_ops); if (ret) goto error_pernet; @@ -1035,7 +1035,7 @@ error_key_type: error_sock: proto_unregister(&rxrpc_proto); error_proto: - unregister_pernet_subsys(&rxrpc_net_ops); + unregister_pernet_device(&rxrpc_net_ops); error_pernet: rxrpc_exit_security(); error_security: @@ -1057,7 +1057,7 @@ static void __exit af_rxrpc_exit(void) unregister_key_type(&key_type_rxrpc); sock_unregister(PF_RXRPC); proto_unregister(&rxrpc_proto); - unregister_pernet_subsys(&rxrpc_net_ops); + unregister_pernet_device(&rxrpc_net_ops); ASSERTCMP(atomic_read(&rxrpc_n_tx_skbs), ==, 0); ASSERTCMP(atomic_read(&rxrpc_n_rx_skbs), ==, 0); -- GitLab From 14e8e0f6008865d823a8184a276702a6c3cbef3d Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Fri, 29 Jan 2021 13:54:38 -0500 Subject: [PATCH 1753/2012] tcp: shrink inet_connection_sock icsk_mtup enabled and probe_size This commit shrinks inet_connection_sock by 4 bytes, by shrinking icsk_mtup.enabled from 32 bits to 1 bit, and shrinking icsk_mtup.probe_size from s32 to an unsuigned 31 bit field. This is to save space to compensate for the recent introduction of a new u32 in inet_connection_sock, icsk_probes_tstamp, in the recent bug fix commit 9d9b1ee0b2d1 ("tcp: fix TCP_USER_TIMEOUT with zero window"). This should not change functionality, since icsk_mtup.enabled is only ever set to 0 or 1, and icsk_mtup.probe_size can only be either 0 or a positive MTU value returned by tcp_mss_to_mtu() Signed-off-by: Neal Cardwell Signed-off-by: Eric Dumazet Link: https://lore.kernel.org/r/20210129185438.1813237-1-ncardwell.kernel@gmail.com Signed-off-by: Jakub Kicinski --- include/net/inet_connection_sock.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index c11f80f328f1d..10a625760de91 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -120,14 +120,14 @@ struct inet_connection_sock { __u16 rcv_mss; /* MSS used for delayed ACK decisions */ } icsk_ack; struct { - int enabled; - /* Range of MTUs to search */ int search_high; int search_low; /* Information on the current probe. */ - int probe_size; + u32 probe_size:31, + /* Is the MTUP feature enabled for this connection? */ + enabled:1; u32 probe_timestamp; } icsk_mtup; -- GitLab From eb4e8fac00d1e01ada5e57c05d24739156086677 Mon Sep 17 00:00:00 2001 From: Chinmay Agarwal Date: Wed, 27 Jan 2021 22:24:54 +0530 Subject: [PATCH 1754/2012] neighbour: Prevent a dead entry from updating gc_list Following race condition was detected: - neigh_flush_dev() is under execution and calls neigh_mark_dead(n) marking the neighbour entry 'n' as dead. - Executing: __netif_receive_skb() -> __netif_receive_skb_core() -> arp_rcv() -> arp_process().arp_process() calls __neigh_lookup() which takes a reference on neighbour entry 'n'. - Moves further along neigh_flush_dev() and calls neigh_cleanup_and_release(n), but since reference count increased in t2, 'n' couldn't be destroyed. - Moves further along, arp_process() and calls neigh_update()-> __neigh_update() -> neigh_update_gc_list(), which adds the neighbour entry back in gc_list(neigh_mark_dead(), removed it earlier in t0 from gc_list) - arp_process() finally calls neigh_release(n), destroying the neighbour entry. This leads to 'n' still being part of gc_list, but the actual neighbour structure has been freed. The situation can be prevented from happening if we disallow a dead entry to have any possibility of updating gc_list. This is what the patch intends to achieve. Fixes: 9c29a2f55ec0 ("neighbor: Fix locking order for gc_list changes") Signed-off-by: Chinmay Agarwal Reviewed-by: Cong Wang Reviewed-by: David Ahern Link: https://lore.kernel.org/r/20210127165453.GA20514@chinagar-linux.qualcomm.com Signed-off-by: Jakub Kicinski --- net/core/neighbour.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 277ed854aef1c..6d2d557442dc6 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -1245,13 +1245,14 @@ static int __neigh_update(struct neighbour *neigh, const u8 *lladdr, old = neigh->nud_state; err = -EPERM; - if (!(flags & NEIGH_UPDATE_F_ADMIN) && - (old & (NUD_NOARP | NUD_PERMANENT))) - goto out; if (neigh->dead) { NL_SET_ERR_MSG(extack, "Neighbor entry is now dead"); + new = old; goto out; } + if (!(flags & NEIGH_UPDATE_F_ADMIN) && + (old & (NUD_NOARP | NUD_PERMANENT))) + goto out; ext_learn_change = neigh_update_ext_learned(neigh, flags, ¬ify); -- GitLab From 4e04b11800194f2ec756b5f3e9f2e559df5a0b1e Mon Sep 17 00:00:00 2001 From: Zheng Yongjun Date: Wed, 16 Dec 2020 21:16:53 +0800 Subject: [PATCH 1755/2012] leds: leds-lm3533: convert comma to semicolon Replace a comma between expression statements by a semicolon. Signed-off-by: Zheng Yongjun Signed-off-by: Pavel Machek --- drivers/leds/leds-lm3533.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/leds/leds-lm3533.c b/drivers/leds/leds-lm3533.c index b3edee7031931..9dd205870525c 100644 --- a/drivers/leds/leds-lm3533.c +++ b/drivers/leds/leds-lm3533.c @@ -679,7 +679,7 @@ static int lm3533_led_probe(struct platform_device *pdev) led->cdev.brightness_get = lm3533_led_get; led->cdev.blink_set = lm3533_led_blink_set; led->cdev.brightness = LED_OFF; - led->cdev.groups = lm3533_led_attribute_groups, + led->cdev.groups = lm3533_led_attribute_groups; led->id = pdev->id; mutex_init(&led->mutex); -- GitLab From 47854d2d2ba8f100c419557a7d9d8f155c0a1064 Mon Sep 17 00:00:00 2001 From: Zheng Yongjun Date: Mon, 14 Dec 2020 21:45:45 +0800 Subject: [PATCH 1756/2012] leds: leds-ariel: convert comma to semicolon Replace a comma between expression statements by a semicolon. Signed-off-by: Zheng Yongjun Reviewed-by: Alexander Dahl Signed-off-by: Pavel Machek --- drivers/leds/leds-ariel.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/leds/leds-ariel.c b/drivers/leds/leds-ariel.c index bb68ba23a7d44..49e1bddaa15e0 100644 --- a/drivers/leds/leds-ariel.c +++ b/drivers/leds/leds-ariel.c @@ -96,14 +96,14 @@ static int ariel_led_probe(struct platform_device *pdev) return -ENOMEM; leds[0].ec_index = EC_BLUE_LED; - leds[0].led_cdev.name = "blue:power", + leds[0].led_cdev.name = "blue:power"; leds[0].led_cdev.default_trigger = "default-on"; leds[1].ec_index = EC_AMBER_LED; - leds[1].led_cdev.name = "amber:status", + leds[1].led_cdev.name = "amber:status"; leds[2].ec_index = EC_GREEN_LED; - leds[2].led_cdev.name = "green:status", + leds[2].led_cdev.name = "green:status"; leds[2].led_cdev.default_trigger = "default-on"; for (i = 0; i < NLEDS; i++) { -- GitLab From 27af8e2c90fba242460b01fa020e6e19ed68c495 Mon Sep 17 00:00:00 2001 From: Andrea Righi Date: Wed, 25 Nov 2020 16:18:22 +0100 Subject: [PATCH 1757/2012] leds: trigger: fix potential deadlock with libata We have the following potential deadlock condition: ======================================================== WARNING: possible irq lock inversion dependency detected 5.10.0-rc2+ #25 Not tainted -------------------------------------------------------- swapper/3/0 just changed the state of lock: ffff8880063bd618 (&host->lock){-...}-{2:2}, at: ata_bmdma_interrupt+0x27/0x200 but this lock took another, HARDIRQ-READ-unsafe lock in the past: (&trig->leddev_list_lock){.+.?}-{2:2} and interrupts could create inverse lock ordering between them. other info that might help us debug this: Possible interrupt unsafe locking scenario: CPU0 CPU1 ---- ---- lock(&trig->leddev_list_lock); local_irq_disable(); lock(&host->lock); lock(&trig->leddev_list_lock); lock(&host->lock); *** DEADLOCK *** no locks held by swapper/3/0. the shortest dependencies between 2nd lock and 1st lock: -> (&trig->leddev_list_lock){.+.?}-{2:2} ops: 46 { HARDIRQ-ON-R at: lock_acquire+0x15f/0x420 _raw_read_lock+0x42/0x90 led_trigger_event+0x2b/0x70 rfkill_global_led_trigger_worker+0x94/0xb0 process_one_work+0x240/0x560 worker_thread+0x58/0x3d0 kthread+0x151/0x170 ret_from_fork+0x1f/0x30 IN-SOFTIRQ-R at: lock_acquire+0x15f/0x420 _raw_read_lock+0x42/0x90 led_trigger_event+0x2b/0x70 kbd_bh+0x9e/0xc0 tasklet_action_common.constprop.0+0xe9/0x100 tasklet_action+0x22/0x30 __do_softirq+0xcc/0x46d run_ksoftirqd+0x3f/0x70 smpboot_thread_fn+0x116/0x1f0 kthread+0x151/0x170 ret_from_fork+0x1f/0x30 SOFTIRQ-ON-R at: lock_acquire+0x15f/0x420 _raw_read_lock+0x42/0x90 led_trigger_event+0x2b/0x70 rfkill_global_led_trigger_worker+0x94/0xb0 process_one_work+0x240/0x560 worker_thread+0x58/0x3d0 kthread+0x151/0x170 ret_from_fork+0x1f/0x30 INITIAL READ USE at: lock_acquire+0x15f/0x420 _raw_read_lock+0x42/0x90 led_trigger_event+0x2b/0x70 rfkill_global_led_trigger_worker+0x94/0xb0 process_one_work+0x240/0x560 worker_thread+0x58/0x3d0 kthread+0x151/0x170 ret_from_fork+0x1f/0x30 } ... key at: [] __key.0+0x0/0x10 ... acquired at: _raw_read_lock+0x42/0x90 led_trigger_blink_oneshot+0x3b/0x90 ledtrig_disk_activity+0x3c/0xa0 ata_qc_complete+0x26/0x450 ata_do_link_abort+0xa3/0xe0 ata_port_freeze+0x2e/0x40 ata_hsm_qc_complete+0x94/0xa0 ata_sff_hsm_move+0x177/0x7a0 ata_sff_pio_task+0xc7/0x1b0 process_one_work+0x240/0x560 worker_thread+0x58/0x3d0 kthread+0x151/0x170 ret_from_fork+0x1f/0x30 -> (&host->lock){-...}-{2:2} ops: 69 { IN-HARDIRQ-W at: lock_acquire+0x15f/0x420 _raw_spin_lock_irqsave+0x52/0xa0 ata_bmdma_interrupt+0x27/0x200 __handle_irq_event_percpu+0xd5/0x2b0 handle_irq_event+0x57/0xb0 handle_edge_irq+0x8c/0x230 asm_call_irq_on_stack+0xf/0x20 common_interrupt+0x100/0x1c0 asm_common_interrupt+0x1e/0x40 native_safe_halt+0xe/0x10 arch_cpu_idle+0x15/0x20 default_idle_call+0x59/0x1c0 do_idle+0x22c/0x2c0 cpu_startup_entry+0x20/0x30 start_secondary+0x11d/0x150 secondary_startup_64_no_verify+0xa6/0xab INITIAL USE at: lock_acquire+0x15f/0x420 _raw_spin_lock_irqsave+0x52/0xa0 ata_dev_init+0x54/0xe0 ata_link_init+0x8b/0xd0 ata_port_alloc+0x1f1/0x210 ata_host_alloc+0xf1/0x130 ata_host_alloc_pinfo+0x14/0xb0 ata_pci_sff_prepare_host+0x41/0xa0 ata_pci_bmdma_prepare_host+0x14/0x30 piix_init_one+0x21f/0x600 local_pci_probe+0x48/0x80 pci_device_probe+0x105/0x1c0 really_probe+0x221/0x490 driver_probe_device+0xe9/0x160 device_driver_attach+0xb2/0xc0 __driver_attach+0x91/0x150 bus_for_each_dev+0x81/0xc0 driver_attach+0x1e/0x20 bus_add_driver+0x138/0x1f0 driver_register+0x91/0xf0 __pci_register_driver+0x73/0x80 piix_init+0x1e/0x2e do_one_initcall+0x5f/0x2d0 kernel_init_freeable+0x26f/0x2cf kernel_init+0xe/0x113 ret_from_fork+0x1f/0x30 } ... key at: [] __key.6+0x0/0x10 ... acquired at: __lock_acquire+0x9da/0x2370 lock_acquire+0x15f/0x420 _raw_spin_lock_irqsave+0x52/0xa0 ata_bmdma_interrupt+0x27/0x200 __handle_irq_event_percpu+0xd5/0x2b0 handle_irq_event+0x57/0xb0 handle_edge_irq+0x8c/0x230 asm_call_irq_on_stack+0xf/0x20 common_interrupt+0x100/0x1c0 asm_common_interrupt+0x1e/0x40 native_safe_halt+0xe/0x10 arch_cpu_idle+0x15/0x20 default_idle_call+0x59/0x1c0 do_idle+0x22c/0x2c0 cpu_startup_entry+0x20/0x30 start_secondary+0x11d/0x150 secondary_startup_64_no_verify+0xa6/0xab This lockdep splat is reported after: commit e918188611f0 ("locking: More accurate annotations for read_lock()") To clarify: - read-locks are recursive only in interrupt context (when in_interrupt() returns true) - after acquiring host->lock in CPU1, another cpu (i.e. CPU2) may call write_lock(&trig->leddev_list_lock) that would be blocked by CPU0 that holds trig->leddev_list_lock in read-mode - when CPU1 (ata_ac_complete()) tries to read-lock trig->leddev_list_lock, it would be blocked by the write-lock waiter on CPU2 (because we are not in interrupt context, so the read-lock is not recursive) - at this point if an interrupt happens on CPU0 and ata_bmdma_interrupt() is executed it will try to acquire host->lock, that is held by CPU1, that is currently blocked by CPU2, so: * CPU0 blocked by CPU1 * CPU1 blocked by CPU2 * CPU2 blocked by CPU0 *** DEADLOCK *** The deadlock scenario is better represented by the following schema (thanks to Boqun Feng for the schema and the detailed explanation of the deadlock condition): CPU 0: CPU 1: CPU 2: ----- ----- ----- led_trigger_event(): read_lock(&trig->leddev_list_lock); ata_hsm_qc_complete(): spin_lock_irqsave(&host->lock); write_lock(&trig->leddev_list_lock); ata_port_freeze(): ata_do_link_abort(): ata_qc_complete(): ledtrig_disk_activity(): led_trigger_blink_oneshot(): read_lock(&trig->leddev_list_lock); // ^ not in in_interrupt() context, so could get blocked by CPU 2 ata_bmdma_interrupt(): spin_lock_irqsave(&host->lock); Fix by using read_lock_irqsave/irqrestore() in led_trigger_event(), so that no interrupt can happen in between, preventing the deadlock condition. Apply the same change to led_trigger_blink_setup() as well, since the same deadlock scenario can also happen in power_supply_update_bat_leds() -> led_trigger_blink() -> led_trigger_blink_setup() (workqueue context), and potentially prevent other similar usages. Link: https://lore.kernel.org/lkml/20201101092614.GB3989@xps-13-7390/ Fixes: eb25cb9956cc ("leds: convert IDE trigger to common disk trigger") Signed-off-by: Andrea Righi Signed-off-by: Pavel Machek --- drivers/leds/led-triggers.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/leds/led-triggers.c b/drivers/leds/led-triggers.c index 91da90cfb11d9..4e7b78a84149b 100644 --- a/drivers/leds/led-triggers.c +++ b/drivers/leds/led-triggers.c @@ -378,14 +378,15 @@ void led_trigger_event(struct led_trigger *trig, enum led_brightness brightness) { struct led_classdev *led_cdev; + unsigned long flags; if (!trig) return; - read_lock(&trig->leddev_list_lock); + read_lock_irqsave(&trig->leddev_list_lock, flags); list_for_each_entry(led_cdev, &trig->led_cdevs, trig_list) led_set_brightness(led_cdev, brightness); - read_unlock(&trig->leddev_list_lock); + read_unlock_irqrestore(&trig->leddev_list_lock, flags); } EXPORT_SYMBOL_GPL(led_trigger_event); @@ -396,11 +397,12 @@ static void led_trigger_blink_setup(struct led_trigger *trig, int invert) { struct led_classdev *led_cdev; + unsigned long flags; if (!trig) return; - read_lock(&trig->leddev_list_lock); + read_lock_irqsave(&trig->leddev_list_lock, flags); list_for_each_entry(led_cdev, &trig->led_cdevs, trig_list) { if (oneshot) led_blink_set_oneshot(led_cdev, delay_on, delay_off, @@ -408,7 +410,7 @@ static void led_trigger_blink_setup(struct led_trigger *trig, else led_blink_set(led_cdev, delay_on, delay_off); } - read_unlock(&trig->leddev_list_lock); + read_unlock_irqrestore(&trig->leddev_list_lock, flags); } void led_trigger_blink(struct led_trigger *trig, -- GitLab From c8283eb79d879ef898f4224ba30e554f83904b0a Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Sun, 3 Jan 2021 00:15:09 +0100 Subject: [PATCH 1758/2012] dt-bindings: leds: Add DT binding for Richtek RT8515 Add a YAML devicetree binding for the Richtek RT8515 dual channel flash/torch LED driver. Cc: Sakari Ailus Cc: newbytee@protonmail.com Cc: Stephan Gerhold Cc: phone-devel@vger.kernel.org Cc: linux-media@vger.kernel.org Cc: devicetree@vger.kernel.org Reviewed-by: Rob Herring Reviewed-by: Sakari Ailus Signed-off-by: Linus Walleij Signed-off-by: Pavel Machek --- .../bindings/leds/richtek,rt8515.yaml | 111 ++++++++++++++++++ 1 file changed, 111 insertions(+) create mode 100644 Documentation/devicetree/bindings/leds/richtek,rt8515.yaml diff --git a/Documentation/devicetree/bindings/leds/richtek,rt8515.yaml b/Documentation/devicetree/bindings/leds/richtek,rt8515.yaml new file mode 100644 index 0000000000000..68c328eec03be --- /dev/null +++ b/Documentation/devicetree/bindings/leds/richtek,rt8515.yaml @@ -0,0 +1,111 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/leds/richtek,rt8515.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Richtek RT8515 1.5A dual channel LED driver + +maintainers: + - Linus Walleij + +description: | + The Richtek RT8515 is a dual channel (two mode) LED driver that + supports driving a white LED in flash or torch mode. The maximum + current for each mode is defined in hardware using two resistors + RFS and RTS. + +properties: + compatible: + const: richtek,rt8515 + + enf-gpios: + maxItems: 1 + description: A connection to the 'ENF' (enable flash) pin. + + ent-gpios: + maxItems: 1 + description: A connection to the 'ENT' (enable torch) pin. + + richtek,rfs-ohms: + minimum: 7680 + maximum: 367000 + description: The resistance value of the RFS resistor. This + resistors limits the maximum flash current. This must be set + for the property flash-max-microamp to work, the RFS resistor + defines the range of the dimmer setting (brightness) of the + flash LED. + + richtek,rts-ohms: + minimum: 7680 + maximum: 367000 + description: The resistance value of the RTS resistor. This + resistors limits the maximum torch current. This must be set + for the property torch-max-microamp to work, the RTS resistor + defines the range of the dimmer setting (brightness) of the + torch LED. + + led: + type: object + $ref: common.yaml# + properties: + function: true + color: true + flash-max-timeout-us: true + + flash-max-microamp: + maximum: 700000 + description: The maximum current for flash mode + is hardwired to the component using the RFS resistor to + ground. The maximum hardware current setting is calculated + according to the formula Imax = 5500 / RFS. The lowest + allowed resistance value is 7.86 kOhm giving an absolute + maximum current of 700mA. By setting this attribute in + the device tree, you can further restrict the maximum + current below the hardware limit. This requires the RFS + to be defined as it defines the maximum range. + + led-max-microamp: + maximum: 700000 + description: The maximum current for torch mode + is hardwired to the component using the RTS resistor to + ground. The maximum hardware current setting is calculated + according to the formula Imax = 5500 / RTS. The lowest + allowed resistance value is 7.86 kOhm giving an absolute + maximum current of 700mA. By setting this attribute in + the device tree, you can further restrict the maximum + current below the hardware limit. This requires the RTS + to be defined as it defines the maximum range. + + additionalProperties: false + +required: + - compatible + - ent-gpios + - enf-gpios + - led + +additionalProperties: false + +examples: + - | + #include + #include + + led-controller { + compatible = "richtek,rt8515"; + enf-gpios = <&gpio4 12 GPIO_ACTIVE_HIGH>; + ent-gpios = <&gpio4 13 GPIO_ACTIVE_HIGH>; + richtek,rfs-ohms = <16000>; + richtek,rts-ohms = <100000>; + + led { + function = LED_FUNCTION_FLASH; + color = ; + flash-max-timeout-us = <250000>; + flash-max-microamp = <150000>; + led-max-microamp = <25000>; + }; + }; + +... -- GitLab From e1c6edcbea13de025c3406645b4cce4ac3baf973 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Sun, 3 Jan 2021 00:15:10 +0100 Subject: [PATCH 1759/2012] leds: rt8515: Add Richtek RT8515 LED driver This adds a driver for the Richtek RT8515 dual channel torch/flash white LED driver. This LED driver is found in some mobile phones from Samsung such as the GT-S7710 and GT-I8190. A V4L interface is added. We do not have a proper datasheet for the RT8515 but it turns out that RT9387A has a public datasheet and is essentially the same chip. We designed the driver in accordance with this datasheet. The day someone needs to drive a RT9387A this driver can probably easily be augmented to handle that chip too. Sakari Ailus, Pavel Machek and Andy Shevchenko helped significantly in getting this driver right. Cc: Sakari Ailus Cc: newbytee@protonmail.com Cc: Stephan Gerhold Cc: linux-media@vger.kernel.org Cc: phone-devel@vger.kernel.org Reviewed-by: Sakari Ailus Signed-off-by: Linus Walleij Signed-off-by: Pavel Machek --- drivers/leds/Kconfig | 3 + drivers/leds/Makefile | 3 + drivers/leds/flash/Kconfig | 15 ++ drivers/leds/flash/Makefile | 3 + drivers/leds/flash/leds-rt8515.c | 397 +++++++++++++++++++++++++++++++ 5 files changed, 421 insertions(+) create mode 100644 drivers/leds/flash/Kconfig create mode 100644 drivers/leds/flash/Makefile create mode 100644 drivers/leds/flash/leds-rt8515.c diff --git a/drivers/leds/Kconfig b/drivers/leds/Kconfig index 849d3c5f908e4..6c1d8b69a465a 100644 --- a/drivers/leds/Kconfig +++ b/drivers/leds/Kconfig @@ -928,6 +928,9 @@ config LEDS_ACER_A500 This option enables support for the Power Button LED of Acer Iconia Tab A500. +comment "Flash and Torch LED drivers" +source "drivers/leds/flash/Kconfig" + comment "LED Triggers" source "drivers/leds/trigger/Kconfig" diff --git a/drivers/leds/Makefile b/drivers/leds/Makefile index 73e603e1727e7..156c0b4e60d95 100644 --- a/drivers/leds/Makefile +++ b/drivers/leds/Makefile @@ -103,5 +103,8 @@ obj-$(CONFIG_LEDS_SPI_BYTE) += leds-spi-byte.o # LED Userspace Drivers obj-$(CONFIG_LEDS_USER) += uleds.o +# Flash and Torch LED Drivers +obj-$(CONFIG_LEDS_CLASS_FLASH) += flash/ + # LED Triggers obj-$(CONFIG_LEDS_TRIGGERS) += trigger/ diff --git a/drivers/leds/flash/Kconfig b/drivers/leds/flash/Kconfig new file mode 100644 index 0000000000000..d21d273ef3da2 --- /dev/null +++ b/drivers/leds/flash/Kconfig @@ -0,0 +1,15 @@ +# SPDX-License-Identifier: GPL-2.0 + +if LEDS_CLASS_FLASH + +config LEDS_RT8515 + tristate "LED support for Richtek RT8515 flash/torch LED" + depends on GPIOLIB + help + This option enables support for the Richtek RT8515 flash + and torch LEDs found on some mobile phones. + + To compile this driver as a module, choose M here: the module + will be called leds-rt8515. + +endif # LEDS_CLASS_FLASH diff --git a/drivers/leds/flash/Makefile b/drivers/leds/flash/Makefile new file mode 100644 index 0000000000000..e990e257f4d75 --- /dev/null +++ b/drivers/leds/flash/Makefile @@ -0,0 +1,3 @@ +# SPDX-License-Identifier: GPL-2.0 + +obj-$(CONFIG_LEDS_RT8515) += leds-rt8515.o diff --git a/drivers/leds/flash/leds-rt8515.c b/drivers/leds/flash/leds-rt8515.c new file mode 100644 index 0000000000000..590bfa180d104 --- /dev/null +++ b/drivers/leds/flash/leds-rt8515.c @@ -0,0 +1,397 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * LED driver for Richtek RT8515 flash/torch white LEDs + * found on some Samsung mobile phones. + * + * This is a 1.5A Boost dual channel driver produced around 2011. + * + * The component lacks a datasheet, but in the schematic picture + * from the LG P970 service manual you can see the connections + * from the RT8515 to the LED, with two resistors connected + * from the pins "RFS" and "RTS" to ground. + * + * On the LG P970: + * RFS (resistance flash setting?) is 20 kOhm + * RTS (resistance torch setting?) is 39 kOhm + * + * Some sleuthing finds us the RT9387A which we have a datasheet for: + * https://static5.arrow.com/pdfs/2014/7/27/8/21/12/794/rtt_/manual/94download_ds.jspprt9387a.jspprt9387a.pdf + * This apparently works the same way so in theory this driver + * should cover RT9387A as well. This has not been tested, please + * update the compatibles if you add RT9387A support. + * + * Linus Walleij + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +/* We can provide 15-700 mA out to the LED */ +#define RT8515_MIN_IOUT_MA 15 +#define RT8515_MAX_IOUT_MA 700 +/* The maximum intensity is 1-16 for flash and 1-100 for torch */ +#define RT8515_FLASH_MAX 16 +#define RT8515_TORCH_MAX 100 + +#define RT8515_TIMEOUT_US 250000U +#define RT8515_MAX_TIMEOUT_US 300000U + +struct rt8515 { + struct led_classdev_flash fled; + struct device *dev; + struct v4l2_flash *v4l2_flash; + struct mutex lock; + struct regulator *reg; + struct gpio_desc *enable_torch; + struct gpio_desc *enable_flash; + struct timer_list powerdown_timer; + u32 max_timeout; /* Flash max timeout */ + int flash_max_intensity; + int torch_max_intensity; +}; + +static struct rt8515 *to_rt8515(struct led_classdev_flash *fled) +{ + return container_of(fled, struct rt8515, fled); +} + +static void rt8515_gpio_led_off(struct rt8515 *rt) +{ + gpiod_set_value(rt->enable_flash, 0); + gpiod_set_value(rt->enable_torch, 0); +} + +static void rt8515_gpio_brightness_commit(struct gpio_desc *gpiod, + int brightness) +{ + int i; + + /* + * Toggling a GPIO line with a small delay increases the + * brightness one step at a time. + */ + for (i = 0; i < brightness; i++) { + gpiod_set_value(gpiod, 0); + udelay(1); + gpiod_set_value(gpiod, 1); + udelay(1); + } +} + +/* This is setting the torch light level */ +static int rt8515_led_brightness_set(struct led_classdev *led, + enum led_brightness brightness) +{ + struct led_classdev_flash *fled = lcdev_to_flcdev(led); + struct rt8515 *rt = to_rt8515(fled); + + mutex_lock(&rt->lock); + + if (brightness == LED_OFF) { + /* Off */ + rt8515_gpio_led_off(rt); + } else if (brightness < RT8515_TORCH_MAX) { + /* Step it up to movie mode brightness using the flash pin */ + rt8515_gpio_brightness_commit(rt->enable_torch, brightness); + } else { + /* Max torch brightness requested */ + gpiod_set_value(rt->enable_torch, 1); + } + + mutex_unlock(&rt->lock); + + return 0; +} + +static int rt8515_led_flash_strobe_set(struct led_classdev_flash *fled, + bool state) +{ + struct rt8515 *rt = to_rt8515(fled); + struct led_flash_setting *timeout = &fled->timeout; + int brightness = rt->flash_max_intensity; + + mutex_lock(&rt->lock); + + if (state) { + /* Enable LED flash mode and set brightness */ + rt8515_gpio_brightness_commit(rt->enable_flash, brightness); + /* Set timeout */ + mod_timer(&rt->powerdown_timer, + jiffies + usecs_to_jiffies(timeout->val)); + } else { + del_timer_sync(&rt->powerdown_timer); + /* Turn the LED off */ + rt8515_gpio_led_off(rt); + } + + fled->led_cdev.brightness = LED_OFF; + /* After this the torch LED will be disabled */ + + mutex_unlock(&rt->lock); + + return 0; +} + +static int rt8515_led_flash_strobe_get(struct led_classdev_flash *fled, + bool *state) +{ + struct rt8515 *rt = to_rt8515(fled); + + *state = timer_pending(&rt->powerdown_timer); + + return 0; +} + +static int rt8515_led_flash_timeout_set(struct led_classdev_flash *fled, + u32 timeout) +{ + /* The timeout is stored in the led-class-flash core */ + return 0; +} + +static const struct led_flash_ops rt8515_flash_ops = { + .strobe_set = rt8515_led_flash_strobe_set, + .strobe_get = rt8515_led_flash_strobe_get, + .timeout_set = rt8515_led_flash_timeout_set, +}; + +static void rt8515_powerdown_timer(struct timer_list *t) +{ + struct rt8515 *rt = from_timer(rt, t, powerdown_timer); + + /* Turn the LED off */ + rt8515_gpio_led_off(rt); +} + +static void rt8515_init_flash_timeout(struct rt8515 *rt) +{ + struct led_classdev_flash *fled = &rt->fled; + struct led_flash_setting *s; + + /* Init flash timeout setting */ + s = &fled->timeout; + s->min = 1; + s->max = rt->max_timeout; + s->step = 1; + /* + * Set default timeout to RT8515_TIMEOUT_US except if + * max_timeout from DT is lower. + */ + s->val = min(rt->max_timeout, RT8515_TIMEOUT_US); +} + +#if IS_ENABLED(CONFIG_V4L2_FLASH_LED_CLASS) +/* Configure the V2L2 flash subdevice */ +static void rt8515_init_v4l2_flash_config(struct rt8515 *rt, + struct v4l2_flash_config *v4l2_sd_cfg) +{ + struct led_classdev *led = &rt->fled.led_cdev; + struct led_flash_setting *s; + + strscpy(v4l2_sd_cfg->dev_name, led->dev->kobj.name, + sizeof(v4l2_sd_cfg->dev_name)); + + /* + * Init flash intensity setting: this is a linear scale + * capped from the device tree max intensity setting + * 1..flash_max_intensity + */ + s = &v4l2_sd_cfg->intensity; + s->min = 1; + s->max = rt->flash_max_intensity; + s->step = 1; + s->val = s->max; +} + +static void rt8515_v4l2_flash_release(struct rt8515 *rt) +{ + v4l2_flash_release(rt->v4l2_flash); +} + +#else +static void rt8515_init_v4l2_flash_config(struct rt8515 *rt, + struct v4l2_flash_config *v4l2_sd_cfg) +{ +} + +static void rt8515_v4l2_flash_release(struct rt8515 *rt) +{ +} +#endif + +static void rt8515_determine_max_intensity(struct rt8515 *rt, + struct fwnode_handle *led, + const char *resistance, + const char *max_ua_prop, int hw_max, + int *max_intensity_setting) +{ + u32 res = 0; /* Can't be 0 so 0 is undefined */ + u32 ua; + u32 max_ma; + int max_intensity; + int ret; + + fwnode_property_read_u32(rt->dev->fwnode, resistance, &res); + ret = fwnode_property_read_u32(led, max_ua_prop, &ua); + + /* Missing info in DT, OK go with hardware maxima */ + if (ret || res == 0) { + dev_err(rt->dev, + "either %s or %s missing from DT, using HW max\n", + resistance, max_ua_prop); + max_ma = RT8515_MAX_IOUT_MA; + max_intensity = hw_max; + goto out_assign_max; + } + + /* + * Formula from the datasheet, this is the maximum current + * defined by the hardware. + */ + max_ma = (5500 * 1000) / res; + /* + * Calculate max intensity (linear scaling) + * Formula is ((ua / 1000) / max_ma) * 100, then simplified + */ + max_intensity = (ua / 10) / max_ma; + + dev_info(rt->dev, + "current restricted from %u to %u mA, max intensity %d/100\n", + max_ma, (ua / 1000), max_intensity); + +out_assign_max: + dev_info(rt->dev, "max intensity %d/%d = %d mA\n", + max_intensity, hw_max, max_ma); + *max_intensity_setting = max_intensity; +} + +static int rt8515_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct fwnode_handle *child; + struct rt8515 *rt; + struct led_classdev *led; + struct led_classdev_flash *fled; + struct led_init_data init_data = {}; + struct v4l2_flash_config v4l2_sd_cfg = {}; + int ret; + + rt = devm_kzalloc(dev, sizeof(*rt), GFP_KERNEL); + if (!rt) + return -ENOMEM; + + rt->dev = dev; + fled = &rt->fled; + led = &fled->led_cdev; + + /* ENF - Enable Flash line */ + rt->enable_flash = devm_gpiod_get(dev, "enf", GPIOD_OUT_LOW); + if (IS_ERR(rt->enable_flash)) + return dev_err_probe(dev, PTR_ERR(rt->enable_flash), + "cannot get ENF (enable flash) GPIO\n"); + + /* ENT - Enable Torch line */ + rt->enable_torch = devm_gpiod_get(dev, "ent", GPIOD_OUT_LOW); + if (IS_ERR(rt->enable_torch)) + return dev_err_probe(dev, PTR_ERR(rt->enable_torch), + "cannot get ENT (enable torch) GPIO\n"); + + child = fwnode_get_next_available_child_node(dev->fwnode, NULL); + if (!child) { + dev_err(dev, + "No fwnode child node found for connected LED.\n"); + return -EINVAL; + } + init_data.fwnode = child; + + rt8515_determine_max_intensity(rt, child, "richtek,rfs-ohms", + "flash-max-microamp", + RT8515_FLASH_MAX, + &rt->flash_max_intensity); + rt8515_determine_max_intensity(rt, child, "richtek,rts-ohms", + "led-max-microamp", + RT8515_TORCH_MAX, + &rt->torch_max_intensity); + + ret = fwnode_property_read_u32(child, "flash-max-timeout-us", + &rt->max_timeout); + if (ret) { + rt->max_timeout = RT8515_MAX_TIMEOUT_US; + dev_warn(dev, + "flash-max-timeout-us property missing\n"); + } + timer_setup(&rt->powerdown_timer, rt8515_powerdown_timer, 0); + rt8515_init_flash_timeout(rt); + + fled->ops = &rt8515_flash_ops; + + led->max_brightness = rt->torch_max_intensity; + led->brightness_set_blocking = rt8515_led_brightness_set; + led->flags |= LED_CORE_SUSPENDRESUME | LED_DEV_CAP_FLASH; + + mutex_init(&rt->lock); + + platform_set_drvdata(pdev, rt); + + ret = devm_led_classdev_flash_register_ext(dev, fled, &init_data); + if (ret) { + dev_err(dev, "can't register LED %s\n", led->name); + mutex_destroy(&rt->lock); + return ret; + } + + rt8515_init_v4l2_flash_config(rt, &v4l2_sd_cfg); + + /* Create a V4L2 Flash device if V4L2 flash is enabled */ + rt->v4l2_flash = v4l2_flash_init(dev, child, fled, NULL, &v4l2_sd_cfg); + if (IS_ERR(rt->v4l2_flash)) { + ret = PTR_ERR(rt->v4l2_flash); + dev_err(dev, "failed to register V4L2 flash device (%d)\n", + ret); + /* + * Continue without the V4L2 flash + * (we still have the classdev) + */ + } + + return 0; +} + +static int rt8515_remove(struct platform_device *pdev) +{ + struct rt8515 *rt = platform_get_drvdata(pdev); + + rt8515_v4l2_flash_release(rt); + del_timer_sync(&rt->powerdown_timer); + mutex_destroy(&rt->lock); + + return 0; +} + +static const struct of_device_id rt8515_match[] = { + { .compatible = "richtek,rt8515", }, + { /* sentinel */ } +}; +MODULE_DEVICE_TABLE(of, rt8515_match); + +static struct platform_driver rt8515_driver = { + .driver = { + .name = "rt8515", + .of_match_table = rt8515_match, + }, + .probe = rt8515_probe, + .remove = rt8515_remove, +}; +module_platform_driver(rt8515_driver); + +MODULE_AUTHOR("Linus Walleij "); +MODULE_DESCRIPTION("Richtek RT8515 LED driver"); +MODULE_LICENSE("GPL"); -- GitLab From 1048ba83fb1c00cd24172e23e8263972f6b5d9ac Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 31 Jan 2021 13:50:09 -0800 Subject: [PATCH 1760/2012] Linux 5.11-rc6 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index e0af7a4a55985..f5842126e89d4 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 5 PATCHLEVEL = 11 SUBLEVEL = 0 -EXTRAVERSION = -rc5 +EXTRAVERSION = -rc6 NAME = Kleptomaniac Octopus # *DOCUMENTATION* -- GitLab From 18fe0fae61252b5ae6e26553e2676b5fac555951 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Mon, 1 Feb 2021 09:33:24 +0100 Subject: [PATCH 1761/2012] mac80211: fix station rate table updates on assoc If the driver uses .sta_add, station entries are only uploaded after the sta is in assoc state. Fix early station rate table updates by deferring them until the sta has been uploaded. Cc: stable@vger.kernel.org Signed-off-by: Felix Fietkau Link: https://lore.kernel.org/r/20210201083324.3134-1-nbd@nbd.name [use rcu_access_pointer() instead since we won't dereference here] Signed-off-by: Johannes Berg --- net/mac80211/driver-ops.c | 5 ++++- net/mac80211/rate.c | 3 ++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/net/mac80211/driver-ops.c b/net/mac80211/driver-ops.c index c9a8a2433e8ac..48322e45e7ddb 100644 --- a/net/mac80211/driver-ops.c +++ b/net/mac80211/driver-ops.c @@ -125,8 +125,11 @@ int drv_sta_state(struct ieee80211_local *local, } else if (old_state == IEEE80211_STA_AUTH && new_state == IEEE80211_STA_ASSOC) { ret = drv_sta_add(local, sdata, &sta->sta); - if (ret == 0) + if (ret == 0) { sta->uploaded = true; + if (rcu_access_pointer(sta->sta.rates)) + drv_sta_rate_tbl_update(local, sdata, &sta->sta); + } } else if (old_state == IEEE80211_STA_ASSOC && new_state == IEEE80211_STA_AUTH) { drv_sta_remove(local, sdata, &sta->sta); diff --git a/net/mac80211/rate.c b/net/mac80211/rate.c index 45927202c71c6..63652c39c8e07 100644 --- a/net/mac80211/rate.c +++ b/net/mac80211/rate.c @@ -960,7 +960,8 @@ int rate_control_set_rates(struct ieee80211_hw *hw, if (old) kfree_rcu(old, rcu_head); - drv_sta_rate_tbl_update(hw_to_local(hw), sta->sdata, pubsta); + if (sta->uploaded) + drv_sta_rate_tbl_update(hw_to_local(hw), sta->sdata, pubsta); ieee80211_sta_set_expected_throughput(pubsta, sta_get_expected_throughput(sta)); -- GitLab From 2e99dedc73f004f650b197c9b269c15c7e01ad15 Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Wed, 2 Dec 2020 15:50:17 +0800 Subject: [PATCH 1762/2012] igc: Report speed and duplex as unknown when device is runtime suspended Similar to commit 165ae7a8feb5 ("igb: Report speed and duplex as unknown when device is runtime suspended"), if we try to read speed and duplex sysfs while the device is runtime suspended, igc will complain and stops working: [ 123.449883] igc 0000:03:00.0 enp3s0: PCIe link lost, device now detached [ 123.450052] BUG: kernel NULL pointer dereference, address: 0000000000000008 [ 123.450056] #PF: supervisor read access in kernel mode [ 123.450058] #PF: error_code(0x0000) - not-present page [ 123.450059] PGD 0 P4D 0 [ 123.450064] Oops: 0000 [#1] SMP NOPTI [ 123.450068] CPU: 0 PID: 2525 Comm: udevadm Tainted: G U W OE 5.10.0-1002-oem #2+rkl2-Ubuntu [ 123.450078] RIP: 0010:igc_rd32+0x1c/0x90 [igc] [ 123.450080] Code: c0 5d c3 b8 fd ff ff ff c3 0f 1f 44 00 00 0f 1f 44 00 00 55 89 f0 48 89 e5 41 56 41 55 41 54 49 89 c4 53 48 8b 57 08 48 01 d0 <44> 8b 28 41 83 fd ff 74 0c 5b 44 89 e8 41 5c 41 5d 4 [ 123.450083] RSP: 0018:ffffb0d100d6fcc0 EFLAGS: 00010202 [ 123.450085] RAX: 0000000000000008 RBX: ffffb0d100d6fd30 RCX: 0000000000000000 [ 123.450087] RDX: 0000000000000000 RSI: 0000000000000008 RDI: ffff945a12716c10 [ 123.450089] RBP: ffffb0d100d6fce0 R08: ffff945a12716550 R09: ffff945a09874000 [ 123.450090] R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000008 [ 123.450092] R13: ffff945a12716000 R14: ffff945a037da280 R15: ffff945a037da290 [ 123.450094] FS: 00007f3b34c868c0(0000) GS:ffff945b89200000(0000) knlGS:0000000000000000 [ 123.450096] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 123.450098] CR2: 0000000000000008 CR3: 00000001144de006 CR4: 0000000000770ef0 [ 123.450100] PKRU: 55555554 [ 123.450101] Call Trace: [ 123.450111] igc_ethtool_get_link_ksettings+0xd6/0x1b0 [igc] [ 123.450118] __ethtool_get_link_ksettings+0x71/0xb0 [ 123.450123] duplex_show+0x74/0xc0 [ 123.450129] dev_attr_show+0x1d/0x40 [ 123.450134] sysfs_kf_seq_show+0xa1/0x100 [ 123.450137] kernfs_seq_show+0x27/0x30 [ 123.450142] seq_read+0xb7/0x400 [ 123.450148] ? common_file_perm+0x72/0x170 [ 123.450151] kernfs_fop_read+0x35/0x1b0 [ 123.450155] vfs_read+0xb5/0x1b0 [ 123.450157] ksys_read+0x67/0xe0 [ 123.450160] __x64_sys_read+0x1a/0x20 [ 123.450164] do_syscall_64+0x38/0x90 [ 123.450168] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 123.450170] RIP: 0033:0x7f3b351fe142 [ 123.450173] Code: c0 e9 c2 fe ff ff 50 48 8d 3d 3a ca 0a 00 e8 f5 19 02 00 0f 1f 44 00 00 f3 0f 1e fa 64 8b 04 25 18 00 00 00 85 c0 75 10 0f 05 <48> 3d 00 f0 ff ff 77 56 c3 0f 1f 44 00 00 48 83 ec 28 48 89 54 24 [ 123.450174] RSP: 002b:00007fffef2ec138 EFLAGS: 00000246 ORIG_RAX: 0000000000000000 [ 123.450177] RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f3b351fe142 [ 123.450179] RDX: 0000000000001001 RSI: 00005644c047f070 RDI: 0000000000000003 [ 123.450180] RBP: 00007fffef2ec340 R08: 00005644c047f070 R09: 00007f3b352d9320 [ 123.450182] R10: 00005644c047c010 R11: 0000000000000246 R12: 00005644c047cbf0 [ 123.450184] R13: 00005644c047e6d0 R14: 0000000000000003 R15: 00007fffef2ec140 [ 123.450189] Modules linked in: rfcomm ccm cmac algif_hash algif_skcipher af_alg bnep toshiba_acpi industrialio toshiba_haps hp_accel lis3lv02d btusb btrtl btbcm btintel bluetooth ecdh_generic ecc joydev input_leds nls_iso8859_1 snd_sof_pci snd_sof_intel_byt snd_sof_intel_ipc snd_sof_intel_hda_common snd_soc_hdac_hda snd_hda_codec_hdmi snd_sof_xtensa_dsp snd_sof_intel_hda snd_sof snd_hda_ext_core snd_soc_acpi_intel_match snd_soc_acpi snd_hda_codec_realtek snd_hda_codec_generic ledtrig_audio snd_hda_intel snd_intel_dspcfg soundwire_intel soundwire_generic_allocation soundwire_cadence snd_hda_codec snd_hda_core ath10k_pci snd_hwdep intel_rapl_msr intel_rapl_common ath10k_core soundwire_bus snd_soc_core x86_pkg_temp_thermal ath intel_powerclamp snd_compress ac97_bus snd_pcm_dmaengine mac80211 snd_pcm coretemp snd_seq_midi snd_seq_midi_event snd_rawmidi kvm_intel cfg80211 snd_seq snd_seq_device snd_timer mei_hdcp kvm libarc4 snd crct10dif_pclmul ghash_clmulni_intel aesni_intel mei_me dell_wmi [ 123.450266] dell_smbios soundcore sparse_keymap dcdbas crypto_simd cryptd mei dell_uart_backlight glue_helper ee1004 wmi_bmof intel_wmi_thunderbolt dell_wmi_descriptor mac_hid efi_pstore acpi_pad acpi_tad intel_cstate sch_fq_codel parport_pc ppdev lp parport ip_tables x_tables autofs4 btrfs blake2b_generic raid10 raid456 async_raid6_recov async_memcpy async_pq async_xor async_tx xor raid6_pq libcrc32c raid1 raid0 multipath linear dm_mirror dm_region_hash dm_log hid_generic usbhid hid i915 i2c_algo_bit drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops cec crc32_pclmul rc_core drm intel_lpss_pci i2c_i801 ahci igc intel_lpss i2c_smbus idma64 xhci_pci libahci virt_dma xhci_pci_renesas wmi video pinctrl_tigerlake [ 123.450335] CR2: 0000000000000008 [ 123.450338] ---[ end trace 9f731e38b53c35cc ]--- The more generic approach will be wrap get_link_ksettings() with begin() and complete() callbacks, and calls runtime resume and runtime suspend routine respectively. However, igc is like igb, runtime resume routine uses rtnl_lock() which upper ethtool layer also uses. So to prevent a deadlock on rtnl, take a different approach, use pm_runtime_suspended() to avoid reading register while device is runtime suspended. Fixes: 8c5ad0dae93c ("igc: Add ethtool support") Signed-off-by: Kai-Heng Feng Acked-by: Sasha Neftin Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igc/igc_ethtool.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/igc/igc_ethtool.c b/drivers/net/ethernet/intel/igc/igc_ethtool.c index 831f2f09de5fb..ec8cd69d49928 100644 --- a/drivers/net/ethernet/intel/igc/igc_ethtool.c +++ b/drivers/net/ethernet/intel/igc/igc_ethtool.c @@ -1714,7 +1714,8 @@ static int igc_ethtool_get_link_ksettings(struct net_device *netdev, Asym_Pause); } - status = rd32(IGC_STATUS); + status = pm_runtime_suspended(&adapter->pdev->dev) ? + 0 : rd32(IGC_STATUS); if (status & IGC_STATUS_LU) { if (status & IGC_STATUS_SPEED_1000) { -- GitLab From ebc8d125062e7dccb7922b2190b097c20d88ad96 Mon Sep 17 00:00:00 2001 From: Kevin Lo Date: Sun, 20 Dec 2020 22:18:19 +0800 Subject: [PATCH 1763/2012] igc: set the default return value to -IGC_ERR_NVM in igc_write_nvm_srwr This patch sets the default return value to -IGC_ERR_NVM in igc_write_nvm_srwr. Without this change it wouldn't lead to a shadow RAM write EEWR timeout. Fixes: ab4056126813 ("igc: Add NVM support") Signed-off-by: Kevin Lo Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igc/igc_i225.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/igc/igc_i225.c b/drivers/net/ethernet/intel/igc/igc_i225.c index 8b67d9b49a83a..7ec04e48860c6 100644 --- a/drivers/net/ethernet/intel/igc/igc_i225.c +++ b/drivers/net/ethernet/intel/igc/igc_i225.c @@ -219,9 +219,9 @@ static s32 igc_write_nvm_srwr(struct igc_hw *hw, u16 offset, u16 words, u16 *data) { struct igc_nvm_info *nvm = &hw->nvm; + s32 ret_val = -IGC_ERR_NVM; u32 attempts = 100000; u32 i, k, eewr = 0; - s32 ret_val = 0; /* A check for invalid values: offset too large, too many words, * too many words for the offset, and not enough words. @@ -229,7 +229,6 @@ static s32 igc_write_nvm_srwr(struct igc_hw *hw, u16 offset, u16 words, if (offset >= nvm->word_size || (words > (nvm->word_size - offset)) || words == 0) { hw_dbg("nvm parameter(s) out of bounds\n"); - ret_val = -IGC_ERR_NVM; goto out; } -- GitLab From b881145642ce0bbe2be521e0882e72a5cebe93b8 Mon Sep 17 00:00:00 2001 From: Kevin Lo Date: Thu, 7 Jan 2021 14:10:38 +0800 Subject: [PATCH 1764/2012] igc: check return value of ret_val in igc_config_fc_after_link_up Check return value from ret_val to make error check actually work. Fixes: 4eb8080143a9 ("igc: Add setup link functionality") Signed-off-by: Kevin Lo Acked-by: Sasha Neftin Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igc/igc_mac.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/igc/igc_mac.c b/drivers/net/ethernet/intel/igc/igc_mac.c index 09cd0ec7ee87d..67b8ffd21d8af 100644 --- a/drivers/net/ethernet/intel/igc/igc_mac.c +++ b/drivers/net/ethernet/intel/igc/igc_mac.c @@ -638,7 +638,7 @@ s32 igc_config_fc_after_link_up(struct igc_hw *hw) } out: - return 0; + return ret_val; } /** -- GitLab From 50af06d43eab6b09afc37aa7c8bbf69b14a3b2f7 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 1 Feb 2021 16:29:56 +0100 Subject: [PATCH 1765/2012] staging: rtl8723bs: Move wiphy setup to after reading the regulatory settings from the chip Commit 81f153faacd0 ("staging: rtl8723bs: fix wireless regulatory API misuse") moved the wiphy_apply_custom_regulatory() call to earlier in the driver's init-sequence, so that it gets called before wiphy_register(). But at this point in time the eFuses which code the regulatory-settings for the chip have not been read by the driver yet, causing _rtw_reg_apply_flags() to set the IEEE80211_CHAN_DISABLED flag on *all* channels. On the device where I initially tested the fix, a Jumper EZpad 7 tablet, this does not cause any problems because shortly after init the rtw_reg_notifier() gets called fixing things up. I guess this happens into response to receiving a (broadcast) packet with regulatory info from the access-point ? But on another device with a RTL8723BS wifi chip, an Acer Switch 10E (SW3-016), the rtw_reg_notifier() never gets called. I assume that some fuse has been set on this device to ignore regulatory info received from access-points. This means that on the Acer the driver is stuck in a state with all channels disabled, leading to non working Wifi. We cannot move the wiphy_apply_custom_regulatory() call back, because that call must be made before the wiphy_register() call. Instead move the entire rtw_wdev_alloc() call to after the Efuses have been read, fixing all channels being disabled in the initial channel-map. Fixes: 81f153faacd0 ("staging: rtl8723bs: fix wireless regulatory API misuse") Cc: Johannes Berg Signed-off-by: Hans de Goede Acked-by: Greg Kroah-Hartman Link: https://lore.kernel.org/r/20210201152956.370186-2-hdegoede@redhat.com Signed-off-by: Johannes Berg --- drivers/staging/rtl8723bs/os_dep/sdio_intf.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/staging/rtl8723bs/os_dep/sdio_intf.c b/drivers/staging/rtl8723bs/os_dep/sdio_intf.c index b2208e5f190ad..301ffff12e82b 100644 --- a/drivers/staging/rtl8723bs/os_dep/sdio_intf.c +++ b/drivers/staging/rtl8723bs/os_dep/sdio_intf.c @@ -339,8 +339,6 @@ static struct adapter *rtw_sdio_if1_init(struct dvobj_priv *dvobj, const struct padapter = rtw_netdev_priv(pnetdev); - rtw_wdev_alloc(padapter, dvobj_to_dev(dvobj)); - /* 3 3. init driver special setting, interface, OS and hardware relative */ /* 4 3.1 set hardware operation functions */ @@ -378,6 +376,8 @@ static struct adapter *rtw_sdio_if1_init(struct dvobj_priv *dvobj, const struct goto free_hal_data; } + rtw_wdev_alloc(padapter, dvobj_to_dev(dvobj)); + /* 3 8. get WLan MAC address */ /* set mac addr */ rtw_macaddr_cfg(&psdio->func->dev, padapter->eeprompriv.mac_addr); -- GitLab From 40c575d1ec71f7a61c73ba1603a69650c130559c Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 1 Feb 2021 19:20:50 +0100 Subject: [PATCH 1766/2012] cfg80211: fix netdev registration deadlock If register_netdevice() fails after having called cfg80211's netdev notifier (cfg80211_netdev_notifier_call) it will call the notifier again with UNREGISTER. This would then lock the wiphy mutex because we're marked as registered, which causes a deadlock. Fix this by separately keeping track of whether or not we're in the middle of registering to also skip the notifier call on this unregister. Reported-by: syzbot+2ae0ca9d7737ad1a62b7@syzkaller.appspotmail.com Fixes: a05829a7222e ("cfg80211: avoid holding the RTNL when calling the driver") Link: https://lore.kernel.org/r/20210201192048.ed8bad436737.I7cae042c44b15f80919a285799a15df467e9d42d@changeid Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 4 +++- net/wireless/core.c | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 4741d71ead215..4cdd75449d73b 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -5299,6 +5299,8 @@ static inline void wiphy_unlock(struct wiphy *wiphy) * @wiphy: pointer to hardware description * @iftype: interface type * @registered: is this wdev already registered with cfg80211 + * @registering: indicates we're doing registration under wiphy lock + * for the notifier * @list: (private) Used to collect the interfaces * @netdev: (private) Used to reference back to the netdev, may be %NULL * @identifier: (private) Identifier used in nl80211 to identify this @@ -5382,7 +5384,7 @@ struct wireless_dev { struct mutex mtx; - bool use_4addr, is_running, registered; + bool use_4addr, is_running, registered, registering; u8 address[ETH_ALEN] __aligned(sizeof(u16)); diff --git a/net/wireless/core.c b/net/wireless/core.c index 18f9a5c214b59..a2785379df6e5 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -1346,6 +1346,7 @@ int cfg80211_register_netdevice(struct net_device *dev) /* we'll take care of this */ wdev->registered = true; + wdev->registering = true; ret = register_netdevice(dev); if (ret) goto out; @@ -1361,6 +1362,7 @@ int cfg80211_register_netdevice(struct net_device *dev) cfg80211_register_wdev(rdev, wdev); ret = 0; out: + wdev->registering = false; if (ret) wdev->registered = false; return ret; @@ -1403,7 +1405,7 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb, * It is possible to get NETDEV_UNREGISTER multiple times, * so check wdev->registered. */ - if (wdev->registered) { + if (wdev->registered && !wdev->registering) { wiphy_lock(&rdev->wiphy); _cfg80211_unregister_wdev(wdev, false); wiphy_unlock(&rdev->wiphy); -- GitLab From f559a356043a55bab25a4c00505ea65c50a956fb Mon Sep 17 00:00:00 2001 From: Aleksandr Loktionov Date: Sat, 23 Jan 2021 00:22:23 +0000 Subject: [PATCH 1767/2012] i40e: Revert "i40e: don't report link up for a VF who hasn't enabled queues" This reverts commit 2ad1274fa35ace5c6360762ba48d33b63da2396c VF queues were not brought up when PF was brought up after being downed if the VF driver disabled VFs queues during PF down. This could happen in some older or external VF driver implementations. The problem was that PF driver used vf->queues_enabled as a condition to decide what link-state it would send out which caused the issue. Remove the check for vf->queues_enabled in the VF link notify. Now VF will always be notified of the current link status. Also remove the queues_enabled member from i40e_vf structure as it is not used anymore. Otherwise VNF implementation was broken and caused a link flap. The original commit was a workaround to avoid breaking existing VFs though it's really a fault of the VF code not the PF. The commit should be safe to revert as all of the VFs we know of have been fixed. Also, since we now know there is a related bug in the workaround, removing it is preferred. Fixes: 2ad1274fa35a ("i40e: don't report link up for a VF who hasn't enabled") Signed-off-by: Aleksandr Loktionov Signed-off-by: Arkadiusz Kubalewski Tested-by: Konrad Jankowski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c | 13 +------------ drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h | 1 - 2 files changed, 1 insertion(+), 13 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c index 7efc61aacb0a5..1b6ec9be155a6 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c @@ -55,12 +55,7 @@ static void i40e_vc_notify_vf_link_state(struct i40e_vf *vf) pfe.event = VIRTCHNL_EVENT_LINK_CHANGE; pfe.severity = PF_EVENT_SEVERITY_INFO; - - /* Always report link is down if the VF queues aren't enabled */ - if (!vf->queues_enabled) { - pfe.event_data.link_event.link_status = false; - pfe.event_data.link_event.link_speed = 0; - } else if (vf->link_forced) { + if (vf->link_forced) { pfe.event_data.link_event.link_status = vf->link_up; pfe.event_data.link_event.link_speed = (vf->link_up ? i40e_virtchnl_link_speed(ls->link_speed) : 0); @@ -70,7 +65,6 @@ static void i40e_vc_notify_vf_link_state(struct i40e_vf *vf) pfe.event_data.link_event.link_speed = i40e_virtchnl_link_speed(ls->link_speed); } - i40e_aq_send_msg_to_vf(hw, abs_vf_id, VIRTCHNL_OP_EVENT, 0, (u8 *)&pfe, sizeof(pfe), NULL); } @@ -2443,8 +2437,6 @@ static int i40e_vc_enable_queues_msg(struct i40e_vf *vf, u8 *msg) } } - vf->queues_enabled = true; - error_param: /* send the response to the VF */ return i40e_vc_send_resp_to_vf(vf, VIRTCHNL_OP_ENABLE_QUEUES, @@ -2466,9 +2458,6 @@ static int i40e_vc_disable_queues_msg(struct i40e_vf *vf, u8 *msg) struct i40e_pf *pf = vf->pf; i40e_status aq_ret = 0; - /* Immediately mark queues as disabled */ - vf->queues_enabled = false; - if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) { aq_ret = I40E_ERR_PARAM; goto error_param; diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h index 5491215d81deb..091e32c1bb46f 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h @@ -98,7 +98,6 @@ struct i40e_vf { unsigned int tx_rate; /* Tx bandwidth limit in Mbps */ bool link_forced; bool link_up; /* only valid if VF link is forced */ - bool queues_enabled; /* true if the VF queues are enabled */ bool spoofchk; u16 num_vlan; -- GitLab From f72f2fb8fb6be095b98af5d740ac50cffd0b0cae Mon Sep 17 00:00:00 2001 From: DENG Qingfang Date: Sat, 30 Jan 2021 21:43:34 +0800 Subject: [PATCH 1768/2012] net: dsa: mv88e6xxx: override existent unicast portvec in port_fdb_add Having multiple destination ports for a unicast address does not make sense. Make port_db_load_purge override existent unicast portvec instead of adding a new port bit. Fixes: 884729399260 ("net: dsa: mv88e6xxx: handle multiple ports in ATU") Signed-off-by: DENG Qingfang Reviewed-by: Vladimir Oltean Link: https://lore.kernel.org/r/20210130134334.10243-1-dqfext@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/mv88e6xxx/chip.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index eafe6bedc692e..54aa942eedaa6 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -1676,7 +1676,11 @@ static int mv88e6xxx_port_db_load_purge(struct mv88e6xxx_chip *chip, int port, if (!entry.portvec) entry.state = 0; } else { - entry.portvec |= BIT(port); + if (state == MV88E6XXX_G1_ATU_DATA_STATE_UC_STATIC) + entry.portvec = BIT(port); + else + entry.portvec |= BIT(port); + entry.state = state; } -- GitLab From 7f976d5cf16d0a747098f67831d746fa25f18dbe Mon Sep 17 00:00:00 2001 From: Kurt Kanzenbach Date: Sat, 30 Jan 2021 14:59:33 +0100 Subject: [PATCH 1769/2012] net: dsa: hellcreek: Report VLAN table occupancy The VLAN membership configuration is cached in software already. So, it can be reported via devlink. Add support for it: |root@tsn:~# devlink resource show platform/ff240000.switch |platform/ff240000.switch: | name VLAN size 4096 occ 4 unit entry dpipe_tables none Signed-off-by: Kurt Kanzenbach Reviewed-by: Florian Fainelli Signed-off-by: Jakub Kicinski --- drivers/net/dsa/hirschmann/hellcreek.c | 59 ++++++++++++++++++++++++++ drivers/net/dsa/hirschmann/hellcreek.h | 5 +++ 2 files changed, 64 insertions(+) diff --git a/drivers/net/dsa/hirschmann/hellcreek.c b/drivers/net/dsa/hirschmann/hellcreek.c index 4cc51fb37e678..0ba0f6e81305c 100644 --- a/drivers/net/dsa/hirschmann/hellcreek.c +++ b/drivers/net/dsa/hirschmann/hellcreek.c @@ -1000,6 +1000,51 @@ out: return ret; } +static u64 hellcreek_devlink_vlan_table_get(void *priv) +{ + struct hellcreek *hellcreek = priv; + u64 count = 0; + int i; + + mutex_lock(&hellcreek->reg_lock); + for (i = 0; i < VLAN_N_VID; ++i) + if (hellcreek->vidmbrcfg[i]) + count++; + mutex_unlock(&hellcreek->reg_lock); + + return count; +} + +static int hellcreek_setup_devlink_resources(struct dsa_switch *ds) +{ + struct devlink_resource_size_params size_params; + struct hellcreek *hellcreek = ds->priv; + int err; + + devlink_resource_size_params_init(&size_params, VLAN_N_VID, + VLAN_N_VID, + 1, DEVLINK_RESOURCE_UNIT_ENTRY); + + err = dsa_devlink_resource_register(ds, "VLAN", VLAN_N_VID, + HELLCREEK_DEVLINK_PARAM_ID_VLAN_TABLE, + DEVLINK_RESOURCE_ID_PARENT_TOP, + &size_params); + if (err) + goto out; + + dsa_devlink_resource_occ_get_register(ds, + HELLCREEK_DEVLINK_PARAM_ID_VLAN_TABLE, + hellcreek_devlink_vlan_table_get, + hellcreek); + + return 0; + +out: + dsa_devlink_resources_unregister(ds); + + return err; +} + static int hellcreek_setup(struct dsa_switch *ds) { struct hellcreek *hellcreek = ds->priv; @@ -1053,9 +1098,22 @@ static int hellcreek_setup(struct dsa_switch *ds) return ret; } + /* Register devlink resources with DSA */ + ret = hellcreek_setup_devlink_resources(ds); + if (ret) { + dev_err(hellcreek->dev, + "Failed to setup devlink resources!\n"); + return ret; + } + return 0; } +static void hellcreek_teardown(struct dsa_switch *ds) +{ + dsa_devlink_resources_unregister(ds); +} + static void hellcreek_phylink_validate(struct dsa_switch *ds, int port, unsigned long *supported, struct phylink_link_state *state) @@ -1447,6 +1505,7 @@ static const struct dsa_switch_ops hellcreek_ds_ops = { .port_vlan_del = hellcreek_vlan_del, .port_vlan_filtering = hellcreek_vlan_filtering, .setup = hellcreek_setup, + .teardown = hellcreek_teardown, }; static int hellcreek_probe(struct platform_device *pdev) diff --git a/drivers/net/dsa/hirschmann/hellcreek.h b/drivers/net/dsa/hirschmann/hellcreek.h index 854639f872478..11539916a6be2 100644 --- a/drivers/net/dsa/hirschmann/hellcreek.h +++ b/drivers/net/dsa/hirschmann/hellcreek.h @@ -298,4 +298,9 @@ struct hellcreek { #define dw_to_hellcreek_port(dw) \ container_of(dw, struct hellcreek_port, schedule_work) +/* Devlink resources */ +enum hellcreek_devlink_resource_id { + HELLCREEK_DEVLINK_PARAM_ID_VLAN_TABLE, +}; + #endif /* _HELLCREEK_H_ */ -- GitLab From 8486e83fe1d8534ae964cb12c6852a824c12318b Mon Sep 17 00:00:00 2001 From: Kurt Kanzenbach Date: Sat, 30 Jan 2021 14:59:34 +0100 Subject: [PATCH 1770/2012] net: dsa: hellcreek: Report FDB table occupancy Report the FDB table size and occupancy via devlink. The actual size depends on the used Hellcreek version: |root@tsn:~# devlink resource show platform/ff240000.switch |platform/ff240000.switch: | name VLAN size 4096 occ 2 unit entry dpipe_tables none | name FDB size 256 occ 6 unit entry dpipe_tables none Suggested-by: Florian Fainelli Signed-off-by: Kurt Kanzenbach Reviewed-by: Florian Fainelli Signed-off-by: Jakub Kicinski --- drivers/net/dsa/hirschmann/hellcreek.c | 46 ++++++++++++++++++++++---- drivers/net/dsa/hirschmann/hellcreek.h | 1 + 2 files changed, 40 insertions(+), 7 deletions(-) diff --git a/drivers/net/dsa/hirschmann/hellcreek.c b/drivers/net/dsa/hirschmann/hellcreek.c index 0ba0f6e81305c..f984ca75a71f4 100644 --- a/drivers/net/dsa/hirschmann/hellcreek.c +++ b/drivers/net/dsa/hirschmann/hellcreek.c @@ -221,12 +221,11 @@ static void hellcreek_feature_detect(struct hellcreek *hellcreek) features = hellcreek_read(hellcreek, HR_FEABITS0); - /* Currently we only detect the size of the FDB table */ + /* Only detect the size of the FDB table. The size and current + * utilization can be queried via devlink. + */ hellcreek->fdb_entries = ((features & HR_FEABITS0_FDBBINS_MASK) >> HR_FEABITS0_FDBBINS_SHIFT) * 32; - - dev_info(hellcreek->dev, "Feature detect: FDB entries=%zu\n", - hellcreek->fdb_entries); } static enum dsa_tag_protocol hellcreek_get_tag_protocol(struct dsa_switch *ds, @@ -1015,20 +1014,48 @@ static u64 hellcreek_devlink_vlan_table_get(void *priv) return count; } +static u64 hellcreek_devlink_fdb_table_get(void *priv) +{ + struct hellcreek *hellcreek = priv; + u64 count = 0; + + /* Reading this register has side effects. Synchronize against the other + * FDB operations. + */ + mutex_lock(&hellcreek->reg_lock); + count = hellcreek_read(hellcreek, HR_FDBMAX); + mutex_unlock(&hellcreek->reg_lock); + + return count; +} + static int hellcreek_setup_devlink_resources(struct dsa_switch *ds) { - struct devlink_resource_size_params size_params; + struct devlink_resource_size_params size_vlan_params; + struct devlink_resource_size_params size_fdb_params; struct hellcreek *hellcreek = ds->priv; int err; - devlink_resource_size_params_init(&size_params, VLAN_N_VID, + devlink_resource_size_params_init(&size_vlan_params, VLAN_N_VID, VLAN_N_VID, 1, DEVLINK_RESOURCE_UNIT_ENTRY); + devlink_resource_size_params_init(&size_fdb_params, + hellcreek->fdb_entries, + hellcreek->fdb_entries, + 1, DEVLINK_RESOURCE_UNIT_ENTRY); + err = dsa_devlink_resource_register(ds, "VLAN", VLAN_N_VID, HELLCREEK_DEVLINK_PARAM_ID_VLAN_TABLE, DEVLINK_RESOURCE_ID_PARENT_TOP, - &size_params); + &size_vlan_params); + if (err) + goto out; + + err = dsa_devlink_resource_register(ds, "FDB", hellcreek->fdb_entries, + HELLCREEK_DEVLINK_PARAM_ID_FDB_TABLE, + DEVLINK_RESOURCE_ID_PARENT_TOP, + &size_fdb_params); if (err) goto out; @@ -1037,6 +1064,11 @@ static int hellcreek_setup_devlink_resources(struct dsa_switch *ds) hellcreek_devlink_vlan_table_get, hellcreek); + dsa_devlink_resource_occ_get_register(ds, + HELLCREEK_DEVLINK_PARAM_ID_FDB_TABLE, + hellcreek_devlink_fdb_table_get, + hellcreek); + return 0; out: diff --git a/drivers/net/dsa/hirschmann/hellcreek.h b/drivers/net/dsa/hirschmann/hellcreek.h index 11539916a6be2..305e76dab34d9 100644 --- a/drivers/net/dsa/hirschmann/hellcreek.h +++ b/drivers/net/dsa/hirschmann/hellcreek.h @@ -301,6 +301,7 @@ struct hellcreek { /* Devlink resources */ enum hellcreek_devlink_resource_id { HELLCREEK_DEVLINK_PARAM_ID_VLAN_TABLE, + HELLCREEK_DEVLINK_PARAM_ID_FDB_TABLE, }; #endif /* _HELLCREEK_H_ */ -- GitLab From 5e9eff5dfa460cd1a74b7c1fde4fced7c04383af Mon Sep 17 00:00:00 2001 From: Lijun Pan Date: Thu, 28 Jan 2021 22:34:01 -0600 Subject: [PATCH 1771/2012] ibmvnic: device remove has higher precedence over reset MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Returning -EBUSY in ibmvnic_remove() does not actually hold the removal procedure since driver core doesn't care for the return value (see __device_release_driver() in drivers/base/dd.c calling dev->bus->remove()) though vio_bus_remove (in arch/powerpc/platforms/pseries/vio.c) records the return value and passes it on. [1] During the device removal precedure, checking for resetting bit is dropped so that we can continue executing all the cleanup calls in the rest of the remove function. Otherwise, it can cause latent memory leaks and kernel crashes. [1] https://lore.kernel.org/linuxppc-dev/20210117101242.dpwayq6wdgfdzirl@pengutronix.de/T/#m48f5befd96bc9842ece2a3ad14f4c27747206a53 Reported-by: Uwe Kleine-König Fixes: 7d7195a026ba ("ibmvnic: Do not process device remove during device reset") Signed-off-by: Lijun Pan Link: https://lore.kernel.org/r/20210129043402.95744-1-ljp@linux.ibm.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/ibm/ibmvnic.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index 8820c98ea891e..f79034c786c84 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -5444,11 +5444,6 @@ static int ibmvnic_remove(struct vio_dev *dev) unsigned long flags; spin_lock_irqsave(&adapter->state_lock, flags); - if (test_bit(0, &adapter->resetting)) { - spin_unlock_irqrestore(&adapter->state_lock, flags); - return -EBUSY; - } - adapter->state = VNIC_REMOVING; spin_unlock_irqrestore(&adapter->state_lock, flags); -- GitLab From 938e0fcd3253efdef8924714158911286d08cfe1 Mon Sep 17 00:00:00 2001 From: Alexander Ovechkin Date: Mon, 1 Feb 2021 23:00:49 +0300 Subject: [PATCH 1772/2012] net: sched: replaced invalid qdisc tree flush helper in qdisc_replace Commit e5f0e8f8e456 ("net: sched: introduce and use qdisc tree flush/purge helpers") introduced qdisc tree flush/purge helpers, but erroneously used flush helper instead of purge helper in qdisc_replace function. This issue was found in our CI, that tests various qdisc setups by configuring qdisc and sending data through it. Call of invalid helper sporadically leads to corruption of vt_tree/cf_tree of hfsc_class that causes kernel oops: Oops: 0000 [#1] SMP PTI CPU: 1 PID: 0 Comm: swapper/1 Not tainted 5.11.0-8f6859df #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.10.2-0-g5f4c7b1-prebuilt.qemu-project.org 04/01/2014 RIP: 0010:rb_insert_color+0x18/0x190 Code: c3 31 c0 c3 0f 1f 40 00 66 2e 0f 1f 84 00 00 00 00 00 48 8b 07 48 85 c0 0f 84 05 01 00 00 48 8b 10 f6 c2 01 0f 85 34 01 00 00 <48> 8b 4a 08 49 89 d0 48 39 c1 74 7d 48 85 c9 74 32 f6 01 01 75 2d RSP: 0018:ffffc900000b8bb0 EFLAGS: 00010246 RAX: ffff8881ef4c38b0 RBX: ffff8881d956e400 RCX: ffff8881ef4c38b0 RDX: 0000000000000000 RSI: ffff8881d956f0a8 RDI: ffff8881d956e4b0 RBP: 0000000000000000 R08: 000000d5c4e249da R09: 1600000000000000 R10: ffffc900000b8be0 R11: ffffc900000b8b28 R12: 0000000000000001 R13: 000000000000005a R14: ffff8881f0905000 R15: ffff8881f0387d00 FS: 0000000000000000(0000) GS:ffff8881f8b00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000008 CR3: 00000001f4796004 CR4: 0000000000060ee0 Call Trace: init_vf.isra.19+0xec/0x250 [sch_hfsc] hfsc_enqueue+0x245/0x300 [sch_hfsc] ? fib_rules_lookup+0x12a/0x1d0 ? __dev_queue_xmit+0x4b6/0x930 ? hfsc_delete_class+0x250/0x250 [sch_hfsc] __dev_queue_xmit+0x4b6/0x930 ? ip6_finish_output2+0x24d/0x590 ip6_finish_output2+0x24d/0x590 ? ip6_output+0x6c/0x130 ip6_output+0x6c/0x130 ? __ip6_finish_output+0x110/0x110 mld_sendpack+0x224/0x230 mld_ifc_timer_expire+0x186/0x2c0 ? igmp6_group_dropped+0x200/0x200 call_timer_fn+0x2d/0x150 run_timer_softirq+0x20c/0x480 ? tick_sched_do_timer+0x60/0x60 ? tick_sched_timer+0x37/0x70 __do_softirq+0xf7/0x2cb irq_exit+0xa0/0xb0 smp_apic_timer_interrupt+0x74/0x150 apic_timer_interrupt+0xf/0x20 Fixes: e5f0e8f8e456 ("net: sched: introduce and use qdisc tree flush/purge helpers") Signed-off-by: Alexander Ovechkin Reported-by: Alexander Kuznetsov Acked-by: Dmitry Monakhov Acked-by: Dmitry Yakunin Acked-by: Cong Wang Link: https://lore.kernel.org/r/20210201200049.299153-1-ovov@yandex-team.ru Signed-off-by: Jakub Kicinski --- include/net/sch_generic.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 639e465a108f4..5b490b5591df5 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -1143,7 +1143,7 @@ static inline struct Qdisc *qdisc_replace(struct Qdisc *sch, struct Qdisc *new, old = *pold; *pold = new; if (old != NULL) - qdisc_tree_flush_backlog(old); + qdisc_purge_queue(old); sch_tree_unlock(sch); return old; -- GitLab From c518adafa39f37858697ac9309c6cf1805581446 Mon Sep 17 00:00:00 2001 From: Alexander Popov Date: Mon, 1 Feb 2021 11:47:19 +0300 Subject: [PATCH 1773/2012] vsock: fix the race conditions in multi-transport support There are multiple similar bugs implicitly introduced by the commit c0cfa2d8a788fcf4 ("vsock: add multi-transports support") and commit 6a2c0962105ae8ce ("vsock: prevent transport modules unloading"). The bug pattern: [1] vsock_sock.transport pointer is copied to a local variable, [2] lock_sock() is called, [3] the local variable is used. VSOCK multi-transport support introduced the race condition: vsock_sock.transport value may change between [1] and [2]. Let's copy vsock_sock.transport pointer to local variables after the lock_sock() call. Fixes: c0cfa2d8a788fcf4 ("vsock: add multi-transports support") Signed-off-by: Alexander Popov Reviewed-by: Stefano Garzarella Reviewed-by: Jorgen Hansen Link: https://lore.kernel.org/r/20210201084719.2257066-1-alex.popov@linux.com Signed-off-by: Jakub Kicinski --- net/vmw_vsock/af_vsock.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index b12d3a3222428..6894f21dc1475 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -1014,9 +1014,12 @@ static __poll_t vsock_poll(struct file *file, struct socket *sock, mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND; } else if (sock->type == SOCK_STREAM) { - const struct vsock_transport *transport = vsk->transport; + const struct vsock_transport *transport; + lock_sock(sk); + transport = vsk->transport; + /* Listening sockets that have connections in their accept * queue can be read. */ @@ -1099,10 +1102,11 @@ static int vsock_dgram_sendmsg(struct socket *sock, struct msghdr *msg, err = 0; sk = sock->sk; vsk = vsock_sk(sk); - transport = vsk->transport; lock_sock(sk); + transport = vsk->transport; + err = vsock_auto_bind(vsk); if (err) goto out; @@ -1561,10 +1565,11 @@ static int vsock_stream_setsockopt(struct socket *sock, err = 0; sk = sock->sk; vsk = vsock_sk(sk); - transport = vsk->transport; lock_sock(sk); + transport = vsk->transport; + switch (optname) { case SO_VM_SOCKETS_BUFFER_SIZE: COPY_IN(val); @@ -1697,7 +1702,6 @@ static int vsock_stream_sendmsg(struct socket *sock, struct msghdr *msg, sk = sock->sk; vsk = vsock_sk(sk); - transport = vsk->transport; total_written = 0; err = 0; @@ -1706,6 +1710,8 @@ static int vsock_stream_sendmsg(struct socket *sock, struct msghdr *msg, lock_sock(sk); + transport = vsk->transport; + /* Callers should not provide a destination with stream sockets. */ if (msg->msg_namelen) { err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP; @@ -1840,11 +1846,12 @@ vsock_stream_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, sk = sock->sk; vsk = vsock_sk(sk); - transport = vsk->transport; err = 0; lock_sock(sk); + transport = vsk->transport; + if (!transport || sk->sk_state != TCP_ESTABLISHED) { /* Recvmsg is supposed to return 0 if a peer performs an * orderly shutdown. Differentiate between that case and when a -- GitLab From 28e104d00281ade30250b24e098bf50887671ea4 Mon Sep 17 00:00:00 2001 From: Vadim Fedorenko Date: Sat, 30 Jan 2021 01:27:47 +0300 Subject: [PATCH 1774/2012] net: ip_tunnel: fix mtu calculation dev->hard_header_len for tunnel interface is set only when header_ops are set too and already contains full overhead of any tunnel encapsulation. That's why there is not need to use this overhead twice in mtu calc. Fixes: fdafed459998 ("ip_gre: set dev->hard_header_len and dev->needed_headroom properly") Reported-by: Slava Bacherikov Signed-off-by: Vadim Fedorenko Link: https://lore.kernel.org/r/1611959267-20536-1-git-send-email-vfedorenko@novek.ru Signed-off-by: Jakub Kicinski --- net/ipv4/ip_tunnel.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index 64594aa755f05..76a420c76f16e 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -317,7 +317,7 @@ static int ip_tunnel_bind_dev(struct net_device *dev) } dev->needed_headroom = t_hlen + hlen; - mtu -= (dev->hard_header_len + t_hlen); + mtu -= t_hlen; if (mtu < IPV4_MIN_MTU) mtu = IPV4_MIN_MTU; @@ -347,7 +347,7 @@ static struct ip_tunnel *ip_tunnel_create(struct net *net, nt = netdev_priv(dev); t_hlen = nt->hlen + sizeof(struct iphdr); dev->min_mtu = ETH_MIN_MTU; - dev->max_mtu = IP_MAX_MTU - dev->hard_header_len - t_hlen; + dev->max_mtu = IP_MAX_MTU - t_hlen; ip_tunnel_add(itn, nt); return nt; @@ -488,11 +488,10 @@ static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb, int mtu; tunnel_hlen = md ? tunnel_hlen : tunnel->hlen; - pkt_size = skb->len - tunnel_hlen - dev->hard_header_len; + pkt_size = skb->len - tunnel_hlen; if (df) - mtu = dst_mtu(&rt->dst) - dev->hard_header_len - - sizeof(struct iphdr) - tunnel_hlen; + mtu = dst_mtu(&rt->dst) - (sizeof(struct iphdr) + tunnel_hlen); else mtu = skb_valid_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu; @@ -972,7 +971,7 @@ int __ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict) { struct ip_tunnel *tunnel = netdev_priv(dev); int t_hlen = tunnel->hlen + sizeof(struct iphdr); - int max_mtu = IP_MAX_MTU - dev->hard_header_len - t_hlen; + int max_mtu = IP_MAX_MTU - t_hlen; if (new_mtu < ETH_MIN_MTU) return -EINVAL; @@ -1149,10 +1148,9 @@ int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[], mtu = ip_tunnel_bind_dev(dev); if (tb[IFLA_MTU]) { - unsigned int max = IP_MAX_MTU - dev->hard_header_len - nt->hlen; + unsigned int max = IP_MAX_MTU - (nt->hlen + sizeof(struct iphdr)); - mtu = clamp(dev->mtu, (unsigned int)ETH_MIN_MTU, - (unsigned int)(max - sizeof(struct iphdr))); + mtu = clamp(dev->mtu, (unsigned int)ETH_MIN_MTU, max); } err = dev_set_mtu(dev, mtu); -- GitLab From c3df39ac9b0e3747bf8233ea9ce4ed5ceb3199d3 Mon Sep 17 00:00:00 2001 From: Dongseok Yi Date: Sat, 30 Jan 2021 08:13:27 +0900 Subject: [PATCH 1775/2012] udp: ipv4: manipulate network header of NATed UDP GRO fraglist UDP/IP header of UDP GROed frag_skbs are not updated even after NAT forwarding. Only the header of head_skb from ip_finish_output_gso -> skb_gso_segment is updated but following frag_skbs are not updated. A call path skb_mac_gso_segment -> inet_gso_segment -> udp4_ufo_fragment -> __udp_gso_segment -> __udp_gso_segment_list does not try to update UDP/IP header of the segment list but copy only the MAC header. Update port, addr and check of each skb of the segment list in __udp_gso_segment_list. It covers both SNAT and DNAT. Fixes: 9fd1ff5d2ac7 (udp: Support UDP fraglist GRO/GSO.) Signed-off-by: Dongseok Yi Acked-by: Steffen Klassert Link: https://lore.kernel.org/r/1611962007-80092-1-git-send-email-dseok.yi@samsung.com Signed-off-by: Jakub Kicinski --- include/net/udp.h | 2 +- net/ipv4/udp_offload.c | 69 +++++++++++++++++++++++++++++++++++++++--- net/ipv6/udp_offload.c | 2 +- 3 files changed, 66 insertions(+), 7 deletions(-) diff --git a/include/net/udp.h b/include/net/udp.h index 877832bed4713..01351ba25b874 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -178,7 +178,7 @@ struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb, int udp_gro_complete(struct sk_buff *skb, int nhoff, udp_lookup_t lookup); struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb, - netdev_features_t features); + netdev_features_t features, bool is_ipv6); static inline struct udphdr *udp_gro_udphdr(struct sk_buff *skb) { diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index ff39e94781bfb..cfc872689b997 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -187,8 +187,67 @@ out_unlock: } EXPORT_SYMBOL(skb_udp_tunnel_segment); +static void __udpv4_gso_segment_csum(struct sk_buff *seg, + __be32 *oldip, __be32 *newip, + __be16 *oldport, __be16 *newport) +{ + struct udphdr *uh; + struct iphdr *iph; + + if (*oldip == *newip && *oldport == *newport) + return; + + uh = udp_hdr(seg); + iph = ip_hdr(seg); + + if (uh->check) { + inet_proto_csum_replace4(&uh->check, seg, *oldip, *newip, + true); + inet_proto_csum_replace2(&uh->check, seg, *oldport, *newport, + false); + if (!uh->check) + uh->check = CSUM_MANGLED_0; + } + *oldport = *newport; + + csum_replace4(&iph->check, *oldip, *newip); + *oldip = *newip; +} + +static struct sk_buff *__udpv4_gso_segment_list_csum(struct sk_buff *segs) +{ + struct sk_buff *seg; + struct udphdr *uh, *uh2; + struct iphdr *iph, *iph2; + + seg = segs; + uh = udp_hdr(seg); + iph = ip_hdr(seg); + + if ((udp_hdr(seg)->dest == udp_hdr(seg->next)->dest) && + (udp_hdr(seg)->source == udp_hdr(seg->next)->source) && + (ip_hdr(seg)->daddr == ip_hdr(seg->next)->daddr) && + (ip_hdr(seg)->saddr == ip_hdr(seg->next)->saddr)) + return segs; + + while ((seg = seg->next)) { + uh2 = udp_hdr(seg); + iph2 = ip_hdr(seg); + + __udpv4_gso_segment_csum(seg, + &iph2->saddr, &iph->saddr, + &uh2->source, &uh->source); + __udpv4_gso_segment_csum(seg, + &iph2->daddr, &iph->daddr, + &uh2->dest, &uh->dest); + } + + return segs; +} + static struct sk_buff *__udp_gso_segment_list(struct sk_buff *skb, - netdev_features_t features) + netdev_features_t features, + bool is_ipv6) { unsigned int mss = skb_shinfo(skb)->gso_size; @@ -198,11 +257,11 @@ static struct sk_buff *__udp_gso_segment_list(struct sk_buff *skb, udp_hdr(skb)->len = htons(sizeof(struct udphdr) + mss); - return skb; + return is_ipv6 ? skb : __udpv4_gso_segment_list_csum(skb); } struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb, - netdev_features_t features) + netdev_features_t features, bool is_ipv6) { struct sock *sk = gso_skb->sk; unsigned int sum_truesize = 0; @@ -214,7 +273,7 @@ struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb, __be16 newlen; if (skb_shinfo(gso_skb)->gso_type & SKB_GSO_FRAGLIST) - return __udp_gso_segment_list(gso_skb, features); + return __udp_gso_segment_list(gso_skb, features, is_ipv6); mss = skb_shinfo(gso_skb)->gso_size; if (gso_skb->len <= sizeof(*uh) + mss) @@ -328,7 +387,7 @@ static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, goto out; if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4) - return __udp_gso_segment(skb, features); + return __udp_gso_segment(skb, features, false); mss = skb_shinfo(skb)->gso_size; if (unlikely(skb->len <= mss)) diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c index c7bd7b1a04c13..faa823c242923 100644 --- a/net/ipv6/udp_offload.c +++ b/net/ipv6/udp_offload.c @@ -42,7 +42,7 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, goto out; if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4) - return __udp_gso_segment(skb, features); + return __udp_gso_segment(skb, features, true); mss = skb_shinfo(skb)->gso_size; if (unlikely(skb->len <= mss)) -- GitLab From 31628201545548e1ef167f2c55eb6fd7d3562f12 Mon Sep 17 00:00:00 2001 From: Vincent Bernat Date: Sat, 30 Jan 2021 20:05:18 +0100 Subject: [PATCH 1776/2012] docs: networking: swap words in icmp_errors_use_inbound_ifaddr doc Signed-off-by: Vincent Bernat Link: https://lore.kernel.org/r/20210130190518.854806-1-vincent@bernat.ch Signed-off-by: Jakub Kicinski --- Documentation/networking/ip-sysctl.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst index c7b775da95542..fa544e9037b99 100644 --- a/Documentation/networking/ip-sysctl.rst +++ b/Documentation/networking/ip-sysctl.rst @@ -1196,7 +1196,7 @@ icmp_errors_use_inbound_ifaddr - BOOLEAN If non-zero, the message will be sent with the primary address of the interface that received the packet that caused the icmp error. - This is the behaviour network many administrators will expect from + This is the behaviour many network administrators will expect from a router. And it can make debugging complicated network layouts much easier. -- GitLab From 665ab1eb18d7e8eaa8377fb8bf4924bfeb63bbce Mon Sep 17 00:00:00 2001 From: Lijun Pan Date: Fri, 29 Jan 2021 19:19:04 -0600 Subject: [PATCH 1777/2012] ibmvnic: rework to ensure SCRQ entry reads are properly ordered Move the dma_rmb() between pending_scrq() and ibmvnic_next_scrq() into the end of pending_scrq() to save the duplicated code since this dma_rmb will be used 3 times. Signed-off-by: Lijun Pan Acked-by: Thomas Falcon Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/ibm/ibmvnic.c | 30 +++++++++++------------------- 1 file changed, 11 insertions(+), 19 deletions(-) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index a40af510018a3..331ebca2f57a2 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -2444,12 +2444,6 @@ restart_poll: if (!pending_scrq(adapter, rx_scrq)) break; - /* The queue entry at the current index is peeked at above - * to determine that there is a valid descriptor awaiting - * processing. We want to be sure that the current slot - * holds a valid descriptor before reading its contents. - */ - dma_rmb(); next = ibmvnic_next_scrq(adapter, rx_scrq); rx_buff = (struct ibmvnic_rx_buff *)be64_to_cpu(next-> @@ -3189,13 +3183,6 @@ restart_loop: int total_bytes = 0; int num_packets = 0; - /* The queue entry at the current index is peeked at above - * to determine that there is a valid descriptor awaiting - * processing. We want to be sure that the current slot - * holds a valid descriptor before reading its contents. - */ - dma_rmb(); - next = ibmvnic_next_scrq(adapter, scrq); for (i = 0; i < next->tx_comp.num_comps; i++) { if (next->tx_comp.rcs[i]) @@ -3569,11 +3556,16 @@ static int pending_scrq(struct ibmvnic_adapter *adapter, struct ibmvnic_sub_crq_queue *scrq) { union sub_crq *entry = &scrq->msgs[scrq->cur]; + int rc; - if (entry->generic.first & IBMVNIC_CRQ_CMD_RSP) - return 1; - else - return 0; + rc = !!(entry->generic.first & IBMVNIC_CRQ_CMD_RSP); + + /* Ensure that the SCRQ valid flag is loaded prior to loading the + * contents of the SCRQ descriptor + */ + dma_rmb(); + + return rc; } static union sub_crq *ibmvnic_next_scrq(struct ibmvnic_adapter *adapter, @@ -3592,8 +3584,8 @@ static union sub_crq *ibmvnic_next_scrq(struct ibmvnic_adapter *adapter, } spin_unlock_irqrestore(&scrq->lock, flags); - /* Ensure that the entire buffer descriptor has been - * loaded before reading its contents + /* Ensure that the SCRQ valid flag is loaded prior to loading the + * contents of the SCRQ descriptor */ dma_rmb(); -- GitLab From 2719cb445da5fec698e961abdf75cf9e4d61fba4 Mon Sep 17 00:00:00 2001 From: Lijun Pan Date: Fri, 29 Jan 2021 19:19:05 -0600 Subject: [PATCH 1778/2012] ibmvnic: remove unnecessary rmb() inside ibmvnic_poll rmb() can be removed since: 1. pending_scrq() has dma_rmb() at the function end; 2. dma_rmb(), though weaker, is enough here. Signed-off-by: Lijun Pan Acked-by: Dwip Banerjee Acked-by: Thomas Falcon Reviewed-by: Brian King Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/ibm/ibmvnic.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index 331ebca2f57a2..0ed169ef1cfcb 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -2510,7 +2510,6 @@ restart_poll: if (napi_complete_done(napi, frames_processed)) { enable_scrq_irq(adapter, rx_scrq); if (pending_scrq(adapter, rx_scrq)) { - rmb(); if (napi_reschedule(napi)) { disable_scrq_irq(adapter, rx_scrq); goto restart_poll; -- GitLab From 3ef14e463f6ed0218710f56b97e1a7d0448784d2 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Tue, 25 Feb 2020 14:37:39 -0800 Subject: [PATCH 1779/2012] net/mlx5e: Separate between netdev objects and mlx5e profiles initialization 1) Initialize netdevice features and structures on netdevice allocation and outside of the mlx5e profile. 2) As now mlx5e netdevice private params will be setup on profile init only after netdevice features are already set, we add a call to netde_update_features() to resolve any conflict. This is nice since we reuse the fix_features ndo code if a profile wants different default features, instead of duplicating features conflict resolution code on profile initialization. 3) With this we achieve total separation between mlx5e profiles and netdevices, and will allow replacing mlx5e profiles on the fly to reuse the same netdevice for multiple profiles. e.g. for uplink representor profile as shown in the following patch 4) Profile callbacks are not allowed to touch netdev->features directly anymore, since in downstream patch we will detach/attach netdev dynamically to profile, hence we move the code dealing with netdev->features from profile->init() to fix_features ndo, and we will call netdev_update_features() on mlx5e_attach_netdev(profile, netdev); Signed-off-by: Saeed Mahameed Reviewed-by: Roi Dayan --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 23 ++-- .../net/ethernet/mellanox/mlx5/core/en_main.c | 127 +++++++++--------- .../net/ethernet/mellanox/mlx5/core/en_rep.c | 51 ++++--- .../ethernet/mellanox/mlx5/core/ipoib/ipoib.c | 23 ++-- .../ethernet/mellanox/mlx5/core/ipoib/ipoib.h | 5 +- .../mellanox/mlx5/core/ipoib/ipoib_vlan.c | 6 +- 6 files changed, 121 insertions(+), 114 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 39f389cc40fc9..bf5de1e791341 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -895,8 +895,7 @@ extern const struct mlx5e_rx_handlers mlx5e_rx_handlers_nic; struct mlx5e_profile { int (*init)(struct mlx5_core_dev *mdev, - struct net_device *netdev, - const struct mlx5e_profile *profile, void *ppriv); + struct net_device *netdev); void (*cleanup)(struct mlx5e_priv *priv); int (*init_rx)(struct mlx5e_priv *priv); void (*cleanup_rx)(struct mlx5e_priv *priv); @@ -1155,24 +1154,22 @@ int mlx5e_ethtool_set_pauseparam(struct mlx5e_priv *priv, struct ethtool_pauseparam *pauseparam); /* mlx5e generic netdev management API */ +static inline unsigned int mlx5e_calc_max_nch(struct mlx5e_priv *priv) +{ + return priv->netdev->num_rx_queues / max_t(u8, priv->profile->rq_groups, 1); +} + int mlx5e_netdev_init(struct net_device *netdev, struct mlx5e_priv *priv, - struct mlx5_core_dev *mdev, - const struct mlx5e_profile *profile, - void *ppriv); + struct mlx5_core_dev *mdev); void mlx5e_netdev_cleanup(struct net_device *netdev, struct mlx5e_priv *priv); -struct net_device* -mlx5e_create_netdev(struct mlx5_core_dev *mdev, const struct mlx5e_profile *profile, - int nch, void *ppriv); +struct net_device * +mlx5e_create_netdev(struct mlx5_core_dev *mdev, unsigned int txqs, unsigned int rxqs); int mlx5e_attach_netdev(struct mlx5e_priv *priv); void mlx5e_detach_netdev(struct mlx5e_priv *priv); void mlx5e_destroy_netdev(struct mlx5e_priv *priv); void mlx5e_set_netdev_mtu_boundaries(struct mlx5e_priv *priv); -void mlx5e_build_nic_params(struct mlx5e_priv *priv, - struct mlx5e_xsk *xsk, - struct mlx5e_rss_params *rss_params, - struct mlx5e_params *params, - u16 mtu); +void mlx5e_build_nic_params(struct mlx5e_priv *priv, struct mlx5e_xsk *xsk, u16 mtu); void mlx5e_build_rq_params(struct mlx5_core_dev *mdev, struct mlx5e_params *params); void mlx5e_build_rss_params(struct mlx5e_rss_params *rss_params, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index aad3887e3c1a3..260ced27014d9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -4093,6 +4093,7 @@ static netdev_features_t mlx5e_fix_features(struct net_device *netdev, if (!params->vlan_strip_disable) netdev_warn(netdev, "Dropping C-tag vlan stripping offload due to S-tag vlan\n"); } + if (!MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_STRIDING_RQ)) { if (features & NETIF_F_LRO) { netdev_warn(netdev, "Disabling LRO, not supported in legacy RQ\n"); @@ -4928,15 +4929,15 @@ void mlx5e_build_rss_params(struct mlx5e_rss_params *rss_params, tirc_default_config[tt].rx_hash_fields; } -void mlx5e_build_nic_params(struct mlx5e_priv *priv, - struct mlx5e_xsk *xsk, - struct mlx5e_rss_params *rss_params, - struct mlx5e_params *params, - u16 mtu) +void mlx5e_build_nic_params(struct mlx5e_priv *priv, struct mlx5e_xsk *xsk, u16 mtu) { + struct mlx5e_rss_params *rss_params = &priv->rss_params; + struct mlx5e_params *params = &priv->channels.params; struct mlx5_core_dev *mdev = priv->mdev; u8 rx_cq_period_mode; + priv->max_nch = mlx5e_calc_max_nch(priv); + params->sw_mtu = mtu; params->hard_mtu = MLX5E_ETH_HARD_MTU; params->num_channels = min_t(unsigned int, MLX5E_MAX_NUM_CHANNELS / 2, @@ -4994,6 +4995,11 @@ void mlx5e_build_nic_params(struct mlx5e_priv *priv, /* AF_XDP */ params->xsk = xsk; + + /* Do not update netdev->features directly in here + * on mlx5e_attach_netdev() we will call mlx5e_update_features() + * To update netdev->features please modify mlx5e_fix_features() + */ } static void mlx5e_set_netdev_dev_addr(struct net_device *netdev) @@ -5146,18 +5152,12 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev) netdev->hw_features |= NETIF_F_RXFCS; netdev->features = netdev->hw_features; - if (!priv->channels.params.lro_en) - netdev->features &= ~NETIF_F_LRO; + /* Defaults */ if (fcs_enabled) netdev->features &= ~NETIF_F_RXALL; - - if (!priv->channels.params.scatter_fcs_en) - netdev->features &= ~NETIF_F_RXFCS; - - /* prefere CQE compression over rxhash */ - if (MLX5E_GET_PFLAG(&priv->channels.params, MLX5E_PFLAG_RX_CQE_COMPRESS)) - netdev->features &= ~NETIF_F_RXHASH; + netdev->features &= ~NETIF_F_LRO; + netdev->features &= ~NETIF_F_RXFCS; #define FT_CAP(f) MLX5_CAP_FLOWTABLE(mdev, flow_table_properties_nic_receive.f) if (FT_CAP(flow_modify_en) && @@ -5223,33 +5223,27 @@ void mlx5e_destroy_q_counters(struct mlx5e_priv *priv) } static int mlx5e_nic_init(struct mlx5_core_dev *mdev, - struct net_device *netdev, - const struct mlx5e_profile *profile, - void *ppriv) + struct net_device *netdev) { struct mlx5e_priv *priv = netdev_priv(netdev); - struct mlx5e_rss_params *rss = &priv->rss_params; int err; - err = mlx5e_netdev_init(netdev, priv, mdev, profile, ppriv); - if (err) - return err; - - mlx5e_build_nic_params(priv, &priv->xsk, rss, &priv->channels.params, - netdev->mtu); + mlx5e_build_nic_params(priv, &priv->xsk, netdev->mtu); mlx5e_timestamp_init(priv); err = mlx5e_ipsec_init(priv); if (err) mlx5_core_err(mdev, "IPSec initialization failed, %d\n", err); + err = mlx5e_tls_init(priv); if (err) mlx5_core_err(mdev, "TLS initialization failed, %d\n", err); - mlx5e_build_nic_netdev(netdev); + err = mlx5e_devlink_port_register(priv); if (err) mlx5_core_err(mdev, "mlx5e_devlink_port_register failed, %d\n", err); + mlx5e_health_create_reporters(priv); return 0; @@ -5261,7 +5255,6 @@ static void mlx5e_nic_cleanup(struct mlx5e_priv *priv) mlx5e_devlink_port_unregister(priv); mlx5e_tls_cleanup(priv); mlx5e_ipsec_cleanup(priv); - mlx5e_netdev_cleanup(priv->netdev, priv); } static int mlx5e_init_nic_rx(struct mlx5e_priv *priv) @@ -5464,21 +5457,14 @@ static const struct mlx5e_profile mlx5e_nic_profile = { }; /* mlx5e generic netdev management API (move to en_common.c) */ - -/* mlx5e_netdev_init/cleanup must be called from profile->init/cleanup callbacks */ int mlx5e_netdev_init(struct net_device *netdev, struct mlx5e_priv *priv, - struct mlx5_core_dev *mdev, - const struct mlx5e_profile *profile, - void *ppriv) + struct mlx5_core_dev *mdev) { /* priv init */ priv->mdev = mdev; priv->netdev = netdev; - priv->profile = profile; - priv->ppriv = ppriv; priv->msglevel = MLX5E_MSG_LEVEL; - priv->max_nch = netdev->num_rx_queues / max_t(u8, profile->rq_groups, 1); priv->max_opened_tc = 1; if (!alloc_cpumask_var(&priv->scratchpad.cpumask, GFP_KERNEL)) @@ -5518,35 +5504,24 @@ void mlx5e_netdev_cleanup(struct net_device *netdev, struct mlx5e_priv *priv) kvfree(priv->htb.qos_sq_stats); } -struct net_device *mlx5e_create_netdev(struct mlx5_core_dev *mdev, - const struct mlx5e_profile *profile, - int nch, - void *ppriv) +struct net_device * +mlx5e_create_netdev(struct mlx5_core_dev *mdev, unsigned int txqs, unsigned int rxqs) { struct net_device *netdev; - unsigned int ptp_txqs = 0; - int qos_sqs = 0; int err; - if (MLX5_CAP_GEN(mdev, ts_cqe_to_dest_cqn)) - ptp_txqs = profile->max_tc; - - if (mlx5_qos_is_supported(mdev)) - qos_sqs = mlx5e_qos_max_leaf_nodes(mdev); - - netdev = alloc_etherdev_mqs(sizeof(struct mlx5e_priv), - nch * profile->max_tc + ptp_txqs + qos_sqs, - nch * profile->rq_groups); + netdev = alloc_etherdev_mqs(sizeof(struct mlx5e_priv), txqs, rxqs); if (!netdev) { mlx5_core_err(mdev, "alloc_etherdev_mqs() failed\n"); return NULL; } - err = profile->init(mdev, netdev, profile, ppriv); + err = mlx5e_netdev_init(netdev, netdev_priv(netdev), mdev); if (err) { - mlx5_core_err(mdev, "failed to init mlx5e profile %d\n", err); + mlx5_core_err(mdev, "mlx5e_netdev_init failed, err=%d\n", err); goto err_free_netdev; } + dev_net_set(netdev, mlx5_core_net(mdev)); return netdev; @@ -5556,14 +5531,23 @@ err_free_netdev: return NULL; } +static void mlx5e_update_features(struct net_device *netdev) +{ + if (netdev->reg_state != NETREG_REGISTERED) + return; /* features will be updated on netdev registration */ + + rtnl_lock(); + netdev_update_features(netdev); + rtnl_unlock(); +} + int mlx5e_attach_netdev(struct mlx5e_priv *priv) { const bool take_rtnl = priv->netdev->reg_state == NETREG_REGISTERED; - const struct mlx5e_profile *profile; + const struct mlx5e_profile *profile = priv->profile; int max_nch; int err; - profile = priv->profile; clear_bit(MLX5E_STATE_DESTROYING, &priv->state); /* max number of channels may have changed */ @@ -5603,6 +5587,8 @@ int mlx5e_attach_netdev(struct mlx5e_priv *priv) if (profile->enable) profile->enable(priv); + mlx5e_update_features(priv->netdev); + return 0; err_cleanup_tx: @@ -5631,11 +5617,9 @@ void mlx5e_detach_netdev(struct mlx5e_priv *priv) void mlx5e_destroy_netdev(struct mlx5e_priv *priv) { - const struct mlx5e_profile *profile = priv->profile; struct net_device *netdev = priv->netdev; - if (profile->cleanup) - profile->cleanup(priv); + mlx5e_netdev_cleanup(netdev, priv); free_netdev(netdev); } @@ -5681,28 +5665,48 @@ static int mlx5e_probe(struct auxiliary_device *adev, const struct auxiliary_device_id *id) { struct mlx5_adev *edev = container_of(adev, struct mlx5_adev, adev); + const struct mlx5e_profile *profile = &mlx5e_nic_profile; struct mlx5_core_dev *mdev = edev->mdev; struct net_device *netdev; pm_message_t state = {}; - void *priv; + unsigned int txqs, rxqs, ptp_txqs = 0; + struct mlx5e_priv *priv; + int qos_sqs = 0; int err; int nch; + if (MLX5_CAP_GEN(mdev, ts_cqe_to_dest_cqn)) + ptp_txqs = profile->max_tc; + + if (mlx5_qos_is_supported(mdev)) + qos_sqs = mlx5e_qos_max_leaf_nodes(mdev); + nch = mlx5e_get_max_num_channels(mdev); - netdev = mlx5e_create_netdev(mdev, &mlx5e_nic_profile, nch, NULL); + txqs = nch * profile->max_tc + ptp_txqs + qos_sqs; + rxqs = nch * profile->rq_groups; + netdev = mlx5e_create_netdev(mdev, txqs, rxqs); if (!netdev) { mlx5_core_err(mdev, "mlx5e_create_netdev failed\n"); return -ENOMEM; } - dev_net_set(netdev, mlx5_core_net(mdev)); + mlx5e_build_nic_netdev(netdev); + priv = netdev_priv(netdev); dev_set_drvdata(&adev->dev, priv); + priv->profile = profile; + priv->ppriv = NULL; + err = profile->init(mdev, netdev); + if (err) { + mlx5_core_err(mdev, "mlx5e_nic_profile init failed, %d\n", err); + goto err_destroy_netdev; + } + err = mlx5e_resume(adev); if (err) { mlx5_core_err(mdev, "mlx5e_resume failed, %d\n", err); - goto err_destroy_netdev; + goto err_profile_cleanup; } err = register_netdev(netdev); @@ -5718,6 +5722,8 @@ static int mlx5e_probe(struct auxiliary_device *adev, err_resume: mlx5e_suspend(adev, state); +err_profile_cleanup: + profile->cleanup(priv); err_destroy_netdev: mlx5e_destroy_netdev(priv); return err; @@ -5731,6 +5737,7 @@ static void mlx5e_remove(struct auxiliary_device *adev) mlx5e_dcbnl_delete_app(priv); unregister_netdev(priv->netdev); mlx5e_suspend(adev, state); + priv->profile->cleanup(priv); mlx5e_destroy_netdev(priv); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 629ae6ccb4cd6..d94d2ff9d312f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -684,7 +684,10 @@ static void mlx5e_build_rep_params(struct net_device *netdev) MLX5_CQ_PERIOD_MODE_START_FROM_CQE : MLX5_CQ_PERIOD_MODE_START_FROM_EQE; + priv->max_nch = mlx5e_calc_max_nch(priv); params = &priv->channels.params; + + params->num_channels = MLX5E_REP_PARAMS_DEF_NUM_CHANNELS; params->hard_mtu = MLX5E_ETH_HARD_MTU; params->sw_mtu = netdev->mtu; @@ -710,12 +713,11 @@ static void mlx5e_build_rep_params(struct net_device *netdev) mlx5e_build_rss_params(&priv->rss_params, params->num_channels); } -static void mlx5e_build_rep_netdev(struct net_device *netdev) +static void mlx5e_build_rep_netdev(struct net_device *netdev, + struct mlx5_core_dev *mdev, + struct mlx5_eswitch_rep *rep) { struct mlx5e_priv *priv = netdev_priv(netdev); - struct mlx5e_rep_priv *rpriv = priv->ppriv; - struct mlx5_eswitch_rep *rep = rpriv->rep; - struct mlx5_core_dev *mdev = priv->mdev; SET_NETDEV_DEV(netdev, mdev->device); if (rep->vport == MLX5_VPORT_UPLINK) { @@ -755,22 +757,11 @@ static void mlx5e_build_rep_netdev(struct net_device *netdev) } static int mlx5e_init_rep(struct mlx5_core_dev *mdev, - struct net_device *netdev, - const struct mlx5e_profile *profile, - void *ppriv) + struct net_device *netdev) { struct mlx5e_priv *priv = netdev_priv(netdev); - int err; - - err = mlx5e_netdev_init(netdev, priv, mdev, profile, ppriv); - if (err) - return err; - - priv->channels.params.num_channels = MLX5E_REP_PARAMS_DEF_NUM_CHANNELS; mlx5e_build_rep_params(netdev); - mlx5e_build_rep_netdev(netdev); - mlx5e_timestamp_init(priv); return 0; @@ -778,7 +769,6 @@ static int mlx5e_init_rep(struct mlx5_core_dev *mdev, static void mlx5e_cleanup_rep(struct mlx5e_priv *priv) { - mlx5e_netdev_cleanup(priv->netdev, priv); } static int mlx5e_create_rep_ttc_table(struct mlx5e_priv *priv) @@ -1201,6 +1191,8 @@ mlx5e_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) struct mlx5e_rep_priv *rpriv; struct devlink_port *dl_port; struct net_device *netdev; + struct mlx5e_priv *priv; + unsigned int txqs, rxqs; int nch, err; rpriv = kzalloc(sizeof(*rpriv), GFP_KERNEL); @@ -1210,10 +1202,13 @@ mlx5e_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) /* rpriv->rep to be looked up when profile->init() is called */ rpriv->rep = rep; - nch = mlx5e_get_max_num_channels(dev); profile = (rep->vport == MLX5_VPORT_UPLINK) ? &mlx5e_uplink_rep_profile : &mlx5e_rep_profile; - netdev = mlx5e_create_netdev(dev, profile, nch, rpriv); + + nch = mlx5e_get_max_num_channels(dev); + txqs = nch * profile->max_tc; + rxqs = nch * profile->rq_groups; + netdev = mlx5e_create_netdev(dev, txqs, rxqs); if (!netdev) { mlx5_core_warn(dev, "Failed to create representor netdev for vport %d\n", @@ -1222,7 +1217,8 @@ mlx5e_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) return -EINVAL; } - dev_net_set(netdev, mlx5_core_net(dev)); + mlx5e_build_rep_netdev(netdev, dev, rep); + rpriv->netdev = netdev; rep->rep_data[REP_ETH].priv = rpriv; INIT_LIST_HEAD(&rpriv->vport_sqs_list); @@ -1233,12 +1229,21 @@ mlx5e_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) goto err_destroy_netdev; } + priv = netdev_priv(netdev); + priv->profile = profile; + priv->ppriv = rpriv; + err = profile->init(dev, netdev); + if (err) { + netdev_warn(netdev, "rep profile init failed, %d\n", err); + goto err_destroy_mdev_resources; + } + err = mlx5e_attach_netdev(netdev_priv(netdev)); if (err) { netdev_warn(netdev, "Failed to attach representor netdev for vport %d\n", rep->vport); - goto err_destroy_mdev_resources; + goto err_cleanup_profile; } err = mlx5e_rep_neigh_init(rpriv); @@ -1268,6 +1273,9 @@ err_neigh_cleanup: err_detach_netdev: mlx5e_detach_netdev(netdev_priv(netdev)); +err_cleanup_profile: + priv->profile->cleanup(priv); + err_destroy_mdev_resources: if (rep->vport == MLX5_VPORT_UPLINK) mlx5e_destroy_mdev_resources(dev); @@ -1294,6 +1302,7 @@ mlx5e_vport_rep_unload(struct mlx5_eswitch_rep *rep) unregister_netdev(netdev); mlx5e_rep_neigh_cleanup(rpriv); mlx5e_detach_netdev(priv); + priv->profile->cleanup(priv); if (rep->vport == MLX5_VPORT_UPLINK) mlx5e_destroy_mdev_resources(priv->mdev); mlx5e_destroy_netdev(priv); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c index 97b5fcb1f4064..5889029c2adf2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c @@ -72,23 +72,14 @@ static void mlx5i_build_nic_params(struct mlx5_core_dev *mdev, } /* Called directly after IPoIB netdevice was created to initialize SW structs */ -int mlx5i_init(struct mlx5_core_dev *mdev, - struct net_device *netdev, - const struct mlx5e_profile *profile, - void *ppriv) +int mlx5i_init(struct mlx5_core_dev *mdev, struct net_device *netdev) { struct mlx5e_priv *priv = mlx5i_epriv(netdev); - int err; - - err = mlx5e_netdev_init(netdev, priv, mdev, profile, ppriv); - if (err) - return err; mlx5e_set_netdev_mtu_boundaries(priv); netdev->mtu = netdev->max_mtu; - mlx5e_build_nic_params(priv, NULL, &priv->rss_params, &priv->channels.params, - netdev->mtu); + mlx5e_build_nic_params(priv, NULL, netdev->mtu); mlx5i_build_nic_params(mdev, &priv->channels.params); mlx5e_timestamp_init(priv); @@ -753,7 +744,14 @@ static int mlx5_rdma_setup_rn(struct ib_device *ibdev, u8 port_num, goto destroy_ht; } - prof->init(mdev, netdev, prof, ipriv); + err = mlx5e_netdev_init(netdev, epriv, mdev); + if (err) + goto destroy_mdev_resources; + + epriv->profile = prof; + epriv->ppriv = ipriv; + + prof->init(mdev, netdev); err = mlx5e_attach_netdev(epriv); if (err) @@ -777,6 +775,7 @@ detach: prof->cleanup(epriv); if (ipriv->sub_interface) return err; +destroy_mdev_resources: mlx5e_destroy_mdev_resources(mdev); destroy_ht: mlx5i_pkey_qpn_ht_cleanup(netdev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h index b79dc1e28c418..99d46fda9f82f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h @@ -87,10 +87,7 @@ void mlx5i_dev_cleanup(struct net_device *dev); int mlx5i_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd); /* Parent profile functions */ -int mlx5i_init(struct mlx5_core_dev *mdev, - struct net_device *netdev, - const struct mlx5e_profile *profile, - void *ppriv); +int mlx5i_init(struct mlx5_core_dev *mdev, struct net_device *netdev); void mlx5i_cleanup(struct mlx5e_priv *priv); int mlx5i_update_nic_rx(struct mlx5e_priv *priv); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c index 7163d9f6c4a6f..3d0a18a0bed4a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c @@ -276,14 +276,12 @@ static int mlx5i_pkey_change_mtu(struct net_device *netdev, int new_mtu) /* Called directly after IPoIB netdevice was created to initialize SW structs */ static int mlx5i_pkey_init(struct mlx5_core_dev *mdev, - struct net_device *netdev, - const struct mlx5e_profile *profile, - void *ppriv) + struct net_device *netdev) { struct mlx5e_priv *priv = mlx5i_epriv(netdev); int err; - err = mlx5i_init(mdev, netdev, profile, ppriv); + err = mlx5i_init(mdev, netdev); if (err) return err; -- GitLab From c4d7eb57687f358cd498ea3624519236af8db97e Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Sun, 22 Mar 2020 23:17:14 -0700 Subject: [PATCH 1780/2012] net/mxl5e: Add change profile method Port nic netdevice will be used as uplink representor in downstream patches. Add change profile method to allow changing a mlx5e netdevice profile dynamically. Signed-off-by: Saeed Mahameed Reviewed-by: Roi Dayan --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 7 +- .../net/ethernet/mellanox/mlx5/core/en_main.c | 68 ++++++++++++++++++- .../net/ethernet/mellanox/mlx5/core/en_rep.c | 2 +- 3 files changed, 73 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index bf5de1e791341..fa461cfd64106 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -1154,9 +1154,10 @@ int mlx5e_ethtool_set_pauseparam(struct mlx5e_priv *priv, struct ethtool_pauseparam *pauseparam); /* mlx5e generic netdev management API */ -static inline unsigned int mlx5e_calc_max_nch(struct mlx5e_priv *priv) +static inline unsigned int +mlx5e_calc_max_nch(struct mlx5e_priv *priv, const struct mlx5e_profile *profile) { - return priv->netdev->num_rx_queues / max_t(u8, priv->profile->rq_groups, 1); + return priv->netdev->num_rx_queues / max_t(u8, profile->rq_groups, 1); } int mlx5e_netdev_init(struct net_device *netdev, @@ -1168,6 +1169,8 @@ mlx5e_create_netdev(struct mlx5_core_dev *mdev, unsigned int txqs, unsigned int int mlx5e_attach_netdev(struct mlx5e_priv *priv); void mlx5e_detach_netdev(struct mlx5e_priv *priv); void mlx5e_destroy_netdev(struct mlx5e_priv *priv); +int mlx5e_netdev_change_profile(struct mlx5e_priv *priv, + const struct mlx5e_profile *new_profile, void *new_ppriv); void mlx5e_set_netdev_mtu_boundaries(struct mlx5e_priv *priv); void mlx5e_build_nic_params(struct mlx5e_priv *priv, struct mlx5e_xsk *xsk, u16 mtu); void mlx5e_build_rq_params(struct mlx5_core_dev *mdev, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 260ced27014d9..91f23871ded55 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -4936,7 +4936,7 @@ void mlx5e_build_nic_params(struct mlx5e_priv *priv, struct mlx5e_xsk *xsk, u16 struct mlx5_core_dev *mdev = priv->mdev; u8 rx_cq_period_mode; - priv->max_nch = mlx5e_calc_max_nch(priv); + priv->max_nch = mlx5e_calc_max_nch(priv, priv->profile); params->sw_mtu = mtu; params->hard_mtu = MLX5E_ETH_HARD_MTU; @@ -5461,6 +5461,8 @@ int mlx5e_netdev_init(struct net_device *netdev, struct mlx5e_priv *priv, struct mlx5_core_dev *mdev) { + memset(priv, 0, sizeof(*priv)); + /* priv init */ priv->mdev = mdev; priv->netdev = netdev; @@ -5615,6 +5617,70 @@ void mlx5e_detach_netdev(struct mlx5e_priv *priv) cancel_work_sync(&priv->update_stats_work); } +static int +mlx5e_netdev_attach_profile(struct mlx5e_priv *priv, + const struct mlx5e_profile *new_profile, void *new_ppriv) +{ + struct net_device *netdev = priv->netdev; + struct mlx5_core_dev *mdev = priv->mdev; + int err; + + err = mlx5e_netdev_init(netdev, priv, mdev); + if (err) { + mlx5_core_err(mdev, "mlx5e_netdev_init failed, err=%d\n", err); + return err; + } + priv->profile = new_profile; + priv->ppriv = new_ppriv; + err = new_profile->init(priv->mdev, priv->netdev); + if (err) + return err; + err = mlx5e_attach_netdev(priv); + if (err) + new_profile->cleanup(priv); + return err; +} + +int mlx5e_netdev_change_profile(struct mlx5e_priv *priv, + const struct mlx5e_profile *new_profile, void *new_ppriv) +{ + unsigned int new_max_nch = mlx5e_calc_max_nch(priv, new_profile); + const struct mlx5e_profile *orig_profile = priv->profile; + void *orig_ppriv = priv->ppriv; + int err, rollback_err; + + /* sanity */ + if (new_max_nch != priv->max_nch) { + netdev_warn(priv->netdev, + "%s: Replacing profile with different max channles\n", + __func__); + return -EINVAL; + } + + /* cleanup old profile */ + mlx5e_detach_netdev(priv); + priv->profile->cleanup(priv); + mlx5e_netdev_cleanup(priv->netdev, priv); + + err = mlx5e_netdev_attach_profile(priv, new_profile, new_ppriv); + if (err) { /* roll back to original profile */ + netdev_warn(priv->netdev, "%s: new profile init failed, %d\n", + __func__, err); + goto rollback; + } + + return 0; + +rollback: + rollback_err = mlx5e_netdev_attach_profile(priv, orig_profile, orig_ppriv); + if (rollback_err) { + netdev_err(priv->netdev, + "%s: failed to rollback to orig profile, %d\n", + __func__, rollback_err); + } + return err; +} + void mlx5e_destroy_netdev(struct mlx5e_priv *priv) { struct net_device *netdev = priv->netdev; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index d94d2ff9d312f..c8a0f4c88d4ba 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -684,7 +684,7 @@ static void mlx5e_build_rep_params(struct net_device *netdev) MLX5_CQ_PERIOD_MODE_START_FROM_CQE : MLX5_CQ_PERIOD_MODE_START_FROM_EQE; - priv->max_nch = mlx5e_calc_max_nch(priv); + priv->max_nch = mlx5e_calc_max_nch(priv, priv->profile); params = &priv->channels.params; params->num_channels = MLX5E_REP_PARAMS_DEF_NUM_CHANNELS; -- GitLab From c9fd1e33e989d14fe695c33bf8c7b935b3bea111 Mon Sep 17 00:00:00 2001 From: Roi Dayan Date: Wed, 16 Sep 2020 10:10:30 +0300 Subject: [PATCH 1781/2012] net/mlx5e: Refactor mlx5e_netdev_init/cleanup to mlx5e_priv_init/cleanup We actually initialize priv and not netdev. The only call to set netdev carrier will be moved in the following commit. Signed-off-by: Roi Dayan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 8 ++++---- .../net/ethernet/mellanox/mlx5/core/en_main.c | 20 +++++++++---------- .../ethernet/mellanox/mlx5/core/ipoib/ipoib.c | 4 ++-- 3 files changed, 16 insertions(+), 16 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index fa461cfd64106..8cc80c31341fb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -1160,10 +1160,10 @@ mlx5e_calc_max_nch(struct mlx5e_priv *priv, const struct mlx5e_profile *profile) return priv->netdev->num_rx_queues / max_t(u8, profile->rq_groups, 1); } -int mlx5e_netdev_init(struct net_device *netdev, - struct mlx5e_priv *priv, - struct mlx5_core_dev *mdev); -void mlx5e_netdev_cleanup(struct net_device *netdev, struct mlx5e_priv *priv); +int mlx5e_priv_init(struct mlx5e_priv *priv, + struct net_device *netdev, + struct mlx5_core_dev *mdev); +void mlx5e_priv_cleanup(struct mlx5e_priv *priv); struct net_device * mlx5e_create_netdev(struct mlx5_core_dev *mdev, unsigned int txqs, unsigned int rxqs); int mlx5e_attach_netdev(struct mlx5e_priv *priv); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 91f23871ded55..177e076f6cce3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -5457,9 +5457,9 @@ static const struct mlx5e_profile mlx5e_nic_profile = { }; /* mlx5e generic netdev management API (move to en_common.c) */ -int mlx5e_netdev_init(struct net_device *netdev, - struct mlx5e_priv *priv, - struct mlx5_core_dev *mdev) +int mlx5e_priv_init(struct mlx5e_priv *priv, + struct net_device *netdev, + struct mlx5_core_dev *mdev) { memset(priv, 0, sizeof(*priv)); @@ -5494,7 +5494,7 @@ err_free_cpumask: return -ENOMEM; } -void mlx5e_netdev_cleanup(struct net_device *netdev, struct mlx5e_priv *priv) +void mlx5e_priv_cleanup(struct mlx5e_priv *priv) { int i; @@ -5518,9 +5518,9 @@ mlx5e_create_netdev(struct mlx5_core_dev *mdev, unsigned int txqs, unsigned int return NULL; } - err = mlx5e_netdev_init(netdev, netdev_priv(netdev), mdev); + err = mlx5e_priv_init(netdev_priv(netdev), netdev, mdev); if (err) { - mlx5_core_err(mdev, "mlx5e_netdev_init failed, err=%d\n", err); + mlx5_core_err(mdev, "mlx5e_priv_init failed, err=%d\n", err); goto err_free_netdev; } dev_net_set(netdev, mlx5_core_net(mdev)); @@ -5625,9 +5625,9 @@ mlx5e_netdev_attach_profile(struct mlx5e_priv *priv, struct mlx5_core_dev *mdev = priv->mdev; int err; - err = mlx5e_netdev_init(netdev, priv, mdev); + err = mlx5e_priv_init(priv, netdev, mdev); if (err) { - mlx5_core_err(mdev, "mlx5e_netdev_init failed, err=%d\n", err); + mlx5_core_err(mdev, "mlx5e_priv_init failed, err=%d\n", err); return err; } priv->profile = new_profile; @@ -5660,7 +5660,7 @@ int mlx5e_netdev_change_profile(struct mlx5e_priv *priv, /* cleanup old profile */ mlx5e_detach_netdev(priv); priv->profile->cleanup(priv); - mlx5e_netdev_cleanup(priv->netdev, priv); + mlx5e_priv_cleanup(priv); err = mlx5e_netdev_attach_profile(priv, new_profile, new_ppriv); if (err) { /* roll back to original profile */ @@ -5685,7 +5685,7 @@ void mlx5e_destroy_netdev(struct mlx5e_priv *priv) { struct net_device *netdev = priv->netdev; - mlx5e_netdev_cleanup(netdev, priv); + mlx5e_priv_cleanup(priv); free_netdev(netdev); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c index 5889029c2adf2..8641bd9bbb539 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c @@ -103,7 +103,7 @@ int mlx5i_init(struct mlx5_core_dev *mdev, struct net_device *netdev) /* Called directly before IPoIB netdevice is destroyed to cleanup SW structs */ void mlx5i_cleanup(struct mlx5e_priv *priv) { - mlx5e_netdev_cleanup(priv->netdev, priv); + mlx5e_priv_cleanup(priv); } static void mlx5i_grp_sw_update_stats(struct mlx5e_priv *priv) @@ -744,7 +744,7 @@ static int mlx5_rdma_setup_rn(struct ib_device *ibdev, u8 port_num, goto destroy_ht; } - err = mlx5e_netdev_init(netdev, epriv, mdev); + err = mlx5e_priv_init(epriv, netdev, mdev); if (err) goto destroy_mdev_resources; -- GitLab From 1227bbc5d09e73d4ff952d833b20be8855840401 Mon Sep 17 00:00:00 2001 From: Roi Dayan Date: Wed, 16 Sep 2020 10:10:42 +0300 Subject: [PATCH 1782/2012] net/mlx5e: Move netif_carrier_off() out of mlx5e_priv_init() It's not part of priv initialization. Signed-off-by: Roi Dayan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 6 +++--- drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c | 1 + 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 177e076f6cce3..e468d8329c2a3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -5483,9 +5483,6 @@ int mlx5e_priv_init(struct mlx5e_priv *priv, if (!priv->wq) goto err_free_cpumask; - /* netdev init */ - netif_carrier_off(netdev); - return 0; err_free_cpumask: @@ -5523,6 +5520,8 @@ mlx5e_create_netdev(struct mlx5_core_dev *mdev, unsigned int txqs, unsigned int mlx5_core_err(mdev, "mlx5e_priv_init failed, err=%d\n", err); goto err_free_netdev; } + + netif_carrier_off(netdev); dev_net_set(netdev, mlx5_core_net(mdev)); return netdev; @@ -5630,6 +5629,7 @@ mlx5e_netdev_attach_profile(struct mlx5e_priv *priv, mlx5_core_err(mdev, "mlx5e_priv_init failed, err=%d\n", err); return err; } + netif_carrier_off(netdev); priv->profile = new_profile; priv->ppriv = new_ppriv; err = new_profile->init(priv->mdev, priv->netdev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c index 8641bd9bbb539..1eeca45cfcdf2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c @@ -76,6 +76,7 @@ int mlx5i_init(struct mlx5_core_dev *mdev, struct net_device *netdev) { struct mlx5e_priv *priv = mlx5i_epriv(netdev); + netif_carrier_off(netdev); mlx5e_set_netdev_mtu_boundaries(priv); netdev->mtu = netdev->max_mtu; -- GitLab From 84db6612471416984685d37ab35ad30dffc28179 Mon Sep 17 00:00:00 2001 From: Roi Dayan Date: Wed, 16 Sep 2020 10:11:12 +0300 Subject: [PATCH 1783/2012] net/mlx5e: Move set vxlan nic info to profile init Since its profile dependent let's init the vxlan info as part of profile initialization. Signed-off-by: Roi Dayan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 3 +-- drivers/net/ethernet/mellanox/mlx5/core/en_rep.c | 14 ++++++++++---- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index e468d8329c2a3..b9d2cb6f178d5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -5101,8 +5101,6 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev) netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_FILTER; netdev->hw_features |= NETIF_F_HW_VLAN_STAG_TX; - mlx5e_vxlan_set_netdev_info(priv); - if (mlx5e_tunnel_any_tx_proto_supported(mdev)) { netdev->hw_enc_features |= NETIF_F_HW_CSUM; netdev->hw_enc_features |= NETIF_F_TSO; @@ -5229,6 +5227,7 @@ static int mlx5e_nic_init(struct mlx5_core_dev *mdev, int err; mlx5e_build_nic_params(priv, &priv->xsk, netdev->mtu); + mlx5e_vxlan_set_netdev_info(priv); mlx5e_timestamp_init(priv); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index c8a0f4c88d4ba..45669a1db4269 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -717,15 +717,12 @@ static void mlx5e_build_rep_netdev(struct net_device *netdev, struct mlx5_core_dev *mdev, struct mlx5_eswitch_rep *rep) { - struct mlx5e_priv *priv = netdev_priv(netdev); - SET_NETDEV_DEV(netdev, mdev->device); if (rep->vport == MLX5_VPORT_UPLINK) { netdev->netdev_ops = &mlx5e_netdev_ops_uplink_rep; /* we want a persistent mac for the uplink rep */ mlx5_query_mac_address(mdev, netdev->dev_addr); netdev->ethtool_ops = &mlx5e_uplink_rep_ethtool_ops; - mlx5e_vxlan_set_netdev_info(priv); mlx5e_dcbnl_build_rep_netdev(netdev); } else { netdev->netdev_ops = &mlx5e_netdev_ops_rep; @@ -767,6 +764,15 @@ static int mlx5e_init_rep(struct mlx5_core_dev *mdev, return 0; } +static int mlx5e_init_ul_rep(struct mlx5_core_dev *mdev, + struct net_device *netdev) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + + mlx5e_vxlan_set_netdev_info(priv); + return mlx5e_init_rep(mdev, netdev); +} + static void mlx5e_cleanup_rep(struct mlx5e_priv *priv) { } @@ -1165,7 +1171,7 @@ static const struct mlx5e_profile mlx5e_rep_profile = { }; static const struct mlx5e_profile mlx5e_uplink_rep_profile = { - .init = mlx5e_init_rep, + .init = mlx5e_init_ul_rep, .cleanup = mlx5e_cleanup_rep, .init_rx = mlx5e_init_ul_rep_rx, .cleanup_rx = mlx5e_cleanup_ul_rep_rx, -- GitLab From 9ba33339c043addc4aee241fd7ac37593f7c9e7e Mon Sep 17 00:00:00 2001 From: Roi Dayan Date: Mon, 25 Jan 2021 11:28:07 +0200 Subject: [PATCH 1784/2012] net/mlx5e: Avoid false lock depenency warning on tc_ht To avoid false lock dependency warning set the tc_ht lock class different than the lock class of the ht being used when deleting last flow from a group and then deleting a group, we get into del_sw_flow_group() which call rhashtable_destroy on fg->ftes_hash which will take ht->mutex but it's different than the ht->mutex here. ====================================================== WARNING: possible circular locking dependency detected 5.11.0-rc4_net_next_mlx5_949fdcc #1 Not tainted ------------------------------------------------------ modprobe/12950 is trying to acquire lock: ffff88816510f910 (&node->lock){++++}-{3:3}, at: mlx5_del_flow_rules+0x2a/0x210 [mlx5_core] but task is already holding lock: ffff88815834e3e8 (&ht->mutex){+.+.}-{3:3}, at: rhashtable_free_and_destroy+0x37/0x340 which lock already depends on the new lock. Signed-off-by: Roi Dayan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 8fd38ad8113bb..280ea1e1e039d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -190,6 +190,14 @@ struct mlx5e_tc_attr_to_reg_mapping mlx5e_tc_attr_to_reg_mappings[] = { [NIC_ZONE_RESTORE_TO_REG] = nic_zone_restore_to_reg_ct, }; +/* To avoid false lock dependency warning set the tc_ht lock + * class different than the lock class of the ht being used when deleting + * last flow from a group and then deleting a group, we get into del_sw_flow_group() + * which call rhashtable_destroy on fg->ftes_hash which will take ht->mutex but + * it's different than the ht->mutex here. + */ +static struct lock_class_key tc_ht_lock_key; + static void mlx5e_put_flow_tunnel_id(struct mlx5e_tc_flow *flow); void @@ -5215,6 +5223,8 @@ int mlx5e_tc_nic_init(struct mlx5e_priv *priv) if (err) return err; + lockdep_set_class(&tc->ht.mutex, &tc_ht_lock_key); + if (MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, ignore_flow_level)) { attr.flags = MLX5_CHAINS_AND_PRIOS_SUPPORTED | MLX5_CHAINS_IGNORE_FLOW_LEVEL_SUPPORTED; @@ -5333,6 +5343,8 @@ int mlx5e_tc_esw_init(struct rhashtable *tc_ht) if (err) goto err_ht_init; + lockdep_set_class(&tc_ht->mutex, &tc_ht_lock_key); + return err; err_ht_init: -- GitLab From 6b424e13b01003b52ed9624067abb027789c7bb6 Mon Sep 17 00:00:00 2001 From: Roi Dayan Date: Wed, 16 Sep 2020 10:11:38 +0300 Subject: [PATCH 1785/2012] net/mlx5e: Move representor neigh init into profile enable Also cleanup neigh in profile disable. This is for logical separation. Signed-off-by: Roi Dayan Signed-off-by: Saeed Mahameed --- .../mellanox/mlx5/core/en/rep/neigh.c | 18 ++++++++---- .../net/ethernet/mellanox/mlx5/core/en_rep.c | 29 ++++++++++--------- 2 files changed, 29 insertions(+), 18 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.c index 58e27038c947b..616ee585a9855 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.c @@ -279,7 +279,7 @@ int mlx5e_rep_neigh_init(struct mlx5e_rep_priv *rpriv) err = rhashtable_init(&neigh_update->neigh_ht, &mlx5e_neigh_ht_params); if (err) - return err; + goto out_err; INIT_LIST_HEAD(&neigh_update->neigh_list); mutex_init(&neigh_update->encap_lock); @@ -287,14 +287,19 @@ int mlx5e_rep_neigh_init(struct mlx5e_rep_priv *rpriv) mlx5e_rep_neigh_stats_work); mlx5e_rep_neigh_update_init_interval(rpriv); - rpriv->neigh_update.netevent_nb.notifier_call = mlx5e_rep_netevent_event; - err = register_netevent_notifier(&rpriv->neigh_update.netevent_nb); + neigh_update->netevent_nb.notifier_call = mlx5e_rep_netevent_event; + err = register_netevent_notifier(&neigh_update->netevent_nb); if (err) - goto out_err; + goto out_notifier; return 0; -out_err: +out_notifier: + neigh_update->netevent_nb.notifier_call = NULL; rhashtable_destroy(&neigh_update->neigh_ht); +out_err: + netdev_warn(rpriv->netdev, + "Failed to initialize neighbours handling for vport %d\n", + rpriv->rep->vport); return err; } @@ -303,6 +308,9 @@ void mlx5e_rep_neigh_cleanup(struct mlx5e_rep_priv *rpriv) struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update; struct mlx5e_priv *priv = netdev_priv(rpriv->netdev); + if (!rpriv->neigh_update.netevent_nb.notifier_call) + return; + unregister_netevent_notifier(&neigh_update->netevent_nb); flush_workqueue(priv->wq); /* flush neigh update works */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 45669a1db4269..84eeaa33033f9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -1051,7 +1051,17 @@ static void mlx5e_cleanup_rep_tx(struct mlx5e_priv *priv) static void mlx5e_rep_enable(struct mlx5e_priv *priv) { + struct mlx5e_rep_priv *rpriv = priv->ppriv; + mlx5e_set_netdev_mtu_boundaries(priv); + mlx5e_rep_neigh_init(rpriv); +} + +static void mlx5e_rep_disable(struct mlx5e_priv *priv) +{ + struct mlx5e_rep_priv *rpriv = priv->ppriv; + + mlx5e_rep_neigh_cleanup(rpriv); } static int mlx5e_update_rep_rx(struct mlx5e_priv *priv) @@ -1086,6 +1096,7 @@ static int uplink_rep_async_event(struct notifier_block *nb, unsigned long event static void mlx5e_uplink_rep_enable(struct mlx5e_priv *priv) { + struct mlx5e_rep_priv *rpriv = priv->ppriv; struct net_device *netdev = priv->netdev; struct mlx5_core_dev *mdev = priv->mdev; u16 max_mtu; @@ -1104,12 +1115,15 @@ static void mlx5e_uplink_rep_enable(struct mlx5e_priv *priv) mlx5_notifier_register(mdev, &priv->events_nb); mlx5e_dcbnl_initialize(priv); mlx5e_dcbnl_init_app(priv); + mlx5e_rep_neigh_init(rpriv); } static void mlx5e_uplink_rep_disable(struct mlx5e_priv *priv) { + struct mlx5e_rep_priv *rpriv = priv->ppriv; struct mlx5_core_dev *mdev = priv->mdev; + mlx5e_rep_neigh_cleanup(rpriv); mlx5e_dcbnl_delete_app(priv); mlx5_notifier_unregister(mdev, &priv->events_nb); mlx5e_rep_tc_disable(priv); @@ -1161,6 +1175,7 @@ static const struct mlx5e_profile mlx5e_rep_profile = { .init_tx = mlx5e_init_rep_tx, .cleanup_tx = mlx5e_cleanup_rep_tx, .enable = mlx5e_rep_enable, + .disable = mlx5e_rep_disable, .update_rx = mlx5e_update_rep_rx, .update_stats = mlx5e_stats_update_ndo_stats, .rx_handlers = &mlx5e_rx_handlers_rep, @@ -1252,20 +1267,12 @@ mlx5e_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) goto err_cleanup_profile; } - err = mlx5e_rep_neigh_init(rpriv); - if (err) { - netdev_warn(netdev, - "Failed to initialized neighbours handling for vport %d\n", - rep->vport); - goto err_detach_netdev; - } - err = register_netdev(netdev); if (err) { netdev_warn(netdev, "Failed to register representor netdev for vport %d\n", rep->vport); - goto err_neigh_cleanup; + goto err_detach_netdev; } dl_port = mlx5_esw_offloads_devlink_port(dev->priv.eswitch, rpriv->rep->vport); @@ -1273,9 +1280,6 @@ mlx5e_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) devlink_port_type_eth_set(dl_port, netdev); return 0; -err_neigh_cleanup: - mlx5e_rep_neigh_cleanup(rpriv); - err_detach_netdev: mlx5e_detach_netdev(netdev_priv(netdev)); @@ -1306,7 +1310,6 @@ mlx5e_vport_rep_unload(struct mlx5_eswitch_rep *rep) if (dl_port) devlink_port_type_clear(dl_port); unregister_netdev(netdev); - mlx5e_rep_neigh_cleanup(rpriv); mlx5e_detach_netdev(priv); priv->profile->cleanup(priv); if (rep->vport == MLX5_VPORT_UPLINK) -- GitLab From 7637e499e219ae89e5c42d947b96603ef3ab5a44 Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Mon, 30 Dec 2019 14:41:53 +0200 Subject: [PATCH 1786/2012] net/mlx5e: Enable napi in channel's activation stage The channel's napi is first needed upon activation, not creation. Minimize its enabled scope by moving it from the channel's open/close stage into the activate/deactivate stage. Signed-off-by: Tariq Toukan Reviewed-by: Maxim Mikityanskiy Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c | 12 ++++++------ drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 14 ++++++-------- 2 files changed, 12 insertions(+), 14 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c index eeddd1137dda5..a76cfefec708c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c @@ -428,16 +428,13 @@ static int mlx5e_ptp_open_queues(struct mlx5e_port_ptp *c, if (err) return err; - napi_enable(&c->napi); - err = mlx5e_ptp_open_txqsqs(c, cparams); if (err) - goto disable_napi; + goto close_cqs; return 0; -disable_napi: - napi_disable(&c->napi); +close_cqs: mlx5e_ptp_close_cqs(c); return err; @@ -446,7 +443,6 @@ disable_napi: static void mlx5e_ptp_close_queues(struct mlx5e_port_ptp *c) { mlx5e_ptp_close_txqsqs(c); - napi_disable(&c->napi); mlx5e_ptp_close_cqs(c); } @@ -515,6 +511,8 @@ void mlx5e_ptp_activate_channel(struct mlx5e_port_ptp *c) { int tc; + napi_enable(&c->napi); + for (tc = 0; tc < c->num_tc; tc++) mlx5e_activate_txqsq(&c->ptpsq[tc].txqsq); } @@ -525,4 +523,6 @@ void mlx5e_ptp_deactivate_channel(struct mlx5e_port_ptp *c) for (tc = 0; tc < c->num_tc; tc++) mlx5e_deactivate_txqsq(&c->ptpsq[tc].txqsq); + + napi_disable(&c->napi); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index b9d2cb6f178d5..41c611197211a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -1892,13 +1892,11 @@ static int mlx5e_open_queues(struct mlx5e_channel *c, if (err) goto err_close_rx_cq; - napi_enable(&c->napi); - spin_lock_init(&c->async_icosq_lock); err = mlx5e_open_icosq(c, params, &cparam->async_icosq, &c->async_icosq); if (err) - goto err_disable_napi; + goto err_close_xdpsq_cq; err = mlx5e_open_icosq(c, params, &cparam->icosq, &c->icosq); if (err) @@ -1941,9 +1939,7 @@ err_close_icosq: err_close_async_icosq: mlx5e_close_icosq(&c->async_icosq); -err_disable_napi: - napi_disable(&c->napi); - +err_close_xdpsq_cq: if (c->xdp) mlx5e_close_cq(&c->rq_xdpsq.cq); @@ -1974,7 +1970,6 @@ static void mlx5e_close_queues(struct mlx5e_channel *c) mlx5e_close_sqs(c); mlx5e_close_icosq(&c->icosq); mlx5e_close_icosq(&c->async_icosq); - napi_disable(&c->napi); if (c->xdp) mlx5e_close_cq(&c->rq_xdpsq.cq); mlx5e_close_cq(&c->rq.cq); @@ -2059,6 +2054,8 @@ static void mlx5e_activate_channel(struct mlx5e_channel *c) { int tc; + napi_enable(&c->napi); + for (tc = 0; tc < c->num_tc; tc++) mlx5e_activate_txqsq(&c->sq[tc]); mlx5e_activate_icosq(&c->icosq); @@ -2081,8 +2078,9 @@ static void mlx5e_deactivate_channel(struct mlx5e_channel *c) mlx5e_deactivate_icosq(&c->icosq); for (tc = 0; tc < c->num_tc; tc++) mlx5e_deactivate_txqsq(&c->sq[tc]); - mlx5e_qos_deactivate_queues(c); + + napi_disable(&c->napi); } static void mlx5e_close_channel(struct mlx5e_channel *c) -- GitLab From 1dd55ba2fb7029a6558f9dc10bbe570a3ffcc767 Mon Sep 17 00:00:00 2001 From: Noam Stolero Date: Tue, 8 Dec 2020 09:31:36 +0200 Subject: [PATCH 1787/2012] net/mlx5e: Increase indirection RQ table size to 256 Increasing the indirection RQ table size from 128 to 256 improves the packet distribution over the NIC HW queues for various cases. Let's take a look at the following scenario: Assuming RSS result distributed uniformly and indirection table is filled with queues in a cyclic manner. Let N be the number of queues on a given setup. If 256%N = 128%N = 0, then all queues have the same probability to be chosen for a given RSS result. This case doesn't improves nor degrade by this change. If 256%N != 0 and 128%N != 0, there is a remainder which will favor some queues. Increasing the indirection RQ table size to 256 reduce the ratio between the favored queues probability to be selected to the rest of the queues and improves the distribution. For example, let's assume the number of queues is 56. For a table size of 128, we have 128%56=16 queues which will have a 3/128 probability to be chosen and 2/128 for the rest 40. 16 queues have 1.5 times the probability to be chosen over the other 40. For a table size of 256, we have 256%56=32 queues which will have a 5/256 probability to be chosen and 4/256 probability for the rest 24 queues. Here 32 queues have 1.25 more probability to be chosen over the other 24. This shows that the larger indirection table size would more likely cause an even distribution. This change also aligns our mlx5 driver's indirection table size with other vendors. Signed-off-by: Noam Stolero Reviewed-by: Tal Gilboa Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 8cc80c31341fb..a8e31cdd4a4ee 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -137,10 +137,10 @@ struct page_pool; #define MLX5E_PARAMS_DEFAULT_MIN_RX_WQES 0x80 #define MLX5E_PARAMS_DEFAULT_MIN_RX_WQES_MPW 0x2 -#define MLX5E_LOG_INDIR_RQT_SIZE 0x7 +#define MLX5E_LOG_INDIR_RQT_SIZE 0x8 #define MLX5E_INDIR_RQT_SIZE BIT(MLX5E_LOG_INDIR_RQT_SIZE) #define MLX5E_MIN_NUM_CHANNELS 0x1 -#define MLX5E_MAX_NUM_CHANNELS MLX5E_INDIR_RQT_SIZE +#define MLX5E_MAX_NUM_CHANNELS (MLX5E_INDIR_RQT_SIZE / 2) #define MLX5E_MAX_NUM_SQS (MLX5E_MAX_NUM_CHANNELS * MLX5E_MAX_NUM_TC) #define MLX5E_TX_CQ_POLL_BUDGET 128 #define MLX5E_TX_XSK_POLL_BUDGET 64 -- GitLab From 1d3a3f3bfe3cfe9afc58a89b5de00efb30c55271 Mon Sep 17 00:00:00 2001 From: Tom Rix Date: Wed, 23 Dec 2020 11:45:12 -0800 Subject: [PATCH 1788/2012] net/mlx5e: remove h from printk format specifier This change fixes the checkpatch warning described in this commit commit cbacb5ab0aa0 ("docs: printk-formats: Stop encouraging use of unnecessary %h[xudi] and %hh[xudi]") Standard integer promotion is already done and %hx and %hhx is useless so do not encourage the use of %hh[xudi] or %h[xudi]. Signed-off-by: Tom Rix Reviewed-by: Leon Romanovsky Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/params.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c index 43271a3856ca3..36381a2ed5a51 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c @@ -179,7 +179,7 @@ int mlx5e_validate_params(struct mlx5e_priv *priv, struct mlx5e_params *params) stop_room = mlx5e_calc_sq_stop_room(priv->mdev, params); if (stop_room >= sq_size) { - netdev_err(priv->netdev, "Stop room %hu is bigger than the SQ size %zu\n", + netdev_err(priv->netdev, "Stop room %u is bigger than the SQ size %zu\n", stop_room, sq_size); return -EINVAL; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 41c611197211a..2a6f9d042f517 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -3888,7 +3888,7 @@ static int set_feature_lro(struct net_device *netdev, bool enable) mutex_lock(&priv->state_lock); if (enable && priv->xsk.refcnt) { - netdev_warn(netdev, "LRO is incompatible with AF_XDP (%hu XSKs are active)\n", + netdev_warn(netdev, "LRO is incompatible with AF_XDP (%u XSKs are active)\n", priv->xsk.refcnt); err = -EINVAL; goto out; @@ -4139,7 +4139,7 @@ static bool mlx5e_xsk_validate_mtu(struct net_device *netdev, max_mtu_page = mlx5e_xdp_max_mtu(new_params, &xsk); max_mtu = min(max_mtu_frame, max_mtu_page); - netdev_err(netdev, "MTU %d is too big for an XSK running on channel %hu. Try MTU <= %d\n", + netdev_err(netdev, "MTU %d is too big for an XSK running on channel %u. Try MTU <= %d\n", new_params->sw_mtu, ix, max_mtu); return false; } -- GitLab From 26432001b5c4c2fe513d4166da58e35f5591389d Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Sun, 3 Jan 2021 11:34:04 +0200 Subject: [PATCH 1789/2012] net/mlx5e: kTLS, Improve TLS RX workqueue scope The TLS RX workqueue is needed only when kTLS RX device offload is supported. Move its creation from the general TLS init function to the kTLS RX init. Create it once at init time if supported, avoid creation/destroy everytime the feature bit is toggled. Signed-off-by: Tariq Toukan Reviewed-by: Maxim Mikityanskiy Signed-off-by: Saeed Mahameed --- .../mellanox/mlx5/core/en_accel/ktls.c | 24 ++++++++++++++++--- .../mellanox/mlx5/core/en_accel/tls.c | 7 ------ 2 files changed, 21 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c index 1b392696280d2..95293ee0d38da 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c @@ -2,6 +2,7 @@ // Copyright (c) 2019 Mellanox Technologies. #include "en.h" +#include "en_accel/tls.h" #include "en_accel/ktls.h" #include "en_accel/ktls_utils.h" #include "en_accel/fs_tcp.h" @@ -86,16 +87,33 @@ int mlx5e_ktls_set_feature_rx(struct net_device *netdev, bool enable) int mlx5e_ktls_init_rx(struct mlx5e_priv *priv) { - int err = 0; + int err; - if (priv->netdev->features & NETIF_F_HW_TLS_RX) + if (!mlx5_accel_is_ktls_rx(priv->mdev)) + return 0; + + priv->tls->rx_wq = create_singlethread_workqueue("mlx5e_tls_rx"); + if (!priv->tls->rx_wq) + return -ENOMEM; + + if (priv->netdev->features & NETIF_F_HW_TLS_RX) { err = mlx5e_accel_fs_tcp_create(priv); + if (err) { + destroy_workqueue(priv->tls->rx_wq); + return err; + } + } - return err; + return 0; } void mlx5e_ktls_cleanup_rx(struct mlx5e_priv *priv) { + if (!mlx5_accel_is_ktls_rx(priv->mdev)) + return; + if (priv->netdev->features & NETIF_F_HW_TLS_RX) mlx5e_accel_fs_tcp_destroy(priv); + + destroy_workqueue(priv->tls->rx_wq); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.c index fee991f5ee7c3..d6b21b899dbcc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.c @@ -231,12 +231,6 @@ int mlx5e_tls_init(struct mlx5e_priv *priv) if (!tls) return -ENOMEM; - tls->rx_wq = create_singlethread_workqueue("mlx5e_tls_rx"); - if (!tls->rx_wq) { - kfree(tls); - return -ENOMEM; - } - priv->tls = tls; return 0; } @@ -248,7 +242,6 @@ void mlx5e_tls_cleanup(struct mlx5e_priv *priv) if (!tls) return; - destroy_workqueue(tls->rx_wq); kfree(tls); priv->tls = NULL; } -- GitLab From 8271e341ed631b9fb393d04a4f406defb601a76d Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Thu, 7 Jan 2021 20:24:12 -0800 Subject: [PATCH 1790/2012] net/mlx5e: accel, remove redundant space Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h index 6488098d27006..959bb6cd7203d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h @@ -85,7 +85,7 @@ mlx5e_tx_tunnel_accel(struct sk_buff *skb, struct mlx5_wqe_eth_seg *eseg, u16 ih } mlx5e_set_eseg_swp(skb, eseg, &swp_spec); - if (skb_vlan_tag_present(skb) && ihs) + if (skb_vlan_tag_present(skb) && ihs) mlx5e_eseg_swp_offsets_add_vlan(eseg); } -- GitLab From 902c02458925c49062984b7cfe746410321b6a0b Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Thu, 7 Jan 2021 20:49:57 -0800 Subject: [PATCH 1791/2012] net/mlx5e: CT: remove useless conversion to PTR_ERR then ERR_PTR Just return the ptr directly. Reported-by: Jakub Kicinski Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c index e417904ae17f2..40aaa105b2fcd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c @@ -770,7 +770,6 @@ mlx5_tc_ct_shared_counter_get(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_counter *shared_counter; struct mlx5_ct_entry *rev_entry; __be16 tmp_port; - int ret; /* get the reversed tuple */ tmp_port = rev_tuple.port.src; @@ -804,10 +803,8 @@ mlx5_tc_ct_shared_counter_get(struct mlx5_tc_ct_priv *ct_priv, mutex_unlock(&ct_priv->shared_counter_lock); shared_counter = mlx5_tc_ct_counter_create(ct_priv); - if (IS_ERR(shared_counter)) { - ret = PTR_ERR(shared_counter); - return ERR_PTR(ret); - } + if (IS_ERR(shared_counter)) + return shared_counter; shared_counter->is_shared = true; refcount_set(&shared_counter->refcount, 1); -- GitLab From a283ea1b97163d21e0f1a3df387b71787042b990 Mon Sep 17 00:00:00 2001 From: Yevgeny Kliteynik Date: Thu, 29 Oct 2020 01:35:47 +0200 Subject: [PATCH 1792/2012] net/mlx5: DR, Avoid unnecessary csum recalculation on supporting devices If as part of the actions the TTL of the packet is modified, the packet's checksum needs to be recalculated. Connect-X6DX can handle this csum recalculation natively. Older devices require this additional recalculation. Signed-off-by: Yevgeny Kliteynik Reviewed-by: Alex Vesker Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/steering/dr_action.c | 9 +++++---- .../net/ethernet/mellanox/mlx5/core/steering/dr_ste.c | 5 +++++ .../net/ethernet/mellanox/mlx5/core/steering/dr_types.h | 2 ++ 3 files changed, 12 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c index 27c2b8416d029..28a7971cac6ac 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c @@ -447,7 +447,8 @@ int mlx5dr_actions_build_ste_arr(struct mlx5dr_matcher *matcher, case DR_ACTION_TYP_MODIFY_HDR: attr.modify_index = action->rewrite.index; attr.modify_actions = action->rewrite.num_of_actions; - recalc_cs_required = action->rewrite.modify_ttl; + recalc_cs_required = action->rewrite.modify_ttl && + !mlx5dr_ste_supp_ttl_cs_recalc(&dmn->info.caps); break; case DR_ACTION_TYP_L2_TO_TNL_L2: case DR_ACTION_TYP_L2_TO_TNL_L3: @@ -501,9 +502,9 @@ int mlx5dr_actions_build_ste_arr(struct mlx5dr_matcher *matcher, *new_hw_ste_arr_sz = nic_matcher->num_of_builders; last_ste = ste_arr + DR_STE_SIZE * (nic_matcher->num_of_builders - 1); - /* Due to a HW bug, modifying TTL on RX flows will cause an incorrect - * checksum calculation. In this case we will use a FW table to - * recalculate. + /* Due to a HW bug in some devices, modifying TTL on RX flows will + * cause an incorrect checksum calculation. In this case we will + * use a FW table to recalculate. */ if (dmn->type == MLX5DR_DOMAIN_TYPE_FDB && rx_rule && recalc_cs_required && dest_action) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c index 9cd5c50c5d42d..f49abc7a4b9b6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c @@ -18,6 +18,11 @@ static u32 dr_ste_crc32_calc(const void *input_data, size_t length) return (__force u32)htonl(crc); } +bool mlx5dr_ste_supp_ttl_cs_recalc(struct mlx5dr_cmd_caps *caps) +{ + return caps->sw_format_ver > MLX5_STEERING_FORMAT_CONNECTX_5; +} + u32 mlx5dr_ste_calc_hash_index(u8 *hw_ste_p, struct mlx5dr_ste_htbl *htbl) { struct dr_hw_ste_format *hw_ste = (struct dr_hw_ste_format *)hw_ste_p; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h index a8b497cbb844e..4af0e4e6a13c9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h @@ -1211,6 +1211,8 @@ int mlx5dr_cmd_set_fte(struct mlx5_core_dev *dev, u32 group_id, struct mlx5dr_cmd_fte_info *fte); +bool mlx5dr_ste_supp_ttl_cs_recalc(struct mlx5dr_cmd_caps *caps); + struct mlx5dr_fw_recalc_cs_ft { u64 rx_icm_addr; u32 table_id; -- GitLab From ed5e83a3c02948dad9dc4e68fb4e535baa5da630 Mon Sep 17 00:00:00 2001 From: Daniel Jurgens Date: Mon, 1 Feb 2021 18:11:10 +0200 Subject: [PATCH 1793/2012] net/mlx5: Fix function calculation for page trees The function calculation always results in a value of 0. This works generally, but when the release all pages feature is enabled it will result in crashes. Fixes: 0aa128475d33 ("net/mlx5: Maintain separate page trees for ECPF and PF functions") Signed-off-by: Daniel Jurgens Reported-by: Colin Ian King Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c index eaa8958e24d79..c0656d4782e1c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c @@ -76,7 +76,7 @@ enum { static u32 get_function(u16 func_id, bool ec_function) { - return func_id & (ec_function << 16); + return (u32)func_id | (ec_function << 16); } static struct rb_root *page_root_per_function(struct mlx5_core_dev *dev, u32 function) -- GitLab From a5bfe6b4675e0eefbd9418055b5cc6e89af27eb4 Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Wed, 20 Jan 2021 17:41:18 +0200 Subject: [PATCH 1794/2012] net/mlx5: Fix leak upon failure of rule creation When creation of a new rule that requires allocation of an FTE fails, need to call to tree_put_node on the FTE in order to release its' resource. Fixes: cefc23554fc2 ("net/mlx5: Fix FTE cleanup") Signed-off-by: Maor Gottlieb Reviewed-by: Alaa Hleihel Reviewed-by: Mark Bloch Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index 0fcee702b808b..ee4d86c1f4360 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -1760,6 +1760,7 @@ search_again_locked: if (!fte_tmp) continue; rule = add_rule_fg(g, spec, flow_act, dest, dest_num, fte_tmp); + /* No error check needed here, because insert_fte() is not called */ up_write_ref_node(&fte_tmp->node, false); tree_put_node(&fte_tmp->node, false); kmem_cache_free(steering->ftes_cache, fte); @@ -1812,6 +1813,8 @@ skip_search: up_write_ref_node(&g->node, false); rule = add_rule_fg(g, spec, flow_act, dest, dest_num, fte); up_write_ref_node(&fte->node, false); + if (IS_ERR(rule)) + tree_put_node(&fte->node, false); return rule; } rule = ERR_PTR(-ENOENT); @@ -1910,6 +1913,8 @@ search_again_locked: up_write_ref_node(&g->node, false); rule = add_rule_fg(g, spec, flow_act, dest, dest_num, fte); up_write_ref_node(&fte->node, false); + if (IS_ERR(rule)) + tree_put_node(&fte->node, false); tree_put_node(&g->node, false); return rule; -- GitLab From 5a2ba25a55c4dc0f143567c99aede768b6628ebd Mon Sep 17 00:00:00 2001 From: Maxim Mikityanskiy Date: Thu, 28 Jan 2021 14:37:59 +0200 Subject: [PATCH 1795/2012] net/mlx5e: Update max_opened_tc also when channels are closed max_opened_tc is used for stats, so that potentially non-zero stats won't disappear when num_tc decreases. However, mlx5e_setup_tc_mqprio fails to update it in the flow where channels are closed. This commit fixes it. The new value of priv->channels.params.num_tc is always checked on exit. In case of errors it will just be the old value, and in case of success it will be the updated value. Fixes: 05909babce53 ("net/mlx5e: Avoid reset netdev stats on configuration changes") Signed-off-by: Maxim Mikityanskiy Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index a9d824a9cb05a..3fc7d18ac868b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -3627,12 +3627,10 @@ static int mlx5e_setup_tc_mqprio(struct mlx5e_priv *priv, err = mlx5e_safe_switch_channels(priv, &new_channels, mlx5e_num_channels_changed_ctx, NULL); - if (err) - goto out; - priv->max_opened_tc = max_t(u8, priv->max_opened_tc, - new_channels.params.num_tc); out: + priv->max_opened_tc = max_t(u8, priv->max_opened_tc, + priv->channels.params.num_tc); mutex_unlock(&priv->state_lock); return err; } -- GitLab From a34ffec8af8ff1c730697a99e09ec7b74a3423b6 Mon Sep 17 00:00:00 2001 From: Maor Dickman Date: Sun, 31 Jan 2021 18:47:15 +0200 Subject: [PATCH 1796/2012] net/mlx5e: Release skb in case of failure in tc update skb In case of failure in tc update skb the packet is dropped without freeing the skb. Fixed by freeing the skb in case failure in tc update skb. Fixes: d6d27782864f ("net/mlx5: E-Switch, Restore chain id on miss") Fixes: c75690972228 ("net/mlx5e: Add tc chains offload support for nic flows") Signed-off-by: Maor Dickman Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 7f5851c612181..ca4b55839a8a7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -1262,8 +1262,10 @@ static void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb); if (mlx5e_cqe_regb_chain(cqe)) - if (!mlx5e_tc_update_skb(cqe, skb)) + if (!mlx5e_tc_update_skb(cqe, skb)) { + dev_kfree_skb_any(skb); goto free_wqe; + } napi_gro_receive(rq->cq.napi, skb); @@ -1316,8 +1318,10 @@ static void mlx5e_handle_rx_cqe_rep(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) if (rep->vlan && skb_vlan_tag_present(skb)) skb_vlan_pop(skb); - if (!mlx5e_rep_tc_update_skb(cqe, skb, &tc_priv)) + if (!mlx5e_rep_tc_update_skb(cqe, skb, &tc_priv)) { + dev_kfree_skb_any(skb); goto free_wqe; + } napi_gro_receive(rq->cq.napi, skb); @@ -1371,8 +1375,10 @@ static void mlx5e_handle_rx_cqe_mpwrq_rep(struct mlx5e_rq *rq, struct mlx5_cqe64 mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb); - if (!mlx5e_rep_tc_update_skb(cqe, skb, &tc_priv)) + if (!mlx5e_rep_tc_update_skb(cqe, skb, &tc_priv)) { + dev_kfree_skb_any(skb); goto mpwrq_cqe_out; + } napi_gro_receive(rq->cq.napi, skb); @@ -1528,8 +1534,10 @@ static void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cq mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb); if (mlx5e_cqe_regb_chain(cqe)) - if (!mlx5e_tc_update_skb(cqe, skb)) + if (!mlx5e_tc_update_skb(cqe, skb)) { + dev_kfree_skb_any(skb); goto mpwrq_cqe_out; + } napi_gro_receive(rq->cq.napi, skb); -- GitLab From 88c7a9fd9bdd3e453f04018920964c6f848a591a Mon Sep 17 00:00:00 2001 From: Xie He Date: Sun, 31 Jan 2021 21:57:06 -0800 Subject: [PATCH 1797/2012] net: lapb: Copy the skb before sending a packet When sending a packet, we will prepend it with an LAPB header. This modifies the shared parts of a cloned skb, so we should copy the skb rather than just clone it, before we prepend the header. In "Documentation/networking/driver.rst" (the 2nd point), it states that drivers shouldn't modify the shared parts of a cloned skb when transmitting. The "dev_queue_xmit_nit" function in "net/core/dev.c", which is called when an skb is being sent, clones the skb and sents the clone to AF_PACKET sockets. Because the LAPB drivers first remove a 1-byte pseudo-header before handing over the skb to us, if we don't copy the skb before prepending the LAPB header, the first byte of the packets received on AF_PACKET sockets can be corrupted. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Xie He Acked-by: Martin Schiller Link: https://lore.kernel.org/r/20210201055706.415842-1-xie.he.0141@gmail.com Signed-off-by: Jakub Kicinski --- net/lapb/lapb_out.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/lapb/lapb_out.c b/net/lapb/lapb_out.c index 7a4d0715d1c32..a966d29c772d9 100644 --- a/net/lapb/lapb_out.c +++ b/net/lapb/lapb_out.c @@ -82,7 +82,8 @@ void lapb_kick(struct lapb_cb *lapb) skb = skb_dequeue(&lapb->write_queue); do { - if ((skbn = skb_clone(skb, GFP_ATOMIC)) == NULL) { + skbn = skb_copy(skb, GFP_ATOMIC); + if (!skbn) { skb_queue_head(&lapb->write_queue, skb); break; } -- GitLab From 43f4a20a1266d393840ce010f547486d14cc0071 Mon Sep 17 00:00:00 2001 From: Stefan Chulski Date: Mon, 1 Feb 2021 11:35:39 +0200 Subject: [PATCH 1798/2012] net: mvpp2: TCAM entry enable should be written after SRAM data Last TCAM data contains TCAM enable bit. It should be written after SRAM data before entry enabled. Fixes: 3f518509dedc ("ethernet: Add new driver for Marvell Armada 375 network unit") Signed-off-by: Stefan Chulski Link: https://lore.kernel.org/r/1612172139-28343-1-git-send-email-stefanc@marvell.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/marvell/mvpp2/mvpp2_prs.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_prs.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_prs.c index a30eb90ba3d28..dd590086fe6a5 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_prs.c +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_prs.c @@ -29,16 +29,16 @@ static int mvpp2_prs_hw_write(struct mvpp2 *priv, struct mvpp2_prs_entry *pe) /* Clear entry invalidation bit */ pe->tcam[MVPP2_PRS_TCAM_INV_WORD] &= ~MVPP2_PRS_TCAM_INV_MASK; - /* Write tcam index - indirect access */ - mvpp2_write(priv, MVPP2_PRS_TCAM_IDX_REG, pe->index); - for (i = 0; i < MVPP2_PRS_TCAM_WORDS; i++) - mvpp2_write(priv, MVPP2_PRS_TCAM_DATA_REG(i), pe->tcam[i]); - /* Write sram index - indirect access */ mvpp2_write(priv, MVPP2_PRS_SRAM_IDX_REG, pe->index); for (i = 0; i < MVPP2_PRS_SRAM_WORDS; i++) mvpp2_write(priv, MVPP2_PRS_SRAM_DATA_REG(i), pe->sram[i]); + /* Write tcam index - indirect access */ + mvpp2_write(priv, MVPP2_PRS_TCAM_IDX_REG, pe->index); + for (i = 0; i < MVPP2_PRS_TCAM_WORDS; i++) + mvpp2_write(priv, MVPP2_PRS_TCAM_DATA_REG(i), pe->tcam[i]); + return 0; } -- GitLab From a11148e6fcce2ae53f47f0a442d098d860b4f7db Mon Sep 17 00:00:00 2001 From: Sabyrzhan Tasbolatov Date: Tue, 2 Feb 2021 02:32:33 +0600 Subject: [PATCH 1799/2012] net/rds: restrict iovecs length for RDS_CMSG_RDMA_ARGS syzbot found WARNING in rds_rdma_extra_size [1] when RDS_CMSG_RDMA_ARGS control message is passed with user-controlled 0x40001 bytes of args->nr_local, causing order >= MAX_ORDER condition. The exact value 0x40001 can be checked with UIO_MAXIOV which is 0x400. So for kcalloc() 0x400 iovecs with sizeof(struct rds_iovec) = 0x10 is the closest limit, with 0x10 leftover. Same condition is currently done in rds_cmsg_rdma_args(). [1] WARNING: mm/page_alloc.c:5011 [..] Call Trace: alloc_pages_current+0x18c/0x2a0 mm/mempolicy.c:2267 alloc_pages include/linux/gfp.h:547 [inline] kmalloc_order+0x2e/0xb0 mm/slab_common.c:837 kmalloc_order_trace+0x14/0x120 mm/slab_common.c:853 kmalloc_array include/linux/slab.h:592 [inline] kcalloc include/linux/slab.h:621 [inline] rds_rdma_extra_size+0xb2/0x3b0 net/rds/rdma.c:568 rds_rm_size net/rds/send.c:928 [inline] Reported-by: syzbot+1bd2b07f93745fa38425@syzkaller.appspotmail.com Signed-off-by: Sabyrzhan Tasbolatov Acked-by: Santosh Shilimkar Link: https://lore.kernel.org/r/20210201203233.1324704-1-snovitoll@gmail.com Signed-off-by: Jakub Kicinski --- net/rds/rdma.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/rds/rdma.c b/net/rds/rdma.c index 1d0afb1dd77b5..6f1a50d50d06d 100644 --- a/net/rds/rdma.c +++ b/net/rds/rdma.c @@ -565,6 +565,9 @@ int rds_rdma_extra_size(struct rds_rdma_args *args, if (args->nr_local == 0) return -EINVAL; + if (args->nr_local > UIO_MAXIOV) + return -EMSGSIZE; + iov->iov = kcalloc(args->nr_local, sizeof(struct rds_iovec), GFP_KERNEL); -- GitLab From cc9f07a838c4988ed244d0907cb71d54b85482a5 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Mon, 1 Feb 2021 21:50:56 +0100 Subject: [PATCH 1800/2012] r8169: fix WoL on shutdown if CONFIG_DEBUG_SHIRQ is set So far phy_disconnect() is called before free_irq(). If CONFIG_DEBUG_SHIRQ is set and interrupt is shared, then free_irq() creates an "artificial" interrupt by calling the interrupt handler. The "link change" flag is set in the interrupt status register, causing phylib to eventually call phy_suspend(). Because the net_device is detached from the PHY already, the PHY driver can't recognize that WoL is configured and powers down the PHY. Fixes: f1e911d5d0df ("r8169: add basic phylib support") Signed-off-by: Heiner Kallweit Link: https://lore.kernel.org/r/fe732c2c-a473-9088-3974-df83cfbd6efd@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/realtek/r8169_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index f2269c9f5f055..0d78408b4e269 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -4704,10 +4704,10 @@ static int rtl8169_close(struct net_device *dev) cancel_work_sync(&tp->wk.work); - phy_disconnect(tp->phydev); - free_irq(pci_irq_vector(pdev, 0), tp); + phy_disconnect(tp->phydev); + dma_free_coherent(&pdev->dev, R8169_RX_RING_BYTES, tp->RxDescArray, tp->RxPhyAddr); dma_free_coherent(&pdev->dev, R8169_TX_RING_BYTES, tp->TxDescArray, -- GitLab From 4ace7a6e287b7e3b33276cd9fe870c326f880480 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 2 Feb 2021 08:55:25 +0300 Subject: [PATCH 1801/2012] net: ipa: pass correct dma_handle to dma_free_coherent() The "ring->addr = addr;" assignment is done a few lines later so we can't use "ring->addr" yet. The correct dma_handle is "addr". Fixes: 650d1603825d ("soc: qcom: ipa: the generic software interface") Signed-off-by: Dan Carpenter Reviewed-by: Alex Elder Link: https://lore.kernel.org/r/YBjpTU2oejkNIULT@mwanda Signed-off-by: Jakub Kicinski --- drivers/net/ipa/gsi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ipa/gsi.c b/drivers/net/ipa/gsi.c index 14d9a791924bf..b8f39e48a0093 100644 --- a/drivers/net/ipa/gsi.c +++ b/drivers/net/ipa/gsi.c @@ -1373,7 +1373,7 @@ static int gsi_ring_alloc(struct gsi *gsi, struct gsi_ring *ring, u32 count) /* Hardware requires a 2^n ring size, with alignment equal to size */ ring->virt = dma_alloc_coherent(dev, size, &addr, GFP_KERNEL); if (ring->virt && addr % size) { - dma_free_coherent(dev, size, ring->virt, ring->addr); + dma_free_coherent(dev, size, ring->virt, addr); dev_err(dev, "unable to alloc 0x%zx-aligned ring buffer\n", size); return -EINVAL; /* Not a good error value, but distinct */ -- GitLab From e6cdd6d80baedadb96d7060a509f51769e53021d Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Mon, 1 Feb 2021 17:26:06 -0600 Subject: [PATCH 1802/2012] net: ipa: add a missing __iomem attribute The virt local variable in gsi_channel_state() does not have an __iomem attribute but should. Fix this. Signed-off-by: Alex Elder Reviewed-by: Amy Parker Signed-off-by: Jakub Kicinski --- drivers/net/ipa/gsi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ipa/gsi.c b/drivers/net/ipa/gsi.c index b8f39e48a0093..34e5f2155d620 100644 --- a/drivers/net/ipa/gsi.c +++ b/drivers/net/ipa/gsi.c @@ -440,7 +440,7 @@ static void gsi_evt_ring_de_alloc_command(struct gsi *gsi, u32 evt_ring_id) static enum gsi_channel_state gsi_channel_state(struct gsi_channel *channel) { u32 channel_id = gsi_channel_id(channel); - void *virt = channel->gsi->virt; + void __iomem *virt = channel->gsi->virt; u32 val; val = ioread32(virt + GSI_CH_C_CNTXT_0_OFFSET(channel_id)); -- GitLab From 088f8a2396d813e7ee49272a1a59b55139c81e64 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Mon, 1 Feb 2021 17:26:07 -0600 Subject: [PATCH 1803/2012] net: ipa: be explicit about endianness Sparse warns that the assignment of the metadata mask for a QMAP endpoint in ipa_endpoint_init_hdr_metadata_mask() is a bad assignment. We know we want the mask value to be big endian, even though the value we write is in host byte order. Use a __force tag to indicate we really mean it. Signed-off-by: Alex Elder Signed-off-by: Jakub Kicinski --- drivers/net/ipa/ipa_endpoint.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ipa/ipa_endpoint.c b/drivers/net/ipa/ipa_endpoint.c index 9f4be9812a1f3..448d89da1e456 100644 --- a/drivers/net/ipa/ipa_endpoint.c +++ b/drivers/net/ipa/ipa_endpoint.c @@ -588,7 +588,7 @@ static void ipa_endpoint_init_hdr_metadata_mask(struct ipa_endpoint *endpoint) /* Note that HDR_ENDIANNESS indicates big endian header fields */ if (endpoint->data->qmap) - val = cpu_to_be32(IPA_ENDPOINT_QMAP_METADATA_MASK); + val = (__force u32)cpu_to_be32(IPA_ENDPOINT_QMAP_METADATA_MASK); iowrite32(val, endpoint->ipa->reg_virt + offset); } -- GitLab From c13899f187285eaa5bfc30f8692888ba2e7765cb Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Mon, 1 Feb 2021 17:26:08 -0600 Subject: [PATCH 1804/2012] net: ipa: use the right accessor in ipa_endpoint_status_skip() When extracting the destination endpoint ID from the status in ipa_endpoint_status_skip(), u32_get_bits() is used. This happens to work, but it's wrong: the structure field is only 8 bits wide instead of 32. Fix this by using u8_get_bits() to get the destination endpoint ID. Signed-off-by: Alex Elder Signed-off-by: Jakub Kicinski --- drivers/net/ipa/ipa_endpoint.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ipa/ipa_endpoint.c b/drivers/net/ipa/ipa_endpoint.c index 448d89da1e456..612afece303f3 100644 --- a/drivers/net/ipa/ipa_endpoint.c +++ b/drivers/net/ipa/ipa_endpoint.c @@ -1164,8 +1164,8 @@ static bool ipa_endpoint_status_skip(struct ipa_endpoint *endpoint, return true; if (!status->pkt_len) return true; - endpoint_id = u32_get_bits(status->endp_dst_idx, - IPA_STATUS_DST_IDX_FMASK); + endpoint_id = u8_get_bits(status->endp_dst_idx, + IPA_STATUS_DST_IDX_FMASK); if (endpoint_id != endpoint->endpoint_id) return true; -- GitLab From 113b6ea09ccd46157d8d37fa9fabf1ca2315e503 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Mon, 1 Feb 2021 17:26:09 -0600 Subject: [PATCH 1805/2012] net: ipa: fix two format specifier errors Fix two format specifiers that used %lu for a size_t in "ipa_mem.c". Signed-off-by: Alex Elder Signed-off-by: Jakub Kicinski --- drivers/net/ipa/ipa_mem.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ipa/ipa_mem.c b/drivers/net/ipa/ipa_mem.c index 0cc3a3374caa2..f25029b9ec857 100644 --- a/drivers/net/ipa/ipa_mem.c +++ b/drivers/net/ipa/ipa_mem.c @@ -336,7 +336,7 @@ static void ipa_imem_exit(struct ipa *ipa) size = iommu_unmap(domain, ipa->imem_iova, ipa->imem_size); if (size != ipa->imem_size) - dev_warn(dev, "unmapped %zu IMEM bytes, expected %lu\n", + dev_warn(dev, "unmapped %zu IMEM bytes, expected %zu\n", size, ipa->imem_size); } else { dev_err(dev, "couldn't get IPA IOMMU domain for IMEM\n"); @@ -440,7 +440,7 @@ static void ipa_smem_exit(struct ipa *ipa) size = iommu_unmap(domain, ipa->smem_iova, ipa->smem_size); if (size != ipa->smem_size) - dev_warn(dev, "unmapped %zu SMEM bytes, expected %lu\n", + dev_warn(dev, "unmapped %zu SMEM bytes, expected %zu\n", size, ipa->smem_size); } else { -- GitLab From 6c9f18f294c4a1a6d8b1097e39c325481664ee1c Mon Sep 17 00:00:00 2001 From: Andreas Oetken Date: Tue, 2 Feb 2021 10:03:04 +0100 Subject: [PATCH 1806/2012] net: hsr: align sup_multicast_addr in struct hsr_priv to u16 boundary sup_multicast_addr is passed to ether_addr_equal for address comparison which casts the address inputs to u16 leading to an unaligned access. Aligning the sup_multicast_addr to u16 boundary fixes the issue. Signed-off-by: Andreas Oetken Link: https://lore.kernel.org/r/20210202090304.2740471-1-ennoerlangen@gmail.com Signed-off-by: Jakub Kicinski --- net/hsr/hsr_main.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/hsr/hsr_main.h b/net/hsr/hsr_main.h index 7dc92ce5a1340..a9c30a608e35d 100644 --- a/net/hsr/hsr_main.h +++ b/net/hsr/hsr_main.h @@ -217,7 +217,10 @@ struct hsr_priv { u8 net_id; /* for PRP, it occupies most significant 3 bits * of lan_id */ - unsigned char sup_multicast_addr[ETH_ALEN]; + unsigned char sup_multicast_addr[ETH_ALEN] __aligned(sizeof(u16)); + /* Align to u16 boundary to avoid unaligned access + * in ether_addr_equal + */ #ifdef CONFIG_DEBUG_FS struct dentry *node_tbl_root; #endif -- GitLab From 5bb98b2cfc2bd995010e71ce3147bdbca4dce92a Mon Sep 17 00:00:00 2001 From: Emil Renner Berthing Date: Sun, 31 Jan 2021 00:47:22 +0100 Subject: [PATCH 1807/2012] arcnet: use new tasklet API This converts the driver to use the new tasklet API introduced in commit 12cc923f1ccc ("tasklet: Introduce new initialization API") Signed-off-by: Emil Renner Berthing Signed-off-by: Jakub Kicinski --- drivers/net/arcnet/arcnet.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/net/arcnet/arcnet.c b/drivers/net/arcnet/arcnet.c index d76dd7d14299e..1bad1866ae462 100644 --- a/drivers/net/arcnet/arcnet.c +++ b/drivers/net/arcnet/arcnet.c @@ -427,9 +427,9 @@ out: rtnl_unlock(); } -static void arcnet_reply_tasklet(unsigned long data) +static void arcnet_reply_tasklet(struct tasklet_struct *t) { - struct arcnet_local *lp = (struct arcnet_local *)data; + struct arcnet_local *lp = from_tasklet(lp, t, reply_tasklet); struct sk_buff *ackskb, *skb; struct sock_exterr_skb *serr; @@ -530,8 +530,7 @@ int arcnet_open(struct net_device *dev) arc_cont(D_PROTO, "\n"); } - tasklet_init(&lp->reply_tasklet, arcnet_reply_tasklet, - (unsigned long)lp); + tasklet_setup(&lp->reply_tasklet, arcnet_reply_tasklet); arc_printk(D_INIT, dev, "arcnet_open: resetting card.\n"); -- GitLab From ca5ae9e44ece723317d21b00778bd0afcc3493db Mon Sep 17 00:00:00 2001 From: Emil Renner Berthing Date: Sun, 31 Jan 2021 00:47:23 +0100 Subject: [PATCH 1808/2012] caif_virtio: use new tasklet API This converts the driver to use the new tasklet API introduced in commit 12cc923f1ccc ("tasklet: Introduce new initialization API") Signed-off-by: Emil Renner Berthing Signed-off-by: Jakub Kicinski --- drivers/net/caif/caif_virtio.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/net/caif/caif_virtio.c b/drivers/net/caif/caif_virtio.c index 47a6d62b75111..106f089eb2a87 100644 --- a/drivers/net/caif/caif_virtio.c +++ b/drivers/net/caif/caif_virtio.c @@ -598,9 +598,9 @@ err: return NETDEV_TX_OK; } -static void cfv_tx_release_tasklet(unsigned long drv) +static void cfv_tx_release_tasklet(struct tasklet_struct *t) { - struct cfv_info *cfv = (struct cfv_info *)drv; + struct cfv_info *cfv = from_tasklet(cfv, t, tx_release_tasklet); cfv_release_used_buf(cfv->vq_tx); } @@ -716,9 +716,7 @@ static int cfv_probe(struct virtio_device *vdev) cfv->ctx.head = USHRT_MAX; netif_napi_add(netdev, &cfv->napi, cfv_rx_poll, CFV_DEFAULT_QUOTA); - tasklet_init(&cfv->tx_release_tasklet, - cfv_tx_release_tasklet, - (unsigned long)cfv); + tasklet_setup(&cfv->tx_release_tasklet, cfv_tx_release_tasklet); /* Carrier is off until netdevice is opened */ netif_carrier_off(netdev); -- GitLab From 08267523110a5f528c7b400091b273d5f70b14af Mon Sep 17 00:00:00 2001 From: Emil Renner Berthing Date: Sun, 31 Jan 2021 00:47:24 +0100 Subject: [PATCH 1809/2012] ifb: use new tasklet API This converts the driver to use the new tasklet API introduced in commit 12cc923f1ccc ("tasklet: Introduce new initialization API") Signed-off-by: Emil Renner Berthing Signed-off-by: Jakub Kicinski --- drivers/net/ifb.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/net/ifb.c b/drivers/net/ifb.c index fa63d4dee0ba3..ab7022582154e 100644 --- a/drivers/net/ifb.c +++ b/drivers/net/ifb.c @@ -59,9 +59,9 @@ static netdev_tx_t ifb_xmit(struct sk_buff *skb, struct net_device *dev); static int ifb_open(struct net_device *dev); static int ifb_close(struct net_device *dev); -static void ifb_ri_tasklet(unsigned long _txp) +static void ifb_ri_tasklet(struct tasklet_struct *t) { - struct ifb_q_private *txp = (struct ifb_q_private *)_txp; + struct ifb_q_private *txp = from_tasklet(txp, t, ifb_tasklet); struct netdev_queue *txq; struct sk_buff *skb; @@ -170,8 +170,7 @@ static int ifb_dev_init(struct net_device *dev) __skb_queue_head_init(&txp->tq); u64_stats_init(&txp->rsync); u64_stats_init(&txp->tsync); - tasklet_init(&txp->ifb_tasklet, ifb_ri_tasklet, - (unsigned long)txp); + tasklet_setup(&txp->ifb_tasklet, ifb_ri_tasklet); netif_tx_start_queue(netdev_get_tx_queue(dev, i)); } return 0; -- GitLab From 64ca5aba5178483f7ee34356d6292df680fe779d Mon Sep 17 00:00:00 2001 From: Emil Renner Berthing Date: Sun, 31 Jan 2021 00:47:25 +0100 Subject: [PATCH 1810/2012] ppp: use new tasklet API This converts the async and synctty drivers to use the new tasklet API n commit 12cc923f1ccc ("tasklet: Introduce new initialization API") Signed-off-by: Emil Renner Berthing Signed-off-by: Jakub Kicinski --- drivers/net/ppp/ppp_async.c | 8 ++++---- drivers/net/ppp/ppp_synctty.c | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/net/ppp/ppp_async.c b/drivers/net/ppp/ppp_async.c index 29a0917a81e60..2b66cf301b0e0 100644 --- a/drivers/net/ppp/ppp_async.c +++ b/drivers/net/ppp/ppp_async.c @@ -101,7 +101,7 @@ static void ppp_async_input(struct asyncppp *ap, const unsigned char *buf, char *flags, int count); static int ppp_async_ioctl(struct ppp_channel *chan, unsigned int cmd, unsigned long arg); -static void ppp_async_process(unsigned long arg); +static void ppp_async_process(struct tasklet_struct *t); static void async_lcp_peek(struct asyncppp *ap, unsigned char *data, int len, int inbound); @@ -179,7 +179,7 @@ ppp_asynctty_open(struct tty_struct *tty) ap->lcp_fcs = -1; skb_queue_head_init(&ap->rqueue); - tasklet_init(&ap->tsk, ppp_async_process, (unsigned long) ap); + tasklet_setup(&ap->tsk, ppp_async_process); refcount_set(&ap->refcnt, 1); init_completion(&ap->dead); @@ -488,9 +488,9 @@ ppp_async_ioctl(struct ppp_channel *chan, unsigned int cmd, unsigned long arg) * to the ppp_generic code, and to tell the ppp_generic code * if we can accept more output now. */ -static void ppp_async_process(unsigned long arg) +static void ppp_async_process(struct tasklet_struct *t) { - struct asyncppp *ap = (struct asyncppp *) arg; + struct asyncppp *ap = from_tasklet(ap, t, tsk); struct sk_buff *skb; /* process received packets */ diff --git a/drivers/net/ppp/ppp_synctty.c b/drivers/net/ppp/ppp_synctty.c index 0f338752c38b9..86ee5149f4f2a 100644 --- a/drivers/net/ppp/ppp_synctty.c +++ b/drivers/net/ppp/ppp_synctty.c @@ -90,7 +90,7 @@ static struct sk_buff* ppp_sync_txmunge(struct syncppp *ap, struct sk_buff *); static int ppp_sync_send(struct ppp_channel *chan, struct sk_buff *skb); static int ppp_sync_ioctl(struct ppp_channel *chan, unsigned int cmd, unsigned long arg); -static void ppp_sync_process(unsigned long arg); +static void ppp_sync_process(struct tasklet_struct *t); static int ppp_sync_push(struct syncppp *ap); static void ppp_sync_flush_output(struct syncppp *ap); static void ppp_sync_input(struct syncppp *ap, const unsigned char *buf, @@ -177,7 +177,7 @@ ppp_sync_open(struct tty_struct *tty) ap->raccm = ~0U; skb_queue_head_init(&ap->rqueue); - tasklet_init(&ap->tsk, ppp_sync_process, (unsigned long) ap); + tasklet_setup(&ap->tsk, ppp_sync_process); refcount_set(&ap->refcnt, 1); init_completion(&ap->dead_cmp); @@ -480,9 +480,9 @@ ppp_sync_ioctl(struct ppp_channel *chan, unsigned int cmd, unsigned long arg) * to the ppp_generic code, and to tell the ppp_generic code * if we can accept more output now. */ -static void ppp_sync_process(unsigned long arg) +static void ppp_sync_process(struct tasklet_struct *t) { - struct syncppp *ap = (struct syncppp *) arg; + struct syncppp *ap = from_tasklet(ap, t, tsk); struct sk_buff *skb; /* process received packets */ -- GitLab From fb1eb9b31c7831ee299d2afb325f7a368225a614 Mon Sep 17 00:00:00 2001 From: Emil Renner Berthing Date: Sun, 31 Jan 2021 00:47:26 +0100 Subject: [PATCH 1811/2012] net: usb: hso: use new tasklet API This converts the driver to use the new tasklet API introduced in commit 12cc923f1ccc ("tasklet: Introduce new initialization API") Signed-off-by: Emil Renner Berthing Signed-off-by: Jakub Kicinski --- drivers/net/usb/hso.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/usb/hso.c b/drivers/net/usb/hso.c index ef6dd012b8c48..31d51346786ab 100644 --- a/drivers/net/usb/hso.c +++ b/drivers/net/usb/hso.c @@ -1213,9 +1213,10 @@ static void hso_std_serial_read_bulk_callback(struct urb *urb) * This needs to be a tasklet otherwise we will * end up recursively calling this function. */ -static void hso_unthrottle_tasklet(unsigned long data) +static void hso_unthrottle_tasklet(struct tasklet_struct *t) { - struct hso_serial *serial = (struct hso_serial *)data; + struct hso_serial *serial = from_tasklet(serial, t, + unthrottle_tasklet); unsigned long flags; spin_lock_irqsave(&serial->serial_lock, flags); @@ -1264,9 +1265,8 @@ static int hso_serial_open(struct tty_struct *tty, struct file *filp) serial->rx_state = RX_IDLE; /* Force default termio settings */ _hso_serial_set_termios(tty, NULL); - tasklet_init(&serial->unthrottle_tasklet, - hso_unthrottle_tasklet, - (unsigned long)serial); + tasklet_setup(&serial->unthrottle_tasklet, + hso_unthrottle_tasklet); result = hso_start_serial_device(serial->parent, GFP_KERNEL); if (result) { hso_stop_serial_device(serial->parent); -- GitLab From c23d544e995fd383ae96b3e414e7bdd88ae0a016 Mon Sep 17 00:00:00 2001 From: Emil Renner Berthing Date: Sun, 31 Jan 2021 00:47:27 +0100 Subject: [PATCH 1812/2012] net: usb: lan78xx: use new tasklet API This converts the driver to use the new tasklet API introduced in commit 12cc923f1ccc ("tasklet: Introduce new initialization API") Signed-off-by: Emil Renner Berthing Signed-off-by: Jakub Kicinski --- drivers/net/usb/lan78xx.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index bf243edeb0641..e81c5699c952e 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -3375,9 +3375,9 @@ static void lan78xx_rx_bh(struct lan78xx_net *dev) netif_wake_queue(dev->net); } -static void lan78xx_bh(unsigned long param) +static void lan78xx_bh(struct tasklet_struct *t) { - struct lan78xx_net *dev = (struct lan78xx_net *)param; + struct lan78xx_net *dev = from_tasklet(dev, t, bh); struct sk_buff *skb; struct skb_data *entry; @@ -3655,7 +3655,7 @@ static int lan78xx_probe(struct usb_interface *intf, skb_queue_head_init(&dev->txq_pend); mutex_init(&dev->phy_mutex); - tasklet_init(&dev->bh, lan78xx_bh, (unsigned long)dev); + tasklet_setup(&dev->bh, lan78xx_bh); INIT_DELAYED_WORK(&dev->wq, lan78xx_delayedwork); init_usb_anchor(&dev->deferred); -- GitLab From 23a64c514631fc7c96dbe3ac3fe355e51df2a9a8 Mon Sep 17 00:00:00 2001 From: Emil Renner Berthing Date: Sun, 31 Jan 2021 00:47:28 +0100 Subject: [PATCH 1813/2012] net: usb: pegasus: use new tasklet API This converts the driver to use the new tasklet API introduced in commit 12cc923f1ccc ("tasklet: Introduce new initialization API") Signed-off-by: Emil Renner Berthing Signed-off-by: Jakub Kicinski --- drivers/net/usb/pegasus.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/net/usb/pegasus.c b/drivers/net/usb/pegasus.c index 32e1335c94ad0..9a907182569cf 100644 --- a/drivers/net/usb/pegasus.c +++ b/drivers/net/usb/pegasus.c @@ -553,12 +553,11 @@ tl_sched: tasklet_schedule(&pegasus->rx_tl); } -static void rx_fixup(unsigned long data) +static void rx_fixup(struct tasklet_struct *t) { - pegasus_t *pegasus; + pegasus_t *pegasus = from_tasklet(pegasus, t, rx_tl); int status; - pegasus = (pegasus_t *) data; if (pegasus->flags & PEGASUS_UNPLUG) return; @@ -1129,7 +1128,7 @@ static int pegasus_probe(struct usb_interface *intf, goto out1; } - tasklet_init(&pegasus->rx_tl, rx_fixup, (unsigned long) pegasus); + tasklet_setup(&pegasus->rx_tl, rx_fixup); INIT_DELAYED_WORK(&pegasus->carrier_check, check_carrier); -- GitLab From f3163f1cb87141c7a41a15a5d4c98b353f807b04 Mon Sep 17 00:00:00 2001 From: Emil Renner Berthing Date: Sun, 31 Jan 2021 00:47:29 +0100 Subject: [PATCH 1814/2012] net: usb: r8152: use new tasklet API This converts the driver to use the new tasklet API introduced in commit 12cc923f1ccc ("tasklet: Introduce new initialization API") Signed-off-by: Emil Renner Berthing Signed-off-by: Jakub Kicinski --- drivers/net/usb/r8152.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index 67cd6986634fb..0d7d2938e21d8 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -2393,11 +2393,9 @@ static void tx_bottom(struct r8152 *tp) } while (res == 0); } -static void bottom_half(unsigned long data) +static void bottom_half(struct tasklet_struct *t) { - struct r8152 *tp; - - tp = (struct r8152 *)data; + struct r8152 *tp = from_tasklet(tp, t, tx_tl); if (test_bit(RTL8152_UNPLUG, &tp->flags)) return; @@ -6714,7 +6712,7 @@ static int rtl8152_probe(struct usb_interface *intf, mutex_init(&tp->control); INIT_DELAYED_WORK(&tp->schedule, rtl_work_func_t); INIT_DELAYED_WORK(&tp->hw_phy_work, rtl_hw_phy_work_func_t); - tasklet_init(&tp->tx_tl, bottom_half, (unsigned long)tp); + tasklet_setup(&tp->tx_tl, bottom_half); tasklet_disable(&tp->tx_tl); netdev->netdev_ops = &rtl8152_netdev_ops; -- GitLab From 1999ad32d4ff00581007543adffc465694b2e77b Mon Sep 17 00:00:00 2001 From: Emil Renner Berthing Date: Sun, 31 Jan 2021 00:47:30 +0100 Subject: [PATCH 1815/2012] net: usb: rtl8150: use new tasklet API This converts the driver to use the new tasklet API introduced in commit 12cc923f1ccc ("tasklet: Introduce new initialization API") Signed-off-by: Emil Renner Berthing Signed-off-by: Jakub Kicinski --- drivers/net/usb/rtl8150.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/usb/rtl8150.c b/drivers/net/usb/rtl8150.c index bf8a60533f3e7..7656f2a3afd93 100644 --- a/drivers/net/usb/rtl8150.c +++ b/drivers/net/usb/rtl8150.c @@ -577,9 +577,9 @@ static void free_skb_pool(rtl8150_t *dev) dev_kfree_skb(dev->rx_skb_pool[i]); } -static void rx_fixup(unsigned long data) +static void rx_fixup(struct tasklet_struct *t) { - struct rtl8150 *dev = (struct rtl8150 *)data; + struct rtl8150 *dev = from_tasklet(dev, t, tl); struct sk_buff *skb; int status; @@ -878,7 +878,7 @@ static int rtl8150_probe(struct usb_interface *intf, return -ENOMEM; } - tasklet_init(&dev->tl, rx_fixup, (unsigned long)dev); + tasklet_setup(&dev->tl, rx_fixup); spin_lock_init(&dev->rx_pool_lock); dev->udev = udev; -- GitLab From ec99a470c7d5517c97dee6dd7953275a92c63834 Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Mon, 1 Feb 2021 14:05:26 +0100 Subject: [PATCH 1816/2012] mptcp: fix length of MP_PRIO suboption MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With version 0 of the protocol it was legal to encode the 'Subflow Id' in the MP_PRIO suboption, to specify which subflow would change its 'Backup' flag. This has been removed from v1 specification: thus, according to RFC 8684 §3.3.8, the resulting 'Length' for MP_PRIO changed from 4 to 3 byte. Current Linux generates / parses MP_PRIO according to the old spec, using 'Length' equal to 4, and hardcoding 1 as 'Subflow Id'; RFC compliance can improve if we change 'Length' in other to become 3, leaving a 'Nop' after the MP_PRIO suboption. In this way the kernel will emit and accept *only* MP_PRIO suboptions that are compliant to version 1 of the MPTCP protocol. unpatched 5.11-rc kernel: [root@bottarga ~]# tcpdump -tnnr unpatched.pcap | grep prio reading from file unpatched.pcap, link-type LINUX_SLL (Linux cooked v1) dropped privs to tcpdump IP 10.0.3.2.48433 > 10.0.1.1.10006: Flags [.], ack 1, win 502, options [nop,nop,TS val 4032325513 ecr 1876514270,mptcp prio non-backup id 1,mptcp dss ack 14084896651682217737], length 0 patched 5.11-rc kernel: [root@bottarga ~]# tcpdump -tnnr patched.pcap | grep prio reading from file patched.pcap, link-type LINUX_SLL (Linux cooked v1) dropped privs to tcpdump IP 10.0.3.2.49735 > 10.0.1.1.10006: Flags [.], ack 1, win 502, options [nop,nop,TS val 1276737699 ecr 2686399734,mptcp prio non-backup,nop,mptcp dss ack 18433038869082491686], length 0 Changes since v2: - when accounting for option space, don't increment 'TCPOLEN_MPTCP_PRIO' and use 'TCPOLEN_MPTCP_PRIO_ALIGN' instead, thanks to Matthieu Baerts. Changes since v1: - refactor patch to avoid using 'TCPOLEN_MPTCP_PRIO' with its old value, thanks to Geliang Tang. Fixes: 067065422fcd ("mptcp: add the outgoing MP_PRIO support") Reviewed-by: Mat Martineau Reviewed-by: Matthieu Baerts Signed-off-by: Davide Caratti Reviewed-by: Matteo Croce Link: https://lore.kernel.org/r/846cdd41e6ad6ec88ef23fee1552ab39c2f5a3d1.1612184361.git.dcaratti@redhat.com Signed-off-by: Jakub Kicinski --- net/mptcp/options.c | 5 +++-- net/mptcp/protocol.h | 3 ++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/net/mptcp/options.c b/net/mptcp/options.c index c9643344a8d74..17ad42c65087d 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -699,10 +699,11 @@ static bool mptcp_established_options_mp_prio(struct sock *sk, if (!subflow->send_mp_prio) return false; - if (remaining < TCPOLEN_MPTCP_PRIO) + /* account for the trailing 'nop' option */ + if (remaining < TCPOLEN_MPTCP_PRIO_ALIGN) return false; - *size = TCPOLEN_MPTCP_PRIO; + *size = TCPOLEN_MPTCP_PRIO_ALIGN; opts->suboptions |= OPTION_MPTCP_PRIO; opts->backup = subflow->request_bkup; diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 1460705aaad05..07ee319f78472 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -60,7 +60,8 @@ #define TCPOLEN_MPTCP_ADD_ADDR6_BASE_PORT 24 #define TCPOLEN_MPTCP_PORT_LEN 4 #define TCPOLEN_MPTCP_RM_ADDR_BASE 4 -#define TCPOLEN_MPTCP_PRIO 4 +#define TCPOLEN_MPTCP_PRIO 3 +#define TCPOLEN_MPTCP_PRIO_ALIGN 4 #define TCPOLEN_MPTCP_FASTCLOSE 12 /* MPTCP MP_JOIN flags */ -- GitLab From 097b9146c0e26aabaa6ff3e5ea536a53f5254a79 Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Mon, 1 Feb 2021 17:04:20 +0100 Subject: [PATCH 1817/2012] net: fix up truesize of cloned skb in skb_prepare_for_shift() Avoid the assumption that ksize(kmalloc(S)) == ksize(kmalloc(S)): when cloning an skb, save and restore truesize after pskb_expand_head(). This can occur if the allocator decides to service an allocation of the same size differently (e.g. use a different size class, or pass the allocation on to KFENCE). Because truesize is used for bookkeeping (such as sk_wmem_queued), a modified truesize of a cloned skb may result in corrupt bookkeeping and relevant warnings (such as in sk_stream_kill_queues()). Link: https://lkml.kernel.org/r/X9JR/J6dMMOy1obu@elver.google.com Reported-by: syzbot+7b99aafdcc2eedea6178@syzkaller.appspotmail.com Suggested-by: Eric Dumazet Signed-off-by: Marco Elver Signed-off-by: Eric Dumazet Link: https://lore.kernel.org/r/20210201160420.2826895-1-elver@google.com Signed-off-by: Jakub Kicinski --- net/core/skbuff.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 2af12f7e170c7..3787093239f58 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -3289,7 +3289,19 @@ EXPORT_SYMBOL(skb_split); */ static int skb_prepare_for_shift(struct sk_buff *skb) { - return skb_cloned(skb) && pskb_expand_head(skb, 0, 0, GFP_ATOMIC); + int ret = 0; + + if (skb_cloned(skb)) { + /* Save and restore truesize: pskb_expand_head() may reallocate + * memory where ksize(kmalloc(S)) != ksize(kmalloc(S)), but we + * cannot change truesize at this point. + */ + unsigned int save_truesize = skb->truesize; + + ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC); + skb->truesize = save_truesize; + } + return ret; } /** -- GitLab From 6b00a76a1db6e8898b5f09e0f09ed129ce870ce3 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Mon, 1 Feb 2021 11:28:44 -0600 Subject: [PATCH 1818/2012] net: ipa: don't thaw channel if error starting If an error occurs starting a channel, don't "thaw" it. We should assume the channel remains in a non-started state. Update the comment in gsi_channel_stop(); calls to this function are no longer retried. Signed-off-by: Alex Elder Acked-by: Willem de Bruijn Signed-off-by: Jakub Kicinski --- drivers/net/ipa/gsi.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ipa/gsi.c b/drivers/net/ipa/gsi.c index 204fd8ba77285..1df7775a9bee3 100644 --- a/drivers/net/ipa/gsi.c +++ b/drivers/net/ipa/gsi.c @@ -885,7 +885,9 @@ int gsi_channel_start(struct gsi *gsi, u32 channel_id) mutex_unlock(&gsi->mutex); - gsi_channel_thaw(channel); + /* Thaw the channel if successful */ + if (!ret) + gsi_channel_thaw(channel); return ret; } @@ -910,7 +912,7 @@ int gsi_channel_stop(struct gsi *gsi, u32 channel_id) mutex_unlock(&gsi->mutex); - /* Thaw the channel if we need to retry (or on error) */ + /* Re-thaw the channel if an error occurred while stopping */ if (ret) gsi_channel_thaw(channel); -- GitLab From 697e834e143afa733913fe2c9bc06b5e4d139c66 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Mon, 1 Feb 2021 11:28:45 -0600 Subject: [PATCH 1819/2012] net: ipa: introduce gsi_channel_stop_retry() Create a new helper function that encapsulates issuing a set of channel stop commands, retrying if appropriate, with a short delay between attempts. Signed-off-by: Alex Elder Acked-by: Willem de Bruijn Signed-off-by: Jakub Kicinski --- drivers/net/ipa/gsi.c | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/drivers/net/ipa/gsi.c b/drivers/net/ipa/gsi.c index 1df7775a9bee3..1d89ea6b9fe7a 100644 --- a/drivers/net/ipa/gsi.c +++ b/drivers/net/ipa/gsi.c @@ -892,15 +892,12 @@ int gsi_channel_start(struct gsi *gsi, u32 channel_id) return ret; } -/* Stop a started channel */ -int gsi_channel_stop(struct gsi *gsi, u32 channel_id) +static int gsi_channel_stop_retry(struct gsi_channel *channel) { - struct gsi_channel *channel = &gsi->channel[channel_id]; u32 retries = GSI_CHANNEL_STOP_RETRIES; + struct gsi *gsi = channel->gsi; int ret; - gsi_channel_freeze(channel); - mutex_lock(&gsi->mutex); do { @@ -912,6 +909,19 @@ int gsi_channel_stop(struct gsi *gsi, u32 channel_id) mutex_unlock(&gsi->mutex); + return ret; +} + +/* Stop a started channel */ +int gsi_channel_stop(struct gsi *gsi, u32 channel_id) +{ + struct gsi_channel *channel = &gsi->channel[channel_id]; + int ret; + + gsi_channel_freeze(channel); + + ret = gsi_channel_stop_retry(channel); + /* Re-thaw the channel if an error occurred while stopping */ if (ret) gsi_channel_thaw(channel); -- GitLab From 893b838e73391c97c9388ef972899213bbb3049d Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Mon, 1 Feb 2021 11:28:46 -0600 Subject: [PATCH 1820/2012] net: ipa: introduce __gsi_channel_start() Create a new function that does most of the work of starting a channel. What's different is that it takes a flag indicating whether the channel should really be started or not. Create another new function __gsi_channel_stop() that behaves similarly. IPA v3.5.1 implements suspend using a special SUSPEND endpoint setting. If the endpoint is suspended when an I/O completes on the underlying GSI channel, a SUSPEND interrupt is generated. Newer versions of IPA do not implement the SUSPEND endpoint mode. Instead, endpoint suspend is implemented by simply stopping the underlying GSI channel. In this case, a completing I/O on a *stopped* channel causes the SUSPEND interrupt condition. These new functions put all activity related to starting or stopping a channel (including "thawing/freezing" the channel) in one place, whether or not the channel is actually started or stopped. Signed-off-by: Alex Elder Acked-by: Willem de Bruijn Signed-off-by: Jakub Kicinski --- drivers/net/ipa/gsi.c | 43 +++++++++++++++++++++++-------------------- 1 file changed, 23 insertions(+), 20 deletions(-) diff --git a/drivers/net/ipa/gsi.c b/drivers/net/ipa/gsi.c index 1d89ea6b9fe7a..32c27fa39873d 100644 --- a/drivers/net/ipa/gsi.c +++ b/drivers/net/ipa/gsi.c @@ -873,15 +873,14 @@ static void gsi_channel_deprogram(struct gsi_channel *channel) /* Nothing to do */ } -/* Start an allocated GSI channel */ -int gsi_channel_start(struct gsi *gsi, u32 channel_id) +static int __gsi_channel_start(struct gsi_channel *channel, bool start) { - struct gsi_channel *channel = &gsi->channel[channel_id]; + struct gsi *gsi = channel->gsi; int ret; mutex_lock(&gsi->mutex); - ret = gsi_channel_start_command(channel); + ret = start ? gsi_channel_start_command(channel) : 0; mutex_unlock(&gsi->mutex); @@ -892,6 +891,14 @@ int gsi_channel_start(struct gsi *gsi, u32 channel_id) return ret; } +/* Start an allocated GSI channel */ +int gsi_channel_start(struct gsi *gsi, u32 channel_id) +{ + struct gsi_channel *channel = &gsi->channel[channel_id]; + + return __gsi_channel_start(channel, true); +} + static int gsi_channel_stop_retry(struct gsi_channel *channel) { u32 retries = GSI_CHANNEL_STOP_RETRIES; @@ -912,15 +919,13 @@ static int gsi_channel_stop_retry(struct gsi_channel *channel) return ret; } -/* Stop a started channel */ -int gsi_channel_stop(struct gsi *gsi, u32 channel_id) +static int __gsi_channel_stop(struct gsi_channel *channel, bool stop) { - struct gsi_channel *channel = &gsi->channel[channel_id]; int ret; gsi_channel_freeze(channel); - ret = gsi_channel_stop_retry(channel); + ret = stop ? gsi_channel_stop_retry(channel) : 0; /* Re-thaw the channel if an error occurred while stopping */ if (ret) @@ -929,6 +934,14 @@ int gsi_channel_stop(struct gsi *gsi, u32 channel_id) return ret; } +/* Stop a started channel */ +int gsi_channel_stop(struct gsi *gsi, u32 channel_id) +{ + struct gsi_channel *channel = &gsi->channel[channel_id]; + + return __gsi_channel_stop(channel, true); +} + /* Reset and reconfigure a channel, (possibly) enabling the doorbell engine */ void gsi_channel_reset(struct gsi *gsi, u32 channel_id, bool doorbell) { @@ -952,12 +965,7 @@ int gsi_channel_suspend(struct gsi *gsi, u32 channel_id, bool stop) { struct gsi_channel *channel = &gsi->channel[channel_id]; - if (stop) - return gsi_channel_stop(gsi, channel_id); - - gsi_channel_freeze(channel); - - return 0; + return __gsi_channel_stop(channel, stop); } /* Resume a suspended channel (starting will be requested if STOPPED) */ @@ -965,12 +973,7 @@ int gsi_channel_resume(struct gsi *gsi, u32 channel_id, bool start) { struct gsi_channel *channel = &gsi->channel[channel_id]; - if (start) - return gsi_channel_start(gsi, channel_id); - - gsi_channel_thaw(channel); - - return 0; + return __gsi_channel_start(channel, start); } /** -- GitLab From bd1ea1e46448992a4a3dfb6e6e2c410ca069a41c Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Mon, 1 Feb 2021 11:28:47 -0600 Subject: [PATCH 1821/2012] net: ipa: kill gsi_channel_freeze() and gsi_channel_thaw() Open-code gsi_channel_freeze() and gsi_channel_thaw() in all callers and get rid of these two functions. This is part of reworking the sequence of things done during channel suspend/resume and start/stop. Signed-off-by: Alex Elder Acked-by: Willem de Bruijn Signed-off-by: Jakub Kicinski --- drivers/net/ipa/gsi.c | 37 ++++++++++++------------------------- 1 file changed, 12 insertions(+), 25 deletions(-) diff --git a/drivers/net/ipa/gsi.c b/drivers/net/ipa/gsi.c index 32c27fa39873d..049a63e21bca4 100644 --- a/drivers/net/ipa/gsi.c +++ b/drivers/net/ipa/gsi.c @@ -764,24 +764,6 @@ static void gsi_channel_trans_quiesce(struct gsi_channel *channel) } } -/* Stop channel activity. Transactions may not be allocated until thawed. */ -static void gsi_channel_freeze(struct gsi_channel *channel) -{ - gsi_channel_trans_quiesce(channel); - - napi_disable(&channel->napi); - - gsi_irq_ieob_disable_one(channel->gsi, channel->evt_ring_id); -} - -/* Allow transactions to be used on the channel again. */ -static void gsi_channel_thaw(struct gsi_channel *channel) -{ - gsi_irq_ieob_enable_one(channel->gsi, channel->evt_ring_id); - - napi_enable(&channel->napi); -} - /* Program a channel for use */ static void gsi_channel_program(struct gsi_channel *channel, bool doorbell) { @@ -884,9 +866,10 @@ static int __gsi_channel_start(struct gsi_channel *channel, bool start) mutex_unlock(&gsi->mutex); - /* Thaw the channel if successful */ - if (!ret) - gsi_channel_thaw(channel); + if (!ret) { + gsi_irq_ieob_enable_one(gsi, channel->evt_ring_id); + napi_enable(&channel->napi); + } return ret; } @@ -921,15 +904,19 @@ static int gsi_channel_stop_retry(struct gsi_channel *channel) static int __gsi_channel_stop(struct gsi_channel *channel, bool stop) { + struct gsi *gsi = channel->gsi; int ret; - gsi_channel_freeze(channel); + gsi_channel_trans_quiesce(channel); + napi_disable(&channel->napi); + gsi_irq_ieob_disable_one(gsi, channel->evt_ring_id); ret = stop ? gsi_channel_stop_retry(channel) : 0; - /* Re-thaw the channel if an error occurred while stopping */ - if (ret) - gsi_channel_thaw(channel); + if (ret) { + gsi_irq_ieob_enable_one(gsi, channel->evt_ring_id); + napi_enable(&channel->napi); + } return ret; } -- GitLab From 4fef691c9b6ab5ed92b874bc6ddfb14a34c650ab Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Mon, 1 Feb 2021 11:28:48 -0600 Subject: [PATCH 1822/2012] net: ipa: disable interrupt and NAPI after channel stop Disable both the I/O completion interrupt and NAPI polling on a channel *after* we successfully stop it rather than before. This ensures a completion occurring just before the channel is stopped gets processed. Enable NAPI polling and the interrupt *before* starting a channel rather than after, to be symmetric. A stopped channel won't generate any completion interrupts anyway. Enable NAPI before the interrupt and disable it afterward. Signed-off-by: Alex Elder Acked-by: Willem de Bruijn Signed-off-by: Jakub Kicinski --- drivers/net/ipa/gsi.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/net/ipa/gsi.c b/drivers/net/ipa/gsi.c index 049a63e21bca4..9b3d5b639dac3 100644 --- a/drivers/net/ipa/gsi.c +++ b/drivers/net/ipa/gsi.c @@ -860,15 +860,18 @@ static int __gsi_channel_start(struct gsi_channel *channel, bool start) struct gsi *gsi = channel->gsi; int ret; + napi_enable(&channel->napi); + gsi_irq_ieob_enable_one(gsi, channel->evt_ring_id); + mutex_lock(&gsi->mutex); ret = start ? gsi_channel_start_command(channel) : 0; mutex_unlock(&gsi->mutex); - if (!ret) { - gsi_irq_ieob_enable_one(gsi, channel->evt_ring_id); - napi_enable(&channel->napi); + if (ret) { + gsi_irq_ieob_disable_one(gsi, channel->evt_ring_id); + napi_disable(&channel->napi); } return ret; @@ -908,14 +911,11 @@ static int __gsi_channel_stop(struct gsi_channel *channel, bool stop) int ret; gsi_channel_trans_quiesce(channel); - napi_disable(&channel->napi); - gsi_irq_ieob_disable_one(gsi, channel->evt_ring_id); ret = stop ? gsi_channel_stop_retry(channel) : 0; - - if (ret) { - gsi_irq_ieob_enable_one(gsi, channel->evt_ring_id); - napi_enable(&channel->napi); + if (!ret) { + gsi_irq_ieob_disable_one(gsi, channel->evt_ring_id); + napi_disable(&channel->napi); } return ret; -- GitLab From a65c0288b355a8bb71f43cf9b4c33ada51e0ec26 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Mon, 1 Feb 2021 11:28:49 -0600 Subject: [PATCH 1823/2012] net: ipa: don't disable interrupt on suspend No completion interrupts will occur while an endpoint is suspended, nor when a channel has been stopped for suspend. So there's no need to disable the interrupt during suspend and re-enable it when resuming. Without any interrupts occurring, there is no need to disable/re-enable NAPI for channel suspend/resume either. We'll only enable NAPI and the interrupt when we first start the channel, and disable it again only when it's "really" stopped. To accomplish this, move the enable/disable calls out of __gsi_channel_start() and __gsi_channel_stop(), and into gsi_channel_start() and gsi_channel_stop() instead. Add a call to napi_synchronize() to gsi_channel_suspend(), to ensure NAPI polling is done before moving on. Signed-off-by: Alex Elder Acked-by: Willem de Bruijn Signed-off-by: Jakub Kicinski --- drivers/net/ipa/gsi.c | 44 ++++++++++++++++++++++++++++--------------- 1 file changed, 29 insertions(+), 15 deletions(-) diff --git a/drivers/net/ipa/gsi.c b/drivers/net/ipa/gsi.c index 9b3d5b639dac3..dcc27b56102b0 100644 --- a/drivers/net/ipa/gsi.c +++ b/drivers/net/ipa/gsi.c @@ -860,20 +860,15 @@ static int __gsi_channel_start(struct gsi_channel *channel, bool start) struct gsi *gsi = channel->gsi; int ret; - napi_enable(&channel->napi); - gsi_irq_ieob_enable_one(gsi, channel->evt_ring_id); + if (!start) + return 0; mutex_lock(&gsi->mutex); - ret = start ? gsi_channel_start_command(channel) : 0; + ret = gsi_channel_start_command(channel); mutex_unlock(&gsi->mutex); - if (ret) { - gsi_irq_ieob_disable_one(gsi, channel->evt_ring_id); - napi_disable(&channel->napi); - } - return ret; } @@ -881,8 +876,19 @@ static int __gsi_channel_start(struct gsi_channel *channel, bool start) int gsi_channel_start(struct gsi *gsi, u32 channel_id) { struct gsi_channel *channel = &gsi->channel[channel_id]; + int ret; + + /* Enable NAPI and the completion interrupt */ + napi_enable(&channel->napi); + gsi_irq_ieob_enable_one(gsi, channel->evt_ring_id); - return __gsi_channel_start(channel, true); + ret = __gsi_channel_start(channel, true); + if (ret) { + gsi_irq_ieob_disable_one(gsi, channel->evt_ring_id); + napi_disable(&channel->napi); + } + + return ret; } static int gsi_channel_stop_retry(struct gsi_channel *channel) @@ -907,16 +913,15 @@ static int gsi_channel_stop_retry(struct gsi_channel *channel) static int __gsi_channel_stop(struct gsi_channel *channel, bool stop) { - struct gsi *gsi = channel->gsi; int ret; + /* Wait for any underway transactions to complete before stopping. */ gsi_channel_trans_quiesce(channel); ret = stop ? gsi_channel_stop_retry(channel) : 0; - if (!ret) { - gsi_irq_ieob_disable_one(gsi, channel->evt_ring_id); - napi_disable(&channel->napi); - } + /* Finally, ensure NAPI polling has finished. */ + if (!ret) + napi_synchronize(&channel->napi); return ret; } @@ -925,8 +930,17 @@ static int __gsi_channel_stop(struct gsi_channel *channel, bool stop) int gsi_channel_stop(struct gsi *gsi, u32 channel_id) { struct gsi_channel *channel = &gsi->channel[channel_id]; + int ret; - return __gsi_channel_stop(channel, true); + /* Only disable the completion interrupt if stop is successful */ + ret = __gsi_channel_stop(channel, true); + if (ret) + return ret; + + gsi_irq_ieob_disable_one(gsi, channel->evt_ring_id); + napi_disable(&channel->napi); + + return 0; } /* Reset and reconfigure a channel, (possibly) enabling the doorbell engine */ -- GitLab From e63169208b25f1aaf3b6dc47a1df986d260efc3f Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Mon, 1 Feb 2021 11:28:50 -0600 Subject: [PATCH 1824/2012] net: ipa: expand last transaction check Transactions to send data for a network device can be allocated at any time up until the point the TX queue is stopped. It is possible for ipa_start_xmit() to be called in one context just before a the transmit queue is stopped in another. Update gsi_channel_trans_last() so that for TX channels the allocated and pending transaction lists are checked--in addition to the completed and polled lists--to determine the "last" transaction. This means any transaction that has been allocated before the TX queue is stopped will be allowed to complete before we conclude the channel is quiesced. Rework the function a bit to use a list pointer and gotos. Signed-off-by: Alex Elder Acked-by: Willem de Bruijn Signed-off-by: Jakub Kicinski --- drivers/net/ipa/gsi.c | 34 +++++++++++++++++++++++++--------- 1 file changed, 25 insertions(+), 9 deletions(-) diff --git a/drivers/net/ipa/gsi.c b/drivers/net/ipa/gsi.c index dcc27b56102b0..53640447bf123 100644 --- a/drivers/net/ipa/gsi.c +++ b/drivers/net/ipa/gsi.c @@ -725,22 +725,38 @@ static void gsi_evt_ring_program(struct gsi *gsi, u32 evt_ring_id) gsi_evt_ring_doorbell(gsi, evt_ring_id, 0); } -/* Return the last (most recent) transaction completed on a channel. */ +/* Find the transaction whose completion indicates a channel is quiesced */ static struct gsi_trans *gsi_channel_trans_last(struct gsi_channel *channel) { struct gsi_trans_info *trans_info = &channel->trans_info; + const struct list_head *list; struct gsi_trans *trans; spin_lock_bh(&trans_info->spinlock); - if (!list_empty(&trans_info->complete)) - trans = list_last_entry(&trans_info->complete, - struct gsi_trans, links); - else if (!list_empty(&trans_info->polled)) - trans = list_last_entry(&trans_info->polled, - struct gsi_trans, links); - else - trans = NULL; + /* There is a small chance a TX transaction got allocated just + * before we disabled transmits, so check for that. + */ + if (channel->toward_ipa) { + list = &trans_info->alloc; + if (!list_empty(list)) + goto done; + list = &trans_info->pending; + if (!list_empty(list)) + goto done; + } + + /* Otherwise (TX or RX) we want to wait for anything that + * has completed, or has been polled but not released yet. + */ + list = &trans_info->complete; + if (!list_empty(list)) + goto done; + list = &trans_info->polled; + if (list_empty(list)) + list = NULL; +done: + trans = list ? list_last_entry(list, struct gsi_trans, links) : NULL; /* Caller will wait for this, so take a reference */ if (trans) -- GitLab From 9e635a21cae0650a8d1ba200888bd09a51ac4847 Mon Sep 17 00:00:00 2001 From: Amit Cohen Date: Mon, 1 Feb 2021 21:47:48 +0200 Subject: [PATCH 1825/2012] netdevsim: fib: Convert the current occupancy to an atomic variable When route is added/deleted, the appropriate counter is increased/decreased to maintain number of routes. User can limit the number of routes and then according to the appropriate counter, adding more routes than the limitation is forbidden. Currently, there is one lock which protects hashtable, list and accounting. Handling the counters will be performed from both atomic context and non-atomic context, while the hashtable and the list will be used only from non-atomic context and therefore will be protected by a separate lock. Protect accounting by using an atomic variable, so lock is not needed. v2: * Use atomic64_sub() in nsim_nexthop_account()'s error path Signed-off-by: Amit Cohen Signed-off-by: Ido Schimmel Reviewed-by: David Ahern Signed-off-by: Jakub Kicinski --- drivers/net/netdevsim/fib.c | 55 ++++++++++++++++++------------------- 1 file changed, 27 insertions(+), 28 deletions(-) diff --git a/drivers/net/netdevsim/fib.c b/drivers/net/netdevsim/fib.c index f140bbca98c5d..7be603e067694 100644 --- a/drivers/net/netdevsim/fib.c +++ b/drivers/net/netdevsim/fib.c @@ -31,7 +31,7 @@ struct nsim_fib_entry { u64 max; - u64 num; + atomic64_t num; }; struct nsim_per_fib_data { @@ -46,7 +46,7 @@ struct nsim_fib_data { struct nsim_fib_entry nexthops; struct rhashtable fib_rt_ht; struct list_head fib_rt_list; - spinlock_t fib_lock; /* Protects hashtable, list and accounting */ + spinlock_t fib_lock; /* Protects hashtable and list */ struct notifier_block nexthop_nb; struct rhashtable nexthop_ht; struct devlink *devlink; @@ -128,7 +128,7 @@ u64 nsim_fib_get_val(struct nsim_fib_data *fib_data, return 0; } - return max ? entry->max : entry->num; + return max ? entry->max : atomic64_read(&entry->num); } static void nsim_fib_set_max(struct nsim_fib_data *fib_data, @@ -165,14 +165,12 @@ static int nsim_fib_rule_account(struct nsim_fib_entry *entry, bool add, int err = 0; if (add) { - if (entry->num < entry->max) { - entry->num++; - } else { + if (!atomic64_add_unless(&entry->num, 1, entry->max)) { err = -ENOSPC; NL_SET_ERR_MSG_MOD(extack, "Exceeded number of supported fib rule entries"); } } else { - entry->num--; + atomic64_dec_if_positive(&entry->num); } return err; @@ -202,14 +200,12 @@ static int nsim_fib_account(struct nsim_fib_entry *entry, bool add, int err = 0; if (add) { - if (entry->num < entry->max) { - entry->num++; - } else { + if (!atomic64_add_unless(&entry->num, 1, entry->max)) { err = -ENOSPC; NL_SET_ERR_MSG_MOD(extack, "Exceeded number of supported fib entries"); } } else { - entry->num--; + atomic64_dec_if_positive(&entry->num); } return err; @@ -769,25 +765,22 @@ static int nsim_fib_event_nb(struct notifier_block *nb, unsigned long event, struct fib_notifier_info *info = ptr; int err = 0; - /* IPv6 routes can be added via RAs from softIRQ. */ - spin_lock_bh(&data->fib_lock); - switch (event) { case FIB_EVENT_RULE_ADD: case FIB_EVENT_RULE_DEL: err = nsim_fib_rule_event(data, info, event == FIB_EVENT_RULE_ADD); break; - case FIB_EVENT_ENTRY_REPLACE: case FIB_EVENT_ENTRY_APPEND: case FIB_EVENT_ENTRY_DEL: + /* IPv6 routes can be added via RAs from softIRQ. */ + spin_lock_bh(&data->fib_lock); err = nsim_fib_event(data, info, event); + spin_unlock_bh(&data->fib_lock); break; } - spin_unlock_bh(&data->fib_lock); - return notifier_from_errno(err); } @@ -847,8 +840,8 @@ static void nsim_fib_dump_inconsistent(struct notifier_block *nb) nsim_fib_rt_free(fib_rt, data); } - data->ipv4.rules.num = 0ULL; - data->ipv6.rules.num = 0ULL; + atomic64_set(&data->ipv4.rules.num, 0ULL); + atomic64_set(&data->ipv6.rules.num, 0ULL); } static struct nsim_nexthop *nsim_nexthop_create(struct nsim_fib_data *data, @@ -894,22 +887,28 @@ static void nsim_nexthop_destroy(struct nsim_nexthop *nexthop) static int nsim_nexthop_account(struct nsim_fib_data *data, u64 occ, bool add, struct netlink_ext_ack *extack) { - int err = 0; + int i, err = 0; if (add) { - if (data->nexthops.num + occ <= data->nexthops.max) { - data->nexthops.num += occ; - } else { - err = -ENOSPC; - NL_SET_ERR_MSG_MOD(extack, "Exceeded number of supported nexthops"); - } + for (i = 0; i < occ; i++) + if (!atomic64_add_unless(&data->nexthops.num, 1, + data->nexthops.max)) { + err = -ENOSPC; + NL_SET_ERR_MSG_MOD(extack, "Exceeded number of supported nexthops"); + goto err_num_decrease; + } } else { - if (WARN_ON(occ > data->nexthops.num)) + if (WARN_ON(occ > atomic64_read(&data->nexthops.num))) return -EINVAL; - data->nexthops.num -= occ; + atomic64_sub(occ, &data->nexthops.num); } return err; + +err_num_decrease: + atomic64_sub(i, &data->nexthops.num); + return err; + } static int nsim_nexthop_add(struct nsim_fib_data *data, -- GitLab From 0ae3eb7b4611207e140e9772398b9f88b72d6839 Mon Sep 17 00:00:00 2001 From: Amit Cohen Date: Mon, 1 Feb 2021 21:47:49 +0200 Subject: [PATCH 1826/2012] netdevsim: fib: Perform the route programming in a non-atomic context Currently, netdevsim implements dummy FIB offload and marks notified routes with RTM_F_TRAP flag. netdevsim does not defer route notifications to a work queue because it does not need to program any hardware. Given that netdevsim's purpose is to both give an example implementation and allow developers to test their code, align netdevsim to a "real" hardware device driver like mlxsw and have it also perform the route "programming" in a non-atomic context. It will be used to test route flags notifications which will be added in the next patches. The following changes are needed when route handling is performed in WQ: - Handle the accounting in the main context, to be able to return an error for adding route when all the routes are used. For FIB_EVENT_ENTRY_REPLACE increase the counter before scheduling the delayed work, and in case that this event replaces an existing route, decrease the counter as part of the delayed work. - For IPv6, cannot use fen6_info->rt->fib6_siblings list because it might be changed during handling the delayed work. Save an array with the nexthops as part of fib6_event struct, and take a reference for each nexthop to prevent them from being freed while event is queued. - Change GFP_ATOMIC allocations to GFP_KERNEL. - Use single work item that is handling a list of ordered routes. Handling routes must be processed in the order they were submitted to avoid logical errors that could lead to unexpected failures. Signed-off-by: Amit Cohen Signed-off-by: Ido Schimmel Acked-by: David Ahern Signed-off-by: Jakub Kicinski --- drivers/net/netdevsim/fib.c | 467 +++++++++++++++++++++++++----------- 1 file changed, 327 insertions(+), 140 deletions(-) diff --git a/drivers/net/netdevsim/fib.c b/drivers/net/netdevsim/fib.c index 7be603e067694..d557533d43dd1 100644 --- a/drivers/net/netdevsim/fib.c +++ b/drivers/net/netdevsim/fib.c @@ -46,10 +46,13 @@ struct nsim_fib_data { struct nsim_fib_entry nexthops; struct rhashtable fib_rt_ht; struct list_head fib_rt_list; - spinlock_t fib_lock; /* Protects hashtable and list */ + struct mutex fib_lock; /* Protects hashtable and list */ struct notifier_block nexthop_nb; struct rhashtable nexthop_ht; struct devlink *devlink; + struct work_struct fib_event_work; + struct list_head fib_event_queue; + spinlock_t fib_event_queue_lock; /* Protects fib event queue list */ }; struct nsim_fib_rt_key { @@ -83,6 +86,22 @@ struct nsim_fib6_rt_nh { struct fib6_info *rt; }; +struct nsim_fib6_event { + struct fib6_info **rt_arr; + unsigned int nrt6; +}; + +struct nsim_fib_event { + struct list_head list; /* node in fib queue */ + union { + struct fib_entry_notifier_info fen_info; + struct nsim_fib6_event fib6_event; + }; + struct nsim_fib_data *data; + unsigned long event; + int family; +}; + static const struct rhashtable_params nsim_fib_rt_ht_params = { .key_offset = offsetof(struct nsim_fib_rt, key), .head_offset = offsetof(struct nsim_fib_rt, ht_node), @@ -194,16 +213,13 @@ static int nsim_fib_rule_event(struct nsim_fib_data *data, return err; } -static int nsim_fib_account(struct nsim_fib_entry *entry, bool add, - struct netlink_ext_ack *extack) +static int nsim_fib_account(struct nsim_fib_entry *entry, bool add) { int err = 0; if (add) { - if (!atomic64_add_unless(&entry->num, 1, entry->max)) { + if (!atomic64_add_unless(&entry->num, 1, entry->max)) err = -ENOSPC; - NL_SET_ERR_MSG_MOD(extack, "Exceeded number of supported fib entries"); - } } else { atomic64_dec_if_positive(&entry->num); } @@ -250,7 +266,7 @@ nsim_fib4_rt_create(struct nsim_fib_data *data, { struct nsim_fib4_rt *fib4_rt; - fib4_rt = kzalloc(sizeof(*fib4_rt), GFP_ATOMIC); + fib4_rt = kzalloc(sizeof(*fib4_rt), GFP_KERNEL); if (!fib4_rt) return NULL; @@ -307,51 +323,52 @@ static void nsim_fib4_rt_hw_flags_set(struct net *net, } static int nsim_fib4_rt_add(struct nsim_fib_data *data, - struct nsim_fib4_rt *fib4_rt, - struct netlink_ext_ack *extack) + struct nsim_fib4_rt *fib4_rt) { struct net *net = devlink_net(data->devlink); int err; - err = nsim_fib_account(&data->ipv4.fib, true, extack); - if (err) - return err; - err = rhashtable_insert_fast(&data->fib_rt_ht, &fib4_rt->common.ht_node, nsim_fib_rt_ht_params); - if (err) { - NL_SET_ERR_MSG_MOD(extack, "Failed to insert IPv4 route"); + if (err) goto err_fib_dismiss; - } + /* Simulate hardware programming latency. */ + msleep(1); nsim_fib4_rt_hw_flags_set(net, fib4_rt, true); return 0; err_fib_dismiss: - nsim_fib_account(&data->ipv4.fib, false, extack); + /* Drop the accounting that was increased from the notification + * context when FIB_EVENT_ENTRY_REPLACE was triggered. + */ + nsim_fib_account(&data->ipv4.fib, false); return err; } static int nsim_fib4_rt_replace(struct nsim_fib_data *data, struct nsim_fib4_rt *fib4_rt, - struct nsim_fib4_rt *fib4_rt_old, - struct netlink_ext_ack *extack) + struct nsim_fib4_rt *fib4_rt_old) { struct net *net = devlink_net(data->devlink); int err; - /* We are replacing a route, so no need to change the accounting. */ + /* We are replacing a route, so need to remove the accounting which + * was increased when FIB_EVENT_ENTRY_REPLACE was triggered. + */ + err = nsim_fib_account(&data->ipv4.fib, false); + if (err) + return err; err = rhashtable_replace_fast(&data->fib_rt_ht, &fib4_rt_old->common.ht_node, &fib4_rt->common.ht_node, nsim_fib_rt_ht_params); - if (err) { - NL_SET_ERR_MSG_MOD(extack, "Failed to replace IPv4 route"); + if (err) return err; - } + msleep(1); nsim_fib4_rt_hw_flags_set(net, fib4_rt, true); nsim_fib4_rt_hw_flags_set(net, fib4_rt_old, false); @@ -363,7 +380,6 @@ static int nsim_fib4_rt_replace(struct nsim_fib_data *data, static int nsim_fib4_rt_insert(struct nsim_fib_data *data, struct fib_entry_notifier_info *fen_info) { - struct netlink_ext_ack *extack = fen_info->info.extack; struct nsim_fib4_rt *fib4_rt, *fib4_rt_old; int err; @@ -373,9 +389,9 @@ static int nsim_fib4_rt_insert(struct nsim_fib_data *data, fib4_rt_old = nsim_fib4_rt_lookup(&data->fib_rt_ht, fen_info); if (!fib4_rt_old) - err = nsim_fib4_rt_add(data, fib4_rt, extack); + err = nsim_fib4_rt_add(data, fib4_rt); else - err = nsim_fib4_rt_replace(data, fib4_rt, fib4_rt_old, extack); + err = nsim_fib4_rt_replace(data, fib4_rt, fib4_rt_old); if (err) nsim_fib4_rt_destroy(fib4_rt); @@ -386,7 +402,6 @@ static int nsim_fib4_rt_insert(struct nsim_fib_data *data, static void nsim_fib4_rt_remove(struct nsim_fib_data *data, const struct fib_entry_notifier_info *fen_info) { - struct netlink_ext_ack *extack = fen_info->info.extack; struct nsim_fib4_rt *fib4_rt; fib4_rt = nsim_fib4_rt_lookup(&data->fib_rt_ht, fen_info); @@ -395,19 +410,15 @@ static void nsim_fib4_rt_remove(struct nsim_fib_data *data, rhashtable_remove_fast(&data->fib_rt_ht, &fib4_rt->common.ht_node, nsim_fib_rt_ht_params); - nsim_fib_account(&data->ipv4.fib, false, extack); nsim_fib4_rt_destroy(fib4_rt); } static int nsim_fib4_event(struct nsim_fib_data *data, - struct fib_notifier_info *info, + struct fib_entry_notifier_info *fen_info, unsigned long event) { - struct fib_entry_notifier_info *fen_info; int err = 0; - fen_info = container_of(info, struct fib_entry_notifier_info, info); - switch (event) { case FIB_EVENT_ENTRY_REPLACE: err = nsim_fib4_rt_insert(data, fen_info); @@ -441,7 +452,7 @@ static int nsim_fib6_rt_nh_add(struct nsim_fib6_rt *fib6_rt, { struct nsim_fib6_rt_nh *fib6_rt_nh; - fib6_rt_nh = kzalloc(sizeof(*fib6_rt_nh), GFP_ATOMIC); + fib6_rt_nh = kzalloc(sizeof(*fib6_rt_nh), GFP_KERNEL); if (!fib6_rt_nh) return -ENOMEM; @@ -453,6 +464,17 @@ static int nsim_fib6_rt_nh_add(struct nsim_fib6_rt *fib6_rt, return 0; } +#if IS_ENABLED(CONFIG_IPV6) +static void nsim_rt6_release(struct fib6_info *rt) +{ + fib6_info_release(rt); +} +#else +static void nsim_rt6_release(struct fib6_info *rt) +{ +} +#endif + static void nsim_fib6_rt_nh_del(struct nsim_fib6_rt *fib6_rt, const struct fib6_info *rt) { @@ -464,22 +486,20 @@ static void nsim_fib6_rt_nh_del(struct nsim_fib6_rt *fib6_rt, fib6_rt->nhs--; list_del(&fib6_rt_nh->list); -#if IS_ENABLED(CONFIG_IPV6) - fib6_info_release(fib6_rt_nh->rt); -#endif + nsim_rt6_release(fib6_rt_nh->rt); kfree(fib6_rt_nh); } static struct nsim_fib6_rt * nsim_fib6_rt_create(struct nsim_fib_data *data, - struct fib6_entry_notifier_info *fen6_info) + struct fib6_info **rt_arr, unsigned int nrt6) { - struct fib6_info *iter, *rt = fen6_info->rt; + struct fib6_info *rt = rt_arr[0]; struct nsim_fib6_rt *fib6_rt; int i = 0; int err; - fib6_rt = kzalloc(sizeof(*fib6_rt), GFP_ATOMIC); + fib6_rt = kzalloc(sizeof(*fib6_rt), GFP_KERNEL); if (!fib6_rt) return ERR_PTR(-ENOMEM); @@ -493,32 +513,18 @@ nsim_fib6_rt_create(struct nsim_fib_data *data, */ INIT_LIST_HEAD(&fib6_rt->nh_list); - err = nsim_fib6_rt_nh_add(fib6_rt, rt); - if (err) - goto err_fib_rt_fini; - - if (!fen6_info->nsiblings) - return fib6_rt; - - list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings) { - if (i == fen6_info->nsiblings) - break; - - err = nsim_fib6_rt_nh_add(fib6_rt, iter); + for (i = 0; i < nrt6; i++) { + err = nsim_fib6_rt_nh_add(fib6_rt, rt_arr[i]); if (err) goto err_fib6_rt_nh_del; - i++; } - WARN_ON_ONCE(i != fen6_info->nsiblings); return fib6_rt; err_fib6_rt_nh_del: - list_for_each_entry_continue_reverse(iter, &rt->fib6_siblings, - fib6_siblings) - nsim_fib6_rt_nh_del(fib6_rt, iter); - nsim_fib6_rt_nh_del(fib6_rt, rt); -err_fib_rt_fini: + for (i--; i >= 0; i--) { + nsim_fib6_rt_nh_del(fib6_rt, rt_arr[i]); + }; nsim_fib_rt_fini(&fib6_rt->common); kfree(fib6_rt); return ERR_PTR(err); @@ -551,47 +557,31 @@ nsim_fib6_rt_lookup(struct rhashtable *fib_rt_ht, const struct fib6_info *rt) } static int nsim_fib6_rt_append(struct nsim_fib_data *data, - struct fib6_entry_notifier_info *fen6_info) + struct nsim_fib6_event *fib6_event) { - struct fib6_info *iter, *rt = fen6_info->rt; + struct fib6_info *rt = fib6_event->rt_arr[0]; struct nsim_fib6_rt *fib6_rt; - int i = 0; - int err; + int i, err; fib6_rt = nsim_fib6_rt_lookup(&data->fib_rt_ht, rt); if (WARN_ON_ONCE(!fib6_rt)) return -EINVAL; - err = nsim_fib6_rt_nh_add(fib6_rt, rt); - if (err) - return err; - rt->trap = true; - - if (!fen6_info->nsiblings) - return 0; - - list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings) { - if (i == fen6_info->nsiblings) - break; - - err = nsim_fib6_rt_nh_add(fib6_rt, iter); + for (i = 0; i < fib6_event->nrt6; i++) { + err = nsim_fib6_rt_nh_add(fib6_rt, fib6_event->rt_arr[i]); if (err) goto err_fib6_rt_nh_del; - iter->trap = true; - i++; + + fib6_event->rt_arr[i]->trap = true; } - WARN_ON_ONCE(i != fen6_info->nsiblings); return 0; err_fib6_rt_nh_del: - list_for_each_entry_continue_reverse(iter, &rt->fib6_siblings, - fib6_siblings) { - iter->trap = false; - nsim_fib6_rt_nh_del(fib6_rt, iter); + for (i--; i >= 0; i--) { + fib6_event->rt_arr[i]->trap = false; + nsim_fib6_rt_nh_del(fib6_rt, fib6_event->rt_arr[i]); } - rt->trap = false; - nsim_fib6_rt_nh_del(fib6_rt, rt); return err; } @@ -605,49 +595,52 @@ static void nsim_fib6_rt_hw_flags_set(const struct nsim_fib6_rt *fib6_rt, } static int nsim_fib6_rt_add(struct nsim_fib_data *data, - struct nsim_fib6_rt *fib6_rt, - struct netlink_ext_ack *extack) + struct nsim_fib6_rt *fib6_rt) { int err; - err = nsim_fib_account(&data->ipv6.fib, true, extack); - if (err) - return err; - err = rhashtable_insert_fast(&data->fib_rt_ht, &fib6_rt->common.ht_node, nsim_fib_rt_ht_params); - if (err) { - NL_SET_ERR_MSG_MOD(extack, "Failed to insert IPv6 route"); + + if (err) goto err_fib_dismiss; - } + msleep(1); nsim_fib6_rt_hw_flags_set(fib6_rt, true); return 0; err_fib_dismiss: - nsim_fib_account(&data->ipv6.fib, false, extack); + /* Drop the accounting that was increased from the notification + * context when FIB_EVENT_ENTRY_REPLACE was triggered. + */ + nsim_fib_account(&data->ipv6.fib, false); return err; } static int nsim_fib6_rt_replace(struct nsim_fib_data *data, struct nsim_fib6_rt *fib6_rt, - struct nsim_fib6_rt *fib6_rt_old, - struct netlink_ext_ack *extack) + struct nsim_fib6_rt *fib6_rt_old) { int err; - /* We are replacing a route, so no need to change the accounting. */ + /* We are replacing a route, so need to remove the accounting which + * was increased when FIB_EVENT_ENTRY_REPLACE was triggered. + */ + err = nsim_fib_account(&data->ipv6.fib, false); + if (err) + return err; + err = rhashtable_replace_fast(&data->fib_rt_ht, &fib6_rt_old->common.ht_node, &fib6_rt->common.ht_node, nsim_fib_rt_ht_params); - if (err) { - NL_SET_ERR_MSG_MOD(extack, "Failed to replace IPv6 route"); + + if (err) return err; - } + msleep(1); nsim_fib6_rt_hw_flags_set(fib6_rt, true); nsim_fib6_rt_hw_flags_set(fib6_rt_old, false); @@ -657,21 +650,22 @@ static int nsim_fib6_rt_replace(struct nsim_fib_data *data, } static int nsim_fib6_rt_insert(struct nsim_fib_data *data, - struct fib6_entry_notifier_info *fen6_info) + struct nsim_fib6_event *fib6_event) { - struct netlink_ext_ack *extack = fen6_info->info.extack; + struct fib6_info *rt = fib6_event->rt_arr[0]; struct nsim_fib6_rt *fib6_rt, *fib6_rt_old; int err; - fib6_rt = nsim_fib6_rt_create(data, fen6_info); + fib6_rt = nsim_fib6_rt_create(data, fib6_event->rt_arr, + fib6_event->nrt6); if (IS_ERR(fib6_rt)) return PTR_ERR(fib6_rt); - fib6_rt_old = nsim_fib6_rt_lookup(&data->fib_rt_ht, fen6_info->rt); + fib6_rt_old = nsim_fib6_rt_lookup(&data->fib_rt_ht, rt); if (!fib6_rt_old) - err = nsim_fib6_rt_add(data, fib6_rt, extack); + err = nsim_fib6_rt_add(data, fib6_rt); else - err = nsim_fib6_rt_replace(data, fib6_rt, fib6_rt_old, extack); + err = nsim_fib6_rt_replace(data, fib6_rt, fib6_rt_old); if (err) nsim_fib6_rt_destroy(fib6_rt); @@ -679,59 +673,100 @@ static int nsim_fib6_rt_insert(struct nsim_fib_data *data, return err; } -static void -nsim_fib6_rt_remove(struct nsim_fib_data *data, - const struct fib6_entry_notifier_info *fen6_info) +static void nsim_fib6_rt_remove(struct nsim_fib_data *data, + struct nsim_fib6_event *fib6_event) { - struct netlink_ext_ack *extack = fen6_info->info.extack; + struct fib6_info *rt = fib6_event->rt_arr[0]; struct nsim_fib6_rt *fib6_rt; + int i; /* Multipath routes are first added to the FIB trie and only then * notified. If we vetoed the addition, we will get a delete * notification for a route we do not have. Therefore, do not warn if * route was not found. */ - fib6_rt = nsim_fib6_rt_lookup(&data->fib_rt_ht, fen6_info->rt); + fib6_rt = nsim_fib6_rt_lookup(&data->fib_rt_ht, rt); if (!fib6_rt) return; /* If not all the nexthops are deleted, then only reduce the nexthop * group. */ - if (fen6_info->nsiblings + 1 != fib6_rt->nhs) { - nsim_fib6_rt_nh_del(fib6_rt, fen6_info->rt); + if (fib6_event->nrt6 != fib6_rt->nhs) { + for (i = 0; i < fib6_event->nrt6; i++) + nsim_fib6_rt_nh_del(fib6_rt, fib6_event->rt_arr[i]); return; } rhashtable_remove_fast(&data->fib_rt_ht, &fib6_rt->common.ht_node, nsim_fib_rt_ht_params); - nsim_fib_account(&data->ipv6.fib, false, extack); nsim_fib6_rt_destroy(fib6_rt); } +static int nsim_fib6_event_init(struct nsim_fib6_event *fib6_event, + struct fib6_entry_notifier_info *fen6_info) +{ + struct fib6_info *rt = fen6_info->rt; + struct fib6_info **rt_arr; + struct fib6_info *iter; + unsigned int nrt6; + int i = 0; + + nrt6 = fen6_info->nsiblings + 1; + + rt_arr = kcalloc(nrt6, sizeof(struct fib6_info *), GFP_ATOMIC); + if (!rt_arr) + return -ENOMEM; + + fib6_event->rt_arr = rt_arr; + fib6_event->nrt6 = nrt6; + + rt_arr[0] = rt; + fib6_info_hold(rt); + + if (!fen6_info->nsiblings) + return 0; + + list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings) { + if (i == fen6_info->nsiblings) + break; + + rt_arr[i + 1] = iter; + fib6_info_hold(iter); + i++; + } + WARN_ON_ONCE(i != fen6_info->nsiblings); + + return 0; +} + +static void nsim_fib6_event_fini(struct nsim_fib6_event *fib6_event) +{ + int i; + + for (i = 0; i < fib6_event->nrt6; i++) + nsim_rt6_release(fib6_event->rt_arr[i]); + kfree(fib6_event->rt_arr); +} + static int nsim_fib6_event(struct nsim_fib_data *data, - struct fib_notifier_info *info, + struct nsim_fib6_event *fib6_event, unsigned long event) { - struct fib6_entry_notifier_info *fen6_info; int err = 0; - fen6_info = container_of(info, struct fib6_entry_notifier_info, info); - - if (fen6_info->rt->fib6_src.plen) { - NL_SET_ERR_MSG_MOD(info->extack, "IPv6 source-specific route is not supported"); + if (fib6_event->rt_arr[0]->fib6_src.plen) return 0; - } switch (event) { case FIB_EVENT_ENTRY_REPLACE: - err = nsim_fib6_rt_insert(data, fen6_info); + err = nsim_fib6_rt_insert(data, fib6_event); break; case FIB_EVENT_ENTRY_APPEND: - err = nsim_fib6_rt_append(data, fen6_info); + err = nsim_fib6_rt_append(data, fib6_event); break; case FIB_EVENT_ENTRY_DEL: - nsim_fib6_rt_remove(data, fen6_info); + nsim_fib6_rt_remove(data, fib6_event); break; default: break; @@ -740,48 +775,165 @@ static int nsim_fib6_event(struct nsim_fib_data *data, return err; } -static int nsim_fib_event(struct nsim_fib_data *data, - struct fib_notifier_info *info, unsigned long event) +static int nsim_fib_event(struct nsim_fib_event *fib_event) { int err = 0; - switch (info->family) { + switch (fib_event->family) { case AF_INET: - err = nsim_fib4_event(data, info, event); + nsim_fib4_event(fib_event->data, &fib_event->fen_info, + fib_event->event); + fib_info_put(fib_event->fen_info.fi); break; case AF_INET6: - err = nsim_fib6_event(data, info, event); + nsim_fib6_event(fib_event->data, &fib_event->fib6_event, + fib_event->event); + nsim_fib6_event_fini(&fib_event->fib6_event); break; } return err; } +static int nsim_fib4_prepare_event(struct fib_notifier_info *info, + struct nsim_fib_event *fib_event, + unsigned long event) +{ + struct nsim_fib_data *data = fib_event->data; + struct fib_entry_notifier_info *fen_info; + struct netlink_ext_ack *extack; + int err = 0; + + fen_info = container_of(info, struct fib_entry_notifier_info, + info); + fib_event->fen_info = *fen_info; + extack = info->extack; + + switch (event) { + case FIB_EVENT_ENTRY_REPLACE: + err = nsim_fib_account(&data->ipv4.fib, true); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Exceeded number of supported fib entries"); + return err; + } + break; + case FIB_EVENT_ENTRY_DEL: + nsim_fib_account(&data->ipv4.fib, false); + break; + } + + /* Take reference on fib_info to prevent it from being + * freed while event is queued. Release it afterwards. + */ + fib_info_hold(fib_event->fen_info.fi); + + return 0; +} + +static int nsim_fib6_prepare_event(struct fib_notifier_info *info, + struct nsim_fib_event *fib_event, + unsigned long event) +{ + struct nsim_fib_data *data = fib_event->data; + struct fib6_entry_notifier_info *fen6_info; + struct netlink_ext_ack *extack; + int err = 0; + + fen6_info = container_of(info, struct fib6_entry_notifier_info, + info); + + err = nsim_fib6_event_init(&fib_event->fib6_event, fen6_info); + if (err) + return err; + + extack = info->extack; + switch (event) { + case FIB_EVENT_ENTRY_REPLACE: + err = nsim_fib_account(&data->ipv6.fib, true); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Exceeded number of supported fib entries"); + goto err_fib6_event_fini; + } + break; + case FIB_EVENT_ENTRY_DEL: + nsim_fib_account(&data->ipv6.fib, false); + break; + } + + return 0; + +err_fib6_event_fini: + nsim_fib6_event_fini(&fib_event->fib6_event); + return err; +} + +static int nsim_fib_event_schedule_work(struct nsim_fib_data *data, + struct fib_notifier_info *info, + unsigned long event) +{ + struct nsim_fib_event *fib_event; + int err; + + if (info->family != AF_INET && info->family != AF_INET6) + /* netdevsim does not support 'RTNL_FAMILY_IP6MR' and + * 'RTNL_FAMILY_IPMR' and should ignore them. + */ + return NOTIFY_DONE; + + fib_event = kzalloc(sizeof(*fib_event), GFP_ATOMIC); + if (!fib_event) + return NOTIFY_BAD; + + fib_event->data = data; + fib_event->event = event; + fib_event->family = info->family; + + switch (info->family) { + case AF_INET: + err = nsim_fib4_prepare_event(info, fib_event, event); + break; + case AF_INET6: + err = nsim_fib6_prepare_event(info, fib_event, event); + break; + } + + if (err) + goto err_fib_prepare_event; + + /* Enqueue the event and trigger the work */ + spin_lock_bh(&data->fib_event_queue_lock); + list_add_tail(&fib_event->list, &data->fib_event_queue); + spin_unlock_bh(&data->fib_event_queue_lock); + schedule_work(&data->fib_event_work); + + return NOTIFY_DONE; + +err_fib_prepare_event: + kfree(fib_event); + return NOTIFY_BAD; +} + static int nsim_fib_event_nb(struct notifier_block *nb, unsigned long event, void *ptr) { struct nsim_fib_data *data = container_of(nb, struct nsim_fib_data, fib_nb); struct fib_notifier_info *info = ptr; - int err = 0; + int err; switch (event) { case FIB_EVENT_RULE_ADD: case FIB_EVENT_RULE_DEL: err = nsim_fib_rule_event(data, info, event == FIB_EVENT_RULE_ADD); - break; + return notifier_from_errno(err); case FIB_EVENT_ENTRY_REPLACE: case FIB_EVENT_ENTRY_APPEND: case FIB_EVENT_ENTRY_DEL: - /* IPv6 routes can be added via RAs from softIRQ. */ - spin_lock_bh(&data->fib_lock); - err = nsim_fib_event(data, info, event); - spin_unlock_bh(&data->fib_lock); - break; + return nsim_fib_event_schedule_work(data, info, event); } - return notifier_from_errno(err); + return NOTIFY_DONE; } static void nsim_fib4_rt_free(struct nsim_fib_rt *fib_rt, @@ -792,7 +944,7 @@ static void nsim_fib4_rt_free(struct nsim_fib_rt *fib_rt, fib4_rt = container_of(fib_rt, struct nsim_fib4_rt, common); nsim_fib4_rt_hw_flags_set(devlink_net(devlink), fib4_rt, false); - nsim_fib_account(&data->ipv4.fib, false, NULL); + nsim_fib_account(&data->ipv4.fib, false); nsim_fib4_rt_destroy(fib4_rt); } @@ -803,7 +955,7 @@ static void nsim_fib6_rt_free(struct nsim_fib_rt *fib_rt, fib6_rt = container_of(fib_rt, struct nsim_fib6_rt, common); nsim_fib6_rt_hw_flags_set(fib6_rt, false); - nsim_fib_account(&data->ipv6.fib, false, NULL); + nsim_fib_account(&data->ipv6.fib, false); nsim_fib6_rt_destroy(fib6_rt); } @@ -831,6 +983,9 @@ static void nsim_fib_dump_inconsistent(struct notifier_block *nb) fib_nb); struct nsim_fib_rt *fib_rt, *fib_rt_tmp; + /* Flush the work to make sure there is no race with notifications. */ + flush_work(&data->fib_event_work); + /* The notifier block is still not registered, so we do not need to * take any locks here. */ @@ -1101,6 +1256,29 @@ static void nsim_fib_set_max_all(struct nsim_fib_data *data, } } +static void nsim_fib_event_work(struct work_struct *work) +{ + struct nsim_fib_data *data = container_of(work, struct nsim_fib_data, + fib_event_work); + struct nsim_fib_event *fib_event, *next_fib_event; + + LIST_HEAD(fib_event_queue); + + spin_lock_bh(&data->fib_event_queue_lock); + list_splice_init(&data->fib_event_queue, &fib_event_queue); + spin_unlock_bh(&data->fib_event_queue_lock); + + mutex_lock(&data->fib_lock); + list_for_each_entry_safe(fib_event, next_fib_event, &fib_event_queue, + list) { + nsim_fib_event(fib_event); + list_del(&fib_event->list); + kfree(fib_event); + cond_resched(); + } + mutex_unlock(&data->fib_lock); +} + struct nsim_fib_data *nsim_fib_create(struct devlink *devlink, struct netlink_ext_ack *extack) { @@ -1116,12 +1294,16 @@ struct nsim_fib_data *nsim_fib_create(struct devlink *devlink, if (err) goto err_data_free; - spin_lock_init(&data->fib_lock); + mutex_init(&data->fib_lock); INIT_LIST_HEAD(&data->fib_rt_list); err = rhashtable_init(&data->fib_rt_ht, &nsim_fib_rt_ht_params); if (err) goto err_rhashtable_nexthop_destroy; + INIT_WORK(&data->fib_event_work, nsim_fib_event_work); + INIT_LIST_HEAD(&data->fib_event_queue); + spin_lock_init(&data->fib_event_queue_lock); + nsim_fib_set_max_all(data, devlink); data->nexthop_nb.notifier_call = nsim_nexthop_event_nb; @@ -1165,11 +1347,13 @@ struct nsim_fib_data *nsim_fib_create(struct devlink *devlink, err_nexthop_nb_unregister: unregister_nexthop_notifier(devlink_net(devlink), &data->nexthop_nb); err_rhashtable_fib_destroy: + flush_work(&data->fib_event_work); rhashtable_free_and_destroy(&data->fib_rt_ht, nsim_fib_rt_free, data); err_rhashtable_nexthop_destroy: rhashtable_free_and_destroy(&data->nexthop_ht, nsim_nexthop_free, data); + mutex_destroy(&data->fib_lock); err_data_free: kfree(data); return ERR_PTR(err); @@ -1189,10 +1373,13 @@ void nsim_fib_destroy(struct devlink *devlink, struct nsim_fib_data *data) NSIM_RESOURCE_IPV4_FIB); unregister_fib_notifier(devlink_net(devlink), &data->fib_nb); unregister_nexthop_notifier(devlink_net(devlink), &data->nexthop_nb); + flush_work(&data->fib_event_work); rhashtable_free_and_destroy(&data->fib_rt_ht, nsim_fib_rt_free, data); rhashtable_free_and_destroy(&data->nexthop_ht, nsim_nexthop_free, data); + WARN_ON_ONCE(!list_empty(&data->fib_event_queue)); WARN_ON_ONCE(!list_empty(&data->fib_rt_list)); + mutex_destroy(&data->fib_lock); kfree(data); } -- GitLab From 085547891de548491d8b9af22c8fbc9487c79055 Mon Sep 17 00:00:00 2001 From: Amit Cohen Date: Mon, 1 Feb 2021 21:47:50 +0200 Subject: [PATCH 1827/2012] net: ipv4: Pass fib_rt_info as const to fib_dump_info() fib_dump_info() does not change 'fri', so pass it as 'const'. It will later allow us to invoke fib_dump_info() from fib_alias_hw_flags_set(). Signed-off-by: Amit Cohen Signed-off-by: Ido Schimmel Reviewed-by: David Ahern Signed-off-by: Jakub Kicinski --- net/ipv4/fib_lookup.h | 2 +- net/ipv4/fib_semantics.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv4/fib_lookup.h b/net/ipv4/fib_lookup.h index 818916b2a04d6..b75db4dcbf5e0 100644 --- a/net/ipv4/fib_lookup.h +++ b/net/ipv4/fib_lookup.h @@ -39,7 +39,7 @@ int fib_nh_match(struct net *net, struct fib_config *cfg, struct fib_info *fi, struct netlink_ext_ack *extack); bool fib_metrics_match(struct fib_config *cfg, struct fib_info *fi); int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event, - struct fib_rt_info *fri, unsigned int flags); + const struct fib_rt_info *fri, unsigned int flags); void rtmsg_fib(int event, __be32 key, struct fib_alias *fa, int dst_len, u32 tb_id, const struct nl_info *info, unsigned int nlm_flags); diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index b5400cec4f69b..dba56fa5e2cda 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -1733,7 +1733,7 @@ static int fib_add_multipath(struct sk_buff *skb, struct fib_info *fi) #endif int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event, - struct fib_rt_info *fri, unsigned int flags) + const struct fib_rt_info *fri, unsigned int flags) { unsigned int nhs = fib_info_num_path(fri->fi); struct fib_info *fi = fri->fi; -- GitLab From 1e7bdec6bbc7816cdc6a093374f4bf4e732c3d44 Mon Sep 17 00:00:00 2001 From: Amit Cohen Date: Mon, 1 Feb 2021 21:47:51 +0200 Subject: [PATCH 1828/2012] net: ipv4: Publish fib_nlmsg_size() Publish fib_nlmsg_size() to allow it to be used later on from fib_alias_hw_flags_set(). Remove the inline keyword since it shouldn't be used inside C files. Signed-off-by: Amit Cohen Signed-off-by: Ido Schimmel Reviewed-by: David Ahern Signed-off-by: Jakub Kicinski --- net/ipv4/fib_lookup.h | 1 + net/ipv4/fib_semantics.c | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv4/fib_lookup.h b/net/ipv4/fib_lookup.h index b75db4dcbf5e0..aff454ef0fa38 100644 --- a/net/ipv4/fib_lookup.h +++ b/net/ipv4/fib_lookup.h @@ -42,6 +42,7 @@ int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event, const struct fib_rt_info *fri, unsigned int flags); void rtmsg_fib(int event, __be32 key, struct fib_alias *fa, int dst_len, u32 tb_id, const struct nl_info *info, unsigned int nlm_flags); +size_t fib_nlmsg_size(struct fib_info *fi); static inline void fib_result_assign(struct fib_result *res, struct fib_info *fi) diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index dba56fa5e2cda..4c38facf91c0f 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -452,7 +452,7 @@ int ip_fib_check_default(__be32 gw, struct net_device *dev) return -1; } -static inline size_t fib_nlmsg_size(struct fib_info *fi) +size_t fib_nlmsg_size(struct fib_info *fi) { size_t payload = NLMSG_ALIGN(sizeof(struct rtmsg)) + nla_total_size(4) /* RTA_TABLE */ -- GitLab From 680aea08e78c003292415518ad270bc20f9c80b1 Mon Sep 17 00:00:00 2001 From: Amit Cohen Date: Mon, 1 Feb 2021 21:47:52 +0200 Subject: [PATCH 1829/2012] net: ipv4: Emit notification when fib hardware flags are changed After installing a route to the kernel, user space receives an acknowledgment, which means the route was installed in the kernel, but not necessarily in hardware. The asynchronous nature of route installation in hardware can lead to a routing daemon advertising a route before it was actually installed in hardware. This can result in packet loss or mis-routed packets until the route is installed in hardware. It is also possible for a route already installed in hardware to change its action and therefore its flags. For example, a host route that is trapping packets can be "promoted" to perform decapsulation following the installation of an IPinIP/VXLAN tunnel. Emit RTM_NEWROUTE notifications whenever RTM_F_OFFLOAD/RTM_F_TRAP flags are changed. The aim is to provide an indication to user-space (e.g., routing daemons) about the state of the route in hardware. Introduce a sysctl that controls this behavior. Keep the default value at 0 (i.e., do not emit notifications) for several reasons: - Multiple RTM_NEWROUTE notification per-route might confuse existing routing daemons. - Convergence reasons in routing daemons. - The extra notifications will negatively impact the insertion rate. - Not all users are interested in these notifications. Signed-off-by: Amit Cohen Acked-by: Roopa Prabhu Signed-off-by: Ido Schimmel Reviewed-by: David Ahern Signed-off-by: Jakub Kicinski --- Documentation/networking/ip-sysctl.rst | 20 +++++++++++++++++++ include/net/netns/ipv4.h | 2 ++ net/ipv4/af_inet.c | 2 ++ net/ipv4/fib_trie.c | 27 ++++++++++++++++++++++++++ net/ipv4/sysctl_net_ipv4.c | 9 +++++++++ 5 files changed, 60 insertions(+) diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst index 2685ec161060b..b0e800e68366f 100644 --- a/Documentation/networking/ip-sysctl.rst +++ b/Documentation/networking/ip-sysctl.rst @@ -178,6 +178,26 @@ min_adv_mss - INTEGER The advertised MSS depends on the first hop route MTU, but will never be lower than this setting. +fib_notify_on_flag_change - INTEGER + Whether to emit RTM_NEWROUTE notifications whenever RTM_F_OFFLOAD/ + RTM_F_TRAP flags are changed. + + After installing a route to the kernel, user space receives an + acknowledgment, which means the route was installed in the kernel, + but not necessarily in hardware. + It is also possible for a route already installed in hardware to change + its action and therefore its flags. For example, a host route that is + trapping packets can be "promoted" to perform decapsulation following + the installation of an IPinIP/VXLAN tunnel. + The notifications will indicate to user-space the state of the route. + + Default: 0 (Do not emit notifications.) + + Possible values: + + - 0 - Do not emit notifications. + - 1 - Emit notifications. + IP Fragmentation: ipfrag_high_thresh - LONG INTEGER diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 8e4fcac4df72f..70a2a085dd1ae 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -188,6 +188,8 @@ struct netns_ipv4 { int sysctl_udp_wmem_min; int sysctl_udp_rmem_min; + int sysctl_fib_notify_on_flag_change; + #ifdef CONFIG_NET_L3_MASTER_DEV int sysctl_udp_l3mdev_accept; #endif diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index b94fa8eb831bf..ab42f6404fc62 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1871,6 +1871,8 @@ static __net_init int inet_init_net(struct net *net) net->ipv4.sysctl_igmp_llm_reports = 1; net->ipv4.sysctl_igmp_qrv = 2; + net->ipv4.sysctl_fib_notify_on_flag_change = 0; + return 0; } diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 28117c05dc353..60559b7081589 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -1038,6 +1038,8 @@ fib_find_matching_alias(struct net *net, const struct fib_rt_info *fri) void fib_alias_hw_flags_set(struct net *net, const struct fib_rt_info *fri) { struct fib_alias *fa_match; + struct sk_buff *skb; + int err; rcu_read_lock(); @@ -1045,9 +1047,34 @@ void fib_alias_hw_flags_set(struct net *net, const struct fib_rt_info *fri) if (!fa_match) goto out; + if (fa_match->offload == fri->offload && fa_match->trap == fri->trap) + goto out; + fa_match->offload = fri->offload; fa_match->trap = fri->trap; + if (!net->ipv4.sysctl_fib_notify_on_flag_change) + goto out; + + skb = nlmsg_new(fib_nlmsg_size(fa_match->fa_info), GFP_ATOMIC); + if (!skb) { + err = -ENOBUFS; + goto errout; + } + + err = fib_dump_info(skb, 0, 0, RTM_NEWROUTE, fri, 0); + if (err < 0) { + /* -EMSGSIZE implies BUG in fib_nlmsg_size() */ + WARN_ON(err == -EMSGSIZE); + kfree_skb(skb); + goto errout; + } + + rtnl_notify(skb, net, 0, RTNLGRP_IPV4_ROUTE, NULL, GFP_ATOMIC); + goto out; + +errout: + rtnl_set_sk_err(net, RTNLGRP_IPV4_ROUTE, err); out: rcu_read_unlock(); } diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 3e5f4f2e705e8..e5798b3b59d23 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -1354,6 +1354,15 @@ static struct ctl_table ipv4_net_table[] = { .proc_handler = proc_dointvec_minmax, .extra1 = SYSCTL_ONE }, + { + .procname = "fib_notify_on_flag_change", + .data = &init_net.ipv4.sysctl_fib_notify_on_flag_change, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, + }, { } }; -- GitLab From fbaca8f895a6855b0b442227bc21da2d4cf77a01 Mon Sep 17 00:00:00 2001 From: Amit Cohen Date: Mon, 1 Feb 2021 21:47:53 +0200 Subject: [PATCH 1830/2012] net: Pass 'net' struct as first argument to fib6_info_hw_flags_set() The next patch will emit notification when hardware flags are changed, in case that fib_notify_on_flag_change sysctl is set to 1. To know sysctl values, net struct is needed. This change is consistent with the IPv4 version, which gets 'net' struct as its first argument. Currently, the only callers of this function are mlxsw and netdevsim. Patch the callers to pass net. Signed-off-by: Amit Cohen Signed-off-by: Ido Schimmel Reviewed-by: David Ahern Signed-off-by: Jakub Kicinski --- .../net/ethernet/mellanox/mlxsw/spectrum_router.c | 7 ++++--- drivers/net/netdevsim/fib.c | 14 ++++++++------ include/net/ip6_fib.h | 5 +++-- 3 files changed, 15 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 0ac7014703aab..5516e141f5bfe 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -5004,8 +5004,8 @@ mlxsw_sp_fib6_entry_hw_flags_set(struct mlxsw_sp *mlxsw_sp, fib6_entry = container_of(fib_entry, struct mlxsw_sp_fib6_entry, common); list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) - fib6_info_hw_flags_set(mlxsw_sp_rt6->rt, should_offload, - !should_offload); + fib6_info_hw_flags_set(mlxsw_sp_net(mlxsw_sp), mlxsw_sp_rt6->rt, + should_offload, !should_offload); } static void @@ -5018,7 +5018,8 @@ mlxsw_sp_fib6_entry_hw_flags_clear(struct mlxsw_sp *mlxsw_sp, fib6_entry = container_of(fib_entry, struct mlxsw_sp_fib6_entry, common); list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) - fib6_info_hw_flags_set(mlxsw_sp_rt6->rt, false, false); + fib6_info_hw_flags_set(mlxsw_sp_net(mlxsw_sp), mlxsw_sp_rt6->rt, + false, false); } static void diff --git a/drivers/net/netdevsim/fib.c b/drivers/net/netdevsim/fib.c index d557533d43dd1..cb68f0cc67405 100644 --- a/drivers/net/netdevsim/fib.c +++ b/drivers/net/netdevsim/fib.c @@ -585,13 +585,15 @@ err_fib6_rt_nh_del: return err; } -static void nsim_fib6_rt_hw_flags_set(const struct nsim_fib6_rt *fib6_rt, +static void nsim_fib6_rt_hw_flags_set(struct nsim_fib_data *data, + const struct nsim_fib6_rt *fib6_rt, bool trap) { + struct net *net = devlink_net(data->devlink); struct nsim_fib6_rt_nh *fib6_rt_nh; list_for_each_entry(fib6_rt_nh, &fib6_rt->nh_list, list) - fib6_info_hw_flags_set(fib6_rt_nh->rt, false, trap); + fib6_info_hw_flags_set(net, fib6_rt_nh->rt, false, trap); } static int nsim_fib6_rt_add(struct nsim_fib_data *data, @@ -607,7 +609,7 @@ static int nsim_fib6_rt_add(struct nsim_fib_data *data, goto err_fib_dismiss; msleep(1); - nsim_fib6_rt_hw_flags_set(fib6_rt, true); + nsim_fib6_rt_hw_flags_set(data, fib6_rt, true); return 0; @@ -641,9 +643,9 @@ static int nsim_fib6_rt_replace(struct nsim_fib_data *data, return err; msleep(1); - nsim_fib6_rt_hw_flags_set(fib6_rt, true); + nsim_fib6_rt_hw_flags_set(data, fib6_rt, true); - nsim_fib6_rt_hw_flags_set(fib6_rt_old, false); + nsim_fib6_rt_hw_flags_set(data, fib6_rt_old, false); nsim_fib6_rt_destroy(fib6_rt_old); return 0; @@ -954,7 +956,7 @@ static void nsim_fib6_rt_free(struct nsim_fib_rt *fib_rt, struct nsim_fib6_rt *fib6_rt; fib6_rt = container_of(fib_rt, struct nsim_fib6_rt, common); - nsim_fib6_rt_hw_flags_set(fib6_rt, false); + nsim_fib6_rt_hw_flags_set(data, fib6_rt, false); nsim_fib_account(&data->ipv6.fib, false); nsim_fib6_rt_destroy(fib6_rt); } diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index ac5ff3c3afb14..cc189e668adff 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -336,8 +336,9 @@ static inline void fib6_info_release(struct fib6_info *f6i) call_rcu(&f6i->rcu, fib6_info_destroy_rcu); } -static inline void fib6_info_hw_flags_set(struct fib6_info *f6i, bool offload, - bool trap) +static inline void +fib6_info_hw_flags_set(struct net *net, struct fib6_info *f6i, bool offload, + bool trap) { f6i->offload = offload; f6i->trap = trap; -- GitLab From efc42879ec9ea85d5d17019536f2f8c5da455498 Mon Sep 17 00:00:00 2001 From: Amit Cohen Date: Mon, 1 Feb 2021 21:47:54 +0200 Subject: [PATCH 1831/2012] net: Do not call fib6_info_hw_flags_set() when IPv6 is disabled With the next patch mlxsw and netdevsim will fail in compilation if CONFIG_IPV6 is disabled. Do not call fib6_info_hw_flags_set() when IPv6 is disabled. Signed-off-by: Amit Cohen Signed-off-by: Ido Schimmel Signed-off-by: Jakub Kicinski --- .../ethernet/mellanox/mlxsw/spectrum_router.c | 16 ++++++++++++++++ drivers/net/netdevsim/fib.c | 8 ++++++++ 2 files changed, 24 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 5516e141f5bfe..cf111e73f81e7 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -4988,6 +4988,7 @@ mlxsw_sp_fib4_entry_hw_flags_clear(struct mlxsw_sp *mlxsw_sp, fib_alias_hw_flags_set(mlxsw_sp_net(mlxsw_sp), &fri); } +#if IS_ENABLED(CONFIG_IPV6) static void mlxsw_sp_fib6_entry_hw_flags_set(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fib_entry *fib_entry) @@ -5007,7 +5008,15 @@ mlxsw_sp_fib6_entry_hw_flags_set(struct mlxsw_sp *mlxsw_sp, fib6_info_hw_flags_set(mlxsw_sp_net(mlxsw_sp), mlxsw_sp_rt6->rt, should_offload, !should_offload); } +#else +static void +mlxsw_sp_fib6_entry_hw_flags_set(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry) +{ +} +#endif +#if IS_ENABLED(CONFIG_IPV6) static void mlxsw_sp_fib6_entry_hw_flags_clear(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fib_entry *fib_entry) @@ -5021,6 +5030,13 @@ mlxsw_sp_fib6_entry_hw_flags_clear(struct mlxsw_sp *mlxsw_sp, fib6_info_hw_flags_set(mlxsw_sp_net(mlxsw_sp), mlxsw_sp_rt6->rt, false, false); } +#else +static void +mlxsw_sp_fib6_entry_hw_flags_clear(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry) +{ +} +#endif static void mlxsw_sp_fib_entry_hw_flags_set(struct mlxsw_sp *mlxsw_sp, diff --git a/drivers/net/netdevsim/fib.c b/drivers/net/netdevsim/fib.c index cb68f0cc67405..1779146926a57 100644 --- a/drivers/net/netdevsim/fib.c +++ b/drivers/net/netdevsim/fib.c @@ -585,6 +585,7 @@ err_fib6_rt_nh_del: return err; } +#if IS_ENABLED(CONFIG_IPV6) static void nsim_fib6_rt_hw_flags_set(struct nsim_fib_data *data, const struct nsim_fib6_rt *fib6_rt, bool trap) @@ -595,6 +596,13 @@ static void nsim_fib6_rt_hw_flags_set(struct nsim_fib_data *data, list_for_each_entry(fib6_rt_nh, &fib6_rt->nh_list, list) fib6_info_hw_flags_set(net, fib6_rt_nh->rt, false, trap); } +#else +static void nsim_fib6_rt_hw_flags_set(struct nsim_fib_data *data, + const struct nsim_fib6_rt *fib6_rt, + bool trap) +{ +} +#endif static int nsim_fib6_rt_add(struct nsim_fib_data *data, struct nsim_fib6_rt *fib6_rt) -- GitLab From 907eea486888cfe118c19759bbc4b8fca2e004df Mon Sep 17 00:00:00 2001 From: Amit Cohen Date: Mon, 1 Feb 2021 21:47:55 +0200 Subject: [PATCH 1832/2012] net: ipv6: Emit notification when fib hardware flags are changed After installing a route to the kernel, user space receives an acknowledgment, which means the route was installed in the kernel, but not necessarily in hardware. The asynchronous nature of route installation in hardware can lead to a routing daemon advertising a route before it was actually installed in hardware. This can result in packet loss or mis-routed packets until the route is installed in hardware. It is also possible for a route already installed in hardware to change its action and therefore its flags. For example, a host route that is trapping packets can be "promoted" to perform decapsulation following the installation of an IPinIP/VXLAN tunnel. Emit RTM_NEWROUTE notifications whenever RTM_F_OFFLOAD/RTM_F_TRAP flags are changed. The aim is to provide an indication to user-space (e.g., routing daemons) about the state of the route in hardware. Introduce a sysctl that controls this behavior. Keep the default value at 0 (i.e., do not emit notifications) for several reasons: - Multiple RTM_NEWROUTE notification per-route might confuse existing routing daemons. - Convergence reasons in routing daemons. - The extra notifications will negatively impact the insertion rate. - Not all users are interested in these notifications. Move fib6_info_hw_flags_set() to C file because it is no longer a short function. Signed-off-by: Amit Cohen Signed-off-by: Ido Schimmel Reviewed-by: David Ahern Signed-off-by: Jakub Kicinski --- Documentation/networking/ip-sysctl.rst | 20 ++++++++++++ include/net/ip6_fib.h | 10 ++---- include/net/netns/ipv6.h | 1 + net/ipv6/af_inet6.c | 1 + net/ipv6/route.c | 44 ++++++++++++++++++++++++++ net/ipv6/sysctl_net_ipv6.c | 9 ++++++ 6 files changed, 77 insertions(+), 8 deletions(-) diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst index b0e800e68366f..61a358301f12b 100644 --- a/Documentation/networking/ip-sysctl.rst +++ b/Documentation/networking/ip-sysctl.rst @@ -1795,6 +1795,26 @@ nexthop_compat_mode - BOOLEAN and extraneous notifications. Default: true (backward compat mode) +fib_notify_on_flag_change - INTEGER + Whether to emit RTM_NEWROUTE notifications whenever RTM_F_OFFLOAD/ + RTM_F_TRAP flags are changed. + + After installing a route to the kernel, user space receives an + acknowledgment, which means the route was installed in the kernel, + but not necessarily in hardware. + It is also possible for a route already installed in hardware to change + its action and therefore its flags. For example, a host route that is + trapping packets can be "promoted" to perform decapsulation following + the installation of an IPinIP/VXLAN tunnel. + The notifications will indicate to user-space the state of the route. + + Default: 0 (Do not emit notifications.) + + Possible values: + + - 0 - Do not emit notifications. + - 1 - Emit notifications. + IPv6 Fragmentation: ip6frag_high_thresh - INTEGER diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index cc189e668adff..1e262b23c68b4 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -336,14 +336,6 @@ static inline void fib6_info_release(struct fib6_info *f6i) call_rcu(&f6i->rcu, fib6_info_destroy_rcu); } -static inline void -fib6_info_hw_flags_set(struct net *net, struct fib6_info *f6i, bool offload, - bool trap) -{ - f6i->offload = offload; - f6i->trap = trap; -} - enum fib6_walk_state { #ifdef CONFIG_IPV6_SUBTREES FWS_S, @@ -546,6 +538,8 @@ static inline bool fib6_metric_locked(struct fib6_info *f6i, int metric) { return !!(f6i->fib6_metrics->metrics[RTAX_LOCK - 1] & (1 << metric)); } +void fib6_info_hw_flags_set(struct net *net, struct fib6_info *f6i, + bool offload, bool trap); #if IS_BUILTIN(CONFIG_IPV6) && defined(CONFIG_BPF_SYSCALL) struct bpf_iter__ipv6_route { diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h index 5ec054473d81a..21c0debbd39ee 100644 --- a/include/net/netns/ipv6.h +++ b/include/net/netns/ipv6.h @@ -51,6 +51,7 @@ struct netns_sysctl_ipv6 { int max_hbh_opts_len; int seg6_flowlabel; bool skip_notify_on_dev_down; + int fib_notify_on_flag_change; }; struct netns_ipv6 { diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 8e9c3e9ea36e3..0e9994e0ecd78 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -954,6 +954,7 @@ static int __net_init inet6_net_init(struct net *net) net->ipv6.sysctl.max_hbh_opts_cnt = IP6_DEFAULT_MAX_HBH_OPTS_CNT; net->ipv6.sysctl.max_dst_opts_len = IP6_DEFAULT_MAX_DST_OPTS_LEN; net->ipv6.sysctl.max_hbh_opts_len = IP6_DEFAULT_MAX_HBH_OPTS_LEN; + net->ipv6.sysctl.fib_notify_on_flag_change = 0; atomic_set(&net->ipv6.fib6_sernum, 1); err = ipv6_init_mibs(net); diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 41d8f801b75fe..92b400eb84769 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -6064,6 +6064,50 @@ errout: rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err); } +void fib6_info_hw_flags_set(struct net *net, struct fib6_info *f6i, + bool offload, bool trap) +{ + struct sk_buff *skb; + int err; + + if (f6i->offload == offload && f6i->trap == trap) + return; + + f6i->offload = offload; + f6i->trap = trap; + + if (!rcu_access_pointer(f6i->fib6_node)) + /* The route was removed from the tree, do not send + * notfication. + */ + return; + + if (!net->ipv6.sysctl.fib_notify_on_flag_change) + return; + + skb = nlmsg_new(rt6_nlmsg_size(f6i), GFP_KERNEL); + if (!skb) { + err = -ENOBUFS; + goto errout; + } + + err = rt6_fill_node(net, skb, f6i, NULL, NULL, NULL, 0, RTM_NEWROUTE, 0, + 0, 0); + if (err < 0) { + /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */ + WARN_ON(err == -EMSGSIZE); + kfree_skb(skb); + goto errout; + } + + rtnl_notify(skb, net, 0, RTNLGRP_IPV6_ROUTE, NULL, GFP_KERNEL); + return; + +errout: + rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err); +} +EXPORT_SYMBOL(fib6_info_hw_flags_set); + static int ip6_route_dev_notify(struct notifier_block *this, unsigned long event, void *ptr) { diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c index 5b60a4bdd36af..392ef01e33667 100644 --- a/net/ipv6/sysctl_net_ipv6.c +++ b/net/ipv6/sysctl_net_ipv6.c @@ -160,6 +160,15 @@ static struct ctl_table ipv6_table_template[] = { .mode = 0644, .proc_handler = proc_dointvec }, + { + .procname = "fib_notify_on_flag_change", + .data = &init_net.ipv6.sysctl.fib_notify_on_flag_change, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, + }, { } }; -- GitLab From d1a7a489287cebeb3ff3d1a114f5eb7d3641793b Mon Sep 17 00:00:00 2001 From: Amit Cohen Date: Mon, 1 Feb 2021 21:47:56 +0200 Subject: [PATCH 1833/2012] selftests: Extend fib tests to run with and without flags notifications Run the test cases with both `fib_notify_on_flag_change` sysctls set to '1', and then with both sysctls set to '0' to verify there are no regressions in the test when notifications are added. Signed-off-by: Amit Cohen Signed-off-by: Ido Schimmel Signed-off-by: Jakub Kicinski --- tools/testing/selftests/drivers/net/mlxsw/fib.sh | 14 ++++++++++++++ .../testing/selftests/drivers/net/netdevsim/fib.sh | 14 ++++++++++++++ 2 files changed, 28 insertions(+) diff --git a/tools/testing/selftests/drivers/net/mlxsw/fib.sh b/tools/testing/selftests/drivers/net/mlxsw/fib.sh index eab79b9e58cdf..dcbf32b99bb6a 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/fib.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/fib.sh @@ -225,6 +225,16 @@ ipv6_local_replace() ip -n $ns link del dev dummy1 } +fib_notify_on_flag_change_set() +{ + local notify=$1; shift + + ip netns exec testns1 sysctl -qw net.ipv4.fib_notify_on_flag_change=$notify + ip netns exec testns1 sysctl -qw net.ipv6.fib_notify_on_flag_change=$notify + + log_info "Set fib_notify_on_flag_change to $notify" +} + setup_prepare() { ip netns add testns1 @@ -251,6 +261,10 @@ trap cleanup EXIT setup_prepare +fib_notify_on_flag_change_set 1 +tests_run + +fib_notify_on_flag_change_set 0 tests_run exit $EXIT_STATUS diff --git a/tools/testing/selftests/drivers/net/netdevsim/fib.sh b/tools/testing/selftests/drivers/net/netdevsim/fib.sh index 2f87c3be76a95..251f228ce63ea 100755 --- a/tools/testing/selftests/drivers/net/netdevsim/fib.sh +++ b/tools/testing/selftests/drivers/net/netdevsim/fib.sh @@ -302,6 +302,16 @@ ipv6_error_path() ipv6_error_path_replay } +fib_notify_on_flag_change_set() +{ + local notify=$1; shift + + ip netns exec testns1 sysctl -qw net.ipv4.fib_notify_on_flag_change=$notify + ip netns exec testns1 sysctl -qw net.ipv6.fib_notify_on_flag_change=$notify + + log_info "Set fib_notify_on_flag_change to $notify" +} + setup_prepare() { local netdev @@ -336,6 +346,10 @@ trap cleanup EXIT setup_prepare +fib_notify_on_flag_change_set 1 +tests_run + +fib_notify_on_flag_change_set 0 tests_run exit $EXIT_STATUS -- GitLab From 19d36d2971e671cd1b7f4174ef5e21c341ec7690 Mon Sep 17 00:00:00 2001 From: Amit Cohen Date: Mon, 1 Feb 2021 21:47:57 +0200 Subject: [PATCH 1834/2012] selftests: netdevsim: Add fib_notifications test Add test to check fib notifications behavior. The test checks route addition, route deletion and route replacement for both IPv4 and IPv6. When fib_notify_on_flag_change=0, expect single notification for route addition/deletion/replacement. When fib_notify_on_flag_change=1, expect: - two notification for route addition/replacement, first without RTM_F_TRAP and second with RTM_F_TRAP. - single notification for route deletion. $ ./fib_notifications.sh TEST: IPv4 route addition [ OK ] TEST: IPv4 route deletion [ OK ] TEST: IPv4 route replacement [ OK ] TEST: IPv6 route addition [ OK ] TEST: IPv6 route deletion [ OK ] TEST: IPv6 route replacement [ OK ] Signed-off-by: Amit Cohen Signed-off-by: Ido Schimmel Reviewed-by: David Ahern Signed-off-by: Jakub Kicinski --- .../net/netdevsim/fib_notifications.sh | 300 ++++++++++++++++++ 1 file changed, 300 insertions(+) create mode 100755 tools/testing/selftests/drivers/net/netdevsim/fib_notifications.sh diff --git a/tools/testing/selftests/drivers/net/netdevsim/fib_notifications.sh b/tools/testing/selftests/drivers/net/netdevsim/fib_notifications.sh new file mode 100755 index 0000000000000..16a9dd43aefca --- /dev/null +++ b/tools/testing/selftests/drivers/net/netdevsim/fib_notifications.sh @@ -0,0 +1,300 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +lib_dir=$(dirname $0)/../../../net/forwarding + +ALL_TESTS=" + ipv4_route_addition_test + ipv4_route_deletion_test + ipv4_route_replacement_test + ipv6_route_addition_test + ipv6_route_deletion_test + ipv6_route_replacement_test +" + +NETDEVSIM_PATH=/sys/bus/netdevsim/ +DEV_ADDR=1337 +DEV=netdevsim${DEV_ADDR} +DEVLINK_DEV=netdevsim/${DEV} +SYSFS_NET_DIR=/sys/bus/netdevsim/devices/$DEV/net/ +NUM_NETIFS=0 +source $lib_dir/lib.sh + +check_rt_trap() +{ + local outfile=$1; shift + local line + + # Make sure that the first notification was emitted without RTM_F_TRAP + # flag and the second with RTM_F_TRAP flag + head -n 1 $outfile | grep -q "rt_trap" + if [[ $? -eq 0 ]]; then + return 1 + fi + + head -n 2 $outfile | tail -n 1 | grep -q "rt_trap" +} + +route_notify_check() +{ + local outfile=$1; shift + local expected_num_lines=$1; shift + + # check the monitor results + lines=`wc -l $outfile | cut "-d " -f1` + test $lines -eq $expected_num_lines + check_err $? "$expected_num_lines notifications were expected but $lines were received" + + if [[ $expected_num_lines -eq 2 ]]; then + check_rt_trap $outfile + check_err $? "Wrong RTM_F_TRAP flags in notifications" + fi +} + +route_addition_check() +{ + local ip=$1; shift + local notify=$1; shift + local route=$1; shift + local expected_num_notifications=$1; shift + + ip netns exec testns1 sysctl -qw net.$ip.fib_notify_on_flag_change=$notify + + local outfile=$(mktemp) + + $IP monitor route &> $outfile & + sleep 1 + $IP route add $route dev dummy1 + sleep 1 + kill %% && wait %% &> /dev/null + + route_notify_check $outfile $expected_num_notifications + rm -f $outfile + + $IP route del $route dev dummy1 +} + +ipv4_route_addition_test() +{ + RET=0 + + local ip="ipv4" + local route=192.0.2.0/24 + + # Make sure a single notification will be emitted for the programmed + # route. + local notify=0 + local expected_num_notifications=1 + # route_addition_check will assign value to RET. + route_addition_check $ip $notify $route $expected_num_notifications + + # Make sure two notifications will be emitted for the programmed route. + notify=1 + expected_num_notifications=2 + route_addition_check $ip $notify $route $expected_num_notifications + + log_test "IPv4 route addition" +} + +route_deletion_check() +{ + local ip=$1; shift + local notify=$1; shift + local route=$1; shift + local expected_num_notifications=$1; shift + + ip netns exec testns1 sysctl -qw net.$ip.fib_notify_on_flag_change=$notify + $IP route add $route dev dummy1 + sleep 1 + + local outfile=$(mktemp) + + $IP monitor route &> $outfile & + sleep 1 + $IP route del $route dev dummy1 + sleep 1 + kill %% && wait %% &> /dev/null + + route_notify_check $outfile $expected_num_notifications + rm -f $outfile +} + +ipv4_route_deletion_test() +{ + RET=0 + + local ip="ipv4" + local route=192.0.2.0/24 + local expected_num_notifications=1 + + # Make sure a single notification will be emitted for the deleted route, + # regardless of fib_notify_on_flag_change value. + local notify=0 + # route_deletion_check will assign value to RET. + route_deletion_check $ip $notify $route $expected_num_notifications + + notify=1 + route_deletion_check $ip $notify $route $expected_num_notifications + + log_test "IPv4 route deletion" +} + +route_replacement_check() +{ + local ip=$1; shift + local notify=$1; shift + local route=$1; shift + local expected_num_notifications=$1; shift + + ip netns exec testns1 sysctl -qw net.$ip.fib_notify_on_flag_change=$notify + $IP route add $route dev dummy1 + sleep 1 + + local outfile=$(mktemp) + + $IP monitor route &> $outfile & + sleep 1 + $IP route replace $route dev dummy2 + sleep 1 + kill %% && wait %% &> /dev/null + + route_notify_check $outfile $expected_num_notifications + rm -f $outfile + + $IP route del $route dev dummy2 +} + +ipv4_route_replacement_test() +{ + RET=0 + + local ip="ipv4" + local route=192.0.2.0/24 + + $IP link add name dummy2 type dummy + $IP link set dev dummy2 up + + # Make sure a single notification will be emitted for the new route. + local notify=0 + local expected_num_notifications=1 + # route_replacement_check will assign value to RET. + route_replacement_check $ip $notify $route $expected_num_notifications + + # Make sure two notifications will be emitted for the new route. + notify=1 + expected_num_notifications=2 + route_replacement_check $ip $notify $route $expected_num_notifications + + $IP link del name dummy2 + + log_test "IPv4 route replacement" +} + +ipv6_route_addition_test() +{ + RET=0 + + local ip="ipv6" + local route=2001:db8:1::/64 + + # Make sure a single notification will be emitted for the programmed + # route. + local notify=0 + local expected_num_notifications=1 + route_addition_check $ip $notify $route $expected_num_notifications + + # Make sure two notifications will be emitted for the programmed route. + notify=1 + expected_num_notifications=2 + route_addition_check $ip $notify $route $expected_num_notifications + + log_test "IPv6 route addition" +} + +ipv6_route_deletion_test() +{ + RET=0 + + local ip="ipv6" + local route=2001:db8:1::/64 + local expected_num_notifications=1 + + # Make sure a single notification will be emitted for the deleted route, + # regardless of fib_notify_on_flag_change value. + local notify=0 + route_deletion_check $ip $notify $route $expected_num_notifications + + notify=1 + route_deletion_check $ip $notify $route $expected_num_notifications + + log_test "IPv6 route deletion" +} + +ipv6_route_replacement_test() +{ + RET=0 + + local ip="ipv6" + local route=2001:db8:1::/64 + + $IP link add name dummy2 type dummy + $IP link set dev dummy2 up + + # Make sure a single notification will be emitted for the new route. + local notify=0 + local expected_num_notifications=1 + route_replacement_check $ip $notify $route $expected_num_notifications + + # Make sure two notifications will be emitted for the new route. + notify=1 + expected_num_notifications=2 + route_replacement_check $ip $notify $route $expected_num_notifications + + $IP link del name dummy2 + + log_test "IPv6 route replacement" +} + +setup_prepare() +{ + modprobe netdevsim &> /dev/null + echo "$DEV_ADDR 1" > ${NETDEVSIM_PATH}/new_device + while [ ! -d $SYSFS_NET_DIR ] ; do :; done + + ip netns add testns1 + + if [ $? -ne 0 ]; then + echo "Failed to add netns \"testns1\"" + exit 1 + fi + + devlink dev reload $DEVLINK_DEV netns testns1 + + if [ $? -ne 0 ]; then + echo "Failed to reload into netns \"testns1\"" + exit 1 + fi + + IP="ip -n testns1" + + $IP link add name dummy1 type dummy + $IP link set dev dummy1 up +} + +cleanup() +{ + pre_cleanup + + $IP link del name dummy1 + ip netns del testns1 + echo "$DEV_ADDR" > ${NETDEVSIM_PATH}/del_device + modprobe -r netdevsim &> /dev/null +} + +trap cleanup EXIT + +setup_prepare + +tests_run + +exit $EXIT_STATUS -- GitLab From e6d6ca6e12049dfbff6ac8b029678d2d2c55c34f Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Tue, 2 Feb 2021 12:48:12 +0800 Subject: [PATCH 1835/2012] r8169: Add support for another RTL8168FP According to the vendor driver, the new chip with XID 0x54b is essentially the same as the one with XID 0x54a, but it doesn't need the firmware. So add support accordingly. Signed-off-by: Kai-Heng Feng Reviewed-by: Heiner Kallweit Link: https://lore.kernel.org/r/20210202044813.1304266-1-kai.heng.feng@canonical.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/realtek/r8169.h | 1 + drivers/net/ethernet/realtek/r8169_main.c | 17 +++++++++++------ drivers/net/ethernet/realtek/r8169_phy_config.c | 1 + 3 files changed, 13 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/realtek/r8169.h b/drivers/net/ethernet/realtek/r8169.h index 7be86ef5a584b..2728df46ec410 100644 --- a/drivers/net/ethernet/realtek/r8169.h +++ b/drivers/net/ethernet/realtek/r8169.h @@ -63,6 +63,7 @@ enum mac_version { RTL_GIGA_MAC_VER_50, RTL_GIGA_MAC_VER_51, RTL_GIGA_MAC_VER_52, + RTL_GIGA_MAC_VER_53, RTL_GIGA_MAC_VER_60, RTL_GIGA_MAC_VER_61, RTL_GIGA_MAC_VER_63, diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index eb4c2e67896b8..7b053a1f6d599 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -146,6 +146,7 @@ static const struct { [RTL_GIGA_MAC_VER_50] = {"RTL8168ep/8111ep" }, [RTL_GIGA_MAC_VER_51] = {"RTL8168ep/8111ep" }, [RTL_GIGA_MAC_VER_52] = {"RTL8168fp/RTL8117", FIRMWARE_8168FP_3}, + [RTL_GIGA_MAC_VER_53] = {"RTL8168fp/RTL8117", }, [RTL_GIGA_MAC_VER_60] = {"RTL8125A" }, [RTL_GIGA_MAC_VER_61] = {"RTL8125A", FIRMWARE_8125A_3}, /* reserve 62 for CFG_METHOD_4 in the vendor driver */ @@ -696,7 +697,7 @@ static bool rtl_is_8168evl_up(struct rtl8169_private *tp) { return tp->mac_version >= RTL_GIGA_MAC_VER_34 && tp->mac_version != RTL_GIGA_MAC_VER_39 && - tp->mac_version <= RTL_GIGA_MAC_VER_52; + tp->mac_version <= RTL_GIGA_MAC_VER_53; } static bool rtl_supports_eee(struct rtl8169_private *tp) @@ -763,7 +764,9 @@ static bool name ## _check(struct rtl8169_private *tp) static void r8168fp_adjust_ocp_cmd(struct rtl8169_private *tp, u32 *cmd, int type) { /* based on RTL8168FP_OOBMAC_BASE in vendor driver */ - if (tp->mac_version == RTL_GIGA_MAC_VER_52 && type == ERIAR_OOB) + if (type == ERIAR_OOB && + (tp->mac_version == RTL_GIGA_MAC_VER_52 || + tp->mac_version == RTL_GIGA_MAC_VER_53)) *cmd |= 0x7f0 << 18; } @@ -1238,7 +1241,7 @@ static enum rtl_dash_type rtl_check_dash(struct rtl8169_private *tp) case RTL_GIGA_MAC_VER_28: case RTL_GIGA_MAC_VER_31: return r8168dp_check_dash(tp) ? RTL_DASH_DP : RTL_DASH_NONE; - case RTL_GIGA_MAC_VER_49 ... RTL_GIGA_MAC_VER_52: + case RTL_GIGA_MAC_VER_49 ... RTL_GIGA_MAC_VER_53: return r8168ep_check_dash(tp) ? RTL_DASH_EP : RTL_DASH_NONE; default: return RTL_DASH_NONE; @@ -1962,6 +1965,7 @@ static enum mac_version rtl8169_get_mac_version(u16 xid, bool gmii) { 0x7c8, 0x608, RTL_GIGA_MAC_VER_61 }, /* RTL8117 */ + { 0x7cf, 0x54b, RTL_GIGA_MAC_VER_53 }, { 0x7cf, 0x54a, RTL_GIGA_MAC_VER_52 }, /* 8168EP family. */ @@ -2236,7 +2240,7 @@ static void rtl_init_rxcfg(struct rtl8169_private *tp) case RTL_GIGA_MAC_VER_38: RTL_W32(tp, RxConfig, RX128_INT_EN | RX_MULTI_EN | RX_DMA_BURST); break; - case RTL_GIGA_MAC_VER_40 ... RTL_GIGA_MAC_VER_52: + case RTL_GIGA_MAC_VER_40 ... RTL_GIGA_MAC_VER_53: RTL_W32(tp, RxConfig, RX128_INT_EN | RX_MULTI_EN | RX_DMA_BURST | RX_EARLY_OFF); break; case RTL_GIGA_MAC_VER_60 ... RTL_GIGA_MAC_VER_63: @@ -2410,7 +2414,7 @@ DECLARE_RTL_COND(rtl_rxtx_empty_cond_2) static void rtl_wait_txrx_fifo_empty(struct rtl8169_private *tp) { switch (tp->mac_version) { - case RTL_GIGA_MAC_VER_40 ... RTL_GIGA_MAC_VER_52: + case RTL_GIGA_MAC_VER_40 ... RTL_GIGA_MAC_VER_53: rtl_loop_wait_high(tp, &rtl_txcfg_empty_cond, 100, 42); rtl_loop_wait_high(tp, &rtl_rxtx_empty_cond, 100, 42); break; @@ -3669,6 +3673,7 @@ static void rtl_hw_config(struct rtl8169_private *tp) [RTL_GIGA_MAC_VER_50] = rtl_hw_start_8168ep_2, [RTL_GIGA_MAC_VER_51] = rtl_hw_start_8168ep_3, [RTL_GIGA_MAC_VER_52] = rtl_hw_start_8117, + [RTL_GIGA_MAC_VER_53] = rtl_hw_start_8117, [RTL_GIGA_MAC_VER_60] = rtl_hw_start_8125a_1, [RTL_GIGA_MAC_VER_61] = rtl_hw_start_8125a_2, [RTL_GIGA_MAC_VER_63] = rtl_hw_start_8125b, @@ -5114,7 +5119,7 @@ static void rtl_hw_init_8125(struct rtl8169_private *tp) static void rtl_hw_initialize(struct rtl8169_private *tp) { switch (tp->mac_version) { - case RTL_GIGA_MAC_VER_49 ... RTL_GIGA_MAC_VER_52: + case RTL_GIGA_MAC_VER_49 ... RTL_GIGA_MAC_VER_53: rtl8168ep_stop_cmac(tp); fallthrough; case RTL_GIGA_MAC_VER_40 ... RTL_GIGA_MAC_VER_48: diff --git a/drivers/net/ethernet/realtek/r8169_phy_config.c b/drivers/net/ethernet/realtek/r8169_phy_config.c index 913d030d73eb4..50f0f621b1aa8 100644 --- a/drivers/net/ethernet/realtek/r8169_phy_config.c +++ b/drivers/net/ethernet/realtek/r8169_phy_config.c @@ -1358,6 +1358,7 @@ void r8169_hw_phy_config(struct rtl8169_private *tp, struct phy_device *phydev, [RTL_GIGA_MAC_VER_50] = rtl8168ep_2_hw_phy_config, [RTL_GIGA_MAC_VER_51] = rtl8168ep_2_hw_phy_config, [RTL_GIGA_MAC_VER_52] = rtl8117_hw_phy_config, + [RTL_GIGA_MAC_VER_53] = rtl8117_hw_phy_config, [RTL_GIGA_MAC_VER_60] = rtl8125a_1_hw_phy_config, [RTL_GIGA_MAC_VER_61] = rtl8125a_2_hw_phy_config, [RTL_GIGA_MAC_VER_63] = rtl8125b_hw_phy_config, -- GitLab From 72603d207d595a1a70ba75ec885a5e02a6d802a8 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Mon, 1 Feb 2021 15:09:06 -0800 Subject: [PATCH 1836/2012] mptcp: use WRITE_ONCE for the pernet *_max This patch uses WRITE_ONCE() for all the pernet add_addr_signal_max, add_addr_accept_max, local_addr_max and subflows_max fields in struct pm_nl_pernet to avoid concurrency issues. Signed-off-by: Geliang Tang Signed-off-by: Mat Martineau Signed-off-by: Jakub Kicinski --- net/mptcp/pm_netlink.c | 32 +++++++++++++++++++++----------- 1 file changed, 21 insertions(+), 11 deletions(-) diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index 83976b9ee99bc..c429bd82313e4 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -572,6 +572,7 @@ static int mptcp_pm_nl_append_new_local_addr(struct pm_nl_pernet *pernet, struct mptcp_pm_addr_entry *entry) { struct mptcp_pm_addr_entry *cur; + unsigned int addr_max; int ret = -EINVAL; spin_lock_bh(&pernet->lock); @@ -614,10 +615,14 @@ find_next: if (entry->addr.id > pernet->next_id) pernet->next_id = entry->addr.id; - if (entry->addr.flags & MPTCP_PM_ADDR_FLAG_SIGNAL) - pernet->add_addr_signal_max++; - if (entry->addr.flags & MPTCP_PM_ADDR_FLAG_SUBFLOW) - pernet->local_addr_max++; + if (entry->addr.flags & MPTCP_PM_ADDR_FLAG_SIGNAL) { + addr_max = pernet->add_addr_signal_max; + WRITE_ONCE(pernet->add_addr_signal_max, addr_max + 1); + } + if (entry->addr.flags & MPTCP_PM_ADDR_FLAG_SUBFLOW) { + addr_max = pernet->local_addr_max; + WRITE_ONCE(pernet->local_addr_max, addr_max + 1); + } pernet->addrs++; list_add_tail_rcu(&entry->list, &pernet->local_addr_list); @@ -912,6 +917,7 @@ static int mptcp_nl_cmd_del_addr(struct sk_buff *skb, struct genl_info *info) struct nlattr *attr = info->attrs[MPTCP_PM_ATTR_ADDR]; struct pm_nl_pernet *pernet = genl_info_pm_nl(info); struct mptcp_pm_addr_entry addr, *entry; + unsigned int addr_max; int ret; ret = mptcp_pm_parse_addr(attr, info, false, &addr); @@ -925,10 +931,14 @@ static int mptcp_nl_cmd_del_addr(struct sk_buff *skb, struct genl_info *info) spin_unlock_bh(&pernet->lock); return -EINVAL; } - if (entry->addr.flags & MPTCP_PM_ADDR_FLAG_SIGNAL) - pernet->add_addr_signal_max--; - if (entry->addr.flags & MPTCP_PM_ADDR_FLAG_SUBFLOW) - pernet->local_addr_max--; + if (entry->addr.flags & MPTCP_PM_ADDR_FLAG_SIGNAL) { + addr_max = pernet->add_addr_signal_max; + WRITE_ONCE(pernet->add_addr_signal_max, addr_max - 1); + } + if (entry->addr.flags & MPTCP_PM_ADDR_FLAG_SUBFLOW) { + addr_max = pernet->local_addr_max; + WRITE_ONCE(pernet->local_addr_max, addr_max - 1); + } pernet->addrs--; list_del_rcu(&entry->list); @@ -956,9 +966,9 @@ static void __flush_addrs(struct net *net, struct list_head *list) static void __reset_counters(struct pm_nl_pernet *pernet) { - pernet->add_addr_signal_max = 0; - pernet->add_addr_accept_max = 0; - pernet->local_addr_max = 0; + WRITE_ONCE(pernet->add_addr_signal_max, 0); + WRITE_ONCE(pernet->add_addr_accept_max, 0); + WRITE_ONCE(pernet->local_addr_max, 0); pernet->addrs = 0; } -- GitLab From a914e586689f2b322e7b923eb9ea8894fc80d5ec Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Mon, 1 Feb 2021 15:09:07 -0800 Subject: [PATCH 1837/2012] mptcp: drop *_max fields in mptcp_pm_data This patch drops the per-msk values add_addr_signal_max, add_addr_accept_max, local_addr_max and subflows_max fields in struct mptcp_pm_data, uses the pernet *_max values instead. And adds four new helpers to get the pernet *_max values separately. Co-developed-by: Matthieu Baerts Signed-off-by: Matthieu Baerts Signed-off-by: Geliang Tang Signed-off-by: Mat Martineau Signed-off-by: Jakub Kicinski --- net/mptcp/mptcp_diag.c | 6 +-- net/mptcp/pm.c | 9 +++-- net/mptcp/pm_netlink.c | 90 ++++++++++++++++++++++++++++++------------ net/mptcp/protocol.h | 7 ++-- 4 files changed, 77 insertions(+), 35 deletions(-) diff --git a/net/mptcp/mptcp_diag.c b/net/mptcp/mptcp_diag.c index b70ae4ba30008..00ed742f48a47 100644 --- a/net/mptcp/mptcp_diag.c +++ b/net/mptcp/mptcp_diag.c @@ -128,10 +128,10 @@ static void mptcp_diag_get_info(struct sock *sk, struct inet_diag_msg *r, info->mptcpi_subflows = READ_ONCE(msk->pm.subflows); info->mptcpi_add_addr_signal = READ_ONCE(msk->pm.add_addr_signaled); info->mptcpi_add_addr_accepted = READ_ONCE(msk->pm.add_addr_accepted); - info->mptcpi_subflows_max = READ_ONCE(msk->pm.subflows_max); - val = READ_ONCE(msk->pm.add_addr_signal_max); + info->mptcpi_subflows_max = mptcp_pm_get_subflows_max(msk); + val = mptcp_pm_get_add_addr_signal_max(msk); info->mptcpi_add_addr_signal_max = val; - val = READ_ONCE(msk->pm.add_addr_accept_max); + val = mptcp_pm_get_add_addr_accept_max(msk); info->mptcpi_add_addr_accepted_max = val; if (test_bit(MPTCP_FALLBACK_DONE, &msk->flags)) flags |= MPTCP_INFO_FLAG_FALLBACK; diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c index 0a6ebd0642ec9..01a846b257711 100644 --- a/net/mptcp/pm.c +++ b/net/mptcp/pm.c @@ -78,10 +78,13 @@ void mptcp_pm_new_connection(struct mptcp_sock *msk, int server_side) bool mptcp_pm_allow_new_subflow(struct mptcp_sock *msk) { struct mptcp_pm_data *pm = &msk->pm; + unsigned int subflows_max; int ret = 0; + subflows_max = mptcp_pm_get_subflows_max(msk); + pr_debug("msk=%p subflows=%d max=%d allow=%d", msk, pm->subflows, - pm->subflows_max, READ_ONCE(pm->accept_subflow)); + subflows_max, READ_ONCE(pm->accept_subflow)); /* try to avoid acquiring the lock below */ if (!READ_ONCE(pm->accept_subflow)) @@ -89,8 +92,8 @@ bool mptcp_pm_allow_new_subflow(struct mptcp_sock *msk) spin_lock_bh(&pm->lock); if (READ_ONCE(pm->accept_subflow)) { - ret = pm->subflows < pm->subflows_max; - if (ret && ++pm->subflows == pm->subflows_max) + ret = pm->subflows < subflows_max; + if (ret && ++pm->subflows == subflows_max) WRITE_ONCE(pm->accept_subflow, false); } spin_unlock_bh(&pm->lock); diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index c429bd82313e4..6aeadcaef8aeb 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -196,11 +196,46 @@ select_signal_address(struct pm_nl_pernet *pernet, unsigned int pos) return ret; } +unsigned int mptcp_pm_get_add_addr_signal_max(struct mptcp_sock *msk) +{ + struct pm_nl_pernet *pernet; + + pernet = net_generic(sock_net((struct sock *)msk), pm_nl_pernet_id); + return READ_ONCE(pernet->add_addr_signal_max); +} +EXPORT_SYMBOL_GPL(mptcp_pm_get_add_addr_signal_max); + +unsigned int mptcp_pm_get_add_addr_accept_max(struct mptcp_sock *msk) +{ + struct pm_nl_pernet *pernet; + + pernet = net_generic(sock_net((struct sock *)msk), pm_nl_pernet_id); + return READ_ONCE(pernet->add_addr_accept_max); +} +EXPORT_SYMBOL_GPL(mptcp_pm_get_add_addr_accept_max); + +unsigned int mptcp_pm_get_subflows_max(struct mptcp_sock *msk) +{ + struct pm_nl_pernet *pernet; + + pernet = net_generic(sock_net((struct sock *)msk), pm_nl_pernet_id); + return READ_ONCE(pernet->subflows_max); +} +EXPORT_SYMBOL_GPL(mptcp_pm_get_subflows_max); + +static unsigned int mptcp_pm_get_local_addr_max(struct mptcp_sock *msk) +{ + struct pm_nl_pernet *pernet; + + pernet = net_generic(sock_net((struct sock *)msk), pm_nl_pernet_id); + return READ_ONCE(pernet->local_addr_max); +} + static void check_work_pending(struct mptcp_sock *msk) { - if (msk->pm.add_addr_signaled == msk->pm.add_addr_signal_max && - (msk->pm.local_addr_used == msk->pm.local_addr_max || - msk->pm.subflows == msk->pm.subflows_max)) + if (msk->pm.add_addr_signaled == mptcp_pm_get_add_addr_signal_max(msk) && + (msk->pm.local_addr_used == mptcp_pm_get_local_addr_max(msk) || + msk->pm.subflows == mptcp_pm_get_subflows_max(msk))) WRITE_ONCE(msk->pm.work_pending, false); } @@ -327,17 +362,24 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk) { struct sock *sk = (struct sock *)msk; struct mptcp_pm_addr_entry *local; + unsigned int add_addr_signal_max; + unsigned int local_addr_max; struct pm_nl_pernet *pernet; + unsigned int subflows_max; pernet = net_generic(sock_net(sk), pm_nl_pernet_id); + add_addr_signal_max = mptcp_pm_get_add_addr_signal_max(msk); + local_addr_max = mptcp_pm_get_local_addr_max(msk); + subflows_max = mptcp_pm_get_subflows_max(msk); + pr_debug("local %d:%d signal %d:%d subflows %d:%d\n", - msk->pm.local_addr_used, msk->pm.local_addr_max, - msk->pm.add_addr_signaled, msk->pm.add_addr_signal_max, - msk->pm.subflows, msk->pm.subflows_max); + msk->pm.local_addr_used, local_addr_max, + msk->pm.add_addr_signaled, add_addr_signal_max, + msk->pm.subflows, subflows_max); /* check first for announce */ - if (msk->pm.add_addr_signaled < msk->pm.add_addr_signal_max) { + if (msk->pm.add_addr_signaled < add_addr_signal_max) { local = select_signal_address(pernet, msk->pm.add_addr_signaled); @@ -349,15 +391,15 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk) } } else { /* pick failed, avoid fourther attempts later */ - msk->pm.local_addr_used = msk->pm.add_addr_signal_max; + msk->pm.local_addr_used = add_addr_signal_max; } check_work_pending(msk); } /* check if should create a new subflow */ - if (msk->pm.local_addr_used < msk->pm.local_addr_max && - msk->pm.subflows < msk->pm.subflows_max) { + if (msk->pm.local_addr_used < local_addr_max && + msk->pm.subflows < subflows_max) { local = select_local_address(pernet, msk); if (local) { struct mptcp_addr_info remote = { 0 }; @@ -373,7 +415,7 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk) } /* lookup failed, avoid fourther attempts later */ - msk->pm.local_addr_used = msk->pm.local_addr_max; + msk->pm.local_addr_used = local_addr_max; check_work_pending(msk); } } @@ -391,17 +433,22 @@ void mptcp_pm_nl_subflow_established(struct mptcp_sock *msk) void mptcp_pm_nl_add_addr_received(struct mptcp_sock *msk) { struct sock *sk = (struct sock *)msk; + unsigned int add_addr_accept_max; struct mptcp_addr_info remote; struct mptcp_addr_info local; + unsigned int subflows_max; bool use_port = false; + add_addr_accept_max = mptcp_pm_get_add_addr_accept_max(msk); + subflows_max = mptcp_pm_get_subflows_max(msk); + pr_debug("accepted %d:%d remote family %d", - msk->pm.add_addr_accepted, msk->pm.add_addr_accept_max, + msk->pm.add_addr_accepted, add_addr_accept_max, msk->pm.remote.family); msk->pm.add_addr_accepted++; msk->pm.subflows++; - if (msk->pm.add_addr_accepted >= msk->pm.add_addr_accept_max || - msk->pm.subflows >= msk->pm.subflows_max) + if (msk->pm.add_addr_accepted >= add_addr_accept_max || + msk->pm.subflows >= subflows_max) WRITE_ONCE(msk->pm.accept_addr, false); /* connect to the specified remote address, using whatever @@ -687,19 +734,12 @@ int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct sock_common *skc) void mptcp_pm_nl_data_init(struct mptcp_sock *msk) { struct mptcp_pm_data *pm = &msk->pm; - struct pm_nl_pernet *pernet; bool subflows; - pernet = net_generic(sock_net((struct sock *)msk), pm_nl_pernet_id); - - pm->add_addr_signal_max = READ_ONCE(pernet->add_addr_signal_max); - pm->add_addr_accept_max = READ_ONCE(pernet->add_addr_accept_max); - pm->local_addr_max = READ_ONCE(pernet->local_addr_max); - pm->subflows_max = READ_ONCE(pernet->subflows_max); - subflows = !!pm->subflows_max; - WRITE_ONCE(pm->work_pending, (!!pm->local_addr_max && subflows) || - !!pm->add_addr_signal_max); - WRITE_ONCE(pm->accept_addr, !!pm->add_addr_accept_max && subflows); + subflows = !!mptcp_pm_get_subflows_max(msk); + WRITE_ONCE(pm->work_pending, (!!mptcp_pm_get_local_addr_max(msk) && subflows) || + !!mptcp_pm_get_add_addr_signal_max(msk)); + WRITE_ONCE(pm->accept_addr, !!mptcp_pm_get_add_addr_accept_max(msk) && subflows); WRITE_ONCE(pm->accept_subflow, subflows); } diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 07ee319f78472..a56247738dee6 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -203,10 +203,6 @@ struct mptcp_pm_data { u8 add_addr_accepted; u8 local_addr_used; u8 subflows; - u8 add_addr_signal_max; - u8 add_addr_accept_max; - u8 local_addr_max; - u8 subflows_max; u8 status; u8 rm_id; }; @@ -714,6 +710,9 @@ void mptcp_pm_nl_add_addr_send_ack(struct mptcp_sock *msk); void mptcp_pm_nl_rm_addr_received(struct mptcp_sock *msk); void mptcp_pm_nl_rm_subflow_received(struct mptcp_sock *msk, u8 rm_id); int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct sock_common *skc); +unsigned int mptcp_pm_get_add_addr_signal_max(struct mptcp_sock *msk); +unsigned int mptcp_pm_get_add_addr_accept_max(struct mptcp_sock *msk); +unsigned int mptcp_pm_get_subflows_max(struct mptcp_sock *msk); static inline struct mptcp_ext *mptcp_get_ext(struct sk_buff *skb) { -- GitLab From 875b76718f68bac8cec4ce669babd709852ca376 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Mon, 1 Feb 2021 15:09:08 -0800 Subject: [PATCH 1838/2012] mptcp: create subflow or signal addr for newly added address Currently, when a new MPTCP endpoint is added, the existing MPTCP sockets are not affected. This patch implements a new function mptcp_nl_add_subflow_or_signal_addr, invoked when an address is added from PM netlink. This function traverses the MPTCP sockets list and invokes mptcp_pm_create_subflow_or_signal_addr to try to create a subflow or signal an address for the newly added address, if local constraint allows that. Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/19 Signed-off-by: Geliang Tang Signed-off-by: Mat Martineau Signed-off-by: Jakub Kicinski --- net/mptcp/pm_netlink.c | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index 6aeadcaef8aeb..f1eb3a512fcb4 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -850,6 +850,31 @@ static struct pm_nl_pernet *genl_info_pm_nl(struct genl_info *info) return net_generic(genl_info_net(info), pm_nl_pernet_id); } +static int mptcp_nl_add_subflow_or_signal_addr(struct net *net) +{ + struct mptcp_sock *msk; + long s_slot = 0, s_num = 0; + + while ((msk = mptcp_token_iter_next(net, &s_slot, &s_num)) != NULL) { + struct sock *sk = (struct sock *)msk; + + if (!READ_ONCE(msk->fully_established)) + goto next; + + lock_sock(sk); + spin_lock_bh(&msk->pm.lock); + mptcp_pm_create_subflow_or_signal_addr(msk); + spin_unlock_bh(&msk->pm.lock); + release_sock(sk); + +next: + sock_put(sk); + cond_resched(); + } + + return 0; +} + static int mptcp_nl_cmd_add_addr(struct sk_buff *skb, struct genl_info *info) { struct nlattr *attr = info->attrs[MPTCP_PM_ATTR_ADDR]; @@ -875,6 +900,8 @@ static int mptcp_nl_cmd_add_addr(struct sk_buff *skb, struct genl_info *info) return ret; } + mptcp_nl_add_subflow_or_signal_addr(sock_net(skb->sk)); + return 0; } -- GitLab From b5a7acd3bd63c7430c98d7f66d0aa457c9ccde30 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Mon, 1 Feb 2021 15:09:09 -0800 Subject: [PATCH 1839/2012] mptcp: send ack for every add_addr This patch changes the sending ACK conditions for the ADD_ADDR, send an ACK packet for any ADD_ADDR, not just when ipv6 addresses or port numbers are included. Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/139 Acked-by: Paolo Abeni Signed-off-by: Geliang Tang Signed-off-by: Mat Martineau Signed-off-by: Jakub Kicinski --- net/mptcp/pm.c | 3 +-- net/mptcp/pm_netlink.c | 10 ++++------ 2 files changed, 5 insertions(+), 8 deletions(-) diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c index 01a846b257711..3a22e73220b99 100644 --- a/net/mptcp/pm.c +++ b/net/mptcp/pm.c @@ -191,8 +191,7 @@ void mptcp_pm_add_addr_received(struct mptcp_sock *msk, void mptcp_pm_add_addr_send_ack(struct mptcp_sock *msk) { - if (!mptcp_pm_should_add_signal_ipv6(msk) && - !mptcp_pm_should_add_signal_port(msk)) + if (!mptcp_pm_should_add_signal(msk)) return; mptcp_pm_schedule_work(msk, MPTCP_PM_ADD_ADDR_SEND_ACK); diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index f1eb3a512fcb4..5d87e475c7512 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -474,8 +474,7 @@ void mptcp_pm_nl_add_addr_send_ack(struct mptcp_sock *msk) { struct mptcp_subflow_context *subflow; - if (!mptcp_pm_should_add_signal_ipv6(msk) && - !mptcp_pm_should_add_signal_port(msk)) + if (!mptcp_pm_should_add_signal(msk)) return; __mptcp_flush_join_list(msk); @@ -485,10 +484,9 @@ void mptcp_pm_nl_add_addr_send_ack(struct mptcp_sock *msk) u8 add_addr; spin_unlock_bh(&msk->pm.lock); - if (mptcp_pm_should_add_signal_ipv6(msk)) - pr_debug("send ack for add_addr6"); - if (mptcp_pm_should_add_signal_port(msk)) - pr_debug("send ack for add_addr_port"); + pr_debug("send ack for add_addr%s%s", + mptcp_pm_should_add_signal_ipv6(msk) ? " [ipv6]" : "", + mptcp_pm_should_add_signal_port(msk) ? " [port]" : ""); lock_sock(ssk); tcp_send_ack(ssk); -- GitLab From 2e8cbf45cfb38bf6dbe604397e86341375b15c0f Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Mon, 1 Feb 2021 15:09:10 -0800 Subject: [PATCH 1840/2012] selftests: mptcp: use minus values for removing address numbers This patch changes the removing addresses numbers to minus values, left the plus values for the adding addresses numbers. Signed-off-by: Geliang Tang Signed-off-by: Mat Martineau Signed-off-by: Jakub Kicinski --- .../testing/selftests/net/mptcp/mptcp_join.sh | 32 ++++++++++--------- 1 file changed, 17 insertions(+), 15 deletions(-) diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index be34b9ccbd202..e5fb2b01f31ce 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -209,8 +209,8 @@ do_transfer() srv_proto="$4" connect_addr="$5" test_link_fail="$6" - rm_nr_ns1="$7" - rm_nr_ns2="$8" + addr_nr_ns1="$7" + addr_nr_ns2="$8" speed="$9" bkup="${10}" @@ -264,7 +264,8 @@ do_transfer() fi cpid=$! - if [ $rm_nr_ns1 -gt 0 ]; then + if [ $addr_nr_ns1 -lt 0 ]; then + let rm_nr_ns1=-addr_nr_ns1 if [ $rm_nr_ns1 -lt 8 ]; then counter=1 sleep 1 @@ -281,7 +282,8 @@ do_transfer() fi fi - if [ $rm_nr_ns2 -gt 0 ]; then + if [ $addr_nr_ns2 -lt 0 ]; then + let rm_nr_ns2=-addr_nr_ns2 if [ $rm_nr_ns2 -lt 8 ]; then counter=1 sleep 1 @@ -368,8 +370,8 @@ run_tests() connector_ns="$2" connect_addr="$3" test_linkfail="${4:-0}" - rm_nr_ns1="${5:-0}" - rm_nr_ns2="${6:-0}" + addr_nr_ns1="${5:-0}" + addr_nr_ns2="${6:-0}" speed="${7:-fast}" bkup="${8:-""}" lret=0 @@ -386,7 +388,7 @@ run_tests() fi do_transfer ${listener_ns} ${connector_ns} MPTCP MPTCP ${connect_addr} \ - ${test_linkfail} ${rm_nr_ns1} ${rm_nr_ns2} ${speed} ${bkup} + ${test_linkfail} ${addr_nr_ns1} ${addr_nr_ns2} ${speed} ${bkup} lret=$? if [ "$test_linkfail" -eq 1 ];then @@ -677,7 +679,7 @@ reset ip netns exec $ns1 ./pm_nl_ctl limits 0 1 ip netns exec $ns2 ./pm_nl_ctl limits 0 1 ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow -run_tests $ns1 $ns2 10.0.1.1 0 0 1 slow +run_tests $ns1 $ns2 10.0.1.1 0 0 -1 slow chk_join_nr "remove single subflow" 1 1 1 chk_rm_nr 1 1 @@ -687,7 +689,7 @@ ip netns exec $ns1 ./pm_nl_ctl limits 0 2 ip netns exec $ns2 ./pm_nl_ctl limits 0 2 ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags subflow ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow -run_tests $ns1 $ns2 10.0.1.1 0 0 2 slow +run_tests $ns1 $ns2 10.0.1.1 0 0 -2 slow chk_join_nr "remove multiple subflows" 2 2 2 chk_rm_nr 2 2 @@ -696,7 +698,7 @@ reset ip netns exec $ns1 ./pm_nl_ctl limits 0 1 ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal ip netns exec $ns2 ./pm_nl_ctl limits 1 1 -run_tests $ns1 $ns2 10.0.1.1 0 1 0 slow +run_tests $ns1 $ns2 10.0.1.1 0 -1 0 slow chk_join_nr "remove single address" 1 1 1 chk_add_nr 1 1 chk_rm_nr 0 0 @@ -707,7 +709,7 @@ ip netns exec $ns1 ./pm_nl_ctl limits 0 2 ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal ip netns exec $ns2 ./pm_nl_ctl limits 1 2 ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow -run_tests $ns1 $ns2 10.0.1.1 0 1 1 slow +run_tests $ns1 $ns2 10.0.1.1 0 -1 -1 slow chk_join_nr "remove subflow and signal" 2 2 2 chk_add_nr 1 1 chk_rm_nr 1 1 @@ -719,7 +721,7 @@ ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal ip netns exec $ns2 ./pm_nl_ctl limits 1 3 ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 flags subflow -run_tests $ns1 $ns2 10.0.1.1 0 1 2 slow +run_tests $ns1 $ns2 10.0.1.1 0 -1 -2 slow chk_join_nr "remove subflows and signal" 3 3 3 chk_add_nr 1 1 chk_rm_nr 2 2 @@ -731,7 +733,7 @@ ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal ip netns exec $ns2 ./pm_nl_ctl limits 1 3 ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 flags subflow -run_tests $ns1 $ns2 10.0.1.1 0 8 8 slow +run_tests $ns1 $ns2 10.0.1.1 0 -8 -8 slow chk_join_nr "flush subflows and signal" 3 3 3 chk_add_nr 1 1 chk_rm_nr 2 2 @@ -774,7 +776,7 @@ reset ip netns exec $ns1 ./pm_nl_ctl limits 0 1 ip netns exec $ns1 ./pm_nl_ctl add dead:beef:2::1 flags signal ip netns exec $ns2 ./pm_nl_ctl limits 1 1 -run_tests $ns1 $ns2 dead:beef:1::1 0 1 0 slow +run_tests $ns1 $ns2 dead:beef:1::1 0 -1 0 slow chk_join_nr "remove single address IPv6" 1 1 1 chk_add_nr 1 1 chk_rm_nr 0 0 @@ -785,7 +787,7 @@ ip netns exec $ns1 ./pm_nl_ctl limits 0 2 ip netns exec $ns1 ./pm_nl_ctl add dead:beef:2::1 flags signal ip netns exec $ns2 ./pm_nl_ctl limits 1 2 ip netns exec $ns2 ./pm_nl_ctl add dead:beef:3::2 flags subflow -run_tests $ns1 $ns2 dead:beef:1::1 0 1 1 slow +run_tests $ns1 $ns2 dead:beef:1::1 0 -1 -1 slow chk_join_nr "remove subflow and signal IPv6" 2 2 2 chk_add_nr 1 1 chk_rm_nr 1 1 -- GitLab From 6208fd822a2c656461d2f2dc29a309d379ab5850 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Mon, 1 Feb 2021 15:09:11 -0800 Subject: [PATCH 1841/2012] selftests: mptcp: add testcases for newly added addresses This patch adds testcases to create subflows or signal addresses for the newly added IPv4 or IPv6 addresses. Signed-off-by: Geliang Tang Signed-off-by: Mat Martineau Signed-off-by: Jakub Kicinski --- .../testing/selftests/net/mptcp/mptcp_join.sh | 73 ++++++++++++++++++- 1 file changed, 71 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index e5fb2b01f31ce..b5cd2a48831eb 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -264,7 +264,23 @@ do_transfer() fi cpid=$! - if [ $addr_nr_ns1 -lt 0 ]; then + if [ $addr_nr_ns1 -gt 0 ]; then + let add_nr_ns1=addr_nr_ns1 + counter=2 + sleep 1 + while [ $add_nr_ns1 -gt 0 ]; do + local addr + if is_v6 "${connect_addr}"; then + addr="dead:beef:$counter::1" + else + addr="10.0.$counter.1" + fi + ip netns exec $ns1 ./pm_nl_ctl add $addr flags signal + let counter+=1 + let add_nr_ns1-=1 + done + sleep 1 + elif [ $addr_nr_ns1 -lt 0 ]; then let rm_nr_ns1=-addr_nr_ns1 if [ $rm_nr_ns1 -lt 8 ]; then counter=1 @@ -282,7 +298,23 @@ do_transfer() fi fi - if [ $addr_nr_ns2 -lt 0 ]; then + if [ $addr_nr_ns2 -gt 0 ]; then + let add_nr_ns2=addr_nr_ns2 + counter=3 + sleep 1 + while [ $add_nr_ns2 -gt 0 ]; do + local addr + if is_v6 "${connect_addr}"; then + addr="dead:beef:$counter::2" + else + addr="10.0.$counter.2" + fi + ip netns exec $ns2 ./pm_nl_ctl add $addr flags subflow + let counter+=1 + let add_nr_ns2-=1 + done + sleep 1 + elif [ $addr_nr_ns2 -lt 0 ]; then let rm_nr_ns2=-addr_nr_ns2 if [ $rm_nr_ns2 -lt 8 ]; then counter=1 @@ -738,6 +770,43 @@ chk_join_nr "flush subflows and signal" 3 3 3 chk_add_nr 1 1 chk_rm_nr 2 2 +# add single subflow +reset +ip netns exec $ns1 ./pm_nl_ctl limits 0 1 +ip netns exec $ns2 ./pm_nl_ctl limits 0 1 +run_tests $ns1 $ns2 10.0.1.1 0 0 1 slow +chk_join_nr "add single subflow" 1 1 1 + +# add signal address +reset +ip netns exec $ns1 ./pm_nl_ctl limits 0 1 +ip netns exec $ns2 ./pm_nl_ctl limits 1 1 +run_tests $ns1 $ns2 10.0.1.1 0 1 0 slow +chk_join_nr "add signal address" 1 1 1 +chk_add_nr 1 1 + +# add multiple subflows +reset +ip netns exec $ns1 ./pm_nl_ctl limits 0 2 +ip netns exec $ns2 ./pm_nl_ctl limits 0 2 +run_tests $ns1 $ns2 10.0.1.1 0 0 2 slow +chk_join_nr "add multiple subflows" 2 2 2 + +# add multiple subflows IPv6 +reset +ip netns exec $ns1 ./pm_nl_ctl limits 0 2 +ip netns exec $ns2 ./pm_nl_ctl limits 0 2 +run_tests $ns1 $ns2 dead:beef:1::1 0 0 2 slow +chk_join_nr "add multiple subflows IPv6" 2 2 2 + +# add multiple addresses IPv6 +reset +ip netns exec $ns1 ./pm_nl_ctl limits 0 2 +ip netns exec $ns2 ./pm_nl_ctl limits 2 2 +run_tests $ns1 $ns2 dead:beef:1::1 0 2 0 slow +chk_join_nr "add multiple addresses IPv6" 2 2 2 +chk_add_nr 2 2 + # subflow IPv6 reset ip netns exec $ns1 ./pm_nl_ctl limits 0 1 -- GitLab From 1729cf186d8a5d70cf7a54e07c4763635079f015 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Mon, 1 Feb 2021 15:09:12 -0800 Subject: [PATCH 1842/2012] mptcp: create the listening socket for new port This patch creates a listening socket when an address with a port-number is added by PM netlink. Then binds the new port to the socket, and listens for new connections. When the address is removed or the addresses are flushed by PM netlink, release the listening socket. Signed-off-by: Geliang Tang Signed-off-by: Mat Martineau Signed-off-by: Jakub Kicinski --- net/mptcp/pm_netlink.c | 96 +++++++++++++++++++++++++++++++++++++++++- net/mptcp/protocol.c | 2 +- net/mptcp/protocol.h | 4 ++ net/mptcp/subflow.c | 6 +-- 4 files changed, 102 insertions(+), 6 deletions(-) diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index 5d87e475c7512..b71701a743a4c 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -26,6 +26,7 @@ struct mptcp_pm_addr_entry { struct list_head list; struct mptcp_addr_info addr; struct rcu_head rcu; + struct socket *lsk; }; struct mptcp_pm_add_entry { @@ -678,6 +679,53 @@ out: return ret; } +static int mptcp_pm_nl_create_listen_socket(struct sock *sk, + struct mptcp_pm_addr_entry *entry) +{ + struct sockaddr_storage addr; + struct mptcp_sock *msk; + struct socket *ssock; + int backlog = 1024; + int err; + + err = sock_create_kern(sock_net(sk), entry->addr.family, + SOCK_STREAM, IPPROTO_MPTCP, &entry->lsk); + if (err) + return err; + + msk = mptcp_sk(entry->lsk->sk); + if (!msk) { + err = -EINVAL; + goto out; + } + + ssock = __mptcp_nmpc_socket(msk); + if (!ssock) { + err = -EINVAL; + goto out; + } + + mptcp_info2sockaddr(&entry->addr, &addr, entry->addr.family); + err = kernel_bind(ssock, (struct sockaddr *)&addr, + sizeof(struct sockaddr_in)); + if (err) { + pr_warn("kernel_bind error, err=%d", err); + goto out; + } + + err = kernel_listen(ssock, backlog); + if (err) { + pr_warn("kernel_listen error, err=%d", err); + goto out; + } + + return 0; + +out: + sock_release(entry->lsk); + return err; +} + int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct sock_common *skc) { struct mptcp_pm_addr_entry *entry; @@ -722,6 +770,8 @@ int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct sock_common *skc) entry->addr.ifindex = 0; entry->addr.flags = 0; entry->addr.id = 0; + entry->addr.port = 0; + entry->lsk = NULL; ret = mptcp_pm_nl_append_new_local_addr(pernet, entry); if (ret < 0) kfree(entry); @@ -891,9 +941,19 @@ static int mptcp_nl_cmd_add_addr(struct sk_buff *skb, struct genl_info *info) } *entry = addr; + if (entry->addr.port) { + ret = mptcp_pm_nl_create_listen_socket(skb->sk, entry); + if (ret) { + GENL_SET_ERR_MSG(info, "create listen socket error"); + kfree(entry); + return ret; + } + } ret = mptcp_pm_nl_append_new_local_addr(pernet, entry); if (ret < 0) { GENL_SET_ERR_MSG(info, "too many addresses or duplicate one"); + if (entry->lsk) + sock_release(entry->lsk); kfree(entry); return ret; } @@ -977,6 +1037,38 @@ next: return 0; } +struct addr_entry_release_work { + struct rcu_work rwork; + struct mptcp_pm_addr_entry *entry; +}; + +static void mptcp_pm_release_addr_entry(struct work_struct *work) +{ + struct addr_entry_release_work *w; + struct mptcp_pm_addr_entry *entry; + + w = container_of(to_rcu_work(work), struct addr_entry_release_work, rwork); + entry = w->entry; + if (entry) { + if (entry->lsk) + sock_release(entry->lsk); + kfree(entry); + } + kfree(w); +} + +static void mptcp_pm_free_addr_entry(struct mptcp_pm_addr_entry *entry) +{ + struct addr_entry_release_work *w; + + w = kmalloc(sizeof(*w), GFP_ATOMIC); + if (w) { + INIT_RCU_WORK(&w->rwork, mptcp_pm_release_addr_entry); + w->entry = entry; + queue_rcu_work(system_wq, &w->rwork); + } +} + static int mptcp_nl_cmd_del_addr(struct sk_buff *skb, struct genl_info *info) { struct nlattr *attr = info->attrs[MPTCP_PM_ATTR_ADDR]; @@ -1011,7 +1103,7 @@ static int mptcp_nl_cmd_del_addr(struct sk_buff *skb, struct genl_info *info) spin_unlock_bh(&pernet->lock); mptcp_nl_remove_subflow_and_signal_addr(sock_net(skb->sk), &entry->addr); - kfree_rcu(entry, rcu); + mptcp_pm_free_addr_entry(entry); return ret; } @@ -1025,7 +1117,7 @@ static void __flush_addrs(struct net *net, struct list_head *list) struct mptcp_pm_addr_entry, list); mptcp_nl_remove_subflow_and_signal_addr(net, &cur->addr); list_del_rcu(&cur->list); - kfree_rcu(cur, rcu); + mptcp_pm_free_addr_entry(cur); } } diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index a033bf9c26ee1..1405e146dd7cc 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -52,7 +52,7 @@ static struct net_device mptcp_napi_dev; * completed yet or has failed, return the subflow socket. * Otherwise return NULL. */ -static struct socket *__mptcp_nmpc_socket(const struct mptcp_sock *msk) +struct socket *__mptcp_nmpc_socket(const struct mptcp_sock *msk) { if (!msk->subflow || READ_ONCE(msk->can_ack)) return NULL; diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index a56247738dee6..4e071d9264dc3 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -538,11 +538,15 @@ void __mptcp_close_ssk(struct sock *sk, struct sock *ssk, struct mptcp_subflow_context *subflow); void mptcp_subflow_reset(struct sock *ssk); void mptcp_sock_graft(struct sock *sk, struct socket *parent); +struct socket *__mptcp_nmpc_socket(const struct mptcp_sock *msk); /* called with sk socket lock held */ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc, const struct mptcp_addr_info *remote); int mptcp_subflow_create_socket(struct sock *sk, struct socket **new_sock); +void mptcp_info2sockaddr(const struct mptcp_addr_info *info, + struct sockaddr_storage *addr, + unsigned short family); static inline void mptcp_subflow_tcp_fallback(struct sock *sk, struct mptcp_subflow_context *ctx) diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 586156281e5a0..50a01546ac34e 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -1084,9 +1084,9 @@ void mptcpv6_handle_mapped(struct sock *sk, bool mapped) } #endif -static void mptcp_info2sockaddr(const struct mptcp_addr_info *info, - struct sockaddr_storage *addr, - unsigned short family) +void mptcp_info2sockaddr(const struct mptcp_addr_info *info, + struct sockaddr_storage *addr, + unsigned short family) { memset(addr, 0, sizeof(*addr)); addr->ss_family = family; -- GitLab From b5e2e42fe5660266553a74711534db427d725a45 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Mon, 1 Feb 2021 15:09:13 -0800 Subject: [PATCH 1843/2012] mptcp: drop unused skb in subflow_token_join_request This patch drops the unused parameter skb in subflow_token_join_request. Signed-off-by: Geliang Tang Signed-off-by: Mat Martineau Signed-off-by: Jakub Kicinski --- net/mptcp/subflow.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 50a01546ac34e..2dcc0fb5a69e6 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -64,8 +64,7 @@ static bool mptcp_can_accept_new_subflow(const struct mptcp_sock *msk) } /* validate received token and create truncated hmac and nonce for SYN-ACK */ -static struct mptcp_sock *subflow_token_join_request(struct request_sock *req, - const struct sk_buff *skb) +static struct mptcp_sock *subflow_token_join_request(struct request_sock *req) { struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req); u8 hmac[SHA256_DIGEST_SIZE]; @@ -181,7 +180,7 @@ again: subflow_req->remote_id = mp_opt.join_id; subflow_req->token = mp_opt.token; subflow_req->remote_nonce = mp_opt.nonce; - subflow_req->msk = subflow_token_join_request(req, skb); + subflow_req->msk = subflow_token_join_request(req); /* Can't fall back to TCP in this case. */ if (!subflow_req->msk) -- GitLab From ec20e14396aeea26f6bf9221bce686a33bde9047 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Mon, 1 Feb 2021 15:09:14 -0800 Subject: [PATCH 1844/2012] mptcp: add a new helper subflow_req_create_thmac This patch adds a new helper named subflow_req_create_thmac, which is extracted from subflow_token_join_request. It initializes subflow_req's local_nonce and thmac fields, those are the more expensive to populate. Signed-off-by: Geliang Tang Signed-off-by: Mat Martineau Signed-off-by: Jakub Kicinski --- net/mptcp/subflow.c | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 2dcc0fb5a69e6..94926ab74d481 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -64,10 +64,23 @@ static bool mptcp_can_accept_new_subflow(const struct mptcp_sock *msk) } /* validate received token and create truncated hmac and nonce for SYN-ACK */ +static void subflow_req_create_thmac(struct mptcp_subflow_request_sock *subflow_req) +{ + struct mptcp_sock *msk = subflow_req->msk; + u8 hmac[SHA256_DIGEST_SIZE]; + + get_random_bytes(&subflow_req->local_nonce, sizeof(u32)); + + subflow_generate_hmac(msk->local_key, msk->remote_key, + subflow_req->local_nonce, + subflow_req->remote_nonce, hmac); + + subflow_req->thmac = get_unaligned_be64(hmac); +} + static struct mptcp_sock *subflow_token_join_request(struct request_sock *req) { struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req); - u8 hmac[SHA256_DIGEST_SIZE]; struct mptcp_sock *msk; int local_id; @@ -84,13 +97,6 @@ static struct mptcp_sock *subflow_token_join_request(struct request_sock *req) } subflow_req->local_id = local_id; - get_random_bytes(&subflow_req->local_nonce, sizeof(u32)); - - subflow_generate_hmac(msk->local_key, msk->remote_key, - subflow_req->local_nonce, - subflow_req->remote_nonce, hmac); - - subflow_req->thmac = get_unaligned_be64(hmac); return msk; } @@ -186,6 +192,8 @@ again: if (!subflow_req->msk) return -EPERM; + subflow_req_create_thmac(subflow_req); + if (unlikely(req->syncookie)) { if (mptcp_can_accept_new_subflow(subflow_req->msk)) subflow_init_req_cookie_join_save(subflow_req, skb); -- GitLab From 5bc56388c74f0e64b32e343ea603609b146dcb96 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Mon, 1 Feb 2021 15:09:15 -0800 Subject: [PATCH 1845/2012] mptcp: add port number check for MP_JOIN This patch adds two new helpers, subflow_use_different_sport and subflow_use_different_dport, to check whether the subflow's source or destination port number is different from the msk's port number. When receiving the MP_JOIN's SYN/SYNACK/ACK, we do these port number checks and print out the different port numbers. And furthermore, when receiving the MP_JOIN's SYN/ACK, we also use a new helper mptcp_pm_sport_in_anno_list to check whether this port number is announced. If it isn't, we need to abort this connection. This patch also populates the local address's port field in local_address. Signed-off-by: Geliang Tang Signed-off-by: Mat Martineau Signed-off-by: Jakub Kicinski --- net/mptcp/pm_netlink.c | 23 ++++++++++++++++++++++- net/mptcp/protocol.h | 1 + net/mptcp/subflow.c | 38 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 61 insertions(+), 1 deletion(-) diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index b71701a743a4c..54f0ca73e68a9 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -97,8 +97,8 @@ static bool address_zero(const struct mptcp_addr_info *addr) static void local_address(const struct sock_common *skc, struct mptcp_addr_info *addr) { - addr->port = 0; addr->family = skc->skc_family; + addr->port = htons(skc->skc_num); if (addr->family == AF_INET) addr->addr.s_addr = skc->skc_rcv_saddr; #if IS_ENABLED(CONFIG_MPTCP_IPV6) @@ -254,6 +254,27 @@ lookup_anno_list_by_saddr(struct mptcp_sock *msk, return NULL; } +bool mptcp_pm_sport_in_anno_list(struct mptcp_sock *msk, const struct sock *sk) +{ + struct mptcp_pm_add_entry *entry; + struct mptcp_addr_info saddr; + bool ret = false; + + local_address((struct sock_common *)sk, &saddr); + + spin_lock_bh(&msk->pm.lock); + list_for_each_entry(entry, &msk->pm.anno_list, list) { + if (addresses_equal(&entry->addr, &saddr, true)) { + ret = true; + goto out; + } + } + +out: + spin_unlock_bh(&msk->pm.lock); + return ret; +} + static void mptcp_pm_add_timer(struct timer_list *timer) { struct mptcp_pm_add_entry *entry = from_timer(entry, timer, add_timer); diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 4e071d9264dc3..1cc7948a1826f 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -650,6 +650,7 @@ int mptcp_pm_nl_mp_prio_send_ack(struct mptcp_sock *msk, struct mptcp_addr_info *addr, u8 bkup); void mptcp_pm_free_anno_list(struct mptcp_sock *msk); +bool mptcp_pm_sport_in_anno_list(struct mptcp_sock *msk, const struct sock *sk); struct mptcp_pm_add_entry * mptcp_pm_del_add_timer(struct mptcp_sock *msk, struct mptcp_addr_info *addr); diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 94926ab74d481..ebfbf6a9b669c 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -120,6 +120,11 @@ static int __subflow_init_req(struct request_sock *req, const struct sock *sk_li return 0; } +static bool subflow_use_different_sport(struct mptcp_sock *msk, const struct sock *sk) +{ + return inet_sk(sk)->inet_sport != inet_sk((struct sock *)msk)->inet_sport; +} + /* Init mptcp request socket. * * Returns an error code if a JOIN has failed and a TCP reset @@ -192,6 +197,20 @@ again: if (!subflow_req->msk) return -EPERM; + if (subflow_use_different_sport(subflow_req->msk, sk_listener)) { + pr_debug("syn inet_sport=%d %d", + ntohs(inet_sk(sk_listener)->inet_sport), + ntohs(inet_sk((struct sock *)subflow_req->msk)->inet_sport)); + if (!mptcp_pm_sport_in_anno_list(subflow_req->msk, sk_listener)) { + sock_put((struct sock *)subflow_req->msk); + mptcp_token_destroy_request(req); + tcp_request_sock_ops.destructor(req); + subflow_req->msk = NULL; + subflow_req->mp_join = 0; + return -EPERM; + } + } + subflow_req_create_thmac(subflow_req); if (unlikely(req->syncookie)) { @@ -336,6 +355,11 @@ void mptcp_subflow_reset(struct sock *ssk) sock_put(sk); } +static bool subflow_use_different_dport(struct mptcp_sock *msk, const struct sock *sk) +{ + return inet_sk(sk)->inet_dport != inet_sk((struct sock *)msk)->inet_dport; +} + static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); @@ -402,6 +426,12 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb) subflow->mp_join = 1; MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINSYNACKRX); + + if (subflow_use_different_dport(mptcp_sk(parent), sk)) { + pr_debug("synack inet_dport=%d %d", + ntohs(inet_sk(sk)->inet_dport), + ntohs(inet_sk(parent)->inet_dport)); + } } else if (mptcp_check_fallback(sk)) { fallback: mptcp_rcv_space_init(mptcp_sk(parent), sk); @@ -667,6 +697,14 @@ create_child: SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINACKRX); tcp_rsk(req)->drop_req = true; + + if (subflow_use_different_sport(owner, sk)) { + pr_debug("ack inet_sport=%d %d", + ntohs(inet_sk(sk)->inet_sport), + ntohs(inet_sk((struct sock *)owner)->inet_sport)); + if (!mptcp_pm_sport_in_anno_list(owner, sk)) + goto out; + } } } -- GitLab From 60b57bf76cfff5e216f4d96db0e39e4cd6686699 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Mon, 1 Feb 2021 15:09:16 -0800 Subject: [PATCH 1846/2012] mptcp: enable use_port when invoke addresses_equal When dealing with the addresses list local_addr_list or anno_list, we should enable the function addresses_equal's parameter use_port. And enable it in address_zero too. Signed-off-by: Geliang Tang Signed-off-by: Mat Martineau Signed-off-by: Jakub Kicinski --- net/mptcp/pm_netlink.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index 54f0ca73e68a9..c610597bd58bc 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -91,7 +91,7 @@ static bool address_zero(const struct mptcp_addr_info *addr) memset(&zero, 0, sizeof(zero)); zero.family = addr->family; - return addresses_equal(addr, &zero, false); + return addresses_equal(addr, &zero, true); } static void local_address(const struct sock_common *skc, @@ -131,7 +131,7 @@ static bool lookup_subflow_by_saddr(const struct list_head *list, skc = (struct sock_common *)mptcp_subflow_tcp_sock(subflow); local_address(skc, &cur); - if (addresses_equal(&cur, saddr, false)) + if (addresses_equal(&cur, saddr, saddr->port)) return true; } @@ -247,7 +247,7 @@ lookup_anno_list_by_saddr(struct mptcp_sock *msk, struct mptcp_pm_add_entry *entry; list_for_each_entry(entry, &msk->pm.anno_list, list) { - if (addresses_equal(&entry->addr, addr, false)) + if (addresses_equal(&entry->addr, addr, true)) return entry; } @@ -773,7 +773,7 @@ int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct sock_common *skc) rcu_read_lock(); list_for_each_entry_rcu(entry, &pernet->local_addr_list, list) { - if (addresses_equal(&entry->addr, &skc_local, false)) { + if (addresses_equal(&entry->addr, &skc_local, entry->addr.port)) { ret = entry->addr.id; break; } -- GitLab From a77e9179c7651b6af2bb10c1b1df1a0ef087054f Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Mon, 1 Feb 2021 15:09:17 -0800 Subject: [PATCH 1847/2012] mptcp: deal with MPTCP_PM_ADDR_ATTR_PORT in PM netlink This patch adds MPTCP_PM_ADDR_ATTR_PORT filling and parsing in PM netlink. Signed-off-by: Geliang Tang Signed-off-by: Mat Martineau Signed-off-by: Jakub Kicinski --- net/mptcp/pm_netlink.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index c610597bd58bc..e7b1abb4f0c26 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -911,6 +911,9 @@ skip_family: if (tb[MPTCP_PM_ADDR_ATTR_FLAGS]) entry->addr.flags = nla_get_u32(tb[MPTCP_PM_ADDR_ATTR_FLAGS]); + if (tb[MPTCP_PM_ADDR_ATTR_PORT]) + entry->addr.port = htons(nla_get_u16(tb[MPTCP_PM_ADDR_ATTR_PORT])); + return 0; } @@ -1177,6 +1180,8 @@ static int mptcp_nl_fill_addr(struct sk_buff *skb, if (nla_put_u16(skb, MPTCP_PM_ADDR_ATTR_FAMILY, addr->family)) goto nla_put_failure; + if (nla_put_u16(skb, MPTCP_PM_ADDR_ATTR_PORT, ntohs(addr->port))) + goto nla_put_failure; if (nla_put_u8(skb, MPTCP_PM_ADDR_ATTR_ID, addr->id)) goto nla_put_failure; if (nla_put_u32(skb, MPTCP_PM_ADDR_ATTR_FLAGS, entry->addr.flags)) -- GitLab From d4a7726a79e27d7a117a75cc81f335311d7fc7b8 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Mon, 1 Feb 2021 15:09:18 -0800 Subject: [PATCH 1848/2012] selftests: mptcp: add port argument for pm_nl_ctl This patch adds a new argument for pm_nl_ctl tool. We can use it like this: # pm_nl_ctl add 10.0.2.1 flags signal port 10100 # pm_nl_ctl dump id 1 flags signal 10.0.2.1 10100 Signed-off-by: Geliang Tang Signed-off-by: Mat Martineau Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/pm_nl_ctl.c | 24 +++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/net/mptcp/pm_nl_ctl.c b/tools/testing/selftests/net/mptcp/pm_nl_ctl.c index abc269e96a07c..7b4167f3f9a2c 100644 --- a/tools/testing/selftests/net/mptcp/pm_nl_ctl.c +++ b/tools/testing/selftests/net/mptcp/pm_nl_ctl.c @@ -177,8 +177,8 @@ int add_addr(int fd, int pm_family, int argc, char *argv[]) 1024]; struct rtattr *rta, *nest; struct nlmsghdr *nh; + u_int32_t flags = 0; u_int16_t family; - u_int32_t flags; int nest_start; u_int8_t id; int off = 0; @@ -224,7 +224,6 @@ int add_addr(int fd, int pm_family, int argc, char *argv[]) char *tok, *str; /* flags */ - flags = 0; if (++arg >= argc) error(1, 0, " missing flags value"); @@ -272,6 +271,20 @@ int add_addr(int fd, int pm_family, int argc, char *argv[]) rta->rta_len = RTA_LENGTH(4); memcpy(RTA_DATA(rta), &ifindex, 4); off += NLMSG_ALIGN(rta->rta_len); + } else if (!strcmp(argv[arg], "port")) { + u_int16_t port; + + if (++arg >= argc) + error(1, 0, " missing port value"); + if (!(flags & MPTCP_PM_ADDR_FLAG_SIGNAL)) + error(1, 0, " flags must be signal when using port"); + + port = atoi(argv[arg]); + rta = (void *)(data + off); + rta->rta_type = MPTCP_PM_ADDR_ATTR_PORT; + rta->rta_len = RTA_LENGTH(2); + memcpy(RTA_DATA(rta), &port, 2); + off += NLMSG_ALIGN(rta->rta_len); } else error(1, 0, "unknown keyword %s", argv[arg]); } @@ -324,6 +337,7 @@ int del_addr(int fd, int pm_family, int argc, char *argv[]) static void print_addr(struct rtattr *attrs, int len) { uint16_t family = 0; + uint16_t port = 0; char str[1024]; uint32_t flags; uint8_t id; @@ -331,12 +345,16 @@ static void print_addr(struct rtattr *attrs, int len) while (RTA_OK(attrs, len)) { if (attrs->rta_type == MPTCP_PM_ADDR_ATTR_FAMILY) memcpy(&family, RTA_DATA(attrs), 2); + if (attrs->rta_type == MPTCP_PM_ADDR_ATTR_PORT) + memcpy(&port, RTA_DATA(attrs), 2); if (attrs->rta_type == MPTCP_PM_ADDR_ATTR_ADDR4) { if (family != AF_INET) error(1, errno, "wrong IP (v4) for family %d", family); inet_ntop(AF_INET, RTA_DATA(attrs), str, sizeof(str)); printf("%s", str); + if (port) + printf(" %d", port); } if (attrs->rta_type == MPTCP_PM_ADDR_ATTR_ADDR6) { if (family != AF_INET6) @@ -344,6 +362,8 @@ static void print_addr(struct rtattr *attrs, int len) family); inet_ntop(AF_INET6, RTA_DATA(attrs), str, sizeof(str)); printf("%s", str); + if (port) + printf(" %d", port); } if (attrs->rta_type == MPTCP_PM_ADDR_ATTR_ID) { memcpy(&id, RTA_DATA(attrs), 1); -- GitLab From 2fbdd9eaf17448c52788b0bb5dea04acfd7e9635 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Mon, 1 Feb 2021 15:09:19 -0800 Subject: [PATCH 1849/2012] mptcp: add the mibs for ADD_ADDR with port This patch adds the mibs for ADD_ADDR with port: MPTCP_MIB_PORTADD for received ADD_ADDR suboption with a port number. MPTCP_MIB_PORTSYNRX, MPTCP_MIB_PORTSYNACKRX, MPTCP_MIB_PORTACKRX, for received MP_JOIN's SYN or SYN/ACK or ACK with a port number which is different from the msk's port number. MPTCP_MIB_MISMATCHPORTSYNRX and MPTCP_MIB_MISMATCHPORTACKRX, for received SYN or ACK MP_JOIN with a mismatched port-number. Signed-off-by: Geliang Tang Signed-off-by: Mat Martineau Signed-off-by: Jakub Kicinski --- net/mptcp/mib.c | 6 ++++++ net/mptcp/mib.h | 6 ++++++ net/mptcp/options.c | 4 ++++ net/mptcp/subflow.c | 8 +++++++- 4 files changed, 23 insertions(+), 1 deletion(-) diff --git a/net/mptcp/mib.c b/net/mptcp/mib.c index 8ca196489893f..3780c29c321d1 100644 --- a/net/mptcp/mib.c +++ b/net/mptcp/mib.c @@ -29,6 +29,12 @@ static const struct snmp_mib mptcp_snmp_list[] = { SNMP_MIB_ITEM("DuplicateData", MPTCP_MIB_DUPDATA), SNMP_MIB_ITEM("AddAddr", MPTCP_MIB_ADDADDR), SNMP_MIB_ITEM("EchoAdd", MPTCP_MIB_ECHOADD), + SNMP_MIB_ITEM("PortAdd", MPTCP_MIB_PORTADD), + SNMP_MIB_ITEM("MPJoinPortSynRx", MPTCP_MIB_JOINPORTSYNRX), + SNMP_MIB_ITEM("MPJoinPortSynAckRx", MPTCP_MIB_JOINPORTSYNACKRX), + SNMP_MIB_ITEM("MPJoinPortAckRx", MPTCP_MIB_JOINPORTACKRX), + SNMP_MIB_ITEM("MismatchPortSynRx", MPTCP_MIB_MISMATCHPORTSYNRX), + SNMP_MIB_ITEM("MismatchPortAckRx", MPTCP_MIB_MISMATCHPORTACKRX), SNMP_MIB_ITEM("RmAddr", MPTCP_MIB_RMADDR), SNMP_MIB_ITEM("RmSubflow", MPTCP_MIB_RMSUBFLOW), SNMP_MIB_ITEM("MPPrioTx", MPTCP_MIB_MPPRIOTX), diff --git a/net/mptcp/mib.h b/net/mptcp/mib.h index 63914a5ef6a5d..72afbc135f8e7 100644 --- a/net/mptcp/mib.h +++ b/net/mptcp/mib.h @@ -22,6 +22,12 @@ enum linux_mptcp_mib_field { MPTCP_MIB_DUPDATA, /* Segments discarded due to duplicate DSS */ MPTCP_MIB_ADDADDR, /* Received ADD_ADDR with echo-flag=0 */ MPTCP_MIB_ECHOADD, /* Received ADD_ADDR with echo-flag=1 */ + MPTCP_MIB_PORTADD, /* Received ADD_ADDR with a port-number */ + MPTCP_MIB_JOINPORTSYNRX, /* Received a SYN MP_JOIN with a different port-number */ + MPTCP_MIB_JOINPORTSYNACKRX, /* Received a SYNACK MP_JOIN with a different port-number */ + MPTCP_MIB_JOINPORTACKRX, /* Received an ACK MP_JOIN with a different port-number */ + MPTCP_MIB_MISMATCHPORTSYNRX, /* Received a SYN MP_JOIN with a mismatched port-number */ + MPTCP_MIB_MISMATCHPORTACKRX, /* Received an ACK MP_JOIN with a mismatched port-number */ MPTCP_MIB_RMADDR, /* Received RM_ADDR */ MPTCP_MIB_RMSUBFLOW, /* Remove a subflow */ MPTCP_MIB_MPPRIOTX, /* Transmit a MP_PRIO */ diff --git a/net/mptcp/options.c b/net/mptcp/options.c index 17ad42c65087d..3b71d68b3863d 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -1025,6 +1025,10 @@ void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb) mptcp_pm_del_add_timer(msk, &addr); MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_ECHOADD); } + + if (mp_opt.port) + MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_PORTADD); + mp_opt.add_addr = 0; } diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index ebfbf6a9b669c..280da418d60b6 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -207,8 +207,10 @@ again: tcp_request_sock_ops.destructor(req); subflow_req->msk = NULL; subflow_req->mp_join = 0; + SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_MISMATCHPORTSYNRX); return -EPERM; } + SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINPORTSYNRX); } subflow_req_create_thmac(subflow_req); @@ -431,6 +433,7 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb) pr_debug("synack inet_dport=%d %d", ntohs(inet_sk(sk)->inet_dport), ntohs(inet_sk(parent)->inet_dport)); + MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINPORTSYNACKRX); } } else if (mptcp_check_fallback(sk)) { fallback: @@ -702,8 +705,11 @@ create_child: pr_debug("ack inet_sport=%d %d", ntohs(inet_sk(sk)->inet_sport), ntohs(inet_sk((struct sock *)owner)->inet_sport)); - if (!mptcp_pm_sport_in_anno_list(owner, sk)) + if (!mptcp_pm_sport_in_anno_list(owner, sk)) { + SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_MISMATCHPORTACKRX); goto out; + } + SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINPORTACKRX); } } } -- GitLab From 8a127bf68a6fadcc5f760b26e2d3acf5d4c67b83 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Mon, 1 Feb 2021 15:09:20 -0800 Subject: [PATCH 1850/2012] selftests: mptcp: add testcases for ADD_ADDR with port This patch adds testcases for ADD_ADDR with port and the related MIB counters check in chk_add_nr. The output looks like this: 24 signal address with port syn[ ok ] - synack[ ok ] - ack[ ok ] add[ ok ] - echo [ ok ] - pt [ ok ] syn[ ok ] - synack[ ok ] - ack[ ok ] syn[ ok ] - ack [ ok ] 25 subflow and signal with port syn[ ok ] - synack[ ok ] - ack[ ok ] add[ ok ] - echo [ ok ] - pt [ ok ] syn[ ok ] - synack[ ok ] - ack[ ok ] syn[ ok ] - ack [ ok ] 26 remove single address with port syn[ ok ] - synack[ ok ] - ack[ ok ] add[ ok ] - echo [ ok ] - pt [ ok ] syn[ ok ] - synack[ ok ] - ack[ ok ] syn[ ok ] - ack [ ok ] rm [ ok ] - sf [ ok ] Signed-off-by: Geliang Tang Signed-off-by: Mat Martineau Signed-off-by: Jakub Kicinski --- .../testing/selftests/net/mptcp/mptcp_join.sh | 160 +++++++++++++++++- 1 file changed, 159 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index b5cd2a48831eb..b8fd924033b1e 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -487,6 +487,12 @@ chk_add_nr() { local add_nr=$1 local echo_nr=$2 + local port_nr=${3:-0} + local syn_nr=${4:-$port_nr} + local syn_ack_nr=${5:-$port_nr} + local ack_nr=${6:-$port_nr} + local mis_syn_nr=${7:-0} + local mis_ack_nr=${8:-0} local count local dump_stats @@ -509,7 +515,87 @@ chk_add_nr() ret=1 dump_stats=1 else - echo "[ ok ]" + echo -n "[ ok ]" + fi + + if [ $port_nr -gt 0 ]; then + echo -n " - pt " + count=`ip netns exec $ns2 nstat -as | grep MPTcpExtPortAdd | awk '{print $2}'` + [ -z "$count" ] && count=0 + if [ "$count" != "$port_nr" ]; then + echo "[fail] got $count ADD_ADDR[s] with a port-number expected $port_nr" + ret=1 + dump_stats=1 + else + echo "[ ok ]" + fi + + printf "%-39s %s" " " "syn" + count=`ip netns exec $ns1 nstat -as | grep MPTcpExtMPJoinPortSynRx | + awk '{print $2}'` + [ -z "$count" ] && count=0 + if [ "$count" != "$syn_nr" ]; then + echo "[fail] got $count JOIN[s] syn with a different \ + port-number expected $syn_nr" + ret=1 + dump_stats=1 + else + echo -n "[ ok ]" + fi + + echo -n " - synack" + count=`ip netns exec $ns2 nstat -as | grep MPTcpExtMPJoinPortSynAckRx | + awk '{print $2}'` + [ -z "$count" ] && count=0 + if [ "$count" != "$syn_ack_nr" ]; then + echo "[fail] got $count JOIN[s] synack with a different \ + port-number expected $syn_ack_nr" + ret=1 + dump_stats=1 + else + echo -n "[ ok ]" + fi + + echo -n " - ack" + count=`ip netns exec $ns1 nstat -as | grep MPTcpExtMPJoinPortAckRx | + awk '{print $2}'` + [ -z "$count" ] && count=0 + if [ "$count" != "$ack_nr" ]; then + echo "[fail] got $count JOIN[s] ack with a different \ + port-number expected $ack_nr" + ret=1 + dump_stats=1 + else + echo "[ ok ]" + fi + + printf "%-39s %s" " " "syn" + count=`ip netns exec $ns1 nstat -as | grep MPTcpExtMismatchPortSynRx | + awk '{print $2}'` + [ -z "$count" ] && count=0 + if [ "$count" != "$mis_syn_nr" ]; then + echo "[fail] got $count JOIN[s] syn with a mismatched \ + port-number expected $mis_syn_nr" + ret=1 + dump_stats=1 + else + echo -n "[ ok ]" + fi + + echo -n " - ack " + count=`ip netns exec $ns1 nstat -as | grep MPTcpExtMismatchPortAckRx | + awk '{print $2}'` + [ -z "$count" ] && count=0 + if [ "$count" != "$mis_ack_nr" ]; then + echo "[fail] got $count JOIN[s] ack with a mismatched \ + port-number expected $mis_ack_nr" + ret=1 + dump_stats=1 + else + echo "[ ok ]" + fi + else + echo "" fi if [ "${dump_stats}" = 1 ]; then @@ -955,6 +1041,78 @@ chk_join_nr "single address, backup" 1 1 1 chk_add_nr 1 1 chk_prio_nr 1 0 +# signal address with port +reset +ip netns exec $ns1 ./pm_nl_ctl limits 0 1 +ip netns exec $ns2 ./pm_nl_ctl limits 1 1 +ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal port 10100 +run_tests $ns1 $ns2 10.0.1.1 +chk_join_nr "signal address with port" 1 1 1 +chk_add_nr 1 1 1 + +# subflow and signal with port +reset +ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal port 10100 +ip netns exec $ns1 ./pm_nl_ctl limits 0 2 +ip netns exec $ns2 ./pm_nl_ctl limits 1 2 +ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow +run_tests $ns1 $ns2 10.0.1.1 +chk_join_nr "subflow and signal with port" 2 2 2 +chk_add_nr 1 1 1 + +# single address with port, remove +reset +ip netns exec $ns1 ./pm_nl_ctl limits 0 1 +ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal port 10100 +ip netns exec $ns2 ./pm_nl_ctl limits 1 1 +run_tests $ns1 $ns2 10.0.1.1 0 -1 0 slow +chk_join_nr "remove single address with port" 1 1 1 +chk_add_nr 1 1 1 +chk_rm_nr 0 0 + +# subflow and signal with port, remove +reset +ip netns exec $ns1 ./pm_nl_ctl limits 0 2 +ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal port 10100 +ip netns exec $ns2 ./pm_nl_ctl limits 1 2 +ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow +run_tests $ns1 $ns2 10.0.1.1 0 -1 -1 slow +chk_join_nr "remove subflow and signal with port" 2 2 2 +chk_add_nr 1 1 1 +chk_rm_nr 1 1 + +# subflows and signal with port, flush +reset +ip netns exec $ns1 ./pm_nl_ctl limits 0 3 +ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal port 10100 +ip netns exec $ns2 ./pm_nl_ctl limits 1 3 +ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow +ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 flags subflow +run_tests $ns1 $ns2 10.0.1.1 0 -8 -8 slow +chk_join_nr "flush subflows and signal with port" 3 3 3 +chk_add_nr 1 1 +chk_rm_nr 2 2 + +# multiple addresses with port +reset +ip netns exec $ns1 ./pm_nl_ctl limits 2 2 +ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal port 10100 +ip netns exec $ns1 ./pm_nl_ctl add 10.0.3.1 flags signal port 10100 +ip netns exec $ns2 ./pm_nl_ctl limits 2 2 +run_tests $ns1 $ns2 10.0.1.1 +chk_join_nr "multiple addresses with port" 2 2 2 +chk_add_nr 2 2 2 + +# multiple addresses with ports +reset +ip netns exec $ns1 ./pm_nl_ctl limits 2 2 +ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal port 10100 +ip netns exec $ns1 ./pm_nl_ctl add 10.0.3.1 flags signal port 10101 +ip netns exec $ns2 ./pm_nl_ctl limits 2 2 +run_tests $ns1 $ns2 10.0.1.1 +chk_join_nr "multiple addresses with ports" 2 2 2 +chk_add_nr 2 2 2 + # single subflow, syncookies reset_with_cookies ip netns exec $ns1 ./pm_nl_ctl limits 0 1 -- GitLab From fca23f37f3a7f6296d1ae98606871fd7ed565a0b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 2 Feb 2021 07:41:45 -0800 Subject: [PATCH 1851/2012] inet: do not export inet_gro_{receive|complete} inet_gro_receive() and inet_gro_complete() are part of GRO engine which can not be modular. Similarly, inet_gso_segment() does not need to be exported, being part of GSO stack. In other words, net/ipv6/ip6_offload.o is part of vmlinux, regardless of CONFIG_IPV6. Signed-off-by: Eric Dumazet Reviewed-by: Leon Romanovsky Link: https://lore.kernel.org/r/20210202154145.1568451-1-eric.dumazet@gmail.com Signed-off-by: Jakub Kicinski --- net/ipv4/af_inet.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index ab42f6404fc62..2ff5d8058ce6a 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1419,7 +1419,6 @@ struct sk_buff *inet_gso_segment(struct sk_buff *skb, out: return segs; } -EXPORT_SYMBOL(inet_gso_segment); static struct sk_buff *ipip_gso_segment(struct sk_buff *skb, netdev_features_t features) @@ -1550,7 +1549,6 @@ out: return pp; } -EXPORT_SYMBOL(inet_gro_receive); static struct sk_buff *ipip_gro_receive(struct list_head *head, struct sk_buff *skb) @@ -1636,7 +1634,6 @@ out_unlock: return err; } -EXPORT_SYMBOL(inet_gro_complete); static int ipip_gro_complete(struct sk_buff *skb, int nhoff) { -- GitLab From 32d1bbb1d609f5a78b0c95e2189f398a52a3fbf7 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 2 Feb 2021 14:06:50 +0100 Subject: [PATCH 1852/2012] net: fec: Silence M5272 build warnings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If CONFIG_M5272=y: drivers/net/ethernet/freescale/fec_main.c: In function ‘fec_restart’: drivers/net/ethernet/freescale/fec_main.c:948:6: warning: unused variable ‘val’ [-Wunused-variable] 948 | u32 val; | ^~~ drivers/net/ethernet/freescale/fec_main.c: In function ‘fec_get_mac’: drivers/net/ethernet/freescale/fec_main.c:1667:28: warning: unused variable ‘pdata’ [-Wunused-variable] 1667 | struct fec_platform_data *pdata = dev_get_platdata(&fep->pdev->dev); | ^~~~~ Fix this by moving the variable declarations inside the existing #ifdef blocks. Signed-off-by: Geert Uytterhoeven Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20210202130650.865023-1-geert@linux-m68k.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/freescale/fec_main.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 9ebdb0e54291b..3db882322b2bd 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -945,7 +945,6 @@ static void fec_restart(struct net_device *ndev) { struct fec_enet_private *fep = netdev_priv(ndev); - u32 val; u32 temp_mac[2]; u32 rcntl = OPT_FRAME_SIZE | 0x04; u32 ecntl = 0x2; /* ETHEREN */ @@ -997,7 +996,8 @@ fec_restart(struct net_device *ndev) #if !defined(CONFIG_M5272) if (fep->quirks & FEC_QUIRK_HAS_RACC) { - val = readl(fep->hwp + FEC_RACC); + u32 val = readl(fep->hwp + FEC_RACC); + /* align IP header */ val |= FEC_RACC_SHIFT16; if (fep->csum_flags & FLAG_RX_CSUM_ENABLED) @@ -1664,7 +1664,6 @@ static int fec_enet_rx_napi(struct napi_struct *napi, int budget) static void fec_get_mac(struct net_device *ndev) { struct fec_enet_private *fep = netdev_priv(ndev); - struct fec_platform_data *pdata = dev_get_platdata(&fep->pdev->dev); unsigned char *iap, tmpaddr[ETH_ALEN]; /* @@ -1695,6 +1694,8 @@ static void fec_get_mac(struct net_device *ndev) if (FEC_FLASHMAC) iap = (unsigned char *)FEC_FLASHMAC; #else + struct fec_platform_data *pdata = dev_get_platdata(&fep->pdev->dev); + if (pdata) iap = (unsigned char *)&pdata->mac; #endif -- GitLab From 4f4e54366eae20d5867864001db57c5d90693d8c Mon Sep 17 00:00:00 2001 From: Emil Renner Berthing Date: Sun, 31 Jan 2021 00:46:37 +0100 Subject: [PATCH 1853/2012] net: usb: cdc_ncm: use new API for bh tasklet This converts the driver to use the new tasklet API introduced in commit 12cc923f1ccc ("tasklet: Introduce new initialization API") It is unfortunate that we need to add a pointer to the driver context to get back to the usbnet device, but the space will be reclaimed once there are no more users of the old API left and we can remove the data value and flag from the tasklet struct. Signed-off-by: Emil Renner Berthing Link: https://lore.kernel.org/r/20210130234637.26505-1-kernel@esmil.dk Signed-off-by: Jakub Kicinski --- drivers/net/usb/cdc_ncm.c | 12 +++++++----- include/linux/usb/cdc_ncm.h | 2 ++ 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c index 291e76d32abe7..4087c9e337819 100644 --- a/drivers/net/usb/cdc_ncm.c +++ b/drivers/net/usb/cdc_ncm.c @@ -61,7 +61,7 @@ static bool prefer_mbim; module_param(prefer_mbim, bool, 0644); MODULE_PARM_DESC(prefer_mbim, "Prefer MBIM setting on dual NCM/MBIM functions"); -static void cdc_ncm_txpath_bh(unsigned long param); +static void cdc_ncm_txpath_bh(struct tasklet_struct *t); static void cdc_ncm_tx_timeout_start(struct cdc_ncm_ctx *ctx); static enum hrtimer_restart cdc_ncm_tx_timer_cb(struct hrtimer *hr_timer); static struct usb_driver cdc_ncm_driver; @@ -813,9 +813,11 @@ int cdc_ncm_bind_common(struct usbnet *dev, struct usb_interface *intf, u8 data_ if (!ctx) return -ENOMEM; + ctx->dev = dev; + hrtimer_init(&ctx->tx_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); ctx->tx_timer.function = &cdc_ncm_tx_timer_cb; - tasklet_init(&ctx->bh, cdc_ncm_txpath_bh, (unsigned long)dev); + tasklet_setup(&ctx->bh, cdc_ncm_txpath_bh); atomic_set(&ctx->stop, 0); spin_lock_init(&ctx->mtx); @@ -1472,10 +1474,10 @@ static enum hrtimer_restart cdc_ncm_tx_timer_cb(struct hrtimer *timer) return HRTIMER_NORESTART; } -static void cdc_ncm_txpath_bh(unsigned long param) +static void cdc_ncm_txpath_bh(struct tasklet_struct *t) { - struct usbnet *dev = (struct usbnet *)param; - struct cdc_ncm_ctx *ctx = (struct cdc_ncm_ctx *)dev->data[0]; + struct cdc_ncm_ctx *ctx = from_tasklet(ctx, t, bh); + struct usbnet *dev = ctx->dev; spin_lock_bh(&ctx->mtx); if (ctx->tx_timer_pending != 0) { diff --git a/include/linux/usb/cdc_ncm.h b/include/linux/usb/cdc_ncm.h index 0ce4377545f82..f7cb3ddce7fbe 100644 --- a/include/linux/usb/cdc_ncm.h +++ b/include/linux/usb/cdc_ncm.h @@ -98,6 +98,8 @@ struct cdc_ncm_ctx { struct hrtimer tx_timer; struct tasklet_struct bh; + struct usbnet *dev; + const struct usb_cdc_ncm_desc *func_desc; const struct usb_cdc_mbim_desc *mbim_desc; const struct usb_cdc_mbim_extended_desc *mbim_extended_desc; -- GitLab From e43b21906439ed14dda84f9784d38c03d0464607 Mon Sep 17 00:00:00 2001 From: Brian Vazquez Date: Mon, 1 Feb 2021 17:41:29 +0000 Subject: [PATCH 1854/2012] net: use indirect call helpers for dst_input This patch avoids the indirect call for the common case: ip_local_deliver and ip6_input Signed-off-by: Brian Vazquez Signed-off-by: Jakub Kicinski --- include/net/dst.h | 6 +++++- net/ipv4/ip_input.c | 1 + 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/include/net/dst.h b/include/net/dst.h index 10f0a83998672..98cf6e8c06c4b 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -18,6 +18,7 @@ #include #include #include +#include struct sk_buff; @@ -441,10 +442,13 @@ static inline int dst_output(struct net *net, struct sock *sk, struct sk_buff *s return skb_dst(skb)->output(net, sk, skb); } +INDIRECT_CALLABLE_DECLARE(int ip6_input(struct sk_buff *)); +INDIRECT_CALLABLE_DECLARE(int ip_local_deliver(struct sk_buff *)); /* Input packet from network to transport. */ static inline int dst_input(struct sk_buff *skb) { - return skb_dst(skb)->input(skb); + return INDIRECT_CALL_INET(skb_dst(skb)->input, + ip6_input, ip_local_deliver, skb); } static inline struct dst_entry *dst_check(struct dst_entry *dst, u32 cookie) diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index b0c244af1e4d5..3a025c0119718 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -253,6 +253,7 @@ int ip_local_deliver(struct sk_buff *skb) net, NULL, skb, skb->dev, NULL, ip_local_deliver_finish); } +EXPORT_SYMBOL(ip_local_deliver); static inline bool ip_rcv_options(struct sk_buff *skb, struct net_device *dev) { -- GitLab From 6585d7dc491d9d5e323ed52ee32ad071e04c9dfa Mon Sep 17 00:00:00 2001 From: Brian Vazquez Date: Mon, 1 Feb 2021 17:41:30 +0000 Subject: [PATCH 1855/2012] net: use indirect call helpers for dst_output This patch avoids the indirect call for the common case: ip6_output and ip_output Signed-off-by: Brian Vazquez Signed-off-by: Jakub Kicinski --- include/net/dst.h | 8 +++++++- net/ipv4/ip_output.c | 1 + net/ipv6/ip6_output.c | 1 + 3 files changed, 9 insertions(+), 1 deletion(-) diff --git a/include/net/dst.h b/include/net/dst.h index 98cf6e8c06c4b..3932e9931f087 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -436,10 +436,16 @@ static inline void dst_set_expires(struct dst_entry *dst, int timeout) dst->expires = expires; } +INDIRECT_CALLABLE_DECLARE(int ip6_output(struct net *, struct sock *, + struct sk_buff *)); +INDIRECT_CALLABLE_DECLARE(int ip_output(struct net *, struct sock *, + struct sk_buff *)); /* Output packet to network from transport. */ static inline int dst_output(struct net *net, struct sock *sk, struct sk_buff *skb) { - return skb_dst(skb)->output(net, sk, skb); + return INDIRECT_CALL_INET(skb_dst(skb)->output, + ip6_output, ip_output, + net, sk, skb); } INDIRECT_CALLABLE_DECLARE(int ip6_input(struct sk_buff *)); diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 959b94e32f2bf..3aab53beb4ea2 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -434,6 +434,7 @@ int ip_output(struct net *net, struct sock *sk, struct sk_buff *skb) ip_finish_output, !(IPCB(skb)->flags & IPSKB_REROUTED)); } +EXPORT_SYMBOL(ip_output); /* * copy saddr and daddr, possibly using 64bit load/stores diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 117cd95df213f..ff4f9ebcf7f65 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -217,6 +217,7 @@ int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb) ip6_finish_output, !(IP6CB(skb)->flags & IP6SKB_REROUTED)); } +EXPORT_SYMBOL(ip6_output); bool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np) { -- GitLab From f67fbeaebdc0356e0cbc94f4b099f45ebe174b02 Mon Sep 17 00:00:00 2001 From: Brian Vazquez Date: Mon, 1 Feb 2021 17:41:31 +0000 Subject: [PATCH 1856/2012] net: use indirect call helpers for dst_mtu This patch avoids the indirect call for the common case: ip6_mtu and ipv4_mtu Signed-off-by: Brian Vazquez Signed-off-by: Jakub Kicinski --- include/net/dst.h | 4 +++- net/ipv4/route.c | 6 ++++-- net/ipv6/route.c | 6 ++++-- 3 files changed, 11 insertions(+), 5 deletions(-) diff --git a/include/net/dst.h b/include/net/dst.h index 3932e9931f087..9f474a79ed7d3 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -194,9 +194,11 @@ dst_feature(const struct dst_entry *dst, u32 feature) return dst_metric(dst, RTAX_FEATURES) & feature; } +INDIRECT_CALLABLE_DECLARE(unsigned int ip6_mtu(const struct dst_entry *)); +INDIRECT_CALLABLE_DECLARE(unsigned int ipv4_mtu(const struct dst_entry *)); static inline u32 dst_mtu(const struct dst_entry *dst) { - return dst->ops->mtu(dst); + return INDIRECT_CALL_INET(dst->ops->mtu, ip6_mtu, ipv4_mtu, dst); } /* RTT metrics are stored in milliseconds for user ABI, but used as jiffies */ diff --git a/net/ipv4/route.c b/net/ipv4/route.c index e26652ff7059d..4fac91f8bd6c9 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -135,7 +135,8 @@ static int ip_rt_gc_timeout __read_mostly = RT_GC_TIMEOUT; static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie); static unsigned int ipv4_default_advmss(const struct dst_entry *dst); -static unsigned int ipv4_mtu(const struct dst_entry *dst); +INDIRECT_CALLABLE_SCOPE +unsigned int ipv4_mtu(const struct dst_entry *dst); static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst); static void ipv4_link_failure(struct sk_buff *skb); static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, @@ -1311,7 +1312,7 @@ static unsigned int ipv4_default_advmss(const struct dst_entry *dst) return min(advmss, IPV4_MAX_PMTU - header_size); } -static unsigned int ipv4_mtu(const struct dst_entry *dst) +INDIRECT_CALLABLE_SCOPE unsigned int ipv4_mtu(const struct dst_entry *dst) { const struct rtable *rt = (const struct rtable *)dst; unsigned int mtu = rt->rt_pmtu; @@ -1333,6 +1334,7 @@ static unsigned int ipv4_mtu(const struct dst_entry *dst) return mtu - lwtunnel_headroom(dst->lwtstate, mtu); } +EXPORT_SYMBOL(ipv4_mtu); static void ip_del_fnhe(struct fib_nh_common *nhc, __be32 daddr) { diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 92b400eb84769..886b2095097e5 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -83,7 +83,8 @@ enum rt6_nud_state { static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie); static unsigned int ip6_default_advmss(const struct dst_entry *dst); -static unsigned int ip6_mtu(const struct dst_entry *dst); +INDIRECT_CALLABLE_SCOPE +unsigned int ip6_mtu(const struct dst_entry *dst); static struct dst_entry *ip6_negative_advice(struct dst_entry *); static void ip6_dst_destroy(struct dst_entry *); static void ip6_dst_ifdown(struct dst_entry *, @@ -3089,7 +3090,7 @@ static unsigned int ip6_default_advmss(const struct dst_entry *dst) return mtu; } -static unsigned int ip6_mtu(const struct dst_entry *dst) +INDIRECT_CALLABLE_SCOPE unsigned int ip6_mtu(const struct dst_entry *dst) { struct inet6_dev *idev; unsigned int mtu; @@ -3111,6 +3112,7 @@ out: return mtu - lwtunnel_headroom(dst->lwtstate, mtu); } +EXPORT_SYMBOL(ip6_mtu); /* MTU selection: * 1. mtu on route is locked - use it -- GitLab From bbd807dfbf20506f5548b0297c430a09326e7c4b Mon Sep 17 00:00:00 2001 From: Brian Vazquez Date: Mon, 1 Feb 2021 17:41:32 +0000 Subject: [PATCH 1857/2012] net: indirect call helpers for ipv4/ipv6 dst_check functions This patch avoids the indirect call for the common case: ip6_dst_check and ipv4_dst_check Signed-off-by: Brian Vazquez Signed-off-by: Jakub Kicinski --- include/net/dst.h | 7 ++++++- net/core/sock.c | 12 ++++++++++-- net/ipv4/route.c | 7 +++++-- net/ipv4/tcp_ipv4.c | 5 ++++- net/ipv6/route.c | 7 +++++-- net/ipv6/tcp_ipv6.c | 5 ++++- 6 files changed, 34 insertions(+), 9 deletions(-) diff --git a/include/net/dst.h b/include/net/dst.h index 9f474a79ed7d3..26f134ad3a25a 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -459,10 +459,15 @@ static inline int dst_input(struct sk_buff *skb) ip6_input, ip_local_deliver, skb); } +INDIRECT_CALLABLE_DECLARE(struct dst_entry *ip6_dst_check(struct dst_entry *, + u32)); +INDIRECT_CALLABLE_DECLARE(struct dst_entry *ipv4_dst_check(struct dst_entry *, + u32)); static inline struct dst_entry *dst_check(struct dst_entry *dst, u32 cookie) { if (dst->obsolete) - dst = dst->ops->check(dst, cookie); + dst = INDIRECT_CALL_INET(dst->ops->check, ip6_dst_check, + ipv4_dst_check, dst, cookie); return dst; } diff --git a/net/core/sock.c b/net/core/sock.c index 648a5cb46209a..0ed98f20448a2 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -526,11 +526,17 @@ discard_and_relse: } EXPORT_SYMBOL(__sk_receive_skb); +INDIRECT_CALLABLE_DECLARE(struct dst_entry *ip6_dst_check(struct dst_entry *, + u32)); +INDIRECT_CALLABLE_DECLARE(struct dst_entry *ipv4_dst_check(struct dst_entry *, + u32)); struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie) { struct dst_entry *dst = __sk_dst_get(sk); - if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) { + if (dst && dst->obsolete && + INDIRECT_CALL_INET(dst->ops->check, ip6_dst_check, ipv4_dst_check, + dst, cookie) == NULL) { sk_tx_queue_clear(sk); sk->sk_dst_pending_confirm = 0; RCU_INIT_POINTER(sk->sk_dst_cache, NULL); @@ -546,7 +552,9 @@ struct dst_entry *sk_dst_check(struct sock *sk, u32 cookie) { struct dst_entry *dst = sk_dst_get(sk); - if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) { + if (dst && dst->obsolete && + INDIRECT_CALL_INET(dst->ops->check, ip6_dst_check, ipv4_dst_check, + dst, cookie) == NULL) { sk_dst_reset(sk); dst_release(dst); return NULL; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 4fac91f8bd6c9..9e65377097943 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -133,7 +133,8 @@ static int ip_rt_gc_timeout __read_mostly = RT_GC_TIMEOUT; * Interface to generic destination cache. */ -static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie); +INDIRECT_CALLABLE_SCOPE +struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie); static unsigned int ipv4_default_advmss(const struct dst_entry *dst); INDIRECT_CALLABLE_SCOPE unsigned int ipv4_mtu(const struct dst_entry *dst); @@ -1188,7 +1189,8 @@ void ipv4_sk_redirect(struct sk_buff *skb, struct sock *sk) } EXPORT_SYMBOL_GPL(ipv4_sk_redirect); -static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie) +INDIRECT_CALLABLE_SCOPE struct dst_entry *ipv4_dst_check(struct dst_entry *dst, + u32 cookie) { struct rtable *rt = (struct rtable *) dst; @@ -1204,6 +1206,7 @@ static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie) return NULL; return dst; } +EXPORT_SYMBOL(ipv4_dst_check); static void ipv4_send_dest_unreach(struct sk_buff *skb) { diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 777306b5bc224..611039207d302 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1649,6 +1649,8 @@ u16 tcp_v4_get_syncookie(struct sock *sk, struct iphdr *iph, return mss; } +INDIRECT_CALLABLE_DECLARE(struct dst_entry *ipv4_dst_check(struct dst_entry *, + u32)); /* The socket must have it's spinlock held when we get * here, unless it is a TCP_LISTEN socket. * @@ -1668,7 +1670,8 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) sk_mark_napi_id(sk, skb); if (dst) { if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif || - !dst->ops->check(dst, 0)) { + !INDIRECT_CALL_1(dst->ops->check, ipv4_dst_check, + dst, 0)) { dst_release(dst); sk->sk_rx_dst = NULL; } diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 886b2095097e5..8d9e053dc0717 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -81,7 +81,8 @@ enum rt6_nud_state { RT6_NUD_SUCCEED = 1 }; -static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie); +INDIRECT_CALLABLE_SCOPE +struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie); static unsigned int ip6_default_advmss(const struct dst_entry *dst); INDIRECT_CALLABLE_SCOPE unsigned int ip6_mtu(const struct dst_entry *dst); @@ -2612,7 +2613,8 @@ static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt, return NULL; } -static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie) +INDIRECT_CALLABLE_SCOPE struct dst_entry *ip6_dst_check(struct dst_entry *dst, + u32 cookie) { struct dst_entry *dst_ret; struct fib6_info *from; @@ -2642,6 +2644,7 @@ static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie) return dst_ret; } +EXPORT_SYMBOL(ip6_dst_check); static struct dst_entry *ip6_negative_advice(struct dst_entry *dst) { diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 0e1509b02cb30..d093ef3ef0603 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1420,6 +1420,8 @@ out: return NULL; } +INDIRECT_CALLABLE_DECLARE(struct dst_entry *ipv4_dst_check(struct dst_entry *, + u32)); /* The socket must have it's spinlock held when we get * here, unless it is a TCP_LISTEN socket. * @@ -1473,7 +1475,8 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) sk_mark_napi_id(sk, skb); if (dst) { if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif || - dst->ops->check(dst, np->rx_dst_cookie) == NULL) { + INDIRECT_CALL_1(dst->ops->check, ip6_dst_check, + dst, np->rx_dst_cookie) == NULL) { dst_release(dst); sk->sk_rx_dst = NULL; } -- GitLab From 63532ced07772be98febdb070a0a8207401ea52e Mon Sep 17 00:00:00 2001 From: Sasha Neftin Date: Mon, 30 Nov 2020 11:24:04 +0200 Subject: [PATCH 1858/2012] igc: Clean up nvm_operations structure valid_led_default function pointer not in use and can be removed from nvm_operations structure. Signed-off-by: Sasha Neftin Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igc/igc_hw.h | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ethernet/intel/igc/igc_hw.h b/drivers/net/ethernet/intel/igc/igc_hw.h index 9da5f83ce4566..4461f8b9a864b 100644 --- a/drivers/net/ethernet/intel/igc/igc_hw.h +++ b/drivers/net/ethernet/intel/igc/igc_hw.h @@ -113,7 +113,6 @@ struct igc_nvm_operations { s32 (*write)(struct igc_hw *hw, u16 offset, u16 i, u16 *data); s32 (*update)(struct igc_hw *hw); s32 (*validate)(struct igc_hw *hw); - s32 (*valid_led_default)(struct igc_hw *hw, u16 *data); }; struct igc_phy_operations { -- GitLab From 4d59f52ba770b463c61c299d3d2f3e4d53722d74 Mon Sep 17 00:00:00 2001 From: Sasha Neftin Date: Mon, 30 Nov 2020 19:43:00 +0200 Subject: [PATCH 1859/2012] igc: Remove igc_set_fw_version comment i225 device not supported and do not plan to support configuration of fw version string for ethtool Signed-off-by: Sasha Neftin Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igc/igc_ethtool.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ethernet/intel/igc/igc_ethtool.c b/drivers/net/ethernet/intel/igc/igc_ethtool.c index ec8cd69d49928..29da2710b500a 100644 --- a/drivers/net/ethernet/intel/igc/igc_ethtool.c +++ b/drivers/net/ethernet/intel/igc/igc_ethtool.c @@ -544,7 +544,6 @@ static int igc_ethtool_set_eeprom(struct net_device *netdev, if (ret_val == 0) hw->nvm.ops.update(hw); - /* check if need: igc_set_fw_version(adapter); */ kfree(eeprom_buff); return ret_val; } -- GitLab From e96c5b46bdf1e605eb195c0cc33096dedc47f884 Mon Sep 17 00:00:00 2001 From: Sasha Neftin Date: Sun, 6 Dec 2020 11:37:07 +0200 Subject: [PATCH 1860/2012] igc: Remove MULR mask define Multiple Tx Data Read Requests is hardware pipeline feature and is not controlled by software Signed-off-by: Sasha Neftin Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igc/igc_defines.h | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ethernet/intel/igc/igc_defines.h b/drivers/net/ethernet/intel/igc/igc_defines.h index 32f5fd6841398..6cc958031fce6 100644 --- a/drivers/net/ethernet/intel/igc/igc_defines.h +++ b/drivers/net/ethernet/intel/igc/igc_defines.h @@ -284,7 +284,6 @@ #define IGC_TCTL_CT 0x00000ff0 /* collision threshold */ #define IGC_TCTL_COLD 0x003ff000 /* collision distance */ #define IGC_TCTL_RTLC 0x01000000 /* Re-transmit on late collision */ -#define IGC_TCTL_MULR 0x10000000 /* Multiple request support */ /* Flow Control Constants */ #define FLOW_CONTROL_ADDRESS_LOW 0x00C28001 -- GitLab From e65299444e3cad6d7cdfd09f9ebd77020f451887 Mon Sep 17 00:00:00 2001 From: Sasha Neftin Date: Sun, 6 Dec 2020 14:07:00 +0200 Subject: [PATCH 1861/2012] igc: Add Host Good Packets Transmitted Count This counter counts the number of good (non-erred) packets transmitted sent by the host. A good transmit packet is considered one that is 64 or more bytes in length (from through , inclusively) in length Signed-off-by: Sasha Neftin Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igc/igc_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index 43aec42e6d9d4..e26ec0c822719 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -3674,6 +3674,7 @@ void igc_update_stats(struct igc_adapter *adapter) adapter->stats.prc1522 += rd32(IGC_PRC1522); adapter->stats.tlpic += rd32(IGC_TLPIC); adapter->stats.rlpic += rd32(IGC_RLPIC); + adapter->stats.hgptc += rd32(IGC_HGPTC); mpc = rd32(IGC_MPC); adapter->stats.mpc += mpc; -- GitLab From 01bb6129c641030a40931c1a8c60ce4098c23dc9 Mon Sep 17 00:00:00 2001 From: Sasha Neftin Date: Thu, 10 Dec 2020 08:42:09 +0200 Subject: [PATCH 1862/2012] igc: Expose the NVM version Expose the NVM map version via drvinfo in ethtool NVM image version is reported as firmware version for i225 device Minor typo fix - remove space Signed-off-by: Sasha Neftin Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igc/igc.h | 2 ++ drivers/net/ethernet/intel/igc/igc_defines.h | 1 + drivers/net/ethernet/intel/igc/igc_ethtool.c | 16 ++++++++++++++-- 3 files changed, 17 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/igc/igc.h b/drivers/net/ethernet/intel/igc/igc.h index 35baae900c1fd..2d8b1716a20c7 100644 --- a/drivers/net/ethernet/intel/igc/igc.h +++ b/drivers/net/ethernet/intel/igc/igc.h @@ -217,6 +217,8 @@ struct igc_adapter { struct timecounter tc; struct timespec64 prev_ptp_time; /* Pre-reset PTP clock */ ktime_t ptp_reset_start; /* Reset time in clock mono */ + + char fw_version[16]; }; void igc_up(struct igc_adapter *adapter); diff --git a/drivers/net/ethernet/intel/igc/igc_defines.h b/drivers/net/ethernet/intel/igc/igc_defines.h index 6cc958031fce6..4b7251d0c4e18 100644 --- a/drivers/net/ethernet/intel/igc/igc_defines.h +++ b/drivers/net/ethernet/intel/igc/igc_defines.h @@ -160,6 +160,7 @@ #define IGC_NVM_RW_REG_START 1 /* Start operation */ #define IGC_NVM_RW_ADDR_SHIFT 2 /* Shift to the address bits */ #define IGC_NVM_POLL_READ 0 /* Flag for polling for read complete */ +#define IGC_NVM_DEV_STARTER 5 /* Dev_starter Version */ /* NVM Word Offsets */ #define NVM_CHECKSUM_REG 0x003F diff --git a/drivers/net/ethernet/intel/igc/igc_ethtool.c b/drivers/net/ethernet/intel/igc/igc_ethtool.c index 29da2710b500a..7dd1ca7f3ed54 100644 --- a/drivers/net/ethernet/intel/igc/igc_ethtool.c +++ b/drivers/net/ethernet/intel/igc/igc_ethtool.c @@ -129,10 +129,22 @@ static void igc_ethtool_get_drvinfo(struct net_device *netdev, struct ethtool_drvinfo *drvinfo) { struct igc_adapter *adapter = netdev_priv(netdev); + struct igc_hw *hw = &adapter->hw; + u16 nvm_version = 0; + + strscpy(drvinfo->driver, igc_driver_name, sizeof(drvinfo->driver)); + + /* NVM image version is reported as firmware version for i225 device */ + hw->nvm.ops.read(hw, IGC_NVM_DEV_STARTER, 1, &nvm_version); + + scnprintf(adapter->fw_version, + sizeof(adapter->fw_version), + "%x", + nvm_version); - strlcpy(drvinfo->driver, igc_driver_name, sizeof(drvinfo->driver)); + strscpy(drvinfo->fw_version, adapter->fw_version, + sizeof(drvinfo->fw_version)); - /* add fw_version here */ strlcpy(drvinfo->bus_info, pci_name(adapter->pdev), sizeof(drvinfo->bus_info)); -- GitLab From 94f794d15a5ea902aabf148f17f5310a2f6ccd67 Mon Sep 17 00:00:00 2001 From: Sasha Neftin Date: Sun, 20 Dec 2020 11:16:49 +0200 Subject: [PATCH 1863/2012] igc: Expose the gPHY firmware version Extend reporting of NVM image version to include the gPHY (i225 PHY) firmware version. Signed-off-by: Sasha Neftin Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igc/igc.h | 2 +- drivers/net/ethernet/intel/igc/igc_ethtool.c | 9 +++++++-- drivers/net/ethernet/intel/igc/igc_phy.c | 18 ++++++++++++++++++ drivers/net/ethernet/intel/igc/igc_phy.h | 1 + drivers/net/ethernet/intel/igc/igc_regs.h | 1 + 5 files changed, 28 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/intel/igc/igc.h b/drivers/net/ethernet/intel/igc/igc.h index 2d8b1716a20c7..5d2809dfd06a4 100644 --- a/drivers/net/ethernet/intel/igc/igc.h +++ b/drivers/net/ethernet/intel/igc/igc.h @@ -218,7 +218,7 @@ struct igc_adapter { struct timespec64 prev_ptp_time; /* Pre-reset PTP clock */ ktime_t ptp_reset_start; /* Reset time in clock mono */ - char fw_version[16]; + char fw_version[32]; }; void igc_up(struct igc_adapter *adapter); diff --git a/drivers/net/ethernet/intel/igc/igc_ethtool.c b/drivers/net/ethernet/intel/igc/igc_ethtool.c index 7dd1ca7f3ed54..a3811d7e59bc7 100644 --- a/drivers/net/ethernet/intel/igc/igc_ethtool.c +++ b/drivers/net/ethernet/intel/igc/igc_ethtool.c @@ -131,16 +131,21 @@ static void igc_ethtool_get_drvinfo(struct net_device *netdev, struct igc_adapter *adapter = netdev_priv(netdev); struct igc_hw *hw = &adapter->hw; u16 nvm_version = 0; + u16 gphy_version; strscpy(drvinfo->driver, igc_driver_name, sizeof(drvinfo->driver)); /* NVM image version is reported as firmware version for i225 device */ hw->nvm.ops.read(hw, IGC_NVM_DEV_STARTER, 1, &nvm_version); + /* gPHY firmware version is reported as PHY FW version */ + gphy_version = igc_read_phy_fw_version(hw); + scnprintf(adapter->fw_version, sizeof(adapter->fw_version), - "%x", - nvm_version); + "%x:%x", + nvm_version, + gphy_version); strscpy(drvinfo->fw_version, adapter->fw_version, sizeof(drvinfo->fw_version)); diff --git a/drivers/net/ethernet/intel/igc/igc_phy.c b/drivers/net/ethernet/intel/igc/igc_phy.c index 8e1799508edc4..83aeb5e7076fd 100644 --- a/drivers/net/ethernet/intel/igc/igc_phy.c +++ b/drivers/net/ethernet/intel/igc/igc_phy.c @@ -791,3 +791,21 @@ s32 igc_read_phy_reg_gpy(struct igc_hw *hw, u32 offset, u16 *data) return ret_val; } + +/** + * igc_read_phy_fw_version - Read gPHY firmware version + * @hw: pointer to the HW structure + */ +u16 igc_read_phy_fw_version(struct igc_hw *hw) +{ + struct igc_phy_info *phy = &hw->phy; + u16 gphy_version = 0; + u16 ret_val; + + /* NVM image version is reported as firmware version for i225 device */ + ret_val = phy->ops.read_reg(hw, IGC_GPHY_VERSION, &gphy_version); + if (ret_val) + hw_dbg("igc_phy: read wrong gphy version\n"); + + return gphy_version; +} diff --git a/drivers/net/ethernet/intel/igc/igc_phy.h b/drivers/net/ethernet/intel/igc/igc_phy.h index 25cba33de7e27..1b031372d2066 100644 --- a/drivers/net/ethernet/intel/igc/igc_phy.h +++ b/drivers/net/ethernet/intel/igc/igc_phy.h @@ -17,5 +17,6 @@ void igc_power_up_phy_copper(struct igc_hw *hw); void igc_power_down_phy_copper(struct igc_hw *hw); s32 igc_write_phy_reg_gpy(struct igc_hw *hw, u32 offset, u16 data); s32 igc_read_phy_reg_gpy(struct igc_hw *hw, u32 offset, u16 *data); +u16 igc_read_phy_fw_version(struct igc_hw *hw); #endif diff --git a/drivers/net/ethernet/intel/igc/igc_regs.h b/drivers/net/ethernet/intel/igc/igc_regs.h index b52dd9d737e87..3e5cb7aef9dab 100644 --- a/drivers/net/ethernet/intel/igc/igc_regs.h +++ b/drivers/net/ethernet/intel/igc/igc_regs.h @@ -13,6 +13,7 @@ #define IGC_MDICNFG 0x00E04 /* MDC/MDIO Configuration - RW */ #define IGC_CONNSW 0x00034 /* Copper/Fiber switch control - RW */ #define IGC_I225_PHPM 0x00E14 /* I225 PHY Power Management */ +#define IGC_GPHY_VERSION 0x0001E /* I225 gPHY Firmware Version */ /* Internal Packet Buffer Size Registers */ #define IGC_RXPBS 0x02404 /* Rx Packet Buffer Size - RW */ -- GitLab From ed443cdf67b58dfc5c48ce60f5f79ad855316f98 Mon Sep 17 00:00:00 2001 From: Sasha Neftin Date: Sun, 17 Jan 2021 10:57:02 +0200 Subject: [PATCH 1864/2012] igc: Prefer strscpy over strlcpy Use the strscpy method instead of strlcpy method. See: https://lore.kernel.org/r/CAHk-=wgfRnXz0W3D37d01q3JFkr _i_uTL=V6A6G1oUZcprmknw@mail.gmail.com/ Signed-off-by: Sasha Neftin Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igc/igc_ethtool.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/igc/igc_ethtool.c b/drivers/net/ethernet/intel/igc/igc_ethtool.c index a3811d7e59bc7..824a6c454bca3 100644 --- a/drivers/net/ethernet/intel/igc/igc_ethtool.c +++ b/drivers/net/ethernet/intel/igc/igc_ethtool.c @@ -150,7 +150,7 @@ static void igc_ethtool_get_drvinfo(struct net_device *netdev, strscpy(drvinfo->fw_version, adapter->fw_version, sizeof(drvinfo->fw_version)); - strlcpy(drvinfo->bus_info, pci_name(adapter->pdev), + strscpy(drvinfo->bus_info, pci_name(adapter->pdev), sizeof(drvinfo->bus_info)); drvinfo->n_priv_flags = IGC_PRIV_FLAGS_STR_LEN; -- GitLab From 9c99482e45b0334464b49daf4f93aa1d661b6abc Mon Sep 17 00:00:00 2001 From: Sasha Neftin Date: Sun, 13 Dec 2020 17:25:26 +0200 Subject: [PATCH 1865/2012] igc: Remove unused local receiver mask Local receiver mask SR_1000T_LOCAL_RX_STATUS not in use in i225 device and could be removed Signed-off-by: Sasha Neftin Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igc/igc_defines.h | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ethernet/intel/igc/igc_defines.h b/drivers/net/ethernet/intel/igc/igc_defines.h index 4b7251d0c4e18..5286047ff9149 100644 --- a/drivers/net/ethernet/intel/igc/igc_defines.h +++ b/drivers/net/ethernet/intel/igc/igc_defines.h @@ -129,7 +129,6 @@ /* 1000BASE-T Status Register */ #define SR_1000T_REMOTE_RX_STATUS 0x1000 /* Remote receiver OK */ -#define SR_1000T_LOCAL_RX_STATUS 0x2000 /* Local receiver OK */ /* PHY GPY 211 registers */ #define STANDARD_AN_REG_MASK 0x0007 /* MMD */ -- GitLab From 4917fc8eb640ebfa69be9ce6048500c29dd2ecc6 Mon Sep 17 00:00:00 2001 From: Sasha Neftin Date: Sun, 20 Dec 2020 11:15:36 +0200 Subject: [PATCH 1866/2012] igc: Remove unused FUNC_1 mask FUNC_1 mask not in use in i225 device and could be removed Signed-off-by: Sasha Neftin Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igc/igc_defines.h | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ethernet/intel/igc/igc_defines.h b/drivers/net/ethernet/intel/igc/igc_defines.h index 5286047ff9149..b909f00a79e66 100644 --- a/drivers/net/ethernet/intel/igc/igc_defines.h +++ b/drivers/net/ethernet/intel/igc/igc_defines.h @@ -179,7 +179,6 @@ #define IGC_STATUS_LU 0x00000002 /* Link up.0=no,1=link */ #define IGC_STATUS_FUNC_MASK 0x0000000C /* PCI Function Mask */ #define IGC_STATUS_FUNC_SHIFT 2 -#define IGC_STATUS_FUNC_1 0x00000004 /* Function 1 */ #define IGC_STATUS_TXOFF 0x00000010 /* transmission paused */ #define IGC_STATUS_SPEED_100 0x00000040 /* Speed 100Mb/s */ #define IGC_STATUS_SPEED_1000 0x00000080 /* Speed 1000Mb/s */ -- GitLab From e0c16233577fb8dde90760632df535d7b7846267 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 2 Feb 2021 12:12:38 +0300 Subject: [PATCH 1867/2012] net: mscc: ocelot: fix error handling bugs in mscc_ocelot_init_ports() There are several error handling bugs in mscc_ocelot_init_ports(). I went through the code, and carefully audited it and made fixes and cleanups. 1) The ocelot_probe_port() function didn't have a mirror release function so it was hard to follow. I created the ocelot_release_port() function. 2) In the ocelot_probe_port() function, if the register_netdev() call failed, then it lead to a double free_netdev(dev) bug. Fix this by setting "ocelot->ports[port] = NULL" on the error path. 3) I was concerned that the "port" which comes from of_property_read_u32() might be out of bounds so I added a check for that. 4) In the original code if ocelot_regmap_init() failed then the driver tried to continue but I think that should be a fatal error. 5) If ocelot_probe_port() failed then the most recent devlink was leaked. The fix for mostly came Vladimir Oltean. Get rid of "registered_ports" and just set a bit in "devlink_ports_registered" to say when the devlink port has been registered (and needs to be unregistered on error). There are fewer than 32 ports so a u32 is large enough for this purpose. 6) The error handling if the final ocelot_port_devlink_init() failed had two problems. The "while (port-- >= 0)" loop should have been "--port" pre-op instead of a post-op to avoid a buffer underflow. The "if (!registered_ports[port])" condition was reversed leading to resource leaks and double frees. Fixes: 6c30384eb1de ("net: mscc: ocelot: register devlink ports") Signed-off-by: Dan Carpenter Reviewed-by: Vladimir Oltean Tested-by: Vladimir Oltean Link: https://lore.kernel.org/r/YBkXhqRxHtRGzSnJ@mwanda Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mscc/ocelot.h | 1 + drivers/net/ethernet/mscc/ocelot_net.c | 14 +++++- drivers/net/ethernet/mscc/ocelot_vsc7514.c | 52 ++++++++-------------- 3 files changed, 33 insertions(+), 34 deletions(-) diff --git a/drivers/net/ethernet/mscc/ocelot.h b/drivers/net/ethernet/mscc/ocelot.h index e8621dbc14f73..76b8d8ce3b48b 100644 --- a/drivers/net/ethernet/mscc/ocelot.h +++ b/drivers/net/ethernet/mscc/ocelot.h @@ -121,6 +121,7 @@ void ocelot_port_writel(struct ocelot_port *port, u32 val, u32 reg); int ocelot_probe_port(struct ocelot *ocelot, int port, struct regmap *target, struct phy_device *phy); +void ocelot_release_port(struct ocelot_port *ocelot_port); int ocelot_devlink_init(struct ocelot *ocelot); void ocelot_devlink_teardown(struct ocelot *ocelot); int ocelot_port_devlink_init(struct ocelot *ocelot, int port, diff --git a/drivers/net/ethernet/mscc/ocelot_net.c b/drivers/net/ethernet/mscc/ocelot_net.c index 05142803a4638..e6b33d9df184b 100644 --- a/drivers/net/ethernet/mscc/ocelot_net.c +++ b/drivers/net/ethernet/mscc/ocelot_net.c @@ -1283,7 +1283,19 @@ int ocelot_probe_port(struct ocelot *ocelot, int port, struct regmap *target, if (err) { dev_err(ocelot->dev, "register_netdev failed\n"); free_netdev(dev); + ocelot->ports[port] = NULL; + return err; } - return err; + return 0; +} + +void ocelot_release_port(struct ocelot_port *ocelot_port) +{ + struct ocelot_port_private *priv = container_of(ocelot_port, + struct ocelot_port_private, + port); + + unregister_netdev(priv->dev); + free_netdev(priv->dev); } diff --git a/drivers/net/ethernet/mscc/ocelot_vsc7514.c b/drivers/net/ethernet/mscc/ocelot_vsc7514.c index 407244fe5b17b..b52e24826b107 100644 --- a/drivers/net/ethernet/mscc/ocelot_vsc7514.c +++ b/drivers/net/ethernet/mscc/ocelot_vsc7514.c @@ -1064,7 +1064,6 @@ static void mscc_ocelot_release_ports(struct ocelot *ocelot) int port; for (port = 0; port < ocelot->num_phys_ports; port++) { - struct ocelot_port_private *priv; struct ocelot_port *ocelot_port; ocelot_port = ocelot->ports[port]; @@ -1072,12 +1071,7 @@ static void mscc_ocelot_release_ports(struct ocelot *ocelot) continue; ocelot_deinit_port(ocelot, port); - - priv = container_of(ocelot_port, struct ocelot_port_private, - port); - - unregister_netdev(priv->dev); - free_netdev(priv->dev); + ocelot_release_port(ocelot_port); } } @@ -1085,8 +1079,8 @@ static int mscc_ocelot_init_ports(struct platform_device *pdev, struct device_node *ports) { struct ocelot *ocelot = platform_get_drvdata(pdev); + u32 devlink_ports_registered = 0; struct device_node *portnp; - bool *registered_ports; int port, err; u32 reg; @@ -1102,11 +1096,6 @@ static int mscc_ocelot_init_ports(struct platform_device *pdev, if (!ocelot->devlink_ports) return -ENOMEM; - registered_ports = kcalloc(ocelot->num_phys_ports, sizeof(bool), - GFP_KERNEL); - if (!registered_ports) - return -ENOMEM; - for_each_available_child_of_node(ports, portnp) { struct ocelot_port_private *priv; struct ocelot_port *ocelot_port; @@ -1123,14 +1112,22 @@ static int mscc_ocelot_init_ports(struct platform_device *pdev, continue; port = reg; + if (port < 0 || port >= ocelot->num_phys_ports) { + dev_err(ocelot->dev, + "invalid port number: %d >= %d\n", port, + ocelot->num_phys_ports); + continue; + } snprintf(res_name, sizeof(res_name), "port%d", port); res = platform_get_resource_byname(pdev, IORESOURCE_MEM, res_name); target = ocelot_regmap_init(ocelot, res); - if (IS_ERR(target)) - continue; + if (IS_ERR(target)) { + err = PTR_ERR(target); + goto out_teardown; + } phy_node = of_parse_phandle(portnp, "phy-handle", 0); if (!phy_node) @@ -1147,6 +1144,7 @@ static int mscc_ocelot_init_ports(struct platform_device *pdev, of_node_put(portnp); goto out_teardown; } + devlink_ports_registered |= BIT(port); err = ocelot_probe_port(ocelot, port, target, phy); if (err) { @@ -1154,8 +1152,6 @@ static int mscc_ocelot_init_ports(struct platform_device *pdev, goto out_teardown; } - registered_ports[port] = true; - ocelot_port = ocelot->ports[port]; priv = container_of(ocelot_port, struct ocelot_port_private, port); @@ -1208,23 +1204,16 @@ static int mscc_ocelot_init_ports(struct platform_device *pdev, /* Initialize unused devlink ports at the end */ for (port = 0; port < ocelot->num_phys_ports; port++) { - if (registered_ports[port]) + if (devlink_ports_registered & BIT(port)) continue; err = ocelot_port_devlink_init(ocelot, port, DEVLINK_PORT_FLAVOUR_UNUSED); - if (err) { - while (port-- >= 0) { - if (!registered_ports[port]) - continue; - ocelot_port_devlink_teardown(ocelot, port); - } - + if (err) goto out_teardown; - } - } - kfree(registered_ports); + devlink_ports_registered |= BIT(port); + } return 0; @@ -1233,12 +1222,9 @@ out_teardown: mscc_ocelot_release_ports(ocelot); /* Tear down devlink ports for the registered network interfaces */ for (port = 0; port < ocelot->num_phys_ports; port++) { - if (!registered_ports[port]) - continue; - - ocelot_port_devlink_teardown(ocelot, port); + if (devlink_ports_registered & BIT(port)) + ocelot_port_devlink_teardown(ocelot, port); } - kfree(registered_ports); return err; } -- GitLab From 4160d9ec5b41738e3a020b5a18cb5e99e2e9244d Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 2 Feb 2021 12:13:44 +0300 Subject: [PATCH 1868/2012] net: mscc: ocelot: fix error code in mscc_ocelot_probe() Probe should return an error code if platform_get_irq_byname() fails but it returns success instead. Fixes: 6c30384eb1de ("net: mscc: ocelot: register devlink ports") Signed-off-by: Dan Carpenter Reviewed-by: Vladimir Oltean Link: https://lore.kernel.org/r/YBkXyFIl4V9hgxYM@mwanda Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mscc/ocelot_vsc7514.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mscc/ocelot_vsc7514.c b/drivers/net/ethernet/mscc/ocelot_vsc7514.c index b52e24826b107..6b6eb92149bac 100644 --- a/drivers/net/ethernet/mscc/ocelot_vsc7514.c +++ b/drivers/net/ethernet/mscc/ocelot_vsc7514.c @@ -1300,8 +1300,10 @@ static int mscc_ocelot_probe(struct platform_device *pdev) goto out_free_devlink; irq_xtr = platform_get_irq_byname(pdev, "xtr"); - if (irq_xtr < 0) + if (irq_xtr < 0) { + err = irq_xtr; goto out_free_devlink; + } err = devm_request_threaded_irq(&pdev->dev, irq_xtr, NULL, ocelot_xtr_irq_handler, IRQF_ONESHOT, -- GitLab From 9660ef25e958b641f9f340cd97b1fab2286a8b07 Mon Sep 17 00:00:00 2001 From: Sasha Neftin Date: Wed, 6 Jan 2021 19:27:04 +0200 Subject: [PATCH 1869/2012] igc: Fix TDBAL register show incorrect value Fixed a typo which caused the registers dump function to read the RDBAL register when printing TDBAL register values. _reg_dump method has been partially derived from i210 and have same typo. Suggested-by: Gal Hammer Signed-off-by: Sasha Neftin Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igc/igc_dump.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/igc/igc_dump.c b/drivers/net/ethernet/intel/igc/igc_dump.c index 4b9ec7d0b7276..495bed47ed0a1 100644 --- a/drivers/net/ethernet/intel/igc/igc_dump.c +++ b/drivers/net/ethernet/intel/igc/igc_dump.c @@ -75,7 +75,7 @@ static void igc_regdump(struct igc_hw *hw, struct igc_reg_info *reginfo) break; case IGC_TDBAL(0): for (n = 0; n < 4; n++) - regs[n] = rd32(IGC_RDBAL(n)); + regs[n] = rd32(IGC_TDBAL(n)); break; case IGC_TDBAH(0): for (n = 0; n < 4; n++) -- GitLab From abb9efc70988087a7ea04c90112657e68e8894a8 Mon Sep 17 00:00:00 2001 From: Gal Hammer Date: Wed, 6 Jan 2021 11:28:26 +0200 Subject: [PATCH 1870/2012] igb: fix TDBAL register show incorrect value Fixed a typo which caused the registers dump function to read the RDBAL register when printing TDBAL register values. Signed-off-by: Gal Hammer Tested-by: David Switzer Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igb/igb_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index 84d4284b8b326..d6090faaa41d2 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -316,7 +316,7 @@ static void igb_regdump(struct e1000_hw *hw, struct igb_reg_info *reginfo) break; case E1000_TDBAL(0): for (n = 0; n < 4; n++) - regs[n] = rd32(E1000_RDBAL(n)); + regs[n] = rd32(E1000_TDBAL(n)); break; case E1000_TDBAH(0): for (n = 0; n < 4; n++) -- GitLab From 6e6026f2dd2005844fb35c3911e8083c09952c6c Mon Sep 17 00:00:00 2001 From: Nick Lowe Date: Mon, 21 Dec 2020 22:25:02 +0000 Subject: [PATCH 1871/2012] igb: Enable RSS for Intel I211 Ethernet Controller The Intel I211 Ethernet Controller supports 2 Receive Side Scaling (RSS) queues. It should not be excluded from having this feature enabled. Via commit c883de9fd787 ("igb: rename igb define to be more generic") E1000_MRQC_ENABLE_RSS_4Q was renamed to E1000_MRQC_ENABLE_RSS_MQ to indicate that this is a generic bit flag to enable queues and not a flag that is specific to devices that support 4 queues The bit flag enables 2, 4 or 8 queues appropriately depending on the part. Tested with a multicore CPU and frames were then distributed as expected. This issue appears to have been introduced because of confusion caused by the prior name. Signed-off-by: Nick Lowe Tested-by: David Switzer Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igb/igb_main.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index d6090faaa41d2..23e50de944741 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -4482,8 +4482,7 @@ static void igb_setup_mrqc(struct igb_adapter *adapter) else mrqc |= E1000_MRQC_ENABLE_VMDQ; } else { - if (hw->mac.type != e1000_i211) - mrqc |= E1000_MRQC_ENABLE_RSS_MQ; + mrqc |= E1000_MRQC_ENABLE_RSS_MQ; } igb_vmm_control(adapter); -- GitLab From 2f7c1fd23d9faad5bcf8cdfe04c1632352bc0136 Mon Sep 17 00:00:00 2001 From: Tom Rix Date: Wed, 23 Dec 2020 11:44:25 -0800 Subject: [PATCH 1872/2012] igb: remove h from printk format specifier This change fixes the checkpatch warning described in this commit cbacb5ab0aa0 ("docs: printk-formats: Stop encouraging use of unnecessary %h[xudi] and %hh[xudi]") Standard integer promotion is already done and %hx and %hhx is useless so do not encourage the use of %hh[xudi] or %h[xudi]. Signed-off-by: Tom Rix Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igb/igb_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index 23e50de944741..bd0594ac3ae73 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -3156,7 +3156,7 @@ static int igb_probe(struct pci_dev *pdev, const struct pci_device_id *ent) * the PCIe SR-IOV capability. */ if (pdev->is_virtfn) { - WARN(1, KERN_ERR "%s (%hx:%hx) should not be a VF!\n", + WARN(1, KERN_ERR "%s (%x:%x) should not be a VF!\n", pci_name(pdev), pdev->vendor, pdev->device); return -EINVAL; } -- GitLab From 99eb3943ab9b90c49f27c2bfeea87e2bb4da5f3b Mon Sep 17 00:00:00 2001 From: Kaixu Xia Date: Sat, 21 Nov 2020 18:17:27 +0800 Subject: [PATCH 1873/2012] e1000e: remove the redundant value assignment in e1000_update_nvm_checksum_spt Both of the statements are value assignment of the variable act_offset. The first value assignment is overwritten by the second and is useless. Remove it. Reported-by: Tosk Robot Signed-off-by: Kaixu Xia Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/e1000e/ich8lan.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.c b/drivers/net/ethernet/intel/e1000e/ich8lan.c index 6fb46682b058a..0ac8d79a79870 100644 --- a/drivers/net/ethernet/intel/e1000e/ich8lan.c +++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c @@ -3886,13 +3886,6 @@ static s32 e1000_update_nvm_checksum_spt(struct e1000_hw *hw) if (ret_val) goto release; - /* And invalidate the previously valid segment by setting - * its signature word (0x13) high_byte to 0b. This can be - * done without an erase because flash erase sets all bits - * to 1's. We can write 1's to 0's without an erase - */ - act_offset = (old_bank_offset + E1000_ICH_NVM_SIG_WORD) * 2 + 1; - /* offset in words but we read dword */ act_offset = old_bank_offset + E1000_ICH_NVM_SIG_WORD - 1; ret_val = e1000_read_flash_dword_ich8lan(hw, act_offset, &dword); -- GitLab From 5a04b958ad3906c57c6c515ba28fa340b6938d28 Mon Sep 17 00:00:00 2001 From: Sudip Mukherjee Date: Sun, 11 Oct 2020 22:23:26 +0100 Subject: [PATCH 1874/2012] e1000: drop unneeded assignment in e1000_set_itr() The variable 'current_itr' is assigned to 0 before jumping to 'set_itr_now' but it has not been used after the jump. So, remove the unneeded assignment. Signed-off-by: Sudip Mukherjee Reviewed-by: Lukas Bulwahn Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/e1000/e1000_main.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ethernet/intel/e1000/e1000_main.c b/drivers/net/ethernet/intel/e1000/e1000_main.c index 5e28cf4fa2cd9..042de276e6320 100644 --- a/drivers/net/ethernet/intel/e1000/e1000_main.c +++ b/drivers/net/ethernet/intel/e1000/e1000_main.c @@ -2632,7 +2632,6 @@ static void e1000_set_itr(struct e1000_adapter *adapter) /* for non-gigabit speeds, just fix the interrupt rate at 4000 */ if (unlikely(adapter->link_speed != SPEED_1000)) { - current_itr = 0; new_itr = 4000; goto set_itr_now; } -- GitLab From e0183b974d3008ae769d769cabfa2051c896dd48 Mon Sep 17 00:00:00 2001 From: Mike Looijmans Date: Tue, 2 Feb 2021 15:32:39 +0100 Subject: [PATCH 1875/2012] net: mdiobus: Prevent spike on MDIO bus reset signal The mdio_bus reset code first de-asserted the reset by allocating with GPIOD_OUT_LOW, then asserted and de-asserted again. In other words, if the reset signal defaulted to asserted, there'd be a short "spike" before the reset. Here is what happens depending on the pre-existing state of the reset signal: Reset (previously asserted): ~~~|_|~~~~|_______ Reset (previously deasserted): _____|~~~~|_______ ^ ^ ^ A B C At point A, the low going transition is because the reset line is requested using GPIOD_OUT_LOW. If the line is successfully requested, the first thing we do is set it high _without_ any delay. This is point B. So, a glitch occurs between A and B. We then fsleep() and finally set the GPIO low at point C. Requesting the line using GPIOD_OUT_HIGH eliminates the A and B transitions. Instead we get: Reset (previously asserted) : ~~~~~~~~~~|______ Reset (previously deasserted): ____|~~~~~|______ ^ ^ A C Where A and C are the points described above in the code. Point B has been eliminated. The issue was found when we pulled down the reset signal for the Marvell 88E1512P PHY (because it requires at least 50ms after POR with an active clock). Looking at the reset signal with a scope revealed a short spike, point B in the artwork above. Signed-off-by: Mike Looijmans Reviewed-by: Andrew Lunn Link: https://lore.kernel.org/r/20210202143239.10714-1-mike.looijmans@topic.nl Signed-off-by: Jakub Kicinski --- drivers/net/phy/mdio_bus.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c index 040509b81f02a..8235185540798 100644 --- a/drivers/net/phy/mdio_bus.c +++ b/drivers/net/phy/mdio_bus.c @@ -543,8 +543,8 @@ int __mdiobus_register(struct mii_bus *bus, struct module *owner) mutex_init(&bus->mdio_lock); mutex_init(&bus->shared_lock); - /* de-assert bus level PHY GPIO reset */ - gpiod = devm_gpiod_get_optional(&bus->dev, "reset", GPIOD_OUT_LOW); + /* assert bus level PHY GPIO reset */ + gpiod = devm_gpiod_get_optional(&bus->dev, "reset", GPIOD_OUT_HIGH); if (IS_ERR(gpiod)) { err = dev_err_probe(&bus->dev, PTR_ERR(gpiod), "mii_bus %s couldn't get reset GPIO\n", @@ -553,8 +553,6 @@ int __mdiobus_register(struct mii_bus *bus, struct module *owner) return err; } else if (gpiod) { bus->reset_gpiod = gpiod; - - gpiod_set_value_cansleep(gpiod, 1); fsleep(bus->reset_delay_us); gpiod_set_value_cansleep(gpiod, 0); if (bus->reset_post_delay_us > 0) -- GitLab From de2854c87c64788f94e34217d06e60422e4a1842 Mon Sep 17 00:00:00 2001 From: Srujana Challa Date: Tue, 2 Feb 2021 20:57:07 +0530 Subject: [PATCH 1876/2012] octeontx2-af: Mailbox changes for 98xx CPT block This patch changes CPT mailbox message format to support new block CPT1 in 98xx silicon. cpt_rd_wr_reg -> Modify cpt_rd_wr_reg mailbox and its handler to accommodate new block CPT1. cpt_lf_alloc -> Modify cpt_lf_alloc mailbox and its handler to configure LFs from a block address out of multiple blocks of same type. If a PF/VF needs to configure LFs from both the blocks then this mbox should be called twice. Signed-off-by: Mahipal Challa Signed-off-by: Srujana Challa Signed-off-by: Jakub Kicinski --- .../net/ethernet/marvell/octeontx2/af/mbox.h | 2 + .../ethernet/marvell/octeontx2/af/rvu_cpt.c | 41 +++++++++++-------- 2 files changed, 27 insertions(+), 16 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h index 89e93eb46ab73..a0fa44941204a 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h @@ -1073,6 +1073,7 @@ struct cpt_rd_wr_reg_msg { u64 *ret_val; u64 val; u8 is_write; + int blkaddr; }; struct cpt_lf_alloc_req_msg { @@ -1080,6 +1081,7 @@ struct cpt_lf_alloc_req_msg { u16 nix_pf_func; u16 sso_pf_func; u16 eng_grpmsk; + int blkaddr; }; #endif /* MBOX_H */ diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c index 35261d52c997f..b6de4b95a72a0 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c @@ -65,13 +65,13 @@ int rvu_mbox_handler_cpt_lf_alloc(struct rvu *rvu, int num_lfs, slot; u64 val; + blkaddr = req->blkaddr ? req->blkaddr : BLKADDR_CPT0; + if (blkaddr != BLKADDR_CPT0 && blkaddr != BLKADDR_CPT1) + return -ENODEV; + if (req->eng_grpmsk == 0x0) return CPT_AF_ERR_GRP_INVALID; - blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_CPT, 0); - if (blkaddr < 0) - return blkaddr; - block = &rvu->hw->block[blkaddr]; num_lfs = rvu_get_rsrc_mapcount(rvu_get_pfvf(rvu, pcifunc), block->addr); @@ -114,23 +114,17 @@ int rvu_mbox_handler_cpt_lf_alloc(struct rvu *rvu, return 0; } -int rvu_mbox_handler_cpt_lf_free(struct rvu *rvu, struct msg_req *req, - struct msg_rsp *rsp) +static int cpt_lf_free(struct rvu *rvu, struct msg_req *req, int blkaddr) { u16 pcifunc = req->hdr.pcifunc; + int num_lfs, cptlf, slot; struct rvu_block *block; - int cptlf, blkaddr; - int num_lfs, slot; - - blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_CPT, 0); - if (blkaddr < 0) - return blkaddr; block = &rvu->hw->block[blkaddr]; num_lfs = rvu_get_rsrc_mapcount(rvu_get_pfvf(rvu, pcifunc), block->addr); if (!num_lfs) - return CPT_AF_ERR_LF_INVALID; + return 0; for (slot = 0; slot < num_lfs; slot++) { cptlf = rvu_get_lf(rvu, block, pcifunc, slot); @@ -146,6 +140,21 @@ int rvu_mbox_handler_cpt_lf_free(struct rvu *rvu, struct msg_req *req, return 0; } +int rvu_mbox_handler_cpt_lf_free(struct rvu *rvu, struct msg_req *req, + struct msg_rsp *rsp) +{ + int ret; + + ret = cpt_lf_free(rvu, req, BLKADDR_CPT0); + if (ret) + return ret; + + if (is_block_implemented(rvu->hw, BLKADDR_CPT1)) + ret = cpt_lf_free(rvu, req, BLKADDR_CPT1); + + return ret; +} + static bool is_valid_offset(struct rvu *rvu, struct cpt_rd_wr_reg_msg *req) { u64 offset = req->reg_offset; @@ -208,9 +217,9 @@ int rvu_mbox_handler_cpt_rd_wr_register(struct rvu *rvu, { int blkaddr; - blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_CPT, 0); - if (blkaddr < 0) - return blkaddr; + blkaddr = req->blkaddr ? req->blkaddr : BLKADDR_CPT0; + if (blkaddr != BLKADDR_CPT0 && blkaddr != BLKADDR_CPT1) + return -ENODEV; /* This message is accepted only if sent from CPT PF/VF */ if (!is_cpt_pf(rvu, req->hdr.pcifunc) && -- GitLab From b0f60fab7805cb013311ede0127f8abbf1bdc986 Mon Sep 17 00:00:00 2001 From: Srujana Challa Date: Tue, 2 Feb 2021 20:57:08 +0530 Subject: [PATCH 1877/2012] octeontx2-af: Add support for CPT1 in debugfs Adds support to display block CPT1 stats at "/sys/kernel/debug/octeontx2/cpt1". Signed-off-by: Mahipal Challa Signed-off-by: Srujana Challa Signed-off-by: Jakub Kicinski --- .../net/ethernet/marvell/octeontx2/af/rvu.h | 7 ++ .../marvell/octeontx2/af/rvu_debugfs.c | 86 +++++++++---------- 2 files changed, 49 insertions(+), 44 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h index b1a6ecfd563eb..aabf6d5ee0206 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h @@ -33,6 +33,7 @@ #define NAME_SIZE 32 #define MAX_NIX_BLKS 2 +#define MAX_CPT_BLKS 2 /* PF_FUNC */ #define RVU_PFVF_PF_SHIFT 10 @@ -47,6 +48,11 @@ struct dump_ctx { bool all; }; +struct cpt_ctx { + int blkaddr; + struct rvu *rvu; +}; + struct rvu_debugfs { struct dentry *root; struct dentry *cgx_root; @@ -61,6 +67,7 @@ struct rvu_debugfs { struct dump_ctx nix_cq_ctx; struct dump_ctx nix_rq_ctx; struct dump_ctx nix_sq_ctx; + struct cpt_ctx cpt_ctx[MAX_CPT_BLKS]; int npa_qsize_id; int nix_qsize_id; }; diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c index f60499562d2e2..80e964330de38 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c @@ -1904,20 +1904,16 @@ static void rvu_dbg_npc_init(struct rvu *rvu) &rvu_dbg_npc_rx_miss_act_fops); } -/* CPT debugfs APIs */ static int cpt_eng_sts_display(struct seq_file *filp, u8 eng_type) { - struct rvu *rvu = filp->private; + struct cpt_ctx *ctx = filp->private; u64 busy_sts = 0, free_sts = 0; u32 e_min = 0, e_max = 0, e, i; u16 max_ses, max_ies, max_aes; - int blkaddr; + struct rvu *rvu = ctx->rvu; + int blkaddr = ctx->blkaddr; u64 reg; - blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_CPT, 0); - if (blkaddr < 0) - return -ENODEV; - reg = rvu_read64(rvu, blkaddr, CPT_AF_CONSTANTS1); max_ses = reg & 0xffff; max_ies = (reg >> 16) & 0xffff; @@ -1977,16 +1973,13 @@ RVU_DEBUG_SEQ_FOPS(cpt_ie_sts, cpt_ie_sts_display, NULL); static int rvu_dbg_cpt_engines_info_display(struct seq_file *filp, void *unused) { - struct rvu *rvu = filp->private; + struct cpt_ctx *ctx = filp->private; u16 max_ses, max_ies, max_aes; + struct rvu *rvu = ctx->rvu; + int blkaddr = ctx->blkaddr; u32 e_max, e; - int blkaddr; u64 reg; - blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_CPT, 0); - if (blkaddr < 0) - return -ENODEV; - reg = rvu_read64(rvu, blkaddr, CPT_AF_CONSTANTS1); max_ses = reg & 0xffff; max_ies = (reg >> 16) & 0xffff; @@ -2014,17 +2007,15 @@ RVU_DEBUG_SEQ_FOPS(cpt_engines_info, cpt_engines_info_display, NULL); static int rvu_dbg_cpt_lfs_info_display(struct seq_file *filp, void *unused) { - struct rvu *rvu = filp->private; - struct rvu_hwinfo *hw = rvu->hw; + struct cpt_ctx *ctx = filp->private; + int blkaddr = ctx->blkaddr; + struct rvu *rvu = ctx->rvu; struct rvu_block *block; - int blkaddr; + struct rvu_hwinfo *hw; u64 reg; u32 lf; - blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_CPT, 0); - if (blkaddr < 0) - return -ENODEV; - + hw = rvu->hw; block = &hw->block[blkaddr]; if (!block->lf.bmap) return -ENODEV; @@ -2049,13 +2040,10 @@ RVU_DEBUG_SEQ_FOPS(cpt_lfs_info, cpt_lfs_info_display, NULL); static int rvu_dbg_cpt_err_info_display(struct seq_file *filp, void *unused) { - struct rvu *rvu = filp->private; + struct cpt_ctx *ctx = filp->private; + struct rvu *rvu = ctx->rvu; + int blkaddr = ctx->blkaddr; u64 reg0, reg1; - int blkaddr; - - blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_CPT, 0); - if (blkaddr < 0) - return -ENODEV; reg0 = rvu_read64(rvu, blkaddr, CPT_AF_FLTX_INT(0)); reg1 = rvu_read64(rvu, blkaddr, CPT_AF_FLTX_INT(1)); @@ -2079,15 +2067,11 @@ RVU_DEBUG_SEQ_FOPS(cpt_err_info, cpt_err_info_display, NULL); static int rvu_dbg_cpt_pc_display(struct seq_file *filp, void *unused) { - struct rvu *rvu; - int blkaddr; + struct cpt_ctx *ctx = filp->private; + struct rvu *rvu = ctx->rvu; + int blkaddr = ctx->blkaddr; u64 reg; - rvu = filp->private; - blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_CPT, 0); - if (blkaddr < 0) - return -ENODEV; - reg = rvu_read64(rvu, blkaddr, CPT_AF_INST_REQ_PC); seq_printf(filp, "CPT instruction requests %llu\n", reg); reg = rvu_read64(rvu, blkaddr, CPT_AF_INST_LATENCY_PC); @@ -2108,26 +2092,39 @@ static int rvu_dbg_cpt_pc_display(struct seq_file *filp, void *unused) RVU_DEBUG_SEQ_FOPS(cpt_pc, cpt_pc_display, NULL); -static void rvu_dbg_cpt_init(struct rvu *rvu) +static void rvu_dbg_cpt_init(struct rvu *rvu, int blkaddr) { - if (!is_block_implemented(rvu->hw, BLKADDR_CPT0)) + struct cpt_ctx *ctx; + + if (!is_block_implemented(rvu->hw, blkaddr)) return; - rvu->rvu_dbg.cpt = debugfs_create_dir("cpt", rvu->rvu_dbg.root); + if (blkaddr == BLKADDR_CPT0) { + rvu->rvu_dbg.cpt = debugfs_create_dir("cpt", rvu->rvu_dbg.root); + ctx = &rvu->rvu_dbg.cpt_ctx[0]; + ctx->blkaddr = BLKADDR_CPT0; + ctx->rvu = rvu; + } else { + rvu->rvu_dbg.cpt = debugfs_create_dir("cpt1", + rvu->rvu_dbg.root); + ctx = &rvu->rvu_dbg.cpt_ctx[1]; + ctx->blkaddr = BLKADDR_CPT1; + ctx->rvu = rvu; + } - debugfs_create_file("cpt_pc", 0600, rvu->rvu_dbg.cpt, rvu, + debugfs_create_file("cpt_pc", 0600, rvu->rvu_dbg.cpt, ctx, &rvu_dbg_cpt_pc_fops); - debugfs_create_file("cpt_ae_sts", 0600, rvu->rvu_dbg.cpt, rvu, + debugfs_create_file("cpt_ae_sts", 0600, rvu->rvu_dbg.cpt, ctx, &rvu_dbg_cpt_ae_sts_fops); - debugfs_create_file("cpt_se_sts", 0600, rvu->rvu_dbg.cpt, rvu, + debugfs_create_file("cpt_se_sts", 0600, rvu->rvu_dbg.cpt, ctx, &rvu_dbg_cpt_se_sts_fops); - debugfs_create_file("cpt_ie_sts", 0600, rvu->rvu_dbg.cpt, rvu, + debugfs_create_file("cpt_ie_sts", 0600, rvu->rvu_dbg.cpt, ctx, &rvu_dbg_cpt_ie_sts_fops); - debugfs_create_file("cpt_engines_info", 0600, rvu->rvu_dbg.cpt, rvu, + debugfs_create_file("cpt_engines_info", 0600, rvu->rvu_dbg.cpt, ctx, &rvu_dbg_cpt_engines_info_fops); - debugfs_create_file("cpt_lfs_info", 0600, rvu->rvu_dbg.cpt, rvu, + debugfs_create_file("cpt_lfs_info", 0600, rvu->rvu_dbg.cpt, ctx, &rvu_dbg_cpt_lfs_info_fops); - debugfs_create_file("cpt_err_info", 0600, rvu->rvu_dbg.cpt, rvu, + debugfs_create_file("cpt_err_info", 0600, rvu->rvu_dbg.cpt, ctx, &rvu_dbg_cpt_err_info_fops); } @@ -2146,7 +2143,8 @@ void rvu_dbg_init(struct rvu *rvu) rvu_dbg_nix_init(rvu, BLKADDR_NIX1); rvu_dbg_cgx_init(rvu); rvu_dbg_npc_init(rvu); - rvu_dbg_cpt_init(rvu); + rvu_dbg_cpt_init(rvu, BLKADDR_CPT0); + rvu_dbg_cpt_init(rvu, BLKADDR_CPT1); } void rvu_dbg_exit(struct rvu *rvu) -- GitLab From c57c58fd5c4fd288f5aca0970982c9bd547d6288 Mon Sep 17 00:00:00 2001 From: Srujana Challa Date: Tue, 2 Feb 2021 20:57:09 +0530 Subject: [PATCH 1878/2012] octeontx2-af: Handle CPT function level reset When FLR is initiated for a VF (PCI function level reset), the parent PF gets a interrupt. PF then sends a message to admin function (AF), which then cleans up all resources attached to that VF. This patch adds support to handle CPT FLR. Signed-off-by: Narayana Prasad Raju Atherya Signed-off-by: Suheil Chandran Signed-off-by: Sunil Kovvuri Goutham Signed-off-by: Srujana Challa Signed-off-by: Jakub Kicinski --- .../net/ethernet/marvell/octeontx2/af/rvu.c | 3 + .../net/ethernet/marvell/octeontx2/af/rvu.h | 2 + .../ethernet/marvell/octeontx2/af/rvu_cpt.c | 89 +++++++++++++++++++ .../ethernet/marvell/octeontx2/af/rvu_reg.h | 8 ++ 4 files changed, 102 insertions(+) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c index 4ef7fc8bbb197..50c2a1d800f49 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c @@ -2150,6 +2150,9 @@ static void rvu_blklf_teardown(struct rvu *rvu, u16 pcifunc, u8 blkaddr) rvu_nix_lf_teardown(rvu, pcifunc, block->addr, lf); else if (block->addr == BLKADDR_NPA) rvu_npa_lf_teardown(rvu, pcifunc, lf); + else if ((block->addr == BLKADDR_CPT0) || + (block->addr == BLKADDR_CPT1)) + rvu_cpt_lf_teardown(rvu, pcifunc, lf, slot); err = rvu_lf_reset(rvu, block, lf); if (err) { diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h index aabf6d5ee0206..ce931d86600ba 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h @@ -608,6 +608,8 @@ void npc_enable_mcam_entry(struct rvu *rvu, struct npc_mcam *mcam, void npc_read_mcam_entry(struct rvu *rvu, struct npc_mcam *mcam, int blkaddr, u16 src, struct mcam_entry *entry, u8 *intf, u8 *ena); +/* CPT APIs */ +int rvu_cpt_lf_teardown(struct rvu *rvu, u16 pcifunc, int lf, int slot); #ifdef CONFIG_DEBUG_FS void rvu_dbg_init(struct rvu *rvu); diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c index b6de4b95a72a0..0945c3a3b180a 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c @@ -240,3 +240,92 @@ int rvu_mbox_handler_cpt_rd_wr_register(struct rvu *rvu, return 0; } + +#define INPROG_INFLIGHT(reg) ((reg) & 0x1FF) +#define INPROG_GRB_PARTIAL(reg) ((reg) & BIT_ULL(31)) +#define INPROG_GRB(reg) (((reg) >> 32) & 0xFF) +#define INPROG_GWB(reg) (((reg) >> 40) & 0xFF) + +static void cpt_lf_disable_iqueue(struct rvu *rvu, int blkaddr, int slot) +{ + int i = 0, hard_lp_ctr = 100000; + u64 inprog, grp_ptr; + u16 nq_ptr, dq_ptr; + + /* Disable instructions enqueuing */ + rvu_write64(rvu, blkaddr, CPT_AF_BAR2_ALIASX(slot, CPT_LF_CTL), 0x0); + + /* Disable executions in the LF's queue */ + inprog = rvu_read64(rvu, blkaddr, + CPT_AF_BAR2_ALIASX(slot, CPT_LF_INPROG)); + inprog &= ~BIT_ULL(16); + rvu_write64(rvu, blkaddr, + CPT_AF_BAR2_ALIASX(slot, CPT_LF_INPROG), inprog); + + /* Wait for CPT queue to become execution-quiescent */ + do { + inprog = rvu_read64(rvu, blkaddr, + CPT_AF_BAR2_ALIASX(slot, CPT_LF_INPROG)); + if (INPROG_GRB_PARTIAL(inprog)) { + i = 0; + hard_lp_ctr--; + } else { + i++; + } + + grp_ptr = rvu_read64(rvu, blkaddr, + CPT_AF_BAR2_ALIASX(slot, + CPT_LF_Q_GRP_PTR)); + nq_ptr = (grp_ptr >> 32) & 0x7FFF; + dq_ptr = grp_ptr & 0x7FFF; + + } while (hard_lp_ctr && (i < 10) && (nq_ptr != dq_ptr)); + + if (hard_lp_ctr == 0) + dev_warn(rvu->dev, "CPT FLR hits hard loop counter\n"); + + i = 0; + hard_lp_ctr = 100000; + do { + inprog = rvu_read64(rvu, blkaddr, + CPT_AF_BAR2_ALIASX(slot, CPT_LF_INPROG)); + + if ((INPROG_INFLIGHT(inprog) == 0) && + (INPROG_GWB(inprog) < 40) && + ((INPROG_GRB(inprog) == 0) || + (INPROG_GRB((inprog)) == 40))) { + i++; + } else { + i = 0; + hard_lp_ctr--; + } + } while (hard_lp_ctr && (i < 10)); + + if (hard_lp_ctr == 0) + dev_warn(rvu->dev, "CPT FLR hits hard loop counter\n"); +} + +int rvu_cpt_lf_teardown(struct rvu *rvu, u16 pcifunc, int lf, int slot) +{ + int blkaddr; + u64 reg; + + blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_CPT, pcifunc); + if (blkaddr != BLKADDR_CPT0 && blkaddr != BLKADDR_CPT1) + return -EINVAL; + + /* Enable BAR2 ALIAS for this pcifunc. */ + reg = BIT_ULL(16) | pcifunc; + rvu_write64(rvu, blkaddr, CPT_AF_BAR2_SEL, reg); + + cpt_lf_disable_iqueue(rvu, blkaddr, slot); + + /* Set group drop to help clear out hardware */ + reg = rvu_read64(rvu, blkaddr, CPT_AF_BAR2_ALIASX(slot, CPT_LF_INPROG)); + reg |= BIT_ULL(17); + rvu_write64(rvu, blkaddr, CPT_AF_BAR2_ALIASX(slot, CPT_LF_INPROG), reg); + + rvu_write64(rvu, blkaddr, CPT_AF_BAR2_SEL, 0); + + return 0; +} diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.h index 0fb2aa909a23f..79a6dcf0e3c08 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.h @@ -484,9 +484,17 @@ #define CPT_AF_RAS_INT_ENA_W1S (0x47030) #define CPT_AF_RAS_INT_ENA_W1C (0x47038) +#define AF_BAR2_ALIASX(a, b) (0x9100000ull | (a) << 12 | (b)) +#define CPT_AF_BAR2_SEL 0x9000000 +#define CPT_AF_BAR2_ALIASX(a, b) AF_BAR2_ALIASX(a, b) + #define CPT_AF_LF_CTL2_SHIFT 3 #define CPT_AF_LF_SSO_PF_FUNC_SHIFT 32 +#define CPT_LF_CTL 0x10 +#define CPT_LF_INPROG 0x40 +#define CPT_LF_Q_GRP_PTR 0x120 + #define NPC_AF_BLK_RST (0x00040) /* NPC */ -- GitLab From fec7fa0a750c2127b01adb626e4945509da96462 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Tue, 2 Feb 2021 18:01:03 +0100 Subject: [PATCH 1879/2012] chelsio: cxgb: Replace the workqueue with threaded interrupt The external interrupt (F_PL_INTR_EXT) needs to be handled in a process context and this is accomplished by utilizing a workqueue. The process context can also be provided by a threaded interrupt instead of a workqueue. The threaded interrupt can be used later for other interrupt related processing which require non-atomic context without using yet another workqueue. free_irq() also ensures that the thread is done which is currently missing (the worker could continue after the module has been removed). Save pending flags in pending_thread_intr. Use the same mechanism to disable F_PL_INTR_EXT as interrupt source like it is used before the worker is scheduled. Enable the interrupt again once t1_elmer0_ext_intr_handler() is done. Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/chelsio/cxgb/common.h | 5 +-- drivers/net/ethernet/chelsio/cxgb/cxgb2.c | 44 ++-------------------- drivers/net/ethernet/chelsio/cxgb/sge.c | 33 ++++++++++++++-- drivers/net/ethernet/chelsio/cxgb/sge.h | 1 + drivers/net/ethernet/chelsio/cxgb/subr.c | 26 +++++++++---- 5 files changed, 55 insertions(+), 54 deletions(-) diff --git a/drivers/net/ethernet/chelsio/cxgb/common.h b/drivers/net/ethernet/chelsio/cxgb/common.h index 6475060649e90..e999a9b9fe6cc 100644 --- a/drivers/net/ethernet/chelsio/cxgb/common.h +++ b/drivers/net/ethernet/chelsio/cxgb/common.h @@ -238,7 +238,6 @@ struct adapter { int msg_enable; u32 mmio_len; - struct work_struct ext_intr_handler_task; struct adapter_params params; /* Terminator modules. */ @@ -257,6 +256,7 @@ struct adapter { /* guards async operations */ spinlock_t async_lock ____cacheline_aligned; + u32 pending_thread_intr; u32 slow_intr_mask; int t1powersave; }; @@ -334,8 +334,7 @@ void t1_interrupts_enable(adapter_t *adapter); void t1_interrupts_disable(adapter_t *adapter); void t1_interrupts_clear(adapter_t *adapter); int t1_elmer0_ext_intr_handler(adapter_t *adapter); -void t1_elmer0_ext_intr(adapter_t *adapter); -int t1_slow_intr_handler(adapter_t *adapter); +irqreturn_t t1_slow_intr_handler(adapter_t *adapter); int t1_link_start(struct cphy *phy, struct cmac *mac, struct link_config *lc); const struct board_info *t1_get_board_info(unsigned int board_id); diff --git a/drivers/net/ethernet/chelsio/cxgb/cxgb2.c b/drivers/net/ethernet/chelsio/cxgb/cxgb2.c index 0e4a0f413960a..bd6f3c532d72e 100644 --- a/drivers/net/ethernet/chelsio/cxgb/cxgb2.c +++ b/drivers/net/ethernet/chelsio/cxgb/cxgb2.c @@ -211,9 +211,10 @@ static int cxgb_up(struct adapter *adapter) t1_interrupts_clear(adapter); adapter->params.has_msi = !disable_msi && !pci_enable_msi(adapter->pdev); - err = request_irq(adapter->pdev->irq, t1_interrupt, - adapter->params.has_msi ? 0 : IRQF_SHARED, - adapter->name, adapter); + err = request_threaded_irq(adapter->pdev->irq, t1_interrupt, + t1_interrupt_thread, + adapter->params.has_msi ? 0 : IRQF_SHARED, + adapter->name, adapter); if (err) { if (adapter->params.has_msi) pci_disable_msi(adapter->pdev); @@ -916,41 +917,6 @@ static void mac_stats_task(struct work_struct *work) spin_unlock(&adapter->work_lock); } -/* - * Processes elmer0 external interrupts in process context. - */ -static void ext_intr_task(struct work_struct *work) -{ - struct adapter *adapter = - container_of(work, struct adapter, ext_intr_handler_task); - - t1_elmer0_ext_intr_handler(adapter); - - /* Now reenable external interrupts */ - spin_lock_irq(&adapter->async_lock); - adapter->slow_intr_mask |= F_PL_INTR_EXT; - writel(F_PL_INTR_EXT, adapter->regs + A_PL_CAUSE); - writel(adapter->slow_intr_mask | F_PL_INTR_SGE_DATA, - adapter->regs + A_PL_ENABLE); - spin_unlock_irq(&adapter->async_lock); -} - -/* - * Interrupt-context handler for elmer0 external interrupts. - */ -void t1_elmer0_ext_intr(struct adapter *adapter) -{ - /* - * Schedule a task to handle external interrupts as we require - * a process context. We disable EXT interrupts in the interim - * and let the task reenable them when it's done. - */ - adapter->slow_intr_mask &= ~F_PL_INTR_EXT; - writel(adapter->slow_intr_mask | F_PL_INTR_SGE_DATA, - adapter->regs + A_PL_ENABLE); - schedule_work(&adapter->ext_intr_handler_task); -} - void t1_fatal_err(struct adapter *adapter) { if (adapter->flags & FULL_INIT_DONE) { @@ -1062,8 +1028,6 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) spin_lock_init(&adapter->async_lock); spin_lock_init(&adapter->mac_lock); - INIT_WORK(&adapter->ext_intr_handler_task, - ext_intr_task); INIT_DELAYED_WORK(&adapter->stats_update_task, mac_stats_task); diff --git a/drivers/net/ethernet/chelsio/cxgb/sge.c b/drivers/net/ethernet/chelsio/cxgb/sge.c index 2d9c2b5a690a3..5aef9ae1ecfed 100644 --- a/drivers/net/ethernet/chelsio/cxgb/sge.c +++ b/drivers/net/ethernet/chelsio/cxgb/sge.c @@ -1619,11 +1619,38 @@ int t1_poll(struct napi_struct *napi, int budget) return work_done; } +irqreturn_t t1_interrupt_thread(int irq, void *data) +{ + struct adapter *adapter = data; + u32 pending_thread_intr; + + spin_lock_irq(&adapter->async_lock); + pending_thread_intr = adapter->pending_thread_intr; + adapter->pending_thread_intr = 0; + spin_unlock_irq(&adapter->async_lock); + + if (!pending_thread_intr) + return IRQ_NONE; + + if (pending_thread_intr & F_PL_INTR_EXT) + t1_elmer0_ext_intr_handler(adapter); + + spin_lock_irq(&adapter->async_lock); + adapter->slow_intr_mask |= F_PL_INTR_EXT; + + writel(F_PL_INTR_EXT, adapter->regs + A_PL_CAUSE); + writel(adapter->slow_intr_mask | F_PL_INTR_SGE_DATA, + adapter->regs + A_PL_ENABLE); + spin_unlock_irq(&adapter->async_lock); + + return IRQ_HANDLED; +} + irqreturn_t t1_interrupt(int irq, void *data) { struct adapter *adapter = data; struct sge *sge = adapter->sge; - int handled; + irqreturn_t handled; if (likely(responses_pending(adapter))) { writel(F_PL_INTR_SGE_DATA, adapter->regs + A_PL_CAUSE); @@ -1645,10 +1672,10 @@ irqreturn_t t1_interrupt(int irq, void *data) handled = t1_slow_intr_handler(adapter); spin_unlock(&adapter->async_lock); - if (!handled) + if (handled == IRQ_NONE) sge->stats.unhandled_irqs++; - return IRQ_RETVAL(handled != 0); + return handled; } /* diff --git a/drivers/net/ethernet/chelsio/cxgb/sge.h b/drivers/net/ethernet/chelsio/cxgb/sge.h index a1ba591b34312..76516d2a8aa9e 100644 --- a/drivers/net/ethernet/chelsio/cxgb/sge.h +++ b/drivers/net/ethernet/chelsio/cxgb/sge.h @@ -74,6 +74,7 @@ struct sge *t1_sge_create(struct adapter *, struct sge_params *); int t1_sge_configure(struct sge *, struct sge_params *); int t1_sge_set_coalesce_params(struct sge *, struct sge_params *); void t1_sge_destroy(struct sge *); +irqreturn_t t1_interrupt_thread(int irq, void *data); irqreturn_t t1_interrupt(int irq, void *cookie); int t1_poll(struct napi_struct *, int); diff --git a/drivers/net/ethernet/chelsio/cxgb/subr.c b/drivers/net/ethernet/chelsio/cxgb/subr.c index ea0f8741d7cfd..d90ad07ff1a40 100644 --- a/drivers/net/ethernet/chelsio/cxgb/subr.c +++ b/drivers/net/ethernet/chelsio/cxgb/subr.c @@ -210,7 +210,7 @@ static int fpga_phy_intr_handler(adapter_t *adapter) /* * Slow path interrupt handler for FPGAs. */ -static int fpga_slow_intr(adapter_t *adapter) +static irqreturn_t fpga_slow_intr(adapter_t *adapter) { u32 cause = readl(adapter->regs + A_PL_CAUSE); @@ -238,7 +238,7 @@ static int fpga_slow_intr(adapter_t *adapter) if (cause) writel(cause, adapter->regs + A_PL_CAUSE); - return cause != 0; + return cause == 0 ? IRQ_NONE : IRQ_HANDLED; } #endif @@ -842,13 +842,14 @@ void t1_interrupts_clear(adapter_t* adapter) /* * Slow path interrupt handler for ASICs. */ -static int asic_slow_intr(adapter_t *adapter) +static irqreturn_t asic_slow_intr(adapter_t *adapter) { u32 cause = readl(adapter->regs + A_PL_CAUSE); + irqreturn_t ret = IRQ_HANDLED; cause &= adapter->slow_intr_mask; if (!cause) - return 0; + return IRQ_NONE; if (cause & F_PL_INTR_SGE_ERR) t1_sge_intr_error_handler(adapter->sge); if (cause & F_PL_INTR_TP) @@ -857,16 +858,25 @@ static int asic_slow_intr(adapter_t *adapter) t1_espi_intr_handler(adapter->espi); if (cause & F_PL_INTR_PCIX) t1_pci_intr_handler(adapter); - if (cause & F_PL_INTR_EXT) - t1_elmer0_ext_intr(adapter); + if (cause & F_PL_INTR_EXT) { + /* Wake the threaded interrupt to handle external interrupts as + * we require a process context. We disable EXT interrupts in + * the interim and let the thread reenable them when it's done. + */ + adapter->pending_thread_intr |= F_PL_INTR_EXT; + adapter->slow_intr_mask &= ~F_PL_INTR_EXT; + writel(adapter->slow_intr_mask | F_PL_INTR_SGE_DATA, + adapter->regs + A_PL_ENABLE); + ret = IRQ_WAKE_THREAD; + } /* Clear the interrupts just processed. */ writel(cause, adapter->regs + A_PL_CAUSE); readl(adapter->regs + A_PL_CAUSE); /* flush writes */ - return 1; + return ret; } -int t1_slow_intr_handler(adapter_t *adapter) +irqreturn_t t1_slow_intr_handler(adapter_t *adapter) { #ifdef CONFIG_CHELSIO_T1_1G if (!t1_is_asic(adapter)) -- GitLab From 82154580a7f72ed5b16f0b7829a6514542a6bd98 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Tue, 2 Feb 2021 18:01:04 +0100 Subject: [PATCH 1880/2012] chelsio: cxgb: Disable the card on error in threaded interrupt t1_fatal_err() is invoked from the interrupt handler. The bad part is that it invokes (via t1_sge_stop()) del_timer_sync() and tasklet_kill(). Both functions must not be called from an interrupt because it is possible that it will wait for the completion of the timer/tasklet it just interrupted. In case of a fatal error, use t1_interrupts_disable() to disable all interrupt sources and then wake the interrupt thread with F_PL_INTR_SGE_ERR as pending flag. The threaded-interrupt will stop the card via t1_sge_stop() and not re-enable the interrupts again. Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/chelsio/cxgb/common.h | 1 - drivers/net/ethernet/chelsio/cxgb/cxgb2.c | 10 ------ drivers/net/ethernet/chelsio/cxgb/sge.c | 20 +++++++++--- drivers/net/ethernet/chelsio/cxgb/sge.h | 2 +- drivers/net/ethernet/chelsio/cxgb/subr.c | 38 +++++++++++++++------- 5 files changed, 44 insertions(+), 27 deletions(-) diff --git a/drivers/net/ethernet/chelsio/cxgb/common.h b/drivers/net/ethernet/chelsio/cxgb/common.h index e999a9b9fe6cc..0321be77366c4 100644 --- a/drivers/net/ethernet/chelsio/cxgb/common.h +++ b/drivers/net/ethernet/chelsio/cxgb/common.h @@ -346,7 +346,6 @@ int t1_get_board_rev(adapter_t *adapter, const struct board_info *bi, int t1_init_hw_modules(adapter_t *adapter); int t1_init_sw_modules(adapter_t *adapter, const struct board_info *bi); void t1_free_sw_modules(adapter_t *adapter); -void t1_fatal_err(adapter_t *adapter); void t1_link_changed(adapter_t *adapter, int port_id); void t1_link_negotiated(adapter_t *adapter, int port_id, int link_stat, int speed, int duplex, int pause); diff --git a/drivers/net/ethernet/chelsio/cxgb/cxgb2.c b/drivers/net/ethernet/chelsio/cxgb/cxgb2.c index bd6f3c532d72e..512da98019c66 100644 --- a/drivers/net/ethernet/chelsio/cxgb/cxgb2.c +++ b/drivers/net/ethernet/chelsio/cxgb/cxgb2.c @@ -917,16 +917,6 @@ static void mac_stats_task(struct work_struct *work) spin_unlock(&adapter->work_lock); } -void t1_fatal_err(struct adapter *adapter) -{ - if (adapter->flags & FULL_INIT_DONE) { - t1_sge_stop(adapter->sge); - t1_interrupts_disable(adapter); - } - pr_alert("%s: encountered fatal error, operation suspended\n", - adapter->name); -} - static const struct net_device_ops cxgb_netdev_ops = { .ndo_open = cxgb_open, .ndo_stop = cxgb_close, diff --git a/drivers/net/ethernet/chelsio/cxgb/sge.c b/drivers/net/ethernet/chelsio/cxgb/sge.c index 5aef9ae1ecfed..cda01f22c71c8 100644 --- a/drivers/net/ethernet/chelsio/cxgb/sge.c +++ b/drivers/net/ethernet/chelsio/cxgb/sge.c @@ -940,10 +940,11 @@ void t1_sge_intr_clear(struct sge *sge) /* * SGE 'Error' interrupt handler */ -int t1_sge_intr_error_handler(struct sge *sge) +bool t1_sge_intr_error_handler(struct sge *sge) { struct adapter *adapter = sge->adapter; u32 cause = readl(adapter->regs + A_SG_INT_CAUSE); + bool wake = false; if (adapter->port[0].dev->hw_features & NETIF_F_TSO) cause &= ~F_PACKET_TOO_BIG; @@ -967,11 +968,14 @@ int t1_sge_intr_error_handler(struct sge *sge) sge->stats.pkt_mismatch++; pr_alert("%s: SGE packet mismatch\n", adapter->name); } - if (cause & SGE_INT_FATAL) - t1_fatal_err(adapter); + if (cause & SGE_INT_FATAL) { + t1_interrupts_disable(adapter); + adapter->pending_thread_intr |= F_PL_INTR_SGE_ERR; + wake = true; + } writel(cause, adapter->regs + A_SG_INT_CAUSE); - return 0; + return wake; } const struct sge_intr_counts *t1_sge_get_intr_counts(const struct sge *sge) @@ -1635,6 +1639,14 @@ irqreturn_t t1_interrupt_thread(int irq, void *data) if (pending_thread_intr & F_PL_INTR_EXT) t1_elmer0_ext_intr_handler(adapter); + /* This error is fatal, interrupts remain off */ + if (pending_thread_intr & F_PL_INTR_SGE_ERR) { + pr_alert("%s: encountered fatal error, operation suspended\n", + adapter->name); + t1_sge_stop(adapter->sge); + return IRQ_HANDLED; + } + spin_lock_irq(&adapter->async_lock); adapter->slow_intr_mask |= F_PL_INTR_EXT; diff --git a/drivers/net/ethernet/chelsio/cxgb/sge.h b/drivers/net/ethernet/chelsio/cxgb/sge.h index 76516d2a8aa9e..716705b96f265 100644 --- a/drivers/net/ethernet/chelsio/cxgb/sge.h +++ b/drivers/net/ethernet/chelsio/cxgb/sge.h @@ -82,7 +82,7 @@ netdev_tx_t t1_start_xmit(struct sk_buff *skb, struct net_device *dev); void t1_vlan_mode(struct adapter *adapter, netdev_features_t features); void t1_sge_start(struct sge *); void t1_sge_stop(struct sge *); -int t1_sge_intr_error_handler(struct sge *); +bool t1_sge_intr_error_handler(struct sge *sge); void t1_sge_intr_enable(struct sge *); void t1_sge_intr_disable(struct sge *); void t1_sge_intr_clear(struct sge *); diff --git a/drivers/net/ethernet/chelsio/cxgb/subr.c b/drivers/net/ethernet/chelsio/cxgb/subr.c index d90ad07ff1a40..310add28fcf59 100644 --- a/drivers/net/ethernet/chelsio/cxgb/subr.c +++ b/drivers/net/ethernet/chelsio/cxgb/subr.c @@ -170,7 +170,7 @@ void t1_link_changed(adapter_t *adapter, int port_id) t1_link_negotiated(adapter, port_id, link_ok, speed, duplex, fc); } -static int t1_pci_intr_handler(adapter_t *adapter) +static bool t1_pci_intr_handler(adapter_t *adapter) { u32 pcix_cause; @@ -179,9 +179,13 @@ static int t1_pci_intr_handler(adapter_t *adapter) if (pcix_cause) { pci_write_config_dword(adapter->pdev, A_PCICFG_INTR_CAUSE, pcix_cause); - t1_fatal_err(adapter); /* PCI errors are fatal */ + /* PCI errors are fatal */ + t1_interrupts_disable(adapter); + adapter->pending_thread_intr |= F_PL_INTR_SGE_ERR; + pr_alert("%s: PCI error encountered.\n", adapter->name); + return true; } - return 0; + return false; } #ifdef CONFIG_CHELSIO_T1_1G @@ -213,10 +217,13 @@ static int fpga_phy_intr_handler(adapter_t *adapter) static irqreturn_t fpga_slow_intr(adapter_t *adapter) { u32 cause = readl(adapter->regs + A_PL_CAUSE); + irqreturn_t ret = IRQ_NONE; cause &= ~F_PL_INTR_SGE_DATA; - if (cause & F_PL_INTR_SGE_ERR) - t1_sge_intr_error_handler(adapter->sge); + if (cause & F_PL_INTR_SGE_ERR) { + if (t1_sge_intr_error_handler(adapter->sge)) + ret = IRQ_WAKE_THREAD; + } if (cause & FPGA_PCIX_INTERRUPT_GMAC) fpga_phy_intr_handler(adapter); @@ -231,13 +238,18 @@ static irqreturn_t fpga_slow_intr(adapter_t *adapter) /* Clear TP interrupt */ writel(tp_cause, adapter->regs + FPGA_TP_ADDR_INTERRUPT_CAUSE); } - if (cause & FPGA_PCIX_INTERRUPT_PCIX) - t1_pci_intr_handler(adapter); + if (cause & FPGA_PCIX_INTERRUPT_PCIX) { + if (t1_pci_intr_handler(adapter)) + ret = IRQ_WAKE_THREAD; + } /* Clear the interrupts just processed. */ if (cause) writel(cause, adapter->regs + A_PL_CAUSE); + if (ret != IRQ_NONE) + return ret; + return cause == 0 ? IRQ_NONE : IRQ_HANDLED; } #endif @@ -850,14 +862,18 @@ static irqreturn_t asic_slow_intr(adapter_t *adapter) cause &= adapter->slow_intr_mask; if (!cause) return IRQ_NONE; - if (cause & F_PL_INTR_SGE_ERR) - t1_sge_intr_error_handler(adapter->sge); + if (cause & F_PL_INTR_SGE_ERR) { + if (t1_sge_intr_error_handler(adapter->sge)) + ret = IRQ_WAKE_THREAD; + } if (cause & F_PL_INTR_TP) t1_tp_intr_handler(adapter->tp); if (cause & F_PL_INTR_ESPI) t1_espi_intr_handler(adapter->espi); - if (cause & F_PL_INTR_PCIX) - t1_pci_intr_handler(adapter); + if (cause & F_PL_INTR_PCIX) { + if (t1_pci_intr_handler(adapter)) + ret = IRQ_WAKE_THREAD; + } if (cause & F_PL_INTR_EXT) { /* Wake the threaded interrupt to handle external interrupts as * we require a process context. We disable EXT interrupts in -- GitLab From 99b8202b179fc3dbbca69e8af6da660224c9d676 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Wed, 3 Feb 2021 01:31:09 +0200 Subject: [PATCH 1881/2012] net: dsa: fix SWITCHDEV_ATTR_ID_BRIDGE_VLAN_FILTERING getting ignored The bridge emits VLAN filtering events and quite a few others via switchdev with orig_dev = br->dev. After the blamed commit, these events started getting ignored. The point of the patch was to not offload switchdev objects for ports that didn't go through dsa_port_bridge_join, because the configuration is unsupported: - ports that offload a bonding/team interface go through dsa_port_bridge_join when that bonding/team interface is later bridged with another switch port or LAG - ports that don't offload LAG don't get notified of the bridge that is on top of that LAG. Sadly, a check is missing, which is that the orig_dev is equal to the bridge device. This check is compatible with the original intention, because ports that don't offload bridging because they use a software LAG don't have dp->bridge_dev set. On a semi-related note, we should not offload switchdev objects or populate dp->bridge_dev if the driver doesn't implement .port_bridge_join either. However there is no regression associated with that, so it can be done separately. Fixes: 5696c8aedfcc ("net: dsa: Don't offload port attributes on standalone ports") Signed-off-by: Vladimir Oltean Reviewed-by: Tobias Waldekranz Tested-by: Tobias Waldekranz Link: https://lore.kernel.org/r/20210202233109.1591466-1-olteanv@gmail.com Signed-off-by: Jakub Kicinski --- net/dsa/dsa_priv.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h index edca57b558ad6..263593ce94a81 100644 --- a/net/dsa/dsa_priv.h +++ b/net/dsa/dsa_priv.h @@ -191,7 +191,15 @@ static inline bool dsa_port_offloads_netdev(struct dsa_port *dp, /* Switchdev offloading can be configured on: */ if (dev == dp->slave) - /* DSA ports directly connected to a bridge. */ + /* DSA ports directly connected to a bridge, and event + * was emitted for the ports themselves. + */ + return true; + + if (dp->bridge_dev == dev) + /* DSA ports connected to a bridge, and event was emitted + * for the bridge. + */ return true; if (dp->lag_dev == dev) -- GitLab From 189e7a8d94208a26b7f7876d155cf695393f8efa Mon Sep 17 00:00:00 2001 From: Danielle Ratson Date: Tue, 2 Feb 2021 20:06:05 +0200 Subject: [PATCH 1882/2012] ethtool: Validate master slave configuration before rtnl_lock() Create a new function for input validations to be called before rtnl_lock() and move the master slave validation to that function. This would be a cleanup for next patch that would add another validation to the new function. Signed-off-by: Danielle Ratson Signed-off-by: Jakub Kicinski --- net/ethtool/linkmodes.c | 27 +++++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/net/ethtool/linkmodes.c b/net/ethtool/linkmodes.c index c5bcb9abc8b98..bb8a3351fb728 100644 --- a/net/ethtool/linkmodes.c +++ b/net/ethtool/linkmodes.c @@ -325,6 +325,21 @@ static bool ethnl_validate_master_slave_cfg(u8 cfg) return false; } +static int ethnl_check_linkmodes(struct genl_info *info, struct nlattr **tb) +{ + const struct nlattr *master_slave_cfg; + + master_slave_cfg = tb[ETHTOOL_A_LINKMODES_MASTER_SLAVE_CFG]; + if (master_slave_cfg && + !ethnl_validate_master_slave_cfg(nla_get_u8(master_slave_cfg))) { + NL_SET_ERR_MSG_ATTR(info->extack, master_slave_cfg, + "master/slave value is invalid"); + return -EOPNOTSUPP; + } + + return 0; +} + static int ethnl_update_linkmodes(struct genl_info *info, struct nlattr **tb, struct ethtool_link_ksettings *ksettings, bool *mod) @@ -336,19 +351,11 @@ static int ethnl_update_linkmodes(struct genl_info *info, struct nlattr **tb, master_slave_cfg = tb[ETHTOOL_A_LINKMODES_MASTER_SLAVE_CFG]; if (master_slave_cfg) { - u8 cfg = nla_get_u8(master_slave_cfg); - if (lsettings->master_slave_cfg == MASTER_SLAVE_CFG_UNSUPPORTED) { NL_SET_ERR_MSG_ATTR(info->extack, master_slave_cfg, "master/slave configuration not supported by device"); return -EOPNOTSUPP; } - - if (!ethnl_validate_master_slave_cfg(cfg)) { - NL_SET_ERR_MSG_ATTR(info->extack, master_slave_cfg, - "master/slave value is invalid"); - return -EOPNOTSUPP; - } } *mod = false; @@ -386,6 +393,10 @@ int ethnl_set_linkmodes(struct sk_buff *skb, struct genl_info *info) bool mod = false; int ret; + ret = ethnl_check_linkmodes(info, tb); + if (ret < 0) + return ret; + ret = ethnl_parse_header_dev_get(&req_info, tb[ETHTOOL_A_LINKMODES_HEADER], genl_info_net(info), info->extack, -- GitLab From 012ce4dd3102a0f4d80167de343e9d44b257c1b8 Mon Sep 17 00:00:00 2001 From: Danielle Ratson Date: Tue, 2 Feb 2021 20:06:06 +0200 Subject: [PATCH 1883/2012] ethtool: Extend link modes settings uAPI with lanes Currently, when auto negotiation is on, the user can advertise all the linkmodes which correspond to a specific speed, but does not have a similar selector for the number of lanes. This is significant when a specific speed can be achieved using different number of lanes. For example, 2x50 or 4x25. Add 'ETHTOOL_A_LINKMODES_LANES' attribute and expand 'struct ethtool_link_settings' with lanes field in order to implement a new lanes-selector that will enable the user to advertise a specific number of lanes as well. When auto negotiation is off, lanes parameter can be forced only if the driver supports it. Add a capability bit in 'struct ethtool_ops' that allows ethtool know if the driver can handle the lanes parameter when auto negotiation is off, so if it does not, an error message will be returned when trying to set lanes. Example: $ ethtool -s swp1 lanes 4 $ ethtool swp1 Settings for swp1: Supported ports: [ FIBRE ] Supported link modes: 1000baseKX/Full 10000baseKR/Full 40000baseCR4/Full 40000baseSR4/Full 40000baseLR4/Full 25000baseCR/Full 25000baseSR/Full 50000baseCR2/Full 100000baseSR4/Full 100000baseCR4/Full Supported pause frame use: Symmetric Receive-only Supports auto-negotiation: Yes Supported FEC modes: Not reported Advertised link modes: 40000baseCR4/Full 40000baseSR4/Full 40000baseLR4/Full 100000baseSR4/Full 100000baseCR4/Full Advertised pause frame use: No Advertised auto-negotiation: Yes Advertised FEC modes: Not reported Speed: Unknown! Duplex: Unknown! (255) Auto-negotiation: on Port: Direct Attach Copper PHYAD: 0 Transceiver: internal Link detected: no Signed-off-by: Danielle Ratson Signed-off-by: Jakub Kicinski --- Documentation/networking/ethtool-netlink.rst | 11 ++- include/linux/ethtool.h | 4 + include/uapi/linux/ethtool_netlink.h | 1 + net/ethtool/linkmodes.c | 96 +++++++++++++++++--- net/ethtool/netlink.h | 2 +- 5 files changed, 93 insertions(+), 21 deletions(-) diff --git a/Documentation/networking/ethtool-netlink.rst b/Documentation/networking/ethtool-netlink.rst index 30b98245979fd..05073482db055 100644 --- a/Documentation/networking/ethtool-netlink.rst +++ b/Documentation/networking/ethtool-netlink.rst @@ -431,16 +431,17 @@ Request contents: ``ETHTOOL_A_LINKMODES_SPEED`` u32 link speed (Mb/s) ``ETHTOOL_A_LINKMODES_DUPLEX`` u8 duplex mode ``ETHTOOL_A_LINKMODES_MASTER_SLAVE_CFG`` u8 Master/slave port mode + ``ETHTOOL_A_LINKMODES_LANES`` u32 lanes ========================================== ====== ========================== ``ETHTOOL_A_LINKMODES_OURS`` bit set allows setting advertised link modes. If autonegotiation is on (either set now or kept from before), advertised modes are not changed (no ``ETHTOOL_A_LINKMODES_OURS`` attribute) and at least one -of speed and duplex is specified, kernel adjusts advertised modes to all -supported modes matching speed, duplex or both (whatever is specified). This -autoselection is done on ethtool side with ioctl interface, netlink interface -is supposed to allow requesting changes without knowing what exactly kernel -supports. +of speed, duplex and lanes is specified, kernel adjusts advertised modes to all +supported modes matching speed, duplex, lanes or all (whatever is specified). +This autoselection is done on ethtool side with ioctl interface, netlink +interface is supposed to allow requesting changes without knowing what exactly +kernel supports. LINKSTATE_GET diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index e3da25b51ae42..1ab13c5dfb2f5 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -128,6 +128,7 @@ struct ethtool_link_ksettings { __ETHTOOL_DECLARE_LINK_MODE_MASK(advertising); __ETHTOOL_DECLARE_LINK_MODE_MASK(lp_advertising); } link_modes; + u32 lanes; }; /** @@ -265,6 +266,8 @@ struct ethtool_pause_stats { /** * struct ethtool_ops - optional netdev operations + * @cap_link_lanes_supported: indicates if the driver supports lanes + * parameter. * @supported_coalesce_params: supported types of interrupt coalescing. * @get_drvinfo: Report driver/device information. Should only set the * @driver, @version, @fw_version and @bus_info fields. If not @@ -420,6 +423,7 @@ struct ethtool_pause_stats { * of the generic netdev features interface. */ struct ethtool_ops { + u32 cap_link_lanes_supported:1; u32 supported_coalesce_params; void (*get_drvinfo)(struct net_device *, struct ethtool_drvinfo *); int (*get_regs_len)(struct net_device *); diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h index e2bf36e6964b6..a286635ac9b8f 100644 --- a/include/uapi/linux/ethtool_netlink.h +++ b/include/uapi/linux/ethtool_netlink.h @@ -227,6 +227,7 @@ enum { ETHTOOL_A_LINKMODES_DUPLEX, /* u8 */ ETHTOOL_A_LINKMODES_MASTER_SLAVE_CFG, /* u8 */ ETHTOOL_A_LINKMODES_MASTER_SLAVE_STATE, /* u8 */ + ETHTOOL_A_LINKMODES_LANES, /* u32 */ /* add new constants above here */ __ETHTOOL_A_LINKMODES_CNT, diff --git a/net/ethtool/linkmodes.c b/net/ethtool/linkmodes.c index bb8a3351fb728..db3e31fc6709c 100644 --- a/net/ethtool/linkmodes.c +++ b/net/ethtool/linkmodes.c @@ -152,12 +152,47 @@ const struct ethnl_request_ops ethnl_linkmodes_request_ops = { struct link_mode_info { int speed; + u8 lanes; u8 duplex; }; -#define __DEFINE_LINK_MODE_PARAMS(_speed, _type, _duplex) \ - [ETHTOOL_LINK_MODE(_speed, _type, _duplex)] = { \ - .speed = SPEED_ ## _speed, \ +#define __LINK_MODE_LANES_CR 1 +#define __LINK_MODE_LANES_CR2 2 +#define __LINK_MODE_LANES_CR4 4 +#define __LINK_MODE_LANES_CR8 8 +#define __LINK_MODE_LANES_DR 1 +#define __LINK_MODE_LANES_DR2 2 +#define __LINK_MODE_LANES_DR4 4 +#define __LINK_MODE_LANES_DR8 8 +#define __LINK_MODE_LANES_KR 1 +#define __LINK_MODE_LANES_KR2 2 +#define __LINK_MODE_LANES_KR4 4 +#define __LINK_MODE_LANES_KR8 8 +#define __LINK_MODE_LANES_SR 1 +#define __LINK_MODE_LANES_SR2 2 +#define __LINK_MODE_LANES_SR4 4 +#define __LINK_MODE_LANES_SR8 8 +#define __LINK_MODE_LANES_ER 1 +#define __LINK_MODE_LANES_KX 1 +#define __LINK_MODE_LANES_KX4 4 +#define __LINK_MODE_LANES_LR 1 +#define __LINK_MODE_LANES_LR4 4 +#define __LINK_MODE_LANES_LR4_ER4 4 +#define __LINK_MODE_LANES_LR_ER_FR 1 +#define __LINK_MODE_LANES_LR2_ER2_FR2 2 +#define __LINK_MODE_LANES_LR4_ER4_FR4 4 +#define __LINK_MODE_LANES_LR8_ER8_FR8 8 +#define __LINK_MODE_LANES_LRM 1 +#define __LINK_MODE_LANES_MLD2 2 +#define __LINK_MODE_LANES_T 1 +#define __LINK_MODE_LANES_T1 1 +#define __LINK_MODE_LANES_X 1 +#define __LINK_MODE_LANES_FX 1 + +#define __DEFINE_LINK_MODE_PARAMS(_speed, _type, _duplex) \ + [ETHTOOL_LINK_MODE(_speed, _type, _duplex)] = { \ + .speed = SPEED_ ## _speed, \ + .lanes = __LINK_MODE_LANES_ ## _type, \ .duplex = __DUPLEX_ ## _duplex \ } #define __DUPLEX_Half DUPLEX_HALF @@ -165,6 +200,7 @@ struct link_mode_info { #define __DEFINE_SPECIAL_MODE_PARAMS(_mode) \ [ETHTOOL_LINK_MODE_ ## _mode ## _BIT] = { \ .speed = SPEED_UNKNOWN, \ + .lanes = 0, \ .duplex = DUPLEX_UNKNOWN, \ } @@ -274,16 +310,17 @@ const struct nla_policy ethnl_linkmodes_set_policy[] = { [ETHTOOL_A_LINKMODES_SPEED] = { .type = NLA_U32 }, [ETHTOOL_A_LINKMODES_DUPLEX] = { .type = NLA_U8 }, [ETHTOOL_A_LINKMODES_MASTER_SLAVE_CFG] = { .type = NLA_U8 }, + [ETHTOOL_A_LINKMODES_LANES] = NLA_POLICY_RANGE(NLA_U32, 1, 8), }; -/* Set advertised link modes to all supported modes matching requested speed - * and duplex values. Called when autonegotiation is on, speed or duplex is - * requested but no link mode change. This is done in userspace with ioctl() - * interface, move it into kernel for netlink. +/* Set advertised link modes to all supported modes matching requested speed, + * lanes and duplex values. Called when autonegotiation is on, speed, lanes or + * duplex is requested but no link mode change. This is done in userspace with + * ioctl() interface, move it into kernel for netlink. * Returns true if advertised modes bitmap was modified. */ static bool ethnl_auto_linkmodes(struct ethtool_link_ksettings *ksettings, - bool req_speed, bool req_duplex) + bool req_speed, bool req_lanes, bool req_duplex) { unsigned long *advertising = ksettings->link_modes.advertising; unsigned long *supported = ksettings->link_modes.supported; @@ -302,6 +339,7 @@ static bool ethnl_auto_linkmodes(struct ethtool_link_ksettings *ksettings, continue; if (test_bit(i, supported) && (!req_speed || info->speed == ksettings->base.speed) && + (!req_lanes || info->lanes == ksettings->lanes) && (!req_duplex || info->duplex == ksettings->base.duplex)) set_bit(i, advertising); else @@ -327,7 +365,7 @@ static bool ethnl_validate_master_slave_cfg(u8 cfg) static int ethnl_check_linkmodes(struct genl_info *info, struct nlattr **tb) { - const struct nlattr *master_slave_cfg; + const struct nlattr *master_slave_cfg, *lanes_cfg; master_slave_cfg = tb[ETHTOOL_A_LINKMODES_MASTER_SLAVE_CFG]; if (master_slave_cfg && @@ -337,16 +375,23 @@ static int ethnl_check_linkmodes(struct genl_info *info, struct nlattr **tb) return -EOPNOTSUPP; } + lanes_cfg = tb[ETHTOOL_A_LINKMODES_LANES]; + if (lanes_cfg && !is_power_of_2(nla_get_u32(lanes_cfg))) { + NL_SET_ERR_MSG_ATTR(info->extack, lanes_cfg, + "lanes value is invalid"); + return -EINVAL; + } + return 0; } static int ethnl_update_linkmodes(struct genl_info *info, struct nlattr **tb, struct ethtool_link_ksettings *ksettings, - bool *mod) + bool *mod, const struct net_device *dev) { struct ethtool_link_settings *lsettings = &ksettings->base; - bool req_speed, req_duplex; - const struct nlattr *master_slave_cfg; + bool req_speed, req_lanes, req_duplex; + const struct nlattr *master_slave_cfg, *lanes_cfg; int ret; master_slave_cfg = tb[ETHTOOL_A_LINKMODES_MASTER_SLAVE_CFG]; @@ -360,10 +405,30 @@ static int ethnl_update_linkmodes(struct genl_info *info, struct nlattr **tb, *mod = false; req_speed = tb[ETHTOOL_A_LINKMODES_SPEED]; + req_lanes = tb[ETHTOOL_A_LINKMODES_LANES]; req_duplex = tb[ETHTOOL_A_LINKMODES_DUPLEX]; ethnl_update_u8(&lsettings->autoneg, tb[ETHTOOL_A_LINKMODES_AUTONEG], mod); + + lanes_cfg = tb[ETHTOOL_A_LINKMODES_LANES]; + if (lanes_cfg) { + /* If autoneg is off and lanes parameter is not supported by the + * driver, return an error. + */ + if (!lsettings->autoneg && + !dev->ethtool_ops->cap_link_lanes_supported) { + NL_SET_ERR_MSG_ATTR(info->extack, lanes_cfg, + "lanes configuration not supported by device"); + return -EOPNOTSUPP; + } + } else if (!lsettings->autoneg) { + /* If autoneg is off and lanes parameter is not passed from user, + * set the lanes parameter to 0. + */ + ksettings->lanes = 0; + } + ret = ethnl_update_bitset(ksettings->link_modes.advertising, __ETHTOOL_LINK_MODE_MASK_NBITS, tb[ETHTOOL_A_LINKMODES_OURS], link_mode_names, @@ -372,13 +437,14 @@ static int ethnl_update_linkmodes(struct genl_info *info, struct nlattr **tb, return ret; ethnl_update_u32(&lsettings->speed, tb[ETHTOOL_A_LINKMODES_SPEED], mod); + ethnl_update_u32(&ksettings->lanes, lanes_cfg, mod); ethnl_update_u8(&lsettings->duplex, tb[ETHTOOL_A_LINKMODES_DUPLEX], mod); ethnl_update_u8(&lsettings->master_slave_cfg, master_slave_cfg, mod); if (!tb[ETHTOOL_A_LINKMODES_OURS] && lsettings->autoneg && - (req_speed || req_duplex) && - ethnl_auto_linkmodes(ksettings, req_speed, req_duplex)) + (req_speed || req_lanes || req_duplex) && + ethnl_auto_linkmodes(ksettings, req_speed, req_lanes, req_duplex)) *mod = true; return 0; @@ -420,7 +486,7 @@ int ethnl_set_linkmodes(struct sk_buff *skb, struct genl_info *info) goto out_ops; } - ret = ethnl_update_linkmodes(info, tb, &ksettings, &mod); + ret = ethnl_update_linkmodes(info, tb, &ksettings, &mod, dev); if (ret < 0) goto out_ops; diff --git a/net/ethtool/netlink.h b/net/ethtool/netlink.h index d8efec516d868..6eabd58d81bfe 100644 --- a/net/ethtool/netlink.h +++ b/net/ethtool/netlink.h @@ -351,7 +351,7 @@ extern const struct nla_policy ethnl_strset_get_policy[ETHTOOL_A_STRSET_COUNTS_O extern const struct nla_policy ethnl_linkinfo_get_policy[ETHTOOL_A_LINKINFO_HEADER + 1]; extern const struct nla_policy ethnl_linkinfo_set_policy[ETHTOOL_A_LINKINFO_TP_MDIX_CTRL + 1]; extern const struct nla_policy ethnl_linkmodes_get_policy[ETHTOOL_A_LINKMODES_HEADER + 1]; -extern const struct nla_policy ethnl_linkmodes_set_policy[ETHTOOL_A_LINKMODES_MASTER_SLAVE_CFG + 1]; +extern const struct nla_policy ethnl_linkmodes_set_policy[ETHTOOL_A_LINKMODES_LANES + 1]; extern const struct nla_policy ethnl_linkstate_get_policy[ETHTOOL_A_LINKSTATE_HEADER + 1]; extern const struct nla_policy ethnl_debug_get_policy[ETHTOOL_A_DEBUG_HEADER + 1]; extern const struct nla_policy ethnl_debug_set_policy[ETHTOOL_A_DEBUG_MSGMASK + 1]; -- GitLab From c8907043c6ac9ed58e6c1a76f2824be714b42228 Mon Sep 17 00:00:00 2001 From: Danielle Ratson Date: Tue, 2 Feb 2021 20:06:07 +0200 Subject: [PATCH 1884/2012] ethtool: Get link mode in use instead of speed and duplex parameters Currently, when user space queries the link's parameters, as speed and duplex, each parameter is passed from the driver to ethtool. Instead, get the link mode bit in use, and derive each of the parameters from it in ethtool. Signed-off-by: Danielle Ratson Signed-off-by: Jakub Kicinski --- include/linux/ethtool.h | 1 + net/ethtool/common.c | 147 +++++++++++++++++++++++++++++++++++++ net/ethtool/common.h | 7 ++ net/ethtool/ioctl.c | 18 ++++- net/ethtool/linkmodes.c | 157 +--------------------------------------- 5 files changed, 174 insertions(+), 156 deletions(-) diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 1ab13c5dfb2f5..ec4cd3921c67d 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -129,6 +129,7 @@ struct ethtool_link_ksettings { __ETHTOOL_DECLARE_LINK_MODE_MASK(lp_advertising); } link_modes; u32 lanes; + enum ethtool_link_mode_bit_indices link_mode; }; /** diff --git a/net/ethtool/common.c b/net/ethtool/common.c index 181220101a6e7..835b9bba3e7e0 100644 --- a/net/ethtool/common.c +++ b/net/ethtool/common.c @@ -198,6 +198,153 @@ const char link_mode_names[][ETH_GSTRING_LEN] = { }; static_assert(ARRAY_SIZE(link_mode_names) == __ETHTOOL_LINK_MODE_MASK_NBITS); +#define __LINK_MODE_LANES_CR 1 +#define __LINK_MODE_LANES_CR2 2 +#define __LINK_MODE_LANES_CR4 4 +#define __LINK_MODE_LANES_CR8 8 +#define __LINK_MODE_LANES_DR 1 +#define __LINK_MODE_LANES_DR2 2 +#define __LINK_MODE_LANES_DR4 4 +#define __LINK_MODE_LANES_DR8 8 +#define __LINK_MODE_LANES_KR 1 +#define __LINK_MODE_LANES_KR2 2 +#define __LINK_MODE_LANES_KR4 4 +#define __LINK_MODE_LANES_KR8 8 +#define __LINK_MODE_LANES_SR 1 +#define __LINK_MODE_LANES_SR2 2 +#define __LINK_MODE_LANES_SR4 4 +#define __LINK_MODE_LANES_SR8 8 +#define __LINK_MODE_LANES_ER 1 +#define __LINK_MODE_LANES_KX 1 +#define __LINK_MODE_LANES_KX4 4 +#define __LINK_MODE_LANES_LR 1 +#define __LINK_MODE_LANES_LR4 4 +#define __LINK_MODE_LANES_LR4_ER4 4 +#define __LINK_MODE_LANES_LR_ER_FR 1 +#define __LINK_MODE_LANES_LR2_ER2_FR2 2 +#define __LINK_MODE_LANES_LR4_ER4_FR4 4 +#define __LINK_MODE_LANES_LR8_ER8_FR8 8 +#define __LINK_MODE_LANES_LRM 1 +#define __LINK_MODE_LANES_MLD2 2 +#define __LINK_MODE_LANES_T 1 +#define __LINK_MODE_LANES_T1 1 +#define __LINK_MODE_LANES_X 1 +#define __LINK_MODE_LANES_FX 1 + +#define __DEFINE_LINK_MODE_PARAMS(_speed, _type, _duplex) \ + [ETHTOOL_LINK_MODE(_speed, _type, _duplex)] = { \ + .speed = SPEED_ ## _speed, \ + .lanes = __LINK_MODE_LANES_ ## _type, \ + .duplex = __DUPLEX_ ## _duplex \ + } +#define __DUPLEX_Half DUPLEX_HALF +#define __DUPLEX_Full DUPLEX_FULL +#define __DEFINE_SPECIAL_MODE_PARAMS(_mode) \ + [ETHTOOL_LINK_MODE_ ## _mode ## _BIT] = { \ + .speed = SPEED_UNKNOWN, \ + .lanes = 0, \ + .duplex = DUPLEX_UNKNOWN, \ + } + +const struct link_mode_info link_mode_params[] = { + __DEFINE_LINK_MODE_PARAMS(10, T, Half), + __DEFINE_LINK_MODE_PARAMS(10, T, Full), + __DEFINE_LINK_MODE_PARAMS(100, T, Half), + __DEFINE_LINK_MODE_PARAMS(100, T, Full), + __DEFINE_LINK_MODE_PARAMS(1000, T, Half), + __DEFINE_LINK_MODE_PARAMS(1000, T, Full), + __DEFINE_SPECIAL_MODE_PARAMS(Autoneg), + __DEFINE_SPECIAL_MODE_PARAMS(TP), + __DEFINE_SPECIAL_MODE_PARAMS(AUI), + __DEFINE_SPECIAL_MODE_PARAMS(MII), + __DEFINE_SPECIAL_MODE_PARAMS(FIBRE), + __DEFINE_SPECIAL_MODE_PARAMS(BNC), + __DEFINE_LINK_MODE_PARAMS(10000, T, Full), + __DEFINE_SPECIAL_MODE_PARAMS(Pause), + __DEFINE_SPECIAL_MODE_PARAMS(Asym_Pause), + __DEFINE_LINK_MODE_PARAMS(2500, X, Full), + __DEFINE_SPECIAL_MODE_PARAMS(Backplane), + __DEFINE_LINK_MODE_PARAMS(1000, KX, Full), + __DEFINE_LINK_MODE_PARAMS(10000, KX4, Full), + __DEFINE_LINK_MODE_PARAMS(10000, KR, Full), + [ETHTOOL_LINK_MODE_10000baseR_FEC_BIT] = { + .speed = SPEED_10000, + .duplex = DUPLEX_FULL, + }, + __DEFINE_LINK_MODE_PARAMS(20000, MLD2, Full), + __DEFINE_LINK_MODE_PARAMS(20000, KR2, Full), + __DEFINE_LINK_MODE_PARAMS(40000, KR4, Full), + __DEFINE_LINK_MODE_PARAMS(40000, CR4, Full), + __DEFINE_LINK_MODE_PARAMS(40000, SR4, Full), + __DEFINE_LINK_MODE_PARAMS(40000, LR4, Full), + __DEFINE_LINK_MODE_PARAMS(56000, KR4, Full), + __DEFINE_LINK_MODE_PARAMS(56000, CR4, Full), + __DEFINE_LINK_MODE_PARAMS(56000, SR4, Full), + __DEFINE_LINK_MODE_PARAMS(56000, LR4, Full), + __DEFINE_LINK_MODE_PARAMS(25000, CR, Full), + __DEFINE_LINK_MODE_PARAMS(25000, KR, Full), + __DEFINE_LINK_MODE_PARAMS(25000, SR, Full), + __DEFINE_LINK_MODE_PARAMS(50000, CR2, Full), + __DEFINE_LINK_MODE_PARAMS(50000, KR2, Full), + __DEFINE_LINK_MODE_PARAMS(100000, KR4, Full), + __DEFINE_LINK_MODE_PARAMS(100000, SR4, Full), + __DEFINE_LINK_MODE_PARAMS(100000, CR4, Full), + __DEFINE_LINK_MODE_PARAMS(100000, LR4_ER4, Full), + __DEFINE_LINK_MODE_PARAMS(50000, SR2, Full), + __DEFINE_LINK_MODE_PARAMS(1000, X, Full), + __DEFINE_LINK_MODE_PARAMS(10000, CR, Full), + __DEFINE_LINK_MODE_PARAMS(10000, SR, Full), + __DEFINE_LINK_MODE_PARAMS(10000, LR, Full), + __DEFINE_LINK_MODE_PARAMS(10000, LRM, Full), + __DEFINE_LINK_MODE_PARAMS(10000, ER, Full), + __DEFINE_LINK_MODE_PARAMS(2500, T, Full), + __DEFINE_LINK_MODE_PARAMS(5000, T, Full), + __DEFINE_SPECIAL_MODE_PARAMS(FEC_NONE), + __DEFINE_SPECIAL_MODE_PARAMS(FEC_RS), + __DEFINE_SPECIAL_MODE_PARAMS(FEC_BASER), + __DEFINE_LINK_MODE_PARAMS(50000, KR, Full), + __DEFINE_LINK_MODE_PARAMS(50000, SR, Full), + __DEFINE_LINK_MODE_PARAMS(50000, CR, Full), + __DEFINE_LINK_MODE_PARAMS(50000, LR_ER_FR, Full), + __DEFINE_LINK_MODE_PARAMS(50000, DR, Full), + __DEFINE_LINK_MODE_PARAMS(100000, KR2, Full), + __DEFINE_LINK_MODE_PARAMS(100000, SR2, Full), + __DEFINE_LINK_MODE_PARAMS(100000, CR2, Full), + __DEFINE_LINK_MODE_PARAMS(100000, LR2_ER2_FR2, Full), + __DEFINE_LINK_MODE_PARAMS(100000, DR2, Full), + __DEFINE_LINK_MODE_PARAMS(200000, KR4, Full), + __DEFINE_LINK_MODE_PARAMS(200000, SR4, Full), + __DEFINE_LINK_MODE_PARAMS(200000, LR4_ER4_FR4, Full), + __DEFINE_LINK_MODE_PARAMS(200000, DR4, Full), + __DEFINE_LINK_MODE_PARAMS(200000, CR4, Full), + __DEFINE_LINK_MODE_PARAMS(100, T1, Full), + __DEFINE_LINK_MODE_PARAMS(1000, T1, Full), + __DEFINE_LINK_MODE_PARAMS(400000, KR8, Full), + __DEFINE_LINK_MODE_PARAMS(400000, SR8, Full), + __DEFINE_LINK_MODE_PARAMS(400000, LR8_ER8_FR8, Full), + __DEFINE_LINK_MODE_PARAMS(400000, DR8, Full), + __DEFINE_LINK_MODE_PARAMS(400000, CR8, Full), + __DEFINE_SPECIAL_MODE_PARAMS(FEC_LLRS), + __DEFINE_LINK_MODE_PARAMS(100000, KR, Full), + __DEFINE_LINK_MODE_PARAMS(100000, SR, Full), + __DEFINE_LINK_MODE_PARAMS(100000, LR_ER_FR, Full), + __DEFINE_LINK_MODE_PARAMS(100000, DR, Full), + __DEFINE_LINK_MODE_PARAMS(100000, CR, Full), + __DEFINE_LINK_MODE_PARAMS(200000, KR2, Full), + __DEFINE_LINK_MODE_PARAMS(200000, SR2, Full), + __DEFINE_LINK_MODE_PARAMS(200000, LR2_ER2_FR2, Full), + __DEFINE_LINK_MODE_PARAMS(200000, DR2, Full), + __DEFINE_LINK_MODE_PARAMS(200000, CR2, Full), + __DEFINE_LINK_MODE_PARAMS(400000, KR4, Full), + __DEFINE_LINK_MODE_PARAMS(400000, SR4, Full), + __DEFINE_LINK_MODE_PARAMS(400000, LR4_ER4_FR4, Full), + __DEFINE_LINK_MODE_PARAMS(400000, DR4, Full), + __DEFINE_LINK_MODE_PARAMS(400000, CR4, Full), + __DEFINE_LINK_MODE_PARAMS(100, FX, Half), + __DEFINE_LINK_MODE_PARAMS(100, FX, Full), +}; +static_assert(ARRAY_SIZE(link_mode_params) == __ETHTOOL_LINK_MODE_MASK_NBITS); + const char netif_msg_class_names[][ETH_GSTRING_LEN] = { [NETIF_MSG_DRV_BIT] = "drv", [NETIF_MSG_PROBE_BIT] = "probe", diff --git a/net/ethtool/common.h b/net/ethtool/common.h index 3d9251c95a8bc..a9d0712486987 100644 --- a/net/ethtool/common.h +++ b/net/ethtool/common.h @@ -14,6 +14,12 @@ #define __SOF_TIMESTAMPING_CNT (const_ilog2(SOF_TIMESTAMPING_LAST) + 1) +struct link_mode_info { + int speed; + u8 lanes; + u8 duplex; +}; + extern const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN]; extern const char @@ -23,6 +29,7 @@ tunable_strings[__ETHTOOL_TUNABLE_COUNT][ETH_GSTRING_LEN]; extern const char phy_tunable_strings[__ETHTOOL_PHY_TUNABLE_COUNT][ETH_GSTRING_LEN]; extern const char link_mode_names[][ETH_GSTRING_LEN]; +extern const struct link_mode_info link_mode_params[]; extern const char netif_msg_class_names[][ETH_GSTRING_LEN]; extern const char wol_mode_names[][ETH_GSTRING_LEN]; extern const char sof_timestamping_names[][ETH_GSTRING_LEN]; diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c index 771688e1b0da9..24783b71c5849 100644 --- a/net/ethtool/ioctl.c +++ b/net/ethtool/ioctl.c @@ -426,13 +426,29 @@ struct ethtool_link_usettings { int __ethtool_get_link_ksettings(struct net_device *dev, struct ethtool_link_ksettings *link_ksettings) { + const struct link_mode_info *link_info; + int err; + ASSERT_RTNL(); if (!dev->ethtool_ops->get_link_ksettings) return -EOPNOTSUPP; memset(link_ksettings, 0, sizeof(*link_ksettings)); - return dev->ethtool_ops->get_link_ksettings(dev, link_ksettings); + + link_ksettings->link_mode = -1; + err = dev->ethtool_ops->get_link_ksettings(dev, link_ksettings); + if (err) + return err; + + if (link_ksettings->link_mode != -1) { + link_info = &link_mode_params[link_ksettings->link_mode]; + link_ksettings->base.speed = link_info->speed; + link_ksettings->lanes = link_info->lanes; + link_ksettings->base.duplex = link_info->duplex; + } + + return 0; } EXPORT_SYMBOL(__ethtool_get_link_ksettings); diff --git a/net/ethtool/linkmodes.c b/net/ethtool/linkmodes.c index db3e31fc6709c..fc986d035b015 100644 --- a/net/ethtool/linkmodes.c +++ b/net/ethtool/linkmodes.c @@ -4,6 +4,8 @@ #include "common.h" #include "bitset.h" +/* LINKMODES_GET */ + struct linkmodes_req_info { struct ethnl_req_info base; }; @@ -150,158 +152,6 @@ const struct ethnl_request_ops ethnl_linkmodes_request_ops = { /* LINKMODES_SET */ -struct link_mode_info { - int speed; - u8 lanes; - u8 duplex; -}; - -#define __LINK_MODE_LANES_CR 1 -#define __LINK_MODE_LANES_CR2 2 -#define __LINK_MODE_LANES_CR4 4 -#define __LINK_MODE_LANES_CR8 8 -#define __LINK_MODE_LANES_DR 1 -#define __LINK_MODE_LANES_DR2 2 -#define __LINK_MODE_LANES_DR4 4 -#define __LINK_MODE_LANES_DR8 8 -#define __LINK_MODE_LANES_KR 1 -#define __LINK_MODE_LANES_KR2 2 -#define __LINK_MODE_LANES_KR4 4 -#define __LINK_MODE_LANES_KR8 8 -#define __LINK_MODE_LANES_SR 1 -#define __LINK_MODE_LANES_SR2 2 -#define __LINK_MODE_LANES_SR4 4 -#define __LINK_MODE_LANES_SR8 8 -#define __LINK_MODE_LANES_ER 1 -#define __LINK_MODE_LANES_KX 1 -#define __LINK_MODE_LANES_KX4 4 -#define __LINK_MODE_LANES_LR 1 -#define __LINK_MODE_LANES_LR4 4 -#define __LINK_MODE_LANES_LR4_ER4 4 -#define __LINK_MODE_LANES_LR_ER_FR 1 -#define __LINK_MODE_LANES_LR2_ER2_FR2 2 -#define __LINK_MODE_LANES_LR4_ER4_FR4 4 -#define __LINK_MODE_LANES_LR8_ER8_FR8 8 -#define __LINK_MODE_LANES_LRM 1 -#define __LINK_MODE_LANES_MLD2 2 -#define __LINK_MODE_LANES_T 1 -#define __LINK_MODE_LANES_T1 1 -#define __LINK_MODE_LANES_X 1 -#define __LINK_MODE_LANES_FX 1 - -#define __DEFINE_LINK_MODE_PARAMS(_speed, _type, _duplex) \ - [ETHTOOL_LINK_MODE(_speed, _type, _duplex)] = { \ - .speed = SPEED_ ## _speed, \ - .lanes = __LINK_MODE_LANES_ ## _type, \ - .duplex = __DUPLEX_ ## _duplex \ - } -#define __DUPLEX_Half DUPLEX_HALF -#define __DUPLEX_Full DUPLEX_FULL -#define __DEFINE_SPECIAL_MODE_PARAMS(_mode) \ - [ETHTOOL_LINK_MODE_ ## _mode ## _BIT] = { \ - .speed = SPEED_UNKNOWN, \ - .lanes = 0, \ - .duplex = DUPLEX_UNKNOWN, \ - } - -static const struct link_mode_info link_mode_params[] = { - __DEFINE_LINK_MODE_PARAMS(10, T, Half), - __DEFINE_LINK_MODE_PARAMS(10, T, Full), - __DEFINE_LINK_MODE_PARAMS(100, T, Half), - __DEFINE_LINK_MODE_PARAMS(100, T, Full), - __DEFINE_LINK_MODE_PARAMS(1000, T, Half), - __DEFINE_LINK_MODE_PARAMS(1000, T, Full), - __DEFINE_SPECIAL_MODE_PARAMS(Autoneg), - __DEFINE_SPECIAL_MODE_PARAMS(TP), - __DEFINE_SPECIAL_MODE_PARAMS(AUI), - __DEFINE_SPECIAL_MODE_PARAMS(MII), - __DEFINE_SPECIAL_MODE_PARAMS(FIBRE), - __DEFINE_SPECIAL_MODE_PARAMS(BNC), - __DEFINE_LINK_MODE_PARAMS(10000, T, Full), - __DEFINE_SPECIAL_MODE_PARAMS(Pause), - __DEFINE_SPECIAL_MODE_PARAMS(Asym_Pause), - __DEFINE_LINK_MODE_PARAMS(2500, X, Full), - __DEFINE_SPECIAL_MODE_PARAMS(Backplane), - __DEFINE_LINK_MODE_PARAMS(1000, KX, Full), - __DEFINE_LINK_MODE_PARAMS(10000, KX4, Full), - __DEFINE_LINK_MODE_PARAMS(10000, KR, Full), - [ETHTOOL_LINK_MODE_10000baseR_FEC_BIT] = { - .speed = SPEED_10000, - .duplex = DUPLEX_FULL, - }, - __DEFINE_LINK_MODE_PARAMS(20000, MLD2, Full), - __DEFINE_LINK_MODE_PARAMS(20000, KR2, Full), - __DEFINE_LINK_MODE_PARAMS(40000, KR4, Full), - __DEFINE_LINK_MODE_PARAMS(40000, CR4, Full), - __DEFINE_LINK_MODE_PARAMS(40000, SR4, Full), - __DEFINE_LINK_MODE_PARAMS(40000, LR4, Full), - __DEFINE_LINK_MODE_PARAMS(56000, KR4, Full), - __DEFINE_LINK_MODE_PARAMS(56000, CR4, Full), - __DEFINE_LINK_MODE_PARAMS(56000, SR4, Full), - __DEFINE_LINK_MODE_PARAMS(56000, LR4, Full), - __DEFINE_LINK_MODE_PARAMS(25000, CR, Full), - __DEFINE_LINK_MODE_PARAMS(25000, KR, Full), - __DEFINE_LINK_MODE_PARAMS(25000, SR, Full), - __DEFINE_LINK_MODE_PARAMS(50000, CR2, Full), - __DEFINE_LINK_MODE_PARAMS(50000, KR2, Full), - __DEFINE_LINK_MODE_PARAMS(100000, KR4, Full), - __DEFINE_LINK_MODE_PARAMS(100000, SR4, Full), - __DEFINE_LINK_MODE_PARAMS(100000, CR4, Full), - __DEFINE_LINK_MODE_PARAMS(100000, LR4_ER4, Full), - __DEFINE_LINK_MODE_PARAMS(50000, SR2, Full), - __DEFINE_LINK_MODE_PARAMS(1000, X, Full), - __DEFINE_LINK_MODE_PARAMS(10000, CR, Full), - __DEFINE_LINK_MODE_PARAMS(10000, SR, Full), - __DEFINE_LINK_MODE_PARAMS(10000, LR, Full), - __DEFINE_LINK_MODE_PARAMS(10000, LRM, Full), - __DEFINE_LINK_MODE_PARAMS(10000, ER, Full), - __DEFINE_LINK_MODE_PARAMS(2500, T, Full), - __DEFINE_LINK_MODE_PARAMS(5000, T, Full), - __DEFINE_SPECIAL_MODE_PARAMS(FEC_NONE), - __DEFINE_SPECIAL_MODE_PARAMS(FEC_RS), - __DEFINE_SPECIAL_MODE_PARAMS(FEC_BASER), - __DEFINE_LINK_MODE_PARAMS(50000, KR, Full), - __DEFINE_LINK_MODE_PARAMS(50000, SR, Full), - __DEFINE_LINK_MODE_PARAMS(50000, CR, Full), - __DEFINE_LINK_MODE_PARAMS(50000, LR_ER_FR, Full), - __DEFINE_LINK_MODE_PARAMS(50000, DR, Full), - __DEFINE_LINK_MODE_PARAMS(100000, KR2, Full), - __DEFINE_LINK_MODE_PARAMS(100000, SR2, Full), - __DEFINE_LINK_MODE_PARAMS(100000, CR2, Full), - __DEFINE_LINK_MODE_PARAMS(100000, LR2_ER2_FR2, Full), - __DEFINE_LINK_MODE_PARAMS(100000, DR2, Full), - __DEFINE_LINK_MODE_PARAMS(200000, KR4, Full), - __DEFINE_LINK_MODE_PARAMS(200000, SR4, Full), - __DEFINE_LINK_MODE_PARAMS(200000, LR4_ER4_FR4, Full), - __DEFINE_LINK_MODE_PARAMS(200000, DR4, Full), - __DEFINE_LINK_MODE_PARAMS(200000, CR4, Full), - __DEFINE_LINK_MODE_PARAMS(100, T1, Full), - __DEFINE_LINK_MODE_PARAMS(1000, T1, Full), - __DEFINE_LINK_MODE_PARAMS(400000, KR8, Full), - __DEFINE_LINK_MODE_PARAMS(400000, SR8, Full), - __DEFINE_LINK_MODE_PARAMS(400000, LR8_ER8_FR8, Full), - __DEFINE_LINK_MODE_PARAMS(400000, DR8, Full), - __DEFINE_LINK_MODE_PARAMS(400000, CR8, Full), - __DEFINE_SPECIAL_MODE_PARAMS(FEC_LLRS), - __DEFINE_LINK_MODE_PARAMS(100000, KR, Full), - __DEFINE_LINK_MODE_PARAMS(100000, SR, Full), - __DEFINE_LINK_MODE_PARAMS(100000, LR_ER_FR, Full), - __DEFINE_LINK_MODE_PARAMS(100000, DR, Full), - __DEFINE_LINK_MODE_PARAMS(100000, CR, Full), - __DEFINE_LINK_MODE_PARAMS(200000, KR2, Full), - __DEFINE_LINK_MODE_PARAMS(200000, SR2, Full), - __DEFINE_LINK_MODE_PARAMS(200000, LR2_ER2_FR2, Full), - __DEFINE_LINK_MODE_PARAMS(200000, DR2, Full), - __DEFINE_LINK_MODE_PARAMS(200000, CR2, Full), - __DEFINE_LINK_MODE_PARAMS(400000, KR4, Full), - __DEFINE_LINK_MODE_PARAMS(400000, SR4, Full), - __DEFINE_LINK_MODE_PARAMS(400000, LR4_ER4_FR4, Full), - __DEFINE_LINK_MODE_PARAMS(400000, DR4, Full), - __DEFINE_LINK_MODE_PARAMS(400000, CR4, Full), - __DEFINE_LINK_MODE_PARAMS(100, FX, Half), - __DEFINE_LINK_MODE_PARAMS(100, FX, Full), -}; - const struct nla_policy ethnl_linkmodes_set_policy[] = { [ETHTOOL_A_LINKMODES_HEADER] = NLA_POLICY_NESTED(ethnl_header_policy), @@ -327,9 +177,6 @@ static bool ethnl_auto_linkmodes(struct ethtool_link_ksettings *ksettings, DECLARE_BITMAP(old_adv, __ETHTOOL_LINK_MODE_MASK_NBITS); unsigned int i; - BUILD_BUG_ON(ARRAY_SIZE(link_mode_params) != - __ETHTOOL_LINK_MODE_MASK_NBITS); - bitmap_copy(old_adv, advertising, __ETHTOOL_LINK_MODE_MASK_NBITS); for (i = 0; i < __ETHTOOL_LINK_MODE_MASK_NBITS; i++) { -- GitLab From 7dc33f0914a9c8f992592cddfab2bab7faf162b9 Mon Sep 17 00:00:00 2001 From: Danielle Ratson Date: Tue, 2 Feb 2021 20:06:08 +0200 Subject: [PATCH 1885/2012] ethtool: Expose the number of lanes in use Currently, ethtool does not expose how many lanes are used when the link is up. After adding a possibility to advertise or force a specific number of lanes, the lanes in use value can be either the maximum width of the port or below. Extend ethtool to expose the number of lanes currently in use for drivers that support it. For example: $ ethtool -s swp1 speed 100000 lanes 4 $ ethtool -s swp2 speed 100000 lanes 4 $ ip link set swp1 up $ ip link set swp2 up $ ethtool swp1 Settings for swp1: Supported ports: [ FIBRE Backplane ] Supported link modes: 1000baseT/Full 10000baseT/Full 1000baseKX/Full 10000baseKR/Full 10000baseR_FEC 40000baseKR4/Full 40000baseCR4/Full 40000baseSR4/Full 40000baseLR4/Full 25000baseCR/Full 25000baseKR/Full 25000baseSR/Full 50000baseCR2/Full 50000baseKR2/Full 100000baseKR4/Full 100000baseSR4/Full 100000baseCR4/Full 100000baseLR4_ER4/Full 50000baseSR2/Full 10000baseCR/Full 10000baseSR/Full 10000baseLR/Full 10000baseER/Full 50000baseKR/Full 50000baseSR/Full 50000baseCR/Full 50000baseLR_ER_FR/Full 50000baseDR/Full 100000baseKR2/Full 100000baseSR2/Full 100000baseCR2/Full 100000baseLR2_ER2_FR2/Full 100000baseDR2/Full 200000baseKR4/Full 200000baseSR4/Full 200000baseLR4_ER4_FR4/Full 200000baseDR4/Full 200000baseCR4/Full Supported pause frame use: Symmetric Receive-only Supports auto-negotiation: Yes Supported FEC modes: Not reported Advertised link modes: 1000baseT/Full 10000baseT/Full 1000baseKX/Full 1000baseKX/Full 10000baseKR/Full 10000baseR_FEC 40000baseKR4/Full 40000baseCR4/Full 40000baseSR4/Full 40000baseLR4/Full 25000baseCR/Full 25000baseKR/Full 25000baseSR/Full 50000baseCR2/Full 50000baseKR2/Full 100000baseKR4/Full 100000baseSR4/Full 100000baseCR4/Full 100000baseLR4_ER4/Full 50000baseSR2/Full 10000baseCR/Full 10000baseSR/Full 10000baseLR/Full 10000baseER/Full 200000baseKR4/Full 200000baseSR4/Full 200000baseLR4_ER4_FR4/Full 200000baseDR4/Full 200000baseCR4/Full Advertised pause frame use: No Advertised auto-negotiation: Yes Advertised FEC modes: Not reported Advertised link modes: 100000baseKR4/Full 100000baseSR4/Full 100000baseCR4/Full 100000baseLR4_ER4/Full Advertised pause frame use: No Advertised auto-negotiation: Yes Advertised FEC modes: Not reported Speed: 100000Mb/s Lanes: 4 Duplex: Full Auto-negotiation: on Port: Direct Attach Copper PHYAD: 0 Transceiver: internal Link detected: yes Signed-off-by: Danielle Ratson Signed-off-by: Jakub Kicinski --- net/ethtool/linkmodes.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/net/ethtool/linkmodes.c b/net/ethtool/linkmodes.c index fc986d035b015..f9eda596f3014 100644 --- a/net/ethtool/linkmodes.c +++ b/net/ethtool/linkmodes.c @@ -45,6 +45,9 @@ static int linkmodes_prepare_data(const struct ethnl_req_info *req_base, goto out; } + if (!dev->ethtool_ops->cap_link_lanes_supported) + data->ksettings.lanes = 0; + data->peer_empty = bitmap_empty(data->ksettings.link_modes.lp_advertising, __ETHTOOL_LINK_MODE_MASK_NBITS); @@ -65,6 +68,7 @@ static int linkmodes_reply_size(const struct ethnl_req_info *req_base, len = nla_total_size(sizeof(u8)) /* LINKMODES_AUTONEG */ + nla_total_size(sizeof(u32)) /* LINKMODES_SPEED */ + + nla_total_size(sizeof(u32)) /* LINKMODES_LANES */ + nla_total_size(sizeof(u8)) /* LINKMODES_DUPLEX */ + 0; ret = ethnl_bitset_size(ksettings->link_modes.advertising, @@ -125,6 +129,10 @@ static int linkmodes_fill_reply(struct sk_buff *skb, nla_put_u8(skb, ETHTOOL_A_LINKMODES_DUPLEX, lsettings->duplex)) return -EMSGSIZE; + if (ksettings->lanes && + nla_put_u32(skb, ETHTOOL_A_LINKMODES_LANES, ksettings->lanes)) + return -EMSGSIZE; + if (lsettings->master_slave_cfg != MASTER_SLAVE_CFG_UNSUPPORTED && nla_put_u8(skb, ETHTOOL_A_LINKMODES_MASTER_SLAVE_CFG, lsettings->master_slave_cfg)) -- GitLab From 5fc4053df3d9af1bf728feff90b8494dc036aae2 Mon Sep 17 00:00:00 2001 From: Danielle Ratson Date: Tue, 2 Feb 2021 20:06:09 +0200 Subject: [PATCH 1886/2012] mlxsw: ethtool: Remove max lanes filtering Currently, when a speed can be supported by different number of lanes, the supported link modes bitmask contains only link modes with a single number of lanes. This was done in order to prevent auto negotiation on number of lanes after 50G-1-lane and 100G-2-lanes link modes were introduced. For example, if a port's max width is 4, only link modes with 4 lanes will be presented as supported by that port, so 100G is always achieved by 4 lanes of 25G. After the previous patches that allow selection of the number of lanes, auto negotiation on number of lanes becomes practical. Remove that filtering of the maximum number of lanes supported link modes, so indeed all the supported and advertised link modes will be shown. Signed-off-by: Danielle Ratson Reviewed-by: Jiri Pirko Signed-off-by: Ido Schimmel Signed-off-by: Jakub Kicinski --- .../net/ethernet/mellanox/mlxsw/spectrum.h | 4 +-- .../mellanox/mlxsw/spectrum_ethtool.c | 33 ++++++++----------- 2 files changed, 15 insertions(+), 22 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index a3769f95a1828..1aba500af129a 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -328,13 +328,13 @@ struct mlxsw_sp_port_type_speed_ops { u32 ptys_eth_proto, struct ethtool_link_ksettings *cmd); void (*from_ptys_link)(struct mlxsw_sp *mlxsw_sp, u32 ptys_eth_proto, - u8 width, unsigned long *mode); + unsigned long *mode); u32 (*from_ptys_speed)(struct mlxsw_sp *mlxsw_sp, u32 ptys_eth_proto); void (*from_ptys_speed_duplex)(struct mlxsw_sp *mlxsw_sp, bool carrier_ok, u32 ptys_eth_proto, struct ethtool_link_ksettings *cmd); int (*ptys_max_speed)(struct mlxsw_sp_port *mlxsw_sp_port, u32 *p_max_speed); - u32 (*to_ptys_advert_link)(struct mlxsw_sp *mlxsw_sp, u8 width, + u32 (*to_ptys_advert_link)(struct mlxsw_sp *mlxsw_sp, const struct ethtool_link_ksettings *cmd); u32 (*to_ptys_speed)(struct mlxsw_sp *mlxsw_sp, u8 width, u32 speed); void (*reg_ptys_eth_pack)(struct mlxsw_sp *mlxsw_sp, char *payload, diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ethtool.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ethtool.c index 540616469e284..809ea7be77ccb 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ethtool.c @@ -858,7 +858,7 @@ static int mlxsw_sp_port_get_sset_count(struct net_device *dev, int sset) static void mlxsw_sp_port_get_link_supported(struct mlxsw_sp *mlxsw_sp, u32 eth_proto_cap, - u8 width, struct ethtool_link_ksettings *cmd) + struct ethtool_link_ksettings *cmd) { const struct mlxsw_sp_port_type_speed_ops *ops; @@ -869,13 +869,13 @@ mlxsw_sp_port_get_link_supported(struct mlxsw_sp *mlxsw_sp, u32 eth_proto_cap, ethtool_link_ksettings_add_link_mode(cmd, supported, Pause); ops->from_ptys_supported_port(mlxsw_sp, eth_proto_cap, cmd); - ops->from_ptys_link(mlxsw_sp, eth_proto_cap, width, + ops->from_ptys_link(mlxsw_sp, eth_proto_cap, cmd->link_modes.supported); } static void mlxsw_sp_port_get_link_advertise(struct mlxsw_sp *mlxsw_sp, - u32 eth_proto_admin, bool autoneg, u8 width, + u32 eth_proto_admin, bool autoneg, struct ethtool_link_ksettings *cmd) { const struct mlxsw_sp_port_type_speed_ops *ops; @@ -886,7 +886,7 @@ mlxsw_sp_port_get_link_advertise(struct mlxsw_sp *mlxsw_sp, return; ethtool_link_ksettings_add_link_mode(cmd, advertising, Autoneg); - ops->from_ptys_link(mlxsw_sp, eth_proto_admin, width, + ops->from_ptys_link(mlxsw_sp, eth_proto_admin, cmd->link_modes.advertising); } @@ -960,11 +960,9 @@ static int mlxsw_sp_port_get_link_ksettings(struct net_device *dev, ops = mlxsw_sp->port_type_speed_ops; autoneg = mlxsw_sp_port->link.autoneg; - mlxsw_sp_port_get_link_supported(mlxsw_sp, eth_proto_cap, - mlxsw_sp_port->mapping.width, cmd); + mlxsw_sp_port_get_link_supported(mlxsw_sp, eth_proto_cap, cmd); - mlxsw_sp_port_get_link_advertise(mlxsw_sp, eth_proto_admin, autoneg, - mlxsw_sp_port->mapping.width, cmd); + mlxsw_sp_port_get_link_advertise(mlxsw_sp, eth_proto_admin, autoneg, cmd); cmd->base.autoneg = autoneg ? AUTONEG_ENABLE : AUTONEG_DISABLE; cmd->base.port = mlxsw_sp_port_connector_port(connector_type); @@ -997,8 +995,7 @@ mlxsw_sp_port_set_link_ksettings(struct net_device *dev, autoneg = cmd->base.autoneg == AUTONEG_ENABLE; eth_proto_new = autoneg ? - ops->to_ptys_advert_link(mlxsw_sp, mlxsw_sp_port->mapping.width, - cmd) : + ops->to_ptys_advert_link(mlxsw_sp, cmd) : ops->to_ptys_speed(mlxsw_sp, mlxsw_sp_port->mapping.width, cmd->base.speed); @@ -1198,7 +1195,7 @@ mlxsw_sp1_from_ptys_supported_port(struct mlxsw_sp *mlxsw_sp, static void mlxsw_sp1_from_ptys_link(struct mlxsw_sp *mlxsw_sp, u32 ptys_eth_proto, - u8 width, unsigned long *mode) + unsigned long *mode) { int i; @@ -1260,7 +1257,7 @@ static int mlxsw_sp1_ptys_max_speed(struct mlxsw_sp_port *mlxsw_sp_port, u32 *p_ } static u32 -mlxsw_sp1_to_ptys_advert_link(struct mlxsw_sp *mlxsw_sp, u8 width, +mlxsw_sp1_to_ptys_advert_link(struct mlxsw_sp *mlxsw_sp, const struct ethtool_link_ksettings *cmd) { u32 ptys_proto = 0; @@ -1619,14 +1616,12 @@ mlxsw_sp2_set_bit_ethtool(const struct mlxsw_sp2_port_link_mode *link_mode, static void mlxsw_sp2_from_ptys_link(struct mlxsw_sp *mlxsw_sp, u32 ptys_eth_proto, - u8 width, unsigned long *mode) + unsigned long *mode) { - u8 mask_width = mlxsw_sp_port_mask_width_get(width); int i; for (i = 0; i < MLXSW_SP2_PORT_LINK_MODE_LEN; i++) { - if ((ptys_eth_proto & mlxsw_sp2_port_link_mode[i].mask) && - (mask_width & mlxsw_sp2_port_link_mode[i].mask_width)) + if (ptys_eth_proto & mlxsw_sp2_port_link_mode[i].mask) mlxsw_sp2_set_bit_ethtool(&mlxsw_sp2_port_link_mode[i], mode); } @@ -1698,16 +1693,14 @@ mlxsw_sp2_test_bit_ethtool(const struct mlxsw_sp2_port_link_mode *link_mode, } static u32 -mlxsw_sp2_to_ptys_advert_link(struct mlxsw_sp *mlxsw_sp, u8 width, +mlxsw_sp2_to_ptys_advert_link(struct mlxsw_sp *mlxsw_sp, const struct ethtool_link_ksettings *cmd) { - u8 mask_width = mlxsw_sp_port_mask_width_get(width); u32 ptys_proto = 0; int i; for (i = 0; i < MLXSW_SP2_PORT_LINK_MODE_LEN; i++) { - if ((mask_width & mlxsw_sp2_port_link_mode[i].mask_width) && - mlxsw_sp2_test_bit_ethtool(&mlxsw_sp2_port_link_mode[i], + if (mlxsw_sp2_test_bit_ethtool(&mlxsw_sp2_port_link_mode[i], cmd->link_modes.advertising)) ptys_proto |= mlxsw_sp2_port_link_mode[i].mask; } -- GitLab From 763ece86f0c27c751d6fac6b15863f5124f79a52 Mon Sep 17 00:00:00 2001 From: Danielle Ratson Date: Tue, 2 Feb 2021 20:06:10 +0200 Subject: [PATCH 1887/2012] mlxsw: ethtool: Add support for setting lanes when autoneg is off Currently, when auto negotiation is set to off, the user can force a specific speed or both speed and duplex. The user cannot influence the number of lanes that will be forced. Add support for setting speed along with lanes so one would be able to choose how many lanes will be forced. When lanes parameter is passed from user space, choose the link mode that its actual width equals to it. Otherwise, the default link mode will be the one that supports the width of the port. Signed-off-by: Danielle Ratson Signed-off-by: Jakub Kicinski --- .../net/ethernet/mellanox/mlxsw/spectrum.h | 3 +- .../mellanox/mlxsw/spectrum_ethtool.c | 116 ++++++++++++------ 2 files changed, 78 insertions(+), 41 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index 1aba500af129a..9ded4d1b1a9f4 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -336,7 +336,8 @@ struct mlxsw_sp_port_type_speed_ops { int (*ptys_max_speed)(struct mlxsw_sp_port *mlxsw_sp_port, u32 *p_max_speed); u32 (*to_ptys_advert_link)(struct mlxsw_sp *mlxsw_sp, const struct ethtool_link_ksettings *cmd); - u32 (*to_ptys_speed)(struct mlxsw_sp *mlxsw_sp, u8 width, u32 speed); + u32 (*to_ptys_speed_lanes)(struct mlxsw_sp *mlxsw_sp, u8 width, + const struct ethtool_link_ksettings *cmd); void (*reg_ptys_eth_pack)(struct mlxsw_sp *mlxsw_sp, char *payload, u8 local_port, u32 proto_admin, bool autoneg); void (*reg_ptys_eth_unpack)(struct mlxsw_sp *mlxsw_sp, char *payload, diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ethtool.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ethtool.c index 809ea7be77ccb..15c6e5af8ee52 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ethtool.c @@ -996,12 +996,12 @@ mlxsw_sp_port_set_link_ksettings(struct net_device *dev, autoneg = cmd->base.autoneg == AUTONEG_ENABLE; eth_proto_new = autoneg ? ops->to_ptys_advert_link(mlxsw_sp, cmd) : - ops->to_ptys_speed(mlxsw_sp, mlxsw_sp_port->mapping.width, - cmd->base.speed); + ops->to_ptys_speed_lanes(mlxsw_sp, mlxsw_sp_port->mapping.width, + cmd); eth_proto_new = eth_proto_new & eth_proto_cap; if (!eth_proto_new) { - netdev_err(dev, "No supported speed requested\n"); + netdev_err(dev, "No supported speed or lanes requested\n"); return -EINVAL; } @@ -1060,20 +1060,21 @@ mlxsw_sp_get_ts_info(struct net_device *netdev, struct ethtool_ts_info *info) } const struct ethtool_ops mlxsw_sp_port_ethtool_ops = { - .get_drvinfo = mlxsw_sp_port_get_drvinfo, - .get_link = ethtool_op_get_link, - .get_link_ext_state = mlxsw_sp_port_get_link_ext_state, - .get_pauseparam = mlxsw_sp_port_get_pauseparam, - .set_pauseparam = mlxsw_sp_port_set_pauseparam, - .get_strings = mlxsw_sp_port_get_strings, - .set_phys_id = mlxsw_sp_port_set_phys_id, - .get_ethtool_stats = mlxsw_sp_port_get_stats, - .get_sset_count = mlxsw_sp_port_get_sset_count, - .get_link_ksettings = mlxsw_sp_port_get_link_ksettings, - .set_link_ksettings = mlxsw_sp_port_set_link_ksettings, - .get_module_info = mlxsw_sp_get_module_info, - .get_module_eeprom = mlxsw_sp_get_module_eeprom, - .get_ts_info = mlxsw_sp_get_ts_info, + .cap_link_lanes_supported = true, + .get_drvinfo = mlxsw_sp_port_get_drvinfo, + .get_link = ethtool_op_get_link, + .get_link_ext_state = mlxsw_sp_port_get_link_ext_state, + .get_pauseparam = mlxsw_sp_port_get_pauseparam, + .set_pauseparam = mlxsw_sp_port_set_pauseparam, + .get_strings = mlxsw_sp_port_get_strings, + .set_phys_id = mlxsw_sp_port_set_phys_id, + .get_ethtool_stats = mlxsw_sp_port_get_stats, + .get_sset_count = mlxsw_sp_port_get_sset_count, + .get_link_ksettings = mlxsw_sp_port_get_link_ksettings, + .set_link_ksettings = mlxsw_sp_port_set_link_ksettings, + .get_module_info = mlxsw_sp_get_module_info, + .get_module_eeprom = mlxsw_sp_get_module_eeprom, + .get_ts_info = mlxsw_sp_get_ts_info, }; struct mlxsw_sp1_port_link_mode { @@ -1271,14 +1272,17 @@ mlxsw_sp1_to_ptys_advert_link(struct mlxsw_sp *mlxsw_sp, return ptys_proto; } -static u32 mlxsw_sp1_to_ptys_speed(struct mlxsw_sp *mlxsw_sp, u8 width, - u32 speed) +static u32 mlxsw_sp1_to_ptys_speed_lanes(struct mlxsw_sp *mlxsw_sp, u8 width, + const struct ethtool_link_ksettings *cmd) { u32 ptys_proto = 0; int i; + if (cmd->lanes > width) + return ptys_proto; + for (i = 0; i < MLXSW_SP1_PORT_LINK_MODE_LEN; i++) { - if (speed == mlxsw_sp1_port_link_mode[i].speed) + if (cmd->base.speed == mlxsw_sp1_port_link_mode[i].speed) ptys_proto |= mlxsw_sp1_port_link_mode[i].mask; } return ptys_proto; @@ -1321,7 +1325,7 @@ const struct mlxsw_sp_port_type_speed_ops mlxsw_sp1_port_type_speed_ops = { .from_ptys_speed_duplex = mlxsw_sp1_from_ptys_speed_duplex, .ptys_max_speed = mlxsw_sp1_ptys_max_speed, .to_ptys_advert_link = mlxsw_sp1_to_ptys_advert_link, - .to_ptys_speed = mlxsw_sp1_to_ptys_speed, + .to_ptys_speed_lanes = mlxsw_sp1_to_ptys_speed_lanes, .reg_ptys_eth_pack = mlxsw_sp1_reg_ptys_eth_pack, .reg_ptys_eth_unpack = mlxsw_sp1_reg_ptys_eth_unpack, .ptys_proto_cap_masked_get = mlxsw_sp1_ptys_proto_cap_masked_get, @@ -1483,7 +1487,8 @@ struct mlxsw_sp2_port_link_mode { int m_ethtool_len; u32 mask; u32 speed; - u8 mask_width; + u32 width; + u8 mask_sup_width; }; static const struct mlxsw_sp2_port_link_mode mlxsw_sp2_port_link_mode[] = { @@ -1491,105 +1496,117 @@ static const struct mlxsw_sp2_port_link_mode mlxsw_sp2_port_link_mode[] = { .mask = MLXSW_REG_PTYS_EXT_ETH_SPEED_SGMII_100M, .mask_ethtool = mlxsw_sp2_mask_ethtool_sgmii_100m, .m_ethtool_len = MLXSW_SP2_MASK_ETHTOOL_SGMII_100M_LEN, - .mask_width = MLXSW_SP_PORT_MASK_WIDTH_1X | + .mask_sup_width = MLXSW_SP_PORT_MASK_WIDTH_1X | MLXSW_SP_PORT_MASK_WIDTH_2X | MLXSW_SP_PORT_MASK_WIDTH_4X | MLXSW_SP_PORT_MASK_WIDTH_8X, .speed = SPEED_100, + .width = 1, }, { .mask = MLXSW_REG_PTYS_EXT_ETH_SPEED_1000BASE_X_SGMII, .mask_ethtool = mlxsw_sp2_mask_ethtool_1000base_x_sgmii, .m_ethtool_len = MLXSW_SP2_MASK_ETHTOOL_1000BASE_X_SGMII_LEN, - .mask_width = MLXSW_SP_PORT_MASK_WIDTH_1X | + .mask_sup_width = MLXSW_SP_PORT_MASK_WIDTH_1X | MLXSW_SP_PORT_MASK_WIDTH_2X | MLXSW_SP_PORT_MASK_WIDTH_4X | MLXSW_SP_PORT_MASK_WIDTH_8X, .speed = SPEED_1000, + .width = 1, }, { .mask = MLXSW_REG_PTYS_EXT_ETH_SPEED_5GBASE_R, .mask_ethtool = mlxsw_sp2_mask_ethtool_5gbase_r, .m_ethtool_len = MLXSW_SP2_MASK_ETHTOOL_5GBASE_R_LEN, - .mask_width = MLXSW_SP_PORT_MASK_WIDTH_1X | + .mask_sup_width = MLXSW_SP_PORT_MASK_WIDTH_1X | MLXSW_SP_PORT_MASK_WIDTH_2X | MLXSW_SP_PORT_MASK_WIDTH_4X | MLXSW_SP_PORT_MASK_WIDTH_8X, .speed = SPEED_5000, + .width = 1, }, { .mask = MLXSW_REG_PTYS_EXT_ETH_SPEED_XFI_XAUI_1_10G, .mask_ethtool = mlxsw_sp2_mask_ethtool_xfi_xaui_1_10g, .m_ethtool_len = MLXSW_SP2_MASK_ETHTOOL_XFI_XAUI_1_10G_LEN, - .mask_width = MLXSW_SP_PORT_MASK_WIDTH_1X | + .mask_sup_width = MLXSW_SP_PORT_MASK_WIDTH_1X | MLXSW_SP_PORT_MASK_WIDTH_2X | MLXSW_SP_PORT_MASK_WIDTH_4X | MLXSW_SP_PORT_MASK_WIDTH_8X, .speed = SPEED_10000, + .width = 1, }, { .mask = MLXSW_REG_PTYS_EXT_ETH_SPEED_XLAUI_4_XLPPI_4_40G, .mask_ethtool = mlxsw_sp2_mask_ethtool_xlaui_4_xlppi_4_40g, .m_ethtool_len = MLXSW_SP2_MASK_ETHTOOL_XLAUI_4_XLPPI_4_40G_LEN, - .mask_width = MLXSW_SP_PORT_MASK_WIDTH_4X | + .mask_sup_width = MLXSW_SP_PORT_MASK_WIDTH_4X | MLXSW_SP_PORT_MASK_WIDTH_8X, .speed = SPEED_40000, + .width = 4, }, { .mask = MLXSW_REG_PTYS_EXT_ETH_SPEED_25GAUI_1_25GBASE_CR_KR, .mask_ethtool = mlxsw_sp2_mask_ethtool_25gaui_1_25gbase_cr_kr, .m_ethtool_len = MLXSW_SP2_MASK_ETHTOOL_25GAUI_1_25GBASE_CR_KR_LEN, - .mask_width = MLXSW_SP_PORT_MASK_WIDTH_1X | + .mask_sup_width = MLXSW_SP_PORT_MASK_WIDTH_1X | MLXSW_SP_PORT_MASK_WIDTH_2X | MLXSW_SP_PORT_MASK_WIDTH_4X | MLXSW_SP_PORT_MASK_WIDTH_8X, .speed = SPEED_25000, + .width = 1, }, { .mask = MLXSW_REG_PTYS_EXT_ETH_SPEED_50GAUI_2_LAUI_2_50GBASE_CR2_KR2, .mask_ethtool = mlxsw_sp2_mask_ethtool_50gaui_2_laui_2_50gbase_cr2_kr2, .m_ethtool_len = MLXSW_SP2_MASK_ETHTOOL_50GAUI_2_LAUI_2_50GBASE_CR2_KR2_LEN, - .mask_width = MLXSW_SP_PORT_MASK_WIDTH_2X | + .mask_sup_width = MLXSW_SP_PORT_MASK_WIDTH_2X | MLXSW_SP_PORT_MASK_WIDTH_4X | MLXSW_SP_PORT_MASK_WIDTH_8X, .speed = SPEED_50000, + .width = 2, }, { .mask = MLXSW_REG_PTYS_EXT_ETH_SPEED_50GAUI_1_LAUI_1_50GBASE_CR_KR, .mask_ethtool = mlxsw_sp2_mask_ethtool_50gaui_1_laui_1_50gbase_cr_kr, .m_ethtool_len = MLXSW_SP2_MASK_ETHTOOL_50GAUI_1_LAUI_1_50GBASE_CR_KR_LEN, - .mask_width = MLXSW_SP_PORT_MASK_WIDTH_1X, + .mask_sup_width = MLXSW_SP_PORT_MASK_WIDTH_1X, .speed = SPEED_50000, + .width = 1, }, { .mask = MLXSW_REG_PTYS_EXT_ETH_SPEED_CAUI_4_100GBASE_CR4_KR4, .mask_ethtool = mlxsw_sp2_mask_ethtool_caui_4_100gbase_cr4_kr4, .m_ethtool_len = MLXSW_SP2_MASK_ETHTOOL_CAUI_4_100GBASE_CR4_KR4_LEN, - .mask_width = MLXSW_SP_PORT_MASK_WIDTH_4X | + .mask_sup_width = MLXSW_SP_PORT_MASK_WIDTH_4X | MLXSW_SP_PORT_MASK_WIDTH_8X, .speed = SPEED_100000, + .width = 4, }, { .mask = MLXSW_REG_PTYS_EXT_ETH_SPEED_100GAUI_2_100GBASE_CR2_KR2, .mask_ethtool = mlxsw_sp2_mask_ethtool_100gaui_2_100gbase_cr2_kr2, .m_ethtool_len = MLXSW_SP2_MASK_ETHTOOL_100GAUI_2_100GBASE_CR2_KR2_LEN, - .mask_width = MLXSW_SP_PORT_MASK_WIDTH_2X, + .mask_sup_width = MLXSW_SP_PORT_MASK_WIDTH_2X, .speed = SPEED_100000, + .width = 2, }, { .mask = MLXSW_REG_PTYS_EXT_ETH_SPEED_200GAUI_4_200GBASE_CR4_KR4, .mask_ethtool = mlxsw_sp2_mask_ethtool_200gaui_4_200gbase_cr4_kr4, .m_ethtool_len = MLXSW_SP2_MASK_ETHTOOL_200GAUI_4_200GBASE_CR4_KR4_LEN, - .mask_width = MLXSW_SP_PORT_MASK_WIDTH_4X | + .mask_sup_width = MLXSW_SP_PORT_MASK_WIDTH_4X | MLXSW_SP_PORT_MASK_WIDTH_8X, .speed = SPEED_200000, + .width = 4, }, { .mask = MLXSW_REG_PTYS_EXT_ETH_SPEED_400GAUI_8, .mask_ethtool = mlxsw_sp2_mask_ethtool_400gaui_8, .m_ethtool_len = MLXSW_SP2_MASK_ETHTOOL_400GAUI_8_LEN, - .mask_width = MLXSW_SP_PORT_MASK_WIDTH_8X, + .mask_sup_width = MLXSW_SP_PORT_MASK_WIDTH_8X, .speed = SPEED_400000, + .width = 8, }, }; @@ -1707,17 +1724,36 @@ mlxsw_sp2_to_ptys_advert_link(struct mlxsw_sp *mlxsw_sp, return ptys_proto; } -static u32 mlxsw_sp2_to_ptys_speed(struct mlxsw_sp *mlxsw_sp, - u8 width, u32 speed) +static u32 mlxsw_sp2_to_ptys_speed_lanes(struct mlxsw_sp *mlxsw_sp, u8 width, + const struct ethtool_link_ksettings *cmd) { u8 mask_width = mlxsw_sp_port_mask_width_get(width); + struct mlxsw_sp2_port_link_mode link_mode; u32 ptys_proto = 0; int i; + if (cmd->lanes > width) + return ptys_proto; + for (i = 0; i < MLXSW_SP2_PORT_LINK_MODE_LEN; i++) { - if ((speed == mlxsw_sp2_port_link_mode[i].speed) && - (mask_width & mlxsw_sp2_port_link_mode[i].mask_width)) - ptys_proto |= mlxsw_sp2_port_link_mode[i].mask; + if (cmd->base.speed == mlxsw_sp2_port_link_mode[i].speed) { + link_mode = mlxsw_sp2_port_link_mode[i]; + + if (!cmd->lanes) { + /* If number of lanes was not set by user space, + * choose the link mode that supports the width + * of the port. + */ + if (mask_width & link_mode.mask_sup_width) + ptys_proto |= link_mode.mask; + } else if (cmd->lanes == link_mode.width) { + /* Else if the number of lanes was set, choose + * the link mode that its actual width equals to + * it. + */ + ptys_proto |= link_mode.mask; + } + } } return ptys_proto; } @@ -1760,7 +1796,7 @@ const struct mlxsw_sp_port_type_speed_ops mlxsw_sp2_port_type_speed_ops = { .from_ptys_speed_duplex = mlxsw_sp2_from_ptys_speed_duplex, .ptys_max_speed = mlxsw_sp2_ptys_max_speed, .to_ptys_advert_link = mlxsw_sp2_to_ptys_advert_link, - .to_ptys_speed = mlxsw_sp2_to_ptys_speed, + .to_ptys_speed_lanes = mlxsw_sp2_to_ptys_speed_lanes, .reg_ptys_eth_pack = mlxsw_sp2_reg_ptys_eth_pack, .reg_ptys_eth_unpack = mlxsw_sp2_reg_ptys_eth_unpack, .ptys_proto_cap_masked_get = mlxsw_sp2_ptys_proto_cap_masked_get, -- GitLab From 25a96f057a0fab318376c85bd83afda267f8ad33 Mon Sep 17 00:00:00 2001 From: Danielle Ratson Date: Tue, 2 Feb 2021 20:06:11 +0200 Subject: [PATCH 1888/2012] mlxsw: ethtool: Pass link mode in use to ethtool Currently, when user space queries the link's parameters, as speed and duplex, each parameter is passed from the driver to ethtool. Instead, pass the link mode bit in use. In Spectrum-1, simply pass the bit that is set to '1' from PTYS register. In Spectrum-2, pass the first link mode bit in the mask of the used link mode. Signed-off-by: Danielle Ratson Signed-off-by: Jakub Kicinski --- .../net/ethernet/mellanox/mlxsw/spectrum.h | 6 +-- .../mellanox/mlxsw/spectrum_ethtool.c | 47 +++++++++++-------- 2 files changed, 30 insertions(+), 23 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index 9ded4d1b1a9f4..d9d9e1f488f94 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -330,9 +330,9 @@ struct mlxsw_sp_port_type_speed_ops { void (*from_ptys_link)(struct mlxsw_sp *mlxsw_sp, u32 ptys_eth_proto, unsigned long *mode); u32 (*from_ptys_speed)(struct mlxsw_sp *mlxsw_sp, u32 ptys_eth_proto); - void (*from_ptys_speed_duplex)(struct mlxsw_sp *mlxsw_sp, - bool carrier_ok, u32 ptys_eth_proto, - struct ethtool_link_ksettings *cmd); + void (*from_ptys_link_mode)(struct mlxsw_sp *mlxsw_sp, + bool carrier_ok, u32 ptys_eth_proto, + struct ethtool_link_ksettings *cmd); int (*ptys_max_speed)(struct mlxsw_sp_port *mlxsw_sp_port, u32 *p_max_speed); u32 (*to_ptys_advert_link)(struct mlxsw_sp *mlxsw_sp, const struct ethtool_link_ksettings *cmd); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ethtool.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ethtool.c index 15c6e5af8ee52..bd7f873f6290b 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ethtool.c @@ -966,8 +966,8 @@ static int mlxsw_sp_port_get_link_ksettings(struct net_device *dev, cmd->base.autoneg = autoneg ? AUTONEG_ENABLE : AUTONEG_DISABLE; cmd->base.port = mlxsw_sp_port_connector_port(connector_type); - ops->from_ptys_speed_duplex(mlxsw_sp, netif_carrier_ok(dev), - eth_proto_oper, cmd); + ops->from_ptys_link_mode(mlxsw_sp, netif_carrier_ok(dev), + eth_proto_oper, cmd); return 0; } @@ -1221,19 +1221,21 @@ mlxsw_sp1_from_ptys_speed(struct mlxsw_sp *mlxsw_sp, u32 ptys_eth_proto) } static void -mlxsw_sp1_from_ptys_speed_duplex(struct mlxsw_sp *mlxsw_sp, bool carrier_ok, - u32 ptys_eth_proto, - struct ethtool_link_ksettings *cmd) +mlxsw_sp1_from_ptys_link_mode(struct mlxsw_sp *mlxsw_sp, bool carrier_ok, + u32 ptys_eth_proto, + struct ethtool_link_ksettings *cmd) { - cmd->base.speed = SPEED_UNKNOWN; - cmd->base.duplex = DUPLEX_UNKNOWN; + int i; + + cmd->link_mode = -1; if (!carrier_ok) return; - cmd->base.speed = mlxsw_sp1_from_ptys_speed(mlxsw_sp, ptys_eth_proto); - if (cmd->base.speed != SPEED_UNKNOWN) - cmd->base.duplex = DUPLEX_FULL; + for (i = 0; i < MLXSW_SP1_PORT_LINK_MODE_LEN; i++) { + if (ptys_eth_proto & mlxsw_sp1_port_link_mode[i].mask) + cmd->link_mode = mlxsw_sp1_port_link_mode[i].mask_ethtool; + } } static int mlxsw_sp1_ptys_max_speed(struct mlxsw_sp_port *mlxsw_sp_port, u32 *p_max_speed) @@ -1322,7 +1324,7 @@ const struct mlxsw_sp_port_type_speed_ops mlxsw_sp1_port_type_speed_ops = { .from_ptys_supported_port = mlxsw_sp1_from_ptys_supported_port, .from_ptys_link = mlxsw_sp1_from_ptys_link, .from_ptys_speed = mlxsw_sp1_from_ptys_speed, - .from_ptys_speed_duplex = mlxsw_sp1_from_ptys_speed_duplex, + .from_ptys_link_mode = mlxsw_sp1_from_ptys_link_mode, .ptys_max_speed = mlxsw_sp1_ptys_max_speed, .to_ptys_advert_link = mlxsw_sp1_to_ptys_advert_link, .to_ptys_speed_lanes = mlxsw_sp1_to_ptys_speed_lanes, @@ -1658,19 +1660,24 @@ mlxsw_sp2_from_ptys_speed(struct mlxsw_sp *mlxsw_sp, u32 ptys_eth_proto) } static void -mlxsw_sp2_from_ptys_speed_duplex(struct mlxsw_sp *mlxsw_sp, bool carrier_ok, - u32 ptys_eth_proto, - struct ethtool_link_ksettings *cmd) +mlxsw_sp2_from_ptys_link_mode(struct mlxsw_sp *mlxsw_sp, bool carrier_ok, + u32 ptys_eth_proto, + struct ethtool_link_ksettings *cmd) { - cmd->base.speed = SPEED_UNKNOWN; - cmd->base.duplex = DUPLEX_UNKNOWN; + struct mlxsw_sp2_port_link_mode link; + int i; + + cmd->link_mode = -1; if (!carrier_ok) return; - cmd->base.speed = mlxsw_sp2_from_ptys_speed(mlxsw_sp, ptys_eth_proto); - if (cmd->base.speed != SPEED_UNKNOWN) - cmd->base.duplex = DUPLEX_FULL; + for (i = 0; i < MLXSW_SP2_PORT_LINK_MODE_LEN; i++) { + if (ptys_eth_proto & mlxsw_sp2_port_link_mode[i].mask) { + link = mlxsw_sp2_port_link_mode[i]; + cmd->link_mode = link.mask_ethtool[1]; + } + } } static int mlxsw_sp2_ptys_max_speed(struct mlxsw_sp_port *mlxsw_sp_port, u32 *p_max_speed) @@ -1793,7 +1800,7 @@ const struct mlxsw_sp_port_type_speed_ops mlxsw_sp2_port_type_speed_ops = { .from_ptys_supported_port = mlxsw_sp2_from_ptys_supported_port, .from_ptys_link = mlxsw_sp2_from_ptys_link, .from_ptys_speed = mlxsw_sp2_from_ptys_speed, - .from_ptys_speed_duplex = mlxsw_sp2_from_ptys_speed_duplex, + .from_ptys_link_mode = mlxsw_sp2_from_ptys_link_mode, .ptys_max_speed = mlxsw_sp2_ptys_max_speed, .to_ptys_advert_link = mlxsw_sp2_to_ptys_advert_link, .to_ptys_speed_lanes = mlxsw_sp2_to_ptys_speed_lanes, -- GitLab From f72e2f48c71051f54e6fa214dc57f586386173b5 Mon Sep 17 00:00:00 2001 From: Danielle Ratson Date: Tue, 2 Feb 2021 20:06:12 +0200 Subject: [PATCH 1889/2012] net: selftests: Add lanes setting test Test that setting lanes parameter is working. Set max speed and max lanes in the list of advertised link modes, and then try to set max speed with the lanes below max lanes if exists in the list. And then, test that setting number of lanes larger than max lanes fails. Do the above for both autoneg on and off. $ ./ethtool_lanes.sh TEST: 4 lanes is autonegotiated [ OK ] TEST: Lanes number larger than max width is not set [ OK ] TEST: Autoneg off, 4 lanes detected during force mode [ OK ] TEST: Lanes number larger than max width is not set [ OK ] Signed-off-by: Danielle Ratson Signed-off-by: Jakub Kicinski --- .../drivers/net/mlxsw/ethtool_lanes.sh | 187 ++++++++++++++++++ .../selftests/net/forwarding/ethtool_lib.sh | 34 ++++ tools/testing/selftests/net/forwarding/lib.sh | 28 +++ 3 files changed, 249 insertions(+) create mode 100755 tools/testing/selftests/drivers/net/mlxsw/ethtool_lanes.sh diff --git a/tools/testing/selftests/drivers/net/mlxsw/ethtool_lanes.sh b/tools/testing/selftests/drivers/net/mlxsw/ethtool_lanes.sh new file mode 100755 index 0000000000000..91891b9418d76 --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/ethtool_lanes.sh @@ -0,0 +1,187 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +lib_dir=$(dirname $0)/../../../net/forwarding + +ALL_TESTS=" + autoneg + autoneg_force_mode +" + +NUM_NETIFS=2 +: ${TIMEOUT:=30000} # ms +source $lib_dir/lib.sh +source $lib_dir/ethtool_lib.sh + +setup_prepare() +{ + swp1=${NETIFS[p1]} + swp2=${NETIFS[p2]} + + ip link set dev $swp1 up + ip link set dev $swp2 up + + busywait "$TIMEOUT" wait_for_port_up ethtool $swp2 + check_err $? "ports did not come up" + + local lanes_exist=$(ethtool $swp1 | grep 'Lanes:') + if [[ -z $lanes_exist ]]; then + log_test "SKIP: driver does not support lanes setting" + exit 1 + fi + + ip link set dev $swp2 down + ip link set dev $swp1 down +} + +check_lanes() +{ + local dev=$1; shift + local lanes=$1; shift + local max_speed=$1; shift + local chosen_lanes + + chosen_lanes=$(ethtool $dev | grep 'Lanes:') + chosen_lanes=${chosen_lanes#*"Lanes: "} + + ((chosen_lanes == lanes)) + check_err $? "swp1 advertise $max_speed and $lanes, devs sync to $chosen_lanes" +} + +check_unsupported_lanes() +{ + local dev=$1; shift + local max_speed=$1; shift + local max_lanes=$1; shift + local autoneg=$1; shift + local autoneg_str="" + + local unsupported_lanes=$((max_lanes *= 2)) + + if [[ $autoneg -eq 0 ]]; then + autoneg_str="autoneg off" + fi + + ethtool -s $swp1 speed $max_speed lanes $unsupported_lanes $autoneg_str &> /dev/null + check_fail $? "Unsuccessful $unsupported_lanes lanes setting was expected" +} + +max_speed_and_lanes_get() +{ + local dev=$1; shift + local arr=("$@") + local max_lanes + local max_speed + local -a lanes_arr + local -a speeds_arr + local -a max_values + + for ((i=0; i<${#arr[@]}; i+=2)); do + speeds_arr+=("${arr[$i]}") + lanes_arr+=("${arr[i+1]}") + done + + max_values+=($(get_max "${speeds_arr[@]}")) + max_values+=($(get_max "${lanes_arr[@]}")) + + echo ${max_values[@]} +} + +search_linkmode() +{ + local speed=$1; shift + local lanes=$1; shift + local arr=("$@") + + for ((i=0; i<${#arr[@]}; i+=2)); do + if [[ $speed -eq ${arr[$i]} && $lanes -eq ${arr[i+1]} ]]; then + return 1 + fi + done + return 0 +} + +autoneg() +{ + RET=0 + + local lanes + local max_speed + local max_lanes + + local -a linkmodes_params=($(dev_linkmodes_params_get $swp1 1)) + local -a max_values=($(max_speed_and_lanes_get $swp1 "${linkmodes_params[@]}")) + max_speed=${max_values[0]} + max_lanes=${max_values[1]} + + lanes=$max_lanes + + while [[ $lanes -ge 1 ]]; do + search_linkmode $max_speed $lanes "${linkmodes_params[@]}" + if [[ $? -eq 1 ]]; then + ethtool_set $swp1 speed $max_speed lanes $lanes + ip link set dev $swp1 up + ip link set dev $swp2 up + busywait "$TIMEOUT" wait_for_port_up ethtool $swp2 + check_err $? "ports did not come up" + + check_lanes $swp1 $lanes $max_speed + log_test "$lanes lanes is autonegotiated" + fi + let $((lanes /= 2)) + done + + check_unsupported_lanes $swp1 $max_speed $max_lanes 1 + log_test "Lanes number larger than max width is not set" + + ip link set dev $swp2 down + ip link set dev $swp1 down +} + +autoneg_force_mode() +{ + RET=0 + + local lanes + local max_speed + local max_lanes + + local -a linkmodes_params=($(dev_linkmodes_params_get $swp1 1)) + local -a max_values=($(max_speed_and_lanes_get $swp1 "${linkmodes_params[@]}")) + max_speed=${max_values[0]} + max_lanes=${max_values[1]} + + lanes=$max_lanes + + while [[ $lanes -ge 1 ]]; do + search_linkmode $max_speed $lanes "${linkmodes_params[@]}" + if [[ $? -eq 1 ]]; then + ethtool_set $swp1 speed $max_speed lanes $lanes autoneg off + ethtool_set $swp2 speed $max_speed lanes $lanes autoneg off + ip link set dev $swp1 up + ip link set dev $swp2 up + busywait "$TIMEOUT" wait_for_port_up ethtool $swp2 + check_err $? "ports did not come up" + + check_lanes $swp1 $lanes $max_speed + log_test "Autoneg off, $lanes lanes detected during force mode" + fi + let $((lanes /= 2)) + done + + check_unsupported_lanes $swp1 $max_speed $max_lanes 0 + log_test "Lanes number larger than max width is not set" + + ip link set dev $swp2 down + ip link set dev $swp1 down + + ethtool -s $swp2 autoneg on + ethtool -s $swp1 autoneg on +} + +check_ethtool_lanes_support +setup_prepare + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/ethtool_lib.sh b/tools/testing/selftests/net/forwarding/ethtool_lib.sh index 9188e624dec07..b9bfb45085afd 100644 --- a/tools/testing/selftests/net/forwarding/ethtool_lib.sh +++ b/tools/testing/selftests/net/forwarding/ethtool_lib.sh @@ -22,6 +22,40 @@ ethtool_set() check_err $out "error in configuration. $cmd" } +dev_linkmodes_params_get() +{ + local dev=$1; shift + local adver=$1; shift + local -a linkmodes_params + local param_count + local arr + + if (($adver)); then + mode="Advertised link modes" + else + mode="Supported link modes" + fi + + local -a dev_linkmodes=($(dev_speeds_get $dev 1 $adver)) + for ((i=0; i<${#dev_linkmodes[@]}; i++)); do + linkmodes_params[$i]=$(echo -e "${dev_linkmodes[$i]}" | \ + # Replaces all non numbers with spaces + sed -e 's/[^0-9]/ /g' | \ + # Squeeze spaces in sequence to 1 space + tr -s ' ') + # Count how many numbers were found in the linkmode + param_count=$(echo "${linkmodes_params[$i]}" | wc -w) + if [[ $param_count -eq 1 ]]; then + linkmodes_params[$i]="${linkmodes_params[$i]} 1" + elif [[ $param_count -ge 3 ]]; then + arr=(${linkmodes_params[$i]}) + # Take only first two params + linkmodes_params[$i]=$(echo "${arr[@]:0:2}") + fi + done + echo ${linkmodes_params[@]} +} + dev_speeds_get() { local dev=$1; shift diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh index 98ea37d26c44a..40b3a86a62cf4 100644 --- a/tools/testing/selftests/net/forwarding/lib.sh +++ b/tools/testing/selftests/net/forwarding/lib.sh @@ -69,6 +69,15 @@ check_tc_action_hw_stats_support() fi } +check_ethtool_lanes_support() +{ + ethtool --help 2>&1| grep lanes &> /dev/null + if [[ $? -ne 0 ]]; then + echo "SKIP: ethtool too old; it is missing lanes support" + exit 1 + fi +} + if [[ "$(id -u)" -ne 0 ]]; then echo "SKIP: need root privileges" exit 0 @@ -263,6 +272,20 @@ not() [[ $? != 0 ]] } +get_max() +{ + local arr=("$@") + + max=${arr[0]} + for cur in ${arr[@]}; do + if [[ $cur -gt $max ]]; then + max=$cur + fi + done + + echo $max +} + grep_bridge_fdb() { local addr=$1; shift @@ -279,6 +302,11 @@ grep_bridge_fdb() $@ | grep $addr | grep $flag "$word" } +wait_for_port_up() +{ + "$@" | grep -q "Link detected: yes" +} + wait_for_offload() { "$@" | grep -q offload -- GitLab From f5a5589c72509abaeb705123b64e7f5a078becf0 Mon Sep 17 00:00:00 2001 From: Wei Wang Date: Tue, 2 Feb 2021 11:34:08 -0800 Subject: [PATCH 1890/2012] tcp: use a smaller percpu_counter batch size for sk_alloc Currently, a percpu_counter with the default batch size (2*nr_cpus) is used to record the total # of active sockets per protocol. This means sk_sockets_allocated_read_positive() could be off by +/-2*(nr_cpus^2). This under/over-estimation could lead to wrong memory suppression conditions in __sk_raise_mem_allocated(). Fix this by using a more reasonable fixed batch size of 16. See related commit cf86a086a180 ("net/dst: use a smaller percpu_counter batch for dst entries accounting") that addresses a similar issue. Signed-off-by: Wei Wang Signed-off-by: Eric Dumazet Reviewed-by: Soheil Hassas Yeganeh Link: https://lore.kernel.org/r/20210202193408.1171634-1-weiwan@google.com Signed-off-by: Jakub Kicinski --- include/net/sock.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/include/net/sock.h b/include/net/sock.h index 129d200bccb46..690e496a0e799 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1350,14 +1350,18 @@ sk_memory_allocated_sub(struct sock *sk, int amt) atomic_long_sub(amt, sk->sk_prot->memory_allocated); } +#define SK_ALLOC_PERCPU_COUNTER_BATCH 16 + static inline void sk_sockets_allocated_dec(struct sock *sk) { - percpu_counter_dec(sk->sk_prot->sockets_allocated); + percpu_counter_add_batch(sk->sk_prot->sockets_allocated, -1, + SK_ALLOC_PERCPU_COUNTER_BATCH); } static inline void sk_sockets_allocated_inc(struct sock *sk) { - percpu_counter_inc(sk->sk_prot->sockets_allocated); + percpu_counter_add_batch(sk->sk_prot->sockets_allocated, 1, + SK_ALLOC_PERCPU_COUNTER_BATCH); } static inline u64 -- GitLab From 3dd344ea84e122f791ab55498aab985535f32cba Mon Sep 17 00:00:00 2001 From: Hariharan Ananthakrishnan Date: Fri, 29 Jan 2021 00:12:10 +0000 Subject: [PATCH 1891/2012] net: tracepoint: exposing sk_family in all tcp:tracepoints Similar to sock:inet_sock_set_state tracepoint, expose sk_family to distinguish AF_INET and AF_INET6 families. The following tcp tracepoints are updated: tcp:tcp_destroy_sock tcp:tcp_rcv_space_adjust tcp:tcp_retransmit_skb tcp:tcp_send_reset tcp:tcp_receive_reset tcp:tcp_retransmit_synack tcp:tcp_probe Signed-off-by: Hariharan Ananthakrishnan Signed-off-by: Brendan Gregg Link: https://lore.kernel.org/r/20210129001210.344438-1-hari@netflix.com Signed-off-by: Jakub Kicinski --- include/trace/events/tcp.h | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/include/trace/events/tcp.h b/include/trace/events/tcp.h index cf97f6339acba..ba94857eea11e 100644 --- a/include/trace/events/tcp.h +++ b/include/trace/events/tcp.h @@ -59,6 +59,7 @@ DECLARE_EVENT_CLASS(tcp_event_sk_skb, __field(int, state) __field(__u16, sport) __field(__u16, dport) + __field(__u16, family) __array(__u8, saddr, 4) __array(__u8, daddr, 4) __array(__u8, saddr_v6, 16) @@ -75,6 +76,7 @@ DECLARE_EVENT_CLASS(tcp_event_sk_skb, __entry->sport = ntohs(inet->inet_sport); __entry->dport = ntohs(inet->inet_dport); + __entry->family = sk->sk_family; p32 = (__be32 *) __entry->saddr; *p32 = inet->inet_saddr; @@ -86,7 +88,8 @@ DECLARE_EVENT_CLASS(tcp_event_sk_skb, sk->sk_v6_rcv_saddr, sk->sk_v6_daddr); ), - TP_printk("sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6c daddrv6=%pI6c state=%s", + TP_printk("family=%s sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6c daddrv6=%pI6c state=%s", + show_family_name(__entry->family), __entry->sport, __entry->dport, __entry->saddr, __entry->daddr, __entry->saddr_v6, __entry->daddr_v6, show_tcp_state_name(__entry->state)) @@ -125,6 +128,7 @@ DECLARE_EVENT_CLASS(tcp_event_sk, __field(const void *, skaddr) __field(__u16, sport) __field(__u16, dport) + __field(__u16, family) __array(__u8, saddr, 4) __array(__u8, daddr, 4) __array(__u8, saddr_v6, 16) @@ -140,6 +144,7 @@ DECLARE_EVENT_CLASS(tcp_event_sk, __entry->sport = ntohs(inet->inet_sport); __entry->dport = ntohs(inet->inet_dport); + __entry->family = sk->sk_family; p32 = (__be32 *) __entry->saddr; *p32 = inet->inet_saddr; @@ -153,7 +158,8 @@ DECLARE_EVENT_CLASS(tcp_event_sk, __entry->sock_cookie = sock_gen_cookie(sk); ), - TP_printk("sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6c daddrv6=%pI6c sock_cookie=%llx", + TP_printk("family=%s sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6c daddrv6=%pI6c sock_cookie=%llx", + show_family_name(__entry->family), __entry->sport, __entry->dport, __entry->saddr, __entry->daddr, __entry->saddr_v6, __entry->daddr_v6, @@ -192,6 +198,7 @@ TRACE_EVENT(tcp_retransmit_synack, __field(const void *, req) __field(__u16, sport) __field(__u16, dport) + __field(__u16, family) __array(__u8, saddr, 4) __array(__u8, daddr, 4) __array(__u8, saddr_v6, 16) @@ -207,6 +214,7 @@ TRACE_EVENT(tcp_retransmit_synack, __entry->sport = ireq->ir_num; __entry->dport = ntohs(ireq->ir_rmt_port); + __entry->family = sk->sk_family; p32 = (__be32 *) __entry->saddr; *p32 = ireq->ir_loc_addr; @@ -218,7 +226,8 @@ TRACE_EVENT(tcp_retransmit_synack, ireq->ir_v6_loc_addr, ireq->ir_v6_rmt_addr); ), - TP_printk("sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6c daddrv6=%pI6c", + TP_printk("family=%s sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6c daddrv6=%pI6c", + show_family_name(__entry->family), __entry->sport, __entry->dport, __entry->saddr, __entry->daddr, __entry->saddr_v6, __entry->daddr_v6) @@ -238,6 +247,7 @@ TRACE_EVENT(tcp_probe, __array(__u8, daddr, sizeof(struct sockaddr_in6)) __field(__u16, sport) __field(__u16, dport) + __field(__u16, family) __field(__u32, mark) __field(__u16, data_len) __field(__u32, snd_nxt) @@ -264,6 +274,7 @@ TRACE_EVENT(tcp_probe, __entry->sport = ntohs(inet->inet_sport); __entry->dport = ntohs(inet->inet_dport); __entry->mark = skb->mark; + __entry->family = sk->sk_family; __entry->data_len = skb->len - __tcp_hdrlen(th); __entry->snd_nxt = tp->snd_nxt; @@ -276,7 +287,8 @@ TRACE_EVENT(tcp_probe, __entry->sock_cookie = sock_gen_cookie(sk); ), - TP_printk("src=%pISpc dest=%pISpc mark=%#x data_len=%d snd_nxt=%#x snd_una=%#x snd_cwnd=%u ssthresh=%u snd_wnd=%u srtt=%u rcv_wnd=%u sock_cookie=%llx", + TP_printk("family=%s src=%pISpc dest=%pISpc mark=%#x data_len=%d snd_nxt=%#x snd_una=%#x snd_cwnd=%u ssthresh=%u snd_wnd=%u srtt=%u rcv_wnd=%u sock_cookie=%llx", + show_family_name(__entry->family), __entry->saddr, __entry->daddr, __entry->mark, __entry->data_len, __entry->snd_nxt, __entry->snd_una, __entry->snd_cwnd, __entry->ssthresh, __entry->snd_wnd, -- GitLab From 49ecc587dca2754571791bebd36e9e36e2a0d973 Mon Sep 17 00:00:00 2001 From: Jonas Bonn Date: Wed, 3 Feb 2021 08:07:59 +0100 Subject: [PATCH 1892/2012] Revert "GTP: add support for flow based tunneling API" This reverts commit 9ab7e76aefc97a9aa664accb59d6e8dc5e52514a. This patch was committed without maintainer approval and despite a number of unaddressed concerns from review. There are several issues that impede the acceptance of this patch and that make a reversion of this particular instance of these changes the best way forward: i) the patch contains several logically separate changes that would be better served as smaller patches (for review purposes) ii) functionality like the handling of end markers has been introduced without further explanation iii) symmetry between the handling of GTPv0 and GTPv1 has been unnecessarily broken iv) the patchset produces 'broken' packets when extension headers are included v) there are no available userspace tools to allow for testing this functionality vi) there is an unaddressed Coverity report against the patch concering memory leakage vii) most importantly, the patch contains a large amount of superfluous churn that impedes other ongoing work with this driver This patch will be reworked into a series that aligns with other ongoing work and facilitates review. Signed-off-by: Jonas Bonn Acked-by: Harald Welte Acked-by: Pravin B Shelar Signed-off-by: Jakub Kicinski --- drivers/net/gtp.c | 527 ++++++++--------------------- include/uapi/linux/gtp.h | 12 - include/uapi/linux/if_link.h | 1 - include/uapi/linux/if_tunnel.h | 1 - tools/include/uapi/linux/if_link.h | 1 - 5 files changed, 144 insertions(+), 398 deletions(-) diff --git a/drivers/net/gtp.c b/drivers/net/gtp.c index 851364314ecc4..4c04e271f1844 100644 --- a/drivers/net/gtp.c +++ b/drivers/net/gtp.c @@ -21,7 +21,6 @@ #include #include -#include #include #include #include @@ -74,9 +73,6 @@ struct gtp_dev { unsigned int hash_size; struct hlist_head *tid_hash; struct hlist_head *addr_hash; - /* Used by LWT tunnel. */ - bool collect_md; - struct socket *collect_md_sock; }; static unsigned int gtp_net_id __read_mostly; @@ -183,121 +179,33 @@ static bool gtp_check_ms(struct sk_buff *skb, struct pdp_ctx *pctx, return false; } -static int gtp_set_tun_dst(struct gtp_dev *gtp, struct sk_buff *skb, - unsigned int hdrlen, u8 gtp_version, - __be64 tid, u8 flags) +static int gtp_rx(struct pdp_ctx *pctx, struct sk_buff *skb, + unsigned int hdrlen, unsigned int role) { - struct metadata_dst *tun_dst; - int opts_len = 0; - - if (unlikely(flags & GTP1_F_MASK)) - opts_len = sizeof(struct gtpu_metadata); - - tun_dst = udp_tun_rx_dst(skb, gtp->sk1u->sk_family, TUNNEL_KEY, tid, opts_len); - if (!tun_dst) { - netdev_dbg(gtp->dev, "Failed to allocate tun_dst"); - goto err; + if (!gtp_check_ms(skb, pctx, hdrlen, role)) { + netdev_dbg(pctx->dev, "No PDP ctx for this MS\n"); + return 1; } - netdev_dbg(gtp->dev, "attaching metadata_dst to skb, gtp ver %d hdrlen %d\n", - gtp_version, hdrlen); - if (unlikely(opts_len)) { - struct gtpu_metadata *opts; - struct gtp1_header *gtp1; - - opts = ip_tunnel_info_opts(&tun_dst->u.tun_info); - gtp1 = (struct gtp1_header *)(skb->data + sizeof(struct udphdr)); - opts->ver = GTP_METADATA_V1; - opts->flags = gtp1->flags; - opts->type = gtp1->type; - netdev_dbg(gtp->dev, "recved control pkt: flag %x type: %d\n", - opts->flags, opts->type); - tun_dst->u.tun_info.key.tun_flags |= TUNNEL_GTPU_OPT; - tun_dst->u.tun_info.options_len = opts_len; - skb->protocol = htons(0xffff); /* Unknown */ - } /* Get rid of the GTP + UDP headers. */ if (iptunnel_pull_header(skb, hdrlen, skb->protocol, - !net_eq(sock_net(gtp->sk1u), dev_net(gtp->dev)))) { - gtp->dev->stats.rx_length_errors++; - goto err; - } - - skb_dst_set(skb, &tun_dst->dst); - return 0; -err: - return -1; -} - -static int gtp_rx(struct gtp_dev *gtp, struct sk_buff *skb, - unsigned int hdrlen, u8 gtp_version, unsigned int role, - __be64 tid, u8 flags, u8 type) -{ - if (ip_tunnel_collect_metadata() || gtp->collect_md) { - int err; - - err = gtp_set_tun_dst(gtp, skb, hdrlen, gtp_version, tid, flags); - if (err) - goto err; - } else { - struct pdp_ctx *pctx; - - if (flags & GTP1_F_MASK) - hdrlen += 4; - - if (type != GTP_TPDU) - return 1; + !net_eq(sock_net(pctx->sk), dev_net(pctx->dev)))) + return -1; - if (gtp_version == GTP_V0) - pctx = gtp0_pdp_find(gtp, be64_to_cpu(tid)); - else - pctx = gtp1_pdp_find(gtp, be64_to_cpu(tid)); - if (!pctx) { - netdev_dbg(gtp->dev, "No PDP ctx to decap skb=%p\n", skb); - return 1; - } - - if (!gtp_check_ms(skb, pctx, hdrlen, role)) { - netdev_dbg(pctx->dev, "No PDP ctx for this MS\n"); - return 1; - } - /* Get rid of the GTP + UDP headers. */ - if (iptunnel_pull_header(skb, hdrlen, skb->protocol, - !net_eq(sock_net(pctx->sk), dev_net(gtp->dev)))) { - gtp->dev->stats.rx_length_errors++; - goto err; - } - } - netdev_dbg(gtp->dev, "forwarding packet from GGSN to uplink\n"); + netdev_dbg(pctx->dev, "forwarding packet from GGSN to uplink\n"); /* Now that the UDP and the GTP header have been removed, set up the * new network header. This is required by the upper layer to * calculate the transport header. */ skb_reset_network_header(skb); - if (pskb_may_pull(skb, sizeof(struct iphdr))) { - struct iphdr *iph; - - iph = ip_hdr(skb); - if (iph->version == 4) { - netdev_dbg(gtp->dev, "inner pkt: ipv4"); - skb->protocol = htons(ETH_P_IP); - } else if (iph->version == 6) { - netdev_dbg(gtp->dev, "inner pkt: ipv6"); - skb->protocol = htons(ETH_P_IPV6); - } else { - netdev_dbg(gtp->dev, "inner pkt error: Unknown type"); - } - } - skb->dev = gtp->dev; - dev_sw_netstats_rx_add(gtp->dev, skb->len); + skb->dev = pctx->dev; + + dev_sw_netstats_rx_add(pctx->dev, skb->len); + netif_rx(skb); return 0; - -err: - gtp->dev->stats.rx_dropped++; - return -1; } /* 1 means pass up to the stack, -1 means drop and 0 means decapsulated. */ @@ -306,6 +214,7 @@ static int gtp0_udp_encap_recv(struct gtp_dev *gtp, struct sk_buff *skb) unsigned int hdrlen = sizeof(struct udphdr) + sizeof(struct gtp0_header); struct gtp0_header *gtp0; + struct pdp_ctx *pctx; if (!pskb_may_pull(skb, hdrlen)) return -1; @@ -315,7 +224,16 @@ static int gtp0_udp_encap_recv(struct gtp_dev *gtp, struct sk_buff *skb) if ((gtp0->flags >> 5) != GTP_V0) return 1; - return gtp_rx(gtp, skb, hdrlen, GTP_V0, gtp->role, gtp0->tid, gtp0->flags, gtp0->type); + if (gtp0->type != GTP_TPDU) + return 1; + + pctx = gtp0_pdp_find(gtp, be64_to_cpu(gtp0->tid)); + if (!pctx) { + netdev_dbg(gtp->dev, "No PDP ctx to decap skb=%p\n", skb); + return 1; + } + + return gtp_rx(pctx, skb, hdrlen, gtp->role); } static int gtp1u_udp_encap_recv(struct gtp_dev *gtp, struct sk_buff *skb) @@ -323,30 +241,41 @@ static int gtp1u_udp_encap_recv(struct gtp_dev *gtp, struct sk_buff *skb) unsigned int hdrlen = sizeof(struct udphdr) + sizeof(struct gtp1_header); struct gtp1_header *gtp1; + struct pdp_ctx *pctx; if (!pskb_may_pull(skb, hdrlen)) return -1; gtp1 = (struct gtp1_header *)(skb->data + sizeof(struct udphdr)); - netdev_dbg(gtp->dev, "GTPv1 recv: flags %x\n", gtp1->flags); if ((gtp1->flags >> 5) != GTP_V1) return 1; + if (gtp1->type != GTP_TPDU) + return 1; + /* From 29.060: "This field shall be present if and only if any one or * more of the S, PN and E flags are set.". * * If any of the bit is set, then the remaining ones also have to be * set. */ + if (gtp1->flags & GTP1_F_MASK) + hdrlen += 4; + /* Make sure the header is larger enough, including extensions. */ if (!pskb_may_pull(skb, hdrlen)) return -1; gtp1 = (struct gtp1_header *)(skb->data + sizeof(struct udphdr)); - return gtp_rx(gtp, skb, hdrlen, GTP_V1, gtp->role, - key32_to_tunnel_id(gtp1->tid), gtp1->flags, gtp1->type); + pctx = gtp1_pdp_find(gtp, ntohl(gtp1->tid)); + if (!pctx) { + netdev_dbg(gtp->dev, "No PDP ctx to decap skb=%p\n", skb); + return 1; + } + + return gtp_rx(pctx, skb, hdrlen, gtp->role); } static void __gtp_encap_destroy(struct sock *sk) @@ -386,11 +315,6 @@ static void gtp_encap_disable(struct gtp_dev *gtp) { gtp_encap_disable_sock(gtp->sk0); gtp_encap_disable_sock(gtp->sk1u); - if (gtp->collect_md_sock) { - udp_tunnel_sock_release(gtp->collect_md_sock); - gtp->collect_md_sock = NULL; - netdev_dbg(gtp->dev, "GTP socket released.\n"); - } } /* UDP encapsulation receive handler. See net/ipv4/udp.c. @@ -405,8 +329,7 @@ static int gtp_encap_recv(struct sock *sk, struct sk_buff *skb) if (!gtp) return 1; - netdev_dbg(gtp->dev, "encap_recv sk=%p type %d\n", - sk, udp_sk(sk)->encap_type); + netdev_dbg(gtp->dev, "encap_recv sk=%p\n", sk); switch (udp_sk(sk)->encap_type) { case UDP_ENCAP_GTP0: @@ -460,13 +383,12 @@ static void gtp_dev_uninit(struct net_device *dev) static struct rtable *ip4_route_output_gtp(struct flowi4 *fl4, const struct sock *sk, - __be32 daddr, - __be32 saddr) + __be32 daddr) { memset(fl4, 0, sizeof(*fl4)); fl4->flowi4_oif = sk->sk_bound_dev_if; fl4->daddr = daddr; - fl4->saddr = saddr; + fl4->saddr = inet_sk(sk)->inet_saddr; fl4->flowi4_tos = RT_CONN_FLAGS(sk); fl4->flowi4_proto = sk->sk_protocol; @@ -490,7 +412,7 @@ static inline void gtp0_push_header(struct sk_buff *skb, struct pdp_ctx *pctx) gtp0->tid = cpu_to_be64(pctx->u.v0.tid); } -static inline void gtp1_push_header(struct sk_buff *skb, __be32 tid) +static inline void gtp1_push_header(struct sk_buff *skb, struct pdp_ctx *pctx) { int payload_len = skb->len; struct gtp1_header *gtp1; @@ -506,63 +428,46 @@ static inline void gtp1_push_header(struct sk_buff *skb, __be32 tid) gtp1->flags = 0x30; /* v1, GTP-non-prime. */ gtp1->type = GTP_TPDU; gtp1->length = htons(payload_len); - gtp1->tid = tid; + gtp1->tid = htonl(pctx->u.v1.o_tei); /* TODO: Suppport for extension header, sequence number and N-PDU. * Update the length field if any of them is available. */ } -static inline int gtp1_push_control_header(struct sk_buff *skb, - __be32 tid, - struct gtpu_metadata *opts, - struct net_device *dev) -{ - struct gtp1_header *gtp1c; - int payload_len; - - if (opts->ver != GTP_METADATA_V1) - return -ENOENT; +struct gtp_pktinfo { + struct sock *sk; + struct iphdr *iph; + struct flowi4 fl4; + struct rtable *rt; + struct pdp_ctx *pctx; + struct net_device *dev; + __be16 gtph_port; +}; - if (opts->type == 0xFE) { - /* for end marker ignore skb data. */ - netdev_dbg(dev, "xmit pkt with null data"); - pskb_trim(skb, 0); +static void gtp_push_header(struct sk_buff *skb, struct gtp_pktinfo *pktinfo) +{ + switch (pktinfo->pctx->gtp_version) { + case GTP_V0: + pktinfo->gtph_port = htons(GTP0_PORT); + gtp0_push_header(skb, pktinfo->pctx); + break; + case GTP_V1: + pktinfo->gtph_port = htons(GTP1U_PORT); + gtp1_push_header(skb, pktinfo->pctx); + break; } - if (skb_cow_head(skb, sizeof(*gtp1c)) < 0) - return -ENOMEM; - - payload_len = skb->len; - - gtp1c = skb_push(skb, sizeof(*gtp1c)); - - gtp1c->flags = opts->flags; - gtp1c->type = opts->type; - gtp1c->length = htons(payload_len); - gtp1c->tid = tid; - netdev_dbg(dev, "xmit control pkt: ver %d flags %x type %x pkt len %d tid %x", - opts->ver, opts->flags, opts->type, skb->len, tid); - return 0; } -struct gtp_pktinfo { - struct sock *sk; - __u8 tos; - struct flowi4 fl4; - struct rtable *rt; - struct net_device *dev; - __be16 gtph_port; -}; - static inline void gtp_set_pktinfo_ipv4(struct gtp_pktinfo *pktinfo, - struct sock *sk, - __u8 tos, - struct rtable *rt, + struct sock *sk, struct iphdr *iph, + struct pdp_ctx *pctx, struct rtable *rt, struct flowi4 *fl4, struct net_device *dev) { pktinfo->sk = sk; - pktinfo->tos = tos; + pktinfo->iph = iph; + pktinfo->pctx = pctx; pktinfo->rt = rt; pktinfo->fl4 = *fl4; pktinfo->dev = dev; @@ -572,99 +477,40 @@ static int gtp_build_skb_ip4(struct sk_buff *skb, struct net_device *dev, struct gtp_pktinfo *pktinfo) { struct gtp_dev *gtp = netdev_priv(dev); - struct gtpu_metadata *opts = NULL; - struct sock *sk = NULL; struct pdp_ctx *pctx; struct rtable *rt; struct flowi4 fl4; - u8 gtp_version; - __be16 df = 0; - __be32 tun_id; - __be32 daddr; - __be32 saddr; - __u8 tos; + struct iphdr *iph; + __be16 df; int mtu; - if (gtp->collect_md) { - /* LWT GTP1U encap */ - struct ip_tunnel_info *info = NULL; - - info = skb_tunnel_info(skb); - if (!info) { - netdev_dbg(dev, "missing tunnel info"); - return -ENOENT; - } - if (info->key.tp_dst && ntohs(info->key.tp_dst) != GTP1U_PORT) { - netdev_dbg(dev, "unexpected GTP dst port: %d", ntohs(info->key.tp_dst)); - return -EOPNOTSUPP; - } - pctx = NULL; - gtp_version = GTP_V1; - tun_id = tunnel_id_to_key32(info->key.tun_id); - daddr = info->key.u.ipv4.dst; - saddr = info->key.u.ipv4.src; - sk = gtp->sk1u; - if (!sk) { - netdev_dbg(dev, "missing tunnel sock"); - return -EOPNOTSUPP; - } - tos = info->key.tos; - if (info->key.tun_flags & TUNNEL_DONT_FRAGMENT) - df = htons(IP_DF); - - if (info->options_len != 0) { - if (info->key.tun_flags & TUNNEL_GTPU_OPT) { - opts = ip_tunnel_info_opts(info); - } else { - netdev_dbg(dev, "missing tunnel metadata for control pkt"); - return -EOPNOTSUPP; - } - } - netdev_dbg(dev, "flow-based GTP1U encap: tunnel id %d\n", - be32_to_cpu(tun_id)); - } else { - struct iphdr *iph; - - if (ntohs(skb->protocol) != ETH_P_IP) - return -EOPNOTSUPP; - - iph = ip_hdr(skb); - - /* Read the IP destination address and resolve the PDP context. - * Prepend PDP header with TEI/TID from PDP ctx. - */ - if (gtp->role == GTP_ROLE_SGSN) - pctx = ipv4_pdp_find(gtp, iph->saddr); - else - pctx = ipv4_pdp_find(gtp, iph->daddr); + /* Read the IP destination address and resolve the PDP context. + * Prepend PDP header with TEI/TID from PDP ctx. + */ + iph = ip_hdr(skb); + if (gtp->role == GTP_ROLE_SGSN) + pctx = ipv4_pdp_find(gtp, iph->saddr); + else + pctx = ipv4_pdp_find(gtp, iph->daddr); - if (!pctx) { - netdev_dbg(dev, "no PDP ctx found for %pI4, skip\n", - &iph->daddr); - return -ENOENT; - } - sk = pctx->sk; - netdev_dbg(dev, "found PDP context %p\n", pctx); - - gtp_version = pctx->gtp_version; - tun_id = htonl(pctx->u.v1.o_tei); - daddr = pctx->peer_addr_ip4.s_addr; - saddr = inet_sk(sk)->inet_saddr; - tos = iph->tos; - df = iph->frag_off; - netdev_dbg(dev, "gtp -> IP src: %pI4 dst: %pI4\n", - &iph->saddr, &iph->daddr); + if (!pctx) { + netdev_dbg(dev, "no PDP ctx found for %pI4, skip\n", + &iph->daddr); + return -ENOENT; } + netdev_dbg(dev, "found PDP context %p\n", pctx); - rt = ip4_route_output_gtp(&fl4, sk, daddr, saddr); + rt = ip4_route_output_gtp(&fl4, pctx->sk, pctx->peer_addr_ip4.s_addr); if (IS_ERR(rt)) { - netdev_dbg(dev, "no route to SSGN %pI4\n", &daddr); + netdev_dbg(dev, "no route to SSGN %pI4\n", + &pctx->peer_addr_ip4.s_addr); dev->stats.tx_carrier_errors++; goto err; } if (rt->dst.dev == dev) { - netdev_dbg(dev, "circular route to SSGN %pI4\n", &daddr); + netdev_dbg(dev, "circular route to SSGN %pI4\n", + &pctx->peer_addr_ip4.s_addr); dev->stats.collisions++; goto err_rt; } @@ -672,10 +518,11 @@ static int gtp_build_skb_ip4(struct sk_buff *skb, struct net_device *dev, skb_dst_drop(skb); /* This is similar to tnl_update_pmtu(). */ + df = iph->frag_off; if (df) { mtu = dst_mtu(&rt->dst) - dev->hard_header_len - sizeof(struct iphdr) - sizeof(struct udphdr); - switch (gtp_version) { + switch (pctx->gtp_version) { case GTP_V0: mtu -= sizeof(struct gtp0_header); break; @@ -689,38 +536,17 @@ static int gtp_build_skb_ip4(struct sk_buff *skb, struct net_device *dev, rt->dst.ops->update_pmtu(&rt->dst, NULL, skb, mtu, false); - if (!skb_is_gso(skb) && (df & htons(IP_DF)) && mtu < skb->len) { - netdev_dbg(dev, "packet too big, fragmentation needed"); + if (!skb_is_gso(skb) && (iph->frag_off & htons(IP_DF)) && + mtu < ntohs(iph->tot_len)) { + netdev_dbg(dev, "packet too big, fragmentation needed\n"); memset(IPCB(skb), 0, sizeof(*IPCB(skb))); icmp_ndo_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); goto err_rt; } - gtp_set_pktinfo_ipv4(pktinfo, sk, tos, rt, &fl4, dev); - - if (unlikely(opts)) { - int err; - - pktinfo->gtph_port = htons(GTP1U_PORT); - err = gtp1_push_control_header(skb, tun_id, opts, dev); - if (err) { - netdev_info(dev, "cntr pkt error %d", err); - goto err_rt; - } - return 0; - } - - switch (gtp_version) { - case GTP_V0: - pktinfo->gtph_port = htons(GTP0_PORT); - gtp0_push_header(skb, pctx); - break; - case GTP_V1: - pktinfo->gtph_port = htons(GTP1U_PORT); - gtp1_push_header(skb, tun_id); - break; - } + gtp_set_pktinfo_ipv4(pktinfo, pctx->sk, iph, pctx, rt, &fl4, dev); + gtp_push_header(skb, pktinfo); return 0; err_rt: @@ -731,6 +557,7 @@ err: static netdev_tx_t gtp_dev_xmit(struct sk_buff *skb, struct net_device *dev) { + unsigned int proto = ntohs(skb->protocol); struct gtp_pktinfo pktinfo; int err; @@ -742,22 +569,32 @@ static netdev_tx_t gtp_dev_xmit(struct sk_buff *skb, struct net_device *dev) /* PDP context lookups in gtp_build_skb_*() need rcu read-side lock. */ rcu_read_lock(); - err = gtp_build_skb_ip4(skb, dev, &pktinfo); + switch (proto) { + case ETH_P_IP: + err = gtp_build_skb_ip4(skb, dev, &pktinfo); + break; + default: + err = -EOPNOTSUPP; + break; + } rcu_read_unlock(); if (err < 0) goto tx_err; - udp_tunnel_xmit_skb(pktinfo.rt, pktinfo.sk, skb, - pktinfo.fl4.saddr, - pktinfo.fl4.daddr, - pktinfo.tos, - ip4_dst_hoplimit(&pktinfo.rt->dst), - 0, - pktinfo.gtph_port, - pktinfo.gtph_port, - true, - false); + switch (proto) { + case ETH_P_IP: + netdev_dbg(pktinfo.dev, "gtp -> IP src: %pI4 dst: %pI4\n", + &pktinfo.iph->saddr, &pktinfo.iph->daddr); + udp_tunnel_xmit_skb(pktinfo.rt, pktinfo.sk, skb, + pktinfo.fl4.saddr, pktinfo.fl4.daddr, + pktinfo.iph->tos, + ip4_dst_hoplimit(&pktinfo.rt->dst), + 0, + pktinfo.gtph_port, pktinfo.gtph_port, + true, false); + break; + } return NETDEV_TX_OK; tx_err: @@ -773,19 +610,6 @@ static const struct net_device_ops gtp_netdev_ops = { .ndo_get_stats64 = dev_get_tstats64, }; -static struct gtp_dev *gtp_find_flow_based_dev(struct net *net) -{ - struct gtp_net *gn = net_generic(net, gtp_net_id); - struct gtp_dev *gtp; - - list_for_each_entry(gtp, &gn->gtp_dev_list, list) { - if (gtp->collect_md) - return gtp; - } - - return NULL; -} - static void gtp_link_setup(struct net_device *dev) { dev->netdev_ops = >p_netdev_ops; @@ -810,7 +634,7 @@ static void gtp_link_setup(struct net_device *dev) } static int gtp_hashtable_new(struct gtp_dev *gtp, int hsize); -static int gtp_encap_enable(struct gtp_dev *gtp, struct net_device *dev, struct nlattr *data[]); +static int gtp_encap_enable(struct gtp_dev *gtp, struct nlattr *data[]); static void gtp_destructor(struct net_device *dev) { @@ -828,24 +652,11 @@ static int gtp_newlink(struct net *src_net, struct net_device *dev, struct gtp_net *gn; int hashsize, err; - if (!data[IFLA_GTP_FD0] && !data[IFLA_GTP_FD1] && - !data[IFLA_GTP_COLLECT_METADATA]) + if (!data[IFLA_GTP_FD0] && !data[IFLA_GTP_FD1]) return -EINVAL; gtp = netdev_priv(dev); - if (data[IFLA_GTP_COLLECT_METADATA]) { - if (data[IFLA_GTP_FD0]) { - netdev_dbg(dev, "LWT device does not support setting v0 socket"); - return -EINVAL; - } - if (gtp_find_flow_based_dev(src_net)) { - netdev_dbg(dev, "LWT device already exist"); - return -EBUSY; - } - gtp->collect_md = true; - } - if (!data[IFLA_GTP_PDP_HASHSIZE]) { hashsize = 1024; } else { @@ -858,7 +669,7 @@ static int gtp_newlink(struct net *src_net, struct net_device *dev, if (err < 0) return err; - err = gtp_encap_enable(gtp, dev, data); + err = gtp_encap_enable(gtp, data); if (err < 0) goto out_hashtable; @@ -872,7 +683,7 @@ static int gtp_newlink(struct net *src_net, struct net_device *dev, list_add_rcu(>p->list, &gn->gtp_dev_list); dev->priv_destructor = gtp_destructor; - netdev_dbg(dev, "registered new GTP interface %s\n", dev->name); + netdev_dbg(dev, "registered new GTP interface\n"); return 0; @@ -903,7 +714,6 @@ static const struct nla_policy gtp_policy[IFLA_GTP_MAX + 1] = { [IFLA_GTP_FD1] = { .type = NLA_U32 }, [IFLA_GTP_PDP_HASHSIZE] = { .type = NLA_U32 }, [IFLA_GTP_ROLE] = { .type = NLA_U32 }, - [IFLA_GTP_COLLECT_METADATA] = { .type = NLA_FLAG }, }; static int gtp_validate(struct nlattr *tb[], struct nlattr *data[], @@ -927,9 +737,6 @@ static int gtp_fill_info(struct sk_buff *skb, const struct net_device *dev) if (nla_put_u32(skb, IFLA_GTP_PDP_HASHSIZE, gtp->hash_size)) goto nla_put_failure; - if (gtp->collect_md && nla_put_flag(skb, IFLA_GTP_COLLECT_METADATA)) - goto nla_put_failure; - return 0; nla_put_failure: @@ -975,24 +782,35 @@ err1: return -ENOMEM; } -static int __gtp_encap_enable_socket(struct socket *sock, int type, - struct gtp_dev *gtp) +static struct sock *gtp_encap_enable_socket(int fd, int type, + struct gtp_dev *gtp) { struct udp_tunnel_sock_cfg tuncfg = {NULL}; + struct socket *sock; struct sock *sk; + int err; + + pr_debug("enable gtp on %d, %d\n", fd, type); + + sock = sockfd_lookup(fd, &err); + if (!sock) { + pr_debug("gtp socket fd=%d not found\n", fd); + return NULL; + } sk = sock->sk; if (sk->sk_protocol != IPPROTO_UDP || sk->sk_type != SOCK_DGRAM || (sk->sk_family != AF_INET && sk->sk_family != AF_INET6)) { - pr_debug("socket not UDP\n"); - return -EINVAL; + pr_debug("socket fd=%d not UDP\n", fd); + sk = ERR_PTR(-EINVAL); + goto out_sock; } lock_sock(sk); if (sk->sk_user_data) { - release_sock(sock->sk); - return -EBUSY; + sk = ERR_PTR(-EBUSY); + goto out_rel_sock; } sock_hold(sk); @@ -1003,58 +821,15 @@ static int __gtp_encap_enable_socket(struct socket *sock, int type, tuncfg.encap_destroy = gtp_encap_destroy; setup_udp_tunnel_sock(sock_net(sock->sk), sock, &tuncfg); - release_sock(sock->sk); - return 0; -} -static struct sock *gtp_encap_enable_socket(int fd, int type, - struct gtp_dev *gtp) -{ - struct socket *sock; - int err; - - pr_debug("enable gtp on %d, %d\n", fd, type); - - sock = sockfd_lookup(fd, &err); - if (!sock) { - pr_debug("gtp socket fd=%d not found\n", fd); - return NULL; - } - err = __gtp_encap_enable_socket(sock, type, gtp); +out_rel_sock: + release_sock(sock->sk); +out_sock: sockfd_put(sock); - if (err) - return ERR_PTR(err); - - return sock->sk; + return sk; } -static struct socket *gtp_create_gtp_socket(struct gtp_dev *gtp, struct net_device *dev) -{ - struct udp_port_cfg udp_conf; - struct socket *sock; - int err; - - memset(&udp_conf, 0, sizeof(udp_conf)); - udp_conf.family = AF_INET; - udp_conf.local_ip.s_addr = htonl(INADDR_ANY); - udp_conf.local_udp_port = htons(GTP1U_PORT); - - err = udp_sock_create(dev_net(dev), &udp_conf, &sock); - if (err < 0) { - pr_debug("create gtp sock failed: %d\n", err); - return ERR_PTR(err); - } - err = __gtp_encap_enable_socket(sock, UDP_ENCAP_GTP1U, gtp); - if (err) { - pr_debug("enable gtp sock encap failed: %d\n", err); - udp_tunnel_sock_release(sock); - return ERR_PTR(err); - } - pr_debug("create gtp sock done\n"); - return sock; -} - -static int gtp_encap_enable(struct gtp_dev *gtp, struct net_device *dev, struct nlattr *data[]) +static int gtp_encap_enable(struct gtp_dev *gtp, struct nlattr *data[]) { struct sock *sk1u = NULL; struct sock *sk0 = NULL; @@ -1078,25 +853,11 @@ static int gtp_encap_enable(struct gtp_dev *gtp, struct net_device *dev, struct } } - if (data[IFLA_GTP_COLLECT_METADATA]) { - struct socket *sock; - - if (!sk1u) { - sock = gtp_create_gtp_socket(gtp, dev); - if (IS_ERR(sock)) - return PTR_ERR(sock); - - gtp->collect_md_sock = sock; - sk1u = sock->sk; - } else { - gtp->collect_md_sock = NULL; - } - } - if (data[IFLA_GTP_ROLE]) { role = nla_get_u32(data[IFLA_GTP_ROLE]); if (role > GTP_ROLE_SGSN) { - gtp_encap_disable(gtp); + gtp_encap_disable_sock(sk0); + gtp_encap_disable_sock(sk1u); return -EINVAL; } } @@ -1655,7 +1416,7 @@ static int __init gtp_init(void) if (err < 0) goto unreg_genl_family; - pr_info("GTP module loaded (pdp ctx size %zd bytes) with tnl-md support\n", + pr_info("GTP module loaded (pdp ctx size %zd bytes)\n", sizeof(struct pdp_ctx)); return 0; diff --git a/include/uapi/linux/gtp.h b/include/uapi/linux/gtp.h index 62aff78b7c569..79f9191bbb24c 100644 --- a/include/uapi/linux/gtp.h +++ b/include/uapi/linux/gtp.h @@ -2,8 +2,6 @@ #ifndef _UAPI_LINUX_GTP_H_ #define _UAPI_LINUX_GTP_H_ -#include - #define GTP_GENL_MCGRP_NAME "gtp" enum gtp_genl_cmds { @@ -36,14 +34,4 @@ enum gtp_attrs { }; #define GTPA_MAX (__GTPA_MAX + 1) -enum { - GTP_METADATA_V1 -}; - -struct gtpu_metadata { - __u8 ver; - __u8 flags; - __u8 type; -}; - #endif /* _UAPI_LINUX_GTP_H_ */ diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index eb8018c3a7372..91c8dda6d95dc 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -811,7 +811,6 @@ enum { IFLA_GTP_FD1, IFLA_GTP_PDP_HASHSIZE, IFLA_GTP_ROLE, - IFLA_GTP_COLLECT_METADATA, __IFLA_GTP_MAX, }; #define IFLA_GTP_MAX (__IFLA_GTP_MAX - 1) diff --git a/include/uapi/linux/if_tunnel.h b/include/uapi/linux/if_tunnel.h index 802da679fab1c..7d9105533c7b9 100644 --- a/include/uapi/linux/if_tunnel.h +++ b/include/uapi/linux/if_tunnel.h @@ -176,7 +176,6 @@ enum { #define TUNNEL_VXLAN_OPT __cpu_to_be16(0x1000) #define TUNNEL_NOCACHE __cpu_to_be16(0x2000) #define TUNNEL_ERSPAN_OPT __cpu_to_be16(0x4000) -#define TUNNEL_GTPU_OPT __cpu_to_be16(0x8000) #define TUNNEL_OPTIONS_PRESENT \ (TUNNEL_GENEVE_OPT | TUNNEL_VXLAN_OPT | TUNNEL_ERSPAN_OPT) diff --git a/tools/include/uapi/linux/if_link.h b/tools/include/uapi/linux/if_link.h index 28d649bda686a..d208b2af697fd 100644 --- a/tools/include/uapi/linux/if_link.h +++ b/tools/include/uapi/linux/if_link.h @@ -617,7 +617,6 @@ enum { IFLA_GTP_FD1, IFLA_GTP_PDP_HASHSIZE, IFLA_GTP_ROLE, - IFLA_GTP_COLLECT_METADATA, __IFLA_GTP_MAX, }; #define IFLA_GTP_MAX (__IFLA_GTP_MAX - 1) -- GitLab From e21eb3a065a2d90ee3bb06cc2e77acad403ec7cd Mon Sep 17 00:00:00 2001 From: Jonas Bonn Date: Wed, 3 Feb 2021 08:08:00 +0100 Subject: [PATCH 1893/2012] gtp: set initial MTU The GTP link is brought up with a default MTU of zero. This can lead to some rather unexpected behaviour for users who are more accustomed to interfaces coming online with reasonable defaults. This patch sets an initial MTU for the GTP link of 1500 less worst-case tunnel overhead. Signed-off-by: Jonas Bonn Acked-by: Harald Welte Acked-by: Pravin B Shelar Signed-off-by: Jakub Kicinski --- drivers/net/gtp.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/net/gtp.c b/drivers/net/gtp.c index 4c04e271f1844..5a048f050a9ce 100644 --- a/drivers/net/gtp.c +++ b/drivers/net/gtp.c @@ -612,11 +612,16 @@ static const struct net_device_ops gtp_netdev_ops = { static void gtp_link_setup(struct net_device *dev) { + unsigned int max_gtp_header_len = sizeof(struct iphdr) + + sizeof(struct udphdr) + + sizeof(struct gtp0_header); + dev->netdev_ops = >p_netdev_ops; dev->needs_free_netdev = true; dev->hard_header_len = 0; dev->addr_len = 0; + dev->mtu = ETH_DATA_LEN - max_gtp_header_len; /* Zero header length. */ dev->type = ARPHRD_NONE; @@ -626,11 +631,7 @@ static void gtp_link_setup(struct net_device *dev) dev->features |= NETIF_F_LLTX; netif_keep_dst(dev); - /* Assume largest header, ie. GTPv0. */ - dev->needed_headroom = LL_MAX_HEADER + - sizeof(struct iphdr) + - sizeof(struct udphdr) + - sizeof(struct gtp0_header); + dev->needed_headroom = LL_MAX_HEADER + max_gtp_header_len; } static int gtp_hashtable_new(struct gtp_dev *gtp, int hsize); -- GitLab From e1b2914e645caa702ad6ddf4f1c48bdedb3d43cf Mon Sep 17 00:00:00 2001 From: Jonas Bonn Date: Wed, 3 Feb 2021 08:08:01 +0100 Subject: [PATCH 1894/2012] gtp: include role in link info Querying link info for the GTP interface doesn't reveal in which "role" the device is set to operate. Include this information in the info query result. Signed-off-by: Jonas Bonn Acked-by: Harald Welte Acked-by: Pravin B Shelar Signed-off-by: Jakub Kicinski --- drivers/net/gtp.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/gtp.c b/drivers/net/gtp.c index 5a048f050a9ce..5682d3ba7aa59 100644 --- a/drivers/net/gtp.c +++ b/drivers/net/gtp.c @@ -728,7 +728,8 @@ static int gtp_validate(struct nlattr *tb[], struct nlattr *data[], static size_t gtp_get_size(const struct net_device *dev) { - return nla_total_size(sizeof(__u32)); /* IFLA_GTP_PDP_HASHSIZE */ + return nla_total_size(sizeof(__u32)) + /* IFLA_GTP_PDP_HASHSIZE */ + nla_total_size(sizeof(__u32)); /* IFLA_GTP_ROLE */ } static int gtp_fill_info(struct sk_buff *skb, const struct net_device *dev) @@ -737,6 +738,8 @@ static int gtp_fill_info(struct sk_buff *skb, const struct net_device *dev) if (nla_put_u32(skb, IFLA_GTP_PDP_HASHSIZE, gtp->hash_size)) goto nla_put_failure; + if (nla_put_u32(skb, IFLA_GTP_ROLE, gtp->role)) + goto nla_put_failure; return 0; -- GitLab From a9c0df76d002111d0796cf04b5ad16f4f5a8d794 Mon Sep 17 00:00:00 2001 From: Jonas Bonn Date: Wed, 3 Feb 2021 08:08:02 +0100 Subject: [PATCH 1895/2012] gtp: really check namespaces before xmit Blindly assuming that packet transmission crosses namespaces results in skb marks being lost in the single namespace case. Signed-off-by: Jonas Bonn Acked-by: Harald Welte Acked-by: Pravin B Shelar Signed-off-by: Jakub Kicinski --- drivers/net/gtp.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/gtp.c b/drivers/net/gtp.c index 5682d3ba7aa59..e4e57c0552ee7 100644 --- a/drivers/net/gtp.c +++ b/drivers/net/gtp.c @@ -592,7 +592,9 @@ static netdev_tx_t gtp_dev_xmit(struct sk_buff *skb, struct net_device *dev) ip4_dst_hoplimit(&pktinfo.rt->dst), 0, pktinfo.gtph_port, pktinfo.gtph_port, - true, false); + !net_eq(sock_net(pktinfo.pctx->sk), + dev_net(dev)), + false); break; } -- GitLab From 70d132462998dcef701180e047c91933bda33ae2 Mon Sep 17 00:00:00 2001 From: Jonas Bonn Date: Wed, 3 Feb 2021 08:08:03 +0100 Subject: [PATCH 1896/2012] gtp: drop unnecessary call to skb_dst_drop The call to skb_dst_drop() is already done as part of udp_tunnel_xmit(). Signed-off-by: Jonas Bonn Acked-by: Harald Welte Acked-by: Pravin B Shelar Signed-off-by: Jakub Kicinski --- drivers/net/gtp.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/net/gtp.c b/drivers/net/gtp.c index e4e57c0552ee7..04d9de3855496 100644 --- a/drivers/net/gtp.c +++ b/drivers/net/gtp.c @@ -515,8 +515,6 @@ static int gtp_build_skb_ip4(struct sk_buff *skb, struct net_device *dev, goto err_rt; } - skb_dst_drop(skb); - /* This is similar to tnl_update_pmtu(). */ df = iph->frag_off; if (df) { -- GitLab From 29f53b5c00c1deb27d31603ccf6ee161bd5ebb2f Mon Sep 17 00:00:00 2001 From: Jonas Bonn Date: Wed, 3 Feb 2021 08:08:04 +0100 Subject: [PATCH 1897/2012] gtp: set device type Set the devtype to 'gtp' when setting up the link. Signed-off-by: Jonas Bonn Acked-by: Harald Welte Acked-by: Pravin B Shelar Signed-off-by: Jakub Kicinski --- drivers/net/gtp.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/gtp.c b/drivers/net/gtp.c index 04d9de3855496..a1bb028189776 100644 --- a/drivers/net/gtp.c +++ b/drivers/net/gtp.c @@ -610,6 +610,10 @@ static const struct net_device_ops gtp_netdev_ops = { .ndo_get_stats64 = dev_get_tstats64, }; +static const struct device_type gtp_type = { + .name = "gtp", +}; + static void gtp_link_setup(struct net_device *dev) { unsigned int max_gtp_header_len = sizeof(struct iphdr) + @@ -618,6 +622,7 @@ static void gtp_link_setup(struct net_device *dev) dev->netdev_ops = >p_netdev_ops; dev->needs_free_netdev = true; + SET_NETDEV_DEVTYPE(dev, >p_type); dev->hard_header_len = 0; dev->addr_len = 0; -- GitLab From 9716178a3abd7e4eb00ac44d664cfb2311e88c3b Mon Sep 17 00:00:00 2001 From: Jonas Bonn Date: Wed, 3 Feb 2021 08:08:05 +0100 Subject: [PATCH 1898/2012] gtp: update rx_length_errors for abnormally short packets Based on work by Pravin Shelar. Update appropriate stats when packet transmission isn't possible. Signed-off-by: Jonas Bonn Acked-by: Harald Welte Acked-by: Pravin B Shelar Signed-off-by: Jakub Kicinski --- drivers/net/gtp.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/net/gtp.c b/drivers/net/gtp.c index a1bb028189776..9a70f05baf6e4 100644 --- a/drivers/net/gtp.c +++ b/drivers/net/gtp.c @@ -189,8 +189,10 @@ static int gtp_rx(struct pdp_ctx *pctx, struct sk_buff *skb, /* Get rid of the GTP + UDP headers. */ if (iptunnel_pull_header(skb, hdrlen, skb->protocol, - !net_eq(sock_net(pctx->sk), dev_net(pctx->dev)))) - return -1; + !net_eq(sock_net(pctx->sk), dev_net(pctx->dev)))) { + pctx->dev->stats.rx_length_errors++; + goto err; + } netdev_dbg(pctx->dev, "forwarding packet from GGSN to uplink\n"); @@ -206,6 +208,10 @@ static int gtp_rx(struct pdp_ctx *pctx, struct sk_buff *skb, netif_rx(skb); return 0; + +err: + pctx->dev->stats.rx_dropped++; + return -1; } /* 1 means pass up to the stack, -1 means drop and 0 means decapsulated. */ -- GitLab From e93fac3b51617401df46332499daae000e322ff8 Mon Sep 17 00:00:00 2001 From: Jiapeng Chong Date: Tue, 2 Feb 2021 18:17:49 +0800 Subject: [PATCH 1899/2012] drivers: net: xen-netfront: Simplify the calculation of variables Fix the following coccicheck warnings: ./drivers/net/xen-netfront.c:1816:52-54: WARNING !A || A && B is equivalent to !A || B. Reported-by: Abaci Robot Signed-off-by: Jiapeng Chong Reviewed-by: Juergen Gross Link: https://lore.kernel.org/r/1612261069-13315-1-git-send-email-jiapeng.chong@linux.alibaba.com Signed-off-by: Jakub Kicinski --- drivers/net/xen-netfront.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index 6ef2adbd283a3..cc19cd9203da6 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -1811,7 +1811,7 @@ static int setup_netfront(struct xenbus_device *dev, * a) feature-split-event-channels == 0 * b) feature-split-event-channels == 1 but failed to setup */ - if (!feature_split_evtchn || (feature_split_evtchn && err)) + if (!feature_split_evtchn || err) err = setup_netfront_single(queue); if (err) -- GitLab From 7e3ce05e7f650371061d0b9eec1e1cf74ed6fca0 Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Wed, 3 Feb 2021 22:48:16 -0300 Subject: [PATCH 1900/2012] netlink: add tracepoint at NL_SET_ERR_MSG Often userspace won't request the extack information, or they don't log it because of log level or so, and even when they do, sometimes it's not enough to know exactly what caused the error. Netlink extack is the standard way of reporting erros with descriptive error messages. With a trace point on it, we then can know exactly where the error happened, regardless of userspace app. Also, we can even see if the err msg was overwritten. The wrapper do_trace_netlink_extack() is because trace points shouldn't be called from .h files, as trace points are not that small, and the function call to do_trace_netlink_extack() on the macros is not protected by tracepoint_enabled() because the macros are called from modules, and this would require exporting some trace structs. As this is error path, it's better to export just the wrapper instead. v2: removed leftover tracepoint declaration Signed-off-by: Marcelo Ricardo Leitner Reviewed-by: David Ahern Link: https://lore.kernel.org/r/4546b63e67b2989789d146498b13cc09e1fdc543.1612403190.git.marcelo.leitner@gmail.com Signed-off-by: Jakub Kicinski --- include/linux/netlink.h | 6 ++++++ include/trace/events/netlink.h | 29 +++++++++++++++++++++++++++++ net/netlink/af_netlink.c | 8 ++++++++ 3 files changed, 43 insertions(+) create mode 100644 include/trace/events/netlink.h diff --git a/include/linux/netlink.h b/include/linux/netlink.h index 9f118771e2480..0bcf98098c5a0 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -11,6 +11,8 @@ struct net; +void do_trace_netlink_extack(const char *msg); + static inline struct nlmsghdr *nlmsg_hdr(const struct sk_buff *skb) { return (struct nlmsghdr *)skb->data; @@ -90,6 +92,8 @@ struct netlink_ext_ack { static const char __msg[] = msg; \ struct netlink_ext_ack *__extack = (extack); \ \ + do_trace_netlink_extack(__msg); \ + \ if (__extack) \ __extack->_msg = __msg; \ } while (0) @@ -110,6 +114,8 @@ struct netlink_ext_ack { static const char __msg[] = msg; \ struct netlink_ext_ack *__extack = (extack); \ \ + do_trace_netlink_extack(__msg); \ + \ if (__extack) { \ __extack->_msg = __msg; \ __extack->bad_attr = (attr); \ diff --git a/include/trace/events/netlink.h b/include/trace/events/netlink.h new file mode 100644 index 0000000000000..3b7be3b386a4f --- /dev/null +++ b/include/trace/events/netlink.h @@ -0,0 +1,29 @@ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM netlink + +#if !defined(_TRACE_NETLINK_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_NETLINK_H + +#include + +TRACE_EVENT(netlink_extack, + + TP_PROTO(const char *msg), + + TP_ARGS(msg), + + TP_STRUCT__entry( + __string( msg, msg ) + ), + + TP_fast_assign( + __assign_str(msg, msg); + ), + + TP_printk("msg=%s", __get_str(msg)) +); + +#endif /* _TRACE_NETLINK_H */ + +/* This part must be outside protection */ +#include diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index daca50d6bb128..dd488938447f9 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -67,6 +67,8 @@ #include #include #include +#define CREATE_TRACE_POINTS +#include #include "af_netlink.h" @@ -147,6 +149,12 @@ static BLOCKING_NOTIFIER_HEAD(netlink_chain); static const struct rhashtable_params netlink_rhashtable_params; +void do_trace_netlink_extack(const char *msg) +{ + trace_netlink_extack(msg); +} +EXPORT_SYMBOL(do_trace_netlink_extack); + static inline u32 netlink_group_mask(u32 group) { return group ? 1 << (group - 1) : 0; -- GitLab From 0053859496baa17c7675526936677c9213bf5a0d Mon Sep 17 00:00:00 2001 From: Brian Vazquez Date: Thu, 4 Feb 2021 18:18:38 +0000 Subject: [PATCH 1901/2012] net: add EXPORT_INDIRECT_CALLABLE wrapper When a static function is annotated with INDIRECT_CALLABLE_SCOPE and CONFIG_RETPOLINE is set, the static keyword is removed. Sometimes the function needs to be exported but EXPORT_SYMBOL can't be used because if CONFIG_RETPOLINE is not set, we will attempt to export a static symbol. This patch introduces a new indirect call wrapper: EXPORT_INDIRECT_CALLABLE. This basically does EXPORT_SYMBOL when CONFIG_RETPOLINE is set, but does nothing when it's not. Reported-by: Stephen Rothwell Signed-off-by: Brian Vazquez Link: https://lore.kernel.org/r/20210204181839.558951-1-brianvv@google.com Signed-off-by: Jakub Kicinski --- include/linux/indirect_call_wrapper.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/linux/indirect_call_wrapper.h b/include/linux/indirect_call_wrapper.h index 54c02c84906ab..a8345c8a613de 100644 --- a/include/linux/indirect_call_wrapper.h +++ b/include/linux/indirect_call_wrapper.h @@ -36,6 +36,7 @@ #define INDIRECT_CALLABLE_DECLARE(f) f #define INDIRECT_CALLABLE_SCOPE +#define EXPORT_INDIRECT_CALLABLE(f) EXPORT_SYMBOL(f) #else #define INDIRECT_CALL_1(f, f1, ...) f(__VA_ARGS__) @@ -44,6 +45,7 @@ #define INDIRECT_CALL_4(f, f4, f3, f2, f1, ...) f(__VA_ARGS__) #define INDIRECT_CALLABLE_DECLARE(f) #define INDIRECT_CALLABLE_SCOPE static +#define EXPORT_INDIRECT_CALLABLE(f) #endif /* -- GitLab From 9c97921a51a013917cfc387998882ecd0795937c Mon Sep 17 00:00:00 2001 From: Brian Vazquez Date: Thu, 4 Feb 2021 18:18:39 +0000 Subject: [PATCH 1902/2012] net: fix building errors on powerpc when CONFIG_RETPOLINE is not set This commit fixes the errores reported when building for powerpc: ERROR: modpost: "ip6_dst_check" [vmlinux] is a static EXPORT_SYMBOL ERROR: modpost: "ipv4_dst_check" [vmlinux] is a static EXPORT_SYMBOL ERROR: modpost: "ipv4_mtu" [vmlinux] is a static EXPORT_SYMBOL ERROR: modpost: "ip6_mtu" [vmlinux] is a static EXPORT_SYMBOL Fixes: f67fbeaebdc0 ("net: use indirect call helpers for dst_mtu") Fixes: bbd807dfbf20 ("net: indirect call helpers for ipv4/ipv6 dst_check functions") Reported-by: Stephen Rothwell Signed-off-by: Brian Vazquez Link: https://lore.kernel.org/r/20210204181839.558951-2-brianvv@google.com Signed-off-by: Jakub Kicinski --- net/ipv4/route.c | 4 ++-- net/ipv6/route.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 9e65377097943..be31e2446470c 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1206,7 +1206,7 @@ INDIRECT_CALLABLE_SCOPE struct dst_entry *ipv4_dst_check(struct dst_entry *dst, return NULL; return dst; } -EXPORT_SYMBOL(ipv4_dst_check); +EXPORT_INDIRECT_CALLABLE(ipv4_dst_check); static void ipv4_send_dest_unreach(struct sk_buff *skb) { @@ -1337,7 +1337,7 @@ INDIRECT_CALLABLE_SCOPE unsigned int ipv4_mtu(const struct dst_entry *dst) return mtu - lwtunnel_headroom(dst->lwtstate, mtu); } -EXPORT_SYMBOL(ipv4_mtu); +EXPORT_INDIRECT_CALLABLE(ipv4_mtu); static void ip_del_fnhe(struct fib_nh_common *nhc, __be32 daddr) { diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 8d9e053dc0717..0d1784b0d65df 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2644,7 +2644,7 @@ INDIRECT_CALLABLE_SCOPE struct dst_entry *ip6_dst_check(struct dst_entry *dst, return dst_ret; } -EXPORT_SYMBOL(ip6_dst_check); +EXPORT_INDIRECT_CALLABLE(ip6_dst_check); static struct dst_entry *ip6_negative_advice(struct dst_entry *dst) { @@ -3115,7 +3115,7 @@ out: return mtu - lwtunnel_headroom(dst->lwtstate, mtu); } -EXPORT_SYMBOL(ip6_mtu); +EXPORT_INDIRECT_CALLABLE(ip6_mtu); /* MTU selection: * 1. mtu on route is locked - use it -- GitLab From 1d7bab6a94458e959f3f55788fd50ddc7d97403b Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Tue, 2 Feb 2021 13:30:54 +0000 Subject: [PATCH 1903/2012] mm: constify page_is_pfmemalloc() argument The function only tests for page->index, so its argument should be const. Signed-off-by: Alexander Lobakin Reviewed-by: Jesse Brandeburg Acked-by: David Rientjes Signed-off-by: Jakub Kicinski --- include/linux/mm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index ecdf8a8cd6aeb..078633d43af95 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1584,7 +1584,7 @@ struct address_space *page_mapping_file(struct page *page); * ALLOC_NO_WATERMARKS and the low watermark was not * met implying that the system is under some pressure. */ -static inline bool page_is_pfmemalloc(struct page *page) +static inline bool page_is_pfmemalloc(const struct page *page) { /* * Page index cannot be this large so this must be -- GitLab From 48f971c9c80a728646fc03367a28df747f20d0f4 Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Tue, 2 Feb 2021 13:31:05 +0000 Subject: [PATCH 1904/2012] skbuff: constify skb_propagate_pfmemalloc() "page" argument The function doesn't write anything to the page struct itself, so this argument can be const. Misc: align second argument to the brace while at it. Signed-off-by: Alexander Lobakin Reviewed-by: Jesse Brandeburg Acked-by: David Rientjes Signed-off-by: Jakub Kicinski --- include/linux/skbuff.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 9313b5aaf45b6..b027526da4f9c 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2943,8 +2943,8 @@ static inline struct page *dev_alloc_page(void) * @page: The page that was allocated from skb_alloc_page * @skb: The skb that may need pfmemalloc set */ -static inline void skb_propagate_pfmemalloc(struct page *page, - struct sk_buff *skb) +static inline void skb_propagate_pfmemalloc(const struct page *page, + struct sk_buff *skb) { if (page_is_pfmemalloc(page)) skb->pfmemalloc = true; -- GitLab From bc38f30f8dbce0afb8af05d917bee084b1329418 Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Tue, 2 Feb 2021 13:31:21 +0000 Subject: [PATCH 1905/2012] net: introduce common dev_page_is_reusable() A bunch of drivers test the page before reusing/recycling for two common conditions: - if a page was allocated under memory pressure (pfmemalloc page); - if a page was allocated at a distant memory node (to exclude slowdowns). Introduce a new common inline for doing this, with likely() already folded inside to make driver code a bit simpler. Suggested-by: David Rientjes Suggested-by: Jakub Kicinski Cc: John Hubbard Signed-off-by: Alexander Lobakin Reviewed-by: Jesse Brandeburg Acked-by: David Rientjes Signed-off-by: Jakub Kicinski --- include/linux/skbuff.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index b027526da4f9c..0e42c53b8ca93 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2938,6 +2938,22 @@ static inline struct page *dev_alloc_page(void) return dev_alloc_pages(0); } +/** + * dev_page_is_reusable - check whether a page can be reused for network Rx + * @page: the page to test + * + * A page shouldn't be considered for reusing/recycling if it was allocated + * under memory pressure or at a distant memory node. + * + * Returns false if this page should be returned to page allocator, true + * otherwise. + */ +static inline bool dev_page_is_reusable(const struct page *page) +{ + return likely(page_to_nid(page) == numa_mem_id() && + !page_is_pfmemalloc(page)); +} + /** * skb_propagate_pfmemalloc - Propagate pfmemalloc if skb is allocated after RX page * @page: The page that was allocated from skb_alloc_page -- GitLab From a79afa78e625e4dbe0e07c70929d477ba3386e45 Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Tue, 2 Feb 2021 13:31:35 +0000 Subject: [PATCH 1906/2012] net: use the new dev_page_is_reusable() instead of private versions Now we can remove a bunch of identical functions from the drivers and make them use common dev_page_is_reusable(). All {,un}likely() checks are omitted since it's already present in this helper. Also update some comments near the call sites. Suggested-by: David Rientjes Suggested-by: Jakub Kicinski Cc: John Hubbard Signed-off-by: Alexander Lobakin Reviewed-by: Jesse Brandeburg Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/hisilicon/hns3/hns3_enet.c | 17 ++++++----------- drivers/net/ethernet/intel/fm10k/fm10k_main.c | 13 ++++--------- drivers/net/ethernet/intel/i40e/i40e_txrx.c | 15 +-------------- drivers/net/ethernet/intel/iavf/iavf_txrx.c | 15 +-------------- drivers/net/ethernet/intel/ice/ice_txrx.c | 13 ++----------- drivers/net/ethernet/intel/igb/igb_main.c | 9 ++------- drivers/net/ethernet/intel/igc/igc_main.c | 9 ++------- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 9 ++------- .../net/ethernet/intel/ixgbevf/ixgbevf_main.c | 9 ++------- drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 7 +------ 10 files changed, 23 insertions(+), 93 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c index 512080640cbc2..f39f5b1c4cec5 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c @@ -2800,12 +2800,6 @@ static void hns3_nic_alloc_rx_buffers(struct hns3_enet_ring *ring, writel(i, ring->tqp->io_base + HNS3_RING_RX_RING_HEAD_REG); } -static bool hns3_page_is_reusable(struct page *page) -{ - return page_to_nid(page) == numa_mem_id() && - !page_is_pfmemalloc(page); -} - static bool hns3_can_reuse_page(struct hns3_desc_cb *cb) { return (page_count(cb->priv) - cb->pagecnt_bias) == 1; @@ -2823,10 +2817,11 @@ static void hns3_nic_reuse_page(struct sk_buff *skb, int i, skb_add_rx_frag(skb, i, desc_cb->priv, desc_cb->page_offset + pull_len, size - pull_len, truesize); - /* Avoid re-using remote pages, or the stack is still using the page - * when page_offset rollback to zero, flag default unreuse + /* Avoid re-using remote and pfmemalloc pages, or the stack is still + * using the page when page_offset rollback to zero, flag default + * unreuse */ - if (unlikely(!hns3_page_is_reusable(desc_cb->priv)) || + if (!dev_page_is_reusable(desc_cb->priv) || (!desc_cb->page_offset && !hns3_can_reuse_page(desc_cb))) { __page_frag_cache_drain(desc_cb->priv, desc_cb->pagecnt_bias); return; @@ -3083,8 +3078,8 @@ static int hns3_alloc_skb(struct hns3_enet_ring *ring, unsigned int length, if (length <= HNS3_RX_HEAD_SIZE) { memcpy(__skb_put(skb, length), va, ALIGN(length, sizeof(long))); - /* We can reuse buffer as-is, just make sure it is local */ - if (likely(hns3_page_is_reusable(desc_cb->priv))) + /* We can reuse buffer as-is, just make sure it is reusable */ + if (dev_page_is_reusable(desc_cb->priv)) desc_cb->reuse_flag = 1; else /* This page cannot be reused so discard it */ __page_frag_cache_drain(desc_cb->priv, diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_main.c b/drivers/net/ethernet/intel/fm10k/fm10k_main.c index 99b8252eb969e..247f44f4cb300 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_main.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_main.c @@ -194,17 +194,12 @@ static void fm10k_reuse_rx_page(struct fm10k_ring *rx_ring, DMA_FROM_DEVICE); } -static inline bool fm10k_page_is_reserved(struct page *page) -{ - return (page_to_nid(page) != numa_mem_id()) || page_is_pfmemalloc(page); -} - static bool fm10k_can_reuse_rx_page(struct fm10k_rx_buffer *rx_buffer, struct page *page, unsigned int __maybe_unused truesize) { - /* avoid re-using remote pages */ - if (unlikely(fm10k_page_is_reserved(page))) + /* avoid re-using remote and pfmemalloc pages */ + if (!dev_page_is_reusable(page)) return false; #if (PAGE_SIZE < 8192) @@ -265,8 +260,8 @@ static bool fm10k_add_rx_frag(struct fm10k_rx_buffer *rx_buffer, if (likely(size <= FM10K_RX_HDR_LEN)) { memcpy(__skb_put(skb, size), va, ALIGN(size, sizeof(long))); - /* page is not reserved, we can reuse buffer as-is */ - if (likely(!fm10k_page_is_reserved(page))) + /* page is reusable, we can reuse buffer as-is */ + if (dev_page_is_reusable(page)) return true; /* this page cannot be reused so discard it */ diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index 2574e78f75978..8d2ea4293d695 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -1843,19 +1843,6 @@ static bool i40e_cleanup_headers(struct i40e_ring *rx_ring, struct sk_buff *skb, return false; } -/** - * i40e_page_is_reusable - check if any reuse is possible - * @page: page struct to check - * - * A page is not reusable if it was allocated under low memory - * conditions, or it's not in the same NUMA node as this CPU. - */ -static inline bool i40e_page_is_reusable(struct page *page) -{ - return (page_to_nid(page) == numa_mem_id()) && - !page_is_pfmemalloc(page); -} - /** * i40e_can_reuse_rx_page - Determine if this page can be reused by * the adapter for another receive @@ -1891,7 +1878,7 @@ static bool i40e_can_reuse_rx_page(struct i40e_rx_buffer *rx_buffer, struct page *page = rx_buffer->page; /* Is any reuse possible? */ - if (unlikely(!i40e_page_is_reusable(page))) + if (!dev_page_is_reusable(page)) return false; #if (PAGE_SIZE < 8192) diff --git a/drivers/net/ethernet/intel/iavf/iavf_txrx.c b/drivers/net/ethernet/intel/iavf/iavf_txrx.c index 256fa07d54d5d..ffaf2742a2e0f 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_txrx.c +++ b/drivers/net/ethernet/intel/iavf/iavf_txrx.c @@ -1141,19 +1141,6 @@ static void iavf_reuse_rx_page(struct iavf_ring *rx_ring, new_buff->pagecnt_bias = old_buff->pagecnt_bias; } -/** - * iavf_page_is_reusable - check if any reuse is possible - * @page: page struct to check - * - * A page is not reusable if it was allocated under low memory - * conditions, or it's not in the same NUMA node as this CPU. - */ -static inline bool iavf_page_is_reusable(struct page *page) -{ - return (page_to_nid(page) == numa_mem_id()) && - !page_is_pfmemalloc(page); -} - /** * iavf_can_reuse_rx_page - Determine if this page can be reused by * the adapter for another receive @@ -1187,7 +1174,7 @@ static bool iavf_can_reuse_rx_page(struct iavf_rx_buffer *rx_buffer) struct page *page = rx_buffer->page; /* Is any reuse possible? */ - if (unlikely(!iavf_page_is_reusable(page))) + if (!dev_page_is_reusable(page)) return false; #if (PAGE_SIZE < 8192) diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c index 2c2de56e28248..8ca63c6a6ba4f 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx.c @@ -728,15 +728,6 @@ bool ice_alloc_rx_bufs(struct ice_ring *rx_ring, u16 cleaned_count) return !!cleaned_count; } -/** - * ice_page_is_reserved - check if reuse is possible - * @page: page struct to check - */ -static bool ice_page_is_reserved(struct page *page) -{ - return (page_to_nid(page) != numa_mem_id()) || page_is_pfmemalloc(page); -} - /** * ice_rx_buf_adjust_pg_offset - Prepare Rx buffer for reuse * @rx_buf: Rx buffer to adjust @@ -775,8 +766,8 @@ ice_can_reuse_rx_page(struct ice_rx_buf *rx_buf, int rx_buf_pgcnt) unsigned int pagecnt_bias = rx_buf->pagecnt_bias; struct page *page = rx_buf->page; - /* avoid re-using remote pages */ - if (unlikely(ice_page_is_reserved(page))) + /* avoid re-using remote and pfmemalloc pages */ + if (!dev_page_is_reusable(page)) return false; #if (PAGE_SIZE < 8192) diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index 84d4284b8b326..7d8e02b4d0925 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -8215,18 +8215,13 @@ static void igb_reuse_rx_page(struct igb_ring *rx_ring, new_buff->pagecnt_bias = old_buff->pagecnt_bias; } -static inline bool igb_page_is_reserved(struct page *page) -{ - return (page_to_nid(page) != numa_mem_id()) || page_is_pfmemalloc(page); -} - static bool igb_can_reuse_rx_page(struct igb_rx_buffer *rx_buffer) { unsigned int pagecnt_bias = rx_buffer->pagecnt_bias; struct page *page = rx_buffer->page; - /* avoid re-using remote pages */ - if (unlikely(igb_page_is_reserved(page))) + /* avoid re-using remote and pfmemalloc pages */ + if (!dev_page_is_reusable(page)) return false; #if (PAGE_SIZE < 8192) diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index 43aec42e6d9d4..ae0de7f085686 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -1648,18 +1648,13 @@ static void igc_reuse_rx_page(struct igc_ring *rx_ring, new_buff->pagecnt_bias = old_buff->pagecnt_bias; } -static inline bool igc_page_is_reserved(struct page *page) -{ - return (page_to_nid(page) != numa_mem_id()) || page_is_pfmemalloc(page); -} - static bool igc_can_reuse_rx_page(struct igc_rx_buffer *rx_buffer) { unsigned int pagecnt_bias = rx_buffer->pagecnt_bias; struct page *page = rx_buffer->page; - /* avoid re-using remote pages */ - if (unlikely(igc_page_is_reserved(page))) + /* avoid re-using remote and pfmemalloc pages */ + if (!dev_page_is_reusable(page)) return false; #if (PAGE_SIZE < 8192) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index e08c01525fd26..237e09342f285 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -1940,19 +1940,14 @@ static void ixgbe_reuse_rx_page(struct ixgbe_ring *rx_ring, new_buff->pagecnt_bias = old_buff->pagecnt_bias; } -static inline bool ixgbe_page_is_reserved(struct page *page) -{ - return (page_to_nid(page) != numa_mem_id()) || page_is_pfmemalloc(page); -} - static bool ixgbe_can_reuse_rx_page(struct ixgbe_rx_buffer *rx_buffer, int rx_buffer_pgcnt) { unsigned int pagecnt_bias = rx_buffer->pagecnt_bias; struct page *page = rx_buffer->page; - /* avoid re-using remote pages */ - if (unlikely(ixgbe_page_is_reserved(page))) + /* avoid re-using remote and pfmemalloc pages */ + if (!dev_page_is_reusable(page)) return false; #if (PAGE_SIZE < 8192) diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index a14e55e7fce88..449d7d5b280dd 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -781,18 +781,13 @@ static void ixgbevf_reuse_rx_page(struct ixgbevf_ring *rx_ring, new_buff->pagecnt_bias = old_buff->pagecnt_bias; } -static inline bool ixgbevf_page_is_reserved(struct page *page) -{ - return (page_to_nid(page) != numa_mem_id()) || page_is_pfmemalloc(page); -} - static bool ixgbevf_can_reuse_rx_page(struct ixgbevf_rx_buffer *rx_buffer) { unsigned int pagecnt_bias = rx_buffer->pagecnt_bias; struct page *page = rx_buffer->page; - /* avoid re-using remote pages */ - if (unlikely(ixgbevf_page_is_reserved(page))) + /* avoid re-using remote and pfmemalloc pages */ + if (!dev_page_is_reusable(page)) return false; #if (PAGE_SIZE < 8192) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 50723e5fb085a..4de5a97ceac6d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -213,11 +213,6 @@ static inline u32 mlx5e_decompress_cqes_start(struct mlx5e_rq *rq, return mlx5e_decompress_cqes_cont(rq, wq, 1, budget_rem) - 1; } -static inline bool mlx5e_page_is_reserved(struct page *page) -{ - return page_is_pfmemalloc(page) || page_to_nid(page) != numa_mem_id(); -} - static inline bool mlx5e_rx_cache_put(struct mlx5e_rq *rq, struct mlx5e_dma_info *dma_info) { @@ -230,7 +225,7 @@ static inline bool mlx5e_rx_cache_put(struct mlx5e_rq *rq, return false; } - if (unlikely(mlx5e_page_is_reserved(dma_info->page))) { + if (!dev_page_is_reusable(dma_info->page)) { stats->cache_waive++; return false; } -- GitLab From 05656132a8745568692c4b505630e65990266101 Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Tue, 2 Feb 2021 13:31:46 +0000 Subject: [PATCH 1907/2012] net: page_pool: simplify page recycling condition tests pool_page_reusable() is a leftover from pre-NUMA-aware times. For now, this function is just a redundant wrapper over page_is_pfmemalloc(), so inline it into its sole call site. Signed-off-by: Alexander Lobakin Acked-by: Jesper Dangaard Brouer Reviewed-by: Ilias Apalodimas Reviewed-by: Jesse Brandeburg Acked-by: David Rientjes Signed-off-by: Jakub Kicinski --- net/core/page_pool.c | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/net/core/page_pool.c b/net/core/page_pool.c index f3c690b8c8e36..ad8b0707af04b 100644 --- a/net/core/page_pool.c +++ b/net/core/page_pool.c @@ -350,14 +350,6 @@ static bool page_pool_recycle_in_cache(struct page *page, return true; } -/* page is NOT reusable when: - * 1) allocated when system is under some pressure. (page_is_pfmemalloc) - */ -static bool pool_page_reusable(struct page_pool *pool, struct page *page) -{ - return !page_is_pfmemalloc(page); -} - /* If the page refcnt == 1, this will try to recycle the page. * if PP_FLAG_DMA_SYNC_DEV is set, we'll try to sync the DMA area for * the configured size min(dma_sync_size, pool->max_len). @@ -373,9 +365,11 @@ __page_pool_put_page(struct page_pool *pool, struct page *page, * regular page allocator APIs. * * refcnt == 1 means page_pool owns page, and can recycle it. + * + * page is NOT reusable when allocated when system is under + * some pressure. (page_is_pfmemalloc) */ - if (likely(page_ref_count(page) == 1 && - pool_page_reusable(pool, page))) { + if (likely(page_ref_count(page) == 1 && !page_is_pfmemalloc(page))) { /* Read barrier done in page_ref_count / READ_ONCE */ if (pool->p.flags & PP_FLAG_DMA_SYNC_DEV) -- GitLab From c9dca822c72914ff33593b12f9fb229f0c0afd47 Mon Sep 17 00:00:00 2001 From: Jian Yang Date: Mon, 1 Feb 2021 15:34:45 -0800 Subject: [PATCH 1908/2012] net-loopback: set lo dev initial state to UP Traditionally loopback devices come up with initial state as DOWN for any new network-namespace. This would mean that anyone needing this device would have to bring this UP by issuing something like 'ip link set lo up'. This can be avoided if the initial state is set as UP. Signed-off-by: Mahesh Bandewar Signed-off-by: Jian Yang Link: https://lore.kernel.org/r/20210201233445.2044327-1-jianyang.kernel@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/loopback.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/loopback.c b/drivers/net/loopback.c index a1c77cc004165..24487ec17f8b1 100644 --- a/drivers/net/loopback.c +++ b/drivers/net/loopback.c @@ -219,6 +219,12 @@ static __net_init int loopback_net_init(struct net *net) BUG_ON(dev->ifindex != LOOPBACK_IFINDEX); net->loopback_dev = dev; + + /* bring loopback device UP */ + rtnl_lock(); + dev_open(dev, NULL); + rtnl_unlock(); + return 0; out_free_netdev: -- GitLab From a4a600dd301ccde6ea239804ec1f19364a39d643 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Wed, 3 Feb 2021 16:54:22 +0800 Subject: [PATCH 1909/2012] udp: call udp_encap_enable for v6 sockets when enabling encap When enabling encap for a ipv6 socket without udp_encap_needed_key increased, UDP GRO won't work for v4 mapped v6 address packets as sk will be NULL in udp4_gro_receive(). This patch is to enable it by increasing udp_encap_needed_key for v6 sockets in udp_tunnel_encap_enable(), and correspondingly decrease udp_encap_needed_key in udpv6_destroy_sock(). v1->v2: - add udp_encap_disable() and export it. v2->v3: - add the change for rxrpc and bareudp into one patch, as Alex suggested. v3->v4: - move rxrpc part to another patch. Acked-by: Willem de Bruijn Signed-off-by: Xin Long Signed-off-by: Jakub Kicinski --- drivers/net/bareudp.c | 6 ------ include/net/udp.h | 1 + include/net/udp_tunnel.h | 3 +-- net/ipv4/udp.c | 6 ++++++ net/ipv6/udp.c | 4 +++- 5 files changed, 11 insertions(+), 9 deletions(-) diff --git a/drivers/net/bareudp.c b/drivers/net/bareudp.c index 1b8f59713fc7d..7511bca9c15ed 100644 --- a/drivers/net/bareudp.c +++ b/drivers/net/bareudp.c @@ -240,12 +240,6 @@ static int bareudp_socket_create(struct bareudp_dev *bareudp, __be16 port) tunnel_cfg.encap_destroy = NULL; setup_udp_tunnel_sock(bareudp->net, sock, &tunnel_cfg); - /* As the setup_udp_tunnel_sock does not call udp_encap_enable if the - * socket type is v6 an explicit call to udp_encap_enable is needed. - */ - if (sock->sk->sk_family == AF_INET6) - udp_encap_enable(); - rcu_assign_pointer(bareudp->sock, sock); return 0; } diff --git a/include/net/udp.h b/include/net/udp.h index 01351ba25b874..5ddbb42fdb360 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -467,6 +467,7 @@ void udp_init(void); DECLARE_STATIC_KEY_FALSE(udp_encap_needed_key); void udp_encap_enable(void); +void udp_encap_disable(void); #if IS_ENABLED(CONFIG_IPV6) DECLARE_STATIC_KEY_FALSE(udpv6_encap_needed_key); void udpv6_encap_enable(void); diff --git a/include/net/udp_tunnel.h b/include/net/udp_tunnel.h index 282d10ee60e13..afc7ce713657b 100644 --- a/include/net/udp_tunnel.h +++ b/include/net/udp_tunnel.h @@ -181,9 +181,8 @@ static inline void udp_tunnel_encap_enable(struct socket *sock) #if IS_ENABLED(CONFIG_IPV6) if (sock->sk->sk_family == PF_INET6) ipv6_stub->udpv6_encap_enable(); - else #endif - udp_encap_enable(); + udp_encap_enable(); } #define UDP_TUNNEL_NIC_MAX_TABLES 4 diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 69ea76578abb9..48208fb4e8957 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -596,6 +596,12 @@ void udp_encap_enable(void) } EXPORT_SYMBOL(udp_encap_enable); +void udp_encap_disable(void) +{ + static_branch_dec(&udp_encap_needed_key); +} +EXPORT_SYMBOL(udp_encap_disable); + /* Handler for tunnels with arbitrary destination ports: no socket lookup, go * through error handlers in encapsulations looking for a match. */ diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index b9f3dfdd23834..d754292050848 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -1608,8 +1608,10 @@ void udpv6_destroy_sock(struct sock *sk) if (encap_destroy) encap_destroy(sk); } - if (up->encap_enabled) + if (up->encap_enabled) { static_branch_dec(&udpv6_encap_needed_key); + udp_encap_disable(); + } } inet6_destroy_sock(sk); -- GitLab From 5d30c626b67e1c70ba7805d468c7de39f1da2f7e Mon Sep 17 00:00:00 2001 From: Xin Long Date: Wed, 3 Feb 2021 16:54:23 +0800 Subject: [PATCH 1910/2012] rxrpc: call udp_tunnel_encap_enable in rxrpc_open_socket When doing encap_enable/increasing encap_needed_key, up->encap_enabled is not set in rxrpc_open_socket(), and it will cause encap_needed_key not being decreased in udpv6_destroy_sock(). This patch is to improve it by just calling udp_tunnel_encap_enable() where it increases both UDP and UDPv6 encap_needed_key and sets up->encap_enabled. v4->v5: - add the missing '#include ', as David Howells noticed. Acked-and-tested-by: David Howells Signed-off-by: Xin Long Signed-off-by: Jakub Kicinski --- net/rxrpc/local_object.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c index 8c2881054266d..33b49367d5759 100644 --- a/net/rxrpc/local_object.c +++ b/net/rxrpc/local_object.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include "ar-internal.h" @@ -135,11 +136,7 @@ static int rxrpc_open_socket(struct rxrpc_local *local, struct net *net) udp_sk(usk)->gro_receive = NULL; udp_sk(usk)->gro_complete = NULL; - udp_encap_enable(); -#if IS_ENABLED(CONFIG_AF_RXRPC_IPV6) - if (local->srx.transport.family == AF_INET6) - udpv6_encap_enable(); -#endif + udp_tunnel_encap_enable(local->socket); usk->sk_error_report = rxrpc_error_report; /* if a local address was supplied then bind it */ -- GitLab From d6adfd37e7eb9ec65e6fd95790f718dda85dec2f Mon Sep 17 00:00:00 2001 From: wengjianfeng Date: Wed, 3 Feb 2021 17:38:42 +0800 Subject: [PATCH 1911/2012] nfc: pn533: Fix typo issue change 'piority' to 'priority' change 'succesfult' to 'successful' Signed-off-by: wengjianfeng Link: https://lore.kernel.org/r/20210203093842.11180-1-samirweng1979@163.com Signed-off-by: Jakub Kicinski --- drivers/nfc/pn533/pn533.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/nfc/pn533/pn533.c b/drivers/nfc/pn533/pn533.c index f7464bd6d57cb..f1469ac8ff425 100644 --- a/drivers/nfc/pn533/pn533.c +++ b/drivers/nfc/pn533/pn533.c @@ -513,7 +513,7 @@ static int pn533_send_cmd_async(struct pn533 *dev, u8 cmd_code, /* * pn533_send_cmd_direct_async * - * The function sends a piority cmd directly to the chip omitting the cmd + * The function sends a priority cmd directly to the chip omitting the cmd * queue. It's intended to be used by chaining mechanism of received responses * where the host has to request every single chunk of data before scheduling * next cmd from the queue. @@ -615,7 +615,7 @@ static int pn533_send_sync_complete(struct pn533 *dev, void *_arg, * as it's been already freed at the beginning of RX path by * async_complete_cb. * - * 3. valid pointer in case of succesfult RX path + * 3. valid pointer in case of successful RX path * * A caller has to check a return value with IS_ERR macro. If the test pass, * the returned pointer is valid. -- GitLab From 8f8a42ff003aaa3c1424923154c9a92f3bd4c634 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 3 Feb 2021 13:10:40 +0000 Subject: [PATCH 1912/2012] net: hns3: remove redundant null check of an array The null check of filp->f_path.dentry->d_iname is redundant because it is an array of DNAME_INLINE_LEN chars and cannot be a null. Fix this by removing the null check. Addresses-Coverity: ("Array compared against 0") Fixes: 04987ca1b9b6 ("net: hns3: add debugfs support for tm nodes, priority and qset info") Signed-off-by: Colin Ian King Reviewed-by: Jesse Brandeburg Link: https://lore.kernel.org/r/20210203131040.21656-1-colin.king@canonical.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c index 6978304f1ac53..c5958754f939a 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c @@ -494,9 +494,6 @@ static ssize_t hns3_dbg_read(struct file *filp, char __user *buffer, ssize_t size = 0; int ret = 0; - if (!filp->f_path.dentry->d_iname) - return -EINVAL; - read_buf = kzalloc(HNS3_DBG_READ_LEN, GFP_KERNEL); if (!read_buf) return -ENOMEM; -- GitLab From 1faba27f11c8da244e793546a1b35a9b1da8208e Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Wed, 3 Feb 2021 15:51:09 +0200 Subject: [PATCH 1913/2012] ipv6: silence compilation warning for non-IPV6 builds The W=1 compilation of allmodconfig generates the following warning: net/ipv6/icmp.c:448:6: warning: no previous prototype for 'icmp6_send' [-Wmissing-prototypes] 448 | void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info, | ^~~~~~~~~~ Fix it by providing function declaration for builds with ipv6 as a module. Signed-off-by: Leon Romanovsky Signed-off-by: Jakub Kicinski --- include/linux/icmpv6.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/icmpv6.h b/include/linux/icmpv6.h index 1b3371ae81936..452d8978ffc7a 100644 --- a/include/linux/icmpv6.h +++ b/include/linux/icmpv6.h @@ -16,9 +16,9 @@ static inline struct icmp6hdr *icmp6_hdr(const struct sk_buff *skb) typedef void ip6_icmp_send_t(struct sk_buff *skb, u8 type, u8 code, __u32 info, const struct in6_addr *force_saddr); -#if IS_BUILTIN(CONFIG_IPV6) void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info, const struct in6_addr *force_saddr); +#if IS_BUILTIN(CONFIG_IPV6) static inline void icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) { icmp6_send(skb, type, code, info, NULL); -- GitLab From f9a4719cc16fcf4f8816521e0c903a218fa072b6 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Wed, 3 Feb 2021 15:51:10 +0200 Subject: [PATCH 1914/2012] ipv6: move udp declarations to net/udp.h Fix the following compilation warning: net/ipv6/udp.c:1031:30: warning: no previous prototype for 'udp_v6_early_demux' [-Wmissing-prototypes] 1031 | INDIRECT_CALLABLE_SCOPE void udp_v6_early_demux(struct sk_buff *skb) | ^~~~~~~~~~~~~~~~~~ net/ipv6/udp.c:1072:29: warning: no previous prototype for 'udpv6_rcv' [-Wmissing-prototypes] 1072 | INDIRECT_CALLABLE_SCOPE int udpv6_rcv(struct sk_buff *skb) | ^~~~~~~~~ Signed-off-by: Leon Romanovsky Signed-off-by: Jakub Kicinski --- include/net/udp.h | 3 +++ net/ipv6/ip6_input.c | 3 +-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/include/net/udp.h b/include/net/udp.h index 5ddbb42fdb360..a132a02b2f2cb 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -173,6 +173,9 @@ INDIRECT_CALLABLE_DECLARE(int udp4_gro_complete(struct sk_buff *, int)); INDIRECT_CALLABLE_DECLARE(struct sk_buff *udp6_gro_receive(struct list_head *, struct sk_buff *)); INDIRECT_CALLABLE_DECLARE(int udp6_gro_complete(struct sk_buff *, int)); +INDIRECT_CALLABLE_DECLARE(void udp_v6_early_demux(struct sk_buff *)); +INDIRECT_CALLABLE_DECLARE(int udpv6_rcv(struct sk_buff *)); + struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb, struct udphdr *uh, struct sock *sk); int udp_gro_complete(struct sk_buff *skb, int nhoff, udp_lookup_t lookup); diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index e96304d8a4a7f..e9d2a4a409aab 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -32,6 +32,7 @@ #include #include +#include #include #include @@ -44,7 +45,6 @@ #include #include -INDIRECT_CALLABLE_DECLARE(void udp_v6_early_demux(struct sk_buff *)); INDIRECT_CALLABLE_DECLARE(void tcp_v6_early_demux(struct sk_buff *)); static void ip6_rcv_finish_core(struct net *net, struct sock *sk, struct sk_buff *skb) @@ -352,7 +352,6 @@ void ipv6_list_rcv(struct list_head *head, struct packet_type *pt, ip6_sublist_rcv(&sublist, curr_dev, curr_net); } -INDIRECT_CALLABLE_DECLARE(int udpv6_rcv(struct sk_buff *)); INDIRECT_CALLABLE_DECLARE(int tcp_v6_rcv(struct sk_buff *)); /* -- GitLab From 04f00ab2275f60658b5e4996e28f52ab1bc51d75 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Wed, 3 Feb 2021 15:51:11 +0200 Subject: [PATCH 1915/2012] net/core: move gro function declarations to separate header MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fir the following compilation warnings: 1031 | INDIRECT_CALLABLE_SCOPE void udp_v6_early_demux(struct sk_buff *skb) net/ipv6/ip6_offload.c:182:41: warning: no previous prototype for ‘ipv6_gro_receive’ [-Wmissing-prototypes] 182 | INDIRECT_CALLABLE_SCOPE struct sk_buff *ipv6_gro_receive(struct list_head *head, | ^~~~~~~~~~~~~~~~ net/ipv6/ip6_offload.c:320:29: warning: no previous prototype for ‘ipv6_gro_complete’ [-Wmissing-prototypes] 320 | INDIRECT_CALLABLE_SCOPE int ipv6_gro_complete(struct sk_buff *skb, int nhoff) | ^~~~~~~~~~~~~~~~~ net/ipv6/ip6_offload.c:182:41: warning: no previous prototype for ‘ipv6_gro_receive’ [-Wmissing-prototypes] 182 | INDIRECT_CALLABLE_SCOPE struct sk_buff *ipv6_gro_receive(struct list_head *head, | ^~~~~~~~~~~~~~~~ net/ipv6/ip6_offload.c:320:29: warning: no previous prototype for ‘ipv6_gro_complete’ [-Wmissing-prototypes] 320 | INDIRECT_CALLABLE_SCOPE int ipv6_gro_complete(struct sk_buff *skb, int nhoff) Signed-off-by: Leon Romanovsky Signed-off-by: Jakub Kicinski --- include/net/gro.h | 12 ++++++++++++ net/core/dev.c | 7 +------ net/ipv6/ip6_offload.c | 1 + 3 files changed, 14 insertions(+), 6 deletions(-) create mode 100644 include/net/gro.h diff --git a/include/net/gro.h b/include/net/gro.h new file mode 100644 index 0000000000000..8a6eb5303cc4c --- /dev/null +++ b/include/net/gro.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#ifndef _NET_IPV6_GRO_H +#define _NET_IPV6_GRO_H + +INDIRECT_CALLABLE_DECLARE(struct sk_buff *ipv6_gro_receive(struct list_head *, + struct sk_buff *)); +INDIRECT_CALLABLE_DECLARE(int ipv6_gro_complete(struct sk_buff *, int)); +INDIRECT_CALLABLE_DECLARE(struct sk_buff *inet_gro_receive(struct list_head *, + struct sk_buff *)); +INDIRECT_CALLABLE_DECLARE(int inet_gro_complete(struct sk_buff *, int)); +#endif /* _NET_IPV6_GRO_H */ diff --git a/net/core/dev.c b/net/core/dev.c index aae116d059da7..21d74d30f5d70 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -101,6 +101,7 @@ #include #include #include +#include #include #include #include @@ -5751,8 +5752,6 @@ static void gro_normal_one(struct napi_struct *napi, struct sk_buff *skb) gro_normal_list(napi); } -INDIRECT_CALLABLE_DECLARE(int inet_gro_complete(struct sk_buff *, int)); -INDIRECT_CALLABLE_DECLARE(int ipv6_gro_complete(struct sk_buff *, int)); static int napi_gro_complete(struct napi_struct *napi, struct sk_buff *skb) { struct packet_offload *ptype; @@ -5921,10 +5920,6 @@ static void gro_flush_oldest(struct napi_struct *napi, struct list_head *head) napi_gro_complete(napi, oldest); } -INDIRECT_CALLABLE_DECLARE(struct sk_buff *inet_gro_receive(struct list_head *, - struct sk_buff *)); -INDIRECT_CALLABLE_DECLARE(struct sk_buff *ipv6_gro_receive(struct list_head *, - struct sk_buff *)); static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb) { u32 hash = skb_get_hash_raw(skb) & (GRO_HASH_BUCKETS - 1); diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c index a80f90bf3ae7d..1b9827ff8ccf4 100644 --- a/net/ipv6/ip6_offload.c +++ b/net/ipv6/ip6_offload.c @@ -15,6 +15,7 @@ #include #include #include +#include #include "ip6_offload.h" -- GitLab From edf597da02a01edb26bddf06890fb81eee3d82cf Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Wed, 3 Feb 2021 15:51:12 +0200 Subject: [PATCH 1916/2012] netfilter: move handlers to net/ip_vs.h Fix the following compilation warnings: net/netfilter/ipvs/ip_vs_proto_tcp.c:147:1: warning: no previous prototype for 'tcp_snat_handler' [-Wmissing-prototypes] 147 | tcp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, | ^~~~~~~~~~~~~~~~ net/netfilter/ipvs/ip_vs_proto_udp.c:136:1: warning: no previous prototype for 'udp_snat_handler' [-Wmissing-prototypes] 136 | udp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, | ^~~~~~~~~~~~~~~~ Signed-off-by: Leon Romanovsky Signed-off-by: Jakub Kicinski --- include/net/ip_vs.h | 11 +++++++++++ net/netfilter/ipvs/ip_vs_core.c | 12 ------------ 2 files changed, 11 insertions(+), 12 deletions(-) diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index d609e957a3ec0..7cb5a1aace40d 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -1712,4 +1712,15 @@ ip_vs_dest_conn_overhead(struct ip_vs_dest *dest) atomic_read(&dest->inactconns); } +#ifdef CONFIG_IP_VS_PROTO_TCP +INDIRECT_CALLABLE_DECLARE(int + tcp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, + struct ip_vs_conn *cp, struct ip_vs_iphdr *iph)); +#endif + +#ifdef CONFIG_IP_VS_PROTO_UDP +INDIRECT_CALLABLE_DECLARE(int + udp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, + struct ip_vs_conn *cp, struct ip_vs_iphdr *iph)); +#endif #endif /* _NET_IP_VS_H */ diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 54e086c657210..0c132ff9b4467 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -68,18 +68,6 @@ EXPORT_SYMBOL(ip_vs_get_debug_level); #endif EXPORT_SYMBOL(ip_vs_new_conn_out); -#ifdef CONFIG_IP_VS_PROTO_TCP -INDIRECT_CALLABLE_DECLARE(int - tcp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, - struct ip_vs_conn *cp, struct ip_vs_iphdr *iph)); -#endif - -#ifdef CONFIG_IP_VS_PROTO_UDP -INDIRECT_CALLABLE_DECLARE(int - udp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, - struct ip_vs_conn *cp, struct ip_vs_iphdr *iph)); -#endif - #if defined(CONFIG_IP_VS_PROTO_TCP) && defined(CONFIG_IP_VS_PROTO_UDP) #define SNAT_CALL(f, ...) \ INDIRECT_CALL_2(f, tcp_snat_handler, udp_snat_handler, __VA_ARGS__) -- GitLab From 53b823b29aac320b2d4b64fc79af869720c73cf5 Mon Sep 17 00:00:00 2001 From: Bhaskar Chowdhury Date: Thu, 4 Feb 2021 08:46:48 +0530 Subject: [PATCH 1917/2012] drivers: net: ethernet: i825xx: Fix couple of spellings in the file ether1.c s/initialsation/initialisation/ s/specifiing/specifying/ Signed-off-by: Bhaskar Chowdhury Acked-by: Randy Dunlap Link: https://lore.kernel.org/r/20210204031648.27300-1-unixbhaskar@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/i825xx/ether1.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/i825xx/ether1.c b/drivers/net/ethernet/i825xx/ether1.c index a0bfb509e0023..c612ef526d16d 100644 --- a/drivers/net/ethernet/i825xx/ether1.c +++ b/drivers/net/ethernet/i825xx/ether1.c @@ -20,7 +20,7 @@ * 1.02 RMK 25/05/1997 Added code to restart RU if it goes not ready * 1.03 RMK 14/09/1997 Cleaned up the handling of a reset during the TX interrupt. * Should prevent lockup. - * 1.04 RMK 17/09/1997 Added more info when initialsation of chip goes wrong. + * 1.04 RMK 17/09/1997 Added more info when initialisation of chip goes wrong. * TDR now only reports failure when chip reports non-zero * TDR time-distance. * 1.05 RMK 31/12/1997 Removed calls to dev_tint for 2.1 @@ -117,7 +117,7 @@ ether1_outw_p (struct net_device *dev, unsigned short val, int addr, int svflgs) * Some inline assembler to allow fast transfers on to/off of the card. * Since this driver depends on some features presented by the ARM * specific architecture, and that you can't configure this driver - * without specifiing ARM mode, this is not a problem. + * without specifying ARM mode, this is not a problem. * * This routine is essentially an optimised memcpy from the card's * onboard RAM to kernel memory. -- GitLab From b53014f0791cbc4925f52d7c46220845e42d0a91 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Wed, 3 Feb 2021 21:39:18 +0200 Subject: [PATCH 1918/2012] net: dsa: bcm_sf2: Check egress tagging of CFP rule with proper accessor The flow steering struct ethtool_flow_ext::data field is __be32, so when the CFP code needs to check the VLAN egress tagging attribute in bit 0, it does this in CPU native endianness. So logically, the endianness conversion is set up the other way around, although in practice the same result is produced. Gets rid of build warning: warning: cast from restricted __be32 warning: incorrect type in argument 1 (different base types) expected unsigned int [usertype] val got restricted __be32 warning: cast from restricted __be32 warning: cast from restricted __be32 warning: cast from restricted __be32 warning: cast from restricted __be32 warning: restricted __be32 degrades to integer Signed-off-by: Vladimir Oltean Acked-by: Florian Fainelli Link: https://lore.kernel.org/r/20210203193918.2236994-1-olteanv@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/bcm_sf2_cfp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/dsa/bcm_sf2_cfp.c b/drivers/net/dsa/bcm_sf2_cfp.c index ed45d16250e16..178218cf73a3f 100644 --- a/drivers/net/dsa/bcm_sf2_cfp.c +++ b/drivers/net/dsa/bcm_sf2_cfp.c @@ -886,7 +886,7 @@ static int bcm_sf2_cfp_rule_insert(struct dsa_switch *ds, int port, vid = be16_to_cpu(fs->h_ext.vlan_tci) & VLAN_VID_MASK; vlan.vid = vid; - if (cpu_to_be32(fs->h_ext.data[1]) & 1) + if (be32_to_cpu(fs->h_ext.data[1]) & 1) vlan.flags = BRIDGE_VLAN_INFO_UNTAGGED; else vlan.flags = 0; -- GitLab From add285bce37720675af5b1873f71af8561d0e2fe Mon Sep 17 00:00:00 2001 From: Tobias Waldekranz Date: Tue, 2 Feb 2021 20:16:45 +0100 Subject: [PATCH 1919/2012] net: dsa: xrs700x: Correctly address device over I2C On read, master should send 31 MSB of the register (only even values are ever used), followed by a 1 to indicate read. Then, reading two bytes, the device will output the register's value. On write, master sends 31 MSB of the register, followed by a 0 to indicate write, followed by two bytes containing the register value. Flexibilis' documentation (version 1.3) specifies the opposite polarity (#read/write), but the scope indicates that it is, in fact, read/#write. Signed-off-by: Tobias Waldekranz Reviewed-by: George McCollister Link: https://lore.kernel.org/r/20210202191645.439-1-tobias@waldekranz.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/xrs700x/xrs700x_i2c.c | 31 ++++++++++++--------------- 1 file changed, 14 insertions(+), 17 deletions(-) diff --git a/drivers/net/dsa/xrs700x/xrs700x_i2c.c b/drivers/net/dsa/xrs700x/xrs700x_i2c.c index a5f8883af8292..16a46a78a0370 100644 --- a/drivers/net/dsa/xrs700x/xrs700x_i2c.c +++ b/drivers/net/dsa/xrs700x/xrs700x_i2c.c @@ -10,33 +10,34 @@ #include "xrs700x.h" #include "xrs700x_reg.h" +struct xrs700x_i2c_cmd { + __be32 reg; + __be16 val; +} __packed; + static int xrs700x_i2c_reg_read(void *context, unsigned int reg, unsigned int *val) { struct device *dev = context; struct i2c_client *i2c = to_i2c_client(dev); - unsigned char buf[4]; + struct xrs700x_i2c_cmd cmd; int ret; - buf[0] = reg >> 23 & 0xff; - buf[1] = reg >> 15 & 0xff; - buf[2] = reg >> 7 & 0xff; - buf[3] = (reg & 0x7f) << 1; + cmd.reg = cpu_to_be32(reg | 1); - ret = i2c_master_send(i2c, buf, sizeof(buf)); + ret = i2c_master_send(i2c, (char *)&cmd.reg, sizeof(cmd.reg)); if (ret < 0) { dev_err(dev, "xrs i2c_master_send returned %d\n", ret); return ret; } - ret = i2c_master_recv(i2c, buf, 2); + ret = i2c_master_recv(i2c, (char *)&cmd.val, sizeof(cmd.val)); if (ret < 0) { dev_err(dev, "xrs i2c_master_recv returned %d\n", ret); return ret; } - *val = buf[0] << 8 | buf[1]; - + *val = be16_to_cpu(cmd.val); return 0; } @@ -45,17 +46,13 @@ static int xrs700x_i2c_reg_write(void *context, unsigned int reg, { struct device *dev = context; struct i2c_client *i2c = to_i2c_client(dev); - unsigned char buf[6]; + struct xrs700x_i2c_cmd cmd; int ret; - buf[0] = reg >> 23 & 0xff; - buf[1] = reg >> 15 & 0xff; - buf[2] = reg >> 7 & 0xff; - buf[3] = (reg & 0x7f) << 1 | 1; - buf[4] = val >> 8 & 0xff; - buf[5] = val & 0xff; + cmd.reg = cpu_to_be32(reg); + cmd.val = cpu_to_be16(val); - ret = i2c_master_send(i2c, buf, sizeof(buf)); + ret = i2c_master_send(i2c, (char *)&cmd, sizeof(cmd)); if (ret < 0) { dev_err(dev, "xrs i2c_master_send returned %d\n", ret); return ret; -- GitLab From a08c0d309d8c078d22717d815cf9853f6f2c07bd Mon Sep 17 00:00:00 2001 From: Hayes Wang Date: Wed, 3 Feb 2021 17:14:28 +0800 Subject: [PATCH 1920/2012] r8152: replace several functions about phy patch request Replace r8153_patch_request() with rtl_phy_patch_request(). Replace r8153_pre_ram_code() with rtl_pre_ram_code(). Replace r8153_post_ram_code() with rtl_post_ram_code(). Add rtl_patch_key_set(). The new functions have an additional parameter. It is used to wait the patch request command finished. When the PHY is resumed from the state of power cut, the PHY is at a safe mode and the OCP_PHY_PATCH_STAT wouldn't be updated. For this situation, it is safe to set patch request command without waiting OCP_PHY_PATCH_STAT. Signed-off-by: Hayes Wang Signed-off-by: Jakub Kicinski --- drivers/net/usb/r8152.c | 84 ++++++++++++++++++++++++----------------- 1 file changed, 50 insertions(+), 34 deletions(-) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index 0d7d2938e21d8..e4dda75a6e0d0 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -3468,59 +3468,76 @@ static void rtl_clear_bp(struct r8152 *tp, u16 type) ocp_write_word(tp, type, PLA_BP_BA, 0); } -static int r8153_patch_request(struct r8152 *tp, bool request) +static int rtl_phy_patch_request(struct r8152 *tp, bool request, bool wait) { - u16 data; + u16 data, check; int i; data = ocp_reg_read(tp, OCP_PHY_PATCH_CMD); - if (request) + if (request) { data |= PATCH_REQUEST; - else + check = 0; + } else { data &= ~PATCH_REQUEST; + check = PATCH_READY; + } ocp_reg_write(tp, OCP_PHY_PATCH_CMD, data); - for (i = 0; request && i < 5000; i++) { + for (i = 0; wait && i < 5000; i++) { + u32 ocp_data; + usleep_range(1000, 2000); - if (ocp_reg_read(tp, OCP_PHY_PATCH_STAT) & PATCH_READY) + ocp_data = ocp_reg_read(tp, OCP_PHY_PATCH_STAT); + if ((ocp_data & PATCH_READY) ^ check) break; } - if (request && !(ocp_reg_read(tp, OCP_PHY_PATCH_STAT) & PATCH_READY)) { - netif_err(tp, drv, tp->netdev, "patch request fail\n"); - r8153_patch_request(tp, false); + if (request && wait && + !(ocp_reg_read(tp, OCP_PHY_PATCH_STAT) & PATCH_READY)) { + dev_err(&tp->intf->dev, "PHY patch request fail\n"); + rtl_phy_patch_request(tp, false, false); return -ETIME; } else { return 0; } } -static int r8153_pre_ram_code(struct r8152 *tp, u16 key_addr, u16 patch_key) +static void rtl_patch_key_set(struct r8152 *tp, u16 key_addr, u16 patch_key) { - if (r8153_patch_request(tp, true)) { - dev_err(&tp->intf->dev, "patch request fail\n"); - return -ETIME; - } + if (patch_key && key_addr) { + sram_write(tp, key_addr, patch_key); + sram_write(tp, SRAM_PHY_LOCK, PHY_PATCH_LOCK); + } else if (key_addr) { + u16 data; - sram_write(tp, key_addr, patch_key); - sram_write(tp, SRAM_PHY_LOCK, PHY_PATCH_LOCK); + sram_write(tp, 0x0000, 0x0000); - return 0; + data = ocp_reg_read(tp, OCP_PHY_LOCK); + data &= ~PATCH_LOCK; + ocp_reg_write(tp, OCP_PHY_LOCK, data); + + sram_write(tp, key_addr, 0x0000); + } else { + WARN_ON_ONCE(1); + } } -static int r8153_post_ram_code(struct r8152 *tp, u16 key_addr) +static int +rtl_pre_ram_code(struct r8152 *tp, u16 key_addr, u16 patch_key, bool wait) { - u16 data; + if (rtl_phy_patch_request(tp, true, wait)) + return -ETIME; - sram_write(tp, 0x0000, 0x0000); + rtl_patch_key_set(tp, key_addr, patch_key); - data = ocp_reg_read(tp, OCP_PHY_LOCK); - data &= ~PATCH_LOCK; - ocp_reg_write(tp, OCP_PHY_LOCK, data); + return 0; +} - sram_write(tp, key_addr, 0x0000); +static int rtl_post_ram_code(struct r8152 *tp, u16 key_addr, bool wait) +{ + rtl_patch_key_set(tp, key_addr, 0); - r8153_patch_request(tp, false); + rtl_phy_patch_request(tp, false, wait); ocp_write_word(tp, MCU_TYPE_PLA, PLA_OCP_GPHY_BASE, tp->ocp_base); @@ -4005,7 +4022,7 @@ static void rtl8152_fw_mac_apply(struct r8152 *tp, struct fw_mac *mac) dev_dbg(&tp->intf->dev, "successfully applied %s\n", mac->info); } -static void rtl8152_apply_firmware(struct r8152 *tp) +static void rtl8152_apply_firmware(struct r8152 *tp, bool power_cut) { struct rtl_fw *rtl_fw = &tp->rtl_fw; const struct firmware *fw; @@ -4036,12 +4053,11 @@ static void rtl8152_apply_firmware(struct r8152 *tp) case RTL_FW_PHY_START: key = (struct fw_phy_patch_key *)block; key_addr = __le16_to_cpu(key->key_reg); - r8153_pre_ram_code(tp, key_addr, - __le16_to_cpu(key->key_data)); + rtl_pre_ram_code(tp, key_addr, __le16_to_cpu(key->key_data), !power_cut); break; case RTL_FW_PHY_STOP: WARN_ON(!key_addr); - r8153_post_ram_code(tp, key_addr); + rtl_post_ram_code(tp, key_addr, !power_cut); break; case RTL_FW_PHY_NC: rtl8152_fw_phy_nc_apply(tp, (struct fw_phy_nc *)block); @@ -4246,7 +4262,7 @@ static void rtl8152_disable(struct r8152 *tp) static void r8152b_hw_phy_cfg(struct r8152 *tp) { - rtl8152_apply_firmware(tp); + rtl8152_apply_firmware(tp, false); rtl_eee_enable(tp, tp->eee_en); r8152_aldps_en(tp, true); r8152b_enable_fc(tp); @@ -4528,7 +4544,7 @@ static void r8153_hw_phy_cfg(struct r8152 *tp) /* disable EEE before updating the PHY parameters */ rtl_eee_enable(tp, false); - rtl8152_apply_firmware(tp); + rtl8152_apply_firmware(tp, false); if (tp->version == RTL_VER_03) { data = ocp_reg_read(tp, OCP_EEE_CFG); @@ -4602,7 +4618,7 @@ static void r8153b_hw_phy_cfg(struct r8152 *tp) /* disable EEE before updating the PHY parameters */ rtl_eee_enable(tp, false); - rtl8152_apply_firmware(tp); + rtl8152_apply_firmware(tp, false); r8153b_green_en(tp, test_bit(GREEN_ETHERNET, &tp->flags)); @@ -4643,7 +4659,7 @@ static void r8153b_hw_phy_cfg(struct r8152 *tp) ocp_write_word(tp, MCU_TYPE_PLA, PLA_PHY_PWR, ocp_data); /* Advnace EEE */ - if (!r8153_patch_request(tp, true)) { + if (!rtl_phy_patch_request(tp, true, true)) { data = ocp_reg_read(tp, OCP_POWER_CFG); data |= EEE_CLKDIV_EN; ocp_reg_write(tp, OCP_POWER_CFG, data); @@ -4660,7 +4676,7 @@ static void r8153b_hw_phy_cfg(struct r8152 *tp) ocp_reg_write(tp, OCP_SYSCLK_CFG, clk_div_expo(5)); tp->ups_info._250m_ckdiv = true; - r8153_patch_request(tp, false); + rtl_phy_patch_request(tp, false, true); } if (tp->eee_en) -- GitLab From 80fd850b31f09263ad175b2f640d5c5c6f76ed41 Mon Sep 17 00:00:00 2001 From: Hayes Wang Date: Wed, 3 Feb 2021 17:14:29 +0800 Subject: [PATCH 1921/2012] r8152: adjust the flow of power cut for RTL8153B For runtime resuming, the RTL8153B may be resumed from the state of power cut, when enabling the feature of UPS. Then, the PHY would be reset, so it is necessary to be initailized again. Besides, the USB_U1U2_TIMER also has to be set again, so I move it from r8153b_init() to r8153b_hw_phy_cfg(). Signed-off-by: Hayes Wang Signed-off-by: Jakub Kicinski --- drivers/net/usb/r8152.c | 68 ++++++++++++++++++++++++----------------- 1 file changed, 40 insertions(+), 28 deletions(-) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index e4dda75a6e0d0..2d7cc63bef899 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -1371,6 +1371,10 @@ void write_mii_word(struct net_device *netdev, int phy_id, int reg, int val) static int r8152_submit_rx(struct r8152 *tp, struct rx_agg *agg, gfp_t mem_flags); +static int +rtl8152_set_speed(struct r8152 *tp, u8 autoneg, u32 speed, u8 duplex, + u32 advertising); + static int rtl8152_set_mac_address(struct net_device *netdev, void *p) { struct r8152 *tp = netdev_priv(netdev); @@ -3205,8 +3209,6 @@ static void r8153b_ups_en(struct r8152 *tp, bool enable) ocp_data |= BIT(0); ocp_write_byte(tp, MCU_TYPE_USB, 0xcfff, ocp_data); } else { - u16 data; - ocp_data &= ~(UPS_EN | USP_PREWAKE); ocp_write_byte(tp, MCU_TYPE_USB, USB_POWER_CUT, ocp_data); @@ -3214,31 +3216,20 @@ static void r8153b_ups_en(struct r8152 *tp, bool enable) ocp_data &= ~BIT(0); ocp_write_byte(tp, MCU_TYPE_USB, 0xcfff, ocp_data); - ocp_data = ocp_read_word(tp, MCU_TYPE_USB, USB_MISC_0); - ocp_data &= ~PCUT_STATUS; - ocp_write_word(tp, MCU_TYPE_USB, USB_MISC_0, ocp_data); - - data = r8153_phy_status(tp, 0); - - switch (data) { - case PHY_STAT_PWRDN: - case PHY_STAT_EXT_INIT: - r8153b_green_en(tp, - test_bit(GREEN_ETHERNET, &tp->flags)); + if (ocp_read_word(tp, MCU_TYPE_USB, USB_MISC_0) & PCUT_STATUS) { + int i; - data = r8152_mdio_read(tp, MII_BMCR); - data &= ~BMCR_PDOWN; - data |= BMCR_RESET; - r8152_mdio_write(tp, MII_BMCR, data); + for (i = 0; i < 500; i++) { + if (ocp_read_word(tp, MCU_TYPE_PLA, PLA_BOOT_CTRL) & + AUTOLOAD_DONE) + break; + msleep(20); + } - data = r8153_phy_status(tp, PHY_STAT_LAN_ON); - fallthrough; + tp->rtl_ops.hw_phy_cfg(tp); - default: - if (data != PHY_STAT_LAN_ON) - netif_warn(tp, link, tp->netdev, - "PHY not ready"); - break; + rtl8152_set_speed(tp, tp->autoneg, tp->speed, + tp->duplex, tp->advertising); } } } @@ -4612,13 +4603,37 @@ static void r8153b_hw_phy_cfg(struct r8152 *tp) u32 ocp_data; u16 data; + ocp_data = ocp_read_word(tp, MCU_TYPE_USB, USB_MISC_0); + if (ocp_data & PCUT_STATUS) { + ocp_data &= ~PCUT_STATUS; + ocp_write_word(tp, MCU_TYPE_USB, USB_MISC_0, ocp_data); + } + /* disable ALDPS before updating the PHY parameters */ r8153_aldps_en(tp, false); /* disable EEE before updating the PHY parameters */ rtl_eee_enable(tp, false); - rtl8152_apply_firmware(tp, false); + /* U1/U2/L1 idle timer. 500 us */ + ocp_write_word(tp, MCU_TYPE_USB, USB_U1U2_TIMER, 500); + + data = r8153_phy_status(tp, 0); + + switch (data) { + case PHY_STAT_PWRDN: + case PHY_STAT_EXT_INIT: + rtl8152_apply_firmware(tp, true); + + data = r8152_mdio_read(tp, MII_BMCR); + data &= ~BMCR_PDOWN; + r8152_mdio_write(tp, MII_BMCR, data); + break; + case PHY_STAT_LAN_ON: + default: + rtl8152_apply_firmware(tp, false); + break; + } r8153b_green_en(tp, test_bit(GREEN_ETHERNET, &tp->flags)); @@ -5544,9 +5559,6 @@ static void r8153b_init(struct r8152 *tp) /* MSC timer = 0xfff * 8ms = 32760 ms */ ocp_write_word(tp, MCU_TYPE_USB, USB_MSC_TIMER, 0x0fff); - /* U1/U2/L1 idle timer. 500 us */ - ocp_write_word(tp, MCU_TYPE_USB, USB_U1U2_TIMER, 500); - r8153b_power_cut_en(tp, false); r8153b_ups_en(tp, false); r8153_queue_wake(tp, false); -- GitLab From 0102eeedb71757d6589144cf019424f69b3ab289 Mon Sep 17 00:00:00 2001 From: "Andrea Parri (Microsoft)" Date: Wed, 3 Feb 2021 12:35:12 +0100 Subject: [PATCH 1922/2012] hv_netvsc: Allocate the recv_buf buffers after NVSP_MSG1_TYPE_SEND_RECV_BUF The recv_buf buffers are allocated in netvsc_device_add(). Later in netvsc_init_buf() the response to NVSP_MSG1_TYPE_SEND_RECV_BUF allows the host to set up a recv_section_size that could be bigger than the (default) value used for that allocation. The host-controlled value could be used by a malicious host to bypass the check on the packet's length in netvsc_receive() and hence to overflow the recv_buf buffer. Move the allocation of the recv_buf buffers into netvsc_init_but(). Reported-by: Juan Vazquez Signed-off-by: Andrea Parri (Microsoft) Fixes: 0ba35fe91ce34f ("hv_netvsc: Copy packets sent by Hyper-V out of the receive buffer") Signed-off-by: Jakub Kicinski --- drivers/net/hyperv/netvsc.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index 0fba8257fc119..9db1ea3affbb3 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -311,7 +311,7 @@ static int netvsc_init_buf(struct hv_device *device, struct nvsp_message *init_packet; unsigned int buf_size; size_t map_words; - int ret = 0; + int i, ret = 0; /* Get receive buffer area. */ buf_size = device_info->recv_sections * device_info->recv_section_size; @@ -405,6 +405,16 @@ static int netvsc_init_buf(struct hv_device *device, goto cleanup; } + for (i = 0; i < VRSS_CHANNEL_MAX; i++) { + struct netvsc_channel *nvchan = &net_device->chan_table[i]; + + nvchan->recv_buf = kzalloc(net_device->recv_section_size, GFP_KERNEL); + if (nvchan->recv_buf == NULL) { + ret = -ENOMEM; + goto cleanup; + } + } + /* Setup receive completion ring. * Add 1 to the recv_section_cnt because at least one entry in a * ring buffer has to be empty. @@ -1549,12 +1559,6 @@ struct netvsc_device *netvsc_device_add(struct hv_device *device, for (i = 0; i < VRSS_CHANNEL_MAX; i++) { struct netvsc_channel *nvchan = &net_device->chan_table[i]; - nvchan->recv_buf = kzalloc(device_info->recv_section_size, GFP_KERNEL); - if (nvchan->recv_buf == NULL) { - ret = -ENOMEM; - goto cleanup2; - } - nvchan->channel = device->channel; nvchan->net_device = net_device; u64_stats_init(&nvchan->tx_stats.syncp); -- GitLab From 8dff9808e9734fb5b4eddd0a5b1472fade215490 Mon Sep 17 00:00:00 2001 From: "Andrea Parri (Microsoft)" Date: Wed, 3 Feb 2021 12:35:13 +0100 Subject: [PATCH 1923/2012] hv_netvsc: Load and store the proper (NBL_HASH_INFO) per-packet info Fix the typo. Signed-off-by: Andrea Parri (Microsoft) Fixes: 0ba35fe91ce34f ("hv_netvsc: Copy packets sent by Hyper-V out of the receive buffer") Signed-off-by: Jakub Kicinski --- drivers/net/hyperv/rndis_filter.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c index 6c48a4d627368..0c2ebe7ac6554 100644 --- a/drivers/net/hyperv/rndis_filter.c +++ b/drivers/net/hyperv/rndis_filter.c @@ -465,7 +465,7 @@ void rsc_add_data(struct netvsc_channel *nvchan, } nvchan->rsc.pktlen = len; if (hash_info != NULL) { - nvchan->rsc.csum_info = *csum_info; + nvchan->rsc.hash_info = *hash_info; nvchan->rsc.ppi_flags |= NVSC_RSC_HASH_INFO; } else { nvchan->rsc.ppi_flags &= ~NVSC_RSC_HASH_INFO; -- GitLab From 08e1294daa2986939b7585404fe0f432695c6613 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Wed, 11 Nov 2020 10:59:15 -0800 Subject: [PATCH 1924/2012] ice: report timeout length for erasing during devlink flash When erasing, notify userspace of how long we will potentially take to erase a module. Doing so allows userspace to report the timeout, giving a clear indication of the upper time bound of the operation. Since we're re-using the erase timeout value, make it a macro rather than a magic number. Signed-off-by: Jacob Keller Reviewed-by: Shannon Nelson Tested-by: Tony Brelinski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_fw_update.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_fw_update.c b/drivers/net/ethernet/intel/ice/ice_fw_update.c index 8f81b95e679c5..dcec0360ce552 100644 --- a/drivers/net/ethernet/intel/ice/ice_fw_update.c +++ b/drivers/net/ethernet/intel/ice/ice_fw_update.c @@ -417,6 +417,11 @@ ice_write_nvm_module(struct ice_pf *pf, u16 module, const char *component, return err; } +/* Length in seconds to wait before timing out when erasing a flash module. + * Yes, erasing really can take minutes to complete. + */ +#define ICE_FW_ERASE_TIMEOUT 300 + /** * ice_erase_nvm_module - Erase an NVM module and await firmware completion * @pf: the PF data structure @@ -449,7 +454,7 @@ ice_erase_nvm_module(struct ice_pf *pf, u16 module, const char *component, devlink = priv_to_devlink(pf); - devlink_flash_update_status_notify(devlink, "Erasing", component, 0, 0); + devlink_flash_update_timeout_notify(devlink, "Erasing", component, ICE_FW_ERASE_TIMEOUT); status = ice_aq_erase_nvm(hw, module, NULL); if (status) { @@ -461,8 +466,7 @@ ice_erase_nvm_module(struct ice_pf *pf, u16 module, const char *component, goto out_notify_devlink; } - /* Yes, this really can take minutes to complete */ - err = ice_aq_wait_for_event(pf, ice_aqc_opc_nvm_erase, 300 * HZ, &event); + err = ice_aq_wait_for_event(pf, ice_aqc_opc_nvm_erase, ICE_FW_ERASE_TIMEOUT * HZ, &event); if (err) { dev_err(dev, "Timed out waiting for firmware to respond with erase completion for %s (module 0x%02x), err %d\n", component, module, err); -- GitLab From 9af368fa9c640ab3f3d8ad98a96f43c605315daa Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Thu, 1 Oct 2020 10:31:41 -0700 Subject: [PATCH 1925/2012] ice: create flash_info structure and separate NVM version The ice_nvm_info structure has become somewhat of a dumping ground for all of the fields related to flash version. It holds the NVM version and EETRACK id, the OptionROM info structure, the flash size, the ShadowRAM size, and more. A future change is going to add the ability to read the NVM version and EETRACK ID from the inactive NVM bank. To make this simpler, it is useful to have these NVM version info fields extracted to their own structure. Rename ice_nvm_info into ice_flash_info, and create a separate ice_nvm_info structure that will contain the eetrack and NVM map version. Move the netlist_ver structure into ice_flash_info and rename it ice_netlist_info for consistency. Modify the static ice_get_orom_ver_info to take the option rom structure as a pointer. This makes it more obvious what portion of the hw struct is being modified. Do the same for ice_get_netlist_ver_info. Introduce a new ice_get_nvm_ver_info function, which will be similar to ice_get_orom_ver_info and ice_get_netlist_ver_info, used to keep the NVM version extraction code co-located. Signed-off-by: Jacob Keller Tested-by: Tony Brelinski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_devlink.c | 16 ++-- drivers/net/ethernet/intel/ice/ice_ethtool.c | 8 +- drivers/net/ethernet/intel/ice/ice_nvm.c | 92 ++++++++++++-------- drivers/net/ethernet/intel/ice/ice_type.h | 37 ++++---- 4 files changed, 91 insertions(+), 62 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_devlink.c b/drivers/net/ethernet/intel/ice/ice_devlink.c index 29d6192b15f32..44b64524b1b8e 100644 --- a/drivers/net/ethernet/intel/ice/ice_devlink.c +++ b/drivers/net/ethernet/intel/ice/ice_devlink.c @@ -58,7 +58,7 @@ static int ice_info_fw_build(struct ice_pf *pf, char *buf, size_t len) static int ice_info_orom_ver(struct ice_pf *pf, char *buf, size_t len) { - struct ice_orom_info *orom = &pf->hw.nvm.orom; + struct ice_orom_info *orom = &pf->hw.flash.orom; snprintf(buf, len, "%u.%u.%u", orom->major, orom->build, orom->patch); @@ -67,16 +67,16 @@ static int ice_info_orom_ver(struct ice_pf *pf, char *buf, size_t len) static int ice_info_nvm_ver(struct ice_pf *pf, char *buf, size_t len) { - struct ice_nvm_info *nvm = &pf->hw.nvm; + struct ice_nvm_info *nvm = &pf->hw.flash.nvm; - snprintf(buf, len, "%x.%02x", nvm->major_ver, nvm->minor_ver); + snprintf(buf, len, "%x.%02x", nvm->major, nvm->minor); return 0; } static int ice_info_eetrack(struct ice_pf *pf, char *buf, size_t len) { - struct ice_nvm_info *nvm = &pf->hw.nvm; + struct ice_nvm_info *nvm = &pf->hw.flash.nvm; snprintf(buf, len, "0x%08x", nvm->eetrack); @@ -111,7 +111,7 @@ static int ice_info_ddp_pkg_bundle_id(struct ice_pf *pf, char *buf, size_t len) static int ice_info_netlist_ver(struct ice_pf *pf, char *buf, size_t len) { - struct ice_netlist_ver_info *netlist = &pf->hw.netlist_ver; + struct ice_netlist_info *netlist = &pf->hw.flash.netlist; /* The netlist version fields are BCD formatted */ snprintf(buf, len, "%x.%x.%x-%x.%x.%x", netlist->major, netlist->minor, @@ -123,7 +123,7 @@ static int ice_info_netlist_ver(struct ice_pf *pf, char *buf, size_t len) static int ice_info_netlist_build(struct ice_pf *pf, char *buf, size_t len) { - struct ice_netlist_ver_info *netlist = &pf->hw.netlist_ver; + struct ice_netlist_info *netlist = &pf->hw.flash.netlist; snprintf(buf, len, "0x%08x", netlist->hash); @@ -433,7 +433,7 @@ static int ice_devlink_nvm_snapshot(struct devlink *devlink, void *nvm_data; u32 nvm_size; - nvm_size = hw->nvm.flash_size; + nvm_size = hw->flash.flash_size; nvm_data = vzalloc(nvm_size); if (!nvm_data) return -ENOMEM; @@ -533,7 +533,7 @@ void ice_devlink_init_regions(struct ice_pf *pf) struct device *dev = ice_pf_to_dev(pf); u64 nvm_size; - nvm_size = pf->hw.nvm.flash_size; + nvm_size = pf->hw.flash.flash_size; pf->nvm_region = devlink_region_create(devlink, &ice_nvm_region_ops, 1, nvm_size); if (IS_ERR(pf->nvm_region)) { diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c index 69c113a4de7e6..e01b7e34da5e5 100644 --- a/drivers/net/ethernet/intel/ice/ice_ethtool.c +++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c @@ -179,8 +179,8 @@ ice_get_drvinfo(struct net_device *netdev, struct ethtool_drvinfo *drvinfo) struct ice_orom_info *orom; struct ice_nvm_info *nvm; - nvm = &hw->nvm; - orom = &nvm->orom; + nvm = &hw->flash.nvm; + orom = &hw->flash.orom; strscpy(drvinfo->driver, KBUILD_MODNAME, sizeof(drvinfo->driver)); @@ -188,7 +188,7 @@ ice_get_drvinfo(struct net_device *netdev, struct ethtool_drvinfo *drvinfo) * determined) which contains more pertinent information. */ snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version), - "%x.%02x 0x%x %d.%d.%d", nvm->major_ver, nvm->minor_ver, + "%x.%02x 0x%x %d.%d.%d", nvm->major, nvm->minor, nvm->eetrack, orom->major, orom->build, orom->patch); strscpy(drvinfo->bus_info, pci_name(pf->pdev), @@ -250,7 +250,7 @@ static int ice_get_eeprom_len(struct net_device *netdev) struct ice_netdev_priv *np = netdev_priv(netdev); struct ice_pf *pf = np->vsi->back; - return (int)pf->hw.nvm.flash_size; + return (int)pf->hw.flash.flash_size; } static int diff --git a/drivers/net/ethernet/intel/ice/ice_nvm.c b/drivers/net/ethernet/intel/ice/ice_nvm.c index f729cd0c62245..f446f89b551b7 100644 --- a/drivers/net/ethernet/intel/ice/ice_nvm.c +++ b/drivers/net/ethernet/intel/ice/ice_nvm.c @@ -72,7 +72,7 @@ ice_read_flat_nvm(struct ice_hw *hw, u32 offset, u32 *length, u8 *data, *length = 0; /* Verify the length of the read if this is for the Shadow RAM */ - if (read_shadow_ram && ((offset + inlen) > (hw->nvm.sr_words * 2u))) { + if (read_shadow_ram && ((offset + inlen) > (hw->flash.sr_words * 2u))) { ice_debug(hw, ICE_DBG_NVM, "NVM error: requested offset is beyond Shadow RAM limit\n"); return ICE_ERR_PARAM; } @@ -213,7 +213,7 @@ ice_read_sr_word_aq(struct ice_hw *hw, u16 offset, u16 *data) enum ice_status ice_acquire_nvm(struct ice_hw *hw, enum ice_aq_res_access_type access) { - if (hw->nvm.blank_nvm_mode) + if (hw->flash.blank_nvm_mode) return 0; return ice_acquire_res(hw, ICE_NVM_RES_ID, access, ICE_NVM_TIMEOUT); @@ -227,7 +227,7 @@ ice_acquire_nvm(struct ice_hw *hw, enum ice_aq_res_access_type access) */ void ice_release_nvm(struct ice_hw *hw) { - if (hw->nvm.blank_nvm_mode) + if (hw->flash.blank_nvm_mode) return; ice_release_res(hw, ICE_NVM_RES_ID); @@ -379,17 +379,56 @@ ice_read_pba_string(struct ice_hw *hw, u8 *pba_num, u32 pba_num_size) return status; } +/** + * ice_get_nvm_ver_info - Read NVM version information + * @hw: pointer to the HW struct + * @nvm: pointer to NVM info structure + * + * Read the NVM EETRACK ID and map version of the main NVM image bank, filling + * in the NVM info structure. + */ +static enum ice_status +ice_get_nvm_ver_info(struct ice_hw *hw, struct ice_nvm_info *nvm) +{ + u16 eetrack_lo, eetrack_hi, ver; + enum ice_status status; + + status = ice_read_sr_word(hw, ICE_SR_NVM_DEV_STARTER_VER, &ver); + if (status) { + ice_debug(hw, ICE_DBG_NVM, "Failed to read DEV starter version.\n"); + return status; + } + nvm->major = (ver & ICE_NVM_VER_HI_MASK) >> ICE_NVM_VER_HI_SHIFT; + nvm->minor = (ver & ICE_NVM_VER_LO_MASK) >> ICE_NVM_VER_LO_SHIFT; + + status = ice_read_sr_word(hw, ICE_SR_NVM_EETRACK_LO, &eetrack_lo); + if (status) { + ice_debug(hw, ICE_DBG_NVM, "Failed to read EETRACK lo.\n"); + return status; + } + status = ice_read_sr_word(hw, ICE_SR_NVM_EETRACK_HI, &eetrack_hi); + if (status) { + ice_debug(hw, ICE_DBG_NVM, "Failed to read EETRACK hi.\n"); + return status; + } + + nvm->eetrack = (eetrack_hi << 16) | eetrack_lo; + + return 0; +} + /** * ice_get_orom_ver_info - Read Option ROM version information * @hw: pointer to the HW struct + * @orom: pointer to Option ROM info structure * * Read the Combo Image version data from the Boot Configuration TLV and fill * in the option ROM version data. */ -static enum ice_status ice_get_orom_ver_info(struct ice_hw *hw) +static enum ice_status +ice_get_orom_ver_info(struct ice_hw *hw, struct ice_orom_info *orom) { u16 combo_hi, combo_lo, boot_cfg_tlv, boot_cfg_tlv_len; - struct ice_orom_info *orom = &hw->nvm.orom; enum ice_status status; u32 combo_ver; @@ -436,12 +475,13 @@ static enum ice_status ice_get_orom_ver_info(struct ice_hw *hw) /** * ice_get_netlist_ver_info * @hw: pointer to the HW struct + * @ver: pointer to netlist version info structure * * Get the netlist version information */ -static enum ice_status ice_get_netlist_ver_info(struct ice_hw *hw) +static enum ice_status +ice_get_netlist_ver_info(struct ice_hw *hw, struct ice_netlist_info *ver) { - struct ice_netlist_ver_info *ver = &hw->netlist_ver; enum ice_status ret; u32 id_blk_start; __le16 raw_data; @@ -555,7 +595,7 @@ static enum ice_status ice_discover_flash_size(struct ice_hw *hw) ice_debug(hw, ICE_DBG_NVM, "Predicted flash size is %u bytes\n", max_size); - hw->nvm.flash_size = max_size; + hw->flash.flash_size = max_size; err_read_flat_nvm: ice_release_nvm(hw); @@ -572,8 +612,7 @@ err_read_flat_nvm: */ enum ice_status ice_init_nvm(struct ice_hw *hw) { - struct ice_nvm_info *nvm = &hw->nvm; - u16 eetrack_lo, eetrack_hi, ver; + struct ice_flash_info *flash = &hw->flash; enum ice_status status; u32 fla, gens_stat; u8 sr_size; @@ -585,54 +624,39 @@ enum ice_status ice_init_nvm(struct ice_hw *hw) sr_size = (gens_stat & GLNVM_GENS_SR_SIZE_M) >> GLNVM_GENS_SR_SIZE_S; /* Switching to words (sr_size contains power of 2) */ - nvm->sr_words = BIT(sr_size) * ICE_SR_WORDS_IN_1KB; + flash->sr_words = BIT(sr_size) * ICE_SR_WORDS_IN_1KB; /* Check if we are in the normal or blank NVM programming mode */ fla = rd32(hw, GLNVM_FLA); if (fla & GLNVM_FLA_LOCKED_M) { /* Normal programming mode */ - nvm->blank_nvm_mode = false; + flash->blank_nvm_mode = false; } else { /* Blank programming mode */ - nvm->blank_nvm_mode = true; + flash->blank_nvm_mode = true; ice_debug(hw, ICE_DBG_NVM, "NVM init error: unsupported blank mode.\n"); return ICE_ERR_NVM_BLANK_MODE; } - status = ice_read_sr_word(hw, ICE_SR_NVM_DEV_STARTER_VER, &ver); - if (status) { - ice_debug(hw, ICE_DBG_INIT, "Failed to read DEV starter version.\n"); - return status; - } - nvm->major_ver = (ver & ICE_NVM_VER_HI_MASK) >> ICE_NVM_VER_HI_SHIFT; - nvm->minor_ver = (ver & ICE_NVM_VER_LO_MASK) >> ICE_NVM_VER_LO_SHIFT; - - status = ice_read_sr_word(hw, ICE_SR_NVM_EETRACK_LO, &eetrack_lo); - if (status) { - ice_debug(hw, ICE_DBG_INIT, "Failed to read EETRACK lo.\n"); - return status; - } - status = ice_read_sr_word(hw, ICE_SR_NVM_EETRACK_HI, &eetrack_hi); + status = ice_discover_flash_size(hw); if (status) { - ice_debug(hw, ICE_DBG_INIT, "Failed to read EETRACK hi.\n"); + ice_debug(hw, ICE_DBG_NVM, "NVM init error: failed to discover flash size.\n"); return status; } - nvm->eetrack = (eetrack_hi << 16) | eetrack_lo; - - status = ice_discover_flash_size(hw); + status = ice_get_nvm_ver_info(hw, &flash->nvm); if (status) { - ice_debug(hw, ICE_DBG_NVM, "NVM init error: failed to discover flash size.\n"); + ice_debug(hw, ICE_DBG_INIT, "Failed to read NVM info.\n"); return status; } - status = ice_get_orom_ver_info(hw); + status = ice_get_orom_ver_info(hw, &flash->orom); if (status) { ice_debug(hw, ICE_DBG_INIT, "Failed to read Option ROM info.\n"); return status; } /* read the netlist version information */ - status = ice_get_netlist_ver_info(hw); + status = ice_get_netlist_ver_info(hw, &flash->netlist); if (status) ice_debug(hw, ICE_DBG_INIT, "Failed to read netlist info.\n"); diff --git a/drivers/net/ethernet/intel/ice/ice_type.h b/drivers/net/ethernet/intel/ice/ice_type.h index 2226a291a3943..7af7758374d4f 100644 --- a/drivers/net/ethernet/intel/ice/ice_type.h +++ b/drivers/net/ethernet/intel/ice/ice_type.h @@ -313,14 +313,30 @@ struct ice_orom_info { u16 build; /* Build version of OROM */ }; -/* NVM Information */ +/* NVM version information */ struct ice_nvm_info { + u32 eetrack; + u8 major; + u8 minor; +}; + +/* netlist version information */ +struct ice_netlist_info { + u32 major; /* major high/low */ + u32 minor; /* minor high/low */ + u32 type; /* type high/low */ + u32 rev; /* revision high/low */ + u32 hash; /* SHA-1 hash word */ + u16 cust_ver; /* customer version */ +}; + +/* Flash Chip Information */ +struct ice_flash_info { struct ice_orom_info orom; /* Option ROM version info */ - u32 eetrack; /* NVM data version */ + struct ice_nvm_info nvm; /* NVM version information */ + struct ice_netlist_info netlist;/* Netlist version info */ u16 sr_words; /* Shadow RAM size in words */ u32 flash_size; /* Size of available flash in bytes */ - u8 major_ver; /* major version of NVM package */ - u8 minor_ver; /* minor version of dev starter */ u8 blank_nvm_mode; /* is NVM empty (no FW present) */ }; @@ -348,16 +364,6 @@ struct ice_link_default_override_tlv { #define ICE_NVM_VER_LEN 32 -/* netlist version information */ -struct ice_netlist_ver_info { - u32 major; /* major high/low */ - u32 minor; /* minor high/low */ - u32 type; /* type high/low */ - u32 rev; /* revision high/low */ - u32 hash; /* SHA-1 hash word */ - u16 cust_ver; /* customer version */ -}; - /* Max number of port to queue branches w.r.t topology */ #define ICE_MAX_TRAFFIC_CLASS 8 #define ICE_TXSCHED_MAX_BRANCHES ICE_MAX_TRAFFIC_CLASS @@ -605,10 +611,9 @@ struct ice_hw { u8 evb_veb; /* true for VEB, false for VEPA */ u8 reset_ongoing; /* true if HW is in reset, false otherwise */ struct ice_bus_info bus; - struct ice_nvm_info nvm; + struct ice_flash_info flash; struct ice_hw_dev_caps dev_caps; /* device capabilities */ struct ice_hw_func_caps func_caps; /* function capabilities */ - struct ice_netlist_ver_info netlist_ver; /* netlist version info */ struct ice_switch_info *switch_info; /* switch filter lists */ -- GitLab From 74789085d9ce9c626102d267eabfbff01a8cd855 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Wed, 11 Nov 2020 16:43:24 -0800 Subject: [PATCH 1926/2012] ice: introduce context struct for info report The ice driver uses an array of structures which link an info name with a function that formats the associated version data into a string. All existing format functions simply format already captured static data from the driver hw structure. Future changes will introduce format functions for reporting the versions of flash sections stored but not yet applied. This type of version data is not stored as a member of the hw structure. This is because (a) it might not yet exist in the case there is no pending flash update, and (b) even if it does, it might change such as if an update is canceled or replaced by a new update before finalizing. We could simply have each format function gather its own data upon being called. However, in some cases the raw binary version data is a combination of multiple different reported fields. Additionally, the current interface doesn't have a way for the function to indicate that the version doesn't exist. Refactor this function interface to take a new ice_info_ctx structure instead of the buffer pointer and length. This context structure allows for future extensions to pre-gather version data that is stored within the context struct instead of the hw struct. Allocate this context structure initially at the start of ice_devlink_info_get. We use dynamic allocation instead of a local stack variable in order to avoid using too much kernel stack once we extend it with additional data structures. Modify the main loop that drives the info reporting so that the version buffer string is always cleared between each format. Explicitly check that the format function actually filled in a version string of non-zero length. If the string is not provided, simply skip this version without reporting an error. This allows for introducing format functions of versions which may or may not be present, such as the version of a pending update that has not yet been activated. Signed-off-by: Jacob Keller Tested-by: Tony Brelinski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_devlink.c | 109 ++++++++++++------- 1 file changed, 68 insertions(+), 41 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_devlink.c b/drivers/net/ethernet/intel/ice/ice_devlink.c index 44b64524b1b8e..4d5ae1d6fe1cd 100644 --- a/drivers/net/ethernet/intel/ice/ice_devlink.c +++ b/drivers/net/ethernet/intel/ice/ice_devlink.c @@ -6,126 +6,141 @@ #include "ice_devlink.h" #include "ice_fw_update.h" -static void ice_info_get_dsn(struct ice_pf *pf, char *buf, size_t len) +/* context for devlink info version reporting */ +struct ice_info_ctx { + char buf[128]; +}; + +/* The following functions are used to format specific strings for various + * devlink info versions. The ctx parameter is used to provide the storage + * buffer, as well as any ancillary information calculated when the info + * request was made. + * + * If a version does not exist, for example when attempting to get the + * inactive version of flash when there is no pending update, the function + * should leave the buffer in the ctx structure empty and return 0. + */ + +static void ice_info_get_dsn(struct ice_pf *pf, struct ice_info_ctx *ctx) { u8 dsn[8]; /* Copy the DSN into an array in Big Endian format */ put_unaligned_be64(pci_get_dsn(pf->pdev), dsn); - snprintf(buf, len, "%8phD", dsn); + snprintf(ctx->buf, sizeof(ctx->buf), "%8phD", dsn); } -static int ice_info_pba(struct ice_pf *pf, char *buf, size_t len) +static int ice_info_pba(struct ice_pf *pf, struct ice_info_ctx *ctx) { struct ice_hw *hw = &pf->hw; enum ice_status status; - status = ice_read_pba_string(hw, (u8 *)buf, len); + status = ice_read_pba_string(hw, (u8 *)ctx->buf, sizeof(ctx->buf)); if (status) return -EIO; return 0; } -static int ice_info_fw_mgmt(struct ice_pf *pf, char *buf, size_t len) +static int ice_info_fw_mgmt(struct ice_pf *pf, struct ice_info_ctx *ctx) { struct ice_hw *hw = &pf->hw; - snprintf(buf, len, "%u.%u.%u", hw->fw_maj_ver, hw->fw_min_ver, + snprintf(ctx->buf, sizeof(ctx->buf), "%u.%u.%u", hw->fw_maj_ver, hw->fw_min_ver, hw->fw_patch); return 0; } -static int ice_info_fw_api(struct ice_pf *pf, char *buf, size_t len) +static int ice_info_fw_api(struct ice_pf *pf, struct ice_info_ctx *ctx) { struct ice_hw *hw = &pf->hw; - snprintf(buf, len, "%u.%u", hw->api_maj_ver, hw->api_min_ver); + snprintf(ctx->buf, sizeof(ctx->buf), "%u.%u", hw->api_maj_ver, hw->api_min_ver); return 0; } -static int ice_info_fw_build(struct ice_pf *pf, char *buf, size_t len) +static int ice_info_fw_build(struct ice_pf *pf, struct ice_info_ctx *ctx) { struct ice_hw *hw = &pf->hw; - snprintf(buf, len, "0x%08x", hw->fw_build); + snprintf(ctx->buf, sizeof(ctx->buf), "0x%08x", hw->fw_build); return 0; } -static int ice_info_orom_ver(struct ice_pf *pf, char *buf, size_t len) +static int ice_info_orom_ver(struct ice_pf *pf, struct ice_info_ctx *ctx) { struct ice_orom_info *orom = &pf->hw.flash.orom; - snprintf(buf, len, "%u.%u.%u", orom->major, orom->build, orom->patch); + snprintf(ctx->buf, sizeof(ctx->buf), "%u.%u.%u", orom->major, orom->build, orom->patch); return 0; } -static int ice_info_nvm_ver(struct ice_pf *pf, char *buf, size_t len) +static int ice_info_nvm_ver(struct ice_pf *pf, struct ice_info_ctx *ctx) { struct ice_nvm_info *nvm = &pf->hw.flash.nvm; - snprintf(buf, len, "%x.%02x", nvm->major, nvm->minor); + snprintf(ctx->buf, sizeof(ctx->buf), "%x.%02x", nvm->major, nvm->minor); return 0; } -static int ice_info_eetrack(struct ice_pf *pf, char *buf, size_t len) +static int ice_info_eetrack(struct ice_pf *pf, struct ice_info_ctx *ctx) { struct ice_nvm_info *nvm = &pf->hw.flash.nvm; - snprintf(buf, len, "0x%08x", nvm->eetrack); + snprintf(ctx->buf, sizeof(ctx->buf), "0x%08x", nvm->eetrack); return 0; } -static int ice_info_ddp_pkg_name(struct ice_pf *pf, char *buf, size_t len) +static int ice_info_ddp_pkg_name(struct ice_pf *pf, struct ice_info_ctx *ctx) { struct ice_hw *hw = &pf->hw; - snprintf(buf, len, "%s", hw->active_pkg_name); + snprintf(ctx->buf, sizeof(ctx->buf), "%s", hw->active_pkg_name); return 0; } -static int ice_info_ddp_pkg_version(struct ice_pf *pf, char *buf, size_t len) +static int ice_info_ddp_pkg_version(struct ice_pf *pf, struct ice_info_ctx *ctx) { struct ice_pkg_ver *pkg = &pf->hw.active_pkg_ver; - snprintf(buf, len, "%u.%u.%u.%u", pkg->major, pkg->minor, pkg->update, + snprintf(ctx->buf, sizeof(ctx->buf), "%u.%u.%u.%u", pkg->major, pkg->minor, pkg->update, pkg->draft); return 0; } -static int ice_info_ddp_pkg_bundle_id(struct ice_pf *pf, char *buf, size_t len) +static int ice_info_ddp_pkg_bundle_id(struct ice_pf *pf, struct ice_info_ctx *ctx) { - snprintf(buf, len, "0x%08x", pf->hw.active_track_id); + snprintf(ctx->buf, sizeof(ctx->buf), "0x%08x", pf->hw.active_track_id); return 0; } -static int ice_info_netlist_ver(struct ice_pf *pf, char *buf, size_t len) +static int ice_info_netlist_ver(struct ice_pf *pf, struct ice_info_ctx *ctx) { struct ice_netlist_info *netlist = &pf->hw.flash.netlist; /* The netlist version fields are BCD formatted */ - snprintf(buf, len, "%x.%x.%x-%x.%x.%x", netlist->major, netlist->minor, + snprintf(ctx->buf, sizeof(ctx->buf), "%x.%x.%x-%x.%x.%x", netlist->major, netlist->minor, netlist->type >> 16, netlist->type & 0xFFFF, netlist->rev, netlist->cust_ver); return 0; } -static int ice_info_netlist_build(struct ice_pf *pf, char *buf, size_t len) +static int ice_info_netlist_build(struct ice_pf *pf, struct ice_info_ctx *ctx) { struct ice_netlist_info *netlist = &pf->hw.flash.netlist; - snprintf(buf, len, "0x%08x", netlist->hash); + snprintf(ctx->buf, sizeof(ctx->buf), "0x%08x", netlist->hash); return 0; } @@ -142,7 +157,7 @@ enum ice_version_type { static const struct ice_devlink_version { enum ice_version_type type; const char *key; - int (*getter)(struct ice_pf *pf, char *buf, size_t len); + int (*getter)(struct ice_pf *pf, struct ice_info_ctx *ctx); } ice_devlink_versions[] = { fixed(DEVLINK_INFO_VERSION_GENERIC_BOARD_ID, ice_info_pba), running(DEVLINK_INFO_VERSION_GENERIC_FW_MGMT, ice_info_fw_mgmt), @@ -174,60 +189,72 @@ static int ice_devlink_info_get(struct devlink *devlink, struct netlink_ext_ack *extack) { struct ice_pf *pf = devlink_priv(devlink); - char buf[100]; + struct ice_info_ctx *ctx; size_t i; int err; + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); + if (!ctx) + return -ENOMEM; + err = devlink_info_driver_name_put(req, KBUILD_MODNAME); if (err) { NL_SET_ERR_MSG_MOD(extack, "Unable to set driver name"); - return err; + goto out_free_ctx; } - ice_info_get_dsn(pf, buf, sizeof(buf)); + ice_info_get_dsn(pf, ctx); - err = devlink_info_serial_number_put(req, buf); + err = devlink_info_serial_number_put(req, ctx->buf); if (err) { NL_SET_ERR_MSG_MOD(extack, "Unable to set serial number"); - return err; + goto out_free_ctx; } for (i = 0; i < ARRAY_SIZE(ice_devlink_versions); i++) { enum ice_version_type type = ice_devlink_versions[i].type; const char *key = ice_devlink_versions[i].key; - err = ice_devlink_versions[i].getter(pf, buf, sizeof(buf)); + memset(ctx->buf, 0, sizeof(ctx->buf)); + + err = ice_devlink_versions[i].getter(pf, ctx); if (err) { NL_SET_ERR_MSG_MOD(extack, "Unable to obtain version info"); - return err; + goto out_free_ctx; } + /* Do not report missing versions */ + if (ctx->buf[0] == '\0') + continue; + switch (type) { case ICE_VERSION_FIXED: - err = devlink_info_version_fixed_put(req, key, buf); + err = devlink_info_version_fixed_put(req, key, ctx->buf); if (err) { NL_SET_ERR_MSG_MOD(extack, "Unable to set fixed version"); - return err; + goto out_free_ctx; } break; case ICE_VERSION_RUNNING: - err = devlink_info_version_running_put(req, key, buf); + err = devlink_info_version_running_put(req, key, ctx->buf); if (err) { NL_SET_ERR_MSG_MOD(extack, "Unable to set running version"); - return err; + goto out_free_ctx; } break; case ICE_VERSION_STORED: - err = devlink_info_version_stored_put(req, key, buf); + err = devlink_info_version_stored_put(req, key, ctx->buf); if (err) { NL_SET_ERR_MSG_MOD(extack, "Unable to set stored version"); - return err; + goto out_free_ctx; } break; } } - return 0; +out_free_ctx: + kfree(ctx); + return err; } /** -- GitLab From 1fa95e0120ebe4d8953cb86e1617e3ab1dc5ce89 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Thu, 1 Oct 2020 10:31:42 -0700 Subject: [PATCH 1927/2012] ice: cache NVM module bank information The ice flash contains two copies of each of the NVM, Option ROM, and Netlist modules. Each bank has a pointer word and a size word. In order to correctly read from the active flash bank, the driver must calculate the offset manually. During NVM initialization, read the Shadow RAM control word and determine which bank is active for each NVM module. Additionally, cache the size and pointer values for use in calculating the correct offset. Signed-off-by: Jacob Keller Tested-by: Tony Brelinski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_nvm.c | 151 ++++++++++++++++++++++ drivers/net/ethernet/intel/ice/ice_type.h | 37 ++++++ 2 files changed, 188 insertions(+) diff --git a/drivers/net/ethernet/intel/ice/ice_nvm.c b/drivers/net/ethernet/intel/ice/ice_nvm.c index f446f89b551b7..0649923d6e2f6 100644 --- a/drivers/net/ethernet/intel/ice/ice_nvm.c +++ b/drivers/net/ethernet/intel/ice/ice_nvm.c @@ -603,6 +603,151 @@ err_read_flat_nvm: return status; } +/** + * ice_read_sr_pointer - Read the value of a Shadow RAM pointer word + * @hw: pointer to the HW structure + * @offset: the word offset of the Shadow RAM word to read + * @pointer: pointer value read from Shadow RAM + * + * Read the given Shadow RAM word, and convert it to a pointer value specified + * in bytes. This function assumes the specified offset is a valid pointer + * word. + * + * Each pointer word specifies whether it is stored in word size or 4KB + * sector size by using the highest bit. The reported pointer value will be in + * bytes, intended for flat NVM reads. + */ +static enum ice_status +ice_read_sr_pointer(struct ice_hw *hw, u16 offset, u32 *pointer) +{ + enum ice_status status; + u16 value; + + status = ice_read_sr_word(hw, offset, &value); + if (status) + return status; + + /* Determine if the pointer is in 4KB or word units */ + if (value & ICE_SR_NVM_PTR_4KB_UNITS) + *pointer = (value & ~ICE_SR_NVM_PTR_4KB_UNITS) * 4 * 1024; + else + *pointer = value * 2; + + return 0; +} + +/** + * ice_read_sr_area_size - Read an area size from a Shadow RAM word + * @hw: pointer to the HW structure + * @offset: the word offset of the Shadow RAM to read + * @size: size value read from the Shadow RAM + * + * Read the given Shadow RAM word, and convert it to an area size value + * specified in bytes. This function assumes the specified offset is a valid + * area size word. + * + * Each area size word is specified in 4KB sector units. This function reports + * the size in bytes, intended for flat NVM reads. + */ +static enum ice_status +ice_read_sr_area_size(struct ice_hw *hw, u16 offset, u32 *size) +{ + enum ice_status status; + u16 value; + + status = ice_read_sr_word(hw, offset, &value); + if (status) + return status; + + /* Area sizes are always specified in 4KB units */ + *size = value * 4 * 1024; + + return 0; +} + +/** + * ice_determine_active_flash_banks - Discover active bank for each module + * @hw: pointer to the HW struct + * + * Read the Shadow RAM control word and determine which banks are active for + * the NVM, OROM, and Netlist modules. Also read and calculate the associated + * pointer and size. These values are then cached into the ice_flash_info + * structure for later use in order to calculate the correct offset to read + * from the active module. + */ +static enum ice_status +ice_determine_active_flash_banks(struct ice_hw *hw) +{ + struct ice_bank_info *banks = &hw->flash.banks; + enum ice_status status; + u16 ctrl_word; + + status = ice_read_sr_word(hw, ICE_SR_NVM_CTRL_WORD, &ctrl_word); + if (status) { + ice_debug(hw, ICE_DBG_NVM, "Failed to read the Shadow RAM control word\n"); + return status; + } + + /* Check that the control word indicates validity */ + if ((ctrl_word & ICE_SR_CTRL_WORD_1_M) >> ICE_SR_CTRL_WORD_1_S != ICE_SR_CTRL_WORD_VALID) { + ice_debug(hw, ICE_DBG_NVM, "Shadow RAM control word is invalid\n"); + return ICE_ERR_CFG; + } + + if (!(ctrl_word & ICE_SR_CTRL_WORD_NVM_BANK)) + banks->nvm_bank = ICE_1ST_FLASH_BANK; + else + banks->nvm_bank = ICE_2ND_FLASH_BANK; + + if (!(ctrl_word & ICE_SR_CTRL_WORD_OROM_BANK)) + banks->orom_bank = ICE_1ST_FLASH_BANK; + else + banks->orom_bank = ICE_2ND_FLASH_BANK; + + if (!(ctrl_word & ICE_SR_CTRL_WORD_NETLIST_BANK)) + banks->netlist_bank = ICE_1ST_FLASH_BANK; + else + banks->netlist_bank = ICE_2ND_FLASH_BANK; + + status = ice_read_sr_pointer(hw, ICE_SR_1ST_NVM_BANK_PTR, &banks->nvm_ptr); + if (status) { + ice_debug(hw, ICE_DBG_NVM, "Failed to read NVM bank pointer\n"); + return status; + } + + status = ice_read_sr_area_size(hw, ICE_SR_NVM_BANK_SIZE, &banks->nvm_size); + if (status) { + ice_debug(hw, ICE_DBG_NVM, "Failed to read NVM bank area size\n"); + return status; + } + + status = ice_read_sr_pointer(hw, ICE_SR_1ST_OROM_BANK_PTR, &banks->orom_ptr); + if (status) { + ice_debug(hw, ICE_DBG_NVM, "Failed to read OROM bank pointer\n"); + return status; + } + + status = ice_read_sr_area_size(hw, ICE_SR_OROM_BANK_SIZE, &banks->orom_size); + if (status) { + ice_debug(hw, ICE_DBG_NVM, "Failed to read OROM bank area size\n"); + return status; + } + + status = ice_read_sr_pointer(hw, ICE_SR_NETLIST_BANK_PTR, &banks->netlist_ptr); + if (status) { + ice_debug(hw, ICE_DBG_NVM, "Failed to read Netlist bank pointer\n"); + return status; + } + + status = ice_read_sr_area_size(hw, ICE_SR_NETLIST_BANK_SIZE, &banks->netlist_size); + if (status) { + ice_debug(hw, ICE_DBG_NVM, "Failed to read Netlist bank area size\n"); + return status; + } + + return 0; +} + /** * ice_init_nvm - initializes NVM setting * @hw: pointer to the HW struct @@ -643,6 +788,12 @@ enum ice_status ice_init_nvm(struct ice_hw *hw) return status; } + status = ice_determine_active_flash_banks(hw); + if (status) { + ice_debug(hw, ICE_DBG_NVM, "Failed to determine active flash banks.\n"); + return status; + } + status = ice_get_nvm_ver_info(hw, &flash->nvm); if (status) { ice_debug(hw, ICE_DBG_INIT, "Failed to read NVM info.\n"); diff --git a/drivers/net/ethernet/intel/ice/ice_type.h b/drivers/net/ethernet/intel/ice/ice_type.h index 7af7758374d4f..bc3be64cf3d94 100644 --- a/drivers/net/ethernet/intel/ice/ice_type.h +++ b/drivers/net/ethernet/intel/ice/ice_type.h @@ -330,11 +330,34 @@ struct ice_netlist_info { u16 cust_ver; /* customer version */ }; +/* Enumeration of possible flash banks for the NVM, OROM, and Netlist modules + * of the flash image. + */ +enum ice_flash_bank { + ICE_INVALID_FLASH_BANK, + ICE_1ST_FLASH_BANK, + ICE_2ND_FLASH_BANK, +}; + +/* information for accessing NVM, OROM, and Netlist flash banks */ +struct ice_bank_info { + u32 nvm_ptr; /* Pointer to 1st NVM bank */ + u32 nvm_size; /* Size of NVM bank */ + u32 orom_ptr; /* Pointer to 1st OROM bank */ + u32 orom_size; /* Size of OROM bank */ + u32 netlist_ptr; /* Pointer to 1st Netlist bank */ + u32 netlist_size; /* Size of Netlist bank */ + enum ice_flash_bank nvm_bank; /* Active NVM bank */ + enum ice_flash_bank orom_bank; /* Active OROM bank */ + enum ice_flash_bank netlist_bank; /* Active Netlist bank */ +}; + /* Flash Chip Information */ struct ice_flash_info { struct ice_orom_info orom; /* Option ROM version info */ struct ice_nvm_info nvm; /* NVM version information */ struct ice_netlist_info netlist;/* Netlist version info */ + struct ice_bank_info banks; /* Flash Bank information */ u16 sr_words; /* Shadow RAM size in words */ u32 flash_size; /* Size of available flash in bytes */ u8 blank_nvm_mode; /* is NVM empty (no FW present) */ @@ -770,6 +793,7 @@ struct ice_hw_port_stats { }; /* Checksum and Shadow RAM pointers */ +#define ICE_SR_NVM_CTRL_WORD 0x00 #define ICE_SR_BOOT_CFG_PTR 0x132 #define ICE_SR_NVM_WOL_CFG 0x19 #define ICE_NVM_OROM_VER_OFF 0x02 @@ -789,10 +813,23 @@ struct ice_hw_port_stats { #define ICE_OROM_VER_MASK (0xff << ICE_OROM_VER_SHIFT) #define ICE_SR_PFA_PTR 0x40 #define ICE_SR_1ST_NVM_BANK_PTR 0x42 +#define ICE_SR_NVM_BANK_SIZE 0x43 #define ICE_SR_1ST_OROM_BANK_PTR 0x44 +#define ICE_SR_OROM_BANK_SIZE 0x45 #define ICE_SR_NETLIST_BANK_PTR 0x46 +#define ICE_SR_NETLIST_BANK_SIZE 0x47 #define ICE_SR_SECTOR_SIZE_IN_WORDS 0x800 +/* Auxiliary field, mask, and shift definition for Shadow RAM and NVM Flash */ +#define ICE_SR_CTRL_WORD_1_S 0x06 +#define ICE_SR_CTRL_WORD_1_M (0x03 << ICE_SR_CTRL_WORD_1_S) +#define ICE_SR_CTRL_WORD_VALID 0x1 +#define ICE_SR_CTRL_WORD_OROM_BANK BIT(3) +#define ICE_SR_CTRL_WORD_NETLIST_BANK BIT(4) +#define ICE_SR_CTRL_WORD_NVM_BANK BIT(5) + +#define ICE_SR_NVM_PTR_4KB_UNITS BIT(15) + /* Link override related */ #define ICE_SR_PFA_LINK_OVERRIDE_WORDS 10 #define ICE_SR_PFA_LINK_OVERRIDE_PHY_WORDS 4 -- GitLab From 0ce50c7066e214545ea4543d73946073725c4421 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Thu, 1 Oct 2020 10:31:43 -0700 Subject: [PATCH 1928/2012] ice: introduce function for reading from flash modules When reading from the flash memory of the device, the ice driver has two interfaces available to it. First, it can use a mediated interface via firmware that allows specifying a module ID. This allows reading from specific modules of the active flash bank. The second interface available is to perform flat reads. This allows complete access to the entire flash. However, using it requires the software to handle calculating module location and interpret pointer addresses. While most data required is accessible through the convenient first interface, certain flash contents are not. This includes the CSS header information associated with the Option ROM and NVM banks, as well as any access to the "inactive" banks used as scratch space for performing flash updates. In order to access all of the relevant flash contents, software must use the flat reads. Rather than forcing all flows to perform flat read calculations, introduce a new abstraction for reading from the flash: ice_read_flash_module. This function provides an abstraction for reading from either the active or inactive flash bank at the requested module. This interface is very similar to the abstraction provided via firmware, but allows access to additional modules, as well as providing a mechanism to request access to both flash banks. At first glance, it might make sense for this abstraction to allow specifying precisely which bank (1st or 2nd) the caller wishes to read. This is simpler to implement but more difficult to use. In practice, most callers only know whether they want the active bank, or the inactive bank. Rather than force callers to determine for themselves which bank to read from, implement ice_read_flash_module in terms of "active" vs "inactive". This significantly simplifies the implementation at the caller level and is a more useful abstraction over the flash contents. Make use of this new interface to refactor reading of the main NVM version information. Instead of using the firmware's mediated ShadowRAM function, use the ice_read_flash_module abstraction. To do this, notice that most reads of the NVM are going to be in 2-byte word chunks. To simplify using ice_read_flash_module for this case, ice_read_nvm_module is introduced. This is a simple wrapper around ice_read_flash_module which takes the correct pointer address for the NVM bank, and forces the 2-byte word format onto the caller. When reading the NVM versions, some fields are read from the Shadow RAM. The Shadow RAM is the first 64KB of flash memory, and is populated during device load. Most fields are copied from a section within the active NVM bank. In order to read this data from both the active and inactive NVM banks, we need to read not from the first 64KB of flash, but instead from the correct offset into the NVM bank. Introduce ice_read_nvm_sr_copy for this purpose. This function wraps around ice_read_nvm_module and has the same interface as the ice_read_sr_word, with the exception of allowing the caller to specify whether to read the active or inactive flash bank. With this change, it is now trivial to refactor ice_get_nvm_ver_info to read using the software mediated ice_read_flash_module interface instead of relying on the firmware mediated interface. This will be used in the following change to implement support for stored versions in the devlink info report. Additionally, the overall ice_read_flash_module interface will be used and extended to support all three major flash banks, and additionally to support reading the flash image security revision information. Signed-off-by: Jacob Keller Tested-by: Tony Brelinski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_nvm.c | 162 +++++++++++++++++++++- drivers/net/ethernet/intel/ice/ice_type.h | 22 +++ 2 files changed, 179 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_nvm.c b/drivers/net/ethernet/intel/ice/ice_nvm.c index 0649923d6e2f6..2434b7a4626bc 100644 --- a/drivers/net/ethernet/intel/ice/ice_nvm.c +++ b/drivers/net/ethernet/intel/ice/ice_nvm.c @@ -233,6 +233,156 @@ void ice_release_nvm(struct ice_hw *hw) ice_release_res(hw, ICE_NVM_RES_ID); } +/** + * ice_get_flash_bank_offset - Get offset into requested flash bank + * @hw: pointer to the HW structure + * @bank: whether to read from the active or inactive flash bank + * @module: the module to read from + * + * Based on the module, lookup the module offset from the beginning of the + * flash. + * + * Returns the flash offset. Note that a value of zero is invalid and must be + * treated as an error. + */ +static u32 ice_get_flash_bank_offset(struct ice_hw *hw, enum ice_bank_select bank, u16 module) +{ + struct ice_bank_info *banks = &hw->flash.banks; + enum ice_flash_bank active_bank; + bool second_bank_active; + u32 offset, size; + + switch (module) { + case ICE_SR_1ST_NVM_BANK_PTR: + offset = banks->nvm_ptr; + size = banks->nvm_size; + active_bank = banks->nvm_bank; + break; + case ICE_SR_1ST_OROM_BANK_PTR: + offset = banks->orom_ptr; + size = banks->orom_size; + active_bank = banks->orom_bank; + break; + case ICE_SR_NETLIST_BANK_PTR: + offset = banks->netlist_ptr; + size = banks->netlist_size; + active_bank = banks->netlist_bank; + break; + default: + ice_debug(hw, ICE_DBG_NVM, "Unexpected value for flash module: 0x%04x\n", module); + return 0; + } + + switch (active_bank) { + case ICE_1ST_FLASH_BANK: + second_bank_active = false; + break; + case ICE_2ND_FLASH_BANK: + second_bank_active = true; + break; + default: + ice_debug(hw, ICE_DBG_NVM, "Unexpected value for active flash bank: %u\n", + active_bank); + return 0; + } + + /* The second flash bank is stored immediately following the first + * bank. Based on whether the 1st or 2nd bank is active, and whether + * we want the active or inactive bank, calculate the desired offset. + */ + switch (bank) { + case ICE_ACTIVE_FLASH_BANK: + return offset + (second_bank_active ? size : 0); + case ICE_INACTIVE_FLASH_BANK: + return offset + (second_bank_active ? 0 : size); + } + + ice_debug(hw, ICE_DBG_NVM, "Unexpected value for flash bank selection: %u\n", bank); + return 0; +} + +/** + * ice_read_flash_module - Read a word from one of the main NVM modules + * @hw: pointer to the HW structure + * @bank: which bank of the module to read + * @module: the module to read + * @offset: the offset into the module in bytes + * @data: storage for the word read from the flash + * @length: bytes of data to read + * + * Read data from the specified flash module. The bank parameter indicates + * whether or not to read from the active bank or the inactive bank of that + * module. + * + * The word will be read using flat NVM access, and relies on the + * hw->flash.banks data being setup by ice_determine_active_flash_banks() + * during initialization. + */ +static enum ice_status +ice_read_flash_module(struct ice_hw *hw, enum ice_bank_select bank, u16 module, + u32 offset, u8 *data, u32 length) +{ + enum ice_status status; + u32 start; + + start = ice_get_flash_bank_offset(hw, bank, module); + if (!start) { + ice_debug(hw, ICE_DBG_NVM, "Unable to calculate flash bank offset for module 0x%04x\n", + module); + return ICE_ERR_PARAM; + } + + status = ice_acquire_nvm(hw, ICE_RES_READ); + if (status) + return status; + + status = ice_read_flat_nvm(hw, start + offset, &length, data, false); + + ice_release_nvm(hw); + + return status; +} + +/** + * ice_read_nvm_module - Read from the active main NVM module + * @hw: pointer to the HW structure + * @bank: whether to read from active or inactive NVM module + * @offset: offset into the NVM module to read, in words + * @data: storage for returned word value + * + * Read the specified word from the active NVM module. This includes the CSS + * header at the start of the NVM module. + */ +static enum ice_status +ice_read_nvm_module(struct ice_hw *hw, enum ice_bank_select bank, u32 offset, u16 *data) +{ + enum ice_status status; + __le16 data_local; + + status = ice_read_flash_module(hw, bank, ICE_SR_1ST_NVM_BANK_PTR, offset * sizeof(u16), + (__force u8 *)&data_local, sizeof(u16)); + if (!status) + *data = le16_to_cpu(data_local); + + return status; +} + +/** + * ice_read_nvm_sr_copy - Read a word from the Shadow RAM copy in the NVM bank + * @hw: pointer to the HW structure + * @bank: whether to read from the active or inactive NVM module + * @offset: offset into the Shadow RAM copy to read, in words + * @data: storage for returned word value + * + * Read the specified word from the copy of the Shadow RAM found in the + * specified NVM module. + */ +static enum ice_status +ice_read_nvm_sr_copy(struct ice_hw *hw, enum ice_bank_select bank, u32 offset, u16 *data) +{ + return ice_read_nvm_module(hw, bank, ICE_NVM_SR_COPY_WORD_OFFSET + offset, data); +} + /** * ice_read_sr_word - Reads Shadow RAM word and acquire NVM if necessary * @hw: pointer to the HW structure @@ -382,31 +532,33 @@ ice_read_pba_string(struct ice_hw *hw, u8 *pba_num, u32 pba_num_size) /** * ice_get_nvm_ver_info - Read NVM version information * @hw: pointer to the HW struct + * @bank: whether to read from the active or inactive flash bank * @nvm: pointer to NVM info structure * * Read the NVM EETRACK ID and map version of the main NVM image bank, filling * in the NVM info structure. */ static enum ice_status -ice_get_nvm_ver_info(struct ice_hw *hw, struct ice_nvm_info *nvm) +ice_get_nvm_ver_info(struct ice_hw *hw, enum ice_bank_select bank, struct ice_nvm_info *nvm) { u16 eetrack_lo, eetrack_hi, ver; enum ice_status status; - status = ice_read_sr_word(hw, ICE_SR_NVM_DEV_STARTER_VER, &ver); + status = ice_read_nvm_sr_copy(hw, bank, ICE_SR_NVM_DEV_STARTER_VER, &ver); if (status) { ice_debug(hw, ICE_DBG_NVM, "Failed to read DEV starter version.\n"); return status; } + nvm->major = (ver & ICE_NVM_VER_HI_MASK) >> ICE_NVM_VER_HI_SHIFT; nvm->minor = (ver & ICE_NVM_VER_LO_MASK) >> ICE_NVM_VER_LO_SHIFT; - status = ice_read_sr_word(hw, ICE_SR_NVM_EETRACK_LO, &eetrack_lo); + status = ice_read_nvm_sr_copy(hw, bank, ICE_SR_NVM_EETRACK_LO, &eetrack_lo); if (status) { ice_debug(hw, ICE_DBG_NVM, "Failed to read EETRACK lo.\n"); return status; } - status = ice_read_sr_word(hw, ICE_SR_NVM_EETRACK_HI, &eetrack_hi); + status = ice_read_nvm_sr_copy(hw, bank, ICE_SR_NVM_EETRACK_HI, &eetrack_hi); if (status) { ice_debug(hw, ICE_DBG_NVM, "Failed to read EETRACK hi.\n"); return status; @@ -794,7 +946,7 @@ enum ice_status ice_init_nvm(struct ice_hw *hw) return status; } - status = ice_get_nvm_ver_info(hw, &flash->nvm); + status = ice_get_nvm_ver_info(hw, ICE_ACTIVE_FLASH_BANK, &flash->nvm); if (status) { ice_debug(hw, ICE_DBG_INIT, "Failed to read NVM info.\n"); return status; diff --git a/drivers/net/ethernet/intel/ice/ice_type.h b/drivers/net/ethernet/intel/ice/ice_type.h index bc3be64cf3d94..f414cac159f3e 100644 --- a/drivers/net/ethernet/intel/ice/ice_type.h +++ b/drivers/net/ethernet/intel/ice/ice_type.h @@ -339,6 +339,15 @@ enum ice_flash_bank { ICE_2ND_FLASH_BANK, }; +/* Enumeration of which flash bank is desired to read from, either the active + * bank or the inactive bank. Used to abstract 1st and 2nd bank notion from + * code which just wants to read the active or inactive flash bank. + */ +enum ice_bank_select { + ICE_ACTIVE_FLASH_BANK, + ICE_INACTIVE_FLASH_BANK, +}; + /* information for accessing NVM, OROM, and Netlist flash banks */ struct ice_bank_info { u32 nvm_ptr; /* Pointer to 1st NVM bank */ @@ -820,6 +829,19 @@ struct ice_hw_port_stats { #define ICE_SR_NETLIST_BANK_SIZE 0x47 #define ICE_SR_SECTOR_SIZE_IN_WORDS 0x800 +/* CSS Header words */ +#define ICE_NVM_CSS_SREV_L 0x14 +#define ICE_NVM_CSS_SREV_H 0x15 + +/* Length of CSS header section in words */ +#define ICE_CSS_HEADER_LENGTH 330 + +/* Offset of Shadow RAM copy in the NVM bank area. */ +#define ICE_NVM_SR_COPY_WORD_OFFSET roundup(ICE_CSS_HEADER_LENGTH, 32) + +/* Size in bytes of Option ROM trailer */ +#define ICE_NVM_OROM_TRAILER_LENGTH (2 * ICE_CSS_HEADER_LENGTH) + /* Auxiliary field, mask, and shift definition for Shadow RAM and NVM Flash */ #define ICE_SR_CTRL_WORD_1_S 0x06 #define ICE_SR_CTRL_WORD_1_M (0x03 << ICE_SR_CTRL_WORD_1_S) -- GitLab From 2c4fe41d727f230df59ba053f8ade36b24d22520 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Wed, 11 Nov 2020 16:43:28 -0800 Subject: [PATCH 1929/2012] ice: display some stored NVM versions via devlink info The devlink info interface supports drivers reporting "stored" versions. These versions indicate the version of an update that has been downloaded to the device, but is not yet active. The code for extracting the NVM version recently changed to enable support for reading from either the active or the inactive bank. Use this to implement ice_get_inactive_nvm_ver, which will read the NVM version data from the inactive section of flash. When reporting the versions via devlink info, first read the device capabilities. Determine if there is a pending flash update, and if so, extract relevant version information from the inactive flash. Store these within the info context structure. When reporting "stored" firmware versions, devlink documentation indicates that we ought to always report a stored value, even if there is no pending update. In this common case, the stored version should match the running version. This means that each stored version should by default fallback to the same value as reported by the running handler. To support this, modify the version structure to have both a "getter" and a "fallback". Modify the control loop so that it will use the "fallback" function if the "getter" function does not report a version. To report versions for which we can read the stored value, use a new "stored()" macro. This macro will insert two entries into the version list. The first entry is the traditional running version. The second is the stored version, implemented with a fallback to the active version. This is a little tricky, but reduces the overall duplication of elements in the entry list, and ensures that running and stored values remain consistent. To avoid some duplication, add a combined() macro that will insert both the running and stored versions into the version entry list. Using this new support, add pending version reporter functions for "fw.psid.api" and "fw.bundle_id". This enables reporting the stored values for some of versions in the NVM module of the flash. Reporting management versions is not implemented by this patch. The active management version is reported to the driver via the AdminQ mailbox during load. Although the version must be in the firmware binary somewhere, accessing this from the inactive firmware is not trivial and has not been implemented in this change. Future changes will introduce support for reading the UNDI Option ROM version and the version associated with the Netlist module. Signed-off-by: Jacob Keller Tested-by: Tony Brelinski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_devlink.c | 82 +++++++++++++++++++- drivers/net/ethernet/intel/ice/ice_nvm.c | 14 ++++ drivers/net/ethernet/intel/ice/ice_nvm.h | 2 + 3 files changed, 94 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_devlink.c b/drivers/net/ethernet/intel/ice/ice_devlink.c index 4d5ae1d6fe1cd..757f42aad30b3 100644 --- a/drivers/net/ethernet/intel/ice/ice_devlink.c +++ b/drivers/net/ethernet/intel/ice/ice_devlink.c @@ -9,6 +9,8 @@ /* context for devlink info version reporting */ struct ice_info_ctx { char buf[128]; + struct ice_nvm_info pending_nvm; + struct ice_hw_dev_caps dev_caps; }; /* The following functions are used to format specific strings for various @@ -89,6 +91,17 @@ static int ice_info_nvm_ver(struct ice_pf *pf, struct ice_info_ctx *ctx) return 0; } +static int +ice_info_pending_nvm_ver(struct ice_pf __always_unused *pf, struct ice_info_ctx *ctx) +{ + struct ice_nvm_info *nvm = &ctx->pending_nvm; + + if (ctx->dev_caps.common_cap.nvm_update_pending_nvm) + snprintf(ctx->buf, sizeof(ctx->buf), "%x.%02x", nvm->major, nvm->minor); + + return 0; +} + static int ice_info_eetrack(struct ice_pf *pf, struct ice_info_ctx *ctx) { struct ice_nvm_info *nvm = &pf->hw.flash.nvm; @@ -98,6 +111,17 @@ static int ice_info_eetrack(struct ice_pf *pf, struct ice_info_ctx *ctx) return 0; } +static int +ice_info_pending_eetrack(struct ice_pf __always_unused *pf, struct ice_info_ctx *ctx) +{ + struct ice_nvm_info *nvm = &ctx->pending_nvm; + + if (ctx->dev_caps.common_cap.nvm_update_pending_nvm) + snprintf(ctx->buf, sizeof(ctx->buf), "0x%08x", nvm->eetrack); + + return 0; +} + static int ice_info_ddp_pkg_name(struct ice_pf *pf, struct ice_info_ctx *ctx) { struct ice_hw *hw = &pf->hw; @@ -145,8 +169,23 @@ static int ice_info_netlist_build(struct ice_pf *pf, struct ice_info_ctx *ctx) return 0; } -#define fixed(key, getter) { ICE_VERSION_FIXED, key, getter } -#define running(key, getter) { ICE_VERSION_RUNNING, key, getter } +#define fixed(key, getter) { ICE_VERSION_FIXED, key, getter, NULL } +#define running(key, getter) { ICE_VERSION_RUNNING, key, getter, NULL } +#define stored(key, getter, fallback) { ICE_VERSION_STORED, key, getter, fallback } + +/* The combined() macro inserts both the running entry as well as a stored + * entry. The running entry will always report the version from the active + * handler. The stored entry will first try the pending handler, and fallback + * to the active handler if the pending function does not report a version. + * The pending handler should check the status of a pending update for the + * relevant flash component. It should only fill in the buffer in the case + * where a valid pending version is available. This ensures that the related + * stored and running versions remain in sync, and that stored versions are + * correctly reported as expected. + */ +#define combined(key, active, pending) \ + running(key, active), \ + stored(key, pending, active) enum ice_version_type { ICE_VERSION_FIXED, @@ -158,14 +197,15 @@ static const struct ice_devlink_version { enum ice_version_type type; const char *key; int (*getter)(struct ice_pf *pf, struct ice_info_ctx *ctx); + int (*fallback)(struct ice_pf *pf, struct ice_info_ctx *ctx); } ice_devlink_versions[] = { fixed(DEVLINK_INFO_VERSION_GENERIC_BOARD_ID, ice_info_pba), running(DEVLINK_INFO_VERSION_GENERIC_FW_MGMT, ice_info_fw_mgmt), running("fw.mgmt.api", ice_info_fw_api), running("fw.mgmt.build", ice_info_fw_build), running(DEVLINK_INFO_VERSION_GENERIC_FW_UNDI, ice_info_orom_ver), - running("fw.psid.api", ice_info_nvm_ver), - running(DEVLINK_INFO_VERSION_GENERIC_FW_BUNDLE_ID, ice_info_eetrack), + combined("fw.psid.api", ice_info_nvm_ver, ice_info_pending_nvm_ver), + combined(DEVLINK_INFO_VERSION_GENERIC_FW_BUNDLE_ID, ice_info_eetrack, ice_info_pending_eetrack), running("fw.app.name", ice_info_ddp_pkg_name), running(DEVLINK_INFO_VERSION_GENERIC_FW_APP, ice_info_ddp_pkg_version), running("fw.app.bundle_id", ice_info_ddp_pkg_bundle_id), @@ -189,7 +229,10 @@ static int ice_devlink_info_get(struct devlink *devlink, struct netlink_ext_ack *extack) { struct ice_pf *pf = devlink_priv(devlink); + struct device *dev = ice_pf_to_dev(pf); + struct ice_hw *hw = &pf->hw; struct ice_info_ctx *ctx; + enum ice_status status; size_t i; int err; @@ -197,6 +240,24 @@ static int ice_devlink_info_get(struct devlink *devlink, if (!ctx) return -ENOMEM; + /* discover capabilities first */ + status = ice_discover_dev_caps(hw, &ctx->dev_caps); + if (status) { + err = -EIO; + goto out_free_ctx; + } + + if (ctx->dev_caps.common_cap.nvm_update_pending_nvm) { + status = ice_get_inactive_nvm_ver(hw, &ctx->pending_nvm); + if (status) { + dev_dbg(dev, "Unable to read inactive NVM version data, status %s aq_err %s\n", + ice_stat_str(status), ice_aq_str(hw->adminq.sq_last_status)); + + /* disable display of pending Option ROM */ + ctx->dev_caps.common_cap.nvm_update_pending_nvm = false; + } + } + err = devlink_info_driver_name_put(req, KBUILD_MODNAME); if (err) { NL_SET_ERR_MSG_MOD(extack, "Unable to set driver name"); @@ -223,6 +284,19 @@ static int ice_devlink_info_get(struct devlink *devlink, goto out_free_ctx; } + /* If the default getter doesn't report a version, use the + * fallback function. This is primarily useful in the case of + * "stored" versions that want to report the same value as the + * running version in the normal case of no pending update. + */ + if (ctx->buf[0] == '\0' && ice_devlink_versions[i].fallback) { + err = ice_devlink_versions[i].fallback(pf, ctx); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Unable to obtain version info"); + goto out_free_ctx; + } + } + /* Do not report missing versions */ if (ctx->buf[0] == '\0') continue; diff --git a/drivers/net/ethernet/intel/ice/ice_nvm.c b/drivers/net/ethernet/intel/ice/ice_nvm.c index 2434b7a4626bc..f26e5105d584a 100644 --- a/drivers/net/ethernet/intel/ice/ice_nvm.c +++ b/drivers/net/ethernet/intel/ice/ice_nvm.c @@ -569,6 +569,20 @@ ice_get_nvm_ver_info(struct ice_hw *hw, enum ice_bank_select bank, struct ice_nv return 0; } +/** + * ice_get_inactive_nvm_ver - Read Option ROM version from the inactive bank + * @hw: pointer to the HW structure + * @nvm: storage for Option ROM version information + * + * Reads the NVM EETRACK ID, Map version, and security revision of the + * inactive NVM bank. Used to access version data for a pending update that + * has not yet been activated. + */ +enum ice_status ice_get_inactive_nvm_ver(struct ice_hw *hw, struct ice_nvm_info *nvm) +{ + return ice_get_nvm_ver_info(hw, ICE_INACTIVE_FLASH_BANK, nvm); +} + /** * ice_get_orom_ver_info - Read Option ROM version information * @hw: pointer to the HW struct diff --git a/drivers/net/ethernet/intel/ice/ice_nvm.h b/drivers/net/ethernet/intel/ice/ice_nvm.h index 8d430909f8464..23843d9b126c5 100644 --- a/drivers/net/ethernet/intel/ice/ice_nvm.h +++ b/drivers/net/ethernet/intel/ice/ice_nvm.h @@ -14,6 +14,8 @@ enum ice_status ice_get_pfa_module_tlv(struct ice_hw *hw, u16 *module_tlv, u16 *module_tlv_len, u16 module_type); enum ice_status +ice_get_inactive_nvm_ver(struct ice_hw *hw, struct ice_nvm_info *nvm); +enum ice_status ice_read_pba_string(struct ice_hw *hw, u8 *pba_num, u32 pba_num_size); enum ice_status ice_init_nvm(struct ice_hw *hw); enum ice_status ice_read_sr_word(struct ice_hw *hw, u16 offset, u16 *data); -- GitLab From e120a9ab45d31dfc5f5fd3eb39c2d5b7986320d9 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Wed, 11 Nov 2020 16:43:29 -0800 Subject: [PATCH 1930/2012] ice: display stored netlist versions via devlink info Add a function to read the inactive netlist bank for version information. To support this, refactor how we read the netlist version data. Instead of using the firmware AQ interface with a module ID, read from the flash as a flat NVM, using ice_read_flash_module. This change requires a slight adjustment to the offset values used, as reading from the flat NVM includes the type field (which was stripped by firmware previously). Cleanup the macro names and move them to ice_type.h. For clarity in how we calculate the offsets and so that programmers can easily map the offset value to the data sheet, use a wrapper macro to account for the offset adjustments. Use the newly added ice_get_inactive_netlist_ver function to extract the version data from the pending netlist module update. Add the stored variants of "fw.netlist", and "fw.netlist.build" to the info version map array. With this change, we now report the "fw.netlist" and "fw.netlist.build" versions into the stored section of the devlink info report. As with the main NVM module versions, if there is no pending update, we report the currently active values as stored. Signed-off-by: Jacob Keller Tested-by: Tony Brelinski Signed-off-by: Tony Nguyen --- .../net/ethernet/intel/ice/ice_adminq_cmd.h | 27 --- drivers/net/ethernet/intel/ice/ice_devlink.c | 42 ++++- drivers/net/ethernet/intel/ice/ice_main.c | 2 + drivers/net/ethernet/intel/ice/ice_nvm.c | 160 +++++++++++------- drivers/net/ethernet/intel/ice/ice_nvm.h | 2 + drivers/net/ethernet/intel/ice/ice_status.h | 1 + drivers/net/ethernet/intel/ice/ice_type.h | 35 ++++ 7 files changed, 176 insertions(+), 93 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h index b06fbe99d8e93..a51470b68d545 100644 --- a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h +++ b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h @@ -1334,33 +1334,6 @@ struct ice_aqc_nvm_checksum { u8 rsvd2[12]; }; -/* The result of netlist NVM read comes in a TLV format. The actual data - * (netlist header) starts from word offset 1 (byte 2). The FW strips - * out the type field from the TLV header so all the netlist fields - * should adjust their offset value by 1 word (2 bytes) in order to map - * their correct location. - */ -#define ICE_AQC_NVM_LINK_TOPO_NETLIST_MOD_ID 0x11B -#define ICE_AQC_NVM_LINK_TOPO_NETLIST_LEN_OFFSET 1 -#define ICE_AQC_NVM_LINK_TOPO_NETLIST_LEN 2 /* In bytes */ -#define ICE_AQC_NVM_NETLIST_NODE_COUNT_OFFSET 2 -#define ICE_AQC_NVM_NETLIST_NODE_COUNT_LEN 2 /* In bytes */ -#define ICE_AQC_NVM_NETLIST_NODE_COUNT_M ICE_M(0x3FF, 0) -#define ICE_AQC_NVM_NETLIST_ID_BLK_START_OFFSET 5 -#define ICE_AQC_NVM_NETLIST_ID_BLK_LEN 0x30 /* In words */ - -/* netlist ID block field offsets (word offsets) */ -#define ICE_AQC_NVM_NETLIST_ID_BLK_MAJOR_VER_LOW 2 -#define ICE_AQC_NVM_NETLIST_ID_BLK_MAJOR_VER_HIGH 3 -#define ICE_AQC_NVM_NETLIST_ID_BLK_MINOR_VER_LOW 4 -#define ICE_AQC_NVM_NETLIST_ID_BLK_MINOR_VER_HIGH 5 -#define ICE_AQC_NVM_NETLIST_ID_BLK_TYPE_LOW 6 -#define ICE_AQC_NVM_NETLIST_ID_BLK_TYPE_HIGH 7 -#define ICE_AQC_NVM_NETLIST_ID_BLK_REV_LOW 8 -#define ICE_AQC_NVM_NETLIST_ID_BLK_REV_HIGH 9 -#define ICE_AQC_NVM_NETLIST_ID_BLK_SHA_HASH 0xA -#define ICE_AQC_NVM_NETLIST_ID_BLK_CUST_VER 0x2F - /* Used for NVM Set Package Data command - 0x070A */ struct ice_aqc_nvm_pkg_data { u8 reserved[3]; diff --git a/drivers/net/ethernet/intel/ice/ice_devlink.c b/drivers/net/ethernet/intel/ice/ice_devlink.c index 757f42aad30b3..56be75c6d77d7 100644 --- a/drivers/net/ethernet/intel/ice/ice_devlink.c +++ b/drivers/net/ethernet/intel/ice/ice_devlink.c @@ -10,6 +10,7 @@ struct ice_info_ctx { char buf[128]; struct ice_nvm_info pending_nvm; + struct ice_netlist_info pending_netlist; struct ice_hw_dev_caps dev_caps; }; @@ -169,6 +170,32 @@ static int ice_info_netlist_build(struct ice_pf *pf, struct ice_info_ctx *ctx) return 0; } +static int +ice_info_pending_netlist_ver(struct ice_pf __always_unused *pf, struct ice_info_ctx *ctx) +{ + struct ice_netlist_info *netlist = &ctx->pending_netlist; + + /* The netlist version fields are BCD formatted */ + if (ctx->dev_caps.common_cap.nvm_update_pending_netlist) + snprintf(ctx->buf, sizeof(ctx->buf), "%x.%x.%x-%x.%x.%x", + netlist->major, netlist->minor, + netlist->type >> 16, netlist->type & 0xFFFF, netlist->rev, + netlist->cust_ver); + + return 0; +} + +static int +ice_info_pending_netlist_build(struct ice_pf __always_unused *pf, struct ice_info_ctx *ctx) +{ + struct ice_netlist_info *netlist = &ctx->pending_netlist; + + if (ctx->dev_caps.common_cap.nvm_update_pending_netlist) + snprintf(ctx->buf, sizeof(ctx->buf), "0x%08x", netlist->hash); + + return 0; +} + #define fixed(key, getter) { ICE_VERSION_FIXED, key, getter, NULL } #define running(key, getter) { ICE_VERSION_RUNNING, key, getter, NULL } #define stored(key, getter, fallback) { ICE_VERSION_STORED, key, getter, fallback } @@ -209,8 +236,8 @@ static const struct ice_devlink_version { running("fw.app.name", ice_info_ddp_pkg_name), running(DEVLINK_INFO_VERSION_GENERIC_FW_APP, ice_info_ddp_pkg_version), running("fw.app.bundle_id", ice_info_ddp_pkg_bundle_id), - running("fw.netlist", ice_info_netlist_ver), - running("fw.netlist.build", ice_info_netlist_build), + combined("fw.netlist", ice_info_netlist_ver, ice_info_pending_netlist_ver), + combined("fw.netlist.build", ice_info_netlist_build, ice_info_pending_netlist_build), }; /** @@ -258,6 +285,17 @@ static int ice_devlink_info_get(struct devlink *devlink, } } + if (ctx->dev_caps.common_cap.nvm_update_pending_netlist) { + status = ice_get_inactive_netlist_ver(hw, &ctx->pending_netlist); + if (status) { + dev_dbg(dev, "Unable to read inactive Netlist version data, status %s aq_err %s\n", + ice_stat_str(status), ice_aq_str(hw->adminq.sq_last_status)); + + /* disable display of pending Option ROM */ + ctx->dev_caps.common_cap.nvm_update_pending_netlist = false; + } + } + err = devlink_info_driver_name_put(req, KBUILD_MODNAME); if (err) { NL_SET_ERR_MSG_MOD(extack, "Unable to set driver name"); diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 06aa37549c047..643fbc8d6b6ad 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -6250,6 +6250,8 @@ const char *ice_stat_str(enum ice_status stat_err) return "ICE_ERR_OUT_OF_RANGE"; case ICE_ERR_ALREADY_EXISTS: return "ICE_ERR_ALREADY_EXISTS"; + case ICE_ERR_NVM: + return "ICE_ERR_NVM"; case ICE_ERR_NVM_CHECKSUM: return "ICE_ERR_NVM_CHECKSUM"; case ICE_ERR_BUF_TOO_SHORT: diff --git a/drivers/net/ethernet/intel/ice/ice_nvm.c b/drivers/net/ethernet/intel/ice/ice_nvm.c index f26e5105d584a..6d5218d96bec5 100644 --- a/drivers/net/ethernet/intel/ice/ice_nvm.c +++ b/drivers/net/ethernet/intel/ice/ice_nvm.c @@ -383,6 +383,29 @@ ice_read_nvm_sr_copy(struct ice_hw *hw, enum ice_bank_select bank, u32 offset, u return ice_read_nvm_module(hw, bank, ICE_NVM_SR_COPY_WORD_OFFSET + offset, data); } +/** + * ice_read_netlist_module - Read data from the netlist module area + * @hw: pointer to the HW structure + * @bank: whether to read from the active or inactive module + * @offset: offset into the netlist to read from + * @data: storage for returned word value + * + * Read a word from the specified netlist bank. + */ +static enum ice_status +ice_read_netlist_module(struct ice_hw *hw, enum ice_bank_select bank, u32 offset, u16 *data) +{ + enum ice_status status; + __le16 data_local; + + status = ice_read_flash_module(hw, bank, ICE_SR_NETLIST_BANK_PTR, offset * sizeof(u16), + (__force u8 *)&data_local, sizeof(u16)); + if (!status) + *data = le16_to_cpu(data_local); + + return status; +} + /** * ice_read_sr_word - Reads Shadow RAM word and acquire NVM if necessary * @hw: pointer to the HW structure @@ -639,85 +662,94 @@ ice_get_orom_ver_info(struct ice_hw *hw, struct ice_orom_info *orom) } /** - * ice_get_netlist_ver_info + * ice_get_netlist_info * @hw: pointer to the HW struct - * @ver: pointer to netlist version info structure + * @bank: whether to read from the active or inactive flash bank + * @netlist: pointer to netlist version info structure * - * Get the netlist version information + * Get the netlist version information from the requested bank. Reads the Link + * Topology section to find the Netlist ID block and extract the relevant + * information into the netlist version structure. */ static enum ice_status -ice_get_netlist_ver_info(struct ice_hw *hw, struct ice_netlist_info *ver) +ice_get_netlist_info(struct ice_hw *hw, enum ice_bank_select bank, + struct ice_netlist_info *netlist) { - enum ice_status ret; - u32 id_blk_start; - __le16 raw_data; - u16 data, i; - u16 *buff; - - ret = ice_acquire_nvm(hw, ICE_RES_READ); - if (ret) - return ret; - buff = kcalloc(ICE_AQC_NVM_NETLIST_ID_BLK_LEN, sizeof(*buff), - GFP_KERNEL); - if (!buff) { - ret = ICE_ERR_NO_MEMORY; - goto exit_no_mem; + u16 module_id, length, node_count, i; + enum ice_status status; + u16 *id_blk; + + status = ice_read_netlist_module(hw, bank, ICE_NETLIST_TYPE_OFFSET, &module_id); + if (status) + return status; + + if (module_id != ICE_NETLIST_LINK_TOPO_MOD_ID) { + ice_debug(hw, ICE_DBG_NVM, "Expected netlist module_id ID of 0x%04x, but got 0x%04x\n", + ICE_NETLIST_LINK_TOPO_MOD_ID, module_id); + return ICE_ERR_NVM; } - /* read module length */ - ret = ice_aq_read_nvm(hw, ICE_AQC_NVM_LINK_TOPO_NETLIST_MOD_ID, - ICE_AQC_NVM_LINK_TOPO_NETLIST_LEN_OFFSET * 2, - ICE_AQC_NVM_LINK_TOPO_NETLIST_LEN, &raw_data, - false, false, NULL); - if (ret) - goto exit_error; + status = ice_read_netlist_module(hw, bank, ICE_LINK_TOPO_MODULE_LEN, &length); + if (status) + return status; - data = le16_to_cpu(raw_data); - /* exit if length is = 0 */ - if (!data) - goto exit_error; + /* sanity check that we have at least enough words to store the netlist ID block */ + if (length < ICE_NETLIST_ID_BLK_SIZE) { + ice_debug(hw, ICE_DBG_NVM, "Netlist Link Topology module too small. Expected at least %u words, but got %u words.\n", + ICE_NETLIST_ID_BLK_SIZE, length); + return ICE_ERR_NVM; + } - /* read node count */ - ret = ice_aq_read_nvm(hw, ICE_AQC_NVM_LINK_TOPO_NETLIST_MOD_ID, - ICE_AQC_NVM_NETLIST_NODE_COUNT_OFFSET * 2, - ICE_AQC_NVM_NETLIST_NODE_COUNT_LEN, &raw_data, - false, false, NULL); - if (ret) - goto exit_error; - data = le16_to_cpu(raw_data) & ICE_AQC_NVM_NETLIST_NODE_COUNT_M; + status = ice_read_netlist_module(hw, bank, ICE_LINK_TOPO_NODE_COUNT, &node_count); + if (status) + return status; + node_count &= ICE_LINK_TOPO_NODE_COUNT_M; - /* netlist ID block starts from offset 4 + node count * 2 */ - id_blk_start = ICE_AQC_NVM_NETLIST_ID_BLK_START_OFFSET + data * 2; + id_blk = kcalloc(ICE_NETLIST_ID_BLK_SIZE, sizeof(*id_blk), GFP_KERNEL); + if (!id_blk) + return ICE_ERR_NO_MEMORY; - /* read the entire netlist ID block */ - ret = ice_aq_read_nvm(hw, ICE_AQC_NVM_LINK_TOPO_NETLIST_MOD_ID, - id_blk_start * 2, - ICE_AQC_NVM_NETLIST_ID_BLK_LEN * 2, buff, false, - false, NULL); - if (ret) + /* Read out the entire Netlist ID Block at once. */ + status = ice_read_flash_module(hw, bank, ICE_SR_NETLIST_BANK_PTR, + ICE_NETLIST_ID_BLK_OFFSET(node_count) * sizeof(u16), + (u8 *)id_blk, ICE_NETLIST_ID_BLK_SIZE * sizeof(u16)); + if (status) goto exit_error; - for (i = 0; i < ICE_AQC_NVM_NETLIST_ID_BLK_LEN; i++) - buff[i] = le16_to_cpu(((__force __le16 *)buff)[i]); - - ver->major = (buff[ICE_AQC_NVM_NETLIST_ID_BLK_MAJOR_VER_HIGH] << 16) | - buff[ICE_AQC_NVM_NETLIST_ID_BLK_MAJOR_VER_LOW]; - ver->minor = (buff[ICE_AQC_NVM_NETLIST_ID_BLK_MINOR_VER_HIGH] << 16) | - buff[ICE_AQC_NVM_NETLIST_ID_BLK_MINOR_VER_LOW]; - ver->type = (buff[ICE_AQC_NVM_NETLIST_ID_BLK_TYPE_HIGH] << 16) | - buff[ICE_AQC_NVM_NETLIST_ID_BLK_TYPE_LOW]; - ver->rev = (buff[ICE_AQC_NVM_NETLIST_ID_BLK_REV_HIGH] << 16) | - buff[ICE_AQC_NVM_NETLIST_ID_BLK_REV_LOW]; - ver->cust_ver = buff[ICE_AQC_NVM_NETLIST_ID_BLK_CUST_VER]; + for (i = 0; i < ICE_NETLIST_ID_BLK_SIZE; i++) + id_blk[i] = le16_to_cpu(((__force __le16 *)id_blk)[i]); + + netlist->major = id_blk[ICE_NETLIST_ID_BLK_MAJOR_VER_HIGH] << 16 | + id_blk[ICE_NETLIST_ID_BLK_MAJOR_VER_LOW]; + netlist->minor = id_blk[ICE_NETLIST_ID_BLK_MINOR_VER_HIGH] << 16 | + id_blk[ICE_NETLIST_ID_BLK_MINOR_VER_LOW]; + netlist->type = id_blk[ICE_NETLIST_ID_BLK_TYPE_HIGH] << 16 | + id_blk[ICE_NETLIST_ID_BLK_TYPE_LOW]; + netlist->rev = id_blk[ICE_NETLIST_ID_BLK_REV_HIGH] << 16 | + id_blk[ICE_NETLIST_ID_BLK_REV_LOW]; + netlist->cust_ver = id_blk[ICE_NETLIST_ID_BLK_CUST_VER]; /* Read the left most 4 bytes of SHA */ - ver->hash = buff[ICE_AQC_NVM_NETLIST_ID_BLK_SHA_HASH + 15] << 16 | - buff[ICE_AQC_NVM_NETLIST_ID_BLK_SHA_HASH + 14]; + netlist->hash = id_blk[ICE_NETLIST_ID_BLK_SHA_HASH_WORD(15)] << 16 | + id_blk[ICE_NETLIST_ID_BLK_SHA_HASH_WORD(14)]; exit_error: - kfree(buff); -exit_no_mem: - ice_release_nvm(hw); - return ret; + kfree(id_blk); + + return status; +} + +/** + * ice_get_inactive_netlist_ver + * @hw: pointer to the HW struct + * @netlist: pointer to netlist version info structure + * + * Read the netlist version data from the inactive netlist bank. Used to + * extract version data of a pending flash update in order to display the + * version data. + */ +enum ice_status ice_get_inactive_netlist_ver(struct ice_hw *hw, struct ice_netlist_info *netlist) +{ + return ice_get_netlist_info(hw, ICE_INACTIVE_FLASH_BANK, netlist); } /** @@ -973,7 +1005,7 @@ enum ice_status ice_init_nvm(struct ice_hw *hw) } /* read the netlist version information */ - status = ice_get_netlist_ver_info(hw, &flash->netlist); + status = ice_get_netlist_info(hw, ICE_ACTIVE_FLASH_BANK, &flash->netlist); if (status) ice_debug(hw, ICE_DBG_INIT, "Failed to read netlist info.\n"); diff --git a/drivers/net/ethernet/intel/ice/ice_nvm.h b/drivers/net/ethernet/intel/ice/ice_nvm.h index 23843d9b126c5..ca293168b017d 100644 --- a/drivers/net/ethernet/intel/ice/ice_nvm.h +++ b/drivers/net/ethernet/intel/ice/ice_nvm.h @@ -16,6 +16,8 @@ ice_get_pfa_module_tlv(struct ice_hw *hw, u16 *module_tlv, u16 *module_tlv_len, enum ice_status ice_get_inactive_nvm_ver(struct ice_hw *hw, struct ice_nvm_info *nvm); enum ice_status +ice_get_inactive_netlist_ver(struct ice_hw *hw, struct ice_netlist_info *netlist); +enum ice_status ice_read_pba_string(struct ice_hw *hw, u8 *pba_num, u32 pba_num_size); enum ice_status ice_init_nvm(struct ice_hw *hw); enum ice_status ice_read_sr_word(struct ice_hw *hw, u16 offset, u16 *data); diff --git a/drivers/net/ethernet/intel/ice/ice_status.h b/drivers/net/ethernet/intel/ice/ice_status.h index 4028c63651726..dbf66057371da 100644 --- a/drivers/net/ethernet/intel/ice/ice_status.h +++ b/drivers/net/ethernet/intel/ice/ice_status.h @@ -29,6 +29,7 @@ enum ice_status { ICE_ERR_HW_TABLE = -19, ICE_ERR_FW_DDP_MISMATCH = -20, + ICE_ERR_NVM = -50, ICE_ERR_NVM_CHECKSUM = -51, ICE_ERR_BUF_TOO_SHORT = -52, ICE_ERR_NVM_BLANK_MODE = -53, diff --git a/drivers/net/ethernet/intel/ice/ice_type.h b/drivers/net/ethernet/intel/ice/ice_type.h index f414cac159f3e..a98800a910451 100644 --- a/drivers/net/ethernet/intel/ice/ice_type.h +++ b/drivers/net/ethernet/intel/ice/ice_type.h @@ -842,6 +842,41 @@ struct ice_hw_port_stats { /* Size in bytes of Option ROM trailer */ #define ICE_NVM_OROM_TRAILER_LENGTH (2 * ICE_CSS_HEADER_LENGTH) +/* The Link Topology Netlist section is stored as a series of words. It is + * stored in the NVM as a TLV, with the first two words containing the type + * and length. + */ +#define ICE_NETLIST_LINK_TOPO_MOD_ID 0x011B +#define ICE_NETLIST_TYPE_OFFSET 0x0000 +#define ICE_NETLIST_LEN_OFFSET 0x0001 + +/* The Link Topology section follows the TLV header. When reading the netlist + * using ice_read_netlist_module, we need to account for the 2-word TLV + * header. + */ +#define ICE_NETLIST_LINK_TOPO_OFFSET(n) ((n) + 2) + +#define ICE_LINK_TOPO_MODULE_LEN ICE_NETLIST_LINK_TOPO_OFFSET(0x0000) +#define ICE_LINK_TOPO_NODE_COUNT ICE_NETLIST_LINK_TOPO_OFFSET(0x0001) + +#define ICE_LINK_TOPO_NODE_COUNT_M ICE_M(0x3FF, 0) + +/* The Netlist ID Block is located after all of the Link Topology nodes. */ +#define ICE_NETLIST_ID_BLK_SIZE 0x30 +#define ICE_NETLIST_ID_BLK_OFFSET(n) ICE_NETLIST_LINK_TOPO_OFFSET(0x0004 + 2 * (n)) + +/* netlist ID block field offsets (word offsets) */ +#define ICE_NETLIST_ID_BLK_MAJOR_VER_LOW 0x02 +#define ICE_NETLIST_ID_BLK_MAJOR_VER_HIGH 0x03 +#define ICE_NETLIST_ID_BLK_MINOR_VER_LOW 0x04 +#define ICE_NETLIST_ID_BLK_MINOR_VER_HIGH 0x05 +#define ICE_NETLIST_ID_BLK_TYPE_LOW 0x06 +#define ICE_NETLIST_ID_BLK_TYPE_HIGH 0x07 +#define ICE_NETLIST_ID_BLK_REV_LOW 0x08 +#define ICE_NETLIST_ID_BLK_REV_HIGH 0x09 +#define ICE_NETLIST_ID_BLK_SHA_HASH_WORD(n) (0x0A + (n)) +#define ICE_NETLIST_ID_BLK_CUST_VER 0x2F + /* Auxiliary field, mask, and shift definition for Shadow RAM and NVM Flash */ #define ICE_SR_CTRL_WORD_1_S 0x06 #define ICE_SR_CTRL_WORD_1_M (0x03 << ICE_SR_CTRL_WORD_1_S) -- GitLab From e67fbcfbb4ef0d0bbd978594707381efcadf0c55 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Wed, 11 Nov 2020 16:43:30 -0800 Subject: [PATCH 1931/2012] ice: display stored UNDI firmware version via devlink info Just as we recently added support for other stored firmware flash versions, support display of the stored UNDI Option ROM version via devlink info. To do this, we need to introduce a new ice_get_inactive_orom_ver function. This is a little trickier than with other flash versions. The Option ROM version data was being read from a special "Boot Configuration" block of the NVM Preserved Field Area. This block only contains the *active* Option ROM version data. It is populated when the device firmware finishes updating the Option ROM. This method is ineffective at reading the stored Option ROM version data. Instead of reading from this section of the flash, replace this version extraction with one which locates the Combo Version information from within the Option ROM binary. This data is stored within the Option ROM at a 512 byte offset, in a simple structured format. The structure uses a simple modulo 256 checksum for integrity verification. Scan through the Option ROM to locate the CIVD data section, and extract the Combo Version. Refactor ice_get_orom_ver_info so that it takes the bank select enumeration parameter. Use this to implement ice_get_inactive_orom_ver. Although all ice devices have a Boot Configuration block in the NVM PFA, not all devices have a valid Option ROM. In this case, the old ice_get_orom_ver_info would "succeed" but report a version of all zeros. The new implementation would fail to locate the $CIV section in the Option ROM and report an error. Thus, we must ensure that ice_init_nvm does not fail if ice_get_orom_ver_info fails. Use the new ice_get_inactive_orom_ver to allow reporting the Option ROM versions for a pending update via devlink info. Signed-off-by: Jacob Keller Tested-by: Tony Brelinski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_devlink.c | 26 ++++- drivers/net/ethernet/intel/ice/ice_nvm.c | 115 +++++++++++++------ drivers/net/ethernet/intel/ice/ice_nvm.h | 10 ++ 3 files changed, 113 insertions(+), 38 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_devlink.c b/drivers/net/ethernet/intel/ice/ice_devlink.c index 56be75c6d77d7..cf685eeea198e 100644 --- a/drivers/net/ethernet/intel/ice/ice_devlink.c +++ b/drivers/net/ethernet/intel/ice/ice_devlink.c @@ -9,6 +9,7 @@ /* context for devlink info version reporting */ struct ice_info_ctx { char buf[128]; + struct ice_orom_info pending_orom; struct ice_nvm_info pending_nvm; struct ice_netlist_info pending_netlist; struct ice_hw_dev_caps dev_caps; @@ -83,6 +84,18 @@ static int ice_info_orom_ver(struct ice_pf *pf, struct ice_info_ctx *ctx) return 0; } +static int +ice_info_pending_orom_ver(struct ice_pf __always_unused *pf, struct ice_info_ctx *ctx) +{ + struct ice_orom_info *orom = &ctx->pending_orom; + + if (ctx->dev_caps.common_cap.nvm_update_pending_orom) + snprintf(ctx->buf, sizeof(ctx->buf), "%u.%u.%u", + orom->major, orom->build, orom->patch); + + return 0; +} + static int ice_info_nvm_ver(struct ice_pf *pf, struct ice_info_ctx *ctx) { struct ice_nvm_info *nvm = &pf->hw.flash.nvm; @@ -230,7 +243,7 @@ static const struct ice_devlink_version { running(DEVLINK_INFO_VERSION_GENERIC_FW_MGMT, ice_info_fw_mgmt), running("fw.mgmt.api", ice_info_fw_api), running("fw.mgmt.build", ice_info_fw_build), - running(DEVLINK_INFO_VERSION_GENERIC_FW_UNDI, ice_info_orom_ver), + combined(DEVLINK_INFO_VERSION_GENERIC_FW_UNDI, ice_info_orom_ver, ice_info_pending_orom_ver), combined("fw.psid.api", ice_info_nvm_ver, ice_info_pending_nvm_ver), combined(DEVLINK_INFO_VERSION_GENERIC_FW_BUNDLE_ID, ice_info_eetrack, ice_info_pending_eetrack), running("fw.app.name", ice_info_ddp_pkg_name), @@ -274,6 +287,17 @@ static int ice_devlink_info_get(struct devlink *devlink, goto out_free_ctx; } + if (ctx->dev_caps.common_cap.nvm_update_pending_orom) { + status = ice_get_inactive_orom_ver(hw, &ctx->pending_orom); + if (status) { + dev_dbg(dev, "Unable to read inactive Option ROM version data, status %s aq_err %s\n", + ice_stat_str(status), ice_aq_str(hw->adminq.sq_last_status)); + + /* disable display of pending Option ROM */ + ctx->dev_caps.common_cap.nvm_update_pending_orom = false; + } + } + if (ctx->dev_caps.common_cap.nvm_update_pending_nvm) { status = ice_get_inactive_nvm_ver(hw, &ctx->pending_nvm); if (status) { diff --git a/drivers/net/ethernet/intel/ice/ice_nvm.c b/drivers/net/ethernet/intel/ice/ice_nvm.c index 6d5218d96bec5..75ccbfc07f99a 100644 --- a/drivers/net/ethernet/intel/ice/ice_nvm.c +++ b/drivers/net/ethernet/intel/ice/ice_nvm.c @@ -607,60 +607,103 @@ enum ice_status ice_get_inactive_nvm_ver(struct ice_hw *hw, struct ice_nvm_info } /** - * ice_get_orom_ver_info - Read Option ROM version information + * ice_get_orom_civd_data - Get the combo version information from Option ROM * @hw: pointer to the HW struct - * @orom: pointer to Option ROM info structure + * @bank: whether to read from the active or inactive flash module + * @civd: storage for the Option ROM CIVD data. * - * Read the Combo Image version data from the Boot Configuration TLV and fill - * in the option ROM version data. + * Searches through the Option ROM flash contents to locate the CIVD data for + * the image. */ static enum ice_status -ice_get_orom_ver_info(struct ice_hw *hw, struct ice_orom_info *orom) +ice_get_orom_civd_data(struct ice_hw *hw, enum ice_bank_select bank, + struct ice_orom_civd_info *civd) { - u16 combo_hi, combo_lo, boot_cfg_tlv, boot_cfg_tlv_len; + struct ice_orom_civd_info tmp; enum ice_status status; - u32 combo_ver; + u32 offset; - status = ice_get_pfa_module_tlv(hw, &boot_cfg_tlv, &boot_cfg_tlv_len, - ICE_SR_BOOT_CFG_PTR); - if (status) { - ice_debug(hw, ICE_DBG_INIT, "Failed to read Boot Configuration Block TLV.\n"); - return status; - } - - /* Boot Configuration Block must have length at least 2 words - * (Combo Image Version High and Combo Image Version Low) + /* The CIVD section is located in the Option ROM aligned to 512 bytes. + * The first 4 bytes must contain the ASCII characters "$CIV". + * A simple modulo 256 sum of all of the bytes of the structure must + * equal 0. */ - if (boot_cfg_tlv_len < 2) { - ice_debug(hw, ICE_DBG_INIT, "Invalid Boot Configuration Block TLV size.\n"); - return ICE_ERR_INVAL_SIZE; - } + for (offset = 0; (offset + 512) <= hw->flash.banks.orom_size; offset += 512) { + u8 sum = 0, i; - status = ice_read_sr_word(hw, (boot_cfg_tlv + ICE_NVM_OROM_VER_OFF), - &combo_hi); - if (status) { - ice_debug(hw, ICE_DBG_INIT, "Failed to read OROM_VER hi.\n"); - return status; + status = ice_read_flash_module(hw, bank, ICE_SR_1ST_OROM_BANK_PTR, + offset, (u8 *)&tmp, sizeof(tmp)); + if (status) { + ice_debug(hw, ICE_DBG_NVM, "Unable to read Option ROM CIVD data\n"); + return status; + } + + /* Skip forward until we find a matching signature */ + if (memcmp("$CIV", tmp.signature, sizeof(tmp.signature)) != 0) + continue; + + /* Verify that the simple checksum is zero */ + for (i = 0; i < sizeof(tmp); i++) + sum += ((u8 *)&tmp)[i]; + + if (sum) { + ice_debug(hw, ICE_DBG_NVM, "Found CIVD data with invalid checksum of %u\n", + sum); + return ICE_ERR_NVM; + } + + *civd = tmp; + return 0; } - status = ice_read_sr_word(hw, (boot_cfg_tlv + ICE_NVM_OROM_VER_OFF + 1), - &combo_lo); + return ICE_ERR_NVM; +} + +/** + * ice_get_orom_ver_info - Read Option ROM version information + * @hw: pointer to the HW struct + * @bank: whether to read from the active or inactive flash module + * @orom: pointer to Option ROM info structure + * + * Read Option ROM version and security revision from the Option ROM flash + * section. + */ +static enum ice_status +ice_get_orom_ver_info(struct ice_hw *hw, enum ice_bank_select bank, struct ice_orom_info *orom) +{ + struct ice_orom_civd_info civd; + enum ice_status status; + u32 combo_ver; + + status = ice_get_orom_civd_data(hw, bank, &civd); if (status) { - ice_debug(hw, ICE_DBG_INIT, "Failed to read OROM_VER lo.\n"); + ice_debug(hw, ICE_DBG_NVM, "Failed to locate valid Option ROM CIVD data\n"); return status; } - combo_ver = ((u32)combo_hi << 16) | combo_lo; + combo_ver = le32_to_cpu(civd.combo_ver); - orom->major = (u8)((combo_ver & ICE_OROM_VER_MASK) >> - ICE_OROM_VER_SHIFT); + orom->major = (u8)((combo_ver & ICE_OROM_VER_MASK) >> ICE_OROM_VER_SHIFT); orom->patch = (u8)(combo_ver & ICE_OROM_VER_PATCH_MASK); - orom->build = (u16)((combo_ver & ICE_OROM_VER_BUILD_MASK) >> - ICE_OROM_VER_BUILD_SHIFT); + orom->build = (u16)((combo_ver & ICE_OROM_VER_BUILD_MASK) >> ICE_OROM_VER_BUILD_SHIFT); return 0; } +/** + * ice_get_inactive_orom_ver - Read Option ROM version from the inactive bank + * @hw: pointer to the HW structure + * @orom: storage for Option ROM version information + * + * Reads the Option ROM version and security revision data for the inactive + * section of flash. Used to access version data for a pending update that has + * not yet been activated. + */ +enum ice_status ice_get_inactive_orom_ver(struct ice_hw *hw, struct ice_orom_info *orom) +{ + return ice_get_orom_ver_info(hw, ICE_INACTIVE_FLASH_BANK, orom); +} + /** * ice_get_netlist_info * @hw: pointer to the HW struct @@ -998,11 +1041,9 @@ enum ice_status ice_init_nvm(struct ice_hw *hw) return status; } - status = ice_get_orom_ver_info(hw, &flash->orom); - if (status) { + status = ice_get_orom_ver_info(hw, ICE_ACTIVE_FLASH_BANK, &flash->orom); + if (status) ice_debug(hw, ICE_DBG_INIT, "Failed to read Option ROM info.\n"); - return status; - } /* read the netlist version information */ status = ice_get_netlist_info(hw, ICE_ACTIVE_FLASH_BANK, &flash->netlist); diff --git a/drivers/net/ethernet/intel/ice/ice_nvm.h b/drivers/net/ethernet/intel/ice/ice_nvm.h index ca293168b017d..c6f05f43d593b 100644 --- a/drivers/net/ethernet/intel/ice/ice_nvm.h +++ b/drivers/net/ethernet/intel/ice/ice_nvm.h @@ -4,6 +4,14 @@ #ifndef _ICE_NVM_H_ #define _ICE_NVM_H_ +struct ice_orom_civd_info { + u8 signature[4]; /* Must match ASCII '$CIV' characters */ + u8 checksum; /* Simple modulo 256 sum of all structure bytes must equal 0 */ + __le32 combo_ver; /* Combo Image Version number */ + u8 combo_name_len; /* Length of the unicode combo image version string, max of 32 */ + __le16 combo_name[32]; /* Unicode string representing the Combo Image version */ +} __packed; + enum ice_status ice_acquire_nvm(struct ice_hw *hw, enum ice_aq_res_access_type access); void ice_release_nvm(struct ice_hw *hw); @@ -14,6 +22,8 @@ enum ice_status ice_get_pfa_module_tlv(struct ice_hw *hw, u16 *module_tlv, u16 *module_tlv_len, u16 module_type); enum ice_status +ice_get_inactive_orom_ver(struct ice_hw *hw, struct ice_orom_info *orom); +enum ice_status ice_get_inactive_nvm_ver(struct ice_hw *hw, struct ice_nvm_info *nvm); enum ice_status ice_get_inactive_netlist_ver(struct ice_hw *hw, struct ice_netlist_info *netlist); -- GitLab From e94c0df984d3f428b81e03a73b31b7a7e30a8361 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Tue, 29 Sep 2020 14:01:56 -0500 Subject: [PATCH 1932/2012] ice: Replace one-element array with flexible-array member MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is a regular need in the kernel to provide a way to declare having a dynamically sized set of trailing elements in a structure. Kernel code should always use “flexible array members”[1] for these cases. The older style of one-element or zero-length arrays should no longer be used[2]. Refactor the code according to the use of a flexible-array member in struct ice_res_tracker, instead of a one-element array and use the struct_size() helper to calculate the size for the allocations. Also, notice that the code below suggests that, currently, two too many bytes are being allocated with devm_kzalloc(), as the total number of entries (pf->irq_tracker->num_entries) for pf->irq_tracker->list[] is _vectors_ and sizeof(*pf->irq_tracker) also includes the size of the one-element array _list_ in struct ice_res_tracker. drivers/net/ethernet/intel/ice/ice_main.c:3511: 3511 /* populate SW interrupts pool with number of OS granted IRQs. */ 3512 pf->num_avail_sw_msix = (u16)vectors; 3513 pf->irq_tracker->num_entries = (u16)vectors; 3514 pf->irq_tracker->end = pf->irq_tracker->num_entries; With this change, the right amount of dynamic memory is now allocated because, contrary to one-element arrays which occupy at least as much space as a single object of the type, flexible-array members don't occupy such space in the containing structure. [1] https://en.wikipedia.org/wiki/Flexible_array_member [2] https://www.kernel.org/doc/html/v5.9-rc1/process/deprecated.html#zero-length-and-one-element-arrays Built-tested-by: kernel test robot Signed-off-by: Gustavo A. R. Silva Tested-by: Tony Brelinski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice.h | 2 +- drivers/net/ethernet/intel/ice/ice_main.c | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h index fa1e128c24eca..fca428c879ec1 100644 --- a/drivers/net/ethernet/intel/ice/ice.h +++ b/drivers/net/ethernet/intel/ice/ice.h @@ -166,7 +166,7 @@ struct ice_tc_cfg { struct ice_res_tracker { u16 num_entries; u16 end; - u16 list[1]; + u16 list[]; }; struct ice_qs_cfg { diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 643fbc8d6b6ad..f6177591978d9 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -3495,9 +3495,9 @@ static int ice_init_interrupt_scheme(struct ice_pf *pf) return vectors; /* set up vector assignment tracking */ - pf->irq_tracker = - devm_kzalloc(ice_pf_to_dev(pf), sizeof(*pf->irq_tracker) + - (sizeof(u16) * vectors), GFP_KERNEL); + pf->irq_tracker = devm_kzalloc(ice_pf_to_dev(pf), + struct_size(pf->irq_tracker, list, vectors), + GFP_KERNEL); if (!pf->irq_tracker) { ice_dis_msix(pf); return -ENOMEM; -- GitLab From 11404310d58d821714a19bcf2bf69e5c80d4d34c Mon Sep 17 00:00:00 2001 From: Bruce Allan Date: Fri, 20 Nov 2020 16:39:36 -0800 Subject: [PATCH 1933/2012] ice: use flex_array_size where possible Use the flex_array_size() helper with the recently added flexible array members in structures. Signed-off-by: Bruce Allan Tested-by: Tony Brelinski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_common.c | 2 +- drivers/net/ethernet/intel/ice/ice_flex_pipe.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c index 6d7e7dd0ebe22..607d33d05a0ce 100644 --- a/drivers/net/ethernet/intel/ice/ice_common.c +++ b/drivers/net/ethernet/intel/ice/ice_common.c @@ -1653,7 +1653,7 @@ ice_aq_alloc_free_res(struct ice_hw *hw, u16 num_entries, if (!buf) return ICE_ERR_PARAM; - if (buf_size < (num_entries * sizeof(buf->elem[0]))) + if (buf_size < flex_array_size(buf, elem, num_entries)) return ICE_ERR_PARAM; ice_fill_dflt_direct_cmd_desc(&desc, opc); diff --git a/drivers/net/ethernet/intel/ice/ice_flex_pipe.c b/drivers/net/ethernet/intel/ice/ice_flex_pipe.c index f5e81b5553537..cf5b717b92931 100644 --- a/drivers/net/ethernet/intel/ice/ice_flex_pipe.c +++ b/drivers/net/ethernet/intel/ice/ice_flex_pipe.c @@ -1525,7 +1525,7 @@ ice_pkg_buf_reserve_section(struct ice_buf_build *bld, u16 count) bld->reserved_section_table_entries += count; data_end = le16_to_cpu(buf->data_end) + - (count * sizeof(buf->section_entry[0])); + flex_array_size(buf, section_entry, count); buf->data_end = cpu_to_le16(data_end); return 0; -- GitLab From 12aae8f1d87906547d7756765bf9fc18f268fad1 Mon Sep 17 00:00:00 2001 From: Bruce Allan Date: Mon, 12 Oct 2020 15:53:26 -0700 Subject: [PATCH 1934/2012] ice: remove dead code The check for a NULL pf pointer is moot since the earlier declaration and assignment of struct device *dev already de-referenced the pointer. Also, the only caller of ice_set_dflt_mib() already ensures pf is not NULL. Cc: Dave Ertman Reported-by: kernel test robot Reported-by: Dan Carpenter Signed-off-by: Bruce Allan Tested-by: Tony Brelinski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_main.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index f6177591978d9..98cd44a3ccf73 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -785,15 +785,9 @@ static void ice_set_dflt_mib(struct ice_pf *pf) u8 mib_type, *buf, *lldpmib = NULL; u16 len, typelen, offset = 0; struct ice_lldp_org_tlv *tlv; - struct ice_hw *hw; + struct ice_hw *hw = &pf->hw; u32 ouisubtype; - if (!pf) { - dev_dbg(dev, "%s NULL pf pointer\n", __func__); - return; - } - - hw = &pf->hw; mib_type = SET_LOCAL_MIB_TYPE_LOCAL_MIB; lldpmib = kzalloc(ICE_LLDPDU_SIZE, GFP_KERNEL); if (!lldpmib) { -- GitLab From 626899a02e6afcd4b2ce5c0551092e3554cec4aa Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 28 Jan 2021 17:59:23 +0000 Subject: [PATCH 1935/2012] netfilter: nftables: remove redundant assignment of variable err The variable err is being assigned a value that is never read, the same error number is being returned at the error return path via label err1. Clean up the code by removing the assignment. Addresses-Coverity: ("Unused value") Signed-off-by: Colin Ian King Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_cmp.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/net/netfilter/nft_cmp.c b/net/netfilter/nft_cmp.c index 3640eea8a87b9..eb6a43a180bba 100644 --- a/net/netfilter/nft_cmp.c +++ b/net/netfilter/nft_cmp.c @@ -266,10 +266,8 @@ nft_cmp_select_ops(const struct nft_ctx *ctx, const struct nlattr * const tb[]) if (err < 0) return ERR_PTR(err); - if (desc.type != NFT_DATA_VALUE) { - err = -EINVAL; + if (desc.type != NFT_DATA_VALUE) goto err1; - } if (desc.len <= sizeof(u32) && (op == NFT_CMP_EQ || op == NFT_CMP_NEQ)) return &nft_cmp_fast_ops; -- GitLab From b91b3a211542bcd69532a8004452d83f499d23cd Mon Sep 17 00:00:00 2001 From: Jiapeng Chong Date: Tue, 2 Feb 2021 18:02:37 +0800 Subject: [PATCH 1936/2012] dpaa2-eth: Simplify the calculation of variables Fix the following coccicheck warnings: ./drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c:1651:36-38: WARNING !A || A && B is equivalent to !A || B. Reported-by: Abaci Robot Signed-off-by: Jiapeng Chong Acked-by: Ioana Ciornei Link: https://lore.kernel.org/r/1612260157-128026-1-git-send-email-jiapeng.chong@linux.alibaba.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c index 41e225baf5712..c5ceebca8caeb 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c @@ -1660,7 +1660,7 @@ set_cgtd: * CG taildrop threshold, so it won't interfere with it; we also * want frames in non-PFC enabled traffic classes to be kept in check) */ - td.enable = !tx_pause || (tx_pause && pfc); + td.enable = !tx_pause || pfc; if (priv->rx_cgtd_enabled == td.enable) return; -- GitLab From a8225efdf31e9498c5696554e5731da893c93f61 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Wed, 3 Feb 2021 19:06:17 +0100 Subject: [PATCH 1937/2012] net: ethernet: ti: fix netdevice stats for XDP Align netdevice statistics when the device is running in XDP mode to other upstream drivers. In particular report to user-space rx packets even if they are not forwarded to the networking stack (XDP_PASS) but if they are redirected (XDP_REDIRECT), dropped (XDP_DROP) or sent back using the same interface (XDP_TX). This patch allows the system administrator to verify the device is receiving data correctly. Signed-off-by: Lorenzo Bianconi Link: https://lore.kernel.org/r/a457cb17dd9c58c116d64ee34c354b2e89c0ff8f.1612375372.git.lorenzo@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/ti/cpsw.c | 4 +--- drivers/net/ethernet/ti/cpsw_new.c | 4 +--- drivers/net/ethernet/ti/cpsw_priv.c | 12 ++++++++++-- drivers/net/ethernet/ti/cpsw_priv.h | 2 +- 4 files changed, 13 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index 5239318e96869..fd966567464ce 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -403,12 +403,10 @@ static void cpsw_rx_handler(void *token, int len, int status) xdp_prepare_buff(&xdp, pa, headroom, size, false); port = priv->emac_port + cpsw->data.dual_emac; - ret = cpsw_run_xdp(priv, ch, &xdp, page, port); + ret = cpsw_run_xdp(priv, ch, &xdp, page, port, &len); if (ret != CPSW_XDP_PASS) goto requeue; - /* XDP prog might have changed packet data and boundaries */ - len = xdp.data_end - xdp.data; headroom = xdp.data - xdp.data_hard_start; /* XDP prog can modify vlan tag, so can't use encap header */ diff --git a/drivers/net/ethernet/ti/cpsw_new.c b/drivers/net/ethernet/ti/cpsw_new.c index 94747f82c60ba..58a64313ac00b 100644 --- a/drivers/net/ethernet/ti/cpsw_new.c +++ b/drivers/net/ethernet/ti/cpsw_new.c @@ -345,12 +345,10 @@ static void cpsw_rx_handler(void *token, int len, int status) xdp_prepare_buff(&xdp, pa, headroom, size, false); - ret = cpsw_run_xdp(priv, ch, &xdp, page, priv->emac_port); + ret = cpsw_run_xdp(priv, ch, &xdp, page, priv->emac_port, &len); if (ret != CPSW_XDP_PASS) goto requeue; - /* XDP prog might have changed packet data and boundaries */ - len = xdp.data_end - xdp.data; headroom = xdp.data - xdp.data_hard_start; /* XDP prog can modify vlan tag, so can't use encap header */ diff --git a/drivers/net/ethernet/ti/cpsw_priv.c b/drivers/net/ethernet/ti/cpsw_priv.c index 99f44563e10f6..bb59e768915e0 100644 --- a/drivers/net/ethernet/ti/cpsw_priv.c +++ b/drivers/net/ethernet/ti/cpsw_priv.c @@ -1323,7 +1323,7 @@ int cpsw_xdp_tx_frame(struct cpsw_priv *priv, struct xdp_frame *xdpf, } int cpsw_run_xdp(struct cpsw_priv *priv, int ch, struct xdp_buff *xdp, - struct page *page, int port) + struct page *page, int port, int *len) { struct cpsw_common *cpsw = priv->cpsw; struct net_device *ndev = priv->ndev; @@ -1341,10 +1341,13 @@ int cpsw_run_xdp(struct cpsw_priv *priv, int ch, struct xdp_buff *xdp, } act = bpf_prog_run_xdp(prog, xdp); + /* XDP prog might have changed packet data and boundaries */ + *len = xdp->data_end - xdp->data; + switch (act) { case XDP_PASS: ret = CPSW_XDP_PASS; - break; + goto out; case XDP_TX: xdpf = xdp_convert_buff_to_frame(xdp); if (unlikely(!xdpf)) @@ -1370,8 +1373,13 @@ int cpsw_run_xdp(struct cpsw_priv *priv, int ch, struct xdp_buff *xdp, trace_xdp_exception(ndev, prog, act); fallthrough; /* handle aborts by dropping packet */ case XDP_DROP: + ndev->stats.rx_bytes += *len; + ndev->stats.rx_packets++; goto drop; } + + ndev->stats.rx_bytes += *len; + ndev->stats.rx_packets++; out: rcu_read_unlock(); return ret; diff --git a/drivers/net/ethernet/ti/cpsw_priv.h b/drivers/net/ethernet/ti/cpsw_priv.h index 7b7f3596b20da..a323bea54faa2 100644 --- a/drivers/net/ethernet/ti/cpsw_priv.h +++ b/drivers/net/ethernet/ti/cpsw_priv.h @@ -438,7 +438,7 @@ int cpsw_ndo_bpf(struct net_device *ndev, struct netdev_bpf *bpf); int cpsw_xdp_tx_frame(struct cpsw_priv *priv, struct xdp_frame *xdpf, struct page *page, int port); int cpsw_run_xdp(struct cpsw_priv *priv, int ch, struct xdp_buff *xdp, - struct page *page, int port); + struct page *page, int port, int *len); irqreturn_t cpsw_tx_interrupt(int irq, void *dev_id); irqreturn_t cpsw_rx_interrupt(int irq, void *dev_id); irqreturn_t cpsw_misc_interrupt(int irq, void *dev_id); -- GitLab From 1697291dae7cc582d8f737d788991c01b27de90d Mon Sep 17 00:00:00 2001 From: Xu Wang Date: Thu, 4 Feb 2021 07:05:49 +0000 Subject: [PATCH 1938/2012] net: bridge: mcast: Use ERR_CAST instead of ERR_PTR(PTR_ERR()) Use ERR_CAST inlined function instead of ERR_PTR(PTR_ERR(...)). net/bridge/br_multicast.c:1246:9-16: WARNING: ERR_CAST can be used with mp Generated by: scripts/coccinelle/api/err_cast.cocci Signed-off-by: Xu Wang Link: https://lore.kernel.org/r/20210204070549.83636-1-vulab@iscas.ac.cn Signed-off-by: Jakub Kicinski --- net/bridge/br_multicast.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 6f672eb7ff33e..bf10ef5bbcd98 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -1251,7 +1251,7 @@ __br_multicast_add_group(struct net_bridge *br, mp = br_multicast_new_group(br, group); if (IS_ERR(mp)) - return ERR_PTR(PTR_ERR(mp)); + return ERR_CAST(mp); if (!port) { br_multicast_host_join(mp, true); -- GitLab From 247b557ee52a8f404d79d365ac6b2c94d7332381 Mon Sep 17 00:00:00 2001 From: Zheng Yongjun Date: Thu, 4 Feb 2021 15:28:20 +0800 Subject: [PATCH 1939/2012] dccp: Return the correct errno code When kalloc or kmemdup failed, should return ENOMEM rather than ENOBUF. Signed-off-by: Zheng Yongjun Link: https://lore.kernel.org/r/20210204072820.17723-1-zhengyongjun3@huawei.com Signed-off-by: Jakub Kicinski --- net/dccp/feat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/dccp/feat.c b/net/dccp/feat.c index 305f568048320..54086bb05c42c 100644 --- a/net/dccp/feat.c +++ b/net/dccp/feat.c @@ -371,7 +371,7 @@ static int dccp_feat_clone_sp_val(dccp_feat_val *fval, u8 const *val, u8 len) fval->sp.vec = kmemdup(val, len, gfp_any()); if (fval->sp.vec == NULL) { fval->sp.len = 0; - return -ENOBUFS; + return -ENOMEM; } } return 0; -- GitLab From a64566a22b6a943105b01f47e8ae97779cab1417 Mon Sep 17 00:00:00 2001 From: Zheng Yongjun Date: Thu, 4 Feb 2021 15:39:50 +0800 Subject: [PATCH 1940/2012] net: sched: Return the correct errno code When kalloc or kmemdup failed, should return ENOMEM rather than ENOBUF. Signed-off-by: Zheng Yongjun Link: https://lore.kernel.org/r/20210204073950.18372-1-zhengyongjun3@huawei.com Signed-off-by: Jakub Kicinski --- net/sched/em_nbyte.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sched/em_nbyte.c b/net/sched/em_nbyte.c index 2c1192a2ee5e1..a83b237cbeb06 100644 --- a/net/sched/em_nbyte.c +++ b/net/sched/em_nbyte.c @@ -31,7 +31,7 @@ static int em_nbyte_change(struct net *net, void *data, int data_len, em->datalen = sizeof(*nbyte) + nbyte->len; em->data = (unsigned long)kmemdup(data, em->datalen, GFP_KERNEL); if (em->data == 0UL) - return -ENOBUFS; + return -ENOMEM; return 0; } -- GitLab From d698e6a00a6092381f2966ac5410ac2dbcce88bc Mon Sep 17 00:00:00 2001 From: Loic Poulain Date: Thu, 4 Feb 2021 09:40:01 +0100 Subject: [PATCH 1941/2012] net: qualcomm: rmnet: Fix rx_handler for non-linear skbs There is no guarantee that rmnet rx_handler is only fed with linear skbs, but current rmnet implementation does not check that, leading to crash in case of non linear skbs processed as linear ones. Fix that by ensuring skb linearization before processing. Signed-off-by: Loic Poulain Acked-by: Willem de Bruijn Reviewed-by: Subash Abhinov Kasiviswanathan Link: https://lore.kernel.org/r/1612428002-12333-2-git-send-email-loic.poulain@linaro.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c index 3d7d3ab383f85..3d00b32323084 100644 --- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c +++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c @@ -183,6 +183,11 @@ rx_handler_result_t rmnet_rx_handler(struct sk_buff **pskb) if (!skb) goto done; + if (skb_linearize(skb)) { + kfree_skb(skb); + goto done; + } + if (skb->pkt_type == PACKET_LOOPBACK) return RX_HANDLER_PASS; -- GitLab From c1fcda2bdfd04179dbc81320a24baa539b476281 Mon Sep 17 00:00:00 2001 From: Loic Poulain Date: Thu, 4 Feb 2021 09:40:00 +0100 Subject: [PATCH 1942/2012] net: mhi-net: Add re-aggregation of fragmented packets When device side MTU is larger than host side MTU, the packets (typically rmnet packets) are split over multiple MHI transfers. In that case, fragments must be re-aggregated to recover the packet before forwarding to upper layer. A fragmented packet result in -EOVERFLOW MHI transaction status for each of its fragments, except the final one. Such transfer was previously considered as error and fragments were simply dropped. This change adds re-aggregation mechanism using skb chaining, via skb frag_list. A warning (once) is printed since this behavior usually comes from a misconfiguration of the device (e.g. modem MTU). Signed-off-by: Loic Poulain Acked-by: Jesse Brandeburg Link: https://lore.kernel.org/r/1612428002-12333-1-git-send-email-loic.poulain@linaro.org Signed-off-by: Jakub Kicinski --- drivers/net/mhi_net.c | 74 +++++++++++++++++++++++++++++++++++++------ 1 file changed, 64 insertions(+), 10 deletions(-) diff --git a/drivers/net/mhi_net.c b/drivers/net/mhi_net.c index 4f512531b7d0e..8800991937346 100644 --- a/drivers/net/mhi_net.c +++ b/drivers/net/mhi_net.c @@ -32,6 +32,8 @@ struct mhi_net_stats { struct mhi_net_dev { struct mhi_device *mdev; struct net_device *ndev; + struct sk_buff *skbagg_head; + struct sk_buff *skbagg_tail; struct delayed_work rx_refill; struct mhi_net_stats stats; u32 rx_queue_sz; @@ -132,6 +134,32 @@ static void mhi_net_setup(struct net_device *ndev) ndev->tx_queue_len = 1000; } +static struct sk_buff *mhi_net_skb_agg(struct mhi_net_dev *mhi_netdev, + struct sk_buff *skb) +{ + struct sk_buff *head = mhi_netdev->skbagg_head; + struct sk_buff *tail = mhi_netdev->skbagg_tail; + + /* This is non-paged skb chaining using frag_list */ + if (!head) { + mhi_netdev->skbagg_head = skb; + return skb; + } + + if (!skb_shinfo(head)->frag_list) + skb_shinfo(head)->frag_list = skb; + else + tail->next = skb; + + head->len += skb->len; + head->data_len += skb->len; + head->truesize += skb->truesize; + + mhi_netdev->skbagg_tail = skb; + + return mhi_netdev->skbagg_head; +} + static void mhi_net_dl_callback(struct mhi_device *mhi_dev, struct mhi_result *mhi_res) { @@ -142,19 +170,42 @@ static void mhi_net_dl_callback(struct mhi_device *mhi_dev, free_desc_count = mhi_get_free_desc_count(mhi_dev, DMA_FROM_DEVICE); if (unlikely(mhi_res->transaction_status)) { - dev_kfree_skb_any(skb); - - /* MHI layer stopping/resetting the DL channel */ - if (mhi_res->transaction_status == -ENOTCONN) + switch (mhi_res->transaction_status) { + case -EOVERFLOW: + /* Packet can not fit in one MHI buffer and has been + * split over multiple MHI transfers, do re-aggregation. + * That usually means the device side MTU is larger than + * the host side MTU/MRU. Since this is not optimal, + * print a warning (once). + */ + netdev_warn_once(mhi_netdev->ndev, + "Fragmented packets received, fix MTU?\n"); + skb_put(skb, mhi_res->bytes_xferd); + mhi_net_skb_agg(mhi_netdev, skb); + break; + case -ENOTCONN: + /* MHI layer stopping/resetting the DL channel */ + dev_kfree_skb_any(skb); return; - - u64_stats_update_begin(&mhi_netdev->stats.rx_syncp); - u64_stats_inc(&mhi_netdev->stats.rx_errors); - u64_stats_update_end(&mhi_netdev->stats.rx_syncp); + default: + /* Unknown error, simply drop */ + dev_kfree_skb_any(skb); + u64_stats_update_begin(&mhi_netdev->stats.rx_syncp); + u64_stats_inc(&mhi_netdev->stats.rx_errors); + u64_stats_update_end(&mhi_netdev->stats.rx_syncp); + } } else { + skb_put(skb, mhi_res->bytes_xferd); + + if (mhi_netdev->skbagg_head) { + /* Aggregate the final fragment */ + skb = mhi_net_skb_agg(mhi_netdev, skb); + mhi_netdev->skbagg_head = NULL; + } + u64_stats_update_begin(&mhi_netdev->stats.rx_syncp); u64_stats_inc(&mhi_netdev->stats.rx_packets); - u64_stats_add(&mhi_netdev->stats.rx_bytes, mhi_res->bytes_xferd); + u64_stats_add(&mhi_netdev->stats.rx_bytes, skb->len); u64_stats_update_end(&mhi_netdev->stats.rx_syncp); switch (skb->data[0] & 0xf0) { @@ -169,7 +220,6 @@ static void mhi_net_dl_callback(struct mhi_device *mhi_dev, break; } - skb_put(skb, mhi_res->bytes_xferd); netif_rx(skb); } @@ -267,6 +317,7 @@ static int mhi_net_probe(struct mhi_device *mhi_dev, dev_set_drvdata(dev, mhi_netdev); mhi_netdev->ndev = ndev; mhi_netdev->mdev = mhi_dev; + mhi_netdev->skbagg_head = NULL; SET_NETDEV_DEV(ndev, &mhi_dev->dev); SET_NETDEV_DEVTYPE(ndev, &wwan_type); @@ -301,6 +352,9 @@ static void mhi_net_remove(struct mhi_device *mhi_dev) mhi_unprepare_from_transfer(mhi_netdev->mdev); + if (mhi_netdev->skbagg_head) + kfree_skb(mhi_netdev->skbagg_head); + free_netdev(mhi_netdev->ndev); } -- GitLab From a455fcd7c77046d576dcfe41c1361928dd8b5eaf Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 4 Feb 2021 09:49:44 +0000 Subject: [PATCH 1943/2012] net: dwc-xlgmac: Fix spelling mistake in function name There is a spelling mistake in the function name alloc_channles_and_rings. Fix this by renaming it to alloc_channels_and_rings. Signed-off-by: Colin Ian King Link: https://lore.kernel.org/r/20210204094944.51460-1-colin.king@canonical.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/synopsys/dwc-xlgmac-desc.c | 2 +- drivers/net/ethernet/synopsys/dwc-xlgmac-net.c | 2 +- drivers/net/ethernet/synopsys/dwc-xlgmac.h | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/synopsys/dwc-xlgmac-desc.c b/drivers/net/ethernet/synopsys/dwc-xlgmac-desc.c index 8c4195a9a2cc6..589797bad1f9d 100644 --- a/drivers/net/ethernet/synopsys/dwc-xlgmac-desc.c +++ b/drivers/net/ethernet/synopsys/dwc-xlgmac-desc.c @@ -634,7 +634,7 @@ err_out: void xlgmac_init_desc_ops(struct xlgmac_desc_ops *desc_ops) { - desc_ops->alloc_channles_and_rings = xlgmac_alloc_channels_and_rings; + desc_ops->alloc_channels_and_rings = xlgmac_alloc_channels_and_rings; desc_ops->free_channels_and_rings = xlgmac_free_channels_and_rings; desc_ops->map_tx_skb = xlgmac_map_tx_skb; desc_ops->map_rx_buffer = xlgmac_map_rx_buffer; diff --git a/drivers/net/ethernet/synopsys/dwc-xlgmac-net.c b/drivers/net/ethernet/synopsys/dwc-xlgmac-net.c index 26aa7f32151f1..26d178f8616b6 100644 --- a/drivers/net/ethernet/synopsys/dwc-xlgmac-net.c +++ b/drivers/net/ethernet/synopsys/dwc-xlgmac-net.c @@ -654,7 +654,7 @@ static int xlgmac_open(struct net_device *netdev) pdata->rx_buf_size = ret; /* Allocate the channels and rings */ - ret = desc_ops->alloc_channles_and_rings(pdata); + ret = desc_ops->alloc_channels_and_rings(pdata); if (ret) return ret; diff --git a/drivers/net/ethernet/synopsys/dwc-xlgmac.h b/drivers/net/ethernet/synopsys/dwc-xlgmac.h index cab3e40a86b93..8598aaf3ec994 100644 --- a/drivers/net/ethernet/synopsys/dwc-xlgmac.h +++ b/drivers/net/ethernet/synopsys/dwc-xlgmac.h @@ -379,7 +379,7 @@ struct xlgmac_channel { } ____cacheline_aligned; struct xlgmac_desc_ops { - int (*alloc_channles_and_rings)(struct xlgmac_pdata *pdata); + int (*alloc_channels_and_rings)(struct xlgmac_pdata *pdata); void (*free_channels_and_rings)(struct xlgmac_pdata *pdata); int (*map_tx_skb)(struct xlgmac_channel *channel, struct sk_buff *skb); -- GitLab From b358e2122b9d7aa99f681d4edfafd999845d16ff Mon Sep 17 00:00:00 2001 From: Kevin Hao Date: Thu, 4 Feb 2021 18:56:35 +0800 Subject: [PATCH 1944/2012] mm: page_frag: Introduce page_frag_alloc_align() In the current implementation of page_frag_alloc(), it doesn't have any align guarantee for the returned buffer address. But for some hardwares they do require the DMA buffer to be aligned correctly, so we would have to use some workarounds like below if the buffers allocated by the page_frag_alloc() are used by these hardwares for DMA. buf = page_frag_alloc(really_needed_size + align); buf = PTR_ALIGN(buf, align); These codes seems ugly and would waste a lot of memories if the buffers are used in a network driver for the TX/RX. So introduce page_frag_alloc_align() to make sure that an aligned buffer address is returned. Signed-off-by: Kevin Hao Acked-by: Vlastimil Babka Reviewed-by: Alexander Duyck Signed-off-by: Jakub Kicinski --- include/linux/gfp.h | 12 ++++++++++-- mm/page_alloc.c | 8 +++++--- 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 6e479e9c48ceb..80544d5c08e7c 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -583,8 +583,16 @@ extern void free_pages(unsigned long addr, unsigned int order); struct page_frag_cache; extern void __page_frag_cache_drain(struct page *page, unsigned int count); -extern void *page_frag_alloc(struct page_frag_cache *nc, - unsigned int fragsz, gfp_t gfp_mask); +extern void *page_frag_alloc_align(struct page_frag_cache *nc, + unsigned int fragsz, gfp_t gfp_mask, + unsigned int align_mask); + +static inline void *page_frag_alloc(struct page_frag_cache *nc, + unsigned int fragsz, gfp_t gfp_mask) +{ + return page_frag_alloc_align(nc, fragsz, gfp_mask, ~0u); +} + extern void page_frag_free(void *addr); #define __free_page(page) __free_pages((page), 0) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 519a60d5b6f7d..ef5070fed76b9 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -5137,8 +5137,9 @@ void __page_frag_cache_drain(struct page *page, unsigned int count) } EXPORT_SYMBOL(__page_frag_cache_drain); -void *page_frag_alloc(struct page_frag_cache *nc, - unsigned int fragsz, gfp_t gfp_mask) +void *page_frag_alloc_align(struct page_frag_cache *nc, + unsigned int fragsz, gfp_t gfp_mask, + unsigned int align_mask) { unsigned int size = PAGE_SIZE; struct page *page; @@ -5190,11 +5191,12 @@ refill: } nc->pagecnt_bias--; + offset &= align_mask; nc->offset = offset; return nc->va + offset; } -EXPORT_SYMBOL(page_frag_alloc); +EXPORT_SYMBOL(page_frag_alloc_align); /* * Frees a page fragment allocated out of either a compound or order 0 page. -- GitLab From 3f6e687dff395da43b056c18150a423bc7bf5d14 Mon Sep 17 00:00:00 2001 From: Kevin Hao Date: Thu, 4 Feb 2021 18:56:36 +0800 Subject: [PATCH 1945/2012] net: Introduce {netdev,napi}_alloc_frag_align() In the current implementation of {netdev,napi}_alloc_frag(), it doesn't have any align guarantee for the returned buffer address, But for some hardwares they do require the DMA buffer to be aligned correctly, so we would have to use some workarounds like below if the buffers allocated by the {netdev,napi}_alloc_frag() are used by these hardwares for DMA. buf = napi_alloc_frag(really_needed_size + align); buf = PTR_ALIGN(buf, align); These codes seems ugly and would waste a lot of memories if the buffers are used in a network driver for the TX/RX. We have added the align support for the page_frag functions, so add the corresponding {netdev,napi}_frag functions. Signed-off-by: Kevin Hao Reviewed-by: Alexander Duyck Signed-off-by: Jakub Kicinski --- include/linux/skbuff.h | 36 ++++++++++++++++++++++++++++++++++-- net/core/skbuff.c | 26 ++++++++++---------------- 2 files changed, 44 insertions(+), 18 deletions(-) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 0e42c53b8ca93..0a4e91a2f8732 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2818,7 +2818,26 @@ void skb_queue_purge(struct sk_buff_head *list); unsigned int skb_rbtree_purge(struct rb_root *root); -void *netdev_alloc_frag(unsigned int fragsz); +void *__netdev_alloc_frag_align(unsigned int fragsz, unsigned int align_mask); + +/** + * netdev_alloc_frag - allocate a page fragment + * @fragsz: fragment size + * + * Allocates a frag from a page for receive buffer. + * Uses GFP_ATOMIC allocations. + */ +static inline void *netdev_alloc_frag(unsigned int fragsz) +{ + return __netdev_alloc_frag_align(fragsz, ~0u); +} + +static inline void *netdev_alloc_frag_align(unsigned int fragsz, + unsigned int align) +{ + WARN_ON_ONCE(!is_power_of_2(align)); + return __netdev_alloc_frag_align(fragsz, -align); +} struct sk_buff *__netdev_alloc_skb(struct net_device *dev, unsigned int length, gfp_t gfp_mask); @@ -2877,7 +2896,20 @@ static inline void skb_free_frag(void *addr) page_frag_free(addr); } -void *napi_alloc_frag(unsigned int fragsz); +void *__napi_alloc_frag_align(unsigned int fragsz, unsigned int align_mask); + +static inline void *napi_alloc_frag(unsigned int fragsz) +{ + return __napi_alloc_frag_align(fragsz, ~0u); +} + +static inline void *napi_alloc_frag_align(unsigned int fragsz, + unsigned int align) +{ + WARN_ON_ONCE(!is_power_of_2(align)); + return __napi_alloc_frag_align(fragsz, -align); +} + struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int length, gfp_t gfp_mask); static inline struct sk_buff *napi_alloc_skb(struct napi_struct *napi, diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 3787093239f58..d380c7b5a12dd 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -374,29 +374,23 @@ struct napi_alloc_cache { static DEFINE_PER_CPU(struct page_frag_cache, netdev_alloc_cache); static DEFINE_PER_CPU(struct napi_alloc_cache, napi_alloc_cache); -static void *__napi_alloc_frag(unsigned int fragsz, gfp_t gfp_mask) +static void *__alloc_frag_align(unsigned int fragsz, gfp_t gfp_mask, + unsigned int align_mask) { struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache); - return page_frag_alloc(&nc->page, fragsz, gfp_mask); + return page_frag_alloc_align(&nc->page, fragsz, gfp_mask, align_mask); } -void *napi_alloc_frag(unsigned int fragsz) +void *__napi_alloc_frag_align(unsigned int fragsz, unsigned int align_mask) { fragsz = SKB_DATA_ALIGN(fragsz); - return __napi_alloc_frag(fragsz, GFP_ATOMIC); + return __alloc_frag_align(fragsz, GFP_ATOMIC, align_mask); } -EXPORT_SYMBOL(napi_alloc_frag); +EXPORT_SYMBOL(__napi_alloc_frag_align); -/** - * netdev_alloc_frag - allocate a page fragment - * @fragsz: fragment size - * - * Allocates a frag from a page for receive buffer. - * Uses GFP_ATOMIC allocations. - */ -void *netdev_alloc_frag(unsigned int fragsz) +void *__netdev_alloc_frag_align(unsigned int fragsz, unsigned int align_mask) { struct page_frag_cache *nc; void *data; @@ -404,15 +398,15 @@ void *netdev_alloc_frag(unsigned int fragsz) fragsz = SKB_DATA_ALIGN(fragsz); if (in_irq() || irqs_disabled()) { nc = this_cpu_ptr(&netdev_alloc_cache); - data = page_frag_alloc(nc, fragsz, GFP_ATOMIC); + data = page_frag_alloc_align(nc, fragsz, GFP_ATOMIC, align_mask); } else { local_bh_disable(); - data = __napi_alloc_frag(fragsz, GFP_ATOMIC); + data = __alloc_frag_align(fragsz, GFP_ATOMIC, align_mask); local_bh_enable(); } return data; } -EXPORT_SYMBOL(netdev_alloc_frag); +EXPORT_SYMBOL(__netdev_alloc_frag_align); /** * __netdev_alloc_skb - allocate an skbuff for rx on a specific device -- GitLab From 1b041601c798a1a6bb3a651ce17aefd41979a1e2 Mon Sep 17 00:00:00 2001 From: Kevin Hao Date: Thu, 4 Feb 2021 18:56:37 +0800 Subject: [PATCH 1946/2012] net: octeontx2: Use napi_alloc_frag_align() to avoid the memory waste The napi_alloc_frag_align() will guarantee that a correctly align buffer address is returned. So use this function to simplify the buffer alloc and avoid the unnecessary memory waste. Signed-off-by: Kevin Hao Tested-by: Subbaraya Sundeep Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c index 5ddedc3b754d5..cbd68fa9f1d62 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c @@ -488,11 +488,10 @@ dma_addr_t __otx2_alloc_rbuf(struct otx2_nic *pfvf, struct otx2_pool *pool) dma_addr_t iova; u8 *buf; - buf = napi_alloc_frag(pool->rbsize + OTX2_ALIGN); + buf = napi_alloc_frag_align(pool->rbsize, OTX2_ALIGN); if (unlikely(!buf)) return -ENOMEM; - buf = PTR_ALIGN(buf, OTX2_ALIGN); iova = dma_map_single_attrs(pfvf->dev, buf, pool->rbsize, DMA_FROM_DEVICE, DMA_ATTR_SKIP_CPU_SYNC); if (unlikely(dma_mapping_error(pfvf->dev, iova))) { -- GitLab From d0dfbb9912d9477578f41c5200d7eac3da899dce Mon Sep 17 00:00:00 2001 From: Kevin Hao Date: Thu, 4 Feb 2021 18:56:38 +0800 Subject: [PATCH 1947/2012] net: dpaa2: Use napi_alloc_frag_align() to avoid the memory waste The napi_alloc_frag_align() will guarantee that a correctly align buffer address is returned. So use this function to simplify the buffer alloc and avoid the unnecessary memory waste. Signed-off-by: Kevin Hao Reviewed-by: Ioana Ciornei Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c index c5ceebca8caeb..19f74d4cbb4eb 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c @@ -764,12 +764,11 @@ static int dpaa2_eth_build_sg_fd(struct dpaa2_eth_priv *priv, /* Prepare the HW SGT structure */ sgt_buf_size = priv->tx_data_offset + sizeof(struct dpaa2_sg_entry) * num_dma_bufs; - sgt_buf = napi_alloc_frag(sgt_buf_size + DPAA2_ETH_TX_BUF_ALIGN); + sgt_buf = napi_alloc_frag_align(sgt_buf_size, DPAA2_ETH_TX_BUF_ALIGN); if (unlikely(!sgt_buf)) { err = -ENOMEM; goto sgt_buf_alloc_failed; } - sgt_buf = PTR_ALIGN(sgt_buf, DPAA2_ETH_TX_BUF_ALIGN); memset(sgt_buf, 0, sgt_buf_size); sgt = (struct dpaa2_sg_entry *)(sgt_buf + priv->tx_data_offset); -- GitLab From 8cc8993cbcee7dd4a8763e70ef46aba327dcac00 Mon Sep 17 00:00:00 2001 From: Emil Renner Berthing Date: Thu, 4 Feb 2021 18:39:47 +0100 Subject: [PATCH 1948/2012] net: wan: farsync: use new tasklet API This converts the driver to use the new tasklet API introduced in commit 12cc923f1ccc ("tasklet: Introduce new initialization API") The new API changes the argument passed to callback functions, but fortunately it is unused so it is straight forward to use DECLARE_TASKLET rather than DECLARE_TASLKLET_OLD. Signed-off-by: Emil Renner Berthing Link: https://lore.kernel.org/r/20210204173947.92884-1-kernel@esmil.dk Signed-off-by: Jakub Kicinski --- drivers/net/wan/farsync.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/net/wan/farsync.c b/drivers/net/wan/farsync.c index b50cf11d197d5..686a25d3b5121 100644 --- a/drivers/net/wan/farsync.c +++ b/drivers/net/wan/farsync.c @@ -566,11 +566,11 @@ MODULE_DEVICE_TABLE(pci, fst_pci_dev_id); static void do_bottom_half_tx(struct fst_card_info *card); static void do_bottom_half_rx(struct fst_card_info *card); -static void fst_process_tx_work_q(unsigned long work_q); -static void fst_process_int_work_q(unsigned long work_q); +static void fst_process_tx_work_q(struct tasklet_struct *unused); +static void fst_process_int_work_q(struct tasklet_struct *unused); -static DECLARE_TASKLET_OLD(fst_tx_task, fst_process_tx_work_q); -static DECLARE_TASKLET_OLD(fst_int_task, fst_process_int_work_q); +static DECLARE_TASKLET(fst_tx_task, fst_process_tx_work_q); +static DECLARE_TASKLET(fst_int_task, fst_process_int_work_q); static struct fst_card_info *fst_card_array[FST_MAX_CARDS]; static spinlock_t fst_work_q_lock; @@ -600,7 +600,7 @@ fst_q_work_item(u64 * queue, int card_index) } static void -fst_process_tx_work_q(unsigned long /*void **/work_q) +fst_process_tx_work_q(struct tasklet_struct *unused) { unsigned long flags; u64 work_txq; @@ -630,7 +630,7 @@ fst_process_tx_work_q(unsigned long /*void **/work_q) } static void -fst_process_int_work_q(unsigned long /*void **/work_q) +fst_process_int_work_q(struct tasklet_struct *unused) { unsigned long flags; u64 work_intq; -- GitLab From 694a0006c0b15ed22aa53dc4b244d64c5f12e45e Mon Sep 17 00:00:00 2001 From: Russell King Date: Fri, 5 Feb 2021 10:39:59 +0000 Subject: [PATCH 1949/2012] net: pcs: add pcs-lynx 1000BASE-X support Add support for 1000BASE-X to pcs-lynx for the LX2160A. This commit prepares the ground work for allowing 1G fiber connections to be used with DPAA2 on the SolidRun CEX7 platforms. Reviewed-by: Ioana Ciornei Signed-off-by: Russell King Signed-off-by: Jakub Kicinski --- drivers/net/pcs/pcs-lynx.c | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/drivers/net/pcs/pcs-lynx.c b/drivers/net/pcs/pcs-lynx.c index 62bb9272dcb26..af36cd647bf54 100644 --- a/drivers/net/pcs/pcs-lynx.c +++ b/drivers/net/pcs/pcs-lynx.c @@ -11,6 +11,7 @@ #define LINK_TIMER_VAL(ns) ((u32)((ns) / SGMII_CLOCK_PERIOD_NS)) #define SGMII_AN_LINK_TIMER_NS 1600000 /* defined by SGMII spec */ +#define IEEE8023_LINK_TIMER_NS 10000000 #define LINK_TIMER_LO 0x12 #define LINK_TIMER_HI 0x13 @@ -83,6 +84,7 @@ static void lynx_pcs_get_state(struct phylink_pcs *pcs, struct lynx_pcs *lynx = phylink_pcs_to_lynx(pcs); switch (state->interface) { + case PHY_INTERFACE_MODE_1000BASEX: case PHY_INTERFACE_MODE_SGMII: case PHY_INTERFACE_MODE_QSGMII: phylink_mii_c22_pcs_get_state(lynx->mdio, state); @@ -108,6 +110,30 @@ static void lynx_pcs_get_state(struct phylink_pcs *pcs, state->link, state->an_enabled, state->an_complete); } +static int lynx_pcs_config_1000basex(struct mdio_device *pcs, + unsigned int mode, + const unsigned long *advertising) +{ + struct mii_bus *bus = pcs->bus; + int addr = pcs->addr; + u32 link_timer; + int err; + + link_timer = LINK_TIMER_VAL(IEEE8023_LINK_TIMER_NS); + mdiobus_write(bus, addr, LINK_TIMER_LO, link_timer & 0xffff); + mdiobus_write(bus, addr, LINK_TIMER_HI, link_timer >> 16); + + err = mdiobus_modify(bus, addr, IF_MODE, + IF_MODE_SGMII_EN | IF_MODE_USE_SGMII_AN, + 0); + if (err) + return err; + + return phylink_mii_c22_pcs_config(pcs, mode, + PHY_INTERFACE_MODE_1000BASEX, + advertising); +} + static int lynx_pcs_config_sgmii(struct mdio_device *pcs, unsigned int mode, const unsigned long *advertising) { @@ -163,6 +189,8 @@ static int lynx_pcs_config(struct phylink_pcs *pcs, unsigned int mode, struct lynx_pcs *lynx = phylink_pcs_to_lynx(pcs); switch (ifmode) { + case PHY_INTERFACE_MODE_1000BASEX: + return lynx_pcs_config_1000basex(lynx->mdio, mode, advertising); case PHY_INTERFACE_MODE_SGMII: case PHY_INTERFACE_MODE_QSGMII: return lynx_pcs_config_sgmii(lynx->mdio, mode, advertising); @@ -185,6 +213,13 @@ static int lynx_pcs_config(struct phylink_pcs *pcs, unsigned int mode, return 0; } +static void lynx_pcs_an_restart(struct phylink_pcs *pcs) +{ + struct lynx_pcs *lynx = phylink_pcs_to_lynx(pcs); + + phylink_mii_c22_pcs_an_restart(lynx->mdio); +} + static void lynx_pcs_link_up_sgmii(struct mdio_device *pcs, unsigned int mode, int speed, int duplex) { @@ -290,6 +325,7 @@ static void lynx_pcs_link_up(struct phylink_pcs *pcs, unsigned int mode, static const struct phylink_pcs_ops lynx_pcs_phylink_ops = { .pcs_get_state = lynx_pcs_get_state, .pcs_config = lynx_pcs_config, + .pcs_an_restart = lynx_pcs_an_restart, .pcs_link_up = lynx_pcs_link_up, }; -- GitLab From 46c518c8145bb23702d5b860c1bcdc7c51bdc3d4 Mon Sep 17 00:00:00 2001 From: Russell King Date: Fri, 5 Feb 2021 10:40:04 +0000 Subject: [PATCH 1950/2012] net: dpaa2-mac: add 1000BASE-X support Now that pcs-lynx supports 1000BASE-X, add support for this interface mode to dpaa2-mac. pcs-lynx can be switched at runtime between SGMII and 1000BASE-X mode, so allow dpaa2-mac to switch between these as well. This commit prepares the ground work for allowing 1G fiber connections to be used with DPAA2 on the SolidRun CEX7 platforms. Reviewed-by: Ioana Ciornei Signed-off-by: Russell King Signed-off-by: Jakub Kicinski --- .../net/ethernet/freescale/dpaa2/dpaa2-mac.c | 20 ++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.c index 69ad869446cfc..3ddfb40eb5e4d 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.c @@ -79,10 +79,20 @@ static bool dpaa2_mac_phy_mode_mismatch(struct dpaa2_mac *mac, phy_interface_t interface) { switch (interface) { + /* We can switch between SGMII and 1000BASE-X at runtime with + * pcs-lynx + */ + case PHY_INTERFACE_MODE_SGMII: + case PHY_INTERFACE_MODE_1000BASEX: + if (mac->pcs && + (mac->if_mode == PHY_INTERFACE_MODE_SGMII || + mac->if_mode == PHY_INTERFACE_MODE_1000BASEX)) + return false; + return interface != mac->if_mode; + case PHY_INTERFACE_MODE_10GBASER: case PHY_INTERFACE_MODE_USXGMII: case PHY_INTERFACE_MODE_QSGMII: - case PHY_INTERFACE_MODE_SGMII: case PHY_INTERFACE_MODE_RGMII: case PHY_INTERFACE_MODE_RGMII_ID: case PHY_INTERFACE_MODE_RGMII_RXID: @@ -122,13 +132,17 @@ static void dpaa2_mac_validate(struct phylink_config *config, fallthrough; case PHY_INTERFACE_MODE_SGMII: case PHY_INTERFACE_MODE_QSGMII: + case PHY_INTERFACE_MODE_1000BASEX: case PHY_INTERFACE_MODE_RGMII: case PHY_INTERFACE_MODE_RGMII_ID: case PHY_INTERFACE_MODE_RGMII_RXID: case PHY_INTERFACE_MODE_RGMII_TXID: - phylink_set(mask, 10baseT_Full); - phylink_set(mask, 100baseT_Full); + phylink_set(mask, 1000baseX_Full); phylink_set(mask, 1000baseT_Full); + if (state->interface == PHY_INTERFACE_MODE_1000BASEX) + break; + phylink_set(mask, 100baseT_Full); + phylink_set(mask, 10baseT_Full); break; default: goto empty_set; -- GitLab From 085f1776fa03bc771876aabf086de11f3e2ce59c Mon Sep 17 00:00:00 2001 From: Russell King Date: Fri, 5 Feb 2021 10:40:09 +0000 Subject: [PATCH 1951/2012] net: dpaa2-mac: add backplane link mode support Add support for backplane link mode, which is, according to discussions with NXP earlier in the year, is a mode where the OS (Linux) is able to manage the PCS and Serdes itself. This commit prepares the ground work for allowing 1G fiber connections to be used with DPAA2 on the SolidRun CEX7 platforms. Signed-off-by: Russell King Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h | 4 +++- drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.c | 5 +++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h index c3d456c45102a..9b6a89709ce14 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h @@ -695,7 +695,9 @@ static inline unsigned int dpaa2_eth_rx_head_room(struct dpaa2_eth_priv *priv) static inline bool dpaa2_eth_is_type_phy(struct dpaa2_eth_priv *priv) { - if (priv->mac && priv->mac->attr.link_type == DPMAC_LINK_TYPE_PHY) + if (priv->mac && + (priv->mac->attr.link_type == DPMAC_LINK_TYPE_PHY || + priv->mac->attr.link_type == DPMAC_LINK_TYPE_BACKPLANE)) return true; return false; diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.c index 3ddfb40eb5e4d..ccaf7e35abeba 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.c @@ -315,8 +315,9 @@ int dpaa2_mac_connect(struct dpaa2_mac *mac) goto err_put_node; } - if (mac->attr.link_type == DPMAC_LINK_TYPE_PHY && - mac->attr.eth_if != DPMAC_ETH_IF_RGMII) { + if ((mac->attr.link_type == DPMAC_LINK_TYPE_PHY && + mac->attr.eth_if != DPMAC_ETH_IF_RGMII) || + mac->attr.link_type == DPMAC_LINK_TYPE_BACKPLANE) { err = dpaa2_pcs_create(mac, dpmac_node, mac->attr.id); if (err) goto err_put_node; -- GitLab From 1002b89f23eaa6d48ca1d2f362e894086bd063f1 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Thu, 4 Feb 2021 15:23:29 -0800 Subject: [PATCH 1952/2012] selftests: mptcp: add command line arguments for mptcp_join.sh Since the mptcp_join script is becoming too big, this patch splits it into several smaller chunks, each of them has been defined in a function as a individual test group for several related testcases. Using bash getopts function to parse command line arguments, and invoke each function to do the individual test group. Here are all the arguments: -f subflows_tests -s signal_address_tests -l link_failure_tests -t add_addr_timeout_tests -r remove_tests -a add_tests -6 ipv6_tests -4 v4mapped_tests -b backup_tests -p add_addr_ports_tests -c syncookies_tests -h help Run mptcp_join.sh with no argument will execute all testcases. Signed-off-by: Geliang Tang Signed-off-by: Mat Martineau Signed-off-by: Jakub Kicinski --- .../testing/selftests/net/mptcp/mptcp_join.sh | 1068 +++++++++-------- 1 file changed, 590 insertions(+), 478 deletions(-) diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index b8fd924033b1e..964db9ed544f9 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -680,6 +680,551 @@ chk_prio_nr() fi } +subflows_tests() +{ + reset + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr "no JOIN" "0" "0" "0" + + # subflow limited by client + reset + ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr "single subflow, limited by client" 0 0 0 + + # subflow limited by server + reset + ip netns exec $ns2 ./pm_nl_ctl limits 0 1 + ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr "single subflow, limited by server" 1 1 0 + + # subflow + reset + ip netns exec $ns1 ./pm_nl_ctl limits 0 1 + ip netns exec $ns2 ./pm_nl_ctl limits 0 1 + ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr "single subflow" 1 1 1 + + # multiple subflows + reset + ip netns exec $ns1 ./pm_nl_ctl limits 0 2 + ip netns exec $ns2 ./pm_nl_ctl limits 0 2 + ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow + ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags subflow + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr "multiple subflows" 2 2 2 + + # multiple subflows limited by serverf + reset + ip netns exec $ns1 ./pm_nl_ctl limits 0 1 + ip netns exec $ns2 ./pm_nl_ctl limits 0 2 + ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow + ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags subflow + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr "multiple subflows, limited by server" 2 2 1 +} + +signal_address_tests() +{ + # add_address, unused + reset + ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr "unused signal address" 0 0 0 + chk_add_nr 1 1 + + # accept and use add_addr + reset + ip netns exec $ns1 ./pm_nl_ctl limits 0 1 + ip netns exec $ns2 ./pm_nl_ctl limits 1 1 + ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr "signal address" 1 1 1 + chk_add_nr 1 1 + + # accept and use add_addr with an additional subflow + # note: signal address in server ns and local addresses in client ns must + # belong to different subnets or one of the listed local address could be + # used for 'add_addr' subflow + reset + ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal + ip netns exec $ns1 ./pm_nl_ctl limits 0 2 + ip netns exec $ns2 ./pm_nl_ctl limits 1 2 + ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr "subflow and signal" 2 2 2 + chk_add_nr 1 1 + + # accept and use add_addr with additional subflows + reset + ip netns exec $ns1 ./pm_nl_ctl limits 0 3 + ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal + ip netns exec $ns2 ./pm_nl_ctl limits 1 3 + ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow + ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 flags subflow + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr "multiple subflows and signal" 3 3 3 + chk_add_nr 1 1 +} + +link_failure_tests() +{ + # accept and use add_addr with additional subflows and link loss + reset + ip netns exec $ns1 ./pm_nl_ctl limits 0 3 + ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal + ip netns exec $ns2 ./pm_nl_ctl limits 1 3 + ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow + ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 flags subflow + run_tests $ns1 $ns2 10.0.1.1 1 + chk_join_nr "multiple flows, signal, link failure" 3 3 3 + chk_add_nr 1 1 +} + +add_addr_timeout_tests() +{ + # add_addr timeout + reset_with_add_addr_timeout + ip netns exec $ns1 ./pm_nl_ctl limits 0 1 + ip netns exec $ns2 ./pm_nl_ctl limits 1 1 + ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal + run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow + chk_join_nr "signal address, ADD_ADDR timeout" 1 1 1 + chk_add_nr 4 0 + + # add_addr timeout IPv6 + reset_with_add_addr_timeout 6 + ip netns exec $ns1 ./pm_nl_ctl limits 0 1 + ip netns exec $ns2 ./pm_nl_ctl limits 1 1 + ip netns exec $ns1 ./pm_nl_ctl add dead:beef:2::1 flags signal + run_tests $ns1 $ns2 dead:beef:1::1 0 0 0 slow + chk_join_nr "signal address, ADD_ADDR6 timeout" 1 1 1 + chk_add_nr 4 0 +} + +remove_tests() +{ + # single subflow, remove + reset + ip netns exec $ns1 ./pm_nl_ctl limits 0 1 + ip netns exec $ns2 ./pm_nl_ctl limits 0 1 + ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow + run_tests $ns1 $ns2 10.0.1.1 0 0 -1 slow + chk_join_nr "remove single subflow" 1 1 1 + chk_rm_nr 1 1 + + # multiple subflows, remove + reset + ip netns exec $ns1 ./pm_nl_ctl limits 0 2 + ip netns exec $ns2 ./pm_nl_ctl limits 0 2 + ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags subflow + ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow + run_tests $ns1 $ns2 10.0.1.1 0 0 -2 slow + chk_join_nr "remove multiple subflows" 2 2 2 + chk_rm_nr 2 2 + + # single address, remove + reset + ip netns exec $ns1 ./pm_nl_ctl limits 0 1 + ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal + ip netns exec $ns2 ./pm_nl_ctl limits 1 1 + run_tests $ns1 $ns2 10.0.1.1 0 -1 0 slow + chk_join_nr "remove single address" 1 1 1 + chk_add_nr 1 1 + chk_rm_nr 0 0 + + # subflow and signal, remove + reset + ip netns exec $ns1 ./pm_nl_ctl limits 0 2 + ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal + ip netns exec $ns2 ./pm_nl_ctl limits 1 2 + ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow + run_tests $ns1 $ns2 10.0.1.1 0 -1 -1 slow + chk_join_nr "remove subflow and signal" 2 2 2 + chk_add_nr 1 1 + chk_rm_nr 1 1 + + # subflows and signal, remove + reset + ip netns exec $ns1 ./pm_nl_ctl limits 0 3 + ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal + ip netns exec $ns2 ./pm_nl_ctl limits 1 3 + ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow + ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 flags subflow + run_tests $ns1 $ns2 10.0.1.1 0 -1 -2 slow + chk_join_nr "remove subflows and signal" 3 3 3 + chk_add_nr 1 1 + chk_rm_nr 2 2 + + # subflows and signal, flush + reset + ip netns exec $ns1 ./pm_nl_ctl limits 0 3 + ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal + ip netns exec $ns2 ./pm_nl_ctl limits 1 3 + ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow + ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 flags subflow + run_tests $ns1 $ns2 10.0.1.1 0 -8 -8 slow + chk_join_nr "flush subflows and signal" 3 3 3 + chk_add_nr 1 1 + chk_rm_nr 2 2 +} + +add_tests() +{ + # add single subflow + reset + ip netns exec $ns1 ./pm_nl_ctl limits 0 1 + ip netns exec $ns2 ./pm_nl_ctl limits 0 1 + run_tests $ns1 $ns2 10.0.1.1 0 0 1 slow + chk_join_nr "add single subflow" 1 1 1 + + # add signal address + reset + ip netns exec $ns1 ./pm_nl_ctl limits 0 1 + ip netns exec $ns2 ./pm_nl_ctl limits 1 1 + run_tests $ns1 $ns2 10.0.1.1 0 1 0 slow + chk_join_nr "add signal address" 1 1 1 + chk_add_nr 1 1 + + # add multiple subflows + reset + ip netns exec $ns1 ./pm_nl_ctl limits 0 2 + ip netns exec $ns2 ./pm_nl_ctl limits 0 2 + run_tests $ns1 $ns2 10.0.1.1 0 0 2 slow + chk_join_nr "add multiple subflows" 2 2 2 + + # add multiple subflows IPv6 + reset + ip netns exec $ns1 ./pm_nl_ctl limits 0 2 + ip netns exec $ns2 ./pm_nl_ctl limits 0 2 + run_tests $ns1 $ns2 dead:beef:1::1 0 0 2 slow + chk_join_nr "add multiple subflows IPv6" 2 2 2 + + # add multiple addresses IPv6 + reset + ip netns exec $ns1 ./pm_nl_ctl limits 0 2 + ip netns exec $ns2 ./pm_nl_ctl limits 2 2 + run_tests $ns1 $ns2 dead:beef:1::1 0 2 0 slow + chk_join_nr "add multiple addresses IPv6" 2 2 2 + chk_add_nr 2 2 +} + +ipv6_tests() +{ + # subflow IPv6 + reset + ip netns exec $ns1 ./pm_nl_ctl limits 0 1 + ip netns exec $ns2 ./pm_nl_ctl limits 0 1 + ip netns exec $ns2 ./pm_nl_ctl add dead:beef:3::2 flags subflow + run_tests $ns1 $ns2 dead:beef:1::1 0 0 0 slow + chk_join_nr "single subflow IPv6" 1 1 1 + + # add_address, unused IPv6 + reset + ip netns exec $ns1 ./pm_nl_ctl add dead:beef:2::1 flags signal + run_tests $ns1 $ns2 dead:beef:1::1 0 0 0 slow + chk_join_nr "unused signal address IPv6" 0 0 0 + chk_add_nr 1 1 + + # signal address IPv6 + reset + ip netns exec $ns1 ./pm_nl_ctl limits 0 1 + ip netns exec $ns1 ./pm_nl_ctl add dead:beef:2::1 flags signal + ip netns exec $ns2 ./pm_nl_ctl limits 1 1 + run_tests $ns1 $ns2 dead:beef:1::1 0 0 0 slow + chk_join_nr "single address IPv6" 1 1 1 + chk_add_nr 1 1 + + # single address IPv6, remove + reset + ip netns exec $ns1 ./pm_nl_ctl limits 0 1 + ip netns exec $ns1 ./pm_nl_ctl add dead:beef:2::1 flags signal + ip netns exec $ns2 ./pm_nl_ctl limits 1 1 + run_tests $ns1 $ns2 dead:beef:1::1 0 -1 0 slow + chk_join_nr "remove single address IPv6" 1 1 1 + chk_add_nr 1 1 + chk_rm_nr 0 0 + + # subflow and signal IPv6, remove + reset + ip netns exec $ns1 ./pm_nl_ctl limits 0 2 + ip netns exec $ns1 ./pm_nl_ctl add dead:beef:2::1 flags signal + ip netns exec $ns2 ./pm_nl_ctl limits 1 2 + ip netns exec $ns2 ./pm_nl_ctl add dead:beef:3::2 flags subflow + run_tests $ns1 $ns2 dead:beef:1::1 0 -1 -1 slow + chk_join_nr "remove subflow and signal IPv6" 2 2 2 + chk_add_nr 1 1 + chk_rm_nr 1 1 +} + +v4mapped_tests() +{ + # subflow IPv4-mapped to IPv4-mapped + reset + ip netns exec $ns1 ./pm_nl_ctl limits 0 1 + ip netns exec $ns2 ./pm_nl_ctl limits 0 1 + ip netns exec $ns2 ./pm_nl_ctl add "::ffff:10.0.3.2" flags subflow + run_tests $ns1 $ns2 "::ffff:10.0.1.1" + chk_join_nr "single subflow IPv4-mapped" 1 1 1 + + # signal address IPv4-mapped with IPv4-mapped sk + reset + ip netns exec $ns1 ./pm_nl_ctl limits 0 1 + ip netns exec $ns2 ./pm_nl_ctl limits 1 1 + ip netns exec $ns1 ./pm_nl_ctl add "::ffff:10.0.2.1" flags signal + run_tests $ns1 $ns2 "::ffff:10.0.1.1" + chk_join_nr "signal address IPv4-mapped" 1 1 1 + chk_add_nr 1 1 + + # subflow v4-map-v6 + reset + ip netns exec $ns1 ./pm_nl_ctl limits 0 1 + ip netns exec $ns2 ./pm_nl_ctl limits 0 1 + ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow + run_tests $ns1 $ns2 "::ffff:10.0.1.1" + chk_join_nr "single subflow v4-map-v6" 1 1 1 + + # signal address v4-map-v6 + reset + ip netns exec $ns1 ./pm_nl_ctl limits 0 1 + ip netns exec $ns2 ./pm_nl_ctl limits 1 1 + ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal + run_tests $ns1 $ns2 "::ffff:10.0.1.1" + chk_join_nr "signal address v4-map-v6" 1 1 1 + chk_add_nr 1 1 + + # subflow v6-map-v4 + reset + ip netns exec $ns1 ./pm_nl_ctl limits 0 1 + ip netns exec $ns2 ./pm_nl_ctl limits 0 1 + ip netns exec $ns2 ./pm_nl_ctl add "::ffff:10.0.3.2" flags subflow + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr "single subflow v6-map-v4" 1 1 1 + + # signal address v6-map-v4 + reset + ip netns exec $ns1 ./pm_nl_ctl limits 0 1 + ip netns exec $ns2 ./pm_nl_ctl limits 1 1 + ip netns exec $ns1 ./pm_nl_ctl add "::ffff:10.0.2.1" flags signal + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr "signal address v6-map-v4" 1 1 1 + chk_add_nr 1 1 + + # no subflow IPv6 to v4 address + reset + ip netns exec $ns1 ./pm_nl_ctl limits 0 1 + ip netns exec $ns2 ./pm_nl_ctl limits 0 1 + ip netns exec $ns2 ./pm_nl_ctl add dead:beef:2::2 flags subflow + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr "no JOIN with diff families v4-v6" 0 0 0 + + # no subflow IPv6 to v4 address even if v6 has a valid v4 at the end + reset + ip netns exec $ns1 ./pm_nl_ctl limits 0 1 + ip netns exec $ns2 ./pm_nl_ctl limits 0 1 + ip netns exec $ns2 ./pm_nl_ctl add dead:beef:2::10.0.3.2 flags subflow + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr "no JOIN with diff families v4-v6-2" 0 0 0 + + # no subflow IPv4 to v6 address, no need to slow down too then + reset + ip netns exec $ns1 ./pm_nl_ctl limits 0 1 + ip netns exec $ns2 ./pm_nl_ctl limits 0 1 + ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow + run_tests $ns1 $ns2 dead:beef:1::1 + chk_join_nr "no JOIN with diff families v6-v4" 0 0 0 +} + +backup_tests() +{ + # single subflow, backup + reset + ip netns exec $ns1 ./pm_nl_ctl limits 0 1 + ip netns exec $ns2 ./pm_nl_ctl limits 0 1 + ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow,backup + run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow nobackup + chk_join_nr "single subflow, backup" 1 1 1 + chk_prio_nr 0 1 + + # single address, backup + reset + ip netns exec $ns1 ./pm_nl_ctl limits 0 1 + ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal + ip netns exec $ns2 ./pm_nl_ctl limits 1 1 + run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow backup + chk_join_nr "single address, backup" 1 1 1 + chk_add_nr 1 1 + chk_prio_nr 1 0 +} + +add_addr_ports_tests() +{ + # signal address with port + reset + ip netns exec $ns1 ./pm_nl_ctl limits 0 1 + ip netns exec $ns2 ./pm_nl_ctl limits 1 1 + ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal port 10100 + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr "signal address with port" 1 1 1 + chk_add_nr 1 1 1 + + # subflow and signal with port + reset + ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal port 10100 + ip netns exec $ns1 ./pm_nl_ctl limits 0 2 + ip netns exec $ns2 ./pm_nl_ctl limits 1 2 + ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr "subflow and signal with port" 2 2 2 + chk_add_nr 1 1 1 + + # single address with port, remove + reset + ip netns exec $ns1 ./pm_nl_ctl limits 0 1 + ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal port 10100 + ip netns exec $ns2 ./pm_nl_ctl limits 1 1 + run_tests $ns1 $ns2 10.0.1.1 0 -1 0 slow + chk_join_nr "remove single address with port" 1 1 1 + chk_add_nr 1 1 1 + chk_rm_nr 0 0 + + # subflow and signal with port, remove + reset + ip netns exec $ns1 ./pm_nl_ctl limits 0 2 + ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal port 10100 + ip netns exec $ns2 ./pm_nl_ctl limits 1 2 + ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow + run_tests $ns1 $ns2 10.0.1.1 0 -1 -1 slow + chk_join_nr "remove subflow and signal with port" 2 2 2 + chk_add_nr 1 1 1 + chk_rm_nr 1 1 + + # subflows and signal with port, flush + reset + ip netns exec $ns1 ./pm_nl_ctl limits 0 3 + ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal port 10100 + ip netns exec $ns2 ./pm_nl_ctl limits 1 3 + ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow + ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 flags subflow + run_tests $ns1 $ns2 10.0.1.1 0 -8 -8 slow + chk_join_nr "flush subflows and signal with port" 3 3 3 + chk_add_nr 1 1 + chk_rm_nr 2 2 + + # multiple addresses with port + reset + ip netns exec $ns1 ./pm_nl_ctl limits 2 2 + ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal port 10100 + ip netns exec $ns1 ./pm_nl_ctl add 10.0.3.1 flags signal port 10100 + ip netns exec $ns2 ./pm_nl_ctl limits 2 2 + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr "multiple addresses with port" 2 2 2 + chk_add_nr 2 2 2 + + # multiple addresses with ports + reset + ip netns exec $ns1 ./pm_nl_ctl limits 2 2 + ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal port 10100 + ip netns exec $ns1 ./pm_nl_ctl add 10.0.3.1 flags signal port 10101 + ip netns exec $ns2 ./pm_nl_ctl limits 2 2 + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr "multiple addresses with ports" 2 2 2 + chk_add_nr 2 2 2 +} + +syncookies_tests() +{ + # single subflow, syncookies + reset_with_cookies + ip netns exec $ns1 ./pm_nl_ctl limits 0 1 + ip netns exec $ns2 ./pm_nl_ctl limits 0 1 + ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr "single subflow with syn cookies" 1 1 1 + + # multiple subflows with syn cookies + reset_with_cookies + ip netns exec $ns1 ./pm_nl_ctl limits 0 2 + ip netns exec $ns2 ./pm_nl_ctl limits 0 2 + ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow + ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags subflow + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr "multiple subflows with syn cookies" 2 2 2 + + # multiple subflows limited by server + reset_with_cookies + ip netns exec $ns1 ./pm_nl_ctl limits 0 1 + ip netns exec $ns2 ./pm_nl_ctl limits 0 2 + ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow + ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags subflow + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr "subflows limited by server w cookies" 2 2 1 + + # test signal address with cookies + reset_with_cookies + ip netns exec $ns1 ./pm_nl_ctl limits 0 1 + ip netns exec $ns2 ./pm_nl_ctl limits 1 1 + ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr "signal address with syn cookies" 1 1 1 + chk_add_nr 1 1 + + # test cookie with subflow and signal + reset_with_cookies + ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal + ip netns exec $ns1 ./pm_nl_ctl limits 0 2 + ip netns exec $ns2 ./pm_nl_ctl limits 1 2 + ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr "subflow and signal w cookies" 2 2 2 + chk_add_nr 1 1 + + # accept and use add_addr with additional subflows + reset_with_cookies + ip netns exec $ns1 ./pm_nl_ctl limits 0 3 + ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal + ip netns exec $ns2 ./pm_nl_ctl limits 1 3 + ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow + ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 flags subflow + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr "subflows and signal w. cookies" 3 3 3 + chk_add_nr 1 1 +} + +all_tests() +{ + subflows_tests + signal_address_tests + link_failure_tests + add_addr_timeout_tests + remove_tests + add_tests + ipv6_tests + v4mapped_tests + backup_tests + add_addr_ports_tests + syncookies_tests +} + +usage() +{ + echo "mptcp_join usage:" + echo " -f subflows_tests" + echo " -s signal_address_tests" + echo " -l link_failure_tests" + echo " -t add_addr_timeout_tests" + echo " -r remove_tests" + echo " -a add_tests" + echo " -6 ipv6_tests" + echo " -4 v4mapped_tests" + echo " -b backup_tests" + echo " -p add_addr_ports_tests" + echo " -c syncookies_tests" + echo " -h help" +} + sin=$(mktemp) sout=$(mktemp) cin=$(mktemp) @@ -690,483 +1235,50 @@ make_file "$cin" "client" 1 make_file "$sin" "server" 1 trap cleanup EXIT -run_tests $ns1 $ns2 10.0.1.1 -chk_join_nr "no JOIN" "0" "0" "0" - -# subflow limted by client -reset -ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow -run_tests $ns1 $ns2 10.0.1.1 -chk_join_nr "single subflow, limited by client" 0 0 0 - -# subflow limted by server -reset -ip netns exec $ns2 ./pm_nl_ctl limits 0 1 -ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow -run_tests $ns1 $ns2 10.0.1.1 -chk_join_nr "single subflow, limited by server" 1 1 0 - -# subflow -reset -ip netns exec $ns1 ./pm_nl_ctl limits 0 1 -ip netns exec $ns2 ./pm_nl_ctl limits 0 1 -ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow -run_tests $ns1 $ns2 10.0.1.1 -chk_join_nr "single subflow" 1 1 1 - -# multiple subflows -reset -ip netns exec $ns1 ./pm_nl_ctl limits 0 2 -ip netns exec $ns2 ./pm_nl_ctl limits 0 2 -ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow -ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags subflow -run_tests $ns1 $ns2 10.0.1.1 -chk_join_nr "multiple subflows" 2 2 2 - -# multiple subflows limited by serverf -reset -ip netns exec $ns1 ./pm_nl_ctl limits 0 1 -ip netns exec $ns2 ./pm_nl_ctl limits 0 2 -ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow -ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags subflow -run_tests $ns1 $ns2 10.0.1.1 -chk_join_nr "multiple subflows, limited by server" 2 2 1 - -# add_address, unused -reset -ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal -run_tests $ns1 $ns2 10.0.1.1 -chk_join_nr "unused signal address" 0 0 0 -chk_add_nr 1 1 - -# accept and use add_addr -reset -ip netns exec $ns1 ./pm_nl_ctl limits 0 1 -ip netns exec $ns2 ./pm_nl_ctl limits 1 1 -ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal -run_tests $ns1 $ns2 10.0.1.1 -chk_join_nr "signal address" 1 1 1 -chk_add_nr 1 1 - -# accept and use add_addr with an additional subflow -# note: signal address in server ns and local addresses in client ns must -# belong to different subnets or one of the listed local address could be -# used for 'add_addr' subflow -reset -ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal -ip netns exec $ns1 ./pm_nl_ctl limits 0 2 -ip netns exec $ns2 ./pm_nl_ctl limits 1 2 -ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow -run_tests $ns1 $ns2 10.0.1.1 -chk_join_nr "subflow and signal" 2 2 2 -chk_add_nr 1 1 - -# accept and use add_addr with additional subflows -reset -ip netns exec $ns1 ./pm_nl_ctl limits 0 3 -ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal -ip netns exec $ns2 ./pm_nl_ctl limits 1 3 -ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow -ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 flags subflow -run_tests $ns1 $ns2 10.0.1.1 -chk_join_nr "multiple subflows and signal" 3 3 3 -chk_add_nr 1 1 - -# accept and use add_addr with additional subflows and link loss -reset -ip netns exec $ns1 ./pm_nl_ctl limits 0 3 -ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal -ip netns exec $ns2 ./pm_nl_ctl limits 1 3 -ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow -ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 flags subflow -run_tests $ns1 $ns2 10.0.1.1 1 -chk_join_nr "multiple flows, signal, link failure" 3 3 3 -chk_add_nr 1 1 - -# add_addr timeout -reset_with_add_addr_timeout -ip netns exec $ns1 ./pm_nl_ctl limits 0 1 -ip netns exec $ns2 ./pm_nl_ctl limits 1 1 -ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal -run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow -chk_join_nr "signal address, ADD_ADDR timeout" 1 1 1 -chk_add_nr 4 0 - -# single subflow, remove -reset -ip netns exec $ns1 ./pm_nl_ctl limits 0 1 -ip netns exec $ns2 ./pm_nl_ctl limits 0 1 -ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow -run_tests $ns1 $ns2 10.0.1.1 0 0 -1 slow -chk_join_nr "remove single subflow" 1 1 1 -chk_rm_nr 1 1 - -# multiple subflows, remove -reset -ip netns exec $ns1 ./pm_nl_ctl limits 0 2 -ip netns exec $ns2 ./pm_nl_ctl limits 0 2 -ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags subflow -ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow -run_tests $ns1 $ns2 10.0.1.1 0 0 -2 slow -chk_join_nr "remove multiple subflows" 2 2 2 -chk_rm_nr 2 2 - -# single address, remove -reset -ip netns exec $ns1 ./pm_nl_ctl limits 0 1 -ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal -ip netns exec $ns2 ./pm_nl_ctl limits 1 1 -run_tests $ns1 $ns2 10.0.1.1 0 -1 0 slow -chk_join_nr "remove single address" 1 1 1 -chk_add_nr 1 1 -chk_rm_nr 0 0 - -# subflow and signal, remove -reset -ip netns exec $ns1 ./pm_nl_ctl limits 0 2 -ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal -ip netns exec $ns2 ./pm_nl_ctl limits 1 2 -ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow -run_tests $ns1 $ns2 10.0.1.1 0 -1 -1 slow -chk_join_nr "remove subflow and signal" 2 2 2 -chk_add_nr 1 1 -chk_rm_nr 1 1 - -# subflows and signal, remove -reset -ip netns exec $ns1 ./pm_nl_ctl limits 0 3 -ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal -ip netns exec $ns2 ./pm_nl_ctl limits 1 3 -ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow -ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 flags subflow -run_tests $ns1 $ns2 10.0.1.1 0 -1 -2 slow -chk_join_nr "remove subflows and signal" 3 3 3 -chk_add_nr 1 1 -chk_rm_nr 2 2 - -# subflows and signal, flush -reset -ip netns exec $ns1 ./pm_nl_ctl limits 0 3 -ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal -ip netns exec $ns2 ./pm_nl_ctl limits 1 3 -ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow -ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 flags subflow -run_tests $ns1 $ns2 10.0.1.1 0 -8 -8 slow -chk_join_nr "flush subflows and signal" 3 3 3 -chk_add_nr 1 1 -chk_rm_nr 2 2 - -# add single subflow -reset -ip netns exec $ns1 ./pm_nl_ctl limits 0 1 -ip netns exec $ns2 ./pm_nl_ctl limits 0 1 -run_tests $ns1 $ns2 10.0.1.1 0 0 1 slow -chk_join_nr "add single subflow" 1 1 1 - -# add signal address -reset -ip netns exec $ns1 ./pm_nl_ctl limits 0 1 -ip netns exec $ns2 ./pm_nl_ctl limits 1 1 -run_tests $ns1 $ns2 10.0.1.1 0 1 0 slow -chk_join_nr "add signal address" 1 1 1 -chk_add_nr 1 1 - -# add multiple subflows -reset -ip netns exec $ns1 ./pm_nl_ctl limits 0 2 -ip netns exec $ns2 ./pm_nl_ctl limits 0 2 -run_tests $ns1 $ns2 10.0.1.1 0 0 2 slow -chk_join_nr "add multiple subflows" 2 2 2 - -# add multiple subflows IPv6 -reset -ip netns exec $ns1 ./pm_nl_ctl limits 0 2 -ip netns exec $ns2 ./pm_nl_ctl limits 0 2 -run_tests $ns1 $ns2 dead:beef:1::1 0 0 2 slow -chk_join_nr "add multiple subflows IPv6" 2 2 2 - -# add multiple addresses IPv6 -reset -ip netns exec $ns1 ./pm_nl_ctl limits 0 2 -ip netns exec $ns2 ./pm_nl_ctl limits 2 2 -run_tests $ns1 $ns2 dead:beef:1::1 0 2 0 slow -chk_join_nr "add multiple addresses IPv6" 2 2 2 -chk_add_nr 2 2 - -# subflow IPv6 -reset -ip netns exec $ns1 ./pm_nl_ctl limits 0 1 -ip netns exec $ns2 ./pm_nl_ctl limits 0 1 -ip netns exec $ns2 ./pm_nl_ctl add dead:beef:3::2 flags subflow -run_tests $ns1 $ns2 dead:beef:1::1 0 0 0 slow -chk_join_nr "single subflow IPv6" 1 1 1 - -# add_address, unused IPv6 -reset -ip netns exec $ns1 ./pm_nl_ctl add dead:beef:2::1 flags signal -run_tests $ns1 $ns2 dead:beef:1::1 0 0 0 slow -chk_join_nr "unused signal address IPv6" 0 0 0 -chk_add_nr 1 1 - -# signal address IPv6 -reset -ip netns exec $ns1 ./pm_nl_ctl limits 0 1 -ip netns exec $ns1 ./pm_nl_ctl add dead:beef:2::1 flags signal -ip netns exec $ns2 ./pm_nl_ctl limits 1 1 -run_tests $ns1 $ns2 dead:beef:1::1 0 0 0 slow -chk_join_nr "single address IPv6" 1 1 1 -chk_add_nr 1 1 - -# add_addr timeout IPv6 -reset_with_add_addr_timeout 6 -ip netns exec $ns1 ./pm_nl_ctl limits 0 1 -ip netns exec $ns2 ./pm_nl_ctl limits 1 1 -ip netns exec $ns1 ./pm_nl_ctl add dead:beef:2::1 flags signal -run_tests $ns1 $ns2 dead:beef:1::1 0 0 0 slow -chk_join_nr "signal address, ADD_ADDR6 timeout" 1 1 1 -chk_add_nr 4 0 - -# single address IPv6, remove -reset -ip netns exec $ns1 ./pm_nl_ctl limits 0 1 -ip netns exec $ns1 ./pm_nl_ctl add dead:beef:2::1 flags signal -ip netns exec $ns2 ./pm_nl_ctl limits 1 1 -run_tests $ns1 $ns2 dead:beef:1::1 0 -1 0 slow -chk_join_nr "remove single address IPv6" 1 1 1 -chk_add_nr 1 1 -chk_rm_nr 0 0 - -# subflow and signal IPv6, remove -reset -ip netns exec $ns1 ./pm_nl_ctl limits 0 2 -ip netns exec $ns1 ./pm_nl_ctl add dead:beef:2::1 flags signal -ip netns exec $ns2 ./pm_nl_ctl limits 1 2 -ip netns exec $ns2 ./pm_nl_ctl add dead:beef:3::2 flags subflow -run_tests $ns1 $ns2 dead:beef:1::1 0 -1 -1 slow -chk_join_nr "remove subflow and signal IPv6" 2 2 2 -chk_add_nr 1 1 -chk_rm_nr 1 1 - -# subflow IPv4-mapped to IPv4-mapped -reset -ip netns exec $ns1 ./pm_nl_ctl limits 0 1 -ip netns exec $ns2 ./pm_nl_ctl limits 0 1 -ip netns exec $ns2 ./pm_nl_ctl add "::ffff:10.0.3.2" flags subflow -run_tests $ns1 $ns2 "::ffff:10.0.1.1" -chk_join_nr "single subflow IPv4-mapped" 1 1 1 - -# signal address IPv4-mapped with IPv4-mapped sk -reset -ip netns exec $ns1 ./pm_nl_ctl limits 0 1 -ip netns exec $ns2 ./pm_nl_ctl limits 1 1 -ip netns exec $ns1 ./pm_nl_ctl add "::ffff:10.0.2.1" flags signal -run_tests $ns1 $ns2 "::ffff:10.0.1.1" -chk_join_nr "signal address IPv4-mapped" 1 1 1 -chk_add_nr 1 1 - -# subflow v4-map-v6 -reset -ip netns exec $ns1 ./pm_nl_ctl limits 0 1 -ip netns exec $ns2 ./pm_nl_ctl limits 0 1 -ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow -run_tests $ns1 $ns2 "::ffff:10.0.1.1" -chk_join_nr "single subflow v4-map-v6" 1 1 1 - -# signal address v4-map-v6 -reset -ip netns exec $ns1 ./pm_nl_ctl limits 0 1 -ip netns exec $ns2 ./pm_nl_ctl limits 1 1 -ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal -run_tests $ns1 $ns2 "::ffff:10.0.1.1" -chk_join_nr "signal address v4-map-v6" 1 1 1 -chk_add_nr 1 1 - -# subflow v6-map-v4 -reset -ip netns exec $ns1 ./pm_nl_ctl limits 0 1 -ip netns exec $ns2 ./pm_nl_ctl limits 0 1 -ip netns exec $ns2 ./pm_nl_ctl add "::ffff:10.0.3.2" flags subflow -run_tests $ns1 $ns2 10.0.1.1 -chk_join_nr "single subflow v6-map-v4" 1 1 1 - -# signal address v6-map-v4 -reset -ip netns exec $ns1 ./pm_nl_ctl limits 0 1 -ip netns exec $ns2 ./pm_nl_ctl limits 1 1 -ip netns exec $ns1 ./pm_nl_ctl add "::ffff:10.0.2.1" flags signal -run_tests $ns1 $ns2 10.0.1.1 -chk_join_nr "signal address v6-map-v4" 1 1 1 -chk_add_nr 1 1 - -# no subflow IPv6 to v4 address -reset -ip netns exec $ns1 ./pm_nl_ctl limits 0 1 -ip netns exec $ns2 ./pm_nl_ctl limits 0 1 -ip netns exec $ns2 ./pm_nl_ctl add dead:beef:2::2 flags subflow -run_tests $ns1 $ns2 10.0.1.1 -chk_join_nr "no JOIN with diff families v4-v6" 0 0 0 - -# no subflow IPv6 to v4 address even if v6 has a valid v4 at the end -reset -ip netns exec $ns1 ./pm_nl_ctl limits 0 1 -ip netns exec $ns2 ./pm_nl_ctl limits 0 1 -ip netns exec $ns2 ./pm_nl_ctl add dead:beef:2::10.0.3.2 flags subflow -run_tests $ns1 $ns2 10.0.1.1 -chk_join_nr "no JOIN with diff families v4-v6-2" 0 0 0 - -# no subflow IPv4 to v6 address, no need to slow down too then -reset -ip netns exec $ns1 ./pm_nl_ctl limits 0 1 -ip netns exec $ns2 ./pm_nl_ctl limits 0 1 -ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow -run_tests $ns1 $ns2 dead:beef:1::1 -chk_join_nr "no JOIN with diff families v6-v4" 0 0 0 - -# single subflow, backup -reset -ip netns exec $ns1 ./pm_nl_ctl limits 0 1 -ip netns exec $ns2 ./pm_nl_ctl limits 0 1 -ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow,backup -run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow nobackup -chk_join_nr "single subflow, backup" 1 1 1 -chk_prio_nr 0 1 - -# single address, backup -reset -ip netns exec $ns1 ./pm_nl_ctl limits 0 1 -ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal -ip netns exec $ns2 ./pm_nl_ctl limits 1 1 -run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow backup -chk_join_nr "single address, backup" 1 1 1 -chk_add_nr 1 1 -chk_prio_nr 1 0 - -# signal address with port -reset -ip netns exec $ns1 ./pm_nl_ctl limits 0 1 -ip netns exec $ns2 ./pm_nl_ctl limits 1 1 -ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal port 10100 -run_tests $ns1 $ns2 10.0.1.1 -chk_join_nr "signal address with port" 1 1 1 -chk_add_nr 1 1 1 - -# subflow and signal with port -reset -ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal port 10100 -ip netns exec $ns1 ./pm_nl_ctl limits 0 2 -ip netns exec $ns2 ./pm_nl_ctl limits 1 2 -ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow -run_tests $ns1 $ns2 10.0.1.1 -chk_join_nr "subflow and signal with port" 2 2 2 -chk_add_nr 1 1 1 - -# single address with port, remove -reset -ip netns exec $ns1 ./pm_nl_ctl limits 0 1 -ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal port 10100 -ip netns exec $ns2 ./pm_nl_ctl limits 1 1 -run_tests $ns1 $ns2 10.0.1.1 0 -1 0 slow -chk_join_nr "remove single address with port" 1 1 1 -chk_add_nr 1 1 1 -chk_rm_nr 0 0 - -# subflow and signal with port, remove -reset -ip netns exec $ns1 ./pm_nl_ctl limits 0 2 -ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal port 10100 -ip netns exec $ns2 ./pm_nl_ctl limits 1 2 -ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow -run_tests $ns1 $ns2 10.0.1.1 0 -1 -1 slow -chk_join_nr "remove subflow and signal with port" 2 2 2 -chk_add_nr 1 1 1 -chk_rm_nr 1 1 - -# subflows and signal with port, flush -reset -ip netns exec $ns1 ./pm_nl_ctl limits 0 3 -ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal port 10100 -ip netns exec $ns2 ./pm_nl_ctl limits 1 3 -ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow -ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 flags subflow -run_tests $ns1 $ns2 10.0.1.1 0 -8 -8 slow -chk_join_nr "flush subflows and signal with port" 3 3 3 -chk_add_nr 1 1 -chk_rm_nr 2 2 - -# multiple addresses with port -reset -ip netns exec $ns1 ./pm_nl_ctl limits 2 2 -ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal port 10100 -ip netns exec $ns1 ./pm_nl_ctl add 10.0.3.1 flags signal port 10100 -ip netns exec $ns2 ./pm_nl_ctl limits 2 2 -run_tests $ns1 $ns2 10.0.1.1 -chk_join_nr "multiple addresses with port" 2 2 2 -chk_add_nr 2 2 2 - -# multiple addresses with ports -reset -ip netns exec $ns1 ./pm_nl_ctl limits 2 2 -ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal port 10100 -ip netns exec $ns1 ./pm_nl_ctl add 10.0.3.1 flags signal port 10101 -ip netns exec $ns2 ./pm_nl_ctl limits 2 2 -run_tests $ns1 $ns2 10.0.1.1 -chk_join_nr "multiple addresses with ports" 2 2 2 -chk_add_nr 2 2 2 - -# single subflow, syncookies -reset_with_cookies -ip netns exec $ns1 ./pm_nl_ctl limits 0 1 -ip netns exec $ns2 ./pm_nl_ctl limits 0 1 -ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow -run_tests $ns1 $ns2 10.0.1.1 -chk_join_nr "single subflow with syn cookies" 1 1 1 - -# multiple subflows with syn cookies -reset_with_cookies -ip netns exec $ns1 ./pm_nl_ctl limits 0 2 -ip netns exec $ns2 ./pm_nl_ctl limits 0 2 -ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow -ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags subflow -run_tests $ns1 $ns2 10.0.1.1 -chk_join_nr "multiple subflows with syn cookies" 2 2 2 - -# multiple subflows limited by server -reset_with_cookies -ip netns exec $ns1 ./pm_nl_ctl limits 0 1 -ip netns exec $ns2 ./pm_nl_ctl limits 0 2 -ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow -ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags subflow -run_tests $ns1 $ns2 10.0.1.1 -chk_join_nr "subflows limited by server w cookies" 2 2 1 - -# test signal address with cookies -reset_with_cookies -ip netns exec $ns1 ./pm_nl_ctl limits 0 1 -ip netns exec $ns2 ./pm_nl_ctl limits 1 1 -ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal -run_tests $ns1 $ns2 10.0.1.1 -chk_join_nr "signal address with syn cookies" 1 1 1 -chk_add_nr 1 1 - -# test cookie with subflow and signal -reset_with_cookies -ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal -ip netns exec $ns1 ./pm_nl_ctl limits 0 2 -ip netns exec $ns2 ./pm_nl_ctl limits 1 2 -ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow -run_tests $ns1 $ns2 10.0.1.1 -chk_join_nr "subflow and signal w cookies" 2 2 2 -chk_add_nr 1 1 - -# accept and use add_addr with additional subflows -reset_with_cookies -ip netns exec $ns1 ./pm_nl_ctl limits 0 3 -ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal -ip netns exec $ns2 ./pm_nl_ctl limits 1 3 -ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow -ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 flags subflow -run_tests $ns1 $ns2 10.0.1.1 -chk_join_nr "subflows and signal w. cookies" 3 3 3 -chk_add_nr 1 1 +if [ -z $1 ]; then + all_tests + exit $ret +fi + +while getopts 'fsltra64bpch' opt; do + case $opt in + f) + subflows_tests + ;; + s) + signal_address_tests + ;; + l) + link_failure_tests + ;; + t) + add_addr_timeout_tests + ;; + r) + remove_tests + ;; + a) + add_tests + ;; + 6) + ipv6_tests + ;; + 4) + v4mapped_tests + ;; + b) + backup_tests + ;; + p) + add_addr_ports_tests + ;; + c) + syncookies_tests + ;; + h | *) + usage + ;; + esac +done exit $ret -- GitLab From 3abc05d9ef6fe989706b679e1e6371d6360d3db4 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 4 Feb 2021 15:23:30 -0800 Subject: [PATCH 1953/2012] mptcp: pm: add lockdep assertions Add a few assertions to make sure functions are called with the needed locks held. Two functions gain might_sleep annotations because they contain conditional calls to functions that sleep. Signed-off-by: Florian Westphal Signed-off-by: Mat Martineau Signed-off-by: Jakub Kicinski --- net/mptcp/pm.c | 2 ++ net/mptcp/pm_netlink.c | 13 +++++++++++++ net/mptcp/protocol.c | 4 ++++ net/mptcp/protocol.h | 5 +++++ 4 files changed, 24 insertions(+) diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c index 3a22e73220b99..1a25003fd8e34 100644 --- a/net/mptcp/pm.c +++ b/net/mptcp/pm.c @@ -20,6 +20,8 @@ int mptcp_pm_announce_addr(struct mptcp_sock *msk, pr_debug("msk=%p, local_id=%d", msk, addr->id); + lockdep_assert_held(&msk->pm.lock); + if (add_addr) { pr_warn("addr_signal error, add_addr=%d", add_addr); return -EINVAL; diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index e7b1abb4f0c26..23780a13b9346 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -145,6 +145,8 @@ select_local_address(const struct pm_nl_pernet *pernet, struct mptcp_pm_addr_entry *entry, *ret = NULL; struct sock *sk = (struct sock *)msk; + msk_owned_by_me(msk); + rcu_read_lock(); __mptcp_flush_join_list(msk); list_for_each_entry_rcu(entry, &pernet->local_addr_list, list) { @@ -246,6 +248,8 @@ lookup_anno_list_by_saddr(struct mptcp_sock *msk, { struct mptcp_pm_add_entry *entry; + lockdep_assert_held(&msk->pm.lock); + list_for_each_entry(entry, &msk->pm.anno_list, list) { if (addresses_equal(&entry->addr, addr, true)) return entry; @@ -342,6 +346,8 @@ static bool mptcp_pm_alloc_anno_list(struct mptcp_sock *msk, struct sock *sk = (struct sock *)msk; struct net *net = sock_net(sk); + lockdep_assert_held(&msk->pm.lock); + if (lookup_anno_list_by_saddr(msk, &entry->addr)) return false; @@ -496,6 +502,9 @@ void mptcp_pm_nl_add_addr_send_ack(struct mptcp_sock *msk) { struct mptcp_subflow_context *subflow; + msk_owned_by_me(msk); + lockdep_assert_held(&msk->pm.lock); + if (!mptcp_pm_should_add_signal(msk)) return; @@ -566,6 +575,8 @@ void mptcp_pm_nl_rm_addr_received(struct mptcp_sock *msk) pr_debug("address rm_id %d", msk->pm.rm_id); + msk_owned_by_me(msk); + if (!msk->pm.rm_id) return; @@ -601,6 +612,8 @@ void mptcp_pm_nl_rm_subflow_received(struct mptcp_sock *msk, u8 rm_id) pr_debug("subflow rm_id %d", rm_id); + msk_owned_by_me(msk); + if (!rm_id) return; diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 1405e146dd7cc..b9f16a1535d2e 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -2187,6 +2187,8 @@ static void __mptcp_close_subflow(struct mptcp_sock *msk) { struct mptcp_subflow_context *subflow, *tmp; + might_sleep(); + list_for_each_entry_safe(subflow, tmp, &msk->conn_list, node) { struct sock *ssk = mptcp_subflow_tcp_sock(subflow); @@ -2529,6 +2531,8 @@ static void __mptcp_destroy_sock(struct sock *sk) pr_debug("msk=%p", msk); + might_sleep(); + /* dispose the ancillatory tcp socket, if any */ if (msk->subflow) { iput(SOCK_INODE(msk->subflow)); diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 1cc7948a1826f..73a923d02aad4 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -288,6 +288,11 @@ struct mptcp_sock { #define mptcp_for_each_subflow(__msk, __subflow) \ list_for_each_entry(__subflow, &((__msk)->conn_list), node) +static inline void msk_owned_by_me(const struct mptcp_sock *msk) +{ + sock_owned_by_me((const struct sock *)msk); +} + static inline struct mptcp_sock *mptcp_sk(const struct sock *sk) { return (struct mptcp_sock *)sk; -- GitLab From 1cef42c8474f22d6a8509a19c0b578e5f60138d9 Mon Sep 17 00:00:00 2001 From: Jian Shen Date: Fri, 5 Feb 2021 16:32:44 +0800 Subject: [PATCH 1954/2012] net: hns3: add api capability bits for firmware To improve the compatibility of firmware for driver, help firmware to deal with different api commands, add api capability bits when initialize the command queue. Signed-off-by: Jian Shen Signed-off-by: Huazhong Tan Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c | 10 ++++++++++ drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h | 6 +++++- .../net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c | 10 ++++++++++ .../net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.h | 6 +++++- 4 files changed, 30 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c index b728be4737f88..6546b47bef88e 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c @@ -363,6 +363,15 @@ static void hclge_parse_capability(struct hclge_dev *hdev, set_bit(HNAE3_DEV_SUPPORT_FD_FORWARD_TC_B, ae_dev->caps); } +static __le32 hclge_build_api_caps(void) +{ + u32 api_caps = 0; + + hnae3_set_bit(api_caps, HCLGE_API_CAP_FLEX_RSS_TBL_B, 1); + + return cpu_to_le32(api_caps); +} + static enum hclge_cmd_status hclge_cmd_query_version_and_capability(struct hclge_dev *hdev) { @@ -373,6 +382,7 @@ hclge_cmd_query_version_and_capability(struct hclge_dev *hdev) hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_QUERY_FW_VER, 1); resp = (struct hclge_query_version_cmd *)desc.data; + resp->api_caps = hclge_build_api_caps(); ret = hclge_cmd_send(&hdev->hw, &desc, 1); if (ret) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h index f861bdbe46c2d..9ceb059079089 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h @@ -386,11 +386,15 @@ enum HCLGE_CAP_BITS { HCLGE_CAP_UDP_TUNNEL_CSUM_B, }; +enum HCLGE_API_CAP_BITS { + HCLGE_API_CAP_FLEX_RSS_TBL_B, +}; + #define HCLGE_QUERY_CAP_LENGTH 3 struct hclge_query_version_cmd { __le32 firmware; __le32 hardware; - __le32 rsv; + __le32 api_caps; __le32 caps[HCLGE_QUERY_CAP_LENGTH]; /* capabilities of device */ }; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c index e04c0cfeb95c2..0f93c2dd890d4 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c @@ -342,6 +342,15 @@ static void hclgevf_parse_capability(struct hclgevf_dev *hdev, set_bit(HNAE3_DEV_SUPPORT_UDP_TUNNEL_CSUM_B, ae_dev->caps); } +static __le32 hclgevf_build_api_caps(void) +{ + u32 api_caps = 0; + + hnae3_set_bit(api_caps, HCLGEVF_API_CAP_FLEX_RSS_TBL_B, 1); + + return cpu_to_le32(api_caps); +} + static int hclgevf_cmd_query_version_and_capability(struct hclgevf_dev *hdev) { struct hnae3_ae_dev *ae_dev = pci_get_drvdata(hdev->pdev); @@ -352,6 +361,7 @@ static int hclgevf_cmd_query_version_and_capability(struct hclgevf_dev *hdev) resp = (struct hclgevf_query_version_cmd *)desc.data; hclgevf_cmd_setup_basic_desc(&desc, HCLGEVF_OPC_QUERY_FW_VER, 1); + resp->api_caps = hclgevf_build_api_caps(); status = hclgevf_cmd_send(&hdev->hw, &desc, 1); if (status) return status; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.h b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.h index 82eed258e8c1c..d591b33a56984 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.h @@ -161,11 +161,15 @@ enum HCLGEVF_CAP_BITS { HCLGEVF_CAP_UDP_TUNNEL_CSUM_B, }; +enum HCLGEVF_API_CAP_BITS { + HCLGEVF_API_CAP_FLEX_RSS_TBL_B, +}; + #define HCLGEVF_QUERY_CAP_LENGTH 3 struct hclgevf_query_version_cmd { __le32 firmware; __le32 hardware; - __le32 rsv; + __le32 api_caps; __le32 caps[HCLGEVF_QUERY_CAP_LENGTH]; /* capabilities of device */ }; -- GitLab From 87ce161e8c67aca9e64a77355f748e212122ace4 Mon Sep 17 00:00:00 2001 From: Guangbin Huang Date: Fri, 5 Feb 2021 16:32:45 +0800 Subject: [PATCH 1955/2012] net: hns3: RSS indirection table use device specification As RSS indirection table size may be different in different hardware. Instead of using macro, this value is better to use device specification which querying from firmware. BTW, RSS indirection table should be allocated by the queried size instead the static array. .get_rss_indir_size in struct hnae3_ae_ops is not used now, so remove it as well. Signed-off-by: Guangbin Huang Signed-off-by: Huazhong Tan Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/hisilicon/hns3/hnae3.h | 3 -- .../ethernet/hisilicon/hns3/hns3_ethtool.c | 6 +-- .../hisilicon/hns3/hns3pf/hclge_main.c | 45 +++++++++++++------ .../hisilicon/hns3/hns3pf/hclge_main.h | 4 +- .../hisilicon/hns3/hns3vf/hclgevf_main.c | 43 ++++++++++++------ .../hisilicon/hns3/hns3vf/hclgevf_main.h | 6 +-- 6 files changed, 66 insertions(+), 41 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h index fe09cf6c15f30..ba94b3c52ec53 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h +++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h @@ -410,8 +410,6 @@ struct hnae3_ae_dev { * Get the len of the regs dump * get_rss_key_size() * Get rss key size - * get_rss_indir_size() - * Get rss indirection table size * get_rss() * Get rss table * set_rss() @@ -555,7 +553,6 @@ struct hnae3_ae_ops { int (*get_regs_len)(struct hnae3_handle *handle); u32 (*get_rss_key_size)(struct hnae3_handle *handle); - u32 (*get_rss_indir_size)(struct hnae3_handle *handle); int (*get_rss)(struct hnae3_handle *handle, u32 *indir, u8 *key, u8 *hfunc); int (*set_rss)(struct hnae3_handle *handle, const u32 *indir, diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c index e2fc443fe92ca..79e0a9b14b684 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c @@ -859,11 +859,9 @@ static u32 hns3_get_rss_key_size(struct net_device *netdev) static u32 hns3_get_rss_indir_size(struct net_device *netdev) { struct hnae3_handle *h = hns3_get_handle(netdev); + struct hnae3_ae_dev *ae_dev = pci_get_drvdata(h->pdev); - if (!h->ae_algo->ops->get_rss_indir_size) - return 0; - - return h->ae_algo->ops->get_rss_indir_size(h); + return ae_dev->dev_specs.rss_ind_tbl_size; } static int hns3_get_rss(struct net_device *netdev, u32 *indir, u8 *key, diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index 16ccb1abe3d91..4b89f36e4d6dc 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -4237,11 +4237,6 @@ static u32 hclge_get_rss_key_size(struct hnae3_handle *handle) return HCLGE_RSS_KEY_SIZE; } -static u32 hclge_get_rss_indir_size(struct hnae3_handle *handle) -{ - return HCLGE_RSS_IND_TBL_SIZE; -} - static int hclge_set_rss_algo_key(struct hclge_dev *hdev, const u8 hfunc, const u8 *key) { @@ -4283,6 +4278,7 @@ static int hclge_set_rss_indir_table(struct hclge_dev *hdev, const u16 *indir) { struct hclge_rss_indirection_table_cmd *req; struct hclge_desc desc; + int rss_cfg_tbl_num; u8 rss_msb_oft; u8 rss_msb_val; int ret; @@ -4291,8 +4287,10 @@ static int hclge_set_rss_indir_table(struct hclge_dev *hdev, const u16 *indir) u32 j; req = (struct hclge_rss_indirection_table_cmd *)desc.data; + rss_cfg_tbl_num = hdev->ae_dev->dev_specs.rss_ind_tbl_size / + HCLGE_RSS_CFG_TBL_SIZE; - for (i = 0; i < HCLGE_RSS_CFG_TBL_NUM; i++) { + for (i = 0; i < rss_cfg_tbl_num; i++) { hclge_cmd_setup_basic_desc (&desc, HCLGE_OPC_RSS_INDIR_TABLE, false); @@ -4398,6 +4396,7 @@ static int hclge_set_rss_input_tuple(struct hclge_dev *hdev) static int hclge_get_rss(struct hnae3_handle *handle, u32 *indir, u8 *key, u8 *hfunc) { + struct hnae3_ae_dev *ae_dev = pci_get_drvdata(handle->pdev); struct hclge_vport *vport = hclge_get_vport(handle); int i; @@ -4422,7 +4421,7 @@ static int hclge_get_rss(struct hnae3_handle *handle, u32 *indir, /* Get indirect table */ if (indir) - for (i = 0; i < HCLGE_RSS_IND_TBL_SIZE; i++) + for (i = 0; i < ae_dev->dev_specs.rss_ind_tbl_size; i++) indir[i] = vport->rss_indirection_tbl[i]; return 0; @@ -4431,6 +4430,7 @@ static int hclge_get_rss(struct hnae3_handle *handle, u32 *indir, static int hclge_set_rss(struct hnae3_handle *handle, const u32 *indir, const u8 *key, const u8 hfunc) { + struct hnae3_ae_dev *ae_dev = pci_get_drvdata(handle->pdev); struct hclge_vport *vport = hclge_get_vport(handle); struct hclge_dev *hdev = vport->back; u8 hash_algo; @@ -4462,7 +4462,7 @@ static int hclge_set_rss(struct hnae3_handle *handle, const u32 *indir, } /* Update the shadow RSS table with user specified qids */ - for (i = 0; i < HCLGE_RSS_IND_TBL_SIZE; i++) + for (i = 0; i < ae_dev->dev_specs.rss_ind_tbl_size; i++) vport->rss_indirection_tbl[i] = indir[i]; /* Update the hardware */ @@ -4703,14 +4703,15 @@ void hclge_rss_indir_init_cfg(struct hclge_dev *hdev) int i, j; for (j = 0; j < hdev->num_vmdq_vport + 1; j++) { - for (i = 0; i < HCLGE_RSS_IND_TBL_SIZE; i++) + for (i = 0; i < hdev->ae_dev->dev_specs.rss_ind_tbl_size; i++) vport[j].rss_indirection_tbl[i] = i % vport[j].alloc_rss_size; } } -static void hclge_rss_init_cfg(struct hclge_dev *hdev) +static int hclge_rss_init_cfg(struct hclge_dev *hdev) { + u16 rss_ind_tbl_size = hdev->ae_dev->dev_specs.rss_ind_tbl_size; int i, rss_algo = HCLGE_RSS_HASH_ALGO_TOEPLITZ; struct hclge_vport *vport = hdev->vport; @@ -4718,6 +4719,8 @@ static void hclge_rss_init_cfg(struct hclge_dev *hdev) rss_algo = HCLGE_RSS_HASH_ALGO_SIMPLE; for (i = 0; i < hdev->num_vmdq_vport + 1; i++) { + u16 *rss_ind_tbl; + vport[i].rss_tuple_sets.ipv4_tcp_en = HCLGE_RSS_INPUT_TUPLE_OTHER; vport[i].rss_tuple_sets.ipv4_udp_en = @@ -4739,11 +4742,19 @@ static void hclge_rss_init_cfg(struct hclge_dev *hdev) vport[i].rss_algo = rss_algo; + rss_ind_tbl = devm_kcalloc(&hdev->pdev->dev, rss_ind_tbl_size, + sizeof(*rss_ind_tbl), GFP_KERNEL); + if (!rss_ind_tbl) + return -ENOMEM; + + vport[i].rss_indirection_tbl = rss_ind_tbl; memcpy(vport[i].rss_hash_key, hclge_hash_key, HCLGE_RSS_KEY_SIZE); } hclge_rss_indir_init_cfg(hdev); + + return 0; } int hclge_bind_ring_with_vector(struct hclge_vport *vport, @@ -10581,7 +10592,12 @@ static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev) goto err_mdiobus_unreg; } - hclge_rss_init_cfg(hdev); + ret = hclge_rss_init_cfg(hdev); + if (ret) { + dev_err(&pdev->dev, "failed to init rss cfg, ret = %d\n", ret); + goto err_mdiobus_unreg; + } + ret = hclge_rss_init_hw(hdev); if (ret) { dev_err(&pdev->dev, "Rss init fail, ret =%d\n", ret); @@ -11072,6 +11088,7 @@ static void hclge_get_tqps_and_rss_info(struct hnae3_handle *handle, static int hclge_set_channels(struct hnae3_handle *handle, u32 new_tqps_num, bool rxfh_configured) { + struct hnae3_ae_dev *ae_dev = pci_get_drvdata(handle->pdev); struct hclge_vport *vport = hclge_get_vport(handle); struct hnae3_knic_private_info *kinfo = &vport->nic.kinfo; u16 tc_offset[HCLGE_MAX_TC_NUM] = {0}; @@ -11115,11 +11132,12 @@ static int hclge_set_channels(struct hnae3_handle *handle, u32 new_tqps_num, goto out; /* Reinitializes the rss indirect table according to the new RSS size */ - rss_indir = kcalloc(HCLGE_RSS_IND_TBL_SIZE, sizeof(u32), GFP_KERNEL); + rss_indir = kcalloc(ae_dev->dev_specs.rss_ind_tbl_size, sizeof(u32), + GFP_KERNEL); if (!rss_indir) return -ENOMEM; - for (i = 0; i < HCLGE_RSS_IND_TBL_SIZE; i++) + for (i = 0; i < ae_dev->dev_specs.rss_ind_tbl_size; i++) rss_indir[i] = i % kinfo->rss_size; ret = hclge_set_rss(handle, rss_indir, NULL, 0); @@ -11799,7 +11817,6 @@ static const struct hnae3_ae_ops hclge_ops = { .get_fec = hclge_get_fec, .set_fec = hclge_set_fec, .get_rss_key_size = hclge_get_rss_key_size, - .get_rss_indir_size = hclge_get_rss_indir_size, .get_rss = hclge_get_rss, .set_rss = hclge_set_rss, .set_rss_tuple = hclge_set_rss_tuple, diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h index 32e5f82ef615b..a9c67e3891eb1 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h @@ -97,8 +97,6 @@ #define HCLGE_RSS_HASH_ALGO_SIMPLE 1 #define HCLGE_RSS_HASH_ALGO_SYMMETRIC 2 #define HCLGE_RSS_HASH_ALGO_MASK GENMASK(3, 0) -#define HCLGE_RSS_CFG_TBL_NUM \ - (HCLGE_RSS_IND_TBL_SIZE / HCLGE_RSS_CFG_TBL_SIZE) #define HCLGE_RSS_INPUT_TUPLE_OTHER GENMASK(3, 0) #define HCLGE_RSS_INPUT_TUPLE_SCTP GENMASK(4, 0) @@ -922,7 +920,7 @@ struct hclge_vport { u8 rss_hash_key[HCLGE_RSS_KEY_SIZE]; /* User configured hash keys */ /* User configured lookup table entries */ - u16 rss_indirection_tbl[HCLGE_RSS_IND_TBL_SIZE]; + u16 *rss_indirection_tbl; int rss_algo; /* User configured hash algorithm */ /* User configured rss tuple sets */ struct hclge_rss_tuple_cfg rss_tuple_sets; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c index 674b3a22e91fe..1bc86be61fa7a 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c @@ -642,22 +642,20 @@ static u32 hclgevf_get_rss_key_size(struct hnae3_handle *handle) return HCLGEVF_RSS_KEY_SIZE; } -static u32 hclgevf_get_rss_indir_size(struct hnae3_handle *handle) -{ - return HCLGEVF_RSS_IND_TBL_SIZE; -} - static int hclgevf_set_rss_indir_table(struct hclgevf_dev *hdev) { const u8 *indir = hdev->rss_cfg.rss_indirection_tbl; struct hclgevf_rss_indirection_table_cmd *req; struct hclgevf_desc desc; + int rss_cfg_tbl_num; int status; int i, j; req = (struct hclgevf_rss_indirection_table_cmd *)desc.data; + rss_cfg_tbl_num = hdev->ae_dev->dev_specs.rss_ind_tbl_size / + HCLGEVF_RSS_CFG_TBL_SIZE; - for (i = 0; i < HCLGEVF_RSS_CFG_TBL_NUM; i++) { + for (i = 0; i < rss_cfg_tbl_num; i++) { hclgevf_cmd_setup_basic_desc(&desc, HCLGEVF_OPC_RSS_INDIR_TABLE, false); req->start_table_index = i * HCLGEVF_RSS_CFG_TBL_SIZE; @@ -795,7 +793,7 @@ static int hclgevf_get_rss(struct hnae3_handle *handle, u32 *indir, u8 *key, } if (indir) - for (i = 0; i < HCLGEVF_RSS_IND_TBL_SIZE; i++) + for (i = 0; i < hdev->ae_dev->dev_specs.rss_ind_tbl_size; i++) indir[i] = rss_cfg->rss_indirection_tbl[i]; return 0; @@ -838,7 +836,7 @@ static int hclgevf_set_rss(struct hnae3_handle *handle, const u32 *indir, } /* update the shadow RSS table with user specified qids */ - for (i = 0; i < HCLGEVF_RSS_IND_TBL_SIZE; i++) + for (i = 0; i < hdev->ae_dev->dev_specs.rss_ind_tbl_size; i++) rss_cfg->rss_indirection_tbl[i] = indir[i]; /* update the hardware */ @@ -2482,8 +2480,9 @@ static int hclgevf_config_gro(struct hclgevf_dev *hdev, bool en) return ret; } -static void hclgevf_rss_init_cfg(struct hclgevf_dev *hdev) +static int hclgevf_rss_init_cfg(struct hclgevf_dev *hdev) { + u16 rss_ind_tbl_size = hdev->ae_dev->dev_specs.rss_ind_tbl_size; struct hclgevf_rss_cfg *rss_cfg = &hdev->rss_cfg; struct hclgevf_rss_tuple_cfg *tuple_sets; u32 i; @@ -2492,7 +2491,16 @@ static void hclgevf_rss_init_cfg(struct hclgevf_dev *hdev) rss_cfg->rss_size = hdev->nic.kinfo.rss_size; tuple_sets = &rss_cfg->rss_tuple_sets; if (hdev->ae_dev->dev_version >= HNAE3_DEVICE_VERSION_V2) { + u8 *rss_ind_tbl; + rss_cfg->hash_algo = HCLGEVF_RSS_HASH_ALGO_SIMPLE; + + rss_ind_tbl = devm_kcalloc(&hdev->pdev->dev, rss_ind_tbl_size, + sizeof(*rss_ind_tbl), GFP_KERNEL); + if (!rss_ind_tbl) + return -ENOMEM; + + rss_cfg->rss_indirection_tbl = rss_ind_tbl; memcpy(rss_cfg->rss_hash_key, hclgevf_hash_key, HCLGEVF_RSS_KEY_SIZE); @@ -2510,8 +2518,10 @@ static void hclgevf_rss_init_cfg(struct hclgevf_dev *hdev) } /* Initialize RSS indirect table */ - for (i = 0; i < HCLGEVF_RSS_IND_TBL_SIZE; i++) + for (i = 0; i < rss_ind_tbl_size; i++) rss_cfg->rss_indirection_tbl[i] = i % rss_cfg->rss_size; + + return 0; } static int hclgevf_rss_init_hw(struct hclgevf_dev *hdev) @@ -3266,7 +3276,12 @@ static int hclgevf_init_hdev(struct hclgevf_dev *hdev) goto err_config; /* Initialize RSS for this VF */ - hclgevf_rss_init_cfg(hdev); + ret = hclgevf_rss_init_cfg(hdev); + if (ret) { + dev_err(&pdev->dev, "failed to init rss cfg, ret = %d\n", ret); + goto err_config; + } + ret = hclgevf_rss_init_hw(hdev); if (ret) { dev_err(&hdev->pdev->dev, @@ -3444,11 +3459,12 @@ static int hclgevf_set_channels(struct hnae3_handle *handle, u32 new_tqps_num, goto out; /* Reinitializes the rss indirect table according to the new RSS size */ - rss_indir = kcalloc(HCLGEVF_RSS_IND_TBL_SIZE, sizeof(u32), GFP_KERNEL); + rss_indir = kcalloc(hdev->ae_dev->dev_specs.rss_ind_tbl_size, + sizeof(u32), GFP_KERNEL); if (!rss_indir) return -ENOMEM; - for (i = 0; i < HCLGEVF_RSS_IND_TBL_SIZE; i++) + for (i = 0; i < hdev->ae_dev->dev_specs.rss_ind_tbl_size; i++) rss_indir[i] = i % kinfo->rss_size; hdev->rss_cfg.rss_size = kinfo->rss_size; @@ -3687,7 +3703,6 @@ static const struct hnae3_ae_ops hclgevf_ops = { .get_strings = hclgevf_get_strings, .get_sset_count = hclgevf_get_sset_count, .get_rss_key_size = hclgevf_get_rss_key_size, - .get_rss_indir_size = hclgevf_get_rss_indir_size, .get_rss = hclgevf_get_rss, .set_rss = hclgevf_set_rss, .get_rss_tuple = hclgevf_get_rss_tuple, diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h index f6d817a3edcb3..6147bd74d4a23 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h @@ -113,8 +113,7 @@ #define HCLGEVF_RSS_HASH_ALGO_SIMPLE 1 #define HCLGEVF_RSS_HASH_ALGO_SYMMETRIC 2 #define HCLGEVF_RSS_HASH_ALGO_MASK 0xf -#define HCLGEVF_RSS_CFG_TBL_NUM \ - (HCLGEVF_RSS_IND_TBL_SIZE / HCLGEVF_RSS_CFG_TBL_SIZE) + #define HCLGEVF_RSS_INPUT_TUPLE_OTHER GENMASK(3, 0) #define HCLGEVF_RSS_INPUT_TUPLE_SCTP GENMASK(4, 0) #define HCLGEVF_D_PORT_BIT BIT(0) @@ -217,7 +216,8 @@ struct hclgevf_rss_cfg { u32 hash_algo; u32 rss_size; u8 hw_tc_map; - u8 rss_indirection_tbl[HCLGEVF_RSS_IND_TBL_SIZE]; /* shadow table */ + /* shadow table */ + u8 *rss_indirection_tbl; struct hclgevf_rss_tuple_cfg rss_tuple_sets; }; -- GitLab From 693e44157d31c5a347c55de19e59017fbf0f8b2e Mon Sep 17 00:00:00 2001 From: GuoJia Liao Date: Fri, 5 Feb 2021 16:32:46 +0800 Subject: [PATCH 1956/2012] net: hns3: optimize the code when update the tc info When update the TC info for NIC, there are some differences between PF and VF. Currently, four "vport->vport_id" are used to distinguish PF or VF. So merge them into one to improve readability and maintainability of code. Signed-off-by: GuoJia Liao Signed-off-by: Huazhong Tan Signed-off-by: Jakub Kicinski --- .../ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 2 -- .../ethernet/hisilicon/hns3/hns3pf/hclge_main.h | 2 ++ .../net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c | 15 ++++++++++----- 3 files changed, 12 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index 4b89f36e4d6dc..9e0d7e1bc9582 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -55,8 +55,6 @@ #define HCLGE_LINK_STATUS_MS 10 -#define HCLGE_VF_VPORT_START_NUM 1 - static int hclge_set_mac_mtu(struct hclge_dev *hdev, int new_mps); static int hclge_init_vlan_config(struct hclge_dev *hdev); static void hclge_sync_vlan_filter(struct hclge_dev *hdev); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h index a9c67e3891eb1..a10a17c4c7823 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h @@ -17,6 +17,8 @@ #define HCLGE_MAX_PF_NUM 8 +#define HCLGE_VF_VPORT_START_NUM 1 + #define HCLGE_RD_FIRST_STATS_NUM 2 #define HCLGE_RD_OTHER_STATS_NUM 4 diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c index 216ab1e927239..906d98e515aaa 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c @@ -640,13 +640,18 @@ static void hclge_tm_vport_tc_info_update(struct hclge_vport *vport) /* TC configuration is shared by PF/VF in one port, only allow * one tc for VF for simplicity. VF's vport_id is non zero. */ - kinfo->tc_info.num_tc = vport->vport_id ? 1 : + if (vport->vport_id) { + kinfo->tc_info.num_tc = 1; + vport->qs_offset = HNAE3_MAX_TC + + vport->vport_id - HCLGE_VF_VPORT_START_NUM; + vport_max_rss_size = hdev->vf_rss_size_max; + } else { + kinfo->tc_info.num_tc = min_t(u16, vport->alloc_tqps, hdev->tm_info.num_tc); - vport->qs_offset = (vport->vport_id ? HNAE3_MAX_TC : 0) + - (vport->vport_id ? (vport->vport_id - 1) : 0); + vport->qs_offset = 0; + vport_max_rss_size = hdev->pf_rss_size_max; + } - vport_max_rss_size = vport->vport_id ? hdev->vf_rss_size_max : - hdev->pf_rss_size_max; max_rss_size = min_t(u16, vport_max_rss_size, hclge_vport_get_max_rss_size(vport)); -- GitLab From e070c8b91ac1c7810c8448b1e5534d1895a0c7f4 Mon Sep 17 00:00:00 2001 From: Yufeng Mo Date: Fri, 5 Feb 2021 16:32:47 +0800 Subject: [PATCH 1957/2012] net: hns3: add support for obtaining the maximum frame size Since the newer hardware may supports different frame size, so add support to obtain the capability from the firmware instead of the fixed value. Signed-off-by: Yufeng Mo Signed-off-by: Huazhong Tan Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/hisilicon/hns3/hnae3.h | 1 + drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c | 1 + drivers/net/ethernet/hisilicon/hns3/hns3_enet.c | 3 +-- drivers/net/ethernet/hisilicon/hns3/hns3_enet.h | 5 ++--- drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h | 3 ++- drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 6 +++++- drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.h | 3 ++- drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c | 4 ++++ drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h | 2 ++ 9 files changed, 20 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h index ba94b3c52ec53..ed41414762573 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h +++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h @@ -284,6 +284,7 @@ struct hnae3_dev_specs { u16 int_ql_max; /* max value of interrupt coalesce based on INT_QL */ u16 max_int_gl; /* max value of interrupt coalesce based on INT_GL */ u8 max_non_tso_bd_num; /* max BD number of one non-TSO packet */ + u16 max_frm_size; }; struct hnae3_client_ops { diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c index c5958754f939a..4f7922a1fa7ca 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c @@ -389,6 +389,7 @@ static void hns3_dbg_dev_specs(struct hnae3_handle *h) kinfo->tc_info.num_tc); dev_info(priv->dev, "MAX INT QL: %u\n", dev_specs->int_ql_max); dev_info(priv->dev, "MAX INT GL: %u\n", dev_specs->max_int_gl); + dev_info(priv->dev, "MAX frame size: %u\n", dev_specs->max_frm_size); } static ssize_t hns3_dbg_cmd_read(struct file *filp, char __user *buffer, diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c index f39f5b1c4cec5..cf16d5f31f268 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c @@ -4281,8 +4281,7 @@ static int hns3_client_init(struct hnae3_handle *handle) hns3_dbg_init(handle); - /* MTU range: (ETH_MIN_MTU(kernel default) - 9702) */ - netdev->max_mtu = HNS3_MAX_MTU; + netdev->max_mtu = HNS3_MAX_MTU(ae_dev->dev_specs.max_frm_size); if (test_bit(HNAE3_DEV_SUPPORT_HW_TX_CSUM_B, ae_dev->caps)) set_bit(HNS3_NIC_STATE_HW_TX_CSUM_ENABLE, &priv->state); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h index 0a7b606e7c938..d70af1d0d5540 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h @@ -56,9 +56,8 @@ enum hns3_nic_state { #define HNS3_RING_MIN_PENDING 72 #define HNS3_RING_BD_MULTIPLE 8 /* max frame size of mac */ -#define HNS3_MAC_MAX_FRAME 9728 -#define HNS3_MAX_MTU \ - (HNS3_MAC_MAX_FRAME - (ETH_HLEN + ETH_FCS_LEN + 2 * VLAN_HLEN)) +#define HNS3_MAX_MTU(max_frm_size) \ + ((max_frm_size) - (ETH_HLEN + ETH_FCS_LEN + 2 * VLAN_HLEN)) #define HNS3_BD_SIZE_512_TYPE 0 #define HNS3_BD_SIZE_1024_TYPE 1 diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h index 9ceb059079089..2ad05d6ffd7e5 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h @@ -1131,7 +1131,8 @@ struct hclge_dev_specs_0_cmd { #define HCLGE_DEF_MAX_INT_GL 0x1FE0U struct hclge_dev_specs_1_cmd { - __le32 rsv0; + __le16 max_frm_size; + __le16 rsv0; __le16 max_int_gl; u8 rsv1[18]; }; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index 9e0d7e1bc9582..1e1b9eb5551ef 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -1371,6 +1371,7 @@ static void hclge_set_default_dev_specs(struct hclge_dev *hdev) ae_dev->dev_specs.rss_key_size = HCLGE_RSS_KEY_SIZE; ae_dev->dev_specs.max_tm_rate = HCLGE_ETHER_MAX_RATE; ae_dev->dev_specs.max_int_gl = HCLGE_DEF_MAX_INT_GL; + ae_dev->dev_specs.max_frm_size = HCLGE_MAC_MAX_FRAME; } static void hclge_parse_dev_specs(struct hclge_dev *hdev, @@ -1390,6 +1391,7 @@ static void hclge_parse_dev_specs(struct hclge_dev *hdev, ae_dev->dev_specs.rss_key_size = le16_to_cpu(req0->rss_key_size); ae_dev->dev_specs.max_tm_rate = le32_to_cpu(req0->max_tm_rate); ae_dev->dev_specs.max_int_gl = le16_to_cpu(req1->max_int_gl); + ae_dev->dev_specs.max_frm_size = le16_to_cpu(req1->max_frm_size); } static void hclge_check_dev_specs(struct hclge_dev *hdev) @@ -1406,6 +1408,8 @@ static void hclge_check_dev_specs(struct hclge_dev *hdev) dev_specs->max_tm_rate = HCLGE_ETHER_MAX_RATE; if (!dev_specs->max_int_gl) dev_specs->max_int_gl = HCLGE_DEF_MAX_INT_GL; + if (!dev_specs->max_frm_size) + dev_specs->max_frm_size = HCLGE_MAC_MAX_FRAME; } static int hclge_query_dev_specs(struct hclge_dev *hdev) @@ -9673,7 +9677,7 @@ int hclge_set_vport_mtu(struct hclge_vport *vport, int new_mtu) /* HW supprt 2 layer vlan */ max_frm_size = new_mtu + ETH_HLEN + ETH_FCS_LEN + 2 * VLAN_HLEN; if (max_frm_size < HCLGE_MAC_MIN_FRAME || - max_frm_size > HCLGE_MAC_MAX_FRAME) + max_frm_size > hdev->ae_dev->dev_specs.max_frm_size) return -EINVAL; max_frm_size = max(max_frm_size, HCLGE_MAC_DEFAULT_FRAME); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.h b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.h index d591b33a56984..ac2864a7ce8de 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.h @@ -296,7 +296,8 @@ struct hclgevf_dev_specs_0_cmd { #define HCLGEVF_DEF_MAX_INT_GL 0x1FE0U struct hclgevf_dev_specs_1_cmd { - __le32 rsv0; + __le16 max_frm_size; + __le16 rsv0; __le16 max_int_gl; u8 rsv1[18]; }; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c index 1bc86be61fa7a..cdb1131ba2395 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c @@ -3058,6 +3058,7 @@ static void hclgevf_set_default_dev_specs(struct hclgevf_dev *hdev) ae_dev->dev_specs.rss_ind_tbl_size = HCLGEVF_RSS_IND_TBL_SIZE; ae_dev->dev_specs.rss_key_size = HCLGEVF_RSS_KEY_SIZE; ae_dev->dev_specs.max_int_gl = HCLGEVF_DEF_MAX_INT_GL; + ae_dev->dev_specs.max_frm_size = HCLGEVF_MAC_MAX_FRAME; } static void hclgevf_parse_dev_specs(struct hclgevf_dev *hdev, @@ -3076,6 +3077,7 @@ static void hclgevf_parse_dev_specs(struct hclgevf_dev *hdev, ae_dev->dev_specs.int_ql_max = le16_to_cpu(req0->int_ql_max); ae_dev->dev_specs.rss_key_size = le16_to_cpu(req0->rss_key_size); ae_dev->dev_specs.max_int_gl = le16_to_cpu(req1->max_int_gl); + ae_dev->dev_specs.max_frm_size = le16_to_cpu(req1->max_frm_size); } static void hclgevf_check_dev_specs(struct hclgevf_dev *hdev) @@ -3090,6 +3092,8 @@ static void hclgevf_check_dev_specs(struct hclgevf_dev *hdev) dev_specs->rss_key_size = HCLGEVF_RSS_KEY_SIZE; if (!dev_specs->max_int_gl) dev_specs->max_int_gl = HCLGEVF_DEF_MAX_INT_GL; + if (!dev_specs->max_frm_size) + dev_specs->max_frm_size = HCLGEVF_MAC_MAX_FRAME; } static int hclgevf_query_dev_specs(struct hclgevf_dev *hdev) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h index 6147bd74d4a23..8c27ecd819af1 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h @@ -124,6 +124,8 @@ #define HCLGEVF_RSS_INPUT_TUPLE_SCTP_NO_PORT \ (HCLGEVF_D_IP_BIT | HCLGEVF_S_IP_BIT | HCLGEVF_V_TAG_BIT) +#define HCLGEVF_MAC_MAX_FRAME 9728 + #define HCLGEVF_STATS_TIMER_INTERVAL 36U enum hclgevf_evt_cause { -- GitLab From 2783e77b8df96aea2a5719af19b5f85e89d91982 Mon Sep 17 00:00:00 2001 From: Guangbin Huang Date: Fri, 5 Feb 2021 16:32:48 +0800 Subject: [PATCH 1958/2012] net: hns3: debugfs add max tm rate specification print In order to add a method to check the specification of max tm rate for debugging, function hns3_dbg_dev_specs() adds this value print. Signed-off-by: Guangbin Huang Signed-off-by: Huazhong Tan Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c index 4f7922a1fa7ca..d88fc3c7f8729 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c @@ -390,6 +390,7 @@ static void hns3_dbg_dev_specs(struct hnae3_handle *h) dev_info(priv->dev, "MAX INT QL: %u\n", dev_specs->int_ql_max); dev_info(priv->dev, "MAX INT GL: %u\n", dev_specs->max_int_gl); dev_info(priv->dev, "MAX frame size: %u\n", dev_specs->max_frm_size); + dev_info(priv->dev, "MAX TM RATE: %uMbps\n", dev_specs->max_tm_rate); } static ssize_t hns3_dbg_cmd_read(struct file *filp, char __user *buffer, -- GitLab From 3f094bd11a3720d2a00e9b6dfc53f1ab25884a49 Mon Sep 17 00:00:00 2001 From: Guangbin Huang Date: Fri, 5 Feb 2021 16:32:49 +0800 Subject: [PATCH 1959/2012] net: hns3: replace macro of max qset number with specification The max qset number is a fixed value now and it is defined by a macro. In order to support other value in different kinds of device, it is better to use specification queried from firmware to replace macro. Signed-off-by: Guangbin Huang Signed-off-by: Huazhong Tan Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/hisilicon/hns3/hnae3.h | 1 + drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c | 1 + drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h | 2 +- .../net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c | 8 +++----- drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 4 ++++ drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h | 2 ++ 6 files changed, 12 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h index ed41414762573..e20a1b3267b9a 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h +++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h @@ -285,6 +285,7 @@ struct hnae3_dev_specs { u16 max_int_gl; /* max value of interrupt coalesce based on INT_GL */ u8 max_non_tso_bd_num; /* max BD number of one non-TSO packet */ u16 max_frm_size; + u16 max_qset_num; }; struct hnae3_client_ops { diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c index d88fc3c7f8729..36c7813b59968 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c @@ -391,6 +391,7 @@ static void hns3_dbg_dev_specs(struct hnae3_handle *h) dev_info(priv->dev, "MAX INT GL: %u\n", dev_specs->max_int_gl); dev_info(priv->dev, "MAX frame size: %u\n", dev_specs->max_frm_size); dev_info(priv->dev, "MAX TM RATE: %uMbps\n", dev_specs->max_tm_rate); + dev_info(priv->dev, "MAX QSET number: %u\n", dev_specs->max_qset_num); } static ssize_t hns3_dbg_cmd_read(struct file *filp, char __user *buffer, diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h index 2ad05d6ffd7e5..e7c915eabc8ad 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h @@ -1132,7 +1132,7 @@ struct hclge_dev_specs_0_cmd { struct hclge_dev_specs_1_cmd { __le16 max_frm_size; - __le16 rsv0; + __le16 max_qset_num; __le16 max_int_gl; u8 rsv1[18]; }; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c index 8f3fefe507196..113efd4ae157b 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c @@ -1599,8 +1599,6 @@ static void hclge_dbg_dump_qs_shaper_all(struct hclge_dev *hdev) static void hclge_dbg_dump_qs_shaper(struct hclge_dev *hdev, const char *cmd_buf) { -#define HCLGE_MAX_QSET_NUM 1024 - u16 qsid; int ret; @@ -1610,9 +1608,9 @@ static void hclge_dbg_dump_qs_shaper(struct hclge_dev *hdev, return; } - if (qsid >= HCLGE_MAX_QSET_NUM) { - dev_err(&hdev->pdev->dev, "qsid(%u) out of range[0-1023]\n", - qsid); + if (qsid >= hdev->ae_dev->dev_specs.max_qset_num) { + dev_err(&hdev->pdev->dev, "qsid(%u) out of range[0-%u]\n", + qsid, hdev->ae_dev->dev_specs.max_qset_num - 1); return; } diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index 1e1b9eb5551ef..f5a988498cc33 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -1372,6 +1372,7 @@ static void hclge_set_default_dev_specs(struct hclge_dev *hdev) ae_dev->dev_specs.max_tm_rate = HCLGE_ETHER_MAX_RATE; ae_dev->dev_specs.max_int_gl = HCLGE_DEF_MAX_INT_GL; ae_dev->dev_specs.max_frm_size = HCLGE_MAC_MAX_FRAME; + ae_dev->dev_specs.max_qset_num = HCLGE_MAX_QSET_NUM; } static void hclge_parse_dev_specs(struct hclge_dev *hdev, @@ -1390,6 +1391,7 @@ static void hclge_parse_dev_specs(struct hclge_dev *hdev, ae_dev->dev_specs.int_ql_max = le16_to_cpu(req0->int_ql_max); ae_dev->dev_specs.rss_key_size = le16_to_cpu(req0->rss_key_size); ae_dev->dev_specs.max_tm_rate = le32_to_cpu(req0->max_tm_rate); + ae_dev->dev_specs.max_qset_num = le16_to_cpu(req1->max_qset_num); ae_dev->dev_specs.max_int_gl = le16_to_cpu(req1->max_int_gl); ae_dev->dev_specs.max_frm_size = le16_to_cpu(req1->max_frm_size); } @@ -1406,6 +1408,8 @@ static void hclge_check_dev_specs(struct hclge_dev *hdev) dev_specs->rss_key_size = HCLGE_RSS_KEY_SIZE; if (!dev_specs->max_tm_rate) dev_specs->max_tm_rate = HCLGE_ETHER_MAX_RATE; + if (!dev_specs->max_qset_num) + dev_specs->max_qset_num = HCLGE_MAX_QSET_NUM; if (!dev_specs->max_int_gl) dev_specs->max_int_gl = HCLGE_DEF_MAX_INT_GL; if (!dev_specs->max_frm_size) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h index a10a17c4c7823..33b17a199e18d 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h @@ -148,6 +148,8 @@ /* Factor used to calculate offset and bitmap of VF num */ #define HCLGE_VF_NUM_PER_CMD 64 +#define HCLGE_MAX_QSET_NUM 1024 + enum HLCGE_PORT_TYPE { HOST_PORT, NETWORK_PORT -- GitLab From 9d5ef190e5615a7b63af89f88c4106a5bc127974 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Fri, 5 Feb 2021 15:37:10 +0200 Subject: [PATCH 1960/2012] net: dsa: automatically bring up DSA master when opening user port DSA wants the master interface to be open before the user port is due to historical reasons. The promiscuity of interfaces that are down used to have issues, as referenced Lennert Buytenhek in commit df02c6ff2e39 ("dsa: fix master interface allmulti/promisc handling"). The bugfix mentioned there, commit b6c40d68ff64 ("net: only invoke dev->change_rx_flags when device is UP"), was basically a "don't do that" approach to working around the promiscuity while down issue. Further work done by Vlad Yasevich in commit d2615bf45069 ("net: core: Always propagate flag changes to interfaces") has resolved the underlying issue, and it is strictly up to the DSA and 8021q drivers now, it is no longer mandated by the networking core that the master interface must be up when changing its promiscuity. From DSA's point of view, deciding to error out in dsa_slave_open because the master isn't up is (a) a bad user experience and (b) knocking at an open door. Even if there still was an issue with promiscuity while down, DSA could still just open the master and avoid it. Doing it this way has the additional benefit that user space can now remove DSA-specific workarounds, like systemd-networkd with BindCarrier: https://github.com/systemd/systemd/issues/7478 And we can finally remove one of the 2 bullets in the "Common pitfalls using DSA setups" chapter. Tested with two cascaded DSA switches: $ ip link set sw0p2 up fsl_enetc 0000:00:00.2 eno2: configuring for fixed/internal link mode fsl_enetc 0000:00:00.2 eno2: Link is Up - 1Gbps/Full - flow control rx/tx mscc_felix 0000:00:00.5 swp0: configuring for fixed/sgmii link mode mscc_felix 0000:00:00.5 swp0: Link is Up - 1Gbps/Full - flow control off 8021q: adding VLAN 0 to HW filter on device swp0 sja1105 spi2.0 sw0p2: configuring for phy/rgmii-id link mode IPv6: ADDRCONF(NETDEV_CHANGE): eno2: link becomes ready IPv6: ADDRCONF(NETDEV_CHANGE): swp0: link becomes ready Signed-off-by: Vladimir Oltean Reviewed-by: Andrew Lunn Reviewed-by: Florian Fainelli Signed-off-by: Jakub Kicinski --- Documentation/networking/dsa/dsa.rst | 4 ---- net/dsa/slave.c | 7 +++++-- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/Documentation/networking/dsa/dsa.rst b/Documentation/networking/dsa/dsa.rst index a8d15dd2b42b7..e9517af5fe02e 100644 --- a/Documentation/networking/dsa/dsa.rst +++ b/Documentation/networking/dsa/dsa.rst @@ -273,10 +273,6 @@ will not make us go through the switch tagging protocol transmit function, so the Ethernet switch on the other end, expecting a tag will typically drop this frame. -Slave network devices check that the master network device is UP before allowing -you to administratively bring UP these slave network devices. A common -configuration mistake is forgetting to bring UP the master network device first. - Interactions with other subsystems ================================== diff --git a/net/dsa/slave.c b/net/dsa/slave.c index b0571ab4e5a75..c95e3bdbe690d 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -68,8 +68,11 @@ static int dsa_slave_open(struct net_device *dev) struct dsa_port *dp = dsa_slave_to_port(dev); int err; - if (!(master->flags & IFF_UP)) - return -ENETDOWN; + err = dev_open(master, NULL); + if (err < 0) { + netdev_err(dev, "failed to open master %s\n", master->name); + goto out; + } if (!ether_addr_equal(dev->dev_addr, master->dev_addr)) { err = dev_uc_add(master, dev->dev_addr); -- GitLab From c0a8a9c274936543e436aef691499304ce3127dc Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Fri, 5 Feb 2021 15:37:11 +0200 Subject: [PATCH 1961/2012] net: dsa: automatically bring user ports down when master goes down This is not fixing any actual bug that I know of, but having a DSA interface that is up even when its lower (master) interface is down is one of those things that just do not sound right. Yes, DSA checks if the master is up before actually bringing the user interface up, but nobody prevents bringing the master interface down immediately afterwards... Then the user ports would attempt dev_queue_xmit on an interface that is down, and wonder what's wrong. This patch prevents that from happening. NETDEV_GOING_DOWN is the notification emitted _before_ the master actually goes down, and we are protected by the rtnl_mutex, so all is well. For those of you reading this because you were doing switch testing such as latency measurements for autonomously forwarded traffic, and you needed a controlled environment with no extra packets sent by the network stack, this patch breaks that, because now the user ports go down too, which may shut down the PHY etc. But please don't do it like that, just do instead: tc qdisc add dev eno2 clsact tc filter add dev eno2 egress flower action drop Tested with two cascaded DSA switches: $ ip link set eno2 down sja1105 spi2.0 sw0p2: Link is Down mscc_felix 0000:00:00.5 swp0: Link is Down fsl_enetc 0000:00:00.2 eno2: Link is Down Signed-off-by: Vladimir Oltean Reviewed-by: Andrew Lunn Reviewed-by: Florian Fainelli Signed-off-by: Jakub Kicinski --- net/dsa/slave.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/net/dsa/slave.c b/net/dsa/slave.c index c95e3bdbe690d..f77e9eeb1a620 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -2081,6 +2081,30 @@ static int dsa_slave_netdevice_event(struct notifier_block *nb, err = dsa_port_lag_change(dp, info->lower_state_info); return notifier_from_errno(err); } + case NETDEV_GOING_DOWN: { + struct dsa_port *dp, *cpu_dp; + struct dsa_switch_tree *dst; + LIST_HEAD(close_list); + + if (!netdev_uses_dsa(dev)) + return NOTIFY_DONE; + + cpu_dp = dev->dsa_ptr; + dst = cpu_dp->ds->dst; + + list_for_each_entry(dp, &dst->ports, list) { + if (!dsa_is_user_port(dp->ds, dp->index)) + continue; + + list_add(&dp->slave->close_list, &close_list); + } + + dev_close_many(&close_list, true); + + return NOTIFY_OK; + } + default: + break; } return NOTIFY_DONE; -- GitLab From ea92000d5430304b22f46d61508ea95b5342373c Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Fri, 5 Feb 2021 15:37:12 +0200 Subject: [PATCH 1962/2012] Revert "net: Have netpoll bring-up DSA management interface" This reverts commit 1532b9778478577152201adbafa7738b1e844868. The above commit is good and it works, however it was meant as a bugfix for stable kernels and now we have more self-contained ways in DSA to handle the situation where the DSA master must be brought up. Signed-off-by: Vladimir Oltean Reviewed-by: Andrew Lunn Reviewed-by: Florian Fainelli Signed-off-by: Jakub Kicinski --- net/core/netpoll.c | 22 ++++------------------ 1 file changed, 4 insertions(+), 18 deletions(-) diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 960948290001e..c310c7c1cef7f 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -29,7 +29,6 @@ #include #include #include -#include #include #include #include @@ -658,15 +657,15 @@ EXPORT_SYMBOL_GPL(__netpoll_setup); int netpoll_setup(struct netpoll *np) { - struct net_device *ndev = NULL, *dev = NULL; - struct net *net = current->nsproxy->net_ns; + struct net_device *ndev = NULL; struct in_device *in_dev; int err; rtnl_lock(); - if (np->dev_name[0]) + if (np->dev_name[0]) { + struct net *net = current->nsproxy->net_ns; ndev = __dev_get_by_name(net, np->dev_name); - + } if (!ndev) { np_err(np, "%s doesn't exist, aborting\n", np->dev_name); err = -ENODEV; @@ -674,19 +673,6 @@ int netpoll_setup(struct netpoll *np) } dev_hold(ndev); - /* bring up DSA management network devices up first */ - for_each_netdev(net, dev) { - if (!netdev_uses_dsa(dev)) - continue; - - err = dev_change_flags(dev, dev->flags | IFF_UP, NULL); - if (err < 0) { - np_err(np, "%s failed to open %s\n", - np->dev_name, dev->name); - goto put; - } - } - if (netdev_master_upper_dev_get(ndev)) { np_err(np, "%s is a slave device, aborting\n", np->dev_name); err = -EBUSY; -- GitLab From 46acf7bdbc72f10bb2e86d69c14189c5d45894f4 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Fri, 5 Feb 2021 15:37:13 +0200 Subject: [PATCH 1963/2012] Revert "net: ipv4: handle DSA enabled master network devices" This reverts commit 728c02089a0e3eefb02e9927bfae50490f40e72e. Since 2015 DSA has gained more integration with the network stack, we can now have the same functionality without explicitly open-coding for it: - It now opens the DSA master netdevice automatically whenever a user netdevice is opened. - The master and switch interfaces are coupled in an upper/lower hierarchy using the netdev adjacency lists. In the nfsroot example below, the interface chosen by autoconfig was swp3, and every interface except that and the DSA master, eth1, was brought down afterwards: [ 8.714215] mscc_felix 0000:00:00.5 swp0 (uninitialized): PHY [0000:00:00.3:10] driver [Microsemi GE VSC8514 SyncE] (irq=POLL) [ 8.978041] mscc_felix 0000:00:00.5 swp1 (uninitialized): PHY [0000:00:00.3:11] driver [Microsemi GE VSC8514 SyncE] (irq=POLL) [ 9.246134] mscc_felix 0000:00:00.5 swp2 (uninitialized): PHY [0000:00:00.3:12] driver [Microsemi GE VSC8514 SyncE] (irq=POLL) [ 9.486203] mscc_felix 0000:00:00.5 swp3 (uninitialized): PHY [0000:00:00.3:13] driver [Microsemi GE VSC8514 SyncE] (irq=POLL) [ 9.512827] mscc_felix 0000:00:00.5: configuring for fixed/internal link mode [ 9.521047] mscc_felix 0000:00:00.5: Link is Up - 2.5Gbps/Full - flow control off [ 9.530382] device eth1 entered promiscuous mode [ 9.535452] DSA: tree 0 setup [ 9.539777] printk: console [netcon0] enabled [ 9.544504] netconsole: network logging started [ 9.555047] fsl_enetc 0000:00:00.2 eth1: configuring for fixed/internal link mode [ 9.562790] fsl_enetc 0000:00:00.2 eth1: Link is Up - 1Gbps/Full - flow control off [ 9.564661] 8021q: adding VLAN 0 to HW filter on device bond0 [ 9.637681] fsl_enetc 0000:00:00.0 eth0: PHY [0000:00:00.0:02] driver [Qualcomm Atheros AR8031/AR8033] (irq=POLL) [ 9.655679] fsl_enetc 0000:00:00.0 eth0: configuring for inband/sgmii link mode [ 9.666611] mscc_felix 0000:00:00.5 swp0: configuring for inband/qsgmii link mode [ 9.676216] 8021q: adding VLAN 0 to HW filter on device swp0 [ 9.682086] mscc_felix 0000:00:00.5 swp1: configuring for inband/qsgmii link mode [ 9.690700] 8021q: adding VLAN 0 to HW filter on device swp1 [ 9.696538] mscc_felix 0000:00:00.5 swp2: configuring for inband/qsgmii link mode [ 9.705131] 8021q: adding VLAN 0 to HW filter on device swp2 [ 9.710964] mscc_felix 0000:00:00.5 swp3: configuring for inband/qsgmii link mode [ 9.719548] 8021q: adding VLAN 0 to HW filter on device swp3 [ 9.747811] Sending DHCP requests .. [ 12.742899] mscc_felix 0000:00:00.5 swp1: Link is Up - 1Gbps/Full - flow control rx/tx [ 12.743828] mscc_felix 0000:00:00.5 swp0: Link is Up - 1Gbps/Full - flow control off [ 12.747062] IPv6: ADDRCONF(NETDEV_CHANGE): swp1: link becomes ready [ 12.755216] fsl_enetc 0000:00:00.0 eth0: Link is Up - 1Gbps/Full - flow control rx/tx [ 12.766603] IPv6: ADDRCONF(NETDEV_CHANGE): swp0: link becomes ready [ 12.783188] mscc_felix 0000:00:00.5 swp2: Link is Up - 1Gbps/Full - flow control rx/tx [ 12.785354] IPv6: ADDRCONF(NETDEV_CHANGE): eth0: link becomes ready [ 12.799535] IPv6: ADDRCONF(NETDEV_CHANGE): swp2: link becomes ready [ 13.803141] mscc_felix 0000:00:00.5 swp3: Link is Up - 1Gbps/Full - flow control rx/tx [ 13.811646] IPv6: ADDRCONF(NETDEV_CHANGE): swp3: link becomes ready [ 15.452018] ., OK [ 15.470336] IP-Config: Got DHCP answer from 10.0.0.1, my address is 10.0.0.39 [ 15.477887] IP-Config: Complete: [ 15.481330] device=swp3, hwaddr=00:04:9f:05:de:0a, ipaddr=10.0.0.39, mask=255.255.255.0, gw=10.0.0.1 [ 15.491846] host=10.0.0.39, domain=(none), nis-domain=(none) [ 15.498429] bootserver=10.0.0.1, rootserver=10.0.0.1, rootpath= [ 15.498481] nameserver0=8.8.8.8 [ 15.627542] fsl_enetc 0000:00:00.0 eth0: Link is Down [ 15.690903] mscc_felix 0000:00:00.5 swp0: Link is Down [ 15.745216] mscc_felix 0000:00:00.5 swp1: Link is Down [ 15.800498] mscc_felix 0000:00:00.5 swp2: Link is Down Signed-off-by: Vladimir Oltean Reviewed-by: Andrew Lunn Reviewed-by: Florian Fainelli Signed-off-by: Jakub Kicinski --- net/ipv4/ipconfig.c | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index 3cd13e1bc6a70..f9ab1fb219ec7 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -61,7 +61,6 @@ #include #include #include -#include #include #include #include @@ -218,9 +217,9 @@ static int __init ic_open_devs(void) last = &ic_first_dev; rtnl_lock(); - /* bring loopback and DSA master network devices up first */ + /* bring loopback device up first */ for_each_netdev(&init_net, dev) { - if (!(dev->flags & IFF_LOOPBACK) && !netdev_uses_dsa(dev)) + if (!(dev->flags & IFF_LOOPBACK)) continue; if (dev_change_flags(dev, dev->flags | IFF_UP, NULL) < 0) pr_err("IP-Config: Failed to open %s\n", dev->name); @@ -305,6 +304,9 @@ have_carrier: return 0; } +/* Close all network interfaces except the one we've autoconfigured, and its + * lowers, in case it's a stacked virtual interface. + */ static void __init ic_close_devs(void) { struct ic_device *d, *next; @@ -313,9 +315,20 @@ static void __init ic_close_devs(void) rtnl_lock(); next = ic_first_dev; while ((d = next)) { + bool bring_down = (d != ic_dev); + struct net_device *lower_dev; + struct list_head *iter; + next = d->next; dev = d->dev; - if (d != ic_dev && !netdev_uses_dsa(dev)) { + + netdev_for_each_lower_dev(ic_dev->dev, lower_dev, iter) { + if (dev == lower_dev) { + bring_down = false; + break; + } + } + if (bring_down) { pr_debug("IP-Config: Downing %s\n", dev->name); dev_change_flags(dev, d->flags, NULL); } -- GitLab From 662981bbda291b8921100e42efa8a46c547dc08c Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Sat, 6 Feb 2021 00:02:10 +0200 Subject: [PATCH 1964/2012] net: mscc: ocelot: rename ocelot_netdevice_port_event to ocelot_netdevice_changeupper ocelot_netdevice_port_event treats a single event, NETDEV_CHANGEUPPER. So we can remove the check for the type of event, and rename the function to be more suggestive, since there already is a function with a very similar name of ocelot_netdevice_event. Signed-off-by: Vladimir Oltean Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mscc/ocelot_net.c | 59 ++++++++++++-------------- 1 file changed, 27 insertions(+), 32 deletions(-) diff --git a/drivers/net/ethernet/mscc/ocelot_net.c b/drivers/net/ethernet/mscc/ocelot_net.c index e6b33d9df184b..c8106124f134e 100644 --- a/drivers/net/ethernet/mscc/ocelot_net.c +++ b/drivers/net/ethernet/mscc/ocelot_net.c @@ -1110,9 +1110,8 @@ static int ocelot_port_obj_del(struct net_device *dev, return ret; } -static int ocelot_netdevice_port_event(struct net_device *dev, - unsigned long event, - struct netdev_notifier_changeupper_info *info) +static int ocelot_netdevice_changeupper(struct net_device *dev, + struct netdev_notifier_changeupper_info *info) { struct ocelot_port_private *priv = netdev_priv(dev); struct ocelot_port *ocelot_port = &priv->port; @@ -1120,28 +1119,22 @@ static int ocelot_netdevice_port_event(struct net_device *dev, int port = priv->chip_port; int err = 0; - switch (event) { - case NETDEV_CHANGEUPPER: - if (netif_is_bridge_master(info->upper_dev)) { - if (info->linking) { - err = ocelot_port_bridge_join(ocelot, port, - info->upper_dev); - } else { - err = ocelot_port_bridge_leave(ocelot, port, - info->upper_dev); - } - } - if (netif_is_lag_master(info->upper_dev)) { - if (info->linking) - err = ocelot_port_lag_join(ocelot, port, - info->upper_dev); - else - ocelot_port_lag_leave(ocelot, port, + if (netif_is_bridge_master(info->upper_dev)) { + if (info->linking) { + err = ocelot_port_bridge_join(ocelot, port, info->upper_dev); + } else { + err = ocelot_port_bridge_leave(ocelot, port, + info->upper_dev); } - break; - default: - break; + } + if (netif_is_lag_master(info->upper_dev)) { + if (info->linking) + err = ocelot_port_lag_join(ocelot, port, + info->upper_dev); + else + ocelot_port_lag_leave(ocelot, port, + info->upper_dev); } return err; @@ -1170,17 +1163,19 @@ static int ocelot_netdevice_event(struct notifier_block *unused, } } - if (netif_is_lag_master(dev)) { - struct net_device *slave; - struct list_head *iter; + if (event == NETDEV_CHANGEUPPER) { + if (netif_is_lag_master(dev)) { + struct net_device *slave; + struct list_head *iter; - netdev_for_each_lower_dev(dev, slave, iter) { - ret = ocelot_netdevice_port_event(slave, event, info); - if (ret) - goto notify; + netdev_for_each_lower_dev(dev, slave, iter) { + ret = ocelot_netdevice_changeupper(slave, info); + if (ret) + goto notify; + } + } else { + ret = ocelot_netdevice_changeupper(dev, info); } - } else { - ret = ocelot_netdevice_port_event(dev, event, info); } notify: -- GitLab From 41e66fa28fefc055ad2bf7acd1fbcfa94490ac97 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Sat, 6 Feb 2021 00:02:11 +0200 Subject: [PATCH 1965/2012] net: mscc: ocelot: use a switch-case statement in ocelot_netdevice_event Make ocelot's net device event handler more streamlined by structuring it in a similar way with others. The inspiration here was dsa_slave_netdevice_event. Signed-off-by: Vladimir Oltean Reviewed-by: Alexandre Belloni Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mscc/ocelot_net.c | 68 +++++++++++++++++--------- 1 file changed, 45 insertions(+), 23 deletions(-) diff --git a/drivers/net/ethernet/mscc/ocelot_net.c b/drivers/net/ethernet/mscc/ocelot_net.c index c8106124f134e..ec68cf644522b 100644 --- a/drivers/net/ethernet/mscc/ocelot_net.c +++ b/drivers/net/ethernet/mscc/ocelot_net.c @@ -1137,49 +1137,71 @@ static int ocelot_netdevice_changeupper(struct net_device *dev, info->upper_dev); } - return err; + return notifier_from_errno(err); +} + +static int +ocelot_netdevice_lag_changeupper(struct net_device *dev, + struct netdev_notifier_changeupper_info *info) +{ + struct net_device *lower; + struct list_head *iter; + int err = NOTIFY_DONE; + + netdev_for_each_lower_dev(dev, lower, iter) { + err = ocelot_netdevice_changeupper(lower, info); + if (err) + return notifier_from_errno(err); + } + + return NOTIFY_DONE; } static int ocelot_netdevice_event(struct notifier_block *unused, unsigned long event, void *ptr) { - struct netdev_notifier_changeupper_info *info = ptr; struct net_device *dev = netdev_notifier_info_to_dev(ptr); - int ret = 0; - if (event == NETDEV_PRECHANGEUPPER && - ocelot_netdevice_dev_check(dev) && - netif_is_lag_master(info->upper_dev)) { - struct netdev_lag_upper_info *lag_upper_info = info->upper_info; + switch (event) { + case NETDEV_PRECHANGEUPPER: { + struct netdev_notifier_changeupper_info *info = ptr; + struct netdev_lag_upper_info *lag_upper_info; struct netlink_ext_ack *extack; + if (!ocelot_netdevice_dev_check(dev)) + break; + + if (!netif_is_lag_master(info->upper_dev)) + break; + + lag_upper_info = info->upper_info; + if (lag_upper_info && lag_upper_info->tx_type != NETDEV_LAG_TX_TYPE_HASH) { extack = netdev_notifier_info_to_extack(&info->info); NL_SET_ERR_MSG_MOD(extack, "LAG device using unsupported Tx type"); - ret = -EINVAL; - goto notify; + return notifier_from_errno(-EINVAL); } + + break; } + case NETDEV_CHANGEUPPER: { + struct netdev_notifier_changeupper_info *info = ptr; - if (event == NETDEV_CHANGEUPPER) { - if (netif_is_lag_master(dev)) { - struct net_device *slave; - struct list_head *iter; + if (ocelot_netdevice_dev_check(dev)) + return ocelot_netdevice_changeupper(dev, info); - netdev_for_each_lower_dev(dev, slave, iter) { - ret = ocelot_netdevice_changeupper(slave, info); - if (ret) - goto notify; - } - } else { - ret = ocelot_netdevice_changeupper(dev, info); - } + if (netif_is_lag_master(dev)) + return ocelot_netdevice_lag_changeupper(dev, info); + + break; + } + default: + break; } -notify: - return notifier_from_errno(ret); + return NOTIFY_DONE; } struct notifier_block ocelot_netdevice_nb __read_mostly = { -- GitLab From 583cbbe3eed95cadd347b5dfe28e5d02f92ed109 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Sat, 6 Feb 2021 00:02:12 +0200 Subject: [PATCH 1966/2012] net: mscc: ocelot: don't refuse bonding interfaces we can't offload Since switchdev/DSA exposes network interfaces that fulfill many of the same user space expectations that dedicated NICs do, it makes sense to not deny bonding interfaces with a bonding policy that we cannot offload, but instead allow the bonding driver to select the egress interface in software. Signed-off-by: Vladimir Oltean Reviewed-by: Alexandre Belloni Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mscc/ocelot.c | 6 ++++- drivers/net/ethernet/mscc/ocelot.h | 3 ++- drivers/net/ethernet/mscc/ocelot_net.c | 36 +++++++------------------- 3 files changed, 17 insertions(+), 28 deletions(-) diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c index 5f21799ad85bc..33274d4fc5aff 100644 --- a/drivers/net/ethernet/mscc/ocelot.c +++ b/drivers/net/ethernet/mscc/ocelot.c @@ -1316,12 +1316,16 @@ static void ocelot_setup_lag(struct ocelot *ocelot, int lag) } int ocelot_port_lag_join(struct ocelot *ocelot, int port, - struct net_device *bond) + struct net_device *bond, + struct netdev_lag_upper_info *info) { struct net_device *ndev; u32 bond_mask = 0; int lag, lp; + if (info->tx_type != NETDEV_LAG_TX_TYPE_HASH) + return -EOPNOTSUPP; + rcu_read_lock(); for_each_netdev_in_bond_rcu(bond, ndev) { struct ocelot_port_private *priv = netdev_priv(ndev); diff --git a/drivers/net/ethernet/mscc/ocelot.h b/drivers/net/ethernet/mscc/ocelot.h index 76b8d8ce3b48b..12dc744530761 100644 --- a/drivers/net/ethernet/mscc/ocelot.h +++ b/drivers/net/ethernet/mscc/ocelot.h @@ -110,7 +110,8 @@ int ocelot_mact_learn(struct ocelot *ocelot, int port, int ocelot_mact_forget(struct ocelot *ocelot, const unsigned char mac[ETH_ALEN], unsigned int vid); int ocelot_port_lag_join(struct ocelot *ocelot, int port, - struct net_device *bond); + struct net_device *bond, + struct netdev_lag_upper_info *info); void ocelot_port_lag_leave(struct ocelot *ocelot, int port, struct net_device *bond); struct net_device *ocelot_port_to_netdev(struct ocelot *ocelot, int port); diff --git a/drivers/net/ethernet/mscc/ocelot_net.c b/drivers/net/ethernet/mscc/ocelot_net.c index ec68cf644522b..0a4de949f4d99 100644 --- a/drivers/net/ethernet/mscc/ocelot_net.c +++ b/drivers/net/ethernet/mscc/ocelot_net.c @@ -1129,12 +1129,19 @@ static int ocelot_netdevice_changeupper(struct net_device *dev, } } if (netif_is_lag_master(info->upper_dev)) { - if (info->linking) + if (info->linking) { err = ocelot_port_lag_join(ocelot, port, - info->upper_dev); - else + info->upper_dev, + info->upper_info); + if (err == -EOPNOTSUPP) { + NL_SET_ERR_MSG_MOD(info->info.extack, + "Offloading not supported"); + err = 0; + } + } else { ocelot_port_lag_leave(ocelot, port, info->upper_dev); + } } return notifier_from_errno(err); @@ -1163,29 +1170,6 @@ static int ocelot_netdevice_event(struct notifier_block *unused, struct net_device *dev = netdev_notifier_info_to_dev(ptr); switch (event) { - case NETDEV_PRECHANGEUPPER: { - struct netdev_notifier_changeupper_info *info = ptr; - struct netdev_lag_upper_info *lag_upper_info; - struct netlink_ext_ack *extack; - - if (!ocelot_netdevice_dev_check(dev)) - break; - - if (!netif_is_lag_master(info->upper_dev)) - break; - - lag_upper_info = info->upper_info; - - if (lag_upper_info && - lag_upper_info->tx_type != NETDEV_LAG_TX_TYPE_HASH) { - extack = netdev_notifier_info_to_extack(&info->info); - NL_SET_ERR_MSG_MOD(extack, "LAG device using unsupported Tx type"); - - return notifier_from_errno(-EINVAL); - } - - break; - } case NETDEV_CHANGEUPPER: { struct netdev_notifier_changeupper_info *info = ptr; -- GitLab From f79c20c81723221b9c12d41d8d50c3fa93fbe791 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Sat, 6 Feb 2021 00:02:13 +0200 Subject: [PATCH 1967/2012] net: mscc: ocelot: use ipv6 in the aggregation code IPv6 header information is not currently part of the entropy source for the 4-bit aggregation code used for LAG offload, even though it could be. The hardware reference manual says about these fields: ANA::AGGR_CFG.AC_IP6_TCPUDP_PORT_ENA Use IPv6 TCP/UDP port when calculating aggregation code. Configure identically for all ports. Recommended value is 1. ANA::AGGR_CFG.AC_IP6_FLOW_LBL_ENA Use IPv6 flow label when calculating AC. Configure identically for all ports. Recommended value is 1. Integration with the xmit_hash_policy of the bonding interface is TBD. Signed-off-by: Vladimir Oltean Reviewed-by: Alexandre Belloni Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mscc/ocelot.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c index 33274d4fc5aff..ef3f10f1e54f5 100644 --- a/drivers/net/ethernet/mscc/ocelot.c +++ b/drivers/net/ethernet/mscc/ocelot.c @@ -1615,7 +1615,10 @@ int ocelot_init(struct ocelot *ocelot) ocelot_write(ocelot, ANA_AGGR_CFG_AC_SMAC_ENA | ANA_AGGR_CFG_AC_DMAC_ENA | ANA_AGGR_CFG_AC_IP4_SIPDIP_ENA | - ANA_AGGR_CFG_AC_IP4_TCPUDP_ENA, ANA_AGGR_CFG); + ANA_AGGR_CFG_AC_IP4_TCPUDP_ENA | + ANA_AGGR_CFG_AC_IP6_FLOW_LBL_ENA | + ANA_AGGR_CFG_AC_IP6_TCPUDP_ENA, + ANA_AGGR_CFG); /* Set MAC age time to default value. The entry is aged after * 2*AGE_PERIOD -- GitLab From b80af659699d212cf8cec6593f6551905c4ae86f Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Sat, 6 Feb 2021 00:02:14 +0200 Subject: [PATCH 1968/2012] net: mscc: ocelot: set up the bonding mask in a way that avoids a net_device Since this code should be called from pure switchdev as well as from DSA, we must find a way to determine the bonding mask not by looking directly at the net_device lowers of the bonding interface, since those could have different private structures. We keep a pointer to the bonding upper interface, if present, in struct ocelot_port. Then the bonding mask becomes the bitwise OR of all ports that have the same bonding upper interface. This adds a duplication of functionality with the current "lags" array, but the duplication will be short-lived, since further patches will remove the latter completely. Signed-off-by: Vladimir Oltean Reviewed-by: Alexandre Belloni Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mscc/ocelot.c | 29 ++++++++++++++++++++++------- include/soc/mscc/ocelot.h | 2 ++ 2 files changed, 24 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c index ef3f10f1e54f5..127beedcccdeb 100644 --- a/drivers/net/ethernet/mscc/ocelot.c +++ b/drivers/net/ethernet/mscc/ocelot.c @@ -889,6 +889,24 @@ int ocelot_get_ts_info(struct ocelot *ocelot, int port, } EXPORT_SYMBOL(ocelot_get_ts_info); +static u32 ocelot_get_bond_mask(struct ocelot *ocelot, struct net_device *bond) +{ + u32 mask = 0; + int port; + + for (port = 0; port < ocelot->num_phys_ports; port++) { + struct ocelot_port *ocelot_port = ocelot->ports[port]; + + if (!ocelot_port) + continue; + + if (ocelot_port->bond == bond) + mask |= BIT(port); + } + + return mask; +} + static u32 ocelot_get_dsa_8021q_cpu_mask(struct ocelot *ocelot) { u32 mask = 0; @@ -1319,20 +1337,15 @@ int ocelot_port_lag_join(struct ocelot *ocelot, int port, struct net_device *bond, struct netdev_lag_upper_info *info) { - struct net_device *ndev; u32 bond_mask = 0; int lag, lp; if (info->tx_type != NETDEV_LAG_TX_TYPE_HASH) return -EOPNOTSUPP; - rcu_read_lock(); - for_each_netdev_in_bond_rcu(bond, ndev) { - struct ocelot_port_private *priv = netdev_priv(ndev); + ocelot->ports[port]->bond = bond; - bond_mask |= BIT(priv->chip_port); - } - rcu_read_unlock(); + bond_mask = ocelot_get_bond_mask(ocelot, bond); lp = __ffs(bond_mask); @@ -1366,6 +1379,8 @@ void ocelot_port_lag_leave(struct ocelot *ocelot, int port, u32 port_cfg; int i; + ocelot->ports[port]->bond = NULL; + /* Remove port from any lag */ for (i = 0; i < ocelot->num_phys_ports; i++) ocelot->lags[i] &= ~BIT(port); diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h index 6a61c499a30d1..e36a1ed29c019 100644 --- a/include/soc/mscc/ocelot.h +++ b/include/soc/mscc/ocelot.h @@ -611,6 +611,8 @@ struct ocelot_port { u8 *xmit_template; bool is_dsa_8021q_cpu; + + struct net_device *bond; }; struct ocelot { -- GitLab From 2e9f4afadc702d18f19e8c1183b10307e871a1d7 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Sat, 6 Feb 2021 00:02:15 +0200 Subject: [PATCH 1969/2012] net: mscc: ocelot: avoid unneeded "lp" variable in LAG join The index of the LAG is equal to the logical port ID that all the physical port members have, which is further equal to the index of the first physical port that is a member of the LAG. The code gets a bit carried away with logic like this: if (a == b) c = a; else c = b; which can be simplified, of course, into: c = b; (with a being port, b being lp, c being lag) This further makes the "lp" variable redundant, since we can use "lag" everywhere where "lp" (logical port) was used. So instead of a "c = b" assignment, we can do a complete deletion of b. Only one comment here: if (bond_mask) { lp = __ffs(bond_mask); ocelot->lags[lp] = 0; } lp was clobbered before, because it was used as a temporary variable to hold the new smallest port ID from the bond. Now that we don't have "lp" any longer, we'll just avoid the temporary variable and zeroize the bonding mask directly. Signed-off-by: Vladimir Oltean Reviewed-by: Alexandre Belloni Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mscc/ocelot.c | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c index 127beedcccdeb..7f6fb872f588c 100644 --- a/drivers/net/ethernet/mscc/ocelot.c +++ b/drivers/net/ethernet/mscc/ocelot.c @@ -1338,7 +1338,7 @@ int ocelot_port_lag_join(struct ocelot *ocelot, int port, struct netdev_lag_upper_info *info) { u32 bond_mask = 0; - int lag, lp; + int lag; if (info->tx_type != NETDEV_LAG_TX_TYPE_HASH) return -EOPNOTSUPP; @@ -1347,22 +1347,18 @@ int ocelot_port_lag_join(struct ocelot *ocelot, int port, bond_mask = ocelot_get_bond_mask(ocelot, bond); - lp = __ffs(bond_mask); + lag = __ffs(bond_mask); /* If the new port is the lowest one, use it as the logical port from * now on */ - if (port == lp) { - lag = port; + if (port == lag) { ocelot->lags[port] = bond_mask; bond_mask &= ~BIT(port); - if (bond_mask) { - lp = __ffs(bond_mask); - ocelot->lags[lp] = 0; - } + if (bond_mask) + ocelot->lags[__ffs(bond_mask)] = 0; } else { - lag = lp; - ocelot->lags[lp] |= BIT(port); + ocelot->lags[lag] |= BIT(port); } ocelot_setup_lag(ocelot, lag); -- GitLab From 2527f2e88fbad9873dfebbea6e8e3540128e2661 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Sat, 6 Feb 2021 00:02:16 +0200 Subject: [PATCH 1970/2012] net: mscc: ocelot: set up logical port IDs centrally The setup of logical port IDs is done in two places: from the inconclusively named ocelot_setup_lag and from ocelot_port_lag_leave, a function that also calls ocelot_setup_lag (which apparently does an incomplete setup of the LAG). To improve this situation, we can rename ocelot_setup_lag into ocelot_setup_logical_port_ids, and drop the "lag" argument. It will now set up the logical port IDs of all switch ports, which may be just slightly more inefficient but more maintainable. Signed-off-by: Vladimir Oltean Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mscc/ocelot.c | 47 ++++++++++++++++++------------ 1 file changed, 28 insertions(+), 19 deletions(-) diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c index 7f6fb872f588c..5d765245c6d37 100644 --- a/drivers/net/ethernet/mscc/ocelot.c +++ b/drivers/net/ethernet/mscc/ocelot.c @@ -1316,20 +1316,36 @@ static void ocelot_set_aggr_pgids(struct ocelot *ocelot) } } -static void ocelot_setup_lag(struct ocelot *ocelot, int lag) +/* When offloading a bonding interface, the switch ports configured under the + * same bond must have the same logical port ID, equal to the physical port ID + * of the lowest numbered physical port in that bond. Otherwise, in standalone/ + * bridged mode, each port has a logical port ID equal to its physical port ID. + */ +static void ocelot_setup_logical_port_ids(struct ocelot *ocelot) { - unsigned long bond_mask = ocelot->lags[lag]; - unsigned int p; + int port; - for_each_set_bit(p, &bond_mask, ocelot->num_phys_ports) { - u32 port_cfg = ocelot_read_gix(ocelot, ANA_PORT_PORT_CFG, p); + for (port = 0; port < ocelot->num_phys_ports; port++) { + struct ocelot_port *ocelot_port = ocelot->ports[port]; + struct net_device *bond; + + if (!ocelot_port) + continue; - port_cfg &= ~ANA_PORT_PORT_CFG_PORTID_VAL_M; + bond = ocelot_port->bond; + if (bond) { + int lag = __ffs(ocelot_get_bond_mask(ocelot, bond)); - /* Use lag port as logical port for port i */ - ocelot_write_gix(ocelot, port_cfg | - ANA_PORT_PORT_CFG_PORTID_VAL(lag), - ANA_PORT_PORT_CFG, p); + ocelot_rmw_gix(ocelot, + ANA_PORT_PORT_CFG_PORTID_VAL(lag), + ANA_PORT_PORT_CFG_PORTID_VAL_M, + ANA_PORT_PORT_CFG, port); + } else { + ocelot_rmw_gix(ocelot, + ANA_PORT_PORT_CFG_PORTID_VAL(port), + ANA_PORT_PORT_CFG_PORTID_VAL_M, + ANA_PORT_PORT_CFG, port); + } } } @@ -1361,7 +1377,7 @@ int ocelot_port_lag_join(struct ocelot *ocelot, int port, ocelot->lags[lag] |= BIT(port); } - ocelot_setup_lag(ocelot, lag); + ocelot_setup_logical_port_ids(ocelot); ocelot_apply_bridge_fwd_mask(ocelot); ocelot_set_aggr_pgids(ocelot); @@ -1372,7 +1388,6 @@ EXPORT_SYMBOL(ocelot_port_lag_join); void ocelot_port_lag_leave(struct ocelot *ocelot, int port, struct net_device *bond) { - u32 port_cfg; int i; ocelot->ports[port]->bond = NULL; @@ -1389,15 +1404,9 @@ void ocelot_port_lag_leave(struct ocelot *ocelot, int port, ocelot->lags[n] = ocelot->lags[port]; ocelot->lags[port] = 0; - - ocelot_setup_lag(ocelot, n); } - port_cfg = ocelot_read_gix(ocelot, ANA_PORT_PORT_CFG, port); - port_cfg &= ~ANA_PORT_PORT_CFG_PORTID_VAL_M; - ocelot_write_gix(ocelot, port_cfg | ANA_PORT_PORT_CFG_PORTID_VAL(port), - ANA_PORT_PORT_CFG, port); - + ocelot_setup_logical_port_ids(ocelot); ocelot_apply_bridge_fwd_mask(ocelot); ocelot_set_aggr_pgids(ocelot); } -- GitLab From 528d3f190c98c8f7d9581f68db4af021696727b2 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Sat, 6 Feb 2021 00:02:17 +0200 Subject: [PATCH 1971/2012] net: mscc: ocelot: drop the use of the "lags" array We can now simplify the implementation by always using ocelot_get_bond_mask to look up the other ports that are offloading the same bonding interface as us. In ocelot_set_aggr_pgids, the code had a way to uniquely iterate through LAGs. We need to achieve the same behavior by marking each LAG as visited, which we do now by using a temporary 32-bit "visited" bitmask. This is ok and we do not need dynamic memory allocation, because we know that this switch architecture will not have more than 32 ports (the PGID port masks are 32-bit anyway). Signed-off-by: Vladimir Oltean Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mscc/ocelot.c | 95 ++++++++++++------------------ include/soc/mscc/ocelot.h | 2 - 2 files changed, 39 insertions(+), 58 deletions(-) diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c index 5d765245c6d37..c906c449d2dde 100644 --- a/drivers/net/ethernet/mscc/ocelot.c +++ b/drivers/net/ethernet/mscc/ocelot.c @@ -957,21 +957,11 @@ void ocelot_apply_bridge_fwd_mask(struct ocelot *ocelot) mask = GENMASK(ocelot->num_phys_ports - 1, 0); mask &= ~cpu_fwd_mask; } else if (ocelot->bridge_fwd_mask & BIT(port)) { - int lag; + struct net_device *bond = ocelot_port->bond; mask = ocelot->bridge_fwd_mask & ~BIT(port); - - for (lag = 0; lag < ocelot->num_phys_ports; lag++) { - unsigned long bond_mask = ocelot->lags[lag]; - - if (!bond_mask) - continue; - - if (bond_mask & BIT(port)) { - mask &= ~bond_mask; - break; - } - } + if (bond) + mask &= ~ocelot_get_bond_mask(ocelot, bond); } else { /* Standalone ports forward only to DSA tag_8021q CPU * ports (if those exist), or to the hardware CPU port @@ -1277,6 +1267,7 @@ EXPORT_SYMBOL(ocelot_port_bridge_leave); static void ocelot_set_aggr_pgids(struct ocelot *ocelot) { + unsigned long visited = GENMASK(ocelot->num_phys_ports - 1, 0); int i, port, lag; /* Reset destination and aggregation PGIDS */ @@ -1287,16 +1278,35 @@ static void ocelot_set_aggr_pgids(struct ocelot *ocelot) ocelot_write_rix(ocelot, GENMASK(ocelot->num_phys_ports - 1, 0), ANA_PGID_PGID, i); - /* Now, set PGIDs for each LAG */ + /* The visited ports bitmask holds the list of ports offloading any + * bonding interface. Initially we mark all these ports as unvisited, + * then every time we visit a port in this bitmask, we know that it is + * the lowest numbered port, i.e. the one whose logical ID == physical + * port ID == LAG ID. So we mark as visited all further ports in the + * bitmask that are offloading the same bonding interface. This way, + * we set up the aggregation PGIDs only once per bonding interface. + */ + for (port = 0; port < ocelot->num_phys_ports; port++) { + struct ocelot_port *ocelot_port = ocelot->ports[port]; + + if (!ocelot_port || !ocelot_port->bond) + continue; + + visited &= ~BIT(port); + } + + /* Now, set PGIDs for each active LAG */ for (lag = 0; lag < ocelot->num_phys_ports; lag++) { + struct net_device *bond = ocelot->ports[lag]->bond; unsigned long bond_mask; int aggr_count = 0; u8 aggr_idx[16]; - bond_mask = ocelot->lags[lag]; - if (!bond_mask) + if (!bond || (visited & BIT(lag))) continue; + bond_mask = ocelot_get_bond_mask(ocelot, bond); + for_each_set_bit(port, &bond_mask, ocelot->num_phys_ports) { // Destination mask ocelot_write_rix(ocelot, bond_mask, @@ -1313,6 +1323,19 @@ static void ocelot_set_aggr_pgids(struct ocelot *ocelot) ac |= BIT(aggr_idx[i % aggr_count]); ocelot_write_rix(ocelot, ac, ANA_PGID_PGID, i); } + + /* Mark all ports in the same LAG as visited to avoid applying + * the same config again. + */ + for (port = lag; port < ocelot->num_phys_ports; port++) { + struct ocelot_port *ocelot_port = ocelot->ports[port]; + + if (!ocelot_port) + continue; + + if (ocelot_port->bond == bond) + visited |= BIT(port); + } } } @@ -1353,30 +1376,11 @@ int ocelot_port_lag_join(struct ocelot *ocelot, int port, struct net_device *bond, struct netdev_lag_upper_info *info) { - u32 bond_mask = 0; - int lag; - if (info->tx_type != NETDEV_LAG_TX_TYPE_HASH) return -EOPNOTSUPP; ocelot->ports[port]->bond = bond; - bond_mask = ocelot_get_bond_mask(ocelot, bond); - - lag = __ffs(bond_mask); - - /* If the new port is the lowest one, use it as the logical port from - * now on - */ - if (port == lag) { - ocelot->lags[port] = bond_mask; - bond_mask &= ~BIT(port); - if (bond_mask) - ocelot->lags[__ffs(bond_mask)] = 0; - } else { - ocelot->lags[lag] |= BIT(port); - } - ocelot_setup_logical_port_ids(ocelot); ocelot_apply_bridge_fwd_mask(ocelot); ocelot_set_aggr_pgids(ocelot); @@ -1388,24 +1392,8 @@ EXPORT_SYMBOL(ocelot_port_lag_join); void ocelot_port_lag_leave(struct ocelot *ocelot, int port, struct net_device *bond) { - int i; - ocelot->ports[port]->bond = NULL; - /* Remove port from any lag */ - for (i = 0; i < ocelot->num_phys_ports; i++) - ocelot->lags[i] &= ~BIT(port); - - /* if it was the logical port of the lag, move the lag config to the - * next port - */ - if (ocelot->lags[port]) { - int n = __ffs(ocelot->lags[port]); - - ocelot->lags[n] = ocelot->lags[port]; - ocelot->lags[port] = 0; - } - ocelot_setup_logical_port_ids(ocelot); ocelot_apply_bridge_fwd_mask(ocelot); ocelot_set_aggr_pgids(ocelot); @@ -1587,11 +1575,6 @@ int ocelot_init(struct ocelot *ocelot) } } - ocelot->lags = devm_kcalloc(ocelot->dev, ocelot->num_phys_ports, - sizeof(u32), GFP_KERNEL); - if (!ocelot->lags) - return -ENOMEM; - ocelot->stats = devm_kcalloc(ocelot->dev, ocelot->num_phys_ports * ocelot->num_stats, sizeof(u64), GFP_KERNEL); diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h index e36a1ed29c019..089e552719e08 100644 --- a/include/soc/mscc/ocelot.h +++ b/include/soc/mscc/ocelot.h @@ -657,8 +657,6 @@ struct ocelot { enum ocelot_tag_prefix npi_inj_prefix; enum ocelot_tag_prefix npi_xtr_prefix; - u32 *lags; - struct list_head multicast; struct list_head pgids; -- GitLab From 21357b614d3fcf8203b32468d1c8e6332ea25aa1 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Sat, 6 Feb 2021 00:02:18 +0200 Subject: [PATCH 1972/2012] net: mscc: ocelot: rename aggr_count to num_ports_in_lag It makes it a bit easier to read and understand the code that deals with balancing the 16 aggregation codes among the ports in a certain LAG. Signed-off-by: Vladimir Oltean Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mscc/ocelot.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c index c906c449d2dde..380a5a6617028 100644 --- a/drivers/net/ethernet/mscc/ocelot.c +++ b/drivers/net/ethernet/mscc/ocelot.c @@ -1298,8 +1298,8 @@ static void ocelot_set_aggr_pgids(struct ocelot *ocelot) /* Now, set PGIDs for each active LAG */ for (lag = 0; lag < ocelot->num_phys_ports; lag++) { struct net_device *bond = ocelot->ports[lag]->bond; + int num_ports_in_lag = 0; unsigned long bond_mask; - int aggr_count = 0; u8 aggr_idx[16]; if (!bond || (visited & BIT(lag))) @@ -1311,8 +1311,7 @@ static void ocelot_set_aggr_pgids(struct ocelot *ocelot) // Destination mask ocelot_write_rix(ocelot, bond_mask, ANA_PGID_PGID, port); - aggr_idx[aggr_count] = port; - aggr_count++; + aggr_idx[num_ports_in_lag++] = port; } for_each_aggr_pgid(ocelot, i) { @@ -1320,7 +1319,7 @@ static void ocelot_set_aggr_pgids(struct ocelot *ocelot) ac = ocelot_read_rix(ocelot, ANA_PGID_PGID, i); ac &= ~bond_mask; - ac |= BIT(aggr_idx[i % aggr_count]); + ac |= BIT(aggr_idx[i % num_ports_in_lag]); ocelot_write_rix(ocelot, ac, ANA_PGID_PGID, i); } -- GitLab From 23ca3b727ee6b432166391607b614d3a6beb6784 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Sat, 6 Feb 2021 00:02:19 +0200 Subject: [PATCH 1973/2012] net: mscc: ocelot: rebalance LAGs on link up/down events At present there is an issue when ocelot is offloading a bonding interface, but one of the links of the physical ports goes down. Traffic keeps being hashed towards that destination, and of course gets dropped on egress. Monitor the netdev notifier events emitted by the bonding driver for changes in the physical state of lower interfaces, to determine which ports are active and which ones are no longer. Then extend ocelot_get_bond_mask to return either the configured bonding interfaces, or the active ones, depending on a boolean argument. The code that does rebalancing only needs to do so among the active ports, whereas the bridge forwarding mask and the logical port IDs still need to look at the permanently bonded ports. Signed-off-by: Vladimir Oltean Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mscc/ocelot.c | 41 ++++++++++++++++++++------ drivers/net/ethernet/mscc/ocelot.h | 1 + drivers/net/ethernet/mscc/ocelot_net.c | 30 +++++++++++++++++++ include/soc/mscc/ocelot.h | 1 + 4 files changed, 64 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c index 380a5a6617028..f8b85ab8be5d2 100644 --- a/drivers/net/ethernet/mscc/ocelot.c +++ b/drivers/net/ethernet/mscc/ocelot.c @@ -889,7 +889,8 @@ int ocelot_get_ts_info(struct ocelot *ocelot, int port, } EXPORT_SYMBOL(ocelot_get_ts_info); -static u32 ocelot_get_bond_mask(struct ocelot *ocelot, struct net_device *bond) +static u32 ocelot_get_bond_mask(struct ocelot *ocelot, struct net_device *bond, + bool only_active_ports) { u32 mask = 0; int port; @@ -900,8 +901,12 @@ static u32 ocelot_get_bond_mask(struct ocelot *ocelot, struct net_device *bond) if (!ocelot_port) continue; - if (ocelot_port->bond == bond) + if (ocelot_port->bond == bond) { + if (only_active_ports && !ocelot_port->lag_tx_active) + continue; + mask |= BIT(port); + } } return mask; @@ -960,8 +965,10 @@ void ocelot_apply_bridge_fwd_mask(struct ocelot *ocelot) struct net_device *bond = ocelot_port->bond; mask = ocelot->bridge_fwd_mask & ~BIT(port); - if (bond) - mask &= ~ocelot_get_bond_mask(ocelot, bond); + if (bond) { + mask &= ~ocelot_get_bond_mask(ocelot, bond, + false); + } } else { /* Standalone ports forward only to DSA tag_8021q CPU * ports (if those exist), or to the hardware CPU port @@ -1298,20 +1305,20 @@ static void ocelot_set_aggr_pgids(struct ocelot *ocelot) /* Now, set PGIDs for each active LAG */ for (lag = 0; lag < ocelot->num_phys_ports; lag++) { struct net_device *bond = ocelot->ports[lag]->bond; - int num_ports_in_lag = 0; + int num_active_ports = 0; unsigned long bond_mask; u8 aggr_idx[16]; if (!bond || (visited & BIT(lag))) continue; - bond_mask = ocelot_get_bond_mask(ocelot, bond); + bond_mask = ocelot_get_bond_mask(ocelot, bond, true); for_each_set_bit(port, &bond_mask, ocelot->num_phys_ports) { // Destination mask ocelot_write_rix(ocelot, bond_mask, ANA_PGID_PGID, port); - aggr_idx[num_ports_in_lag++] = port; + aggr_idx[num_active_ports++] = port; } for_each_aggr_pgid(ocelot, i) { @@ -1319,7 +1326,11 @@ static void ocelot_set_aggr_pgids(struct ocelot *ocelot) ac = ocelot_read_rix(ocelot, ANA_PGID_PGID, i); ac &= ~bond_mask; - ac |= BIT(aggr_idx[i % num_ports_in_lag]); + /* Don't do division by zero if there was no active + * port. Just make all aggregation codes zero. + */ + if (num_active_ports) + ac |= BIT(aggr_idx[i % num_active_ports]); ocelot_write_rix(ocelot, ac, ANA_PGID_PGID, i); } @@ -1356,7 +1367,8 @@ static void ocelot_setup_logical_port_ids(struct ocelot *ocelot) bond = ocelot_port->bond; if (bond) { - int lag = __ffs(ocelot_get_bond_mask(ocelot, bond)); + int lag = __ffs(ocelot_get_bond_mask(ocelot, bond, + false)); ocelot_rmw_gix(ocelot, ANA_PORT_PORT_CFG_PORTID_VAL(lag), @@ -1399,6 +1411,17 @@ void ocelot_port_lag_leave(struct ocelot *ocelot, int port, } EXPORT_SYMBOL(ocelot_port_lag_leave); +void ocelot_port_lag_change(struct ocelot *ocelot, int port, bool lag_tx_active) +{ + struct ocelot_port *ocelot_port = ocelot->ports[port]; + + ocelot_port->lag_tx_active = lag_tx_active; + + /* Rebalance the LAGs */ + ocelot_set_aggr_pgids(ocelot); +} +EXPORT_SYMBOL(ocelot_port_lag_change); + /* Configure the maximum SDU (L2 payload) on RX to the value specified in @sdu. * The length of VLAN tags is accounted for automatically via DEV_MAC_TAGS_CFG. * In the special case that it's the NPI port that we're configuring, the diff --git a/drivers/net/ethernet/mscc/ocelot.h b/drivers/net/ethernet/mscc/ocelot.h index 12dc744530761..b18f6644726ac 100644 --- a/drivers/net/ethernet/mscc/ocelot.h +++ b/drivers/net/ethernet/mscc/ocelot.h @@ -114,6 +114,7 @@ int ocelot_port_lag_join(struct ocelot *ocelot, int port, struct netdev_lag_upper_info *info); void ocelot_port_lag_leave(struct ocelot *ocelot, int port, struct net_device *bond); +void ocelot_port_lag_change(struct ocelot *ocelot, int port, bool lag_tx_active); struct net_device *ocelot_port_to_netdev(struct ocelot *ocelot, int port); int ocelot_netdev_to_port(struct net_device *dev); diff --git a/drivers/net/ethernet/mscc/ocelot_net.c b/drivers/net/ethernet/mscc/ocelot_net.c index 0a4de949f4d99..8f12fa45b1b52 100644 --- a/drivers/net/ethernet/mscc/ocelot_net.c +++ b/drivers/net/ethernet/mscc/ocelot_net.c @@ -1164,6 +1164,27 @@ ocelot_netdevice_lag_changeupper(struct net_device *dev, return NOTIFY_DONE; } +static int +ocelot_netdevice_changelowerstate(struct net_device *dev, + struct netdev_lag_lower_state_info *info) +{ + struct ocelot_port_private *priv = netdev_priv(dev); + bool is_active = info->link_up && info->tx_enabled; + struct ocelot_port *ocelot_port = &priv->port; + struct ocelot *ocelot = ocelot_port->ocelot; + int port = priv->chip_port; + + if (!ocelot_port->bond) + return NOTIFY_DONE; + + if (ocelot_port->lag_tx_active == is_active) + return NOTIFY_DONE; + + ocelot_port_lag_change(ocelot, port, is_active); + + return NOTIFY_OK; +} + static int ocelot_netdevice_event(struct notifier_block *unused, unsigned long event, void *ptr) { @@ -1181,6 +1202,15 @@ static int ocelot_netdevice_event(struct notifier_block *unused, break; } + case NETDEV_CHANGELOWERSTATE: { + struct netdev_notifier_changelowerstate_info *info = ptr; + + if (!ocelot_netdevice_dev_check(dev)) + break; + + return ocelot_netdevice_changelowerstate(dev, + info->lower_state_info); + } default: break; } diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h index 089e552719e08..6e806872cd24d 100644 --- a/include/soc/mscc/ocelot.h +++ b/include/soc/mscc/ocelot.h @@ -613,6 +613,7 @@ struct ocelot_port { bool is_dsa_8021q_cpu; struct net_device *bond; + bool lag_tx_active; }; struct ocelot { -- GitLab From a324d3d48fb3cfb7ee4c3a670ed52250d3535697 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Sat, 6 Feb 2021 00:02:20 +0200 Subject: [PATCH 1974/2012] net: dsa: make assisted_learning_on_cpu_port bypass offloaded LAG interfaces Given the following topology, and focusing only on Box A: Box A +----------------------------------+ | Board 1 br0 | | +---------+ | | / \ | | | | | | | bond0 | | | +-----+ | |192.168.1.1 | / \ | | eno0 swp0 swp1 swp2 | +---|--------|-------|-------|-----+ | | | | +--------+ | | Cable | | Cable| |Cable Cable | | +--------+ | | | | | | +---|--------|-------|-------|-----+ | eno0 swp0 swp1 swp2 | |192.168.1.2 | \ / | | | +-----+ | | | bond0 | | | | | | \ / | | +---------+ | | Board 2 br0 | +----------------------------------+ Box B The assisted_learning_on_cpu_port logic will see that swp0 is bridged with a "foreign interface" (bond0) and will therefore install all addresses learnt by the software bridge towards bond0 (including the address of eno0 on Box B) as static addresses towards the CPU port. But that's not what we want - bond0 is not really a "foreign interface" but one we can offload including L2 forwarding from/towards it. So we need to refine our logic for assisted learning such that, whenever we see an address learnt on a non-DSA interface, we search through the tree for any port that offloads that non-DSA interface. Some confusion might arise as to why we search through the whole tree instead of just the local switch returned by dsa_slave_dev_lower_find. Or a different angle of the same confusion: why does dsa_slave_dev_lower_find(br_dev) return a single dp that's under br_dev instead of the whole list of bridged DSA ports? To answer the second question, it should be enough to install the static FDB entry on the CPU port of a single switch in the tree, because dsa_port_fdb_add uses DSA_NOTIFIER_FDB_ADD which ensures that all other switches in the tree get notified of that address, and add the entry themselves using dsa_towards_port(). This should help understand the answer to the first question: the port returned by dsa_slave_dev_lower_find may not be on the same switch as the ports that offload the LAG. Nonetheless, if the driver implements .crosschip_lag_join and .crosschip_bridge_join as mv88e6xxx does, there still isn't any reason for trapping addresses learnt on the remote LAG towards the CPU, and we should prevent that. Signed-off-by: Vladimir Oltean Signed-off-by: Jakub Kicinski --- net/dsa/dsa_priv.h | 13 +++++++++++++ net/dsa/slave.c | 8 ++++++++ 2 files changed, 21 insertions(+) diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h index 263593ce94a81..8a1bcb2b42085 100644 --- a/net/dsa/dsa_priv.h +++ b/net/dsa/dsa_priv.h @@ -209,6 +209,19 @@ static inline bool dsa_port_offloads_netdev(struct dsa_port *dp, return false; } +/* Returns true if any port of this tree offloads the given net_device */ +static inline bool dsa_tree_offloads_netdev(struct dsa_switch_tree *dst, + struct net_device *dev) +{ + struct dsa_port *dp; + + list_for_each_entry(dp, &dst->ports, list) + if (dsa_port_offloads_netdev(dp, dev)) + return true; + + return false; +} + /* slave.c */ extern const struct dsa_device_ops notag_netdev_ops; void dsa_slave_mii_bus_init(struct dsa_switch *ds); diff --git a/net/dsa/slave.c b/net/dsa/slave.c index f77e9eeb1a620..431bdbdd8473f 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -2242,6 +2242,14 @@ static int dsa_slave_switchdev_event(struct notifier_block *unused, if (!dp->ds->assisted_learning_on_cpu_port) return NOTIFY_DONE; + + /* When the bridge learns an address on an offloaded + * LAG we don't want to send traffic to the CPU, the + * other ports bridged with the LAG should be able to + * autonomously forward towards it. + */ + if (dsa_tree_offloads_netdev(dp->ds->dst, dev)) + return NOTIFY_DONE; } if (!dp->ds->ops->port_fdb_add || !dp->ds->ops->port_fdb_del) -- GitLab From 8fe6832e96acbf9d5777fc0b13e3e680ff46ba11 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Sat, 6 Feb 2021 00:02:21 +0200 Subject: [PATCH 1975/2012] net: dsa: felix: propagate the LAG offload ops towards the ocelot lib The ocelot switch has been supporting LAG offload since its initial commit, however felix could not make use of that, due to lack of a LAG abstraction in DSA. Now that we have that, let's forward DSA's calls towards the ocelot library, who will deal with setting up the bonding. Signed-off-by: Vladimir Oltean Signed-off-by: Jakub Kicinski --- drivers/net/dsa/ocelot/felix.c | 32 ++++++++++++++++++++++++++++++ drivers/net/ethernet/mscc/ocelot.h | 6 ------ include/soc/mscc/ocelot.h | 6 ++++++ 3 files changed, 38 insertions(+), 6 deletions(-) diff --git a/drivers/net/dsa/ocelot/felix.c b/drivers/net/dsa/ocelot/felix.c index 167463010b556..1bd5aea12b252 100644 --- a/drivers/net/dsa/ocelot/felix.c +++ b/drivers/net/dsa/ocelot/felix.c @@ -569,6 +569,35 @@ static void felix_bridge_leave(struct dsa_switch *ds, int port, ocelot_port_bridge_leave(ocelot, port, br); } +static int felix_lag_join(struct dsa_switch *ds, int port, + struct net_device *bond, + struct netdev_lag_upper_info *info) +{ + struct ocelot *ocelot = ds->priv; + + return ocelot_port_lag_join(ocelot, port, bond, info); +} + +static int felix_lag_leave(struct dsa_switch *ds, int port, + struct net_device *bond) +{ + struct ocelot *ocelot = ds->priv; + + ocelot_port_lag_leave(ocelot, port, bond); + + return 0; +} + +static int felix_lag_change(struct dsa_switch *ds, int port) +{ + struct dsa_port *dp = dsa_to_port(ds, port); + struct ocelot *ocelot = ds->priv; + + ocelot_port_lag_change(ocelot, port, dp->lag_tx_enabled); + + return 0; +} + static int felix_vlan_prepare(struct dsa_switch *ds, int port, const struct switchdev_obj_port_vlan *vlan) { @@ -1331,6 +1360,9 @@ const struct dsa_switch_ops felix_switch_ops = { .port_mdb_del = felix_mdb_del, .port_bridge_join = felix_bridge_join, .port_bridge_leave = felix_bridge_leave, + .port_lag_join = felix_lag_join, + .port_lag_leave = felix_lag_leave, + .port_lag_change = felix_lag_change, .port_stp_state_set = felix_bridge_stp_state_set, .port_vlan_filtering = felix_vlan_filtering, .port_vlan_add = felix_vlan_add, diff --git a/drivers/net/ethernet/mscc/ocelot.h b/drivers/net/ethernet/mscc/ocelot.h index b18f6644726ac..c485795c606ba 100644 --- a/drivers/net/ethernet/mscc/ocelot.h +++ b/drivers/net/ethernet/mscc/ocelot.h @@ -109,12 +109,6 @@ int ocelot_mact_learn(struct ocelot *ocelot, int port, unsigned int vid, enum macaccess_entry_type type); int ocelot_mact_forget(struct ocelot *ocelot, const unsigned char mac[ETH_ALEN], unsigned int vid); -int ocelot_port_lag_join(struct ocelot *ocelot, int port, - struct net_device *bond, - struct netdev_lag_upper_info *info); -void ocelot_port_lag_leave(struct ocelot *ocelot, int port, - struct net_device *bond); -void ocelot_port_lag_change(struct ocelot *ocelot, int port, bool lag_tx_active); struct net_device *ocelot_port_to_netdev(struct ocelot *ocelot, int port); int ocelot_netdev_to_port(struct net_device *dev); diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h index 6e806872cd24d..d0d48e9620fb7 100644 --- a/include/soc/mscc/ocelot.h +++ b/include/soc/mscc/ocelot.h @@ -798,6 +798,12 @@ int ocelot_port_mdb_add(struct ocelot *ocelot, int port, const struct switchdev_obj_port_mdb *mdb); int ocelot_port_mdb_del(struct ocelot *ocelot, int port, const struct switchdev_obj_port_mdb *mdb); +int ocelot_port_lag_join(struct ocelot *ocelot, int port, + struct net_device *bond, + struct netdev_lag_upper_info *info); +void ocelot_port_lag_leave(struct ocelot *ocelot, int port, + struct net_device *bond); +void ocelot_port_lag_change(struct ocelot *ocelot, int port, bool lag_tx_active); int ocelot_devlink_sb_register(struct ocelot *ocelot); void ocelot_devlink_sb_unregister(struct ocelot *ocelot); -- GitLab From 63ec9be13372759511ea868dbc59f439e936d2c6 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Fri, 5 Feb 2021 16:10:54 -0600 Subject: [PATCH 1976/2012] net: ipa: move mutex calls into __gsi_channel_stop() Move the mutex calls out of gsi_channel_stop_retry() and into __gsi_channel_stop(), to make the latter more semantically similar to __gsi_channel_start(). Signed-off-by: Alex Elder Signed-off-by: Jakub Kicinski --- drivers/net/ipa/gsi.c | 28 +++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/drivers/net/ipa/gsi.c b/drivers/net/ipa/gsi.c index 53640447bf123..f0432c965168c 100644 --- a/drivers/net/ipa/gsi.c +++ b/drivers/net/ipa/gsi.c @@ -910,11 +910,8 @@ int gsi_channel_start(struct gsi *gsi, u32 channel_id) static int gsi_channel_stop_retry(struct gsi_channel *channel) { u32 retries = GSI_CHANNEL_STOP_RETRIES; - struct gsi *gsi = channel->gsi; int ret; - mutex_lock(&gsi->mutex); - do { ret = gsi_channel_stop_command(channel); if (ret != -EAGAIN) @@ -922,24 +919,33 @@ static int gsi_channel_stop_retry(struct gsi_channel *channel) usleep_range(3 * USEC_PER_MSEC, 5 * USEC_PER_MSEC); } while (retries--); - mutex_unlock(&gsi->mutex); - return ret; } static int __gsi_channel_stop(struct gsi_channel *channel, bool stop) { + struct gsi *gsi = channel->gsi; int ret; /* Wait for any underway transactions to complete before stopping. */ gsi_channel_trans_quiesce(channel); - ret = stop ? gsi_channel_stop_retry(channel) : 0; - /* Finally, ensure NAPI polling has finished. */ - if (!ret) - napi_synchronize(&channel->napi); + if (!stop) + return 0; - return ret; + mutex_lock(&gsi->mutex); + + ret = gsi_channel_stop_retry(channel); + + mutex_unlock(&gsi->mutex); + + if (ret) + return ret; + + /* Ensure NAPI polling has finished. */ + napi_synchronize(&channel->napi); + + return 0; } /* Stop a started channel */ @@ -948,11 +954,11 @@ int gsi_channel_stop(struct gsi *gsi, u32 channel_id) struct gsi_channel *channel = &gsi->channel[channel_id]; int ret; - /* Only disable the completion interrupt if stop is successful */ ret = __gsi_channel_stop(channel, true); if (ret) return ret; + /* Disable the completion interrupt and NAPI if successful */ gsi_irq_ieob_disable_one(gsi, channel->evt_ring_id); napi_disable(&channel->napi); -- GitLab From b1750723c99c5a4d9b452b5e51a9fd3227fceecb Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Fri, 5 Feb 2021 16:10:55 -0600 Subject: [PATCH 1977/2012] net: ipa: synchronize NAPI only for suspend When stopping a channel, gsi_channel_stop() will ensure NAPI polling is complete when it calls napi_disable(). So there is no need to call napi_synchronize() in that case. Move the call to napi_synchronize() out of __gsi_channel_stop() and into gsi_channel_suspend(), so it's only used where needed. Signed-off-by: Alex Elder Signed-off-by: Jakub Kicinski --- drivers/net/ipa/gsi.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/drivers/net/ipa/gsi.c b/drivers/net/ipa/gsi.c index f0432c965168c..60eb765c53647 100644 --- a/drivers/net/ipa/gsi.c +++ b/drivers/net/ipa/gsi.c @@ -939,13 +939,7 @@ static int __gsi_channel_stop(struct gsi_channel *channel, bool stop) mutex_unlock(&gsi->mutex); - if (ret) - return ret; - - /* Ensure NAPI polling has finished. */ - napi_synchronize(&channel->napi); - - return 0; + return ret; } /* Stop a started channel */ @@ -987,8 +981,16 @@ void gsi_channel_reset(struct gsi *gsi, u32 channel_id, bool doorbell) int gsi_channel_suspend(struct gsi *gsi, u32 channel_id, bool stop) { struct gsi_channel *channel = &gsi->channel[channel_id]; + int ret; + + ret = __gsi_channel_stop(channel, stop); + if (ret) + return ret; - return __gsi_channel_stop(channel, stop); + /* Ensure NAPI polling has finished. */ + napi_synchronize(&channel->napi); + + return 0; } /* Resume a suspended channel (starting will be requested if STOPPED) */ -- GitLab From 3f77c926f649eed686f36a1e6888abb698146a2a Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Fri, 5 Feb 2021 16:10:56 -0600 Subject: [PATCH 1978/2012] net: ipa: do not cache event ring state An event ring's state only needs to be known when it is allocated, reset, or deallocated. We check an event ring's state both before and after performing an event ring control command that changes its state. These are only issued at startup and shutdown, so there is very little value in caching the state. Stop recording a copy of the channel's last known state, and instead fetch the true state from hardware whenever it's needed. In such cases, *do* record the state in a local variable, in case an error message reports it (so the value reported is the value seen). Signed-off-by: Alex Elder Signed-off-by: Jakub Kicinski --- drivers/net/ipa/gsi.c | 39 +++++++++++++++++++++------------------ drivers/net/ipa/gsi.h | 1 - 2 files changed, 21 insertions(+), 19 deletions(-) diff --git a/drivers/net/ipa/gsi.c b/drivers/net/ipa/gsi.c index 60eb765c53647..511c94f66036c 100644 --- a/drivers/net/ipa/gsi.c +++ b/drivers/net/ipa/gsi.c @@ -408,30 +408,31 @@ static void gsi_evt_ring_command(struct gsi *gsi, u32 evt_ring_id, return; dev_err(dev, "GSI command %u for event ring %u timed out, state %u\n", - opcode, evt_ring_id, evt_ring->state); + opcode, evt_ring_id, gsi_evt_ring_state(gsi, evt_ring_id)); } /* Allocate an event ring in NOT_ALLOCATED state */ static int gsi_evt_ring_alloc_command(struct gsi *gsi, u32 evt_ring_id) { - struct gsi_evt_ring *evt_ring = &gsi->evt_ring[evt_ring_id]; + enum gsi_evt_ring_state state; /* Get initial event ring state */ - evt_ring->state = gsi_evt_ring_state(gsi, evt_ring_id); - if (evt_ring->state != GSI_EVT_RING_STATE_NOT_ALLOCATED) { + state = gsi_evt_ring_state(gsi, evt_ring_id); + if (state != GSI_EVT_RING_STATE_NOT_ALLOCATED) { dev_err(gsi->dev, "event ring %u bad state %u before alloc\n", - evt_ring_id, evt_ring->state); + evt_ring_id, state); return -EINVAL; } gsi_evt_ring_command(gsi, evt_ring_id, GSI_EVT_ALLOCATE); /* If successful the event ring state will have changed */ - if (evt_ring->state == GSI_EVT_RING_STATE_ALLOCATED) + state = gsi_evt_ring_state(gsi, evt_ring_id); + if (state == GSI_EVT_RING_STATE_ALLOCATED) return 0; dev_err(gsi->dev, "event ring %u bad state %u after alloc\n", - evt_ring_id, evt_ring->state); + evt_ring_id, state); return -EIO; } @@ -439,45 +440,48 @@ static int gsi_evt_ring_alloc_command(struct gsi *gsi, u32 evt_ring_id) /* Reset a GSI event ring in ALLOCATED or ERROR state. */ static void gsi_evt_ring_reset_command(struct gsi *gsi, u32 evt_ring_id) { - struct gsi_evt_ring *evt_ring = &gsi->evt_ring[evt_ring_id]; - enum gsi_evt_ring_state state = evt_ring->state; + enum gsi_evt_ring_state state; + state = gsi_evt_ring_state(gsi, evt_ring_id); if (state != GSI_EVT_RING_STATE_ALLOCATED && state != GSI_EVT_RING_STATE_ERROR) { dev_err(gsi->dev, "event ring %u bad state %u before reset\n", - evt_ring_id, evt_ring->state); + evt_ring_id, state); return; } gsi_evt_ring_command(gsi, evt_ring_id, GSI_EVT_RESET); /* If successful the event ring state will have changed */ - if (evt_ring->state == GSI_EVT_RING_STATE_ALLOCATED) + state = gsi_evt_ring_state(gsi, evt_ring_id); + if (state == GSI_EVT_RING_STATE_ALLOCATED) return; dev_err(gsi->dev, "event ring %u bad state %u after reset\n", - evt_ring_id, evt_ring->state); + evt_ring_id, state); } /* Issue a hardware de-allocation request for an allocated event ring */ static void gsi_evt_ring_de_alloc_command(struct gsi *gsi, u32 evt_ring_id) { - struct gsi_evt_ring *evt_ring = &gsi->evt_ring[evt_ring_id]; + enum gsi_evt_ring_state state; - if (evt_ring->state != GSI_EVT_RING_STATE_ALLOCATED) { + state = gsi_evt_ring_state(gsi, evt_ring_id); + if (state != GSI_EVT_RING_STATE_ALLOCATED) { dev_err(gsi->dev, "event ring %u state %u before dealloc\n", - evt_ring_id, evt_ring->state); + evt_ring_id, state); return; } gsi_evt_ring_command(gsi, evt_ring_id, GSI_EVT_DE_ALLOC); /* If successful the event ring state will have changed */ - if (evt_ring->state == GSI_EVT_RING_STATE_NOT_ALLOCATED) + state = gsi_evt_ring_state(gsi, evt_ring_id); + if (state == GSI_EVT_RING_STATE_NOT_ALLOCATED) return; dev_err(gsi->dev, "event ring %u bad state %u after dealloc\n", - evt_ring_id, evt_ring->state); + evt_ring_id, state); } /* Fetch the current state of a channel from hardware */ @@ -1107,7 +1111,6 @@ static void gsi_isr_evt_ctrl(struct gsi *gsi) event_mask ^= BIT(evt_ring_id); evt_ring = &gsi->evt_ring[evt_ring_id]; - evt_ring->state = gsi_evt_ring_state(gsi, evt_ring_id); complete(&evt_ring->completion); } diff --git a/drivers/net/ipa/gsi.h b/drivers/net/ipa/gsi.h index 96c9aed397aad..d674db0ba4eb0 100644 --- a/drivers/net/ipa/gsi.h +++ b/drivers/net/ipa/gsi.h @@ -142,7 +142,6 @@ enum gsi_evt_ring_state { struct gsi_evt_ring { struct gsi_channel *channel; struct completion completion; /* signals event ring state changes */ - enum gsi_evt_ring_state state; struct gsi_ring ring; }; -- GitLab From d5bc5015eb9d64cbd14e467db1a56db1472d0d6c Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Fri, 5 Feb 2021 16:10:57 -0600 Subject: [PATCH 1979/2012] net: ipa: remove two unused register definitions We do not support inter-EE channel or event ring commands. Inter-EE interrupts are disabled (and never re-enabled) for all channels and event rings, so we have no need for the GSI registers that clear those interrupt conditions. So remove their definitions. Signed-off-by: Alex Elder Signed-off-by: Jakub Kicinski --- drivers/net/ipa/gsi_reg.h | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/drivers/net/ipa/gsi_reg.h b/drivers/net/ipa/gsi_reg.h index 0e138bbd82053..299456e70f286 100644 --- a/drivers/net/ipa/gsi_reg.h +++ b/drivers/net/ipa/gsi_reg.h @@ -59,16 +59,6 @@ #define GSI_INTER_EE_N_SRC_EV_CH_IRQ_OFFSET(ee) \ (0x0000c01c + 0x1000 * (ee)) -#define GSI_INTER_EE_SRC_CH_IRQ_CLR_OFFSET \ - GSI_INTER_EE_N_SRC_CH_IRQ_CLR_OFFSET(GSI_EE_AP) -#define GSI_INTER_EE_N_SRC_CH_IRQ_CLR_OFFSET(ee) \ - (0x0000c028 + 0x1000 * (ee)) - -#define GSI_INTER_EE_SRC_EV_CH_IRQ_CLR_OFFSET \ - GSI_INTER_EE_N_SRC_EV_CH_IRQ_CLR_OFFSET(GSI_EE_AP) -#define GSI_INTER_EE_N_SRC_EV_CH_IRQ_CLR_OFFSET(ee) \ - (0x0000c02c + 0x1000 * (ee)) - #define GSI_CH_C_CNTXT_0_OFFSET(ch) \ GSI_EE_N_CH_C_CNTXT_0_OFFSET((ch), GSI_EE_AP) #define GSI_EE_N_CH_C_CNTXT_0_OFFSET(ch, ee) \ -- GitLab From 9af5ccf32383005070092e51b15cee51584323c0 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Fri, 5 Feb 2021 16:10:58 -0600 Subject: [PATCH 1980/2012] net: ipa: use a Boolean rather than count when replenishing The count argument to ipa_endpoint_replenish() is only ever 0 or 1, and always will be (because we always handle each receive buffer in a single transaction). Rename the argument to be add_one and change it to be Boolean. Update the function description to reflect the current code. Signed-off-by: Alex Elder Signed-off-by: Jakub Kicinski --- drivers/net/ipa/ipa_endpoint.c | 35 ++++++++++++++++++---------------- 1 file changed, 19 insertions(+), 16 deletions(-) diff --git a/drivers/net/ipa/ipa_endpoint.c b/drivers/net/ipa/ipa_endpoint.c index 7a46c790afbef..bff5d6ffd1186 100644 --- a/drivers/net/ipa/ipa_endpoint.c +++ b/drivers/net/ipa/ipa_endpoint.c @@ -1020,31 +1020,34 @@ err_free_pages: } /** - * ipa_endpoint_replenish() - Replenish the Rx packets cache. + * ipa_endpoint_replenish() - Replenish endpoint receive buffers * @endpoint: Endpoint to be replenished - * @count: Number of buffers to send to hardware + * @add_one: Whether this is replacing a just-consumed buffer * - * Allocate RX packet wrapper structures with maximal socket buffers - * for an endpoint. These are supplied to the hardware, which fills - * them with incoming data. + * The IPA hardware can hold a fixed number of receive buffers for an RX + * endpoint, based on the number of entries in the underlying channel ring + * buffer. If an endpoint's "backlog" is non-zero, it indicates how many + * more receive buffers can be supplied to the hardware. Replenishing for + * an endpoint can be disabled, in which case requests to replenish a + * buffer are "saved", and transferred to the backlog once it is re-enabled + * again. */ -static void ipa_endpoint_replenish(struct ipa_endpoint *endpoint, u32 count) +static void ipa_endpoint_replenish(struct ipa_endpoint *endpoint, bool add_one) { struct gsi *gsi; u32 backlog; if (!endpoint->replenish_enabled) { - if (count) - atomic_add(count, &endpoint->replenish_saved); + if (add_one) + atomic_inc(&endpoint->replenish_saved); return; } - while (atomic_dec_not_zero(&endpoint->replenish_backlog)) if (ipa_endpoint_replenish_one(endpoint)) goto try_again_later; - if (count) - atomic_add(count, &endpoint->replenish_backlog); + if (add_one) + atomic_inc(&endpoint->replenish_backlog); return; @@ -1052,8 +1055,8 @@ try_again_later: /* The last one didn't succeed, so fix the backlog */ backlog = atomic_inc_return(&endpoint->replenish_backlog); - if (count) - atomic_add(count, &endpoint->replenish_backlog); + if (add_one) + atomic_inc(&endpoint->replenish_backlog); /* Whenever a receive buffer transaction completes we'll try to * replenish again. It's unlikely, but if we fail to supply even @@ -1080,7 +1083,7 @@ static void ipa_endpoint_replenish_enable(struct ipa_endpoint *endpoint) /* Start replenishing if hardware currently has no buffers */ max_backlog = gsi_channel_tre_max(gsi, endpoint->channel_id); if (atomic_read(&endpoint->replenish_backlog) == max_backlog) - ipa_endpoint_replenish(endpoint, 0); + ipa_endpoint_replenish(endpoint, false); } static void ipa_endpoint_replenish_disable(struct ipa_endpoint *endpoint) @@ -1099,7 +1102,7 @@ static void ipa_endpoint_replenish_work(struct work_struct *work) endpoint = container_of(dwork, struct ipa_endpoint, replenish_work); - ipa_endpoint_replenish(endpoint, 0); + ipa_endpoint_replenish(endpoint, false); } static void ipa_endpoint_skb_copy(struct ipa_endpoint *endpoint, @@ -1300,7 +1303,7 @@ static void ipa_endpoint_rx_complete(struct ipa_endpoint *endpoint, { struct page *page; - ipa_endpoint_replenish(endpoint, 1); + ipa_endpoint_replenish(endpoint, true); if (trans->cancelled) return; -- GitLab From 4873537430e5b6bbfc505a6a7b07a7c5e92ddffc Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Fri, 5 Feb 2021 16:10:59 -0600 Subject: [PATCH 1981/2012] net: ipa: get rid of status size constraint There is a build-time check that the packet status structure is a multiple of 4 bytes in size. It's not clear where that constraint comes from, but the structure defines what hardware provides so its definition won't change. Get rid of the check; it adds no value. Signed-off-by: Alex Elder Signed-off-by: Jakub Kicinski --- drivers/net/ipa/ipa_endpoint.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/net/ipa/ipa_endpoint.c b/drivers/net/ipa/ipa_endpoint.c index bff5d6ffd1186..7209ee3c31244 100644 --- a/drivers/net/ipa/ipa_endpoint.c +++ b/drivers/net/ipa/ipa_endpoint.c @@ -174,9 +174,6 @@ static bool ipa_endpoint_data_valid(struct ipa *ipa, u32 count, enum ipa_endpoint_name name; u32 limit; - /* Not sure where this constraint come from... */ - BUILD_BUG_ON(sizeof(struct ipa_status) % 4); - if (count > IPA_ENDPOINT_COUNT) { dev_err(dev, "too many endpoints specified (%u > %u)\n", count, IPA_ENDPOINT_COUNT); -- GitLab From cd1150098f2cc7bd05740c105488c293f6761f5a Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Fri, 5 Feb 2021 16:11:00 -0600 Subject: [PATCH 1982/2012] net: ipa: avoid field overflow It's possible that the length passed to ipa_header_size_encoded() is larger than what can be represented by the HDR_LEN field alone (starting with IPA v4.5). If we attempted that, u32_encode_bits() would trigger a build-time error. Avoid this problem by masking off high-order bits of the value encoded as the lower portion of the header length. The same sort of problem exists in ipa_metadata_offset_encoded(), so implement the same fix there. Signed-off-by: Alex Elder Signed-off-by: Jakub Kicinski --- drivers/net/ipa/ipa_reg.h | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/drivers/net/ipa/ipa_reg.h b/drivers/net/ipa/ipa_reg.h index e6b0827a244ec..732e691e9aa62 100644 --- a/drivers/net/ipa/ipa_reg.h +++ b/drivers/net/ipa/ipa_reg.h @@ -408,15 +408,18 @@ enum ipa_cs_offload_en { static inline u32 ipa_header_size_encoded(enum ipa_version version, u32 header_size) { + u32 size = header_size & field_mask(HDR_LEN_FMASK); u32 val; - val = u32_encode_bits(header_size, HDR_LEN_FMASK); - if (version < IPA_VERSION_4_5) + val = u32_encode_bits(size, HDR_LEN_FMASK); + if (version < IPA_VERSION_4_5) { + /* ipa_assert(header_size == size); */ return val; + } /* IPA v4.5 adds a few more most-significant bits */ - header_size >>= hweight32(HDR_LEN_FMASK); - val |= u32_encode_bits(header_size, HDR_LEN_MSB_FMASK); + size = header_size >> hweight32(HDR_LEN_FMASK); + val |= u32_encode_bits(size, HDR_LEN_MSB_FMASK); return val; } @@ -425,15 +428,18 @@ static inline u32 ipa_header_size_encoded(enum ipa_version version, static inline u32 ipa_metadata_offset_encoded(enum ipa_version version, u32 offset) { + u32 off = offset & field_mask(HDR_OFST_METADATA_FMASK); u32 val; - val = u32_encode_bits(offset, HDR_OFST_METADATA_FMASK); - if (version < IPA_VERSION_4_5) + val = u32_encode_bits(off, HDR_OFST_METADATA_FMASK); + if (version < IPA_VERSION_4_5) { + /* ipa_assert(offset == off); */ return val; + } /* IPA v4.5 adds a few more most-significant bits */ - offset >>= hweight32(HDR_OFST_METADATA_FMASK); - val |= u32_encode_bits(offset, HDR_OFST_METADATA_MSB_FMASK); + off = offset >> hweight32(HDR_OFST_METADATA_FMASK); + val |= u32_encode_bits(off, HDR_OFST_METADATA_MSB_FMASK); return val; } -- GitLab From 21c85974aab7211619d39364990427af543c88ac Mon Sep 17 00:00:00 2001 From: Xie He Date: Fri, 5 Feb 2021 14:41:24 -0800 Subject: [PATCH 1983/2012] net/packet: Improve the comment about LL header visibility criteria The "dev_has_header" function, recently added in commit d549699048b4 ("net/packet: fix packet receive on L3 devices without visible hard header"), is more accurate as criteria for determining whether a device exposes the LL header to upper layers, because in addition to dev->header_ops, it also checks for dev->header_ops->create. When transmitting an skb on a device, dev_hard_header can be called to generate an LL header. dev_hard_header will only generate a header if dev->header_ops->create is present. Signed-off-by: Xie He Acked-by: Willem de Bruijn Link: https://lore.kernel.org/r/20210205224124.21345-1-xie.he.0141@gmail.com Signed-off-by: Jakub Kicinski --- net/packet/af_packet.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 6bbc7a4485938..e24b2841c643a 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -135,11 +135,11 @@ Resume On transmit: ------------ -dev->header_ops != NULL +dev_has_header(dev) == true mac_header -> ll header data -> ll header -dev->header_ops == NULL (ll header is invisible to us) +dev_has_header(dev) == false (ll header is invisible to us) mac_header -> data data -> data -- GitLab From 7274c4147afbf46f45b8501edbdad6da8cd013b9 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Fri, 5 Feb 2021 22:48:53 +0100 Subject: [PATCH 1984/2012] r8169: don't try to disable interrupts if NAPI is scheduled already There's no benefit in trying to disable interrupts if NAPI is scheduled already. This allows us to save a PCI write in this case. Signed-off-by: Heiner Kallweit Link: https://lore.kernel.org/r/78c7f2fb-9772-1015-8c1d-632cbdff253f@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/realtek/r8169_main.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index 7b053a1f6d599..04231585ef79a 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -4549,8 +4549,10 @@ static irqreturn_t rtl8169_interrupt(int irq, void *dev_instance) rtl_schedule_task(tp, RTL_FLAG_TASK_RESET_PENDING); } - rtl_irq_disable(tp); - napi_schedule(&tp->napi); + if (napi_schedule_prep(&tp->napi)) { + rtl_irq_disable(tp); + __napi_schedule(&tp->napi); + } out: rtl_ack_events(tp, status); -- GitLab From ca04217add8e6c9de96ffb32c4acc8da3fde890f Mon Sep 17 00:00:00 2001 From: Emil Renner Berthing Date: Tue, 26 Jan 2021 18:15:50 +0100 Subject: [PATCH 1985/2012] rtlwifi: use tasklet_setup to initialize rx_work_tasklet In commit d3ccc14dfe95 most of the tasklets in this driver was updated to the new API. However for the rx_work_tasklet only the type of the callback was changed from void _rtl_rx_work(unsigned long data) to void _rtl_rx_work(struct tasklet_struct *t). The initialization of rx_work_tasklet was still open-coded and the function pointer just cast into the old type, and hence nothing sets rx_work_tasklet.use_callback = true and the callback was still called as t->func(t->data); with uninitialized/zero t->data. Commit 6b8c7574a5f8 changed the casting of _rtl_rx_work a bit and initialized t->data to a pointer to the tasklet cast to an unsigned long. This way calling t->func(t->data) might actually work through all the casting, but it still doesn't update the code to use the new tasklet API. Let's use the new tasklet_setup to initialize rx_work_tasklet properly and set rx_work_tasklet.use_callback = true so that the callback is called as t->callback(t); without all the casting. Signed-off-by: Emil Renner Berthing Acked-by: Willem de Bruijn Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20210126171550.3066-1-kernel@esmil.dk --- drivers/net/wireless/realtek/rtlwifi/usb.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/wireless/realtek/rtlwifi/usb.c b/drivers/net/wireless/realtek/rtlwifi/usb.c index d62b87f010c9f..6c5e242b1bc57 100644 --- a/drivers/net/wireless/realtek/rtlwifi/usb.c +++ b/drivers/net/wireless/realtek/rtlwifi/usb.c @@ -310,8 +310,7 @@ static int _rtl_usb_init_rx(struct ieee80211_hw *hw) init_usb_anchor(&rtlusb->rx_cleanup_urbs); skb_queue_head_init(&rtlusb->rx_queue); - rtlusb->rx_work_tasklet.func = (void(*))_rtl_rx_work; - rtlusb->rx_work_tasklet.data = (unsigned long)&rtlusb->rx_work_tasklet; + tasklet_setup(&rtlusb->rx_work_tasklet, _rtl_rx_work); return 0; } -- GitLab From 711fa16f1dfe1a521dff48f49a95504eeafffa66 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 28 Jan 2021 17:10:48 +0000 Subject: [PATCH 1986/2012] rtlwifi: rtl8192se: remove redundant initialization of variable rtstatus The variable rtstatu is being initialized with a value that is never read and it is being updated later with a new value. The initialization is redundant and can be removed. Addresses-Coverity: ("Unused value") Signed-off-by: Colin Ian King Acked-by: Willem de Bruijn Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20210128171048.644669-1-colin.king@canonical.com --- drivers/net/wireless/realtek/rtlwifi/rtl8192se/phy.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192se/phy.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192se/phy.c index 63283d9e74850..aaa004d4d6d0a 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8192se/phy.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192se/phy.c @@ -1017,7 +1017,7 @@ bool rtl92s_phy_bb_config(struct ieee80211_hw *hw) { struct rtl_priv *rtlpriv = rtl_priv(hw); struct rtl_phy *rtlphy = &(rtlpriv->phy); - bool rtstatus = true; + bool rtstatus; u8 pathmap, index, rf_num = 0; u8 path1, path2; -- GitLab From adba838af159914eb98fcd55bfd3a89c9a7d41a8 Mon Sep 17 00:00:00 2001 From: Guo-Feng Fan Date: Tue, 2 Feb 2021 13:50:10 +0800 Subject: [PATCH 1987/2012] rtw88: coex: 8821c: correct antenna switch function This patch fixes a defect that uses incorrect function to access registers. Use 8 and 32 bit access function to access 8 and 32 bit long data respectively. Signed-off-by: Guo-Feng Fan Signed-off-by: Ping-Ke Shih Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20210202055012.8296-2-pkshih@realtek.com --- drivers/net/wireless/realtek/rtw88/rtw8821c.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/net/wireless/realtek/rtw88/rtw8821c.c b/drivers/net/wireless/realtek/rtw88/rtw8821c.c index 74155c999ebb4..8f53afb678704 100644 --- a/drivers/net/wireless/realtek/rtw88/rtw8821c.c +++ b/drivers/net/wireless/realtek/rtw88/rtw8821c.c @@ -719,8 +719,8 @@ static void rtw8821c_coex_cfg_ant_switch(struct rtw_dev *rtwdev, u8 ctrl_type, regval = (!polarity_inverse ? 0x1 : 0x2); } - rtw_write8_mask(rtwdev, REG_RFE_CTRL8, BIT_MASK_R_RFE_SEL_15, - regval); + rtw_write32_mask(rtwdev, REG_RFE_CTRL8, BIT_MASK_R_RFE_SEL_15, + regval); break; case COEX_SWITCH_CTRL_BY_PTA: rtw_write32_clr(rtwdev, REG_LED_CFG, BIT_DPDT_SEL_EN); @@ -730,8 +730,8 @@ static void rtw8821c_coex_cfg_ant_switch(struct rtw_dev *rtwdev, u8 ctrl_type, PTA_CTRL_PIN); regval = (!polarity_inverse ? 0x2 : 0x1); - rtw_write8_mask(rtwdev, REG_RFE_CTRL8, BIT_MASK_R_RFE_SEL_15, - regval); + rtw_write32_mask(rtwdev, REG_RFE_CTRL8, BIT_MASK_R_RFE_SEL_15, + regval); break; case COEX_SWITCH_CTRL_BY_ANTDIV: rtw_write32_clr(rtwdev, REG_LED_CFG, BIT_DPDT_SEL_EN); @@ -757,11 +757,11 @@ static void rtw8821c_coex_cfg_ant_switch(struct rtw_dev *rtwdev, u8 ctrl_type, } if (ctrl_type == COEX_SWITCH_CTRL_BY_BT) { - rtw_write32_clr(rtwdev, REG_CTRL_TYPE, BIT_CTRL_TYPE1); - rtw_write32_clr(rtwdev, REG_CTRL_TYPE, BIT_CTRL_TYPE2); + rtw_write8_clr(rtwdev, REG_CTRL_TYPE, BIT_CTRL_TYPE1); + rtw_write8_clr(rtwdev, REG_CTRL_TYPE, BIT_CTRL_TYPE2); } else { - rtw_write32_set(rtwdev, REG_CTRL_TYPE, BIT_CTRL_TYPE1); - rtw_write32_set(rtwdev, REG_CTRL_TYPE, BIT_CTRL_TYPE2); + rtw_write8_set(rtwdev, REG_CTRL_TYPE, BIT_CTRL_TYPE1); + rtw_write8_set(rtwdev, REG_CTRL_TYPE, BIT_CTRL_TYPE2); } } -- GitLab From b0d3016f423834177379cc4237964f1162599b5f Mon Sep 17 00:00:00 2001 From: Guo-Feng Fan Date: Tue, 2 Feb 2021 13:50:11 +0800 Subject: [PATCH 1988/2012] rtw88: 8821c: Correct CCK RSSI Incorrect CCK RSSI may cause periodically scan from upper layer. 8821c phy status does NOT has actual value of CCK power. It provides only lna and vga index. Driver have to use these indexes to calculate actual RSSI. Signed-off-by: Guo-Feng Fan Signed-off-by: Ping-Ke Shih Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20210202055012.8296-3-pkshih@realtek.com --- drivers/net/wireless/realtek/rtw88/rtw8821c.c | 46 +++++++++++++++++-- drivers/net/wireless/realtek/rtw88/rtw8821c.h | 8 ++++ 2 files changed, 49 insertions(+), 5 deletions(-) diff --git a/drivers/net/wireless/realtek/rtw88/rtw8821c.c b/drivers/net/wireless/realtek/rtw88/rtw8821c.c index 8f53afb678704..f0a56f56f0d5e 100644 --- a/drivers/net/wireless/realtek/rtw88/rtw8821c.c +++ b/drivers/net/wireless/realtek/rtw88/rtw8821c.c @@ -15,6 +15,10 @@ #include "debug.h" #include "bf.h" +static const s8 lna_gain_table_0[8] = {22, 8, -6, -22, -31, -40, -46, -52}; +static const s8 lna_gain_table_1[16] = {10, 6, 2, -2, -6, -10, -14, -17, + -20, -24, -28, -31, -34, -37, -40, -44}; + static void rtw8821ce_efuse_parsing(struct rtw_efuse *efuse, struct rtw8821c_efuse *map) { @@ -426,17 +430,49 @@ static void rtw8821c_set_channel(struct rtw_dev *rtwdev, u8 channel, u8 bw, rtw8821c_set_channel_rxdfir(rtwdev, bw); } +static s8 get_cck_rx_pwr(struct rtw_dev *rtwdev, u8 lna_idx, u8 vga_idx) +{ + struct rtw_efuse *efuse = &rtwdev->efuse; + const s8 *lna_gain_table; + int lna_gain_table_size; + s8 rx_pwr_all = 0; + s8 lna_gain = 0; + + if (efuse->rfe_option == 0) { + lna_gain_table = lna_gain_table_0; + lna_gain_table_size = ARRAY_SIZE(lna_gain_table_0); + } else { + lna_gain_table = lna_gain_table_1; + lna_gain_table_size = ARRAY_SIZE(lna_gain_table_1); + } + + if (lna_idx >= lna_gain_table_size) { + rtw_info(rtwdev, "incorrect lna index (%d)\n", lna_idx); + return -120; + } + + lna_gain = lna_gain_table[lna_idx]; + rx_pwr_all = lna_gain - 2 * vga_idx; + + return rx_pwr_all; +} + static void query_phy_status_page0(struct rtw_dev *rtwdev, u8 *phy_status, struct rtw_rx_pkt_stat *pkt_stat) { - s8 min_rx_power = -120; - u8 pwdb = GET_PHY_STAT_P0_PWDB(phy_status); + s8 rx_power; + u8 lna_idx = 0; + u8 vga_idx = 0; - pkt_stat->rx_power[RF_PATH_A] = pwdb - 100; + vga_idx = GET_PHY_STAT_P0_VGA(phy_status); + lna_idx = FIELD_PREP(BIT_LNA_H_MASK, GET_PHY_STAT_P0_LNA_H(phy_status)) | + FIELD_PREP(BIT_LNA_L_MASK, GET_PHY_STAT_P0_LNA_L(phy_status)); + rx_power = get_cck_rx_pwr(rtwdev, lna_idx, vga_idx); + + pkt_stat->rx_power[RF_PATH_A] = rx_power; pkt_stat->rssi = rtw_phy_rf_power_2_rssi(pkt_stat->rx_power, 1); pkt_stat->bw = RTW_CHANNEL_WIDTH_20; - pkt_stat->signal_power = max(pkt_stat->rx_power[RF_PATH_A], - min_rx_power); + pkt_stat->signal_power = rx_power; } static void query_phy_status_page1(struct rtw_dev *rtwdev, u8 *phy_status, diff --git a/drivers/net/wireless/realtek/rtw88/rtw8821c.h b/drivers/net/wireless/realtek/rtw88/rtw8821c.h index e11e3fc41c959..4d197541430db 100644 --- a/drivers/net/wireless/realtek/rtw88/rtw8821c.h +++ b/drivers/net/wireless/realtek/rtw88/rtw8821c.h @@ -148,6 +148,14 @@ _rtw_write32s_mask(struct rtw_dev *rtwdev, u32 addr, u32 mask, u32 data) /* phy status page0 */ #define GET_PHY_STAT_P0_PWDB(phy_stat) \ le32_get_bits(*((__le32 *)(phy_stat) + 0x00), GENMASK(15, 8)) +#define GET_PHY_STAT_P0_VGA(phy_stat) \ + le32_get_bits(*((__le32 *)(phy_stat) + 0x03), GENMASK(12, 8)) +#define GET_PHY_STAT_P0_LNA_L(phy_stat) \ + le32_get_bits(*((__le32 *)(phy_stat) + 0x03), GENMASK(15, 13)) +#define GET_PHY_STAT_P0_LNA_H(phy_stat) \ + le32_get_bits(*((__le32 *)(phy_stat) + 0x03), BIT(23)) +#define BIT_LNA_H_MASK BIT(3) +#define BIT_LNA_L_MASK GENMASK(2, 0) /* phy status page1 */ #define GET_PHY_STAT_P1_PWDB_A(phy_stat) \ -- GitLab From 5d6651fe85837b11564a2e2c3c6279c057d078d6 Mon Sep 17 00:00:00 2001 From: Guo-Feng Fan Date: Tue, 2 Feb 2021 13:50:12 +0800 Subject: [PATCH 1989/2012] rtw88: 8821c: support RFE type2 wifi NIC RFE type2 is a new NIC which has one RF antenna shares with BT. Update phy parameter to verstion V57 to allow initial procedure to load extra AGC table for sharing antenna NIC. Signed-off-by: Guo-Feng Fan Signed-off-by: Ping-Ke Shih Tested-by: Kai-Heng Feng Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20210202055012.8296-4-pkshih@realtek.com --- drivers/net/wireless/realtek/rtw88/main.c | 2 + drivers/net/wireless/realtek/rtw88/main.h | 7 + drivers/net/wireless/realtek/rtw88/rtw8821c.c | 47 +++ drivers/net/wireless/realtek/rtw88/rtw8821c.h | 14 + .../wireless/realtek/rtw88/rtw8821c_table.c | 397 ++++++++++++++++++ .../wireless/realtek/rtw88/rtw8821c_table.h | 1 + 6 files changed, 468 insertions(+) diff --git a/drivers/net/wireless/realtek/rtw88/main.c b/drivers/net/wireless/realtek/rtw88/main.c index 16bacfadeb589..757aaf45f65e2 100644 --- a/drivers/net/wireless/realtek/rtw88/main.c +++ b/drivers/net/wireless/realtek/rtw88/main.c @@ -1590,6 +1590,8 @@ static int rtw_chip_board_info_setup(struct rtw_dev *rtwdev) rtw_phy_setup_phy_cond(rtwdev, 0); rtw_phy_init_tx_power(rtwdev); + if (rfe_def->agc_btg_tbl) + rtw_load_table(rtwdev, rfe_def->agc_btg_tbl); rtw_load_table(rtwdev, rfe_def->phy_pg_tbl); rtw_load_table(rtwdev, rfe_def->txpwr_lmt_tbl); rtw_phy_tx_power_by_rate_config(hal); diff --git a/drivers/net/wireless/realtek/rtw88/main.h b/drivers/net/wireless/realtek/rtw88/main.h index 9a318dfd04f90..87524199d0d63 100644 --- a/drivers/net/wireless/realtek/rtw88/main.h +++ b/drivers/net/wireless/realtek/rtw88/main.h @@ -1042,6 +1042,7 @@ enum rtw_rfe_fem { struct rtw_rfe_def { const struct rtw_table *phy_pg_tbl; const struct rtw_table *txpwr_lmt_tbl; + const struct rtw_table *agc_btg_tbl; }; #define RTW_DEF_RFE(chip, bb_pg, pwrlmt) { \ @@ -1049,6 +1050,12 @@ struct rtw_rfe_def { .txpwr_lmt_tbl = &rtw ## chip ## _txpwr_lmt_type ## pwrlmt ## _tbl, \ } +#define RTW_DEF_RFE_EXT(chip, bb_pg, pwrlmt, btg) { \ + .phy_pg_tbl = &rtw ## chip ## _bb_pg_type ## bb_pg ## _tbl, \ + .txpwr_lmt_tbl = &rtw ## chip ## _txpwr_lmt_type ## pwrlmt ## _tbl, \ + .agc_btg_tbl = &rtw ## chip ## _agc_btg_type ## btg ## _tbl, \ + } + #define RTW_PWR_TRK_5G_1 0 #define RTW_PWR_TRK_5G_2 1 #define RTW_PWR_TRK_5G_3 2 diff --git a/drivers/net/wireless/realtek/rtw88/rtw8821c.c b/drivers/net/wireless/realtek/rtw88/rtw8821c.c index f0a56f56f0d5e..33c6cf1206c83 100644 --- a/drivers/net/wireless/realtek/rtw88/rtw8821c.c +++ b/drivers/net/wireless/realtek/rtw88/rtw8821c.c @@ -25,6 +25,13 @@ static void rtw8821ce_efuse_parsing(struct rtw_efuse *efuse, ether_addr_copy(efuse->addr, map->e.mac_addr); } +enum rtw8821ce_rf_set { + SWITCH_TO_BTG, + SWITCH_TO_WLG, + SWITCH_TO_WLA, + SWITCH_TO_BT, +}; + static int rtw8821c_read_efuse(struct rtw_dev *rtwdev, u8 *log_map) { struct rtw_efuse *efuse = &rtwdev->efuse; @@ -228,6 +235,40 @@ static void rtw8821c_cfg_ldo25(struct rtw_dev *rtwdev, bool enable) rtw_write8(rtwdev, REG_LDO_EFUSE_CTRL + 3, ldo_pwr); } +static void rtw8821c_switch_rf_set(struct rtw_dev *rtwdev, u8 rf_set) +{ + u32 reg; + + rtw_write32_set(rtwdev, REG_DMEM_CTRL, BIT_WL_RST); + rtw_write32_set(rtwdev, REG_SYS_CTRL, BIT_FEN_EN); + + reg = rtw_read32(rtwdev, REG_RFECTL); + switch (rf_set) { + case SWITCH_TO_BTG: + reg |= B_BTG_SWITCH; + reg &= ~(B_CTRL_SWITCH | B_WL_SWITCH | B_WLG_SWITCH | + B_WLA_SWITCH); + rtw_write32_mask(rtwdev, REG_ENRXCCA, MASKBYTE2, BTG_CCA); + rtw_write32_mask(rtwdev, REG_ENTXCCK, MASKLWORD, BTG_LNA); + break; + case SWITCH_TO_WLG: + reg |= B_WL_SWITCH | B_WLG_SWITCH; + reg &= ~(B_BTG_SWITCH | B_CTRL_SWITCH | B_WLA_SWITCH); + rtw_write32_mask(rtwdev, REG_ENRXCCA, MASKBYTE2, WLG_CCA); + rtw_write32_mask(rtwdev, REG_ENTXCCK, MASKLWORD, WLG_LNA); + break; + case SWITCH_TO_WLA: + reg |= B_WL_SWITCH | B_WLA_SWITCH; + reg &= ~(B_BTG_SWITCH | B_CTRL_SWITCH | B_WLG_SWITCH); + break; + case SWITCH_TO_BT: + default: + break; + } + + rtw_write32(rtwdev, REG_RFECTL, reg); +} + static void rtw8821c_set_channel_rf(struct rtw_dev *rtwdev, u8 channel, u8 bw) { u32 rf_reg18; @@ -261,9 +302,14 @@ static void rtw8821c_set_channel_rf(struct rtw_dev *rtwdev, u8 channel, u8 bw) } if (channel <= 14) { + if (rtwdev->efuse.rfe_option == 0) + rtw8821c_switch_rf_set(rtwdev, SWITCH_TO_WLG); + else if (rtwdev->efuse.rfe_option == 2) + rtw8821c_switch_rf_set(rtwdev, SWITCH_TO_BTG); rtw_write_rf(rtwdev, RF_PATH_A, RF_LUTDBG, BIT(6), 0x1); rtw_write_rf(rtwdev, RF_PATH_A, 0x64, 0xf, 0xf); } else { + rtw8821c_switch_rf_set(rtwdev, SWITCH_TO_WLA); rtw_write_rf(rtwdev, RF_PATH_A, RF_LUTDBG, BIT(6), 0x0); } @@ -1450,6 +1496,7 @@ static const struct rtw_intf_phy_para_table phy_para_table_8821c = { static const struct rtw_rfe_def rtw8821c_rfe_defs[] = { [0] = RTW_DEF_RFE(8821c, 0, 0), + [2] = RTW_DEF_RFE_EXT(8821c, 0, 0, 2), }; static struct rtw_hw_reg rtw8821c_dig[] = { diff --git a/drivers/net/wireless/realtek/rtw88/rtw8821c.h b/drivers/net/wireless/realtek/rtw88/rtw8821c.h index 4d197541430db..112faa60f653e 100644 --- a/drivers/net/wireless/realtek/rtw88/rtw8821c.h +++ b/drivers/net/wireless/realtek/rtw88/rtw8821c.h @@ -181,6 +181,8 @@ _rtw_write32s_mask(struct rtw_dev *rtwdev, u32 addr, u32 mask, u32 data) #define GET_PHY_STAT_P1_RXSNR_B(phy_stat) \ le32_get_bits(*((__le32 *)(phy_stat) + 0x06), GENMASK(15, 8)) +#define REG_SYS_CTRL 0x000 +#define BIT_FEN_EN BIT(26) #define REG_INIRTS_RATE_SEL 0x0480 #define REG_HTSTFWT 0x800 #define REG_RXPSEL 0x808 @@ -212,6 +214,11 @@ _rtw_write32s_mask(struct rtw_dev *rtwdev, u32 addr, u32 mask, u32 data) #define REG_FA_CCK 0xa5c #define REG_RXDESC 0xa2c #define REG_ENTXCCK 0xa80 +#define BTG_LNA 0xfc84 +#define WLG_LNA 0x7532 +#define REG_ENRXCCA 0xa84 +#define BTG_CCA 0x0e +#define WLG_CCA 0x12 #define REG_PWRTH2 0xaa8 #define REG_CSRATIO 0xaaa #define REG_TXFILTER 0xaac @@ -225,6 +232,11 @@ _rtw_write32s_mask(struct rtw_dev *rtwdev, u32 addr, u32 mask, u32 data) #define REG_RFESEL0 0xcb0 #define REG_RFESEL8 0xcb4 #define REG_RFECTL 0xcb8 +#define B_BTG_SWITCH BIT(16) +#define B_CTRL_SWITCH BIT(18) +#define B_WL_SWITCH (BIT(20) | BIT(22)) +#define B_WLG_SWITCH BIT(21) +#define B_WLA_SWITCH BIT(23) #define REG_RFEINV 0xcbc #define REG_AGCTR_B 0xe08 #define REG_RXIGI_B 0xe50 @@ -235,6 +247,8 @@ _rtw_write32s_mask(struct rtw_dev *rtwdev, u32 addr, u32 mask, u32 data) #define REG_CCA_OFDM 0xf08 #define REG_FA_OFDM 0xf48 #define REG_CCA_CCK 0xfcc +#define REG_DMEM_CTRL 0x1080 +#define BIT_WL_RST BIT(16) #define REG_ANTWT 0x1904 #define REG_IQKFAILMSK 0x1bf0 #define BIT_MASK_R_RFE_SEL_15 GENMASK(31, 28) diff --git a/drivers/net/wireless/realtek/rtw88/rtw8821c_table.c b/drivers/net/wireless/realtek/rtw88/rtw8821c_table.c index 970f903f7dc77..8e8915c5c4988 100644 --- a/drivers/net/wireless/realtek/rtw88/rtw8821c_table.c +++ b/drivers/net/wireless/realtek/rtw88/rtw8821c_table.c @@ -1342,6 +1342,399 @@ static const u32 rtw8821c_agc[] = { RTW_DECL_TABLE_PHY_COND(rtw8821c_agc, rtw_phy_cfg_agc); +static const u32 rtw8821c_agc_btg_type2[] = { + 0x80001004, 0x00000000, 0x40000000, 0x00000000, + 0x81C, 0xFF000013, + 0x81C, 0xFE020013, + 0x81C, 0xFD040013, + 0x81C, 0xFC060013, + 0x81C, 0xFB080013, + 0x81C, 0xFA0A0013, + 0x81C, 0xF90C0013, + 0x81C, 0xF80E0013, + 0x81C, 0xF7100013, + 0x81C, 0xF6120013, + 0x81C, 0xF5140013, + 0x81C, 0xF4160013, + 0x81C, 0xF3180013, + 0x81C, 0xF21A0013, + 0x81C, 0xF11C0013, + 0x81C, 0xF01E0013, + 0x81C, 0xEF200013, + 0x81C, 0xEE220013, + 0x81C, 0xED240013, + 0x81C, 0xEC260013, + 0x81C, 0xEB280013, + 0x81C, 0xEA2A0013, + 0x81C, 0xE92C0013, + 0x81C, 0xE82E0013, + 0x81C, 0xE7300013, + 0x81C, 0x8B320013, + 0x81C, 0x8A340013, + 0x81C, 0x89360013, + 0x81C, 0x88380013, + 0x81C, 0x873A0013, + 0x81C, 0x863C0013, + 0x81C, 0x853E0013, + 0x81C, 0x84400013, + 0x81C, 0x83420013, + 0x81C, 0x82440013, + 0x81C, 0x81460013, + 0x81C, 0x08480013, + 0x81C, 0x074A0013, + 0x81C, 0x064C0013, + 0x81C, 0x054E0013, + 0x81C, 0x04500013, + 0x81C, 0x03520013, + 0x81C, 0x88540003, + 0x81C, 0x87560003, + 0x81C, 0x86580003, + 0x81C, 0x855A0003, + 0x81C, 0x845C0003, + 0x81C, 0x835E0003, + 0x81C, 0x82600003, + 0x81C, 0x81620003, + 0x81C, 0x07640003, + 0x81C, 0x06660003, + 0x81C, 0x05680003, + 0x81C, 0x046A0003, + 0x81C, 0x036C0003, + 0x81C, 0x026E0003, + 0x81C, 0x01700003, + 0x81C, 0x01720003, + 0x81C, 0x01740003, + 0x81C, 0x01760003, + 0x81C, 0x01780003, + 0x81C, 0x017A0003, + 0x81C, 0x017C0003, + 0x81C, 0x017E0003, + 0x81C, 0xFF000813, + 0x81C, 0xFE020813, + 0x81C, 0xFD040813, + 0x81C, 0xFC060813, + 0x81C, 0xFB080813, + 0x81C, 0xFA0A0813, + 0x81C, 0xF90C0813, + 0x81C, 0xF80E0813, + 0x81C, 0xF7100813, + 0x81C, 0xF6120813, + 0x81C, 0xF5140813, + 0x81C, 0xF4160813, + 0x81C, 0xF3180813, + 0x81C, 0xF21A0813, + 0x81C, 0xF11C0813, + 0x81C, 0x941E0813, + 0x81C, 0x93200813, + 0x81C, 0x92220813, + 0x81C, 0x91240813, + 0x81C, 0x90260813, + 0x81C, 0x8F280813, + 0x81C, 0x8E2A0813, + 0x81C, 0x8D2C0813, + 0x81C, 0x8C2E0813, + 0x81C, 0x8B300813, + 0x81C, 0x8A320813, + 0x81C, 0x89340813, + 0x81C, 0x88360813, + 0x81C, 0x87380813, + 0x81C, 0x863A0813, + 0x81C, 0x853C0813, + 0x81C, 0x843E0813, + 0x81C, 0x83400813, + 0x81C, 0x82420813, + 0x81C, 0x81440813, + 0x81C, 0x07460813, + 0x81C, 0x06480813, + 0x81C, 0x054A0813, + 0x81C, 0x044C0813, + 0x81C, 0x034E0813, + 0x81C, 0x02500813, + 0x81C, 0x01520813, + 0x81C, 0x88540803, + 0x81C, 0x87560803, + 0x81C, 0x86580803, + 0x81C, 0x855A0803, + 0x81C, 0x845C0803, + 0x81C, 0x835E0803, + 0x81C, 0x82600803, + 0x81C, 0x81620803, + 0x81C, 0x07640803, + 0x81C, 0x06660803, + 0x81C, 0x05680803, + 0x81C, 0x046A0803, + 0x81C, 0x036C0803, + 0x81C, 0x026E0803, + 0x81C, 0x01700803, + 0x81C, 0x01720803, + 0x81C, 0x01740803, + 0x81C, 0x01760803, + 0x81C, 0x01780803, + 0x81C, 0x017A0803, + 0x81C, 0x017C0803, + 0x81C, 0x017E0803, + 0x90001005, 0x00000000, 0x40000000, 0x00000000, + 0x81C, 0xFF000013, + 0x81C, 0xFE020013, + 0x81C, 0xFD040013, + 0x81C, 0xFC060013, + 0x81C, 0xFB080013, + 0x81C, 0xFA0A0013, + 0x81C, 0xF90C0013, + 0x81C, 0xF80E0013, + 0x81C, 0xF7100013, + 0x81C, 0xF6120013, + 0x81C, 0xF5140013, + 0x81C, 0xF4160013, + 0x81C, 0xF3180013, + 0x81C, 0xF21A0013, + 0x81C, 0xF11C0013, + 0x81C, 0xF01E0013, + 0x81C, 0xEF200013, + 0x81C, 0xEE220013, + 0x81C, 0xED240013, + 0x81C, 0xEC260013, + 0x81C, 0xEB280013, + 0x81C, 0xEA2A0013, + 0x81C, 0xE92C0013, + 0x81C, 0xE82E0013, + 0x81C, 0xE7300013, + 0x81C, 0x8B320013, + 0x81C, 0x8A340013, + 0x81C, 0x89360013, + 0x81C, 0x88380013, + 0x81C, 0x873A0013, + 0x81C, 0x863C0013, + 0x81C, 0x853E0013, + 0x81C, 0x84400013, + 0x81C, 0x83420013, + 0x81C, 0x82440013, + 0x81C, 0x81460013, + 0x81C, 0x08480013, + 0x81C, 0x074A0013, + 0x81C, 0x064C0013, + 0x81C, 0x054E0013, + 0x81C, 0x04500013, + 0x81C, 0x03520013, + 0x81C, 0x88540003, + 0x81C, 0x87560003, + 0x81C, 0x86580003, + 0x81C, 0x855A0003, + 0x81C, 0x845C0003, + 0x81C, 0x835E0003, + 0x81C, 0x82600003, + 0x81C, 0x81620003, + 0x81C, 0x07640003, + 0x81C, 0x06660003, + 0x81C, 0x05680003, + 0x81C, 0x046A0003, + 0x81C, 0x036C0003, + 0x81C, 0x026E0003, + 0x81C, 0x01700003, + 0x81C, 0x01720003, + 0x81C, 0x01740003, + 0x81C, 0x01760003, + 0x81C, 0x01780003, + 0x81C, 0x017A0003, + 0x81C, 0x017C0003, + 0x81C, 0x017E0003, + 0x81C, 0xFF000813, + 0x81C, 0xFE020813, + 0x81C, 0xFD040813, + 0x81C, 0xFC060813, + 0x81C, 0xFB080813, + 0x81C, 0xFA0A0813, + 0x81C, 0xF90C0813, + 0x81C, 0xF80E0813, + 0x81C, 0xF7100813, + 0x81C, 0xF6120813, + 0x81C, 0xF5140813, + 0x81C, 0xF4160813, + 0x81C, 0xF3180813, + 0x81C, 0xF21A0813, + 0x81C, 0xF11C0813, + 0x81C, 0x941E0813, + 0x81C, 0x93200813, + 0x81C, 0x92220813, + 0x81C, 0x91240813, + 0x81C, 0x90260813, + 0x81C, 0x8F280813, + 0x81C, 0x8E2A0813, + 0x81C, 0x8D2C0813, + 0x81C, 0x8C2E0813, + 0x81C, 0x8B300813, + 0x81C, 0x8A320813, + 0x81C, 0x89340813, + 0x81C, 0x88360813, + 0x81C, 0x87380813, + 0x81C, 0x863A0813, + 0x81C, 0x853C0813, + 0x81C, 0x843E0813, + 0x81C, 0x83400813, + 0x81C, 0x82420813, + 0x81C, 0x81440813, + 0x81C, 0x07460813, + 0x81C, 0x06480813, + 0x81C, 0x054A0813, + 0x81C, 0x044C0813, + 0x81C, 0x034E0813, + 0x81C, 0x02500813, + 0x81C, 0x01520813, + 0x81C, 0x88540803, + 0x81C, 0x87560803, + 0x81C, 0x86580803, + 0x81C, 0x855A0803, + 0x81C, 0x845C0803, + 0x81C, 0x835E0803, + 0x81C, 0x82600803, + 0x81C, 0x81620803, + 0x81C, 0x07640803, + 0x81C, 0x06660803, + 0x81C, 0x05680803, + 0x81C, 0x046A0803, + 0x81C, 0x036C0803, + 0x81C, 0x026E0803, + 0x81C, 0x01700803, + 0x81C, 0x01720803, + 0x81C, 0x01740803, + 0x81C, 0x01760803, + 0x81C, 0x01780803, + 0x81C, 0x017A0803, + 0x81C, 0x017C0803, + 0x81C, 0x017E0803, + 0xA0000000, 0x00000000, + 0x81C, 0xFF000013, + 0x81C, 0xFE020013, + 0x81C, 0xFD040013, + 0x81C, 0xFC060013, + 0x81C, 0xFB080013, + 0x81C, 0xFA0A0013, + 0x81C, 0xF90C0013, + 0x81C, 0xF80E0013, + 0x81C, 0xF7100013, + 0x81C, 0xF6120013, + 0x81C, 0xF5140013, + 0x81C, 0xF4160013, + 0x81C, 0xF3180013, + 0x81C, 0xF21A0013, + 0x81C, 0xF11C0013, + 0x81C, 0xF01E0013, + 0x81C, 0xEF200013, + 0x81C, 0xEE220013, + 0x81C, 0xED240013, + 0x81C, 0xEC260013, + 0x81C, 0xEB280013, + 0x81C, 0xEA2A0013, + 0x81C, 0xE92C0013, + 0x81C, 0xE82E0013, + 0x81C, 0xE7300013, + 0x81C, 0x8A320013, + 0x81C, 0x89340013, + 0x81C, 0x88360013, + 0x81C, 0x87380013, + 0x81C, 0x863A0013, + 0x81C, 0x853C0013, + 0x81C, 0x843E0013, + 0x81C, 0x83400013, + 0x81C, 0x82420013, + 0x81C, 0x81440013, + 0x81C, 0x07460013, + 0x81C, 0x06480013, + 0x81C, 0x054A0013, + 0x81C, 0x044C0013, + 0x81C, 0x034E0013, + 0x81C, 0x02500013, + 0x81C, 0x01520013, + 0x81C, 0x88540003, + 0x81C, 0x87560003, + 0x81C, 0x86580003, + 0x81C, 0x855A0003, + 0x81C, 0x845C0003, + 0x81C, 0x835E0003, + 0x81C, 0x82600003, + 0x81C, 0x81620003, + 0x81C, 0x07640003, + 0x81C, 0x06660003, + 0x81C, 0x05680003, + 0x81C, 0x046A0003, + 0x81C, 0x036C0003, + 0x81C, 0x026E0003, + 0x81C, 0x01700003, + 0x81C, 0x01720003, + 0x81C, 0x01740003, + 0x81C, 0x01760003, + 0x81C, 0x01780003, + 0x81C, 0x017A0003, + 0x81C, 0x017C0003, + 0x81C, 0x017E0003, + 0x81C, 0xFF000813, + 0x81C, 0xFE020813, + 0x81C, 0xFD040813, + 0x81C, 0xFC060813, + 0x81C, 0xFB080813, + 0x81C, 0xFA0A0813, + 0x81C, 0xF90C0813, + 0x81C, 0xF80E0813, + 0x81C, 0xF7100813, + 0x81C, 0xF6120813, + 0x81C, 0xF5140813, + 0x81C, 0xF4160813, + 0x81C, 0xF3180813, + 0x81C, 0xF21A0813, + 0x81C, 0xF11C0813, + 0x81C, 0x961E0813, + 0x81C, 0x95200813, + 0x81C, 0x94220813, + 0x81C, 0x93240813, + 0x81C, 0x92260813, + 0x81C, 0x91280813, + 0x81C, 0x8F2A0813, + 0x81C, 0x8E2C0813, + 0x81C, 0x8D2E0813, + 0x81C, 0x8C300813, + 0x81C, 0x8B320813, + 0x81C, 0x8A340813, + 0x81C, 0x89360813, + 0x81C, 0x88380813, + 0x81C, 0x873A0813, + 0x81C, 0x863C0813, + 0x81C, 0x853E0813, + 0x81C, 0x84400813, + 0x81C, 0x83420813, + 0x81C, 0x82440813, + 0x81C, 0x08460813, + 0x81C, 0x07480813, + 0x81C, 0x064A0813, + 0x81C, 0x054C0813, + 0x81C, 0x044E0813, + 0x81C, 0x03500813, + 0x81C, 0x02520813, + 0x81C, 0x89540803, + 0x81C, 0x88560803, + 0x81C, 0x87580803, + 0x81C, 0x865A0803, + 0x81C, 0x855C0803, + 0x81C, 0x845E0803, + 0x81C, 0x83600803, + 0x81C, 0x82620803, + 0x81C, 0x07640803, + 0x81C, 0x06660803, + 0x81C, 0x05680803, + 0x81C, 0x046A0803, + 0x81C, 0x036C0803, + 0x81C, 0x026E0803, + 0x81C, 0x01700803, + 0x81C, 0x01720803, + 0x81C, 0x01740803, + 0x81C, 0x01760803, + 0x81C, 0x01780803, + 0x81C, 0x017A0803, + 0x81C, 0x017C0803, + 0x81C, 0x017E0803, + 0xB0000000, 0x00000000, +}; + +RTW_DECL_TABLE_PHY_COND(rtw8821c_agc_btg_type2, rtw_phy_cfg_agc); + static const u32 rtw8821c_bb[] = { 0x800, 0x9020D010, 0x804, 0x80018180, @@ -1394,7 +1787,11 @@ static const u32 rtw8821c_bb[] = { 0x8C0, 0xFFE04020, 0x8C4, 0x47C00000, 0x8C8, 0x00025165, + 0x82000400, 0x00000000, 0x40000000, 0x00000000, + 0x8CC, 0x08190492, + 0xA0000000, 0x00000000, 0x8CC, 0x08188492, + 0xB0000000, 0x00000000, 0x8D0, 0x0000B800, 0x8D4, 0x860308A0, 0x8D8, 0x290B5612, diff --git a/drivers/net/wireless/realtek/rtw88/rtw8821c_table.h b/drivers/net/wireless/realtek/rtw88/rtw8821c_table.h index 5ea8b4fc7fba2..cda98f5c4a01e 100644 --- a/drivers/net/wireless/realtek/rtw88/rtw8821c_table.h +++ b/drivers/net/wireless/realtek/rtw88/rtw8821c_table.h @@ -7,6 +7,7 @@ extern const struct rtw_table rtw8821c_mac_tbl; extern const struct rtw_table rtw8821c_agc_tbl; +extern const struct rtw_table rtw8821c_agc_btg_type2_tbl; extern const struct rtw_table rtw8821c_bb_tbl; extern const struct rtw_table rtw8821c_bb_pg_type0_tbl; extern const struct rtw_table rtw8821c_rf_a_tbl; -- GitLab From af4b3a6f36d6c2fc5fca026bccf45e0fdcabddd9 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Fri, 29 Jan 2021 18:14:12 +0100 Subject: [PATCH 1990/2012] brcmfmac: Add DMI nvram filename quirk for Predia Basic tablet The Predia Basic tablet contains quite generic names in the sys_vendor and product_name DMI strings, without this patch brcmfmac will try to load: brcmfmac43340-sdio.Insyde-CherryTrail.txt as nvram file which is a bit too generic. Add a DMI quirk so that a unique and clearly identifiable nvram file name is used on the Predia Basic tablet. Signed-off-by: Hans de Goede Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20210129171413.139880-1-hdegoede@redhat.com --- .../net/wireless/broadcom/brcm80211/brcmfmac/dmi.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/dmi.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/dmi.c index 4aa2561934d77..824a79f243830 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/dmi.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/dmi.c @@ -40,6 +40,10 @@ static const struct brcmf_dmi_data pov_tab_p1006w_data = { BRCM_CC_43340_CHIP_ID, 2, "pov-tab-p1006w-data" }; +static const struct brcmf_dmi_data predia_basic_data = { + BRCM_CC_43341_CHIP_ID, 2, "predia-basic" +}; + static const struct dmi_system_id dmi_platform_data[] = { { /* ACEPC T8 Cherry Trail Z8350 mini PC */ @@ -111,6 +115,16 @@ static const struct dmi_system_id dmi_platform_data[] = { }, .driver_data = (void *)&pov_tab_p1006w_data, }, + { + /* Predia Basic tablet (+ with keyboard dock) */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Insyde"), + DMI_MATCH(DMI_PRODUCT_NAME, "CherryTrail"), + /* Mx.WT107.KUBNGEA02 with the version-nr dropped */ + DMI_MATCH(DMI_BIOS_VERSION, "Mx.WT107.KUBNGEA"), + }, + .driver_data = (void *)&predia_basic_data, + }, {} }; -- GitLab From a338c874d3d9d2463f031e89ae14942929b93db6 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Fri, 29 Jan 2021 18:14:13 +0100 Subject: [PATCH 1991/2012] brcmfmac: Add DMI nvram filename quirk for Voyo winpad A15 tablet The Voyo winpad A15 tablet contains quite generic names in the sys_vendor and product_name DMI strings, without this patch brcmfmac will try to load: rcmfmac4330-sdio.To be filled by O.E.M.-To be filled by O.E.M..txt as nvram file which is a bit too generic. Add a DMI quirk so that a unique and clearly identifiable nvram file name is used on the Voyo winpad A15 tablet. While preparing a matching linux-firmware update I noticed that the nvram is identical to the nvram used on the Prowise-PT301 tablet, so the new DMI quirk entry simply points to the already existing Prowise-PT301 nvram file. Signed-off-by: Hans de Goede Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20210129171413.139880-2-hdegoede@redhat.com --- .../wireless/broadcom/brcm80211/brcmfmac/dmi.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/dmi.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/dmi.c index 824a79f243830..6d5188b78f2de 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/dmi.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/dmi.c @@ -44,6 +44,14 @@ static const struct brcmf_dmi_data predia_basic_data = { BRCM_CC_43341_CHIP_ID, 2, "predia-basic" }; +/* Note the Voyo winpad A15 tablet uses the same Ampak AP6330 module, with the + * exact same nvram file as the Prowise-PT301 tablet. Since the nvram for the + * Prowise-PT301 is already in linux-firmware we just point to that here. + */ +static const struct brcmf_dmi_data voyo_winpad_a15_data = { + BRCM_CC_4330_CHIP_ID, 4, "Prowise-PT301" +}; + static const struct dmi_system_id dmi_platform_data[] = { { /* ACEPC T8 Cherry Trail Z8350 mini PC */ @@ -125,6 +133,16 @@ static const struct dmi_system_id dmi_platform_data[] = { }, .driver_data = (void *)&predia_basic_data, }, + { + /* Voyo winpad A15 tablet */ + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "AMI Corporation"), + DMI_MATCH(DMI_BOARD_NAME, "Aptio CRB"), + /* Above strings are too generic, also match on BIOS date */ + DMI_MATCH(DMI_BIOS_DATE, "11/20/2014"), + }, + .driver_data = (void *)&voyo_winpad_a15_data, + }, {} }; -- GitLab From 38eb712ada24d3ee3fcf02e0941c03bcb437f1e2 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 4 Feb 2021 17:28:35 +0100 Subject: [PATCH 1992/2012] brcmsmac: fix alignment constraints sturct d11txh contains a ieee80211_rts structure, which is required to have at least two byte alignment, and this conflicts with the __packed attribute: drivers/net/wireless/broadcom/brcm80211/brcmsmac/d11.h:786:1: warning: alignment 1 of 'struct d11txh' is less than 2 [-Wpacked-not-aligned] Mark d11txh itself as having two-byte alignment to ensure the inner structure is properly aligned. Signed-off-by: Arnd Bergmann Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20210204162852.3219572-1-arnd@kernel.org --- drivers/net/wireless/broadcom/brcm80211/brcmsmac/d11.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmsmac/d11.h b/drivers/net/wireless/broadcom/brcm80211/brcmsmac/d11.h index 9035cc4d6ff3e..7870093629c37 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmsmac/d11.h +++ b/drivers/net/wireless/broadcom/brcm80211/brcmsmac/d11.h @@ -783,7 +783,7 @@ struct d11txh { u8 RTSPhyHeader[D11_PHY_HDR_LEN]; /* 0x2c - 0x2e */ struct ieee80211_rts rts_frame; /* 0x2f - 0x36 */ u16 PAD; /* 0x37 */ -} __packed; +} __packed __aligned(2); #define D11_TXH_LEN 112 /* bytes */ -- GitLab From ae30a740a1769d7afb37245b058aeb5e6e83f492 Mon Sep 17 00:00:00 2001 From: Zheng Yongjun Date: Thu, 24 Dec 2020 21:24:56 +0800 Subject: [PATCH 1993/2012] atmel: at76c50x: use DEFINE_MUTEX() for mutex lock mutex lock can be initialized automatically with DEFINE_MUTEX() rather than explicitly calling mutex_init(). Signed-off-by: Zheng Yongjun Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20201224132456.31341-1-zhengyongjun3@huawei.com --- drivers/net/wireless/atmel/at76c50x-usb.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/wireless/atmel/at76c50x-usb.c b/drivers/net/wireless/atmel/at76c50x-usb.c index 4042578000331..7582761c61e2c 100644 --- a/drivers/net/wireless/atmel/at76c50x-usb.c +++ b/drivers/net/wireless/atmel/at76c50x-usb.c @@ -101,7 +101,7 @@ do { \ static uint at76_debug = DBG_DEFAULTS; /* Protect against concurrent firmware loading and parsing */ -static struct mutex fw_mutex; +static DEFINE_MUTEX(fw_mutex); static struct fwentry firmwares[] = { [0] = { "" }, @@ -2572,8 +2572,6 @@ static int __init at76_mod_init(void) printk(KERN_INFO DRIVER_DESC " " DRIVER_VERSION " loading\n"); - mutex_init(&fw_mutex); - /* register this driver with the USB subsystem */ result = usb_register(&at76_driver); if (result < 0) -- GitLab From 1d5248882d64e327e70c0f15cadc4dfd9539c990 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 28 Jan 2021 16:22:02 +0000 Subject: [PATCH 1994/2012] libertas: remove redundant initialization of variable ret The variable ret is being initialized with a value that is never read and it is being updated later with a new value. The initialization is redundant and can be removed. Addresses-Coverity: ("Unused value") Signed-off-by: Colin Ian King Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20210128162202.642848-1-colin.king@canonical.com --- drivers/net/wireless/marvell/libertas/if_sdio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/marvell/libertas/if_sdio.c b/drivers/net/wireless/marvell/libertas/if_sdio.c index 44fbd0acb87a8..a63c5e622ee37 100644 --- a/drivers/net/wireless/marvell/libertas/if_sdio.c +++ b/drivers/net/wireless/marvell/libertas/if_sdio.c @@ -981,7 +981,7 @@ out: static int if_sdio_enter_deep_sleep(struct lbs_private *priv) { - int ret = -1; + int ret; struct cmd_header cmd; memset(&cmd, 0, sizeof(cmd)); -- GitLab From 199276b9bcefdffad776287de092160084caf677 Mon Sep 17 00:00:00 2001 From: wengjianfeng Date: Sat, 30 Jan 2021 15:23:10 +0800 Subject: [PATCH 1995/2012] rtl8xxxu: remove unused assignment value at first, ret was assigned to zero, but later assigned to a funciton,so the assignment to zero is no use, which can simple be removed instead. Signed-off-by: wengjianfeng Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20210130072310.17252-1-samirweng1979@163.com --- drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8192e.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8192e.c b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8192e.c index 9f1f93d04145d..cfe2dfdae928f 100644 --- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8192e.c +++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8192e.c @@ -1507,8 +1507,6 @@ static int rtl8192eu_power_on(struct rtl8xxxu_priv *priv) u32 val32; int ret; - ret = 0; - val32 = rtl8xxxu_read32(priv, REG_SYS_CFG); if (val32 & SYS_CFG_SPS_LDO_SEL) { rtl8xxxu_write8(priv, REG_LDO_SW_CTRL, 0xc3); -- GitLab From b7fd26c913f1f639b9d5bbf69266751f84a2a372 Mon Sep 17 00:00:00 2001 From: Yen-lin Lai Date: Mon, 1 Feb 2021 15:06:49 +0800 Subject: [PATCH 1996/2012] mwifiex: Report connected BSS with cfg80211_connect_bss() When a network is moved or reconfigured on the different channel, there can be multiple BSSes with the same BSSID and SSID in scan result before the old one expires. Then, it can cause cfg80211_connect_result to map current_bss to a bss with the wrong channel. Let mwifiex_cfg80211_assoc return the selected BSS and then the caller can report it cfg80211_connect_bss. Signed-off-by: Yen-lin Lai Reviewed-by: Brian Norris Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20210201070649.1667209-1-yenlinlai@chromium.org --- .../net/wireless/marvell/mwifiex/cfg80211.c | 35 ++++++++++++++----- 1 file changed, 26 insertions(+), 9 deletions(-) diff --git a/drivers/net/wireless/marvell/mwifiex/cfg80211.c b/drivers/net/wireless/marvell/mwifiex/cfg80211.c index 5553df9132901..a2ed268ce0da1 100644 --- a/drivers/net/wireless/marvell/mwifiex/cfg80211.c +++ b/drivers/net/wireless/marvell/mwifiex/cfg80211.c @@ -2173,7 +2173,8 @@ static int mwifiex_cfg80211_assoc(struct mwifiex_private *priv, size_t ssid_len, const u8 *ssid, const u8 *bssid, int mode, struct ieee80211_channel *channel, - struct cfg80211_connect_params *sme, bool privacy) + struct cfg80211_connect_params *sme, bool privacy, + struct cfg80211_bss **sel_bss) { struct cfg80211_ssid req_ssid; int ret, auth_type = 0; @@ -2307,17 +2308,31 @@ done: } } + if (bss) + cfg80211_ref_bss(priv->adapter->wiphy, bss); + ret = mwifiex_bss_start(priv, bss, &req_ssid); if (ret) - return ret; + goto cleanup; if (mode == NL80211_IFTYPE_ADHOC) { /* Inform the BSS information to kernel, otherwise * kernel will give a panic after successful assoc */ - if (mwifiex_cfg80211_inform_ibss_bss(priv)) - return -EFAULT; + if (mwifiex_cfg80211_inform_ibss_bss(priv)) { + ret = -EFAULT; + goto cleanup; + } } + /* Pass the selected BSS entry to caller. */ + if (sel_bss) { + *sel_bss = bss; + bss = NULL; + } + +cleanup: + if (bss) + cfg80211_put_bss(priv->adapter->wiphy, bss); return ret; } @@ -2334,6 +2349,7 @@ mwifiex_cfg80211_connect(struct wiphy *wiphy, struct net_device *dev, { struct mwifiex_private *priv = mwifiex_netdev_get_priv(dev); struct mwifiex_adapter *adapter = priv->adapter; + struct cfg80211_bss *bss = NULL; int ret; if (GET_BSS_ROLE(priv) != MWIFIEX_BSS_ROLE_STA) { @@ -2369,11 +2385,12 @@ mwifiex_cfg80211_connect(struct wiphy *wiphy, struct net_device *dev, cfg80211_sched_scan_stopped_locked(priv->wdev.wiphy, 0); ret = mwifiex_cfg80211_assoc(priv, sme->ssid_len, sme->ssid, sme->bssid, - priv->bss_mode, sme->channel, sme, 0); + priv->bss_mode, sme->channel, sme, 0, + &bss); if (!ret) { - cfg80211_connect_result(priv->netdev, priv->cfg_bssid, NULL, 0, - NULL, 0, WLAN_STATUS_SUCCESS, - GFP_KERNEL); + cfg80211_connect_bss(priv->netdev, priv->cfg_bssid, bss, NULL, + 0, NULL, 0, WLAN_STATUS_SUCCESS, + GFP_KERNEL, NL80211_TIMEOUT_UNSPECIFIED); mwifiex_dbg(priv->adapter, MSG, "info: associated to bssid %pM successfully\n", priv->cfg_bssid); @@ -2504,7 +2521,7 @@ mwifiex_cfg80211_join_ibss(struct wiphy *wiphy, struct net_device *dev, ret = mwifiex_cfg80211_assoc(priv, params->ssid_len, params->ssid, params->bssid, priv->bss_mode, params->chandef.chan, NULL, - params->privacy); + params->privacy, NULL); done: if (!ret) { cfg80211_ibss_joined(priv->netdev, priv->cfg_bssid, -- GitLab From 05d7f330748881385dad49db56f319a3ea099afd Mon Sep 17 00:00:00 2001 From: wengjianfeng Date: Wed, 3 Feb 2021 14:03:06 +0800 Subject: [PATCH 1997/2012] wl1251: cmd: remove redundant assignment -ENOMEM has been used as a return value,it is not necessary to assign it, and if kzalloc fail,not need free it,so just return -ENOMEM when kzalloc fail. Signed-off-by: wengjianfeng Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20210203060306.2832-1-samirweng1979@163.com --- drivers/net/wireless/ti/wl1251/cmd.c | 36 ++++++++++------------------ 1 file changed, 12 insertions(+), 24 deletions(-) diff --git a/drivers/net/wireless/ti/wl1251/cmd.c b/drivers/net/wireless/ti/wl1251/cmd.c index e1095b8de2bdc..498c8db2eb48b 100644 --- a/drivers/net/wireless/ti/wl1251/cmd.c +++ b/drivers/net/wireless/ti/wl1251/cmd.c @@ -175,10 +175,8 @@ int wl1251_cmd_vbm(struct wl1251 *wl, u8 identity, wl1251_debug(DEBUG_CMD, "cmd vbm"); vbm = kzalloc(sizeof(*vbm), GFP_KERNEL); - if (!vbm) { - ret = -ENOMEM; - goto out; - } + if (!vbm) + return -ENOMEM; /* Count and period will be filled by the target */ vbm->tim.bitmap_ctrl = bitmap_control; @@ -213,10 +211,8 @@ int wl1251_cmd_data_path_rx(struct wl1251 *wl, u8 channel, bool enable) wl1251_debug(DEBUG_CMD, "cmd data path"); cmd = kzalloc(sizeof(*cmd), GFP_KERNEL); - if (!cmd) { - ret = -ENOMEM; - goto out; - } + if (!cmd) + return -ENOMEM; cmd->channel = channel; @@ -279,10 +275,8 @@ int wl1251_cmd_join(struct wl1251 *wl, u8 bss_type, u8 channel, u8 *bssid; join = kzalloc(sizeof(*join), GFP_KERNEL); - if (!join) { - ret = -ENOMEM; - goto out; - } + if (!join) + return -ENOMEM; wl1251_debug(DEBUG_CMD, "cmd join%s ch %d %d/%d", bss_type == BSS_TYPE_IBSS ? " ibss" : "", @@ -324,10 +318,8 @@ int wl1251_cmd_ps_mode(struct wl1251 *wl, u8 ps_mode) wl1251_debug(DEBUG_CMD, "cmd set ps mode"); ps_params = kzalloc(sizeof(*ps_params), GFP_KERNEL); - if (!ps_params) { - ret = -ENOMEM; - goto out; - } + if (!ps_params) + return -ENOMEM; ps_params->ps_mode = ps_mode; ps_params->send_null_data = 1; @@ -356,10 +348,8 @@ int wl1251_cmd_read_memory(struct wl1251 *wl, u32 addr, void *answer, wl1251_debug(DEBUG_CMD, "cmd read memory"); cmd = kzalloc(sizeof(*cmd), GFP_KERNEL); - if (!cmd) { - ret = -ENOMEM; - goto out; - } + if (!cmd) + return -ENOMEM; WARN_ON(len > MAX_READ_SIZE); len = min_t(size_t, len, MAX_READ_SIZE); @@ -401,10 +391,8 @@ int wl1251_cmd_template_set(struct wl1251 *wl, u16 cmd_id, cmd_len = ALIGN(sizeof(*cmd) + buf_len, 4); cmd = kzalloc(cmd_len, GFP_KERNEL); - if (!cmd) { - ret = -ENOMEM; - goto out; - } + if (!cmd) + return -ENOMEM; cmd->size = cpu_to_le16(buf_len); -- GitLab From bb779d476ff74d95e2d299ee001b9063f00676c2 Mon Sep 17 00:00:00 2001 From: wengjianfeng Date: Wed, 3 Feb 2021 14:16:25 +0800 Subject: [PATCH 1998/2012] mwl8k: assign value when defining variables define refilled and then assign value to it, which should do that at the same time. Signed-off-by: wengjianfeng Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20210203061625.588-1-samirweng1979@163.com --- drivers/net/wireless/marvell/mwl8k.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/wireless/marvell/mwl8k.c b/drivers/net/wireless/marvell/mwl8k.c index abf3b0233ccce..435ef77227abf 100644 --- a/drivers/net/wireless/marvell/mwl8k.c +++ b/drivers/net/wireless/marvell/mwl8k.c @@ -1208,9 +1208,8 @@ static int rxq_refill(struct ieee80211_hw *hw, int index, int limit) { struct mwl8k_priv *priv = hw->priv; struct mwl8k_rx_queue *rxq = priv->rxq + index; - int refilled; + int refilled = 0; - refilled = 0; while (rxq->rxd_count < MWL8K_RX_DESCS && limit--) { struct sk_buff *skb; dma_addr_t addr; -- GitLab From d48aea6054d0521b258471a5ff3ca827c6c54b09 Mon Sep 17 00:00:00 2001 From: wengjianfeng Date: Wed, 3 Feb 2021 14:27:17 +0800 Subject: [PATCH 1999/2012] rsi: remove redundant assignment INVALID_QUEUE has been used as a return value,it is not necessary to assign it to q_num,so just return INVALID_QUEUE. Signed-off-by: wengjianfeng Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20210203062717.1228-1-samirweng1979@163.com --- drivers/net/wireless/rsi/rsi_91x_core.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/wireless/rsi/rsi_91x_core.c b/drivers/net/wireless/rsi/rsi_91x_core.c index 2d49c5b5eefb4..a48e616e0fb91 100644 --- a/drivers/net/wireless/rsi/rsi_91x_core.c +++ b/drivers/net/wireless/rsi/rsi_91x_core.c @@ -193,8 +193,7 @@ get_queue_num: if (recontend_queue) goto get_queue_num; - q_num = INVALID_QUEUE; - return q_num; + return INVALID_QUEUE; } common->selected_qnum = q_num; -- GitLab From cc1546d6850c4c7784026e155d7e5e65b1d62670 Mon Sep 17 00:00:00 2001 From: wengjianfeng Date: Thu, 4 Feb 2021 08:51:19 +0800 Subject: [PATCH 2000/2012] rt2x00: remove duplicate word and fix typo in comment remove duplicate word 'we' in comment change 'then' to 'than' in comment Signed-off-by: wengjianfeng Acked-by: Randy Dunlap Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20210204005119.18060-1-samirweng1979@163.com --- drivers/net/wireless/ralink/rt2x00/rt2x00crypto.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/ralink/rt2x00/rt2x00crypto.c b/drivers/net/wireless/ralink/rt2x00/rt2x00crypto.c index c861811aa6c08..ad95f9eba3019 100644 --- a/drivers/net/wireless/ralink/rt2x00/rt2x00crypto.c +++ b/drivers/net/wireless/ralink/rt2x00/rt2x00crypto.c @@ -179,7 +179,7 @@ void rt2x00crypto_rx_insert_iv(struct sk_buff *skb, * Make room for new data. There are 2 possibilities * either the alignment is already present between * the 802.11 header and payload. In that case we - * we have to move the header less then the iv_len + * have to move the header less than the iv_len * since we can use the already available l2pad bytes * for the iv data. * When the alignment must be added manually we must -- GitLab From fcb8f3ca4b5bd991fbbc8465cdac8f84cc668410 Mon Sep 17 00:00:00 2001 From: Jiapeng Chong Date: Thu, 4 Feb 2021 16:00:08 +0800 Subject: [PATCH 2001/2012] iwlegacy: 4965-mac: Simplify the calculation of variables Fix the following coccicheck warnings: ./drivers/net/wireless/intel/iwlegacy/4965-mac.c:2596:54-56: WARNING !A || A && B is equivalent to !A || B. Reported-by: Abaci Robot Signed-off-by: Jiapeng Chong Acked-by: Stanislaw Gruszka Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/1612425608-40450-1-git-send-email-jiapeng.chong@linux.alibaba.com --- drivers/net/wireless/intel/iwlegacy/4965-mac.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/wireless/intel/iwlegacy/4965-mac.c b/drivers/net/wireless/intel/iwlegacy/4965-mac.c index 98cd06287b436..341d6a2bc6900 100644 --- a/drivers/net/wireless/intel/iwlegacy/4965-mac.c +++ b/drivers/net/wireless/intel/iwlegacy/4965-mac.c @@ -2593,8 +2593,7 @@ out: */ if (ret != IL_INVALID_STATION && (!(il->stations[ret].used & IL_STA_UCODE_ACTIVE) || - ((il->stations[ret].used & IL_STA_UCODE_ACTIVE) && - (il->stations[ret].used & IL_STA_UCODE_INPROGRESS)))) { + (il->stations[ret].used & IL_STA_UCODE_INPROGRESS))) { IL_ERR("Requested station info for sta %d before ready.\n", ret); ret = IL_INVALID_STATION; -- GitLab From 93476ca7445793101b803db881f2d755d5184e36 Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Thu, 4 Feb 2021 11:19:02 +0100 Subject: [PATCH 2002/2012] rt2800usb: add Sweex LW163V2 id's Add support for Sweex LW163V2 device. Signed-off-by: Stanislaw Gruszka Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20210204101902.199590-1-stf_xl@wp.pl --- drivers/net/wireless/ralink/rt2x00/rt2800usb.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/ralink/rt2x00/rt2800usb.c b/drivers/net/wireless/ralink/rt2x00/rt2800usb.c index d08b251ec5a26..36ac18ca8082e 100644 --- a/drivers/net/wireless/ralink/rt2x00/rt2800usb.c +++ b/drivers/net/wireless/ralink/rt2x00/rt2800usb.c @@ -988,6 +988,7 @@ static const struct usb_device_id rt2800usb_device_table[] = { { USB_DEVICE(0x177f, 0x0313) }, { USB_DEVICE(0x177f, 0x0323) }, { USB_DEVICE(0x177f, 0x0324) }, + { USB_DEVICE(0x177f, 0x1163) }, /* U-Media */ { USB_DEVICE(0x157e, 0x300e) }, { USB_DEVICE(0x157e, 0x3013) }, -- GitLab From fb1bc2ce3a55bee62e405364512a5b5e53074418 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 4 Feb 2021 17:26:43 +0100 Subject: [PATCH 2003/2012] wl3501: fix alignment constraints struct wl3501_80211_tx_hdr contains a ieee80211_hdr structure, which is required to have at least two byte alignment, and this conflicts with the __packed attribute: wireless/wl3501.h:553:1: warning: alignment 1 of 'struct wl3501_80211_tx_hdr' is less than 2 [-Wpacked-not-aligned] Mark wl3501_80211_tx_hdr itself as having two-byte alignment to ensure the inner structure is properly aligned. Signed-off-by: Arnd Bergmann Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20210204162653.3113749-1-arnd@kernel.org --- drivers/net/wireless/wl3501.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/wl3501.h b/drivers/net/wireless/wl3501.h index b446cb3695579..e98e04ee9a2c0 100644 --- a/drivers/net/wireless/wl3501.h +++ b/drivers/net/wireless/wl3501.h @@ -550,7 +550,7 @@ struct wl3501_80211_tx_plcp_hdr { struct wl3501_80211_tx_hdr { struct wl3501_80211_tx_plcp_hdr pclp_hdr; struct ieee80211_hdr mac_hdr; -} __packed; +} __packed __aligned(2); /* Reserve the beginning Tx space for descriptor use. -- GitLab From bfdc4d7cbe57276e60b21091976a56f38a090635 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 4 Feb 2021 17:28:04 +0100 Subject: [PATCH 2004/2012] mwl8k: fix alignment constraints sturct mwl8k_dma_data contains a ieee80211_hdr structure, which is required to have at least two byte alignment, and this conflicts with the __packed attribute: vers/net/wireless/marvell/mwl8k.c:811:1: warning: alignment 1 of 'struct mwl8k_dma_data' is less than 2 [-Wpacked-not-aligned] Mark mwl8k_dma_data itself as having two-byte alignment to ensure the inner structure is properly aligned. Signed-off-by: Arnd Bergmann Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20210204162813.3159319-1-arnd@kernel.org --- drivers/net/wireless/marvell/mwl8k.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/marvell/mwl8k.c b/drivers/net/wireless/marvell/mwl8k.c index 435ef77227abf..c9f8c056aa517 100644 --- a/drivers/net/wireless/marvell/mwl8k.c +++ b/drivers/net/wireless/marvell/mwl8k.c @@ -808,7 +808,7 @@ struct mwl8k_dma_data { __le16 fwlen; struct ieee80211_hdr wh; char data[]; -} __packed; +} __packed __aligned(2); /* Routines to add/remove DMA header from skb. */ static inline void mwl8k_remove_dma_header(struct sk_buff *skb, __le16 qos) -- GitLab From 4331667fa14e6643859d0498b34281185eb8018b Mon Sep 17 00:00:00 2001 From: Jiapeng Chong Date: Fri, 5 Feb 2021 14:56:39 +0800 Subject: [PATCH 2005/2012] ssb: Use true and false for bool variable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix the following coccicheck warnings: ./include/linux/ssb/ssb_driver_gige.h:89:8-9: WARNING: return of 0/1 in function 'ssb_gige_one_dma_at_once' with return type bool. ./include/linux/ssb/ssb_driver_gige.h:79:8-9: WARNING: return of 0/1 in function 'ssb_gige_have_roboswitch' with return type bool. ./include/linux/ssb/ssb_driver_gige.h:182:8-9: WARNING: return of 0/1 in function 'ssb_gige_must_flush_posted_writes' with return type bool. ./include/linux/ssb/ssb_driver_gige.h:178:8-9: WARNING: return of 0/1 in function 'ssb_gige_one_dma_at_once' with return type bool. ./include/linux/ssb/ssb_driver_gige.h:174:8-9: WARNING: return of 0/1 in function 'ssb_gige_have_roboswitch' with return type bool. ./include/linux/ssb/ssb_driver_gige.h:170:8-9: WARNING: return of 0/1 in function 'ssb_gige_is_rgmii' with return type bool. ./include/linux/ssb/ssb_driver_gige.h:162:8-9: WARNING: return of 0/1 in function 'pdev_is_ssb_gige_core' with return type bool. Reported-by: Abaci Robot Signed-off-by: Jiapeng Chong Acked-by: Michael Büsch Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/1612508199-92282-1-git-send-email-jiapeng.chong@linux.alibaba.com --- include/linux/ssb/ssb_driver_gige.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/include/linux/ssb/ssb_driver_gige.h b/include/linux/ssb/ssb_driver_gige.h index 31593b34608e6..15ba0df1ee0df 100644 --- a/include/linux/ssb/ssb_driver_gige.h +++ b/include/linux/ssb/ssb_driver_gige.h @@ -76,7 +76,7 @@ static inline bool ssb_gige_have_roboswitch(struct pci_dev *pdev) if (dev) return !!(dev->dev->bus->sprom.boardflags_lo & SSB_GIGE_BFL_ROBOSWITCH); - return 0; + return false; } /* Returns whether we can only do one DMA at once. */ @@ -86,7 +86,7 @@ static inline bool ssb_gige_one_dma_at_once(struct pci_dev *pdev) if (dev) return ((dev->dev->bus->chip_id == 0x4785) && (dev->dev->bus->chip_rev < 2)); - return 0; + return false; } /* Returns whether we must flush posted writes. */ @@ -159,7 +159,7 @@ static inline void ssb_gige_exit(void) static inline bool pdev_is_ssb_gige_core(struct pci_dev *pdev) { - return 0; + return false; } static inline struct ssb_gige * pdev_to_ssb_gige(struct pci_dev *pdev) { @@ -167,19 +167,19 @@ static inline struct ssb_gige * pdev_to_ssb_gige(struct pci_dev *pdev) } static inline bool ssb_gige_is_rgmii(struct pci_dev *pdev) { - return 0; + return false; } static inline bool ssb_gige_have_roboswitch(struct pci_dev *pdev) { - return 0; + return false; } static inline bool ssb_gige_one_dma_at_once(struct pci_dev *pdev) { - return 0; + return false; } static inline bool ssb_gige_must_flush_posted_writes(struct pci_dev *pdev) { - return 0; + return false; } static inline int ssb_gige_get_macaddr(struct pci_dev *pdev, u8 *macaddr) { -- GitLab From 2615e3cdbd9c0e864f5906279c952a309871d225 Mon Sep 17 00:00:00 2001 From: Anand K Mistry Date: Mon, 8 Feb 2021 13:32:09 +0200 Subject: [PATCH 2006/2012] ath10k: Fix suspicious RCU usage warning in ath10k_wmi_tlv_parse_peer_stats_info() The ieee80211_find_sta_by_ifaddr call in ath10k_wmi_tlv_parse_peer_stats_info must be called while holding the RCU read lock. Otherwise, the following warning will be seen when RCU usage checking is enabled: ============================= WARNING: suspicious RCU usage 5.10.3 #8 Tainted: G W ----------------------------- include/linux/rhashtable.h:594 suspicious rcu_dereference_check() usage! other info that might help us debug this: rcu_scheduler_active = 2, debug_locks = 1 no locks held by ksoftirqd/1/16. stack backtrace: CPU: 1 PID: 16 Comm: ksoftirqd/1 Tainted: G W 5.10.3 #8 Hardware name: HP Grunt/Grunt, BIOS Google_Grunt.11031.104.0 09/05/2019 Call Trace: dump_stack+0xab/0x115 sta_info_hash_lookup+0x71/0x1e9 [mac80211] ? lock_is_held_type+0xe6/0x12f ? __kasan_kmalloc+0xfb/0x112 ieee80211_find_sta_by_ifaddr+0x12/0x61 [mac80211] ath10k_wmi_tlv_parse_peer_stats_info+0xbd/0x10b [ath10k_core] ath10k_wmi_tlv_iter+0x8b/0x1a1 [ath10k_core] ? ath10k_wmi_tlv_iter+0x1a1/0x1a1 [ath10k_core] ath10k_wmi_tlv_event_peer_stats_info+0x103/0x13b [ath10k_core] ath10k_wmi_tlv_op_rx+0x722/0x80d [ath10k_core] ath10k_htc_rx_completion_handler+0x16e/0x1d7 [ath10k_core] ath10k_pci_process_rx_cb+0x116/0x22c [ath10k_pci] ? ath10k_htc_process_trailer+0x332/0x332 [ath10k_core] ? _raw_spin_unlock_irqrestore+0x34/0x61 ? lockdep_hardirqs_on+0x8e/0x12e ath10k_ce_per_engine_service+0x55/0x74 [ath10k_core] ath10k_ce_per_engine_service_any+0x76/0x84 [ath10k_core] ath10k_pci_napi_poll+0x49/0x141 [ath10k_pci] net_rx_action+0x11a/0x347 __do_softirq+0x2d3/0x539 run_ksoftirqd+0x4b/0x86 smpboot_thread_fn+0x1d0/0x2ab ? cpu_report_death+0x7f/0x7f kthread+0x189/0x191 ? cpu_report_death+0x7f/0x7f ? kthread_blkcg+0x31/0x31 ret_from_fork+0x22/0x30 Fixes: 0f7cb26830a6e ("ath10k: add rx bitrate report for SDIO") Signed-off-by: Anand K Mistry Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20210202134451.1.I0d2e83c42755671b7143504b62787fd06cd914ed@changeid --- drivers/net/wireless/ath/ath10k/wmi-tlv.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/wireless/ath/ath10k/wmi-tlv.c b/drivers/net/wireless/ath/ath10k/wmi-tlv.c index 72d64af8e2296..bfdd017f14052 100644 --- a/drivers/net/wireless/ath/ath10k/wmi-tlv.c +++ b/drivers/net/wireless/ath/ath10k/wmi-tlv.c @@ -240,8 +240,10 @@ static int ath10k_wmi_tlv_parse_peer_stats_info(struct ath10k *ar, u16 tag, u16 __le32_to_cpu(stat->last_tx_rate_code), __le32_to_cpu(stat->last_tx_bitrate_kbps)); + rcu_read_lock(); sta = ieee80211_find_sta_by_ifaddr(ar->hw, stat->peer_macaddr.addr, NULL); if (!sta) { + rcu_read_unlock(); ath10k_warn(ar, "not found station for peer stats\n"); return -EINVAL; } @@ -251,6 +253,7 @@ static int ath10k_wmi_tlv_parse_peer_stats_info(struct ath10k *ar, u16 tag, u16 arsta->rx_bitrate_kbps = __le32_to_cpu(stat->last_rx_bitrate_kbps); arsta->tx_rate_code = __le32_to_cpu(stat->last_tx_rate_code); arsta->tx_bitrate_kbps = __le32_to_cpu(stat->last_tx_bitrate_kbps); + rcu_read_unlock(); return 0; } -- GitLab From 7df28718928d08034b36168200d67b558ce36f3d Mon Sep 17 00:00:00 2001 From: Anand K Mistry Date: Mon, 8 Feb 2021 13:32:10 +0200 Subject: [PATCH 2007/2012] ath10k: Fix lockdep assertion warning in ath10k_sta_statistics ath10k_debug_fw_stats_request just be called with conf_mutex held, otherwise the following warning is seen when lock debugging is enabled: WARNING: CPU: 0 PID: 793 at drivers/net/wireless/ath/ath10k/debug.c:357 ath10k_debug_fw_stats_request+0x12c/0x133 [ath10k_core] Modules linked in: snd_hda_codec_hdmi designware_i2s snd_hda_intel snd_intel_dspcfg snd_hda_codec i2c_piix4 snd_hwdep snd_hda_core acpi_als kfifo_buf industrialio snd_soc_max98357a snd_soc_adau7002 snd_soc_acp_da7219mx98357_mach snd_soc_da7219 acp_audio_dma ccm xt_MASQUERADE fuse ath10k_pci ath10k_core lzo_rle ath lzo_compress mac80211 zram cfg80211 r8152 mii joydev CPU: 0 PID: 793 Comm: wpa_supplicant Tainted: G W 5.10.9 #5 Hardware name: HP Grunt/Grunt, BIOS Google_Grunt.11031.104.0 09/05/2019 RIP: 0010:ath10k_debug_fw_stats_request+0x12c/0x133 [ath10k_core] Code: 1e bb a1 ff ff ff 4c 89 ef 48 c7 c6 d3 31 2e c0 89 da 31 c0 e8 bd f8 ff ff 89 d8 eb 02 31 c0 5b 41 5c 41 5d 41 5e 41 5f 5d c3 <0f> 0b e9 04 ff ff ff 0f 1f 44 00 00 55 48 89 e5 41 56 53 48 89 fb RSP: 0018:ffffb2478099f7d0 EFLAGS: 00010246 RAX: 0000000000000000 RBX: ffff9e432700cce0 RCX: 11c85cfd6b8e3b00 RDX: ffff9e432700cce0 RSI: ffff9e43127c5668 RDI: ffff9e4318deddf0 RBP: ffffb2478099f7f8 R08: 0000000000000002 R09: 00000003fd7068cc R10: ffffffffc01b2749 R11: ffffffffc029efaf R12: ffff9e432700c000 R13: ffff9e43127c33e0 R14: ffffb2478099f918 R15: ffff9e43127c33e0 FS: 00007f7ea48e2740(0000) GS:ffff9e432aa00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 000059aa799ddf38 CR3: 0000000118de2000 CR4: 00000000001506f0 Call Trace: ath10k_sta_statistics+0x4d/0x270 [ath10k_core] sta_set_sinfo+0x1be/0xaec [mac80211] ieee80211_get_station+0x58/0x76 [mac80211] rdev_get_station+0xf1/0x11e [cfg80211] nl80211_get_station+0x7f/0x146 [cfg80211] genl_rcv_msg+0x32e/0x35e ? nl80211_stop_ap+0x19/0x19 [cfg80211] ? nl80211_get_station+0x146/0x146 [cfg80211] ? genl_rcv+0x19/0x36 ? genl_rcv+0x36/0x36 netlink_rcv_skb+0x89/0xfb genl_rcv+0x28/0x36 netlink_unicast+0x169/0x23b netlink_sendmsg+0x38a/0x402 sock_sendmsg+0x72/0x76 ____sys_sendmsg+0x153/0x1cc ? copy_msghdr_from_user+0x5d/0x85 ___sys_sendmsg+0x7c/0xb5 ? lock_acquire+0x181/0x23d ? syscall_trace_enter+0x15e/0x160 ? find_held_lock+0x3d/0xb2 ? syscall_trace_enter+0x15e/0x160 ? sched_clock_cpu+0x15/0xc6 __sys_sendmsg+0x62/0x9a do_syscall_64+0x43/0x55 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Fixes: 4913e675630e ("ath10k: enable rx duration report default for wmi tlv") Signed-off-by: Anand K Mistry Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20210202144033.1.I9e556f9fb1110d58c31d04a8a1293995fb8bb678@changeid --- drivers/net/wireless/ath/ath10k/mac.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/wireless/ath/ath10k/mac.c b/drivers/net/wireless/ath/ath10k/mac.c index d403c9fd49809..d8937181bbb98 100644 --- a/drivers/net/wireless/ath/ath10k/mac.c +++ b/drivers/net/wireless/ath/ath10k/mac.c @@ -9219,7 +9219,9 @@ static void ath10k_sta_statistics(struct ieee80211_hw *hw, if (!ath10k_peer_stats_enabled(ar)) return; + mutex_lock(&ar->conf_mutex); ath10k_debug_fw_stats_request(ar); + mutex_unlock(&ar->conf_mutex); sinfo->rx_duration = arsta->rx_duration; sinfo->filled |= BIT_ULL(NL80211_STA_INFO_RX_DURATION); -- GitLab From 4b965be536eefdd16ca0a88120fee23f5b92cd16 Mon Sep 17 00:00:00 2001 From: Karthikeyan Periyasamy Date: Mon, 8 Feb 2021 13:32:11 +0200 Subject: [PATCH 2008/2012] ath11k: Update tx descriptor search index properly Tx descriptor search index field should be updated with hw peer id and not by AST Hash as per the HW/FW recommendation. Incorrect search index causes throughput degradation in all scenario for all the platforms. so updated the search index field with hw peer id, which is a common change applicable for all the platforms. Also no need of these configuration for non station type. seen 10% throughput increase in WDS traffic with this change. Tested-on: IPQ8074 hw2.0 AHB WLAN.HK.2.4.0.1-01492-QCAHKSWPL_SILICONZ-1 Signed-off-by: Karthikeyan Periyasamy Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/1612410960-9120-1-git-send-email-periyasa@codeaurora.org --- drivers/net/wireless/ath/ath11k/core.h | 1 + drivers/net/wireless/ath/ath11k/dp_rx.c | 8 ++++++-- drivers/net/wireless/ath/ath11k/dp_tx.c | 1 + drivers/net/wireless/ath/ath11k/hal_tx.c | 2 ++ drivers/net/wireless/ath/ath11k/hal_tx.h | 1 + drivers/net/wireless/ath/ath11k/peer.c | 9 +++++++-- drivers/net/wireless/ath/ath11k/peer.h | 3 ++- 7 files changed, 20 insertions(+), 5 deletions(-) diff --git a/drivers/net/wireless/ath/ath11k/core.h b/drivers/net/wireless/ath/ath11k/core.h index 9db375b193def..8d29845774dfa 100644 --- a/drivers/net/wireless/ath/ath11k/core.h +++ b/drivers/net/wireless/ath/ath11k/core.h @@ -200,6 +200,7 @@ struct ath11k_vif { u32 beacon_interval; u32 dtim_period; u16 ast_hash; + u16 ast_idx; u16 tcl_metadata; u8 hal_addr_search_flags; u8 search_type; diff --git a/drivers/net/wireless/ath/ath11k/dp_rx.c b/drivers/net/wireless/ath/ath11k/dp_rx.c index 859cfcabceb56..850ad38b888f4 100644 --- a/drivers/net/wireless/ath/ath11k/dp_rx.c +++ b/drivers/net/wireless/ath/ath11k/dp_rx.c @@ -1652,6 +1652,7 @@ void ath11k_dp_htt_htc_t2h_msg_handler(struct ath11k_base *ab, u8 mac_addr[ETH_ALEN]; u16 peer_mac_h16; u16 ast_hash; + u16 hw_peer_id; ath11k_dbg(ab, ATH11K_DBG_DP_HTT, "dp_htt rx msg type :0x%0x\n", type); @@ -1672,7 +1673,7 @@ void ath11k_dp_htt_htc_t2h_msg_handler(struct ath11k_base *ab, resp->peer_map_ev.info1); ath11k_dp_get_mac_addr(resp->peer_map_ev.mac_addr_l32, peer_mac_h16, mac_addr); - ath11k_peer_map_event(ab, vdev_id, peer_id, mac_addr, 0); + ath11k_peer_map_event(ab, vdev_id, peer_id, mac_addr, 0, 0); break; case HTT_T2H_MSG_TYPE_PEER_MAP2: vdev_id = FIELD_GET(HTT_T2H_PEER_MAP_INFO_VDEV_ID, @@ -1685,7 +1686,10 @@ void ath11k_dp_htt_htc_t2h_msg_handler(struct ath11k_base *ab, peer_mac_h16, mac_addr); ast_hash = FIELD_GET(HTT_T2H_PEER_MAP_INFO2_AST_HASH_VAL, resp->peer_map_ev.info2); - ath11k_peer_map_event(ab, vdev_id, peer_id, mac_addr, ast_hash); + hw_peer_id = FIELD_GET(HTT_T2H_PEER_MAP_INFO1_HW_PEER_ID, + resp->peer_map_ev.info1); + ath11k_peer_map_event(ab, vdev_id, peer_id, mac_addr, ast_hash, + hw_peer_id); break; case HTT_T2H_MSG_TYPE_PEER_UNMAP: case HTT_T2H_MSG_TYPE_PEER_UNMAP2: diff --git a/drivers/net/wireless/ath/ath11k/dp_tx.c b/drivers/net/wireless/ath/ath11k/dp_tx.c index 6a3fcea6c2334..1a0b9be9ce6ae 100644 --- a/drivers/net/wireless/ath/ath11k/dp_tx.c +++ b/drivers/net/wireless/ath/ath11k/dp_tx.c @@ -165,6 +165,7 @@ tcl_ring_sel: ti.pkt_offset = 0; ti.lmac_id = ar->lmac_id; ti.bss_ast_hash = arvif->ast_hash; + ti.bss_ast_idx = arvif->ast_idx; ti.dscp_tid_tbl_idx = 0; if (skb->ip_summed == CHECKSUM_PARTIAL && diff --git a/drivers/net/wireless/ath/ath11k/hal_tx.c b/drivers/net/wireless/ath/ath11k/hal_tx.c index a755aa86c5dee..569e790d83a15 100644 --- a/drivers/net/wireless/ath/ath11k/hal_tx.c +++ b/drivers/net/wireless/ath/ath11k/hal_tx.c @@ -71,6 +71,8 @@ void ath11k_hal_tx_cmd_desc_setup(struct ath11k_base *ab, void *cmd, tcl_cmd->info3 = FIELD_PREP(HAL_TCL_DATA_CMD_INFO3_DSCP_TID_TABLE_IDX, ti->dscp_tid_tbl_idx) | FIELD_PREP(HAL_TCL_DATA_CMD_INFO3_SEARCH_INDEX, + ti->bss_ast_idx) | + FIELD_PREP(HAL_TCL_DATA_CMD_INFO3_CACHE_SET_NUM, ti->bss_ast_hash); tcl_cmd->info4 = 0; } diff --git a/drivers/net/wireless/ath/ath11k/hal_tx.h b/drivers/net/wireless/ath/ath11k/hal_tx.h index d4760a20fdacb..c291e59c3ca6f 100644 --- a/drivers/net/wireless/ath/ath11k/hal_tx.h +++ b/drivers/net/wireless/ath/ath11k/hal_tx.h @@ -29,6 +29,7 @@ struct hal_tx_info { u32 flags1; /* %HAL_TCL_DATA_CMD_INFO2_ */ u16 addr_search_flags; /* %HAL_TCL_DATA_CMD_INFO0_ADDR(X/Y)_ */ u16 bss_ast_hash; + u16 bss_ast_idx; u8 tid; u8 search_type; /* %HAL_TX_ADDR_SEARCH_ */ u8 lmac_id; diff --git a/drivers/net/wireless/ath/ath11k/peer.c b/drivers/net/wireless/ath/ath11k/peer.c index b69e7ebfa9303..f49abefa9618e 100644 --- a/drivers/net/wireless/ath/ath11k/peer.c +++ b/drivers/net/wireless/ath/ath11k/peer.c @@ -118,7 +118,7 @@ exit: } void ath11k_peer_map_event(struct ath11k_base *ab, u8 vdev_id, u16 peer_id, - u8 *mac_addr, u16 ast_hash) + u8 *mac_addr, u16 ast_hash, u16 hw_peer_id) { struct ath11k_peer *peer; @@ -132,6 +132,7 @@ void ath11k_peer_map_event(struct ath11k_base *ab, u8 vdev_id, u16 peer_id, peer->vdev_id = vdev_id; peer->peer_id = peer_id; peer->ast_hash = ast_hash; + peer->hw_peer_id = hw_peer_id; ether_addr_copy(peer->addr, mac_addr); list_add(&peer->list, &ab->peers); wake_up(&ab->peer_mapping_wq); @@ -309,7 +310,11 @@ int ath11k_peer_create(struct ath11k *ar, struct ath11k_vif *arvif, peer->pdev_idx = ar->pdev_idx; peer->sta = sta; - arvif->ast_hash = peer->ast_hash; + + if (arvif->vif->type == NL80211_IFTYPE_STATION) { + arvif->ast_hash = peer->ast_hash; + arvif->ast_idx = peer->hw_peer_id; + } peer->sec_type = HAL_ENCRYPT_TYPE_OPEN; peer->sec_type_grp = HAL_ENCRYPT_TYPE_OPEN; diff --git a/drivers/net/wireless/ath/ath11k/peer.h b/drivers/net/wireless/ath/ath11k/peer.h index 8553ed061aeaa..619db001be8e9 100644 --- a/drivers/net/wireless/ath/ath11k/peer.h +++ b/drivers/net/wireless/ath/ath11k/peer.h @@ -14,6 +14,7 @@ struct ath11k_peer { int peer_id; u16 ast_hash; u8 pdev_idx; + u16 hw_peer_id; /* protected by ab->data_lock */ struct ieee80211_key_conf *keys[WMI_MAX_KEY_INDEX + 1]; @@ -31,7 +32,7 @@ struct ath11k_peer { void ath11k_peer_unmap_event(struct ath11k_base *ab, u16 peer_id); void ath11k_peer_map_event(struct ath11k_base *ab, u8 vdev_id, u16 peer_id, - u8 *mac_addr, u16 ast_hash); + u8 *mac_addr, u16 ast_hash, u16 hw_peer_id); struct ath11k_peer *ath11k_peer_find(struct ath11k_base *ab, int vdev_id, const u8 *addr); struct ath11k_peer *ath11k_peer_find_by_addr(struct ath11k_base *ab, -- GitLab From 84da2a84027c2bb88662dbfad8ebddc357c5c5ae Mon Sep 17 00:00:00 2001 From: Loic Poulain Date: Mon, 8 Feb 2021 13:32:11 +0200 Subject: [PATCH 2009/2012] wcn36xx: del BA session on TX stop Deleting BA session was not correcly performed, causing communication issues with APs that dynamically stop/start new BA sessions. Signed-off-by: Loic Poulain Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/1611328304-1010-1-git-send-email-loic.poulain@linaro.org --- drivers/net/wireless/ath/wcn36xx/main.c | 3 ++- drivers/net/wireless/ath/wcn36xx/smd.c | 4 ++-- drivers/net/wireless/ath/wcn36xx/smd.h | 2 +- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/ath/wcn36xx/main.c b/drivers/net/wireless/ath/wcn36xx/main.c index 5867bd9c2f646..afb4877eaad8f 100644 --- a/drivers/net/wireless/ath/wcn36xx/main.c +++ b/drivers/net/wireless/ath/wcn36xx/main.c @@ -1140,7 +1140,7 @@ static int wcn36xx_ampdu_action(struct ieee80211_hw *hw, session); break; case IEEE80211_AMPDU_RX_STOP: - wcn36xx_smd_del_ba(wcn, tid, get_sta_index(vif, sta_priv)); + wcn36xx_smd_del_ba(wcn, tid, 0, get_sta_index(vif, sta_priv)); break; case IEEE80211_AMPDU_TX_START: spin_lock_bh(&sta_priv->ampdu_lock); @@ -1164,6 +1164,7 @@ static int wcn36xx_ampdu_action(struct ieee80211_hw *hw, sta_priv->ampdu_state[tid] = WCN36XX_AMPDU_NONE; spin_unlock_bh(&sta_priv->ampdu_lock); + wcn36xx_smd_del_ba(wcn, tid, 1, get_sta_index(vif, sta_priv)); ieee80211_stop_tx_ba_cb_irqsafe(vif, sta->addr, tid); break; default: diff --git a/drivers/net/wireless/ath/wcn36xx/smd.c b/drivers/net/wireless/ath/wcn36xx/smd.c index 28d3ee3ad4b9c..d0c3a1557e8d6 100644 --- a/drivers/net/wireless/ath/wcn36xx/smd.c +++ b/drivers/net/wireless/ath/wcn36xx/smd.c @@ -2466,7 +2466,7 @@ out: return ret; } -int wcn36xx_smd_del_ba(struct wcn36xx *wcn, u16 tid, u8 sta_index) +int wcn36xx_smd_del_ba(struct wcn36xx *wcn, u16 tid, u8 direction, u8 sta_index) { struct wcn36xx_hal_del_ba_req_msg msg_body; int ret; @@ -2476,7 +2476,7 @@ int wcn36xx_smd_del_ba(struct wcn36xx *wcn, u16 tid, u8 sta_index) msg_body.sta_index = sta_index; msg_body.tid = tid; - msg_body.direction = 0; + msg_body.direction = direction; PREPARE_HAL_BUF(wcn->hal_buf, msg_body); ret = wcn36xx_smd_send_and_wait(wcn, msg_body.header.len); diff --git a/drivers/net/wireless/ath/wcn36xx/smd.h b/drivers/net/wireless/ath/wcn36xx/smd.h index b1d8083d9d9d5..462860572e1f8 100644 --- a/drivers/net/wireless/ath/wcn36xx/smd.h +++ b/drivers/net/wireless/ath/wcn36xx/smd.h @@ -135,7 +135,7 @@ int wcn36xx_smd_add_ba_session(struct wcn36xx *wcn, u8 direction, u8 sta_index); int wcn36xx_smd_add_ba(struct wcn36xx *wcn, u8 session_id); -int wcn36xx_smd_del_ba(struct wcn36xx *wcn, u16 tid, u8 sta_index); +int wcn36xx_smd_del_ba(struct wcn36xx *wcn, u16 tid, u8 direction, u8 sta_index); int wcn36xx_smd_trigger_ba(struct wcn36xx *wcn, u8 sta_index, u16 tid, u8 session_id); int wcn36xx_smd_update_cfg(struct wcn36xx *wcn, u32 cfg_id, u32 value); -- GitLab From ca9ad549e4042089d55a68f0312647fca4cc6e87 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 4 Feb 2021 17:29:17 +0100 Subject: [PATCH 2010/2012] carl9170: fix struct alignment conflict Multiple structures in the carl9170 driver have alignment impossible alignment constraints that gcc warns about when building with 'make W=1': drivers/net/wireless/ath/carl9170/fwcmd.h:243:2: warning: alignment 1 of 'union ' is less than 4 [-Wpacked-not-aligned] drivers/net/wireless/ath/carl9170/wlan.h:373:1: warning: alignment 1 of 'struct ar9170_rx_frame_single' is less than 2 [-Wpacked-not-aligned] In the carl9170_cmd structure, multiple members that have an explicit alignment requirement of four bytes are added into a union with explicit byte alignment, but this in turn is part of a structure that also has four-byte alignment. In the wlan.h header, multiple structures contain a ieee80211_hdr member that is required to be two-byte aligned to avoid alignmnet faults when processing network headers, but all members are forced to be byte-aligned using the __packed tag at the end of the struct definition. In both cases, leaving out the packing does not change the internal layout of the structure but changes the alignment constraint of the structure itself. Change all affected structures to only apply packing where it does not violate the alignment requirement of the contained structure. Signed-off-by: Arnd Bergmann Acked-by: Christian Lamparter Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20210204162926.3262598-1-arnd@kernel.org --- drivers/net/wireless/ath/carl9170/fwcmd.h | 2 +- drivers/net/wireless/ath/carl9170/wlan.h | 20 ++++++++++---------- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/drivers/net/wireless/ath/carl9170/fwcmd.h b/drivers/net/wireless/ath/carl9170/fwcmd.h index 56999a3b9d3bc..4a500095555c6 100644 --- a/drivers/net/wireless/ath/carl9170/fwcmd.h +++ b/drivers/net/wireless/ath/carl9170/fwcmd.h @@ -240,7 +240,7 @@ struct carl9170_cmd { struct carl9170_bcn_ctrl_cmd bcn_ctrl; struct carl9170_rx_filter_cmd rx_filter; u8 data[CARL9170_MAX_CMD_PAYLOAD_LEN]; - } __packed; + } __packed __aligned(4); } __packed __aligned(4); #define CARL9170_TX_STATUS_QUEUE 3 diff --git a/drivers/net/wireless/ath/carl9170/wlan.h b/drivers/net/wireless/ath/carl9170/wlan.h index ea17995b32f4e..bb73553fd7c27 100644 --- a/drivers/net/wireless/ath/carl9170/wlan.h +++ b/drivers/net/wireless/ath/carl9170/wlan.h @@ -367,27 +367,27 @@ struct ar9170_rx_macstatus { struct ar9170_rx_frame_single { struct ar9170_rx_head phy_head; - struct ieee80211_hdr i3e; + struct ieee80211_hdr i3e __packed __aligned(2); struct ar9170_rx_phystatus phy_tail; struct ar9170_rx_macstatus macstatus; -} __packed; +}; struct ar9170_rx_frame_head { struct ar9170_rx_head phy_head; - struct ieee80211_hdr i3e; + struct ieee80211_hdr i3e __packed __aligned(2); struct ar9170_rx_macstatus macstatus; -} __packed; +}; struct ar9170_rx_frame_middle { - struct ieee80211_hdr i3e; + struct ieee80211_hdr i3e __packed __aligned(2); struct ar9170_rx_macstatus macstatus; -} __packed; +}; struct ar9170_rx_frame_tail { - struct ieee80211_hdr i3e; + struct ieee80211_hdr i3e __packed __aligned(2); struct ar9170_rx_phystatus phy_tail; struct ar9170_rx_macstatus macstatus; -} __packed; +}; struct ar9170_rx_frame { union { @@ -395,8 +395,8 @@ struct ar9170_rx_frame { struct ar9170_rx_frame_head head; struct ar9170_rx_frame_middle middle; struct ar9170_rx_frame_tail tail; - } __packed; -} __packed; + }; +}; static inline u8 ar9170_get_decrypt_type(struct ar9170_rx_macstatus *t) { -- GitLab From 97614c59cb72b26ebebec4334921c9ae8fb895e6 Mon Sep 17 00:00:00 2001 From: Tamizh Chelvam Date: Fri, 5 Feb 2021 00:05:38 +0530 Subject: [PATCH 2011/2012] ath10k: Add new debug level for sta related logs Add new level ATH10K_DBG_STA debug_mask for printing sta related logs. This will be useful to check the debug logs of connection and changes related to station. Tested-on: QCA9984 hw1.0 PCI 10.4-3.9.0.2-00021 Signed-off-by: Tamizh Chelvam Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/1612463738-16542-1-git-send-email-tamizhr@codeaurora.org --- drivers/net/wireless/ath/ath10k/debug.h | 1 + drivers/net/wireless/ath/ath10k/mac.c | 20 ++++++++++---------- drivers/net/wireless/ath/ath10k/wmi.c | 4 ++-- 3 files changed, 13 insertions(+), 12 deletions(-) diff --git a/drivers/net/wireless/ath/ath10k/debug.h b/drivers/net/wireless/ath/ath10k/debug.h index 997c1c80aba76..0af787f49b338 100644 --- a/drivers/net/wireless/ath/ath10k/debug.h +++ b/drivers/net/wireless/ath/ath10k/debug.h @@ -34,6 +34,7 @@ enum ath10k_debug_mask { ATH10K_DBG_USB_BULK = 0x00080000, ATH10K_DBG_SNOC = 0x00100000, ATH10K_DBG_QMI = 0x00200000, + ATH10K_DBG_STA = 0x00400000, ATH10K_DBG_ANY = 0xffffffff, }; diff --git a/drivers/net/wireless/ath/ath10k/mac.c b/drivers/net/wireless/ath/ath10k/mac.c index d8937181bbb98..c202b167d8c6c 100644 --- a/drivers/net/wireless/ath/ath10k/mac.c +++ b/drivers/net/wireless/ath/ath10k/mac.c @@ -6667,7 +6667,7 @@ static void ath10k_sta_rc_update_wk(struct work_struct *wk) enum wmi_phy_mode mode; mode = chan_to_phymode(&def); - ath10k_dbg(ar, ATH10K_DBG_MAC, "mac update sta %pM peer bw %d phymode %d\n", + ath10k_dbg(ar, ATH10K_DBG_STA, "mac update sta %pM peer bw %d phymode %d\n", sta->addr, bw, mode); err = ath10k_wmi_peer_set_param(ar, arvif->vdev_id, sta->addr, @@ -6686,7 +6686,7 @@ static void ath10k_sta_rc_update_wk(struct work_struct *wk) } if (changed & IEEE80211_RC_NSS_CHANGED) { - ath10k_dbg(ar, ATH10K_DBG_MAC, "mac update sta %pM nss %d\n", + ath10k_dbg(ar, ATH10K_DBG_STA, "mac update sta %pM nss %d\n", sta->addr, nss); err = ath10k_wmi_peer_set_param(ar, arvif->vdev_id, sta->addr, @@ -6697,7 +6697,7 @@ static void ath10k_sta_rc_update_wk(struct work_struct *wk) } if (changed & IEEE80211_RC_SMPS_CHANGED) { - ath10k_dbg(ar, ATH10K_DBG_MAC, "mac update sta %pM smps %d\n", + ath10k_dbg(ar, ATH10K_DBG_STA, "mac update sta %pM smps %d\n", sta->addr, smps); err = ath10k_wmi_peer_set_param(ar, arvif->vdev_id, sta->addr, @@ -6708,7 +6708,7 @@ static void ath10k_sta_rc_update_wk(struct work_struct *wk) } if (changed & IEEE80211_RC_SUPP_RATES_CHANGED) { - ath10k_dbg(ar, ATH10K_DBG_MAC, "mac update sta %pM supp rates\n", + ath10k_dbg(ar, ATH10K_DBG_STA, "mac update sta %pM supp rates\n", sta->addr); err = ath10k_station_assoc(ar, arvif->vif, sta, true); @@ -7404,7 +7404,7 @@ static int ath10k_sta_state(struct ieee80211_hw *hw, enum wmi_peer_type peer_type = WMI_PEER_TYPE_DEFAULT; u32 num_tdls_stations; - ath10k_dbg(ar, ATH10K_DBG_MAC, + ath10k_dbg(ar, ATH10K_DBG_STA, "mac vdev %d peer create %pM (new sta) sta %d / %d peer %d / %d\n", arvif->vdev_id, sta->addr, ar->num_stations + 1, ar->max_num_stations, @@ -7504,7 +7504,7 @@ static int ath10k_sta_state(struct ieee80211_hw *hw, /* * Existing station deletion. */ - ath10k_dbg(ar, ATH10K_DBG_MAC, + ath10k_dbg(ar, ATH10K_DBG_STA, "mac vdev %d peer delete %pM sta %pK (sta gone)\n", arvif->vdev_id, sta->addr, sta); @@ -7576,7 +7576,7 @@ static int ath10k_sta_state(struct ieee80211_hw *hw, /* * New association. */ - ath10k_dbg(ar, ATH10K_DBG_MAC, "mac sta %pM associated\n", + ath10k_dbg(ar, ATH10K_DBG_STA, "mac sta %pM associated\n", sta->addr); ret = ath10k_station_assoc(ar, vif, sta, false); @@ -7589,7 +7589,7 @@ static int ath10k_sta_state(struct ieee80211_hw *hw, /* * Tdls station authorized. */ - ath10k_dbg(ar, ATH10K_DBG_MAC, "mac tdls sta %pM authorized\n", + ath10k_dbg(ar, ATH10K_DBG_STA, "mac tdls sta %pM authorized\n", sta->addr); ret = ath10k_station_assoc(ar, vif, sta, false); @@ -7612,7 +7612,7 @@ static int ath10k_sta_state(struct ieee80211_hw *hw, /* * Disassociation. */ - ath10k_dbg(ar, ATH10K_DBG_MAC, "mac sta %pM disassociated\n", + ath10k_dbg(ar, ATH10K_DBG_STA, "mac sta %pM disassociated\n", sta->addr); ret = ath10k_station_disassoc(ar, vif, sta); @@ -8429,7 +8429,7 @@ static void ath10k_sta_rc_update(struct ieee80211_hw *hw, return; } - ath10k_dbg(ar, ATH10K_DBG_MAC, + ath10k_dbg(ar, ATH10K_DBG_STA, "mac sta rc update for %pM changed %08x bw %d nss %d smps %d\n", sta->addr, changed, sta->bandwidth, sta->rx_nss, sta->smps_mode); diff --git a/drivers/net/wireless/ath/ath10k/wmi.c b/drivers/net/wireless/ath/ath10k/wmi.c index 29f64315a3b50..d48b922215eb6 100644 --- a/drivers/net/wireless/ath/ath10k/wmi.c +++ b/drivers/net/wireless/ath/ath10k/wmi.c @@ -3497,7 +3497,7 @@ void ath10k_wmi_event_peer_sta_kickout(struct ath10k *ar, struct sk_buff *skb) return; } - ath10k_dbg(ar, ATH10K_DBG_WMI, "wmi event peer sta kickout %pM\n", + ath10k_dbg(ar, ATH10K_DBG_STA, "wmi event peer sta kickout %pM\n", arg.mac_addr); rcu_read_lock(); @@ -7506,7 +7506,7 @@ ath10k_wmi_op_gen_set_sta_ps(struct ath10k *ar, u32 vdev_id, cmd->param_id = __cpu_to_le32(param_id); cmd->param_value = __cpu_to_le32(value); - ath10k_dbg(ar, ATH10K_DBG_WMI, + ath10k_dbg(ar, ATH10K_DBG_STA, "wmi sta ps param vdev_id 0x%x param %d value %d\n", vdev_id, param_id, value); return skb; -- GitLab From 7064e2193cabcdb8faa9008744e6ceb7f86d314d Mon Sep 17 00:00:00 2001 From: Max Chen Date: Wed, 6 Jan 2021 15:50:50 -0800 Subject: [PATCH 2012/2012] wil6210: Add Support for Extended DMG MCS 12.1 FW reports Tx/Rx extended MCS 12.1 to driver as "26". Driver will convert this into base MCS 7 + EXTENDED_SC_DMG flag so kernel can do the correct phy rate conversion. Also add log prints to print "12.1" instead of "26" for extended MCS. Signed-off-by: Max Chen Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/1609977050-7089-3-git-send-email-mxchen@codeaurora.org --- drivers/net/wireless/ath/wil6210/cfg80211.c | 38 +++++++++++++++----- drivers/net/wireless/ath/wil6210/debugfs.c | 17 +++++---- drivers/net/wireless/ath/wil6210/txrx_edma.c | 2 ++ drivers/net/wireless/ath/wil6210/wil6210.h | 3 ++ drivers/net/wireless/ath/wil6210/wmi.c | 11 +++--- 5 files changed, 52 insertions(+), 19 deletions(-) diff --git a/drivers/net/wireless/ath/wil6210/cfg80211.c b/drivers/net/wireless/ath/wil6210/cfg80211.c index 60bba5b491e0f..6746fd206d2a9 100644 --- a/drivers/net/wireless/ath/wil6210/cfg80211.c +++ b/drivers/net/wireless/ath/wil6210/cfg80211.c @@ -441,7 +441,9 @@ int wil_cid_fill_sinfo(struct wil6210_vif *vif, int cid, } __packed reply; struct wil_net_stats *stats = &wil->sta[cid].stats; int rc; - u8 txflag = RATE_INFO_FLAGS_DMG; + u8 tx_mcs, rx_mcs; + u8 tx_rate_flag = RATE_INFO_FLAGS_DMG; + u8 rx_rate_flag = RATE_INFO_FLAGS_DMG; memset(&reply, 0, sizeof(reply)); @@ -451,13 +453,15 @@ int wil_cid_fill_sinfo(struct wil6210_vif *vif, int cid, if (rc) return rc; + tx_mcs = le16_to_cpu(reply.evt.bf_mcs); + wil_dbg_wmi(wil, "Link status for CID %d MID %d: {\n" - " MCS %d TSF 0x%016llx\n" + " MCS %s TSF 0x%016llx\n" " BF status 0x%08x RSSI %d SQI %d%%\n" " Tx Tpt %d goodput %d Rx goodput %d\n" " Sectors(rx:tx) my %d:%d peer %d:%d\n" " Tx mode %d}\n", - cid, vif->mid, le16_to_cpu(reply.evt.bf_mcs), + cid, vif->mid, WIL_EXTENDED_MCS_CHECK(tx_mcs), le64_to_cpu(reply.evt.tsf), reply.evt.status, reply.evt.rssi, reply.evt.sqi, @@ -481,12 +485,30 @@ int wil_cid_fill_sinfo(struct wil6210_vif *vif, int cid, BIT_ULL(NL80211_STA_INFO_RX_DROP_MISC) | BIT_ULL(NL80211_STA_INFO_TX_FAILED); - if (wil->use_enhanced_dma_hw && reply.evt.tx_mode != WMI_TX_MODE_DMG) - txflag = RATE_INFO_FLAGS_EDMG; + if (wil->use_enhanced_dma_hw && reply.evt.tx_mode != WMI_TX_MODE_DMG) { + tx_rate_flag = RATE_INFO_FLAGS_EDMG; + rx_rate_flag = RATE_INFO_FLAGS_EDMG; + } + + rx_mcs = stats->last_mcs_rx; + + /* check extended MCS (12.1) and convert it into + * base MCS (7) + EXTENDED_SC_DMG flag + */ + if (tx_mcs == WIL_EXTENDED_MCS_26) { + tx_rate_flag = RATE_INFO_FLAGS_EXTENDED_SC_DMG; + tx_mcs = WIL_BASE_MCS_FOR_EXTENDED_26; + } + if (rx_mcs == WIL_EXTENDED_MCS_26) { + rx_rate_flag = RATE_INFO_FLAGS_EXTENDED_SC_DMG; + rx_mcs = WIL_BASE_MCS_FOR_EXTENDED_26; + } + + sinfo->txrate.flags = tx_rate_flag; + sinfo->rxrate.flags = rx_rate_flag; + sinfo->txrate.mcs = tx_mcs; + sinfo->rxrate.mcs = rx_mcs; - sinfo->txrate.flags = txflag; - sinfo->txrate.mcs = le16_to_cpu(reply.evt.bf_mcs); - sinfo->rxrate.mcs = stats->last_mcs_rx; sinfo->txrate.n_bonded_ch = wil_tx_cb_mode_to_n_bonded(reply.evt.tx_mode); sinfo->rxrate.n_bonded_ch = diff --git a/drivers/net/wireless/ath/wil6210/debugfs.c b/drivers/net/wireless/ath/wil6210/debugfs.c index 2d618f90afa7b..4c944e595978b 100644 --- a/drivers/net/wireless/ath/wil6210/debugfs.c +++ b/drivers/net/wireless/ath/wil6210/debugfs.c @@ -1294,6 +1294,7 @@ static int bf_show(struct seq_file *s, void *data) for (i = 0; i < wil->max_assoc_sta; i++) { u32 status; + u8 bf_mcs; cmd.cid = i; rc = wmi_call(wil, WMI_NOTIFY_REQ_CMDID, vif->mid, @@ -1305,9 +1306,10 @@ static int bf_show(struct seq_file *s, void *data) continue; status = le32_to_cpu(reply.evt.status); + bf_mcs = le16_to_cpu(reply.evt.bf_mcs); seq_printf(s, "CID %d {\n" " TSF = 0x%016llx\n" - " TxMCS = %2d TxTpt = %4d\n" + " TxMCS = %s TxTpt = %4d\n" " SQI = %4d\n" " RSSI = %4d\n" " Status = 0x%08x %s\n" @@ -1316,7 +1318,7 @@ static int bf_show(struct seq_file *s, void *data) "}\n", i, le64_to_cpu(reply.evt.tsf), - le16_to_cpu(reply.evt.bf_mcs), + WIL_EXTENDED_MCS_CHECK(bf_mcs), le32_to_cpu(reply.evt.tx_tpt), reply.evt.sqi, reply.evt.rssi, @@ -1443,8 +1445,10 @@ static int link_show(struct seq_file *s, void *data) if (rc) goto out; - seq_printf(s, " Tx_mcs = %d\n", sinfo->txrate.mcs); - seq_printf(s, " Rx_mcs = %d\n", sinfo->rxrate.mcs); + seq_printf(s, " Tx_mcs = %s\n", + WIL_EXTENDED_MCS_CHECK(sinfo->txrate.mcs)); + seq_printf(s, " Rx_mcs = %s\n", + WIL_EXTENDED_MCS_CHECK(sinfo->rxrate.mcs)); seq_printf(s, " SQ = %d\n", sinfo->signal); } else { seq_puts(s, " INVALID MID\n"); @@ -1848,7 +1852,7 @@ static void wil_link_stats_print_basic(struct wil6210_vif *vif, snprintf(per, sizeof(per), "%d%%", basic->per_average); seq_printf(s, "CID %d {\n" - "\tTxMCS %d TxTpt %d\n" + "\tTxMCS %s TxTpt %d\n" "\tGoodput(rx:tx) %d:%d\n" "\tRxBcastFrames %d\n" "\tRSSI %d SQI %d SNR %d PER %s\n" @@ -1856,7 +1860,8 @@ static void wil_link_stats_print_basic(struct wil6210_vif *vif, "\tSectors(rx:tx) my %d:%d peer %d:%d\n" "}\n", basic->cid, - basic->bf_mcs, le32_to_cpu(basic->tx_tpt), + WIL_EXTENDED_MCS_CHECK(basic->bf_mcs), + le32_to_cpu(basic->tx_tpt), le32_to_cpu(basic->rx_goodput), le32_to_cpu(basic->tx_goodput), le32_to_cpu(basic->rx_bcast_frames), diff --git a/drivers/net/wireless/ath/wil6210/txrx_edma.c b/drivers/net/wireless/ath/wil6210/txrx_edma.c index 8ca2ce51c83ef..201c8c35e0c9e 100644 --- a/drivers/net/wireless/ath/wil6210/txrx_edma.c +++ b/drivers/net/wireless/ath/wil6210/txrx_edma.c @@ -1026,6 +1026,8 @@ skipping: stats->last_mcs_rx = wil_rx_status_get_mcs(msg); if (stats->last_mcs_rx < ARRAY_SIZE(stats->rx_per_mcs)) stats->rx_per_mcs[stats->last_mcs_rx]++; + else if (stats->last_mcs_rx == WIL_EXTENDED_MCS_26) + stats->rx_per_mcs[WIL_BASE_MCS_FOR_EXTENDED_26]++; stats->last_cb_mode_rx = wil_rx_status_get_cb_mode(msg); } diff --git a/drivers/net/wireless/ath/wil6210/wil6210.h b/drivers/net/wireless/ath/wil6210/wil6210.h index 5dc881d3c057d..30392eb1cbbd5 100644 --- a/drivers/net/wireless/ath/wil6210/wil6210.h +++ b/drivers/net/wireless/ath/wil6210/wil6210.h @@ -89,6 +89,9 @@ static inline u32 WIL_GET_BITS(u32 x, int b0, int b1) #define WIL_MAX_AGG_WSIZE_64 (64) /* FW/HW limit */ #define WIL6210_MAX_STATUS_RINGS (8) #define WIL_WMI_CALL_GENERAL_TO_MS 100 +#define WIL_EXTENDED_MCS_26 (26) /* FW reports MCS 12.1 to driver as "26" */ +#define WIL_BASE_MCS_FOR_EXTENDED_26 (7) /* MCS 7 is base MCS for MCS 12.1 */ +#define WIL_EXTENDED_MCS_CHECK(x) (((x) == WIL_EXTENDED_MCS_26) ? "12.1" : #x) /* Hardware offload block adds the following: * 26 bytes - 3-address QoS data header diff --git a/drivers/net/wireless/ath/wil6210/wmi.c b/drivers/net/wireless/ath/wil6210/wmi.c index 8699f8279a8be..823ec6e78a22c 100644 --- a/drivers/net/wireless/ath/wil6210/wmi.c +++ b/drivers/net/wireless/ath/wil6210/wmi.c @@ -851,9 +851,9 @@ static void wmi_evt_rx_mgmt(struct wil6210_vif *vif, int id, void *d, int len) d_status = le16_to_cpu(data->info.status); fc = rx_mgmt_frame->frame_control; - wil_dbg_wmi(wil, "MGMT Rx: channel %d MCS %d RSSI %d SQI %d%%\n", - data->info.channel, data->info.mcs, data->info.rssi, - data->info.sqi); + wil_dbg_wmi(wil, "MGMT Rx: channel %d MCS %s RSSI %d SQI %d%%\n", + data->info.channel, WIL_EXTENDED_MCS_CHECK(data->info.mcs), + data->info.rssi, data->info.sqi); wil_dbg_wmi(wil, "status 0x%04x len %d fc 0x%04x\n", d_status, d_len, le16_to_cpu(fc)); wil_dbg_wmi(wil, "qid %d mid %d cid %d\n", @@ -1422,8 +1422,9 @@ wmi_evt_sched_scan_result(struct wil6210_vif *vif, int id, void *d, int len) else signal = data->info.sqi; - wil_dbg_wmi(wil, "sched scan result: channel %d MCS %d RSSI %d\n", - data->info.channel, data->info.mcs, data->info.rssi); + wil_dbg_wmi(wil, "sched scan result: channel %d MCS %s RSSI %d\n", + data->info.channel, WIL_EXTENDED_MCS_CHECK(data->info.mcs), + data->info.rssi); wil_dbg_wmi(wil, "len %d qid %d mid %d cid %d\n", d_len, data->info.qid, data->info.mid, data->info.cid); wil_hex_dump_wmi("PROBE ", DUMP_PREFIX_OFFSET, 16, 1, rx_mgmt_frame, -- GitLab